diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 011ad7aedeaf..a8c6fb5c9c7d 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -1,1066 +1,1078 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/conf.h>
 #include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_ecn.h>
 #ifdef	INET
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #endif	/* INET */
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 
 #include <netinet/ip_encap.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_gif.h>
 
 #include <security/mac/mac_framework.h>
 
 static const char gifname[] = "gif";
 
 /*
  * gif_mtx protects a per-vnet gif_softc_list.
  */
 static VNET_DEFINE(struct mtx, gif_mtx);
 #define	V_gif_mtx		VNET(gif_mtx)
 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
 #define	V_gif_softc_list	VNET(gif_softc_list)
 static struct sx gif_ioctl_sx;
 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
 
 #define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
 					    NULL, MTX_DEF)
 #define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
 #define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
 #define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
 
 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void	(*ng_gif_attach_p)(struct ifnet *ifp);
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
 static int	gif_check_nesting(struct ifnet *, struct mbuf *);
 static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
     struct sockaddr *);
 static void	gif_delete_tunnel(struct ifnet *);
 static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gif_transmit(struct ifnet *, struct mbuf *);
 static void	gif_qflush(struct ifnet *);
 static int	gif_clone_create(struct if_clone *, int, caddr_t);
 static void	gif_clone_destroy(struct ifnet *);
 static VNET_DEFINE(struct if_clone *, gif_cloner);
 #define	V_gif_cloner	VNET(gif_cloner)
 
 static int gifmodevent(module_t, int, void *);
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
     "Generic Tunnel Interface");
 #ifndef MAX_GIF_NEST
 /*
  * This macro controls the default upper limitation on nesting of gif tunnels.
  * Since, setting a large value to this macro with a careless configuration
  * may introduce system crash, we don't allow any nestings by default.
  * If you need to configure nested gif tunnels, you can define this macro
  * in your kernel configuration file.  However, if you do so, please be
  * careful to configure the tunnels so that it won't make a loop.
  */
 #define MAX_GIF_NEST 1
 #endif
 static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
 #define	V_max_gif_nesting	VNET(max_gif_nesting)
 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
 
 /*
  * By default, we disallow creation of multiple tunnels between the same
  * pair of addresses.  Some applications require this functionality so
  * we allow control over this check here.
  */
 #ifdef XBONEHACK
 static VNET_DEFINE(int, parallel_tunnels) = 1;
 #else
 static VNET_DEFINE(int, parallel_tunnels) = 0;
 #endif
 #define	V_parallel_tunnels	VNET(parallel_tunnels)
 SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
     "Allow parallel tunnels?");
 
 static int
 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gif_softc *sc;
 
 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
 	GIF2IFP(sc) = if_alloc(IFT_GIF);
 	GIF_LOCK_INIT(sc);
 	GIF2IFP(sc)->if_softc = sc;
 	if_initname(GIF2IFP(sc), gifname, unit);
 
 	GIF2IFP(sc)->if_addrlen = 0;
 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
 #if 0
 	/* turn off ingress filter */
 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
 #endif
 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
 	GIF2IFP(sc)->if_transmit  = gif_transmit;
 	GIF2IFP(sc)->if_qflush  = gif_qflush;
 	GIF2IFP(sc)->if_output = gif_output;
 	GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
 	GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(GIF2IFP(sc));
 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	if (ng_gif_attach_p != NULL)
 		(*ng_gif_attach_p)(GIF2IFP(sc));
 
 	GIF_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
 	GIF_LIST_UNLOCK();
 	return (0);
 }
 
 static void
 gif_clone_destroy(struct ifnet *ifp)
 {
 	struct gif_softc *sc;
 
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
 	gif_delete_tunnel(ifp);
 	GIF_LIST_LOCK();
 	LIST_REMOVE(sc, gif_list);
 	GIF_LIST_UNLOCK();
 	if (ng_gif_detach_p != NULL)
 		(*ng_gif_detach_p)(ifp);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&gif_ioctl_sx);
 
 	if_free(ifp);
 	GIF_LOCK_DESTROY(sc);
 	free(sc, M_GIF);
 }
 
 static void
 vnet_gif_init(const void *unused __unused)
 {
 
 	LIST_INIT(&V_gif_softc_list);
 	GIF_LIST_LOCK_INIT();
 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
 	    gif_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_init, NULL);
 
 static void
 vnet_gif_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_gif_cloner);
 	GIF_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_uninit, NULL);
 
 static int
 gifmodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t gif_mod = {
 	"if_gif",
 	gifmodevent,
 	0
 };
 
 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gif, 1);
 
 int
 gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	GIF_RLOCK_TRACKER;
 	const struct ip *ip;
 	struct gif_softc *sc;
 	int ret;
 
 	sc = (struct gif_softc *)arg;
 	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
 		return (0);
 
 	ret = 0;
 	GIF_RLOCK(sc);
 
 	/* no physical address */
 	if (sc->gif_family == 0)
 		goto done;
 
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 #endif
 	case IPPROTO_ETHERIP:
 		break;
 	default:
 		goto done;
 	}
 
 	/* Bail on short packets */
 	M_ASSERTPKTHDR(m);
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto done;
 
 	ip = mtod(m, const struct ip *);
 	switch (ip->ip_v) {
 #ifdef INET
 	case 4:
 		if (sc->gif_family != AF_INET)
 			goto done;
 		ret = in_gif_encapcheck(m, off, proto, arg);
 		break;
 #endif
 #ifdef INET6
 	case 6:
 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
 			goto done;
 		if (sc->gif_family != AF_INET6)
 			goto done;
 		ret = in6_gif_encapcheck(m, off, proto, arg);
 		break;
 #endif
 	}
 done:
 	GIF_RUNLOCK(sc);
 	return (ret);
 }
 
 static int
 gif_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct gif_softc *sc;
 	struct etherip_header *eth;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	uint32_t t;
 #endif
 	uint32_t af;
 	uint8_t proto, ecn;
 	int error;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		goto err;
 	}
 #endif
 	error = ENETDOWN;
 	sc = ifp->if_softc;
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0 ||
 	    sc->gif_family == 0 ||
 	    (error = gif_check_nesting(ifp, m)) != 0) {
 		m_freem(m);
 		goto err;
 	}
 	/* Now pull back the af that we stashed in the csum_data. */
 	if (ifp->if_bridge)
 		af = AF_LINK;
 	else
 		af = m->m_pkthdr.csum_data;
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	M_SETFIB(m, sc->gif_fibnum);
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 	/* inner AF-specific encapsulation */
 	ecn = 0;
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		proto = IPPROTO_IPV4;
 		if (m->m_len < sizeof(struct ip))
 			m = m_pullup(m, sizeof(struct ip));
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		ip = mtod(m, struct ip *);
 		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &ecn, &ip->ip_tos);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		proto = IPPROTO_IPV6;
 		if (m->m_len < sizeof(struct ip6_hdr))
 			m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		t = 0;
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &t, &ip6->ip6_flow);
 		ecn = (ntohl(t) >> 20) & 0xff;
 		break;
 #endif
 	case AF_LINK:
 		proto = IPPROTO_ETHERIP;
 		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		eth = mtod(m, struct etherip_header *);
 		eth->eip_resvh = 0;
 		eth->eip_ver = ETHERIP_VERSION;
 		eth->eip_resvl = 0;
 		break;
 	default:
 		error = EAFNOSUPPORT;
 		m_freem(m);
 		goto err;
 	}
 	/* XXX should we check if our outer source is legal? */
 	/* dispatch to output logic based on outer AF */
 	switch (sc->gif_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gif_output(ifp, m, proto, ecn);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gif_output(ifp, m, proto, ecn);
 		break;
 #endif
 	default:
 		m_freem(m);
 	}
 err:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	return (error);
 }
 
 static void
 gif_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 #define	MTAG_GIF	1080679712
 static int
 gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
 {
 	struct m_tag *mtag;
 	int count;
 
 	/*
 	 * gif may cause infinite recursion calls when misconfigured.
 	 * We'll prevent this by detecting loops.
 	 *
 	 * High nesting level may cause stack exhaustion.
 	 * We'll prevent this by introducing upper limit.
 	 */
 	count = 1;
 	mtag = NULL;
 	while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
 			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
 			return (EIO);
 		}
 		count++;
 	}
 	if (count > V_max_gif_nesting) {
 		log(LOG_NOTICE,
 		    "%s: if_output recursively called too many times(%d)\n",
 		    if_name(ifp), count);
 		return (EIO);
 	}
 	mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
 	if (mtag == NULL)
 		return (ENOMEM);
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
 	return (0);
 }
 
 int
 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *ro)
 {
 	uint32_t af;
 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
 		af = dst->sa_family;
 	/*
 	 * Now save the af in the inbound pkt csum data, this is a cheat since
 	 * we are using the inbound csum_data field to carry the af over to
 	 * the gif_transmit() routine, avoiding using yet another mtag.
 	 */
 	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
 }
 
 void
 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
 {
 	struct etherip_header *eip;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	uint32_t t;
 #endif
 	struct ether_header *eh;
 	struct ifnet *oldifp;
 	int isr, n, af;
 
 	if (ifp == NULL) {
 		/* just in case */
 		m_freem(m);
 		return;
 	}
 	m->m_pkthdr.rcvif = ifp;
 	m_clrprotoflags(m);
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 		af = AF_INET;
 		if (m->m_len < sizeof(struct ip))
 			m = m_pullup(m, sizeof(struct ip));
 		if (m == NULL)
 			goto drop;
 		ip = mtod(m, struct ip *);
 		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
 			m_freem(m);
 			goto drop;
 		}
 		break;
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 		af = AF_INET6;
 		if (m->m_len < sizeof(struct ip6_hdr))
 			m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL)
 			goto drop;
 		t = htonl((uint32_t)ecn << 20);
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
 			m_freem(m);
 			goto drop;
 		}
 		break;
 #endif
 	case IPPROTO_ETHERIP:
 		af = AF_LINK;
 		break;
 	default:
 		m_freem(m);
 		goto drop;
 	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		uint32_t af1 = af;
 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
 	}
 
 	if ((ifp->if_flags & IFF_MONITOR) != 0) {
 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 		m_freem(m);
 		return;
 	}
 
 	if (ng_gif_input_p != NULL) {
 		(*ng_gif_input_p)(ifp, &m, af);
 		if (m == NULL)
 			goto drop;
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * Note: older versions of gif_input directly called network layer
 	 * input functions, e.g. ip6_input, here.  We changed the policy to
 	 * prevent too many recursive calls of such input functions, which
 	 * might cause kernel panic.  But the change may introduce another
 	 * problem; if the input queue is full, packets are discarded.
 	 * The kernel stack overflow really happened, and we believed
 	 * queue-full rarely occurs, so we changed the policy.
 	 */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	case AF_LINK:
 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
 		if (n > m->m_len)
 			m = m_pullup(m, n);
 		if (m == NULL)
 			goto drop;
 		eip = mtod(m, struct etherip_header *);
 		if (eip->eip_ver != ETHERIP_VERSION) {
 			/* discard unknown versions */
 			m_freem(m);
 			goto drop;
 		}
 		m_adj(m, sizeof(struct etherip_header));
 
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 		m->m_pkthdr.rcvif = ifp;
 
 		if (ifp->if_bridge) {
 			oldifp = ifp;
 			eh = mtod(m, struct ether_header *);
 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
 					m->m_flags |= M_BCAST;
 				else
 					m->m_flags |= M_MCAST;
 				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 			}
 			BRIDGE_INPUT(ifp, m);
 
 			if (m != NULL && ifp != oldifp) {
 				/*
 				 * The bridge gave us back itself or one of the
 				 * members for which the frame is addressed.
 				 */
 				ether_demux(ifp, m);
 				return;
 			}
 		}
 		if (m != NULL)
 			m_freem(m);
 		return;
 
 	default:
 		if (ng_gif_input_orphan_p != NULL)
 			(*ng_gif_input_orphan_p)(ifp, m, af);
 		else
 			m_freem(m);
 		return;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 }
 
 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
 int
 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	GIF_RLOCK_TRACKER;
 	struct ifreq *ifr = (struct ifreq*)data;
 	struct sockaddr *dst, *src;
 	struct gif_softc *sc;
 #ifdef INET
 	struct sockaddr_in *sin = NULL;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6 = NULL;
 #endif
 	u_int options;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 	case SIOCGIFMTU:
 	case SIOCSIFFLAGS:
 		return (0);
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu < GIF_MTU_MIN ||
 		    ifr->ifr_mtu > GIF_MTU_MAX)
 			return (EINVAL);
 		else
 			ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	}
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto bad;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFPHYADDR:
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif
 		error = EINVAL;
 		switch (cmd) {
 #ifdef INET
 		case SIOCSIFPHYADDR:
 			src = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 #ifdef INET6
 		case SIOCSIFPHYADDR_IN6:
 			src = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 		default:
 			goto bad;
 		}
 		/* sa_family must be equal */
 		if (src->sa_family != dst->sa_family ||
 		    src->sa_len != dst->sa_len)
 			goto bad;
 
 		/* validate sa_len */
 		/* check sa_family looks sane for the cmd */
 		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (src->sa_len != sizeof(struct sockaddr_in))
 				goto bad;
 			if (cmd != SIOCSIFPHYADDR) {
 				error = EAFNOSUPPORT;
 				goto bad;
 			}
 			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
 			    satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
 				error = EADDRNOTAVAIL;
 				goto bad;
 			}
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (src->sa_len != sizeof(struct sockaddr_in6))
 				goto bad;
 			if (cmd != SIOCSIFPHYADDR_IN6) {
 				error = EAFNOSUPPORT;
 				goto bad;
 			}
 			error = EADDRNOTAVAIL;
 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
 			    ||
 			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
 				goto bad;
 			/*
 			 * Check validity of the scope zone ID of the
 			 * addresses, and convert it into the kernel
 			 * internal form if necessary.
 			 */
 			error = sa6_embedscope(satosin6(src), 0);
 			if (error != 0)
 				goto bad;
 			error = sa6_embedscope(satosin6(dst), 0);
 			if (error != 0)
 				goto bad;
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			goto bad;
 		}
 		error = gif_set_tunnel(ifp, src, dst);
 		break;
 	case SIOCDIFPHYADDR:
 		gif_delete_tunnel(ifp);
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 #ifdef INET6
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 #endif
 		if (sc->gif_family == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		GIF_RLOCK(sc);
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
 		case SIOCGIFPDSTADDR:
 			if (sc->gif_family != AF_INET) {
 				error = EADDRNOTAVAIL;
 				break;
 			}
 			sin = (struct sockaddr_in *)&ifr->ifr_addr;
 			memset(sin, 0, sizeof(*sin));
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
 			break;
 #endif
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
 		case SIOCGIFPDSTADDR_IN6:
 			if (sc->gif_family != AF_INET6) {
 				error = EADDRNOTAVAIL;
 				break;
 			}
 			sin6 = (struct sockaddr_in6 *)
 				&(((struct in6_ifreq *)data)->ifr_addr);
 			memset(sin6, 0, sizeof(*sin6));
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 		}
 		if (error == 0) {
 			switch (cmd) {
 #ifdef INET
 			case SIOCGIFPSRCADDR:
 				sin->sin_addr = sc->gif_iphdr->ip_src;
 				break;
 			case SIOCGIFPDSTADDR:
 				sin->sin_addr = sc->gif_iphdr->ip_dst;
 				break;
 #endif
 #ifdef INET6
 			case SIOCGIFPSRCADDR_IN6:
 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
 				break;
 			case SIOCGIFPDSTADDR_IN6:
 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
 				break;
 #endif
 			}
 		}
 		GIF_RUNLOCK(sc);
 		if (error != 0)
 			break;
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
 		case SIOCGIFPDSTADDR:
 			error = prison_if(curthread->td_ucred,
 			    (struct sockaddr *)sin);
 			if (error != 0)
 				memset(sin, 0, sizeof(*sin));
 			break;
 #endif
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
 		case SIOCGIFPDSTADDR_IN6:
 			error = prison_if(curthread->td_ucred,
 			    (struct sockaddr *)sin6);
 			if (error == 0)
 				error = sa6_recoverscope(sin6);
 			if (error != 0)
 				memset(sin6, 0, sizeof(*sin6));
 #endif
 		}
 		break;
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->gif_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->gif_fibnum = ifr->ifr_fib;
 		break;
 	case GIFGOPTS:
 		options = sc->gif_options;
 		error = copyout(&options, ifr_data_get_ptr(ifr),
 		    sizeof(options));
 		break;
 	case GIFSOPTS:
 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
 			break;
 		error = copyin(ifr_data_get_ptr(ifr), &options,
 		    sizeof(options));
 		if (error)
 			break;
 		if (options & ~GIF_OPTMASK)
 			error = EINVAL;
 		else
 			sc->gif_options = options;
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 bad:
 	sx_xunlock(&gif_ioctl_sx);
 	return (error);
 }
 
 static void
-gif_detach(struct gif_softc *sc)
+gif_detach(struct gif_softc *sc, int family)
 {
 
 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
-	if (sc->gif_ecookie != NULL)
-		encap_detach(sc->gif_ecookie);
+	if (sc->gif_ecookie != NULL) {
+		switch (family) {
+#ifdef INET
+		case AF_INET:
+			ip_encap_detach(sc->gif_ecookie);
+			break;
+#endif
+#ifdef INET6
+		case AF_INET6:
+			ip6_encap_detach(sc->gif_ecookie);
+			break;
+#endif
+		}
+	}
 	sc->gif_ecookie = NULL;
 }
 
 static int
 gif_attach(struct gif_softc *sc, int af)
 {
 
 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		return (in_gif_attach(sc));
 #endif
 #ifdef INET6
 	case AF_INET6:
 		return (in6_gif_attach(sc));
 #endif
 	}
 	return (EAFNOSUPPORT);
 }
 
 static int
 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
 {
 	struct gif_softc *sc = ifp->if_softc;
 	struct gif_softc *tsc;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	void *hdr;
 	int error = 0;
 
 	if (sc == NULL)
 		return (ENXIO);
 	/* Disallow parallel tunnels unless instructed otherwise. */
 	if (V_parallel_tunnels == 0) {
 		GIF_LIST_LOCK();
 		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
 			if (tsc == sc || tsc->gif_family != src->sa_family)
 				continue;
 #ifdef INET
 			if (tsc->gif_family == AF_INET &&
 			    tsc->gif_iphdr->ip_src.s_addr ==
 			    satosin(src)->sin_addr.s_addr &&
 			    tsc->gif_iphdr->ip_dst.s_addr ==
 			    satosin(dst)->sin_addr.s_addr) {
 				error = EADDRNOTAVAIL;
 				GIF_LIST_UNLOCK();
 				goto bad;
 			}
 #endif
 #ifdef INET6
 			if (tsc->gif_family == AF_INET6 &&
 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
 			    &satosin6(src)->sin6_addr) &&
 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
 			    &satosin6(dst)->sin6_addr)) {
 				error = EADDRNOTAVAIL;
 				GIF_LIST_UNLOCK();
 				goto bad;
 			}
 #endif
 		}
 		GIF_LIST_UNLOCK();
 	}
 	switch (src->sa_family) {
 #ifdef INET
 	case AF_INET:
 		hdr = ip = malloc(sizeof(struct ip), M_GIF,
 		    M_WAITOK | M_ZERO);
 		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
 		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
 		    M_WAITOK | M_ZERO);
 		ip6->ip6_src = satosin6(src)->sin6_addr;
 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
 		ip6->ip6_vfc = IPV6_VERSION;
 		break;
 #endif
 	default:
 		return (EAFNOSUPPORT);
 	}
 
 	if (sc->gif_family != src->sa_family)
-		gif_detach(sc);
+		gif_detach(sc, sc->gif_family);
 	if (sc->gif_family == 0 ||
 	    sc->gif_family != src->sa_family)
 		error = gif_attach(sc, src->sa_family);
 
 	GIF_WLOCK(sc);
 	if (sc->gif_family != 0)
 		free(sc->gif_hdr, M_GIF);
 	sc->gif_family = src->sa_family;
 	sc->gif_hdr = hdr;
 	GIF_WUNLOCK(sc);
 #if defined(INET) || defined(INET6)
 bad:
 #endif
 	if (error == 0 && sc->gif_family != 0) {
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else {
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 	return (error);
 }
 
 static void
 gif_delete_tunnel(struct ifnet *ifp)
 {
 	struct gif_softc *sc = ifp->if_softc;
 	int family;
 
 	if (sc == NULL)
 		return;
 
 	GIF_WLOCK(sc);
 	family = sc->gif_family;
 	sc->gif_family = 0;
 	GIF_WUNLOCK(sc);
 	if (family != 0) {
-		gif_detach(sc);
+		gif_detach(sc, family);
 		free(sc->gif_hdr, M_GIF);
 	}
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 }
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index a3e578e4cad3..2160f350bf9b 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -1,999 +1,1006 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #ifdef INET
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netinet/ip_encap.h>
 #include <net/bpf.h>
 #include <net/if_gre.h>
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 #define	GREMTU			1476
 static const char grename[] = "gre";
 static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
 static VNET_DEFINE(struct mtx, gre_mtx);
 #define	V_gre_mtx	VNET(gre_mtx)
 #define	GRE_LIST_LOCK_INIT(x)		mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
 					    MTX_DEF)
 #define	GRE_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gre_mtx)
 #define	GRE_LIST_LOCK(x)		mtx_lock(&V_gre_mtx)
 #define	GRE_LIST_UNLOCK(x)		mtx_unlock(&V_gre_mtx)
 
 static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
 #define	V_gre_softc_list	VNET(gre_softc_list)
 static struct sx gre_ioctl_sx;
 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
 
 static int	gre_clone_create(struct if_clone *, int, caddr_t);
 static void	gre_clone_destroy(struct ifnet *);
 static VNET_DEFINE(struct if_clone *, gre_cloner);
 #define	V_gre_cloner	VNET(gre_cloner)
 
 static void	gre_qflush(struct ifnet *);
 static int	gre_transmit(struct ifnet *, struct mbuf *);
 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gre_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 
 static void	gre_updatehdr(struct gre_softc *);
 static int	gre_set_tunnel(struct ifnet *, struct sockaddr *,
     struct sockaddr *);
 static void	gre_delete_tunnel(struct ifnet *);
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
     "Generic Routing Encapsulation");
 #ifndef MAX_GRE_NEST
 /*
  * This macro controls the default upper limitation on nesting of gre tunnels.
  * Since, setting a large value to this macro with a careless configuration
  * may introduce system crash, we don't allow any nestings by default.
  * If you need to configure nested gre tunnels, you can define this macro
  * in your kernel configuration file.  However, if you do so, please be
  * careful to configure the tunnels so that it won't make a loop.
  */
 #define MAX_GRE_NEST 1
 #endif
 
 static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
 #define	V_max_gre_nesting	VNET(max_gre_nesting)
 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
 
 static void
 vnet_gre_init(const void *unused __unused)
 {
 	LIST_INIT(&V_gre_softc_list);
 	GRE_LIST_LOCK_INIT();
 	V_gre_cloner = if_clone_simple(grename, gre_clone_create,
 	    gre_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_init, NULL);
 
 static void
 vnet_gre_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_gre_cloner);
 	GRE_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_uninit, NULL);
 
 static int
 gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gre_softc *sc;
 
 	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
 	sc->gre_fibnum = curthread->td_proc->p_fibnum;
 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
 	GRE_LOCK_INIT(sc);
 	GRE2IFP(sc)->if_softc = sc;
 	if_initname(GRE2IFP(sc), grename, unit);
 
 	GRE2IFP(sc)->if_mtu = GREMTU;
 	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
 	GRE2IFP(sc)->if_output = gre_output;
 	GRE2IFP(sc)->if_ioctl = gre_ioctl;
 	GRE2IFP(sc)->if_transmit = gre_transmit;
 	GRE2IFP(sc)->if_qflush = gre_qflush;
 	GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
 	GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(GRE2IFP(sc));
 	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	GRE_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
 	GRE_LIST_UNLOCK();
 	return (0);
 }
 
 static void
 gre_clone_destroy(struct ifnet *ifp)
 {
 	struct gre_softc *sc;
 
 	sx_xlock(&gre_ioctl_sx);
 	sc = ifp->if_softc;
 	gre_delete_tunnel(ifp);
 	GRE_LIST_LOCK();
 	LIST_REMOVE(sc, gre_list);
 	GRE_LIST_UNLOCK();
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&gre_ioctl_sx);
 
 	if_free(ifp);
 	GRE_LOCK_DESTROY(sc);
 	free(sc, M_GRE);
 }
 
 static int
 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	GRE_RLOCK_TRACKER;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr *src, *dst;
 	struct gre_softc *sc;
 #ifdef INET
 	struct sockaddr_in *sin = NULL;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6 = NULL;
 #endif
 	uint32_t opt;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		 /* XXX: */
 		if (ifr->ifr_mtu < 576)
 			return (EINVAL);
 		ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 	case SIOCSIFFLAGS:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		return (0);
 	case GRESADDRS:
 	case GRESADDRD:
 	case GREGADDRS:
 	case GREGADDRD:
 	case GRESPROTO:
 	case GREGPROTO:
 		return (EOPNOTSUPP);
 	}
 	src = dst = NULL;
 	sx_xlock(&gre_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto end;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFPHYADDR:
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif
 		error = EINVAL;
 		switch (cmd) {
 #ifdef INET
 		case SIOCSIFPHYADDR:
 			src = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 #ifdef INET6
 		case SIOCSIFPHYADDR_IN6:
 			src = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			goto end;
 		}
 		/* sa_family must be equal */
 		if (src->sa_family != dst->sa_family ||
 		    src->sa_len != dst->sa_len)
 			goto end;
 
 		/* validate sa_len */
 		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (src->sa_len != sizeof(struct sockaddr_in))
 				goto end;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (src->sa_len != sizeof(struct sockaddr_in6))
 				goto end;
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			goto end;
 		}
 		/* check sa_family looks sane for the cmd */
 		error = EAFNOSUPPORT;
 		switch (cmd) {
 #ifdef INET
 		case SIOCSIFPHYADDR:
 			if (src->sa_family == AF_INET)
 				break;
 			goto end;
 #endif
 #ifdef INET6
 		case SIOCSIFPHYADDR_IN6:
 			if (src->sa_family == AF_INET6)
 				break;
 			goto end;
 #endif
 		}
 		error = EADDRNOTAVAIL;
 		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
 			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
 				goto end;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
 			    ||
 			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
 				goto end;
 			/*
 			 * Check validity of the scope zone ID of the
 			 * addresses, and convert it into the kernel
 			 * internal form if necessary.
 			 */
 			error = sa6_embedscope(satosin6(src), 0);
 			if (error != 0)
 				goto end;
 			error = sa6_embedscope(satosin6(dst), 0);
 			if (error != 0)
 				goto end;
 #endif
 		}
 		error = gre_set_tunnel(ifp, src, dst);
 		break;
 	case SIOCDIFPHYADDR:
 		gre_delete_tunnel(ifp);
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 #ifdef INET6
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 #endif
 		if (sc->gre_family == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		GRE_RLOCK(sc);
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
 		case SIOCGIFPDSTADDR:
 			if (sc->gre_family != AF_INET) {
 				error = EADDRNOTAVAIL;
 				break;
 			}
 			sin = (struct sockaddr_in *)&ifr->ifr_addr;
 			memset(sin, 0, sizeof(*sin));
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
 			break;
 #endif
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
 		case SIOCGIFPDSTADDR_IN6:
 			if (sc->gre_family != AF_INET6) {
 				error = EADDRNOTAVAIL;
 				break;
 			}
 			sin6 = (struct sockaddr_in6 *)
 				&(((struct in6_ifreq *)data)->ifr_addr);
 			memset(sin6, 0, sizeof(*sin6));
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
 			break;
 #endif
 		}
 		if (error == 0) {
 			switch (cmd) {
 #ifdef INET
 			case SIOCGIFPSRCADDR:
 				sin->sin_addr = sc->gre_oip.ip_src;
 				break;
 			case SIOCGIFPDSTADDR:
 				sin->sin_addr = sc->gre_oip.ip_dst;
 				break;
 #endif
 #ifdef INET6
 			case SIOCGIFPSRCADDR_IN6:
 				sin6->sin6_addr = sc->gre_oip6.ip6_src;
 				break;
 			case SIOCGIFPDSTADDR_IN6:
 				sin6->sin6_addr = sc->gre_oip6.ip6_dst;
 				break;
 #endif
 			}
 		}
 		GRE_RUNLOCK(sc);
 		if (error != 0)
 			break;
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
 		case SIOCGIFPDSTADDR:
 			error = prison_if(curthread->td_ucred,
 			    (struct sockaddr *)sin);
 			if (error != 0)
 				memset(sin, 0, sizeof(*sin));
 			break;
 #endif
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
 		case SIOCGIFPDSTADDR_IN6:
 			error = prison_if(curthread->td_ucred,
 			    (struct sockaddr *)sin6);
 			if (error == 0)
 				error = sa6_recoverscope(sin6);
 			if (error != 0)
 				memset(sin6, 0, sizeof(*sin6));
 #endif
 		}
 		break;
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->gre_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->gre_fibnum = ifr->ifr_fib;
 		break;
 	case GRESKEY:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
 		    sizeof(opt))) != 0)
 			break;
 		if (sc->gre_key != opt) {
 			GRE_WLOCK(sc);
 			sc->gre_key = opt;
 			gre_updatehdr(sc);
 			GRE_WUNLOCK(sc);
 		}
 		break;
 	case GREGKEY:
 		error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_key));
 		break;
 	case GRESOPTS:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
 		    sizeof(opt))) != 0)
 			break;
 		if (opt & ~GRE_OPTMASK)
 			error = EINVAL;
 		else {
 			if (sc->gre_options != opt) {
 				GRE_WLOCK(sc);
 				sc->gre_options = opt;
 				gre_updatehdr(sc);
 				GRE_WUNLOCK(sc);
 			}
 		}
 		break;
 
 	case GREGOPTS:
 		error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_options));
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 end:
 	sx_xunlock(&gre_ioctl_sx);
 	return (error);
 }
 
 static void
 gre_updatehdr(struct gre_softc *sc)
 {
 	struct grehdr *gh = NULL;
 	uint32_t *opts;
 	uint16_t flags;
 
 	GRE_WLOCK_ASSERT(sc);
 	switch (sc->gre_family) {
 #ifdef INET
 	case AF_INET:
 		sc->gre_hlen = sizeof(struct greip);
 		sc->gre_oip.ip_v = IPPROTO_IPV4;
 		sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
 		sc->gre_oip.ip_p = IPPROTO_GRE;
 		gh = &sc->gre_gihdr->gi_gre;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sc->gre_hlen = sizeof(struct greip6);
 		sc->gre_oip6.ip6_vfc = IPV6_VERSION;
 		sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
 		gh = &sc->gre_gi6hdr->gi6_gre;
 		break;
 #endif
 	default:
 		return;
 	}
 	flags = 0;
 	opts = gh->gre_opts;
 	if (sc->gre_options & GRE_ENABLE_CSUM) {
 		flags |= GRE_FLAGS_CP;
 		sc->gre_hlen += 2 * sizeof(uint16_t);
 		*opts++ = 0;
 	}
 	if (sc->gre_key != 0) {
 		flags |= GRE_FLAGS_KP;
 		sc->gre_hlen += sizeof(uint32_t);
 		*opts++ = htonl(sc->gre_key);
 	}
 	if (sc->gre_options & GRE_ENABLE_SEQ) {
 		flags |= GRE_FLAGS_SP;
 		sc->gre_hlen += sizeof(uint32_t);
 		*opts++ = 0;
 	} else
 		sc->gre_oseq = 0;
 	gh->gre_flags = htons(flags);
 }
 
 static void
-gre_detach(struct gre_softc *sc)
+gre_detach(struct gre_softc *sc, int family)
 {
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
-	if (sc->gre_ecookie != NULL)
-		encap_detach(sc->gre_ecookie);
+	if (sc->gre_ecookie != NULL) {
+		switch (family) {
+#ifdef INET
+		case AF_INET:
+			ip_encap_detach(sc->gre_ecookie);
+			break;
+#endif
+#ifdef INET6
+		case AF_INET6:
+			ip6_encap_detach(sc->gre_ecookie);
+			break;
+#endif
+		}
+	}
 	sc->gre_ecookie = NULL;
 }
 
 static int
 gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
     struct sockaddr *dst)
 {
 	struct gre_softc *sc, *tsc;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 #ifdef INET
 	struct ip *ip;
 #endif
 	void *hdr;
 	int error;
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
 	GRE_LIST_LOCK();
 	sc = ifp->if_softc;
 	LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
 		if (tsc == sc || tsc->gre_family != src->sa_family)
 			continue;
 #ifdef INET
 		if (tsc->gre_family == AF_INET &&
 		    tsc->gre_oip.ip_src.s_addr ==
 		    satosin(src)->sin_addr.s_addr &&
 		    tsc->gre_oip.ip_dst.s_addr ==
 		    satosin(dst)->sin_addr.s_addr) {
 			GRE_LIST_UNLOCK();
 			return (EADDRNOTAVAIL);
 		}
 #endif
 #ifdef INET6
 		if (tsc->gre_family == AF_INET6 &&
 		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
 		    &satosin6(src)->sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
 			&satosin6(dst)->sin6_addr)) {
 			GRE_LIST_UNLOCK();
 			return (EADDRNOTAVAIL);
 		}
 #endif
 	}
 	GRE_LIST_UNLOCK();
 
 	switch (src->sa_family) {
 #ifdef INET
 	case AF_INET:
 		hdr = ip = malloc(sizeof(struct greip) +
 		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
 		ip->ip_src = satosin(src)->sin_addr;
 		ip->ip_dst = satosin(dst)->sin_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		hdr = ip6 = malloc(sizeof(struct greip6) +
 		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
 		ip6->ip6_src = satosin6(src)->sin6_addr;
 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
 		break;
 #endif
 	default:
 		return (EAFNOSUPPORT);
 	}
 	if (sc->gre_family != 0)
-		gre_detach(sc);
+		gre_detach(sc, sc->gre_family);
 	GRE_WLOCK(sc);
 	if (sc->gre_family != 0)
 		free(sc->gre_hdr, M_GRE);
 	sc->gre_family = src->sa_family;
 	sc->gre_hdr = hdr;
 	sc->gre_oseq = 0;
 	sc->gre_iseq = UINT32_MAX;
 	gre_updatehdr(sc);
 	GRE_WUNLOCK(sc);
 
 	error = 0;
 	switch (src->sa_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gre_attach(sc);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gre_attach(sc);
 		break;
 #endif
 	}
 	if (error == 0) {
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		if_link_state_change(ifp, LINK_STATE_UP);
 	}
 	return (error);
 }
 
 static void
 gre_delete_tunnel(struct ifnet *ifp)
 {
 	struct gre_softc *sc = ifp->if_softc;
 	int family;
 
 	GRE_WLOCK(sc);
 	family = sc->gre_family;
 	sc->gre_family = 0;
 	GRE_WUNLOCK(sc);
 	if (family != 0) {
-		gre_detach(sc);
+		gre_detach(sc, family);
 		free(sc->gre_hdr, M_GRE);
 	}
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 }
 
 int
-gre_input(struct mbuf **mp, int *offp, int proto)
+gre_input(struct mbuf *m, int off, int proto, void *arg)
 {
-	struct gre_softc *sc;
+	struct gre_softc *sc = arg;
 	struct grehdr *gh;
 	struct ifnet *ifp;
-	struct mbuf *m;
 	uint32_t *opts;
 #ifdef notyet
 	uint32_t key;
 #endif
 	uint16_t flags;
 	int hlen, isr, af;
 
-	m = *mp;
-	sc = encap_getarg(m);
-	KASSERT(sc != NULL, ("encap_getarg returned NULL"));
-
 	ifp = GRE2IFP(sc);
-	hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+	hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
 	if (m->m_pkthdr.len < hlen)
 		goto drop;
 	if (m->m_len < hlen) {
 		m = m_pullup(m, hlen);
 		if (m == NULL)
 			goto drop;
 	}
-	gh = (struct grehdr *)mtodo(m, *offp);
+	gh = (struct grehdr *)mtodo(m, off);
 	flags = ntohs(gh->gre_flags);
 	if (flags & ~GRE_FLAGS_MASK)
 		goto drop;
 	opts = gh->gre_opts;
 	hlen = 2 * sizeof(uint16_t);
 	if (flags & GRE_FLAGS_CP) {
 		/* reserved1 field must be zero */
 		if (((uint16_t *)opts)[1] != 0)
 			goto drop;
-		if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+		if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
 			goto drop;
 		hlen += 2 * sizeof(uint16_t);
 		opts++;
 	}
 	if (flags & GRE_FLAGS_KP) {
 #ifdef notyet
         /* 
          * XXX: The current implementation uses the key only for outgoing
          * packets. But we can check the key value here, or even in the
          * encapcheck function.
          */
 		key = ntohl(*opts);
 #endif
 		hlen += sizeof(uint32_t);
 		opts++;
     }
 #ifdef notyet
 	} else
 		key = 0;
 
 	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
 		goto drop;
 #endif
 	if (flags & GRE_FLAGS_SP) {
 #ifdef notyet
 		seq = ntohl(*opts);
 #endif
 		hlen += sizeof(uint32_t);
 	}
 	switch (ntohs(gh->gre_proto)) {
 	case ETHERTYPE_WCCP:
 		/*
 		 * For WCCP skip an additional 4 bytes if after GRE header
 		 * doesn't follow an IP header.
 		 */
 		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
 			hlen += sizeof(uint32_t);
 		/* FALLTHROUGH */
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		af = AF_INET;
 		break;
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		af = AF_INET6;
 		break;
 	default:
 		goto drop;
 	}
-	m_adj(m, *offp + hlen);
+	m_adj(m, off + hlen);
 	m_clrprotoflags(m);
 	m->m_pkthdr.rcvif = ifp;
 	M_SETFIB(m, ifp->if_fib);
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	if ((ifp->if_flags & IFF_MONITOR) != 0)
 		m_freem(m);
 	else
 		netisr_dispatch(isr, m);
 	return (IPPROTO_DONE);
 drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 #define	MTAG_GRE	1307983903
 static int
 gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
 {
 	struct m_tag *mtag;
 	int count;
 
 	count = 1;
 	mtag = NULL;
 	while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
 			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
 			return (EIO);
 		}
 		count++;
 	}
 	if (count > V_max_gre_nesting) {
 		log(LOG_NOTICE,
 		    "%s: if_output recursively called too many times(%d)\n",
 		    ifp->if_xname, count);
 		return (EIO);
 	}
 	mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
 	if (mtag == NULL)
 		return (ENOMEM);
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
 	return (0);
 }
 
 static int
 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *ro)
 {
 	uint32_t af;
 	int error;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error != 0)
 		goto drop;
 #endif
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	error = gre_check_nesting(ifp, m);
 	if (error != 0)
 		goto drop;
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
 		af = dst->sa_family;
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	m->m_pkthdr.csum_data = af;	/* save af for if_transmit */
 	return (ifp->if_transmit(ifp, m));
 drop:
 	m_freem(m);
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	return (error);
 }
 
 static void
 gre_setseqn(struct grehdr *gh, uint32_t seq)
 {
 	uint32_t *opts;
 	uint16_t flags;
 
 	opts = gh->gre_opts;
 	flags = ntohs(gh->gre_flags);
 	KASSERT((flags & GRE_FLAGS_SP) != 0,
 	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
 	if (flags & GRE_FLAGS_CP)
 		opts++;
 	if (flags & GRE_FLAGS_KP)
 		opts++;
 	*opts = htonl(seq);
 }
 
 static int
 gre_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct grehdr *gh;
 	uint32_t iaf, oaf, oseq;
 	int error, hlen, olen, plen;
 	int want_seq, want_csum;
 
 	plen = 0;
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENETDOWN;
 		m_freem(m);
 		goto drop;
 	}
 	GRE_RLOCK(sc);
 	if (sc->gre_family == 0) {
 		GRE_RUNLOCK(sc);
 		error = ENETDOWN;
 		m_freem(m);
 		goto drop;
 	}
 	iaf = m->m_pkthdr.csum_data;
 	oaf = sc->gre_family;
 	hlen = sc->gre_hlen;
 	want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
 	if (want_seq)
 		oseq = sc->gre_oseq++; /* XXX */
 	else
 		oseq = 0;		/* Make compiler happy. */
 	want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
 	M_SETFIB(m, sc->gre_fibnum);
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL) {
 		GRE_RUNLOCK(sc);
 		error = ENOBUFS;
 		goto drop;
 	}
 	bcopy(sc->gre_hdr, mtod(m, void *), hlen);
 	GRE_RUNLOCK(sc);
 	switch (oaf) {
 #ifdef INET
 	case AF_INET:
 		olen = sizeof(struct ip);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		olen = sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		error = ENETDOWN;
 		goto drop;
 	}
 	gh = (struct grehdr *)mtodo(m, olen);
 	switch (iaf) {
 #ifdef INET
 	case AF_INET:
 		gh->gre_proto = htons(ETHERTYPE_IP);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		gh->gre_proto = htons(ETHERTYPE_IPV6);
 		break;
 #endif
 	default:
 		error = ENETDOWN;
 		goto drop;
 	}
 	if (want_seq)
 		gre_setseqn(gh, oseq);
 	if (want_csum) {
 		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
 		    m->m_pkthdr.len, olen);
 	}
 	plen = m->m_pkthdr.len - hlen;
 	switch (oaf) {
 #ifdef INET
 	case AF_INET:
 		error = in_gre_output(m, iaf, hlen);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gre_output(m, iaf, hlen);
 		break;
 #endif
 	default:
 		m_freem(m);
 		error = ENETDOWN;
 	}
 drop:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	else {
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 	}
 	return (error);
 }
 
 static void
 gre_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 static int
 gremodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t gre_mod = {
 	"if_gre",
 	gremodevent,
 	0
 };
 
 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gre, 1);
diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h
index 0eac9e9f33b0..1a068d4b1118 100644
--- a/sys/net/if_gre.h
+++ b/sys/net/if_gre.h
@@ -1,137 +1,137 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
  * $FreeBSD$
  */
 
 #ifndef _NET_IF_GRE_H_
 #define _NET_IF_GRE_H_
 
 #ifdef _KERNEL
 /* GRE header according to RFC 2784 and RFC 2890 */
 struct grehdr {
 	uint16_t	gre_flags;	/* GRE flags */
 #define	GRE_FLAGS_CP	0x8000		/* checksum present */
 #define	GRE_FLAGS_KP	0x2000		/* key present */
 #define	GRE_FLAGS_SP	0x1000		/* sequence present */
 #define	GRE_FLAGS_MASK	(GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
 	uint16_t	gre_proto;	/* protocol type */
 	uint32_t	gre_opts[0];	/* optional fields */
 } __packed;
 
 #ifdef INET
 struct greip {
 	struct ip	gi_ip;
 	struct grehdr	gi_gre;
 } __packed;
 #endif
 
 #ifdef INET6
 struct greip6 {
 	struct ip6_hdr	gi6_ip6;
 	struct grehdr	gi6_gre;
 } __packed;
 #endif
 
 struct gre_softc {
 	struct ifnet		*gre_ifp;
 	LIST_ENTRY(gre_softc)	gre_list;
 	struct rmlock		gre_lock;
 	int			gre_family;	/* AF of delivery header */
 	uint32_t		gre_iseq;
 	uint32_t		gre_oseq;
 	uint32_t		gre_key;
 	uint32_t		gre_options;
 	u_int			gre_fibnum;
 	u_int			gre_hlen;	/* header size */
 	union {
 		void		*hdr;
 #ifdef INET
 		struct greip	*gihdr;
 #endif
 #ifdef INET6
 		struct greip6	*gi6hdr;
 #endif
 	} gre_uhdr;
 	const struct encaptab	*gre_ecookie;
 };
 #define	GRE2IFP(sc)		((sc)->gre_ifp)
 #define	GRE_LOCK_INIT(sc)	rm_init(&(sc)->gre_lock, "gre softc")
 #define	GRE_LOCK_DESTROY(sc)	rm_destroy(&(sc)->gre_lock)
 #define	GRE_RLOCK_TRACKER	struct rm_priotracker gre_tracker
 #define	GRE_RLOCK(sc)		rm_rlock(&(sc)->gre_lock, &gre_tracker)
 #define	GRE_RUNLOCK(sc)		rm_runlock(&(sc)->gre_lock, &gre_tracker)
 #define	GRE_RLOCK_ASSERT(sc)	rm_assert(&(sc)->gre_lock, RA_RLOCKED)
 #define	GRE_WLOCK(sc)		rm_wlock(&(sc)->gre_lock)
 #define	GRE_WUNLOCK(sc)		rm_wunlock(&(sc)->gre_lock)
 #define	GRE_WLOCK_ASSERT(sc)	rm_assert(&(sc)->gre_lock, RA_WLOCKED)
 
 #define	gre_hdr			gre_uhdr.hdr
 #define	gre_gihdr		gre_uhdr.gihdr
 #define	gre_gi6hdr		gre_uhdr.gi6hdr
 #define	gre_oip			gre_gihdr->gi_ip
 #define	gre_oip6		gre_gi6hdr->gi6_ip6
 
-int	gre_input(struct mbuf **, int *, int);
+int	gre_input(struct mbuf *, int, int, void *);
 #ifdef INET
 int	in_gre_attach(struct gre_softc *);
 int	in_gre_output(struct mbuf *, int, int);
 #endif
 #ifdef INET6
 int	in6_gre_attach(struct gre_softc *);
 int	in6_gre_output(struct mbuf *, int, int);
 #endif
 /*
  * CISCO uses special type for GRE tunnel created as part of WCCP
  * connection, while in fact those packets are just IPv4 encapsulated
  * into GRE.
  */
 #define ETHERTYPE_WCCP		0x883E
 #endif /* _KERNEL */
 
 #define GRESADDRS	_IOW('i', 101, struct ifreq)
 #define GRESADDRD	_IOW('i', 102, struct ifreq)
 #define GREGADDRS	_IOWR('i', 103, struct ifreq)
 #define GREGADDRD	_IOWR('i', 104, struct ifreq)
 #define GRESPROTO	_IOW('i' , 105, struct ifreq)
 #define GREGPROTO	_IOWR('i', 106, struct ifreq)
 
 #define	GREGKEY		_IOWR('i', 107, struct ifreq)
 #define	GRESKEY		_IOW('i', 108, struct ifreq)
 #define	GREGOPTS	_IOWR('i', 109, struct ifreq)
 #define	GRESOPTS	_IOW('i', 110, struct ifreq)
 
 #define	GRE_ENABLE_CSUM		0x0001
 #define	GRE_ENABLE_SEQ		0x0002
 #define	GRE_OPTMASK		(GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
 
 #endif /* _NET_IF_GRE_H_ */
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
index 4ab013bdcce2..806c57eabb4f 100644
--- a/sys/net/if_me.c
+++ b/sys/net/if_me.c
@@ -1,664 +1,652 @@
 /*-
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 #define	MEMTU			(1500 - sizeof(struct mobhdr))
 static const char mename[] = "me";
 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
 static VNET_DEFINE(struct mtx, me_mtx);
 #define	V_me_mtx	VNET(me_mtx)
 /* Minimal forwarding header RFC 2004 */
 struct mobhdr {
 	uint8_t		mob_proto;	/* protocol */
 	uint8_t		mob_flags;	/* flags */
 #define	MOB_FLAGS_SP	0x80		/* source present */
 	uint16_t	mob_csum;	/* header checksum */
 	struct in_addr	mob_dst;	/* original destination address */
 	struct in_addr	mob_src;	/* original source addr (optional) */
 } __packed;
 
 struct me_softc {
 	struct ifnet		*me_ifp;
 	LIST_ENTRY(me_softc)	me_list;
 	struct rmlock		me_lock;
 	u_int			me_fibnum;
 	const struct encaptab	*me_ecookie;
 	struct in_addr		me_src;
 	struct in_addr		me_dst;
 };
 #define	ME2IFP(sc)		((sc)->me_ifp)
 #define	ME_READY(sc)		((sc)->me_src.s_addr != 0)
 #define	ME_LOCK_INIT(sc)	rm_init(&(sc)->me_lock, "me softc")
 #define	ME_LOCK_DESTROY(sc)	rm_destroy(&(sc)->me_lock)
 #define	ME_RLOCK_TRACKER	struct rm_priotracker me_tracker
 #define	ME_RLOCK(sc)		rm_rlock(&(sc)->me_lock, &me_tracker)
 #define	ME_RUNLOCK(sc)		rm_runlock(&(sc)->me_lock, &me_tracker)
 #define	ME_RLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_RLOCKED)
 #define	ME_WLOCK(sc)		rm_wlock(&(sc)->me_lock)
 #define	ME_WUNLOCK(sc)		rm_wunlock(&(sc)->me_lock)
 #define	ME_WLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_WLOCKED)
 
 #define	ME_LIST_LOCK_INIT(x)	mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
 #define	ME_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_me_mtx)
 #define	ME_LIST_LOCK(x)		mtx_lock(&V_me_mtx)
 #define	ME_LIST_UNLOCK(x)	mtx_unlock(&V_me_mtx)
 
 static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
 #define	V_me_softc_list	VNET(me_softc_list)
 static struct sx me_ioctl_sx;
 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
 
 static int	me_clone_create(struct if_clone *, int, caddr_t);
 static void	me_clone_destroy(struct ifnet *);
 static VNET_DEFINE(struct if_clone *, me_cloner);
 #define	V_me_cloner	VNET(me_cloner)
 
 static void	me_qflush(struct ifnet *);
 static int	me_transmit(struct ifnet *, struct mbuf *);
 static int	me_ioctl(struct ifnet *, u_long, caddr_t);
 static int	me_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
-static int	me_input(struct mbuf **, int *, int);
+static int	me_input(struct mbuf *, int, int, void *);
 
 static int	me_set_tunnel(struct ifnet *, struct sockaddr_in *,
     struct sockaddr_in *);
 static void	me_delete_tunnel(struct ifnet *);
+static int	me_encapcheck(const struct mbuf *, int, int, void *);
+
+#define	ME_MINLEN	(sizeof(struct ip) + sizeof(struct mobhdr) -\
+    sizeof(in_addr_t))
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = IPPROTO_MOBILE,
+	.min_length = ME_MINLEN,
+	.exact_match = (sizeof(in_addr_t) << 4) + 8,
+	.check = me_encapcheck,
+	.input = me_input
+};
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
     "Minimal Encapsulation for IP (RFC 2004)");
 #ifndef MAX_ME_NEST
 #define MAX_ME_NEST 1
 #endif
 
 static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
 #define	V_max_me_nesting	VNET(max_me_nesting)
 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
 
-extern struct domain inetdomain;
-static const struct protosw in_mobile_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_MOBILE,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		me_input,
-	.pr_output =		rip_output,
-	.pr_ctlinput =		rip_ctlinput,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-
 static void
 vnet_me_init(const void *unused __unused)
 {
 	LIST_INIT(&V_me_softc_list);
 	ME_LIST_LOCK_INIT();
 	V_me_cloner = if_clone_simple(mename, me_clone_create,
 	    me_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_init, NULL);
 
 static void
 vnet_me_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_me_cloner);
 	ME_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_uninit, NULL);
 
 static int
 me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct me_softc *sc;
 
 	sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
 	sc->me_fibnum = curthread->td_proc->p_fibnum;
 	ME2IFP(sc) = if_alloc(IFT_TUNNEL);
 	ME_LOCK_INIT(sc);
 	ME2IFP(sc)->if_softc = sc;
 	if_initname(ME2IFP(sc), mename, unit);
 
 	ME2IFP(sc)->if_mtu = MEMTU;;
 	ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
 	ME2IFP(sc)->if_output = me_output;
 	ME2IFP(sc)->if_ioctl = me_ioctl;
 	ME2IFP(sc)->if_transmit = me_transmit;
 	ME2IFP(sc)->if_qflush = me_qflush;
 	ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
 	ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(ME2IFP(sc));
 	bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	ME_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
 	ME_LIST_UNLOCK();
 	return (0);
 }
 
 static void
 me_clone_destroy(struct ifnet *ifp)
 {
 	struct me_softc *sc;
 
 	sx_xlock(&me_ioctl_sx);
 	sc = ifp->if_softc;
 	me_delete_tunnel(ifp);
 	ME_LIST_LOCK();
 	LIST_REMOVE(sc, me_list);
 	ME_LIST_UNLOCK();
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&me_ioctl_sx);
 
 	if_free(ifp);
 	ME_LOCK_DESTROY(sc);
 	free(sc, M_IFME);
 }
 
 static int
 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	ME_RLOCK_TRACKER;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *src, *dst;
 	struct me_softc *sc;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu < 576)
 			return (EINVAL);
 		ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 	case SIOCSIFFLAGS:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		return (0);
 	}
 	sx_xlock(&me_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto end;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFPHYADDR:
 		src = (struct sockaddr_in *)
 			&(((struct in_aliasreq *)data)->ifra_addr);
 		dst = (struct sockaddr_in *)
 			&(((struct in_aliasreq *)data)->ifra_dstaddr);
 		if (src->sin_family != dst->sin_family ||
 		    src->sin_family != AF_INET ||
 		    src->sin_len != dst->sin_len ||
 		    src->sin_len != sizeof(struct sockaddr_in)) {
 			error = EINVAL;
 			break;
 		}
 		if (src->sin_addr.s_addr == INADDR_ANY ||
 		    dst->sin_addr.s_addr == INADDR_ANY) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		error = me_set_tunnel(ifp, src, dst);
 		break;
 	case SIOCDIFPHYADDR:
 		me_delete_tunnel(ifp);
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 		ME_RLOCK(sc);
 		if (!ME_READY(sc)) {
 			error = EADDRNOTAVAIL;
 			ME_RUNLOCK(sc);
 			break;
 		}
 		src = (struct sockaddr_in *)&ifr->ifr_addr;
 		memset(src, 0, sizeof(*src));
 		src->sin_family = AF_INET;
 		src->sin_len = sizeof(*src);
 		switch (cmd) {
 		case SIOCGIFPSRCADDR:
 			src->sin_addr = sc->me_src;
 			break;
 		case SIOCGIFPDSTADDR:
 			src->sin_addr = sc->me_dst;
 			break;
 		}
 		ME_RUNLOCK(sc);
 		error = prison_if(curthread->td_ucred, sintosa(src));
 		if (error != 0)
 			memset(src, 0, sizeof(*src));
 		break;
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->me_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->me_fibnum = ifr->ifr_fib;
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 end:
 	sx_xunlock(&me_ioctl_sx);
 	return (error);
 }
 
 static int
 me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	ME_RLOCK_TRACKER;
 	struct me_softc *sc;
 	struct ip *ip;
 	int ret;
 
 	sc = (struct me_softc *)arg;
 	if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
 		return (0);
 
 	M_ASSERTPKTHDR(m);
 
-	if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
-	    sizeof(struct in_addr))
-		return (0);
-
 	ret = 0;
 	ME_RLOCK(sc);
 	if (ME_READY(sc)) {
 		ip = mtod(m, struct ip *);
 		if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
 		    sc->me_dst.s_addr == ip->ip_src.s_addr)
-			ret = 32 * 2;
+			ret = 32 * 2 + 8;
 	}
 	ME_RUNLOCK(sc);
 	return (ret);
 }
 
 static int
 me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
     struct sockaddr_in *dst)
 {
 	struct me_softc *sc, *tsc;
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
 	ME_LIST_LOCK();
 	sc = ifp->if_softc;
 	LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
 		if (tsc == sc || !ME_READY(tsc))
 			continue;
 		if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
 		    tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
 			ME_LIST_UNLOCK();
 			return (EADDRNOTAVAIL);
 		}
 	}
 	ME_LIST_UNLOCK();
 
 	ME_WLOCK(sc);
 	sc->me_dst = dst->sin_addr;
 	sc->me_src = src->sin_addr;
 	ME_WUNLOCK(sc);
 
 	if (sc->me_ecookie == NULL)
-		sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
-		    me_encapcheck, &in_mobile_protosw, sc);
+		sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
+		    sc, M_WAITOK);
 	if (sc->me_ecookie != NULL) {
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		if_link_state_change(ifp, LINK_STATE_UP);
 	}
 	return (0);
 }
 
 static void
 me_delete_tunnel(struct ifnet *ifp)
 {
 	struct me_softc *sc = ifp->if_softc;
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
 	if (sc->me_ecookie != NULL)
-		encap_detach(sc->me_ecookie);
+		ip_encap_detach(sc->me_ecookie);
 	sc->me_ecookie = NULL;
 	ME_WLOCK(sc);
 	sc->me_src.s_addr = 0;
 	sc->me_dst.s_addr = 0;
 	ME_WUNLOCK(sc);
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 }
 
 static uint16_t
 me_in_cksum(uint16_t *p, int nwords)
 {
 	uint32_t sum = 0;
 
 	while (nwords-- > 0)
 		sum += *p++;
 	sum = (sum >> 16) + (sum & 0xffff);
 	sum += (sum >> 16);
 	return (~sum);
 }
 
-int
-me_input(struct mbuf **mp, int *offp, int proto)
+static int
+me_input(struct mbuf *m, int off, int proto, void *arg)
 {
-	struct me_softc *sc;
+	struct me_softc *sc = arg;
 	struct mobhdr *mh;
 	struct ifnet *ifp;
-	struct mbuf *m;
 	struct ip *ip;
 	int hlen;
 
-	m = *mp;
-	sc = encap_getarg(m);
-	KASSERT(sc != NULL, ("encap_getarg returned NULL"));
-
 	ifp = ME2IFP(sc);
 	/* checks for short packets */
 	hlen = sizeof(struct mobhdr);
 	if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
 		hlen -= sizeof(struct in_addr);
 	if (m->m_len < sizeof(struct ip) + hlen)
 		m = m_pullup(m, sizeof(struct ip) + hlen);
 	if (m == NULL)
 		goto drop;
 	mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
 	/* check for wrong flags */
 	if (mh->mob_flags & (~MOB_FLAGS_SP)) {
 		m_freem(m);
 		goto drop;
 	}
 	if (mh->mob_flags) {
 	       if (hlen != sizeof(struct mobhdr)) {
 			m_freem(m);
 			goto drop;
 	       }
 	} else
 		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
 	/* check mobile header checksum */
 	if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
 		m_freem(m);
 		goto drop;
 	}
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	ip = mtod(m, struct ip *);
 	ip->ip_dst = mh->mob_dst;
 	ip->ip_p = mh->mob_proto;
 	ip->ip_sum = 0;
 	ip->ip_len = htons(m->m_pkthdr.len - hlen);
 	if (mh->mob_flags)
 		ip->ip_src = mh->mob_src;
 	memmove(mtodo(m, hlen), ip, sizeof(struct ip));
 	m_adj(m, hlen);
 	m_clrprotoflags(m);
 	m->m_pkthdr.rcvif = ifp;
 	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
 	M_SETFIB(m, ifp->if_fib);
 	hlen = AF_INET;
 	BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	if ((ifp->if_flags & IFF_MONITOR) != 0)
 		m_freem(m);
 	else
 		netisr_dispatch(NETISR_IP, m);
 	return (IPPROTO_DONE);
 drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 	return (IPPROTO_DONE);
 }
 
 #define	MTAG_ME	1414491977
 static int
 me_check_nesting(struct ifnet *ifp, struct mbuf *m)
 {
 	struct m_tag *mtag;
 	int count;
 
 	count = 1;
 	mtag = NULL;
 	while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) {
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
 			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
 			return (EIO);
 		}
 		count++;
 	}
 	if (count > V_max_me_nesting) {
 		log(LOG_NOTICE,
 		    "%s: if_output recursively called too many times(%d)\n",
 		    ifp->if_xname, count);
 		return (EIO);
 	}
 	mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT);
 	if (mtag == NULL)
 		return (ENOMEM);
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
 	return (0);
 }
 
 static int
 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *ro)
 {
 	uint32_t af;
 	int error;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error != 0)
 		goto drop;
 #endif
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	error = me_check_nesting(ifp, m);
 	if (error != 0)
 		goto drop;
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
 		af = dst->sa_family;
 	if (af != AF_INET) {
 		error = EAFNOSUPPORT;
 		goto drop;
 	}
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	return (ifp->if_transmit(ifp, m));
 drop:
 	m_freem(m);
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	return (error);
 }
 
 static int
 me_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	ME_RLOCK_TRACKER;
 	struct mobhdr mh;
 	struct me_softc *sc;
 	struct ip *ip;
 	int error, hlen, plen;
 
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENETDOWN;
 		m_freem(m);
 		goto drop;
 	}
 	if (m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	ip = mtod(m, struct ip *);
 	/* Fragmented datagramms shouldn't be encapsulated */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		error = EINVAL;
 		m_freem(m);
 		goto drop;
 	}
 	mh.mob_proto = ip->ip_p;
 	mh.mob_src = ip->ip_src;
 	mh.mob_dst = ip->ip_dst;
 	ME_RLOCK(sc);
 	if (!ME_READY(sc)) {
 		ME_RUNLOCK(sc);
 		error = ENETDOWN;
 		m_freem(m);
 		goto drop;
 	}
 	if (in_hosteq(sc->me_src, ip->ip_src)) {
 		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
 		mh.mob_flags = 0;
 	} else {
 		hlen = sizeof(struct mobhdr);
 		mh.mob_flags = MOB_FLAGS_SP;
 	}
 	plen = m->m_pkthdr.len;
 	ip->ip_src = sc->me_src;
 	ip->ip_dst = sc->me_dst;
 	M_SETFIB(m, sc->me_fibnum);
 	ME_RUNLOCK(sc);
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	if (m->m_len < sizeof(struct ip) + hlen)
 		m = m_pullup(m, sizeof(struct ip) + hlen);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
 	ip = mtod(m, struct ip *);
 	ip->ip_len = htons(m->m_pkthdr.len);
 	ip->ip_p = IPPROTO_MOBILE;
 	ip->ip_sum = 0;
 	mh.mob_csum = 0;
 	mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
 	bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 drop:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	else {
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 	}
 	return (error);
 }
 
 static void
 me_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 static int
 memodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t me_mod = {
 	"if_me",
 	memodevent,
 	0
 };
 
 DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_me, 1);
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index b10202076d02..f073e20858cd 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -1,767 +1,753 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2000 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * 6to4 interface, based on RFC3056.
  *
  * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
  * There is no address mapping defined from IPv6 multicast address to IPv4
  * address.  Therefore, we do not have IFF_MULTICAST on the interface.
  *
  * Due to the lack of address mapping for link-local addresses, we cannot
  * throw packets toward link-local addresses (fe80::x).  Also, we cannot throw
  * packets to link-local multicast addresses (ff02::x).
  *
  * Here are interesting symptoms due to the lack of link-local address:
  *
  * Unicast routing exchange:
  * - RIPng: Impossible.  Uses link-local multicast packet toward ff02::9,
  *   and link-local addresses as nexthop.
  * - OSPFv6: Impossible.  OSPFv6 assumes that there's link-local address
  *   assigned to the link, and makes use of them.  Also, HELLO packets use
  *   link-local multicast addresses (ff02::5 and ff02::6).
  * - BGP4+: Maybe.  You can only use global address as nexthop, and global
  *   address as TCP endpoint address.
  *
  * Multicast routing protocols:
  * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
  *   Adjacent PIM routers must be configured manually (is it really spec-wise
  *   correct thing to do?).
  *
  * ICMPv6:
  * - Redirects cannot be used due to the lack of link-local address.
  *
  * stf interface does not have, and will not need, a link-local address.  
  * It seems to have no real benefit and does not help the above symptoms much.
  * Even if we assign link-locals to interface, we cannot really
  * use link-local unicast/multicast on top of 6to4 cloud (since there's no
  * encapsulation defined for link-local address), and the above analysis does
  * not change.  RFC3056 does not mandate the assignment of link-local address
  * either.
  *
  * 6to4 interface has security issues.  Refer to
  * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
  * for details.  The code tries to filter out some of malicious packets.
  * Note that there is no way to be 100% secure.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
-#include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <machine/cpu.h>
 
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_types.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip_ecn.h>
 
 #include <netinet/ip_encap.h>
 
 #include <machine/stdarg.h>
 
 #include <net/bpf.h>
 
 #include <security/mac/mac_framework.h>
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
 
 static int stf_permit_rfc1918 = 0;
 SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN,
     &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
 
 #define STFUNIT		0
 
 #define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
 
 /*
  * XXX: Return a pointer with 16-bit aligned.  Don't cast it to
  * struct in_addr *; use bcopy() instead.
  */
 #define GET_V4(x)	(&(x)->s6_addr16[1])
 
 struct stf_softc {
 	struct ifnet	*sc_ifp;
 	u_int	sc_fibnum;
 	const struct encaptab *encap_cookie;
 };
 #define STF2IFP(sc)	((sc)->sc_ifp)
 
 static const char stfname[] = "stf";
 
 static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
 static const int ip_stf_ttl = 40;
 
-extern  struct domain inetdomain;
-static int in_stf_input(struct mbuf **, int *, int);
-static struct protosw in_stf_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_IPV6,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		in_stf_input,
-	.pr_output =		rip_output,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-
+static int in_stf_input(struct mbuf *, int, int, void *);
 static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
 
 static int stfmodevent(module_t, int, void *);
 static int stf_encapcheck(const struct mbuf *, int, int, void *);
 static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *);
 static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	struct route *);
 static int isrfc1918addr(struct in_addr *);
 static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
 	struct ifnet *);
 static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
 	struct ifnet *);
 static int stf_ioctl(struct ifnet *, u_long, caddr_t);
 
 static int stf_clone_match(struct if_clone *, const char *);
 static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int stf_clone_destroy(struct if_clone *, struct ifnet *);
 static struct if_clone *stf_cloner;
 
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = IPPROTO_IPV6,
+	.min_length = sizeof(struct ip),
+	.exact_match = (sizeof(in_addr_t) << 3) + 8,
+	.check = stf_encapcheck,
+	.input = in_stf_input
+};
+
 static int
 stf_clone_match(struct if_clone *ifc, const char *name)
 {
 	int i;
 
 	for(i = 0; stfnames[i] != NULL; i++) {
 		if (strcmp(stfnames[i], name) == 0)
 			return (1);
 	}
 
 	return (0);
 }
 
 static int
 stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
 	int err, unit, wildcard;
 	struct stf_softc *sc;
 	struct ifnet *ifp;
 
 	err = ifc_name2unit(name, &unit);
 	if (err != 0)
 		return (err);
 	wildcard = (unit < 0);
 
 	/*
 	 * We can only have one unit, but since unit allocation is
 	 * already locked, we use it to keep from allocating extra
 	 * interfaces.
 	 */
 	unit = STFUNIT;
 	err = ifc_alloc_unit(ifc, &unit);
 	if (err != 0)
 		return (err);
 
 	sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO);
 	ifp = STF2IFP(sc) = if_alloc(IFT_STF);
 	if (ifp == NULL) {
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
 	ifp->if_softc = sc;
 	sc->sc_fibnum = curthread->td_proc->p_fibnum;
 
 	/*
 	 * Set the name manually rather then using if_initname because
 	 * we don't conform to the default naming convention for interfaces.
 	 * In the wildcard case, we need to update the name.
 	 */
 	if (wildcard) {
 		for (dp = name; *dp != '\0'; dp++);
 		if (snprintf(dp, len - (dp-name), "%d", unit) >
 		    len - (dp-name) - 1) {
 			/*
 			 * This can only be a programmer error and
 			 * there's no straightforward way to recover if
 			 * it happens.
 			 */
 			panic("if_clone_create(): interface name too long");
 		}
 	}
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
 	ifp->if_dname = stfname;
 	ifp->if_dunit = IF_DUNIT_NONE;
 
-	sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
-	    stf_encapcheck, &in_stf_protosw, sc);
+	sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
 	if (sc->encap_cookie == NULL) {
 		if_printf(ifp, "attach failed\n");
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOMEM);
 	}
 
 	ifp->if_mtu    = IPV6_MMTU;
 	ifp->if_ioctl  = stf_ioctl;
 	ifp->if_output = stf_output;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	return (0);
 }
 
 static int
 stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
 	struct stf_softc *sc = ifp->if_softc;
 	int err __unused;
 
-	err = encap_detach(sc->encap_cookie);
+	err = ip_encap_detach(sc->encap_cookie);
 	KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 
 	free(sc, M_STF);
 	ifc_free_unit(ifc, STFUNIT);
 
 	return (0);
 }
 
 static int
 stfmodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
 		    stf_clone_create, stf_clone_destroy);
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(stf_cloner);
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 static moduledata_t stf_mod = {
 	"if_stf",
 	stfmodevent,
 	0
 };
 
 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 static int
 stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	struct ip ip;
 	struct stf_softc *sc;
 	struct in_addr a, b, mask;
 	struct in6_addr addr6, mask6;
 
 	sc = (struct stf_softc *)arg;
 	if (sc == NULL)
 		return 0;
 
 	if ((STF2IFP(sc)->if_flags & IFF_UP) == 0)
 		return 0;
 
 	/* IFF_LINK0 means "no decapsulation" */
 	if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0)
 		return 0;
 
 	if (proto != IPPROTO_IPV6)
 		return 0;
 
 	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 
 	if (ip.ip_v != 4)
 		return 0;
 
 	if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0)
 		return (0);
 
 	/*
 	 * check if IPv4 dst matches the IPv4 address derived from the
 	 * local 6to4 address.
 	 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
 	 */
 	if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0)
 		return 0;
 
 	/*
 	 * check if IPv4 src matches the IPv4 address derived from the
 	 * local 6to4 address masked by prefixmask.
 	 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
 	 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
 	 */
 	bzero(&a, sizeof(a));
 	bcopy(GET_V4(&addr6), &a, sizeof(a));
 	bcopy(GET_V4(&mask6), &mask, sizeof(mask));
 	a.s_addr &= mask.s_addr;
 	b = ip.ip_src;
 	b.s_addr &= mask.s_addr;
 	if (a.s_addr != b.s_addr)
 		return 0;
 
 	/* stf interface makes single side match only */
 	return 32;
 }
 
 static int
 stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask)
 {
 	struct ifaddr *ia;
 	struct in_ifaddr *ia4;
 	struct in6_ifaddr *ia6;
 	struct sockaddr_in6 *sin6;
 	struct in_addr in;
 
 	if_addr_rlock(ifp);
 	CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
 		if (ia->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
 			continue;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
 		LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
 			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
 				break;
 		if (ia4 == NULL)
 			continue;
 
 		ia6 = (struct in6_ifaddr *)ia;
 
 		*addr = sin6->sin6_addr;
 		*mask = ia6->ia_prefixmask.sin6_addr;
 		if_addr_runlock(ifp);
 		return (0);
 	}
 	if_addr_runlock(ifp);
 
 	return (ENOENT);
 }
 
 static int
 stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct stf_softc *sc;
 	const struct sockaddr_in6 *dst6;
 	struct in_addr in4;
 	const void *ptr;
 	u_int8_t tos;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_addr addr6, mask6;
 	int error;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	sc = ifp->if_softc;
 	dst6 = (const struct sockaddr_in6 *)dst;
 
 	/* just in case */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETDOWN;
 	}
 
 	/*
 	 * If we don't have an ip4 address that match my inner ip6 address,
 	 * we shouldn't generate output.  Without this check, we'll end up
 	 * using wrong IPv4 source.
 	 */
 	if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETDOWN;
 	}
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m) {
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return ENOBUFS;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 	/*
 	 * Pickup the right outer dst addr from the list of candidates.
 	 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
 	 */
 	ptr = NULL;
 	if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst))
 		ptr = GET_V4(&ip6->ip6_dst);
 	else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
 		ptr = GET_V4(&dst6->sin6_addr);
 	else {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETUNREACH;
 	}
 	bcopy(ptr, &in4, sizeof(in4));
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer a to it).
 		 */
 		u_int af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 	if (m == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENOBUFS;
 	}
 	ip = mtod(m, struct ip *);
 
 	bzero(ip, sizeof(*ip));
 
 	bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src));
 	bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
 	ip->ip_len = htons(m->m_pkthdr.len);
 	if (ifp->if_flags & IFF_LINK1)
 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
 	M_SETFIB(m, sc->sc_fibnum);
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	error = ip_output(m, NULL, NULL, 0, NULL, NULL);
 
 	return error;
 }
 
 static int
 isrfc1918addr(struct in_addr *in)
 {
 	/*
 	 * returns 1 if private address range:
 	 * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16
 	 */
 	if (stf_permit_rfc1918 == 0 && (
 	    (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 ||
 	    (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 ||
 	    (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168))
 		return 1;
 
 	return 0;
 }
 
 static int
 stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia4;
 
 	/*
 	 * reject packets with the following address:
 	 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
 	 */
 	if (IN_MULTICAST(ntohl(in->s_addr)))
 		return -1;
 	switch ((ntohl(in->s_addr) & 0xff000000) >> 24) {
 	case 0: case 127: case 255:
 		return -1;
 	}
 
 	/*
 	 * reject packets with private address range.
 	 * (requirement from RFC3056 section 2 1st paragraph)
 	 */
 	if (isrfc1918addr(in))
 		return -1;
 
 	/*
 	 * reject packets with broadcast
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	CK_STAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return -1;
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * perform ingress filter
 	 */
 	if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
 		struct nhop4_basic nh4;
 
 		if (fib4_lookup_nh_basic(sc->sc_fibnum, *in, 0, 0, &nh4) != 0)
 			return (-1);
 
 		if (nh4.nh_ifp != inifp)
 			return (-1);
 	}
 
 	return 0;
 }
 
 static int
 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
 {
 	/*
 	 * check 6to4 addresses
 	 */
 	if (IN6_IS_ADDR_6TO4(in6)) {
 		struct in_addr in4;
 		bcopy(GET_V4(in6), &in4, sizeof(in4));
 		return stf_checkaddr4(sc, &in4, inifp);
 	}
 
 	/*
 	 * reject anything that look suspicious.  the test is implemented
 	 * in ip6_input too, but we check here as well to
 	 * (1) reject bad packets earlier, and
 	 * (2) to be safe against future ip6_input change.
 	 */
 	if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6))
 		return -1;
 
 	return 0;
 }
 
 static int
-in_stf_input(struct mbuf **mp, int *offp, int proto)
+in_stf_input(struct mbuf *m, int off, int proto, void *arg)
 {
-	struct stf_softc *sc;
+	struct stf_softc *sc = arg;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
-	struct mbuf *m;
 	u_int8_t otos, itos;
 	struct ifnet *ifp;
-	int off;
-
-	m = *mp;
-	off = *offp;
 
 	if (proto != IPPROTO_IPV6) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	ip = mtod(m, struct ip *);
-
-	sc = (struct stf_softc *)encap_getarg(m);
-
 	if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	ifp = STF2IFP(sc);
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * perform sanity check against outer src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
 	    stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	otos = ip->ip_tos;
 	m_adj(m, off);
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m)
 			return (IPPROTO_DONE);
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * perform sanity check against inner src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
 	    stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 	if ((ifp->if_flags & IFF_LINK1) != 0)
 		ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
 	else
 		ip_ecn_egress(ECN_NOCARE, &otos, &itos);
 	ip6->ip6_flow &= ~htonl(0xff << 20);
 	ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
 
 	m->m_pkthdr.rcvif = ifp;
-	
+
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer a to it).
 		 */
 		u_int32_t af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * See net/if_gif.c for possible issues with packet processing
 	 * reorder due to extra queueing.
 	 */
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(NETISR_IPV6, m);
 	return (IPPROTO_DONE);
 }
 
 static int
 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	struct sockaddr_in6 *sin6;
 	struct in_addr addr;
 	int error, mtu;
 
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifa = (struct ifaddr *)data;
 		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr));
 		if (isrfc1918addr(&addr)) {
 			error = EINVAL;
 			break;
 		}
 
 		ifp->if_flags |= IFF_UP;
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ifr = (struct ifreq *)data;
 		if (ifr && ifr->ifr_addr.sa_family == AF_INET6)
 			;
 		else
 			error = EAFNOSUPPORT;
 		break;
 
 	case SIOCGIFMTU:
 		break;
 
 	case SIOCSIFMTU:
 		ifr = (struct ifreq *)data;
 		mtu = ifr->ifr_mtu;
 		/* RFC 4213 3.2 ideal world MTU */
 		if (mtu < IPV6_MINMTU || mtu > IF_MAXMTU - 20)
 			return (EINVAL);
 		ifp->if_mtu = mtu;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c
index c30709c878ea..835760c8a4d6 100644
--- a/sys/netinet/in_gif.c
+++ b/sys/netinet/in_gif.c
@@ -1,218 +1,206 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
-#include <sys/protosw.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_ecn.h>
 #include <netinet/in_fib.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/if_gif.h>
 
-static int in_gif_input(struct mbuf **, int *, int);
-
-extern  struct domain inetdomain;
-static struct protosw in_gif_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		0/* IPPROTO_IPV[46] */,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		in_gif_input,
-	.pr_output =		rip_output,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-
 #define GIF_TTL		30
 static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
 #define	V_ip_gif_ttl		VNET(ip_gif_ttl)
 SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
-	&VNET_NAME(ip_gif_ttl), 0, "");
+    &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
 
 int
 in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
 {
 	GIF_RLOCK_TRACKER;
 	struct gif_softc *sc = ifp->if_softc;
 	struct ip *ip;
 	int len;
 
 	/* prepend new IP header */
 	len = sizeof(struct ip);
 #ifndef __NO_STRICT_ALIGNMENT
 	if (proto == IPPROTO_ETHERIP)
 		len += ETHERIP_ALIGN;
 #endif
 	M_PREPEND(m, len, M_NOWAIT);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifndef __NO_STRICT_ALIGNMENT
 	if (proto == IPPROTO_ETHERIP) {
 		len = mtod(m, vm_offset_t) & 3;
 		KASSERT(len == 0 || len == ETHERIP_ALIGN,
 		    ("in_gif_output: unexpected misalignment"));
 		m->m_data += len;
 		m->m_len -= ETHERIP_ALIGN;
 	}
 #endif
 	ip = mtod(m, struct ip *);
 	GIF_RLOCK(sc);
 	if (sc->gif_family != AF_INET) {
 		m_freem(m);
 		GIF_RUNLOCK(sc);
 		return (ENETDOWN);
 	}
 	bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
 	GIF_RUNLOCK(sc);
 
 	ip->ip_p = proto;
 	/* version will be set in ip_output() */
 	ip->ip_ttl = V_ip_gif_ttl;
 	ip->ip_len = htons(m->m_pkthdr.len);
 	ip->ip_tos = ecn;
 
 	return (ip_output(m, NULL, NULL, 0, NULL, NULL));
 }
 
 static int
-in_gif_input(struct mbuf **mp, int *offp, int proto)
+in_gif_input(struct mbuf *m, int off, int proto, void *arg)
 {
-	struct mbuf *m = *mp;
-	struct gif_softc *sc;
+	struct gif_softc *sc = arg;
 	struct ifnet *gifp;
 	struct ip *ip;
 	uint8_t ecn;
 
-	sc = encap_getarg(m);
 	if (sc == NULL) {
 		m_freem(m);
 		KMOD_IPSTAT_INC(ips_nogif);
 		return (IPPROTO_DONE);
 	}
 	gifp = GIF2IFP(sc);
 	if ((gifp->if_flags & IFF_UP) != 0) {
 		ip = mtod(m, struct ip *);
 		ecn = ip->ip_tos;
-		m_adj(m, *offp);
+		m_adj(m, off);
 		gif_input(m, gifp, proto, ecn);
 	} else {
 		m_freem(m);
 		KMOD_IPSTAT_INC(ips_nogif);
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * we know that we are in IFF_UP, outer address available, and outer family
  * matched the physical addr family.  see gif_encapcheck().
  */
 int
 in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	const struct ip *ip;
 	struct gif_softc *sc;
 	int ret;
 
 	/* sanity check done in caller */
 	sc = (struct gif_softc *)arg;
 	GIF_RLOCK_ASSERT(sc);
 
 	/* check for address match */
 	ip = mtod(m, const struct ip *);
 	if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
 		return (0);
-	ret = 32;
+	ret = 32 + 8; /* src + proto */
 	if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
 		if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
 			return (0);
 	} else
 		ret += 32;
 
 	/* ingress filters on outer source */
 	if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
 		struct nhop4_basic nh4;
 		struct in_addr dst;
 
 		dst = ip->ip_src;
 
 		if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0)
 			return (0);
 
 		if (nh4.nh_ifp != m->m_pkthdr.rcvif)
 			return (0);
 	}
 	return (ret);
 }
 
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = -1,
+	.min_length = sizeof(struct ip),
+	.exact_match = (sizeof(in_addr_t) << 4) + 8,
+	.check = gif_encapcheck,
+	.input = in_gif_input
+};
+
 int
 in_gif_attach(struct gif_softc *sc)
 {
 
 	KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
-	sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
-	    &in_gif_protosw, sc);
-	if (sc->gif_ecookie == NULL)
-		return (EEXIST);
+	sc->gif_ecookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
 	return (0);
 }
diff --git a/sys/netinet/ip_encap.c b/sys/netinet/ip_encap.c
index 82d46986e12b..7679d4202bae 100644
--- a/sys/netinet/ip_encap.c
+++ b/sys/netinet/ip_encap.c
@@ -1,472 +1,270 @@
 /*	$KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * My grandfather said that there's a devil inside tunnelling technology...
  *
  * We have surprisingly many protocols that want packets with IP protocol
  * #4 or #41.  Here's a list of protocols that want protocol #41:
  *	RFC1933 configured tunnel
  *	RFC1933 automatic tunnel
  *	RFC2401 IPsec tunnel
  *	RFC2473 IPv6 generic packet tunnelling
  *	RFC2529 6over4 tunnel
  *	mobile-ip6 (uses RFC2473)
  *	RFC3056 6to4 tunnel
  *	isatap tunnel
  * Here's a list of protocol that want protocol #4:
  *	RFC1853 IPv4-in-IPv4 tunnelling
  *	RFC2003 IPv4 encapsulation within IPv4
  *	RFC2344 reverse tunnelling for mobile-ip4
  *	RFC2401 IPsec tunnel
  * Well, what can I say.  They impose different en/decapsulation mechanism
  * from each other, so they need separate protocol handler.  The only one
  * we can easily determine by protocol # is IPsec, which always has
  * AH/ESP/IPComp header right after outer IP header.
  *
  * So, clearly good old protosw does not work for protocol #4 and #41.
  * The code will let you match protocol via src/dst address pair.
  */
-/* XXX is M_NETADDR correct? */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_mrouting.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/mutex.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
-#include <sys/protosw.h>
-#include <sys/queue.h>
+#include <sys/socket.h>
 
 #include <net/if.h>
-#include <net/route.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 
 #ifdef INET6
-#include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 
-#include <machine/stdarg.h>
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host",
+    "Export host address structure");
 
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
+struct encaptab {
+	CK_LIST_ENTRY(encaptab) chain;
+	int		proto;
+	int		min_length;
+	int		exact_match;
+	void		*arg;
 
-static void encap_add(struct encaptab *);
-static int mask_match(const struct encaptab *, const struct sockaddr *,
-		const struct sockaddr *);
-static void encap_fillarg(struct mbuf *, void *);
+	encap_lookup_t	lookup;
+	encap_check_t	check;
+	encap_input_t	input;
+};
+
+CK_LIST_HEAD(encaptab_head, encaptab);
+#ifdef INET
+static struct encaptab_head ipv4_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
+#ifdef INET6
+static struct encaptab_head ipv6_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
 
-/*
- * All global variables in ip_encap.c are locked using encapmtx.
- */
 static struct mtx encapmtx;
 MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
-static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
-
-#ifdef INET
-int
-encap4_input(struct mbuf **mp, int *offp, int proto)
+#define	ENCAP_WLOCK()		mtx_lock(&encapmtx)
+#define	ENCAP_WUNLOCK()		mtx_unlock(&encapmtx)
+#define	ENCAP_RLOCK()		epoch_enter_preempt(net_epoch_preempt)
+#define	ENCAP_RUNLOCK()		epoch_exit_preempt(net_epoch_preempt)
+#define	ENCAP_WAIT()		epoch_wait_preempt(net_epoch_preempt)
+
+static struct encaptab *
+encap_attach(struct encaptab_head *head, const struct encap_config *cfg,
+    void *arg, int mflags)
 {
-	struct ip *ip;
-	struct mbuf *m;
-	struct sockaddr_in s, d;
-	const struct protosw *psw;
-	struct encaptab *ep, *match;
-	void *arg;
-	int matchprio, off, prio;
-
-	m = *mp;
-	off = *offp;
-	ip = mtod(m, struct ip *);
-
-	bzero(&s, sizeof(s));
-	s.sin_family = AF_INET;
-	s.sin_len = sizeof(struct sockaddr_in);
-	s.sin_addr = ip->ip_src;
-	bzero(&d, sizeof(d));
-	d.sin_family = AF_INET;
-	d.sin_len = sizeof(struct sockaddr_in);
-	d.sin_addr = ip->ip_dst;
-
-	arg = NULL;
-	psw = NULL;
-	match = NULL;
-	matchprio = 0;
-	mtx_lock(&encapmtx);
-	LIST_FOREACH(ep, &encaptab, chain) {
-		if (ep->af != AF_INET)
-			continue;
-		if (ep->proto >= 0 && ep->proto != proto)
-			continue;
-		if (ep->func)
-			prio = (*ep->func)(m, off, proto, ep->arg);
-		else {
-			/*
-			 * it's inbound traffic, we need to match in reverse
-			 * order
-			 */
-			prio = mask_match(ep, (struct sockaddr *)&d,
-			    (struct sockaddr *)&s);
-		}
+	struct encaptab *ep, *tmp;
 
-		/*
-		 * We prioritize the matches by using bit length of the
-		 * matches.  mask_match() and user-supplied matching function
-		 * should return the bit length of the matches (for example,
-		 * if both src/dst are matched for IPv4, 64 should be returned).
-		 * 0 or negative return value means "it did not match".
-		 *
-		 * The question is, since we have two "mask" portion, we
-		 * cannot really define total order between entries.
-		 * For example, which of these should be preferred?
-		 * mask_match() returns 48 (32 + 16) for both of them.
-		 *	src=3ffe::/16, dst=3ffe:501::/32
-		 *	src=3ffe:501::/32, dst=3ffe::/16
-		 *
-		 * We need to loop through all the possible candidates
-		 * to get the best match - the search takes O(n) for
-		 * n attachments (i.e. interfaces).
-		 */
-		if (prio <= 0)
-			continue;
-		if (prio > matchprio) {
-			matchprio = prio;
-			match = ep;
-		}
-	}
-	if (match != NULL) {
-		psw = match->psw;
-		arg = match->arg;
-	}
-	mtx_unlock(&encapmtx);
+	if (cfg == NULL || cfg->input == NULL ||
+	    (cfg->check == NULL && cfg->lookup == NULL) ||
+	    (cfg->lookup != NULL && cfg->exact_match != ENCAP_DRV_LOOKUP) ||
+	    (cfg->exact_match == ENCAP_DRV_LOOKUP && cfg->lookup == NULL))
+		return (NULL);
 
-	if (match != NULL) {
-		/* found a match, "match" has the best one */
-		if (psw != NULL && psw->pr_input != NULL) {
-			encap_fillarg(m, arg);
-			(*psw->pr_input)(mp, offp, proto);
-		} else
-			m_freem(m);
-		return (IPPROTO_DONE);
+	ep = malloc(sizeof(*ep), M_NETADDR, mflags);
+	if (ep == NULL)
+		return (NULL);
+
+	ep->proto = cfg->proto;
+	ep->min_length = cfg->min_length;
+	ep->exact_match = cfg->exact_match;
+	ep->arg = arg;
+	ep->lookup = cfg->exact_match == ENCAP_DRV_LOOKUP ? cfg->lookup: NULL;
+	ep->check = cfg->exact_match != ENCAP_DRV_LOOKUP ? cfg->check: NULL;
+	ep->input = cfg->input;
+
+	ENCAP_WLOCK();
+	CK_LIST_FOREACH(tmp, head, chain) {
+		if (tmp->exact_match <= ep->exact_match)
+			break;
 	}
+	if (tmp == NULL)
+		CK_LIST_INSERT_HEAD(head, ep, chain);
+	else
+		CK_LIST_INSERT_BEFORE(tmp, ep, chain);
+	ENCAP_WUNLOCK();
+	return (ep);
+}
+
+static int
+encap_detach(struct encaptab_head *head, const struct encaptab *cookie)
+{
+	struct encaptab *ep;
 
-	/* last resort: inject to raw socket */
-	return (rip_input(mp, offp, proto));
+	ENCAP_WLOCK();
+	CK_LIST_FOREACH(ep, head, chain) {
+		if (ep == cookie) {
+			CK_LIST_REMOVE(ep, chain);
+			ENCAP_WUNLOCK();
+			ENCAP_WAIT();
+			free(ep, M_NETADDR);
+			return (0);
+		}
+	}
+	ENCAP_WUNLOCK();
+	return (EINVAL);
 }
-#endif
 
-#ifdef INET6
-int
-encap6_input(struct mbuf **mp, int *offp, int proto)
+static int
+encap_input(struct encaptab_head *head, struct mbuf *m, int off, int proto)
 {
-	struct mbuf *m = *mp;
-	struct ip6_hdr *ip6;
-	struct sockaddr_in6 s, d;
-	const struct protosw *psw;
 	struct encaptab *ep, *match;
 	void *arg;
-	int prio, matchprio;
-
-	ip6 = mtod(m, struct ip6_hdr *);
+	int matchprio, ret;
 
-	bzero(&s, sizeof(s));
-	s.sin6_family = AF_INET6;
-	s.sin6_len = sizeof(struct sockaddr_in6);
-	s.sin6_addr = ip6->ip6_src;
-	bzero(&d, sizeof(d));
-	d.sin6_family = AF_INET6;
-	d.sin6_len = sizeof(struct sockaddr_in6);
-	d.sin6_addr = ip6->ip6_dst;
-
-	arg = NULL;
-	psw = NULL;
 	match = NULL;
 	matchprio = 0;
-	mtx_lock(&encapmtx);
-	LIST_FOREACH(ep, &encaptab, chain) {
-		if (ep->af != AF_INET6)
-			continue;
+
+	ENCAP_RLOCK();
+	CK_LIST_FOREACH(ep, head, chain) {
 		if (ep->proto >= 0 && ep->proto != proto)
 			continue;
-		if (ep->func)
-			prio = (*ep->func)(m, *offp, proto, ep->arg);
-		else {
-			/*
-			 * it's inbound traffic, we need to match in reverse
-			 * order
-			 */
-			prio = mask_match(ep, (struct sockaddr *)&d,
-			    (struct sockaddr *)&s);
-		}
-
-		/* see encap4_input() for issues here */
-		if (prio <= 0)
+		if (ep->min_length > m->m_pkthdr.len)
 			continue;
-		if (prio > matchprio) {
-			matchprio = prio;
+		if (ep->exact_match == ENCAP_DRV_LOOKUP)
+			ret = (*ep->lookup)(m, off, proto, &arg);
+		else
+			ret = (*ep->check)(m, off, proto, ep->arg);
+		if (ret <= 0)
+			continue;
+		if (ret > matchprio) {
 			match = ep;
+			if (ep->exact_match != ENCAP_DRV_LOOKUP)
+				arg = ep->arg;
+			/*
+			 * No need to continue the search, we got the
+			 * exact match.
+			 */
+			if (ret >= ep->exact_match)
+				break;
+			matchprio = ret;
 		}
 	}
-	if (match != NULL) {
-		psw = match->psw;
-		arg = match->arg;
-	}
-	mtx_unlock(&encapmtx);
 
 	if (match != NULL) {
-		/* found a match */
-		if (psw != NULL && psw->pr_input != NULL) {
-			encap_fillarg(m, arg);
-			return (*psw->pr_input)(mp, offp, proto);
-		} else {
-			m_freem(m);
-			return (IPPROTO_DONE);
-		}
+		/* found a match, "match" has the best one */
+		ret = (*match->input)(m, off, proto, arg);
+		ENCAP_RUNLOCK();
+		MPASS(ret == IPPROTO_DONE);
+		return (IPPROTO_DONE);
 	}
-
-	/* last resort: inject to raw socket */
-	return rip6_input(mp, offp, proto);
-}
-#endif
-
-/*lint -sem(encap_add, custodial(1)) */
-static void
-encap_add(struct encaptab *ep)
-{
-
-	mtx_assert(&encapmtx, MA_OWNED);
-	LIST_INSERT_HEAD(&encaptab, ep, chain);
+	ENCAP_RUNLOCK();
+	return (0);
 }
 
-/*
- * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
- * length of mask (sm and dm) is assumed to be same as sp/dp.
- * Return value will be necessary as input (cookie) for encap_detach().
- */
+#ifdef INET
 const struct encaptab *
-encap_attach(int af, int proto, const struct sockaddr *sp,
-    const struct sockaddr *sm, const struct sockaddr *dp,
-    const struct sockaddr *dm, const struct protosw *psw, void *arg)
+ip_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
 {
-	struct encaptab *ep;
-
-	/* sanity check on args */
-	if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst))
-		return (NULL);
-	if (sp->sa_len != dp->sa_len)
-		return (NULL);
-	if (af != sp->sa_family || af != dp->sa_family)
-		return (NULL);
 
-	/* check if anyone have already attached with exactly same config */
-	mtx_lock(&encapmtx);
-	LIST_FOREACH(ep, &encaptab, chain) {
-		if (ep->af != af)
-			continue;
-		if (ep->proto != proto)
-			continue;
-		if (ep->src.ss_len != sp->sa_len ||
-		    bcmp(&ep->src, sp, sp->sa_len) != 0 ||
-		    bcmp(&ep->srcmask, sm, sp->sa_len) != 0)
-			continue;
-		if (ep->dst.ss_len != dp->sa_len ||
-		    bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
-		    bcmp(&ep->dstmask, dm, dp->sa_len) != 0)
-			continue;
-
-		mtx_unlock(&encapmtx);
-		return (NULL);
-	}
-
-	ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT);	/*XXX*/
-	if (ep == NULL) {
-		mtx_unlock(&encapmtx);
-		return (NULL);
-	}
-	bzero(ep, sizeof(*ep));
-
-	ep->af = af;
-	ep->proto = proto;
-	bcopy(sp, &ep->src, sp->sa_len);
-	bcopy(sm, &ep->srcmask, sp->sa_len);
-	bcopy(dp, &ep->dst, dp->sa_len);
-	bcopy(dm, &ep->dstmask, dp->sa_len);
-	ep->psw = psw;
-	ep->arg = arg;
-
-	encap_add(ep);
-	mtx_unlock(&encapmtx);
-	return (ep);
+	return (encap_attach(&ipv4_encaptab, cfg, arg, mflags));
 }
 
-const struct encaptab *
-encap_attach_func(int af, int proto,
-    int (*func)(const struct mbuf *, int, int, void *),
-    const struct protosw *psw, void *arg)
+int
+ip_encap_detach(const struct encaptab *cookie)
 {
-	struct encaptab *ep;
 
-	/* sanity check on args */
-	if (!func)
-		return (NULL);
-
-	ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT);	/*XXX*/
-	if (ep == NULL)
-		return (NULL);
-	bzero(ep, sizeof(*ep));
-
-	ep->af = af;
-	ep->proto = proto;
-	ep->func = func;
-	ep->psw = psw;
-	ep->arg = arg;
-
-	mtx_lock(&encapmtx);
-	encap_add(ep);
-	mtx_unlock(&encapmtx);
-	return (ep);
+	return (encap_detach(&ipv4_encaptab, cookie));
 }
 
 int
-encap_detach(const struct encaptab *cookie)
+encap4_input(struct mbuf **mp, int *offp, int proto)
 {
-	const struct encaptab *ep = cookie;
-	struct encaptab *p;
-
-	mtx_lock(&encapmtx);
-	LIST_FOREACH(p, &encaptab, chain) {
-		if (p == ep) {
-			LIST_REMOVE(p, chain);
-			mtx_unlock(&encapmtx);
-			free(p, M_NETADDR);	/*XXX*/
-			return 0;
-		}
-	}
-	mtx_unlock(&encapmtx);
 
-	return EINVAL;
+	if (encap_input(&ipv4_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+		return (rip_input(mp, offp, proto));
+	return (IPPROTO_DONE);
 }
+#endif /* INET */
 
-static int
-mask_match(const struct encaptab *ep, const struct sockaddr *sp,
-    const struct sockaddr *dp)
+#ifdef INET6
+const struct encaptab *
+ip6_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
 {
-	struct sockaddr_storage s;
-	struct sockaddr_storage d;
-	int i;
-	const u_int8_t *p, *q;
-	u_int8_t *r;
-	int matchlen;
-
-	if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
-		return 0;
-	if (sp->sa_family != ep->af || dp->sa_family != ep->af)
-		return 0;
-	if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
-		return 0;
-
-	matchlen = 0;
-
-	p = (const u_int8_t *)sp;
-	q = (const u_int8_t *)&ep->srcmask;
-	r = (u_int8_t *)&s;
-	for (i = 0 ; i < sp->sa_len; i++) {
-		r[i] = p[i] & q[i];
-		/* XXX estimate */
-		matchlen += (q[i] ? 8 : 0);
-	}
 
-	p = (const u_int8_t *)dp;
-	q = (const u_int8_t *)&ep->dstmask;
-	r = (u_int8_t *)&d;
-	for (i = 0 ; i < dp->sa_len; i++) {
-		r[i] = p[i] & q[i];
-		/* XXX rough estimate */
-		matchlen += (q[i] ? 8 : 0);
-	}
-
-	/* need to overwrite len/family portion as we don't compare them */
-	s.ss_len = sp->sa_len;
-	s.ss_family = sp->sa_family;
-	d.ss_len = dp->sa_len;
-	d.ss_family = dp->sa_family;
-
-	if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
-	    bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
-		return matchlen;
-	} else
-		return 0;
+	return (encap_attach(&ipv6_encaptab, cfg, arg, mflags));
 }
 
-static void
-encap_fillarg(struct mbuf *m, void *arg)
+int
+ip6_encap_detach(const struct encaptab *cookie)
 {
-	struct m_tag *tag;
 
-	if (arg != NULL) {
-		tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
-		if (tag != NULL) {
-			*(void**)(tag+1) = arg;
-			m_tag_prepend(m, tag);
-		}
-	}
+	return (encap_detach(&ipv6_encaptab, cookie));
 }
 
-void *
-encap_getarg(struct mbuf *m)
+int
+encap6_input(struct mbuf **mp, int *offp, int proto)
 {
-	void *p = NULL;
-	struct m_tag *tag;
 
-	tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
-	if (tag) {
-		p = *(void**)(tag+1);
-		m_tag_delete(m, tag);
-	}
-	return p;
+	if (encap_input(&ipv6_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+		return (rip6_input(mp, offp, proto));
+	return (IPPROTO_DONE);
 }
+#endif /* INET6 */
diff --git a/sys/netinet/ip_encap.h b/sys/netinet/ip_encap.h
index ef232189f398..f3d1d3afcab8 100644
--- a/sys/netinet/ip_encap.h
+++ b/sys/netinet/ip_encap.h
@@ -1,65 +1,70 @@
 /*	$FreeBSD$	*/
 /*	$KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _NETINET_IP_ENCAP_H_
 #define _NETINET_IP_ENCAP_H_
 
 #ifdef _KERNEL
 
-struct encaptab {
-	LIST_ENTRY(encaptab) chain;
-	int af;
-	int proto;			/* -1: don't care, I'll check myself */
-	struct sockaddr_storage src;	/* my addr */
-	struct sockaddr_storage srcmask;
-	struct sockaddr_storage dst;	/* remote addr */
-	struct sockaddr_storage dstmask;
-	int (*func)(const struct mbuf *, int, int, void *);
-	const struct protosw *psw;	/* only pr_input will be used */
-	void *arg;			/* passed via m->m_pkthdr.aux */
-};
-
 int	encap4_input(struct mbuf **, int *, int);
 int	encap6_input(struct mbuf **, int *, int);
-const struct encaptab *encap_attach(int, int, const struct sockaddr *,
-	const struct sockaddr *, const struct sockaddr *,
-	const struct sockaddr *, const struct protosw *, void *);
-const struct encaptab *encap_attach_func(int, int,
-	int (*)(const struct mbuf *, int, int, void *),
-	const struct protosw *, void *);
-int	encap_detach(const struct encaptab *);
-void	*encap_getarg(struct mbuf *);
+
+typedef int (*encap_lookup_t)(const struct mbuf *, int, int, void **);
+typedef int (*encap_check_t)(const struct mbuf *, int, int, void *);
+typedef int (*encap_input_t)(struct mbuf *, int , int, void *);
+
+struct encap_config {
+	int		proto;		/* protocol */
+	int		min_length;	/* minimum packet length */
+	int		exact_match;	/* a packet is exactly matched */
+#define	ENCAP_DRV_LOOKUP	0x7fffffff
+
+	encap_lookup_t	lookup;
+	encap_check_t	check;
+	encap_input_t	input;
+};
+
+struct encaptab;
+
+const struct encaptab *ip_encap_attach(const struct encap_config *,
+    void *arg, int mflags);
+const struct encaptab *ip6_encap_attach(const struct encap_config *,
+    void *arg, int mflags);
+
+int ip_encap_detach(const struct encaptab *);
+int ip6_encap_detach(const struct encaptab *);
 #endif
 
 #endif /*_NETINET_IP_ENCAP_H_*/
diff --git a/sys/netinet/ip_gre.c b/sys/netinet/ip_gre.c
index 5b865e07bc45..66c1ce895ab9 100644
--- a/sys/netinet/ip_gre.c
+++ b/sys/netinet/ip_gre.c
@@ -1,169 +1,154 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-#include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_var.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/if_gre.h>
 
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_GRE,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		gre_input,
-	.pr_output =		rip_output,
-	.pr_ctlinput =		rip_ctlinput,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-
 #define	GRE_TTL			30
 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
 #define	V_ip_gre_ttl		VNET(ip_gre_ttl)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
-	&VNET_NAME(ip_gre_ttl), 0, "");
+    &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
 
 static int
 in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct ip *ip;
 
 	sc = (struct gre_softc *)arg;
 	if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
 		return (0);
 
 	M_ASSERTPKTHDR(m);
-	/*
-	 * We expect that payload contains at least IPv4
-	 * or IPv6 packet.
-	 */
-	if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
-		return (0);
 
 	GRE_RLOCK(sc);
 	if (sc->gre_family == 0)
 		goto bad;
 
 	KASSERT(sc->gre_family == AF_INET,
 	    ("wrong gre_family: %d", sc->gre_family));
 
 	ip = mtod(m, struct ip *);
 	if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
 	    sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
 		goto bad;
 
 	GRE_RUNLOCK(sc);
-	return (32 * 2);
+	return (32 * 3); /* src + dst + gre_hdr */
 bad:
 	GRE_RUNLOCK(sc);
 	return (0);
 }
 
 int
 in_gre_output(struct mbuf *m, int af, int hlen)
 {
 	struct greip *gi;
 
 	gi = mtod(m, struct greip *);
 	switch (af) {
 	case AF_INET:
 		/*
 		 * gre_transmit() has used M_PREPEND() that doesn't guarantee
 		 * m_data is contiguous more than hlen bytes. Use m_copydata()
 		 * here to avoid m_pullup().
 		 */
 		m_copydata(m, hlen + offsetof(struct ip, ip_tos),
 		    sizeof(u_char), &gi->gi_ip.ip_tos);
 		m_copydata(m, hlen + offsetof(struct ip, ip_id),
 		    sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
 		break;
 #ifdef INET6
 	case AF_INET6:
 		gi->gi_ip.ip_tos = 0; /* XXX */
 		ip_fillid(&gi->gi_ip);
 		break;
 #endif
 	}
 	gi->gi_ip.ip_ttl = V_ip_gre_ttl;
 	gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
 	return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
 }
 
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = IPPROTO_GRE,
+	.min_length = sizeof(struct greip) + sizeof(struct ip),
+	.exact_match = (sizeof(in_addr_t) << 4) + 32,
+	.check = in_gre_encapcheck,
+	.input = gre_input
+};
+
 int
 in_gre_attach(struct gre_softc *sc)
 {
 
 	KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
-	sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
-	    in_gre_encapcheck, &in_gre_protosw, sc);
-	if (sc->gre_ecookie == NULL)
-		return (EEXIST);
+	sc->gre_ecookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
 	return (0);
 }
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index fbf298fa54a0..ee747baa8bcc 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -1,2954 +1,2940 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1995
  * Modified by Ahmed Helmy, SGI, June 1996
  * Modified by George Edmond Eddy (Rusty), ISI, February 1998
  * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
  * Modified by Hitoshi Asaeda, WIDE, August 2000
  * Modified by Pavlin Radoslavov, ICSI, October 2002
  *
  * MROUTING Revision: 3.5
  * and PIM-SMv2 and PIM-DM support, advanced API support,
  * bandwidth metering and signaling
  */
 
 /*
  * TODO: Prefix functions with ipmf_.
  * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
  * domain attachment (if_afdata) so we can track consumers of that service.
  * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
  * move it to socket options.
  * TODO: Cleanup LSRR removal further.
  * TODO: Push RSVP stubs into raw_ip.c.
  * TODO: Use bitstring.h for vif set.
  * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
  * TODO: Sync ip6_mroute.c with this file.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
 
 #define _PIM_VT 1
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/stddef.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/counter.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/igmp.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/pim.h>
 #include <netinet/pim_var.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 
 #ifndef KTR_IPMF
 #define KTR_IPMF KTR_INET
 #endif
 
 #define		VIFI_INVALID	((vifi_t) -1)
 
 static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
 #define	V_last_tv_sec	VNET(last_tv_sec)
 
 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
 
 /*
  * Locking.  We use two locks: one for the virtual interface table and
  * one for the forwarding table.  These locks may be nested in which case
  * the VIF lock must always be taken first.  Note that each lock is used
  * to cover not only the specific data structure but also related data
  * structures.
  */
 
 static struct mtx mrouter_mtx;
 #define	MROUTER_LOCK()		mtx_lock(&mrouter_mtx)
 #define	MROUTER_UNLOCK()	mtx_unlock(&mrouter_mtx)
 #define	MROUTER_LOCK_ASSERT()	mtx_assert(&mrouter_mtx, MA_OWNED)
 #define	MROUTER_LOCK_INIT()						\
 	mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER_LOCK_DESTROY()	mtx_destroy(&mrouter_mtx)
 
 static int ip_mrouter_cnt;	/* # of vnets with active mrouters */
 static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
 
 static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
 VNET_PCPUSTAT_SYSINIT(mrtstat);
 VNET_PCPUSTAT_SYSUNINIT(mrtstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
     mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
     "netinet/ip_mroute.h)");
 
 static VNET_DEFINE(u_long, mfchash);
 #define	V_mfchash		VNET(mfchash)
 #define	MFCHASH(a, g)							\
 	((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
 	  ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash)
 #define	MFCHASHSIZE	256
 
 static u_long mfchashsize;			/* Hash size */
 static VNET_DEFINE(u_char *, nexpire);		/* 0..mfchashsize-1 */
 #define	V_nexpire		VNET(nexpire)
 static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
 #define	V_mfchashtbl		VNET(mfchashtbl)
 
 static struct mtx mfc_mtx;
 #define	MFC_LOCK()		mtx_lock(&mfc_mtx)
 #define	MFC_UNLOCK()		mtx_unlock(&mfc_mtx)
 #define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
 #define	MFC_LOCK_INIT()							\
 	mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
 #define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
 
 static VNET_DEFINE(vifi_t, numvifs);
 #define	V_numvifs		VNET(numvifs)
 static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
 #define	V_viftable		VNET(viftable)
 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
     "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
 
 static struct mtx vif_mtx;
 #define	VIF_LOCK()		mtx_lock(&vif_mtx)
 #define	VIF_UNLOCK()		mtx_unlock(&vif_mtx)
 #define	VIF_LOCK_ASSERT()	mtx_assert(&vif_mtx, MA_OWNED)
 #define	VIF_LOCK_INIT()							\
 	mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
 #define	VIF_LOCK_DESTROY()	mtx_destroy(&vif_mtx)
 
 static eventhandler_tag if_detach_event_tag = NULL;
 
 static VNET_DEFINE(struct callout, expire_upcalls_ch);
 #define	V_expire_upcalls_ch	VNET(expire_upcalls_ch)
 
 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
 #define		UPCALL_EXPIRE	6		/* number of timeouts	*/
 
 /*
  * Bandwidth meter variables and constants
  */
 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
 /*
  * Pending timeouts are stored in a hash table, the key being the
  * expiration time. Periodically, the entries are analysed and processed.
  */
 #define	BW_METER_BUCKETS	1024
 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
 #define	V_bw_meter_timers	VNET(bw_meter_timers)
 static VNET_DEFINE(struct callout, bw_meter_ch);
 #define	V_bw_meter_ch		VNET(bw_meter_ch)
 #define	BW_METER_PERIOD (hz)		/* periodical handling of bw meters */
 
 /*
  * Pending upcalls are stored in a vector which is flushed when
  * full, or periodically
  */
 static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
 #define	V_bw_upcalls		VNET(bw_upcalls)
 static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
 #define	V_bw_upcalls_n    	VNET(bw_upcalls_n)
 static VNET_DEFINE(struct callout, bw_upcalls_ch);
 #define	V_bw_upcalls_ch		VNET(bw_upcalls_ch)
 
 #define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
 
 static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
 VNET_PCPUSTAT_SYSINIT(pimstat);
 VNET_PCPUSTAT_SYSUNINIT(pimstat);
 
 SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
     pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
 
 static u_long	pim_squelch_wholepkt = 0;
 SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
     &pim_squelch_wholepkt, 0,
     "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
 
-extern  struct domain inetdomain;
-static const struct protosw in_pim_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_PIM,
-	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
-	.pr_input =		pim_input,
-	.pr_output =		rip_output,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
 static const struct encaptab *pim_encap_cookie;
-
 static int pim_encapcheck(const struct mbuf *, int, int, void *);
+static int pim_input(struct mbuf *, int, int, void *);
+
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = IPPROTO_PIM,
+	.min_length = sizeof(struct ip) + PIM_MINLEN,
+	.exact_match = 8,
+	.check = pim_encapcheck,
+	.input = pim_input
+};
 
 /*
  * Note: the PIM Register encapsulation adds the following in front of a
  * data packet:
  *
  * struct pim_encap_hdr {
  *    struct ip ip;
  *    struct pim_encap_pimhdr  pim;
  * }
  *
  */
 
 struct pim_encap_pimhdr {
 	struct pim pim;
 	uint32_t   flags;
 };
 #define		PIM_ENCAP_TTL	64
 
 static struct ip pim_encap_iphdr = {
 #if BYTE_ORDER == LITTLE_ENDIAN
 	sizeof(struct ip) >> 2,
 	IPVERSION,
 #else
 	IPVERSION,
 	sizeof(struct ip) >> 2,
 #endif
 	0,			/* tos */
 	sizeof(struct ip),	/* total length */
 	0,			/* id */
 	0,			/* frag offset */
 	PIM_ENCAP_TTL,
 	IPPROTO_PIM,
 	0,			/* checksum */
 };
 
 static struct pim_encap_pimhdr pim_encap_pimhdr = {
     {
 	PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
 	0,			/* reserved */
 	0,			/* checksum */
     },
     0				/* flags */
 };
 
 static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
 #define	V_reg_vif_num		VNET(reg_vif_num)
 static VNET_DEFINE(struct ifnet, multicast_register_if);
 #define	V_multicast_register_if	VNET(multicast_register_if)
 
 /*
  * Private variables.
  */
 
 static u_long	X_ip_mcast_src(int);
 static int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
 		    struct ip_moptions *);
 static int	X_ip_mrouter_done(void);
 static int	X_ip_mrouter_get(struct socket *, struct sockopt *);
 static int	X_ip_mrouter_set(struct socket *, struct sockopt *);
 static int	X_legal_vif_num(int);
 static int	X_mrt_ioctl(u_long, caddr_t, int);
 
 static int	add_bw_upcall(struct bw_upcall *);
 static int	add_mfc(struct mfcctl2 *);
 static int	add_vif(struct vifctl *);
 static void	bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
 static void	bw_meter_process(void);
 static void	bw_meter_receive_packet(struct bw_meter *, int,
 		    struct timeval *);
 static void	bw_upcalls_send(void);
 static int	del_bw_upcall(struct bw_upcall *);
 static int	del_mfc(struct mfcctl2 *);
 static int	del_vif(vifi_t);
 static int	del_vif_locked(vifi_t);
 static void	expire_bw_meter_process(void *);
 static void	expire_bw_upcalls_send(void *);
 static void	expire_mfc(struct mfc *);
 static void	expire_upcalls(void *);
 static void	free_bw_list(struct bw_meter *);
 static int	get_sg_cnt(struct sioc_sg_req *);
 static int	get_vif_cnt(struct sioc_vif_req *);
 static void	if_detached_event(void *, struct ifnet *);
 static int	ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
 static int	ip_mrouter_init(struct socket *, int);
 static __inline struct mfc *
 		mfc_find(struct in_addr *, struct in_addr *);
 static void	phyint_send(struct ip *, struct vif *, struct mbuf *);
 static struct mbuf *
 		pim_register_prepare(struct ip *, struct mbuf *);
 static int	pim_register_send(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_rp(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_upcall(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static void	schedule_bw_meter(struct bw_meter *, struct timeval *);
 static void	send_packet(struct vif *, struct mbuf *);
 static int	set_api_config(uint32_t *);
 static int	set_assert(int);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in *);
 static void	unschedule_bw_meter(struct bw_meter *);
 
 /*
  * Kernel multicast forwarding API capabilities and setup.
  * If more API capabilities are added to the kernel, they should be
  * recorded in `mrt_api_support'.
  */
 #define MRT_API_VERSION		0x0305
 
 static const int mrt_api_version = MRT_API_VERSION;
 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
 					 MRT_MFC_FLAGS_BORDER_VIF |
 					 MRT_MFC_RP |
 					 MRT_MFC_BW_UPCALL);
 static VNET_DEFINE(uint32_t, mrt_api_config);
 #define	V_mrt_api_config	VNET(mrt_api_config)
 static VNET_DEFINE(int, pim_assert_enabled);
 #define	V_pim_assert_enabled	VNET(pim_assert_enabled)
 static struct timeval pim_assert_interval = { 3, 0 };	/* Rate limit */
 
 /*
  * Find a route for a given origin IP address and multicast group address.
  * Statistics must be updated by the caller.
  */
 static __inline struct mfc *
 mfc_find(struct in_addr *o, struct in_addr *g)
 {
 	struct mfc *rt;
 
 	MFC_LOCK_ASSERT();
 
 	LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
 		if (in_hosteq(rt->mfc_origin, *o) &&
 		    in_hosteq(rt->mfc_mcastgrp, *g) &&
 		    TAILQ_EMPTY(&rt->mfc_stall))
 			break;
 	}
 
 	return (rt);
 }
 
 /*
  * Handle MRT setsockopt commands to modify the multicast forwarding tables.
  */
 static int
 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
     int	error, optval;
     vifi_t	vifi;
     struct	vifctl vifc;
     struct	mfcctl2 mfc;
     struct	bw_upcall bw_upcall;
     uint32_t	i;
 
     if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT)
 	return EPERM;
 
     error = 0;
     switch (sopt->sopt_name) {
     case MRT_INIT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	error = ip_mrouter_init(so, optval);
 	break;
 
     case MRT_DONE:
 	error = ip_mrouter_done();
 	break;
 
     case MRT_ADD_VIF:
 	error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 	if (error)
 	    break;
 	error = add_vif(&vifc);
 	break;
 
     case MRT_DEL_VIF:
 	error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 	if (error)
 	    break;
 	error = del_vif(vifi);
 	break;
 
     case MRT_ADD_MFC:
     case MRT_DEL_MFC:
 	/*
 	 * select data size depending on API version.
 	 */
 	if (sopt->sopt_name == MRT_ADD_MFC &&
 		V_mrt_api_config & MRT_API_FLAGS_ALL) {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
 				sizeof(struct mfcctl2));
 	} else {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
 				sizeof(struct mfcctl));
 	    bzero((caddr_t)&mfc + sizeof(struct mfcctl),
 			sizeof(mfc) - sizeof(struct mfcctl));
 	}
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_MFC)
 	    error = add_mfc(&mfc);
 	else
 	    error = del_mfc(&mfc);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	set_assert(optval);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 	if (!error)
 	    error = set_api_config(&i);
 	if (!error)
 	    error = sooptcopyout(sopt, &i, sizeof i);
 	break;
 
     case MRT_ADD_BW_UPCALL:
     case MRT_DEL_BW_UPCALL:
 	error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
 				sizeof bw_upcall);
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
 	    error = add_bw_upcall(&bw_upcall);
 	else
 	    error = del_bw_upcall(&bw_upcall);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle MRT getsockopt commands
  */
 static int
 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
     int error;
 
     switch (sopt->sopt_name) {
     case MRT_VERSION:
 	error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyout(sopt, &V_pim_assert_enabled,
 	    sizeof V_pim_assert_enabled);
 	break;
 
     case MRT_API_SUPPORT:
 	error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle ioctl commands to obtain information from the cache
  */
 static int
 X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
 {
     int error = 0;
 
     /*
      * Currently the only function calling this ioctl routine is rtioctl_fib().
      * Typically, only root can create the raw socket in order to execute
      * this ioctl method, however the request might be coming from a prison
      */
     error = priv_check(curthread, PRIV_NETINET_MROUTE);
     if (error)
 	return (error);
     switch (cmd) {
     case (SIOCGETVIFCNT):
 	error = get_vif_cnt((struct sioc_vif_req *)data);
 	break;
 
     case (SIOCGETSGCNT):
 	error = get_sg_cnt((struct sioc_sg_req *)data);
 	break;
 
     default:
 	error = EINVAL;
 	break;
     }
     return error;
 }
 
 /*
  * returns the packet, byte, rpf-failure count for the source group provided
  */
 static int
 get_sg_cnt(struct sioc_sg_req *req)
 {
     struct mfc *rt;
 
     MFC_LOCK();
     rt = mfc_find(&req->src, &req->grp);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 	return EADDRNOTAVAIL;
     }
     req->pktcnt = rt->mfc_pkt_cnt;
     req->bytecnt = rt->mfc_byte_cnt;
     req->wrong_if = rt->mfc_wrong_if;
     MFC_UNLOCK();
     return 0;
 }
 
 /*
  * returns the input and output packet and byte counts on the vif provided
  */
 static int
 get_vif_cnt(struct sioc_vif_req *req)
 {
     vifi_t vifi = req->vifi;
 
     VIF_LOCK();
     if (vifi >= V_numvifs) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
 
     req->icount = V_viftable[vifi].v_pkt_in;
     req->ocount = V_viftable[vifi].v_pkt_out;
     req->ibytes = V_viftable[vifi].v_bytes_in;
     req->obytes = V_viftable[vifi].v_bytes_out;
     VIF_UNLOCK();
 
     return 0;
 }
 
 static void
 if_detached_event(void *arg __unused, struct ifnet *ifp)
 {
     vifi_t vifi;
     u_long i;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return;
     }
 
     VIF_LOCK();
     MFC_LOCK();
 
     /*
      * Tear down multicast forwarder state associated with this ifnet.
      * 1. Walk the vif list, matching vifs against this ifnet.
      * 2. Walk the multicast forwarding cache (mfc) looking for
      *    inner matches with this vif's index.
      * 3. Expire any matching multicast forwarding cache entries.
      * 4. Free vif state. This should disable ALLMULTI on the interface.
      */
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	if (V_viftable[vifi].v_ifp != ifp)
 		continue;
 	for (i = 0; i < mfchashsize; i++) {
 		struct mfc *rt, *nrt;
 
 		LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 			if (rt->mfc_parent == vifi) {
 				expire_mfc(rt);
 			}
 		}
 	}
 	del_vif_locked(vifi);
     }
 
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     MROUTER_UNLOCK();
 }
                         
 /*
  * Enable multicast forwarding.
  */
 static int
 ip_mrouter_init(struct socket *so, int version)
 {
 
     CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
         so->so_type, so->so_proto->pr_protocol);
 
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
 	return EOPNOTSUPP;
 
     if (version != 1)
 	return ENOPROTOOPT;
 
     MROUTER_LOCK();
 
     if (ip_mrouter_unloading) {
 	MROUTER_UNLOCK();
 	return ENOPROTOOPT;
     }
 
     if (V_ip_mrouter != NULL) {
 	MROUTER_UNLOCK();
 	return EADDRINUSE;
     }
 
     V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
 	HASH_NOWAIT);
 
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	curvnet);
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
 	curvnet);
 
     V_ip_mrouter = so;
     ip_mrouter_cnt++;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Disable multicast forwarding.
  */
 static int
 X_ip_mrouter_done(void)
 {
     struct ifnet *ifp;
     u_long i;
     vifi_t vifi;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return EINVAL;
     }
 
     /*
      * Detach/disable hooks to the reset of the system.
      */
     V_ip_mrouter = NULL;
     ip_mrouter_cnt--;
     V_mrt_api_config = 0;
 
     VIF_LOCK();
 
     /*
      * For each phyint in use, disable promiscuous reception of all IP
      * multicasts.
      */
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
 		!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
 	    ifp = V_viftable[vifi].v_ifp;
 	    if_allmulti(ifp, 0);
 	}
     }
     bzero((caddr_t)V_viftable, sizeof(V_viftable));
     V_numvifs = 0;
     V_pim_assert_enabled = 0;
     
     VIF_UNLOCK();
 
     callout_stop(&V_expire_upcalls_ch);
     callout_stop(&V_bw_upcalls_ch);
     callout_stop(&V_bw_meter_ch);
 
     MFC_LOCK();
 
     /*
      * Free all multicast forwarding cache entries.
      * Do not use hashdestroy(), as we must perform other cleanup.
      */
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		expire_mfc(rt);
 	}
     }
     free(V_mfchashtbl, M_MRTABLE);
     V_mfchashtbl = NULL;
 
     bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
 
     V_bw_upcalls_n = 0;
     bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 
     MFC_UNLOCK();
 
     V_reg_vif_num = VIFI_INVALID;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Set PIM assert processing global
  */
 static int
 set_assert(int i)
 {
     if ((i != 1) && (i != 0))
 	return EINVAL;
 
     V_pim_assert_enabled = i;
 
     return 0;
 }
 
 /*
  * Configure API capabilities
  */
 int
 set_api_config(uint32_t *apival)
 {
     u_long i;
 
     /*
      * We can set the API capabilities only if it is the first operation
      * after MRT_INIT. I.e.:
      *  - there are no vifs installed
      *  - pim_assert is not enabled
      *  - the MFC table is empty
      */
     if (V_numvifs > 0) {
 	*apival = 0;
 	return EPERM;
     }
     if (V_pim_assert_enabled) {
 	*apival = 0;
 	return EPERM;
     }
 
     MFC_LOCK();
 
     for (i = 0; i < mfchashsize; i++) {
 	if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
 	    MFC_UNLOCK();
 	    *apival = 0;
 	    return EPERM;
 	}
     }
 
     MFC_UNLOCK();
 
     V_mrt_api_config = *apival & mrt_api_support;
     *apival = V_mrt_api_config;
 
     return 0;
 }
 
 /*
  * Add a vif to the vif table
  */
 static int
 add_vif(struct vifctl *vifcp)
 {
     struct vif *vifp = V_viftable + vifcp->vifc_vifi;
     struct sockaddr_in sin = {sizeof sin, AF_INET};
     struct ifaddr *ifa;
     struct ifnet *ifp;
     int error;
 
     VIF_LOCK();
     if (vifcp->vifc_vifi >= MAXVIFS) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
     /* rate limiting is no longer supported by this code */
     if (vifcp->vifc_rate_limit != 0) {
 	log(LOG_ERR, "rate limiting is no longer supported\n");
 	VIF_UNLOCK();
 	return EINVAL;
     }
     if (!in_nullhost(vifp->v_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRINUSE;
     }
     if (in_nullhost(vifcp->vifc_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /* Find the interface with an address in AF_INET family */
     if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/*
 	 * XXX: Because VIFF_REGISTER does not really need a valid
 	 * local interface (e.g. it could be 127.0.0.2), we don't
 	 * check its address.
 	 */
 	ifp = NULL;
     } else {
 	sin.sin_addr = vifcp->vifc_lcl_addr;
 	NET_EPOCH_ENTER();
 	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 	if (ifa == NULL) {
 		NET_EPOCH_EXIT();
 	    VIF_UNLOCK();
 	    return EADDRNOTAVAIL;
 	}
 	ifp = ifa->ifa_ifp;
 	NET_EPOCH_EXIT();
     }
 
     if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
 	CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__);
 	VIF_UNLOCK();
 	return EOPNOTSUPP;
     } else if (vifcp->vifc_flags & VIFF_REGISTER) {
 	ifp = &V_multicast_register_if;
 	CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp);
 	if (V_reg_vif_num == VIFI_INVALID) {
 	    if_initname(&V_multicast_register_if, "register_vif", 0);
 	    V_multicast_register_if.if_flags = IFF_LOOPBACK;
 	    V_reg_vif_num = vifcp->vifc_vifi;
 	}
     } else {		/* Make sure the interface supports multicast */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 	    VIF_UNLOCK();
 	    return EOPNOTSUPP;
 	}
 
 	/* Enable promiscuous reception of all IP multicasts from the if */
 	error = if_allmulti(ifp, 1);
 	if (error) {
 	    VIF_UNLOCK();
 	    return error;
 	}
     }
 
     vifp->v_flags     = vifcp->vifc_flags;
     vifp->v_threshold = vifcp->vifc_threshold;
     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
     vifp->v_ifp       = ifp;
     /* initialize per vif pkt counters */
     vifp->v_pkt_in    = 0;
     vifp->v_pkt_out   = 0;
     vifp->v_bytes_in  = 0;
     vifp->v_bytes_out = 0;
 
     /* Adjust numvifs up if the vifi is higher than numvifs */
     if (V_numvifs <= vifcp->vifc_vifi)
 	V_numvifs = vifcp->vifc_vifi + 1;
 
     VIF_UNLOCK();
 
     CTR4(KTR_IPMF, "%s: add vif %d laddr 0x%08x thresh %x", __func__,
 	(int)vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr),
 	(int)vifcp->vifc_threshold);
 
     return 0;
 }
 
 /*
  * Delete a vif from the vif table
  */
 static int
 del_vif_locked(vifi_t vifi)
 {
     struct vif *vifp;
 
     VIF_LOCK_ASSERT();
 
     if (vifi >= V_numvifs) {
 	return EINVAL;
     }
     vifp = &V_viftable[vifi];
     if (in_nullhost(vifp->v_lcl_addr)) {
 	return EADDRNOTAVAIL;
     }
 
     if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)))
 	if_allmulti(vifp->v_ifp, 0);
 
     if (vifp->v_flags & VIFF_REGISTER)
 	V_reg_vif_num = VIFI_INVALID;
 
     bzero((caddr_t)vifp, sizeof (*vifp));
 
     CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi);
 
     /* Adjust numvifs down */
     for (vifi = V_numvifs; vifi > 0; vifi--)
 	if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr))
 	    break;
     V_numvifs = vifi;
 
     return 0;
 }
 
 static int
 del_vif(vifi_t vifi)
 {
     int cc;
 
     VIF_LOCK();
     cc = del_vif_locked(vifi);
     VIF_UNLOCK();
 
     return cc;
 }
 
 /*
  * update an mfc entry without resetting counters and S,G addresses.
  */
 static void
 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     int i;
 
     rt->mfc_parent = mfccp->mfcc_parent;
     for (i = 0; i < V_numvifs; i++) {
 	rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 	rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config &
 	    MRT_MFC_FLAGS_ALL;
     }
     /* set the RP address */
     if (V_mrt_api_config & MRT_MFC_RP)
 	rt->mfc_rp = mfccp->mfcc_rp;
     else
 	rt->mfc_rp.s_addr = INADDR_ANY;
 }
 
 /*
  * fully initialize an mfc entry from the parameter.
  */
 static void
 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     rt->mfc_origin     = mfccp->mfcc_origin;
     rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 
     update_mfc_params(rt, mfccp);
 
     /* initialize pkt counters per src-grp */
     rt->mfc_pkt_cnt    = 0;
     rt->mfc_byte_cnt   = 0;
     rt->mfc_wrong_if   = 0;
     timevalclear(&rt->mfc_last_assert);
 }
 
 static void
 expire_mfc(struct mfc *rt)
 {
 	struct rtdetq *rte, *nrte;
 
 	MFC_LOCK_ASSERT();
 
 	free_bw_list(rt->mfc_bw_meter);
 
 	TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 		m_freem(rte->m);
 		TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 		free(rte, M_MRTABLE);
 	}
 
 	LIST_REMOVE(rt, mfc_hash);
 	free(rt, M_MRTABLE);
 }
 
 /*
  * Add an mfc entry
  */
 static int
 add_mfc(struct mfcctl2 *mfccp)
 {
     struct mfc *rt;
     struct rtdetq *rte, *nrte;
     u_long hash = 0;
     u_short nstl;
 
     VIF_LOCK();
     MFC_LOCK();
 
     rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
 
     /* If an entry already exists, just update the fields */
     if (rt) {
 	CTR4(KTR_IPMF, "%s: update mfc orig 0x%08x group %lx parent %x",
 	    __func__, ntohl(mfccp->mfcc_origin.s_addr),
 	    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 	    mfccp->mfcc_parent);
 	update_mfc_params(rt, mfccp);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return (0);
     }
 
     /*
      * Find the entry for which the upcall was made and update
      */
     nstl = 0;
     hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
     LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 	if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 	    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
 	    !TAILQ_EMPTY(&rt->mfc_stall)) {
 		CTR5(KTR_IPMF,
 		    "%s: add mfc orig 0x%08x group %lx parent %x qh %p",
 		    __func__, ntohl(mfccp->mfcc_origin.s_addr),
 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		    mfccp->mfcc_parent,
 		    TAILQ_FIRST(&rt->mfc_stall));
 		if (nstl++)
 			CTR1(KTR_IPMF, "%s: multiple matches", __func__);
 
 		init_mfc_params(rt, mfccp);
 		rt->mfc_expire = 0;	/* Don't clean this guy up */
 		V_nexpire[hash]--;
 
 		/* Free queued packets, but attempt to forward them first. */
 		TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 			if (rte->ifp != NULL)
 				ip_mdq(rte->m, rte->ifp, rt, -1);
 			m_freem(rte->m);
 			TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 			rt->mfc_nstall--;
 			free(rte, M_MRTABLE);
 		}
 	}
     }
 
     /*
      * It is possible that an entry is being inserted without an upcall
      */
     if (nstl == 0) {
 	CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__);
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 		    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
 			init_mfc_params(rt, mfccp);
 			if (rt->mfc_expire)
 			    V_nexpire[hash]--;
 			rt->mfc_expire = 0;
 			break; /* XXX */
 		}
 	}
 
 	if (rt == NULL) {		/* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL) {
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (ENOBUFS);
 	    }
 
 	    init_mfc_params(rt, mfccp);
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    rt->mfc_expire     = 0;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* insert new entry at head of hash chain */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	}
     }
 
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     return (0);
 }
 
 /*
  * Delete an mfc entry
  */
 static int
 del_mfc(struct mfcctl2 *mfccp)
 {
     struct in_addr	origin;
     struct in_addr	mcastgrp;
     struct mfc		*rt;
 
     origin = mfccp->mfcc_origin;
     mcastgrp = mfccp->mfcc_mcastgrp;
 
     CTR3(KTR_IPMF, "%s: delete mfc orig 0x%08x group %lx", __func__,
 	ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
 
     MFC_LOCK();
 
     rt = mfc_find(&origin, &mcastgrp);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /*
      * free the bw_meter entries
      */
     free_bw_list(rt->mfc_bw_meter);
     rt->mfc_bw_meter = NULL;
 
     LIST_REMOVE(rt, mfc_hash);
     free(rt, M_MRTABLE);
 
     MFC_UNLOCK();
 
     return (0);
 }
 
 /*
  * Send a message to the routing daemon on the multicast routing socket.
  */
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 {
     if (s) {
 	SOCKBUF_LOCK(&s->so_rcv);
 	if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
 	    NULL) != 0) {
 	    sorwakeup_locked(s);
 	    return 0;
 	}
 	SOCKBUF_UNLOCK(&s->so_rcv);
     }
     m_freem(mm);
     return -1;
 }
 
 /*
  * IP multicast forwarding function. This function assumes that the packet
  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IP multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  */
 
 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
 
 static int
 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
     struct ip_moptions *imo)
 {
     struct mfc *rt;
     int error;
     vifi_t vifi;
 
     CTR3(KTR_IPMF, "ip_mforward: delete mfc orig 0x%08x group %lx ifp %p",
 	ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), ifp);
 
     if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
 		((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
 	/*
 	 * Packet arrived via a physical interface or
 	 * an encapsulated tunnel or a register_vif.
 	 */
     } else {
 	/*
 	 * Packet arrived through a source-route tunnel.
 	 * Source-route tunnels are no longer supported.
 	 */
 	return (1);
     }
 
     VIF_LOCK();
     MFC_LOCK();
     if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) {
 	if (ip->ip_ttl < MAXTTL)
 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
 	error = ip_mdq(m, ifp, NULL, vifi);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     }
 
     /*
      * Don't forward a packet with time-to-live of zero or one,
      * or a packet destined to a local-only group.
      */
     if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return 0;
     }
 
     /*
      * Determine forwarding vifs from the forwarding cache table
      */
     MRTSTAT_INC(mrts_mfc_lookups);
     rt = mfc_find(&ip->ip_src, &ip->ip_dst);
 
     /* Entry exists, so forward if necessary */
     if (rt != NULL) {
 	error = ip_mdq(m, ifp, rt, -1);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     } else {
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon
 	 */
 
 	struct mbuf *mb0;
 	struct rtdetq *rte;
 	u_long hash;
 	int hlen = ip->ip_hl << 2;
 
 	MRTSTAT_INC(mrts_mfc_misses);
 	MRTSTAT_INC(mrts_no_route);
 	CTR2(KTR_IPMF, "ip_mforward: no mfc for (0x%08x,%lx)",
 	    ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we are
 	 * just going to fail anyway.  Make sure to pullup the header so
 	 * that other people can't step on it.
 	 */
 	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
 	    M_NOWAIT|M_ZERO);
 	if (rte == NULL) {
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	mb0 = m_copypacket(m, M_NOWAIT);
 	if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen))
 	    mb0 = m_pullup(mb0, hlen);
 	if (mb0 == NULL) {
 	    free(rte, M_MRTABLE);
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	/* is there an upcall waiting for this flow ? */
 	hash = MFCHASH(ip->ip_src, ip->ip_dst);
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
 		    in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
 		    !TAILQ_EMPTY(&rt->mfc_stall))
 			break;
 	}
 
 	if (rt == NULL) {
 	    int i;
 	    struct igmpmsg *im;
 	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 	    struct mbuf *mm;
 
 	    /*
 	     * Locate the vifi for the incoming interface for this packet.
 	     * If none found, drop packet.
 	     */
 	    for (vifi = 0; vifi < V_numvifs &&
 		    V_viftable[vifi].v_ifp != ifp; vifi++)
 		;
 	    if (vifi >= V_numvifs)	/* vif not found, drop packet */
 		goto non_fatal;
 
 	    /* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL)
 		goto fail;
 
 	    /* Make a copy of the header to send to the user level process */
 	    mm = m_copym(mb0, 0, hlen, M_NOWAIT);
 	    if (mm == NULL)
 		goto fail1;
 
 	    /*
 	     * Send message to routing daemon to install
 	     * a route into the kernel table
 	     */
 
 	    im = mtod(mm, struct igmpmsg *);
 	    im->im_msgtype = IGMPMSG_NOCACHE;
 	    im->im_mbz = 0;
 	    im->im_vif = vifi;
 
 	    MRTSTAT_INC(mrts_upcalls);
 
 	    k_igmpsrc.sin_addr = ip->ip_src;
 	    if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		CTR0(KTR_IPMF, "ip_mforward: socket queue full");
 		MRTSTAT_INC(mrts_upq_sockfull);
 fail1:
 		free(rt, M_MRTABLE);
 fail:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return ENOBUFS;
 	    }
 
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
 	    rt->mfc_expire	      = UPCALL_EXPIRE;
 	    V_nexpire[hash]++;
 	    for (i = 0; i < V_numvifs; i++) {
 		rt->mfc_ttls[i] = 0;
 		rt->mfc_flags[i] = 0;
 	    }
 	    rt->mfc_parent = -1;
 
 	    /* clear the RP address */
 	    rt->mfc_rp.s_addr = INADDR_ANY;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* initialize pkt counters per src-grp */
 	    rt->mfc_pkt_cnt = 0;
 	    rt->mfc_byte_cnt = 0;
 	    rt->mfc_wrong_if = 0;
 	    timevalclear(&rt->mfc_last_assert);
 
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    /* link into table */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	    TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 
 	} else {
 	    /* determine if queue has overflowed */
 	    if (rt->mfc_nstall > MAX_UPQ) {
 		MRTSTAT_INC(mrts_upq_ovflw);
 non_fatal:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (0);
 	    }
 	    TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 	}
 
 	rte->m			= mb0;
 	rte->ifp		= ifp;
 
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 
 	return 0;
     }
 }
 
 /*
  * Clean up the cache entry if upcall is not serviced
  */
 static void
 expire_upcalls(void *arg)
 {
     u_long i;
 
     CURVNET_SET((struct vnet *) arg);
 
     MFC_LOCK();
 
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	if (V_nexpire[i] == 0)
 	    continue;
 
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		if (TAILQ_EMPTY(&rt->mfc_stall))
 			continue;
 
 		if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
 			continue;
 
 		/*
 		 * free the bw_meter entries
 		 */
 		while (rt->mfc_bw_meter != NULL) {
 		    struct bw_meter *x = rt->mfc_bw_meter;
 
 		    rt->mfc_bw_meter = x->bm_mfc_next;
 		    free(x, M_BWMETER);
 		}
 
 		MRTSTAT_INC(mrts_cache_cleanups);
 		CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
 		    (u_long)ntohl(rt->mfc_origin.s_addr),
 		    (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
 
 		expire_mfc(rt);
 	    }
     }
 
     MFC_UNLOCK();
 
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
 
     CURVNET_RESTORE();
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made
  */
 static int
 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
 {
     struct ip  *ip = mtod(m, struct ip *);
     vifi_t vifi;
     int plen = ntohs(ip->ip_len);
 
     VIF_LOCK_ASSERT();
 
     /*
      * If xmt_vif is not -1, send on only the requested vif.
      *
      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
      */
     if (xmt_vif < V_numvifs) {
 	if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + xmt_vif, m, rt);
 	else
 		phyint_send(ip, V_viftable + xmt_vif, m);
 	return 1;
     }
 
     /*
      * Don't forward if it didn't arrive from the parent vif for its origin.
      */
     vifi = rt->mfc_parent;
     if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) {
 	CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)",
 	    __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp);
 	MRTSTAT_INC(mrts_wrong_if);
 	++rt->mfc_wrong_if;
 	/*
 	 * If we are doing PIM assert processing, send a message
 	 * to the routing daemon.
 	 *
 	 * XXX: A PIM-SM router needs the WRONGVIF detection so it
 	 * can complete the SPT switch, regardless of the type
 	 * of the iif (broadcast media, GRE tunnel, etc).
 	 */
 	if (V_pim_assert_enabled && (vifi < V_numvifs) &&
 	    V_viftable[vifi].v_ifp) {
 
 	    if (ifp == &V_multicast_register_if)
 		PIMSTAT_INC(pims_rcv_registers_wrongiif);
 
 	    /* Get vifi for the incoming packet */
 	    for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp;
 		vifi++)
 		;
 	    if (vifi >= V_numvifs)
 		return 0;	/* The iif is not found: ignore the packet. */
 
 	    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
 		return 0;	/* WRONGVIF disabled: ignore the packet */
 
 	    if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
 		struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 		struct igmpmsg *im;
 		int hlen = ip->ip_hl << 2;
 		struct mbuf *mm = m_copym(m, 0, hlen, M_NOWAIT);
 
 		if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen))
 		    mm = m_pullup(mm, hlen);
 		if (mm == NULL)
 		    return ENOBUFS;
 
 		im = mtod(mm, struct igmpmsg *);
 		im->im_msgtype	= IGMPMSG_WRONGVIF;
 		im->im_mbz		= 0;
 		im->im_vif		= vifi;
 
 		MRTSTAT_INC(mrts_upcalls);
 
 		k_igmpsrc.sin_addr = im->im_src;
 		if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		    CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 		    MRTSTAT_INC(mrts_upq_sockfull);
 		    return ENOBUFS;
 		}
 	    }
 	}
 	return 0;
     }
 
 
     /* If I sourced this packet, it counts as output, else it was input. */
     if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) {
 	V_viftable[vifi].v_pkt_out++;
 	V_viftable[vifi].v_bytes_out += plen;
     } else {
 	V_viftable[vifi].v_pkt_in++;
 	V_viftable[vifi].v_bytes_in += plen;
     }
     rt->mfc_pkt_cnt++;
     rt->mfc_byte_cnt += plen;
 
     /*
      * For each vif, decide if a copy of the packet should be forwarded.
      * Forward if:
      *		- the ttl exceeds the vif's threshold
      *		- there are group members downstream on interface
      */
     for (vifi = 0; vifi < V_numvifs; vifi++)
 	if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
 	    V_viftable[vifi].v_pkt_out++;
 	    V_viftable[vifi].v_bytes_out += plen;
 	    if (V_viftable[vifi].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + vifi, m, rt);
 	    else
 		phyint_send(ip, V_viftable + vifi, m);
 	}
 
     /*
      * Perform upcall-related bw measuring.
      */
     if (rt->mfc_bw_meter != NULL) {
 	struct bw_meter *x;
 	struct timeval now;
 
 	microtime(&now);
 	MFC_LOCK_ASSERT();
 	for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
 	    bw_meter_receive_packet(x, plen, &now);
     }
 
     return 0;
 }
 
 /*
  * Check if a vif number is legal/ok. This is used by in_mcast.c.
  */
 static int
 X_legal_vif_num(int vif)
 {
 	int ret;
 
 	ret = 0;
 	if (vif < 0)
 		return (ret);
 
 	VIF_LOCK();
 	if (vif < V_numvifs)
 		ret = 1;
 	VIF_UNLOCK();
 
 	return (ret);
 }
 
 /*
  * Return the local address used by this vif
  */
 static u_long
 X_ip_mcast_src(int vifi)
 {
 	in_addr_t addr;
 
 	addr = INADDR_ANY;
 	if (vifi < 0)
 		return (addr);
 
 	VIF_LOCK();
 	if (vifi < V_numvifs)
 		addr = V_viftable[vifi].v_lcl_addr.s_addr;
 	VIF_UNLOCK();
 
 	return (addr);
 }
 
 static void
 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
 {
     struct mbuf *mb_copy;
     int hlen = ip->ip_hl << 2;
 
     VIF_LOCK_ASSERT();
 
     /*
      * Make a new reference to the packet; make sure that
      * the IP header is actually copied, not just referenced,
      * so that ip_output() only scribbles on the copy.
      */
     mb_copy = m_copypacket(m, M_NOWAIT);
     if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < hlen))
 	mb_copy = m_pullup(mb_copy, hlen);
     if (mb_copy == NULL)
 	return;
 
     send_packet(vifp, mb_copy);
 }
 
 static void
 send_packet(struct vif *vifp, struct mbuf *m)
 {
 	struct ip_moptions imo;
 	struct in_multi *imm[2];
 	int error __unused;
 
 	VIF_LOCK_ASSERT();
 
 	imo.imo_multicast_ifp  = vifp->v_ifp;
 	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
 	imo.imo_multicast_loop = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_num_memberships = 0;
 	imo.imo_max_memberships = 2;
 	imo.imo_membership  = &imm[0];
 
 	/*
 	 * Re-entrancy should not be a problem here, because
 	 * the packets that we send out and are looped back at us
 	 * should get rejected because they appear to come from
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
 	CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
 	    (ptrdiff_t)(vifp - V_viftable), error);
 }
 
 /*
  * Stubs for old RSVP socket shim implementation.
  */
 
 static int
 X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
 {
 
 	return (EOPNOTSUPP);
 }
 
 static void
 X_ip_rsvp_force_done(struct socket *so __unused)
 {
 
 }
 
 static int
 X_rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m;
 
 	m = *mp;
 	*mp = NULL;
 	if (!V_rsvp_on)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Code for bandwidth monitors
  */
 
 /*
  * Define common interface for timeval-related methods
  */
 #define	BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp)
 #define	BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp))
 #define	BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp))
 
 static uint32_t
 compute_bw_meter_flags(struct bw_upcall *req)
 {
     uint32_t flags = 0;
 
     if (req->bu_flags & BW_UPCALL_UNIT_PACKETS)
 	flags |= BW_METER_UNIT_PACKETS;
     if (req->bu_flags & BW_UPCALL_UNIT_BYTES)
 	flags |= BW_METER_UNIT_BYTES;
     if (req->bu_flags & BW_UPCALL_GEQ)
 	flags |= BW_METER_GEQ;
     if (req->bu_flags & BW_UPCALL_LEQ)
 	flags |= BW_METER_LEQ;
 
     return flags;
 }
 
 /*
  * Add a bw_meter entry
  */
 static int
 add_bw_upcall(struct bw_upcall *req)
 {
     struct mfc *mfc;
     struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
 		BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
     struct timeval now;
     struct bw_meter *x;
     uint32_t flags;
 
     if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
 	return EOPNOTSUPP;
 
     /* Test if the flags are valid */
     if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)))
 	return EINVAL;
     if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)))
 	return EINVAL;
     if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	    == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	return EINVAL;
 
     /* Test if the threshold time interval is valid */
     if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <))
 	return EINVAL;
 
     flags = compute_bw_meter_flags(req);
 
     /*
      * Find if we have already same bw_meter entry
      */
     MFC_LOCK();
     mfc = mfc_find(&req->bu_src, &req->bu_dst);
     if (mfc == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
     for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (x->bm_flags & BW_METER_USER_FLAGS) == flags)  {
 	    MFC_UNLOCK();
 	    return 0;		/* XXX Already installed */
 	}
     }
 
     /* Allocate the new bw_meter entry */
     x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
     if (x == NULL) {
 	MFC_UNLOCK();
 	return ENOBUFS;
     }
 
     /* Set the new bw_meter entry */
     x->bm_threshold.b_time = req->bu_threshold.b_time;
     microtime(&now);
     x->bm_start_time = now;
     x->bm_threshold.b_packets = req->bu_threshold.b_packets;
     x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
     x->bm_measured.b_packets = 0;
     x->bm_measured.b_bytes = 0;
     x->bm_flags = flags;
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 
     /* Add the new bw_meter entry to the front of entries for this MFC */
     x->bm_mfc = mfc;
     x->bm_mfc_next = mfc->mfc_bw_meter;
     mfc->mfc_bw_meter = x;
     schedule_bw_meter(x, &now);
     MFC_UNLOCK();
 
     return 0;
 }
 
 static void
 free_bw_list(struct bw_meter *list)
 {
     while (list != NULL) {
 	struct bw_meter *x = list;
 
 	list = list->bm_mfc_next;
 	unschedule_bw_meter(x);
 	free(x, M_BWMETER);
     }
 }
 
 /*
  * Delete one or multiple bw_meter entries
  */
 static int
 del_bw_upcall(struct bw_upcall *req)
 {
     struct mfc *mfc;
     struct bw_meter *x;
 
     if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
 	return EOPNOTSUPP;
 
     MFC_LOCK();
 
     /* Find the corresponding MFC entry */
     mfc = mfc_find(&req->bu_src, &req->bu_dst);
     if (mfc == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
 	/*
 	 * Delete all bw_meter entries for this mfc
 	 */
 	struct bw_meter *list;
 
 	list = mfc->mfc_bw_meter;
 	mfc->mfc_bw_meter = NULL;
 	free_bw_list(list);
 	MFC_UNLOCK();
 	return 0;
     } else {			/* Delete a single bw_meter entry */
 	struct bw_meter *prev;
 	uint32_t flags = 0;
 
 	flags = compute_bw_meter_flags(req);
 
 	/* Find the bw_meter entry to delete */
 	for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL;
 	     prev = x, x = x->bm_mfc_next) {
 	    if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
 			       &req->bu_threshold.b_time, ==)) &&
 		(x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 		(x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 		(x->bm_flags & BW_METER_USER_FLAGS) == flags)
 		break;
 	}
 	if (x != NULL) { /* Delete entry from the list for this MFC */
 	    if (prev != NULL)
 		prev->bm_mfc_next = x->bm_mfc_next;	/* remove from middle*/
 	    else
 		x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */
 
 	    unschedule_bw_meter(x);
 	    MFC_UNLOCK();
 	    /* Free the bw_meter entry */
 	    free(x, M_BWMETER);
 	    return 0;
 	} else {
 	    MFC_UNLOCK();
 	    return EINVAL;
 	}
     }
     /* NOTREACHED */
 }
 
 /*
  * Perform bandwidth measurement processing that may result in an upcall
  */
 static void
 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
 {
     struct timeval delta;
 
     MFC_LOCK_ASSERT();
 
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     if (x->bm_flags & BW_METER_GEQ) {
 	/*
 	 * Processing for ">=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /* Reset the bw_meter entry */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should deliver an upcall
 	 */
 	if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 		x->bm_flags |= BW_METER_UPCALL_DELIVERED;
 	    }
 	}
     } else if (x->bm_flags & BW_METER_LEQ) {
 	/*
 	 * Processing for "<=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /*
 	     * We are behind time with the multicast forwarding table
 	     * scanning for "<=" type of bw_meter entries, so test now
 	     * if we should deliver an upcall.
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 	    }
 	    /* Reschedule the bw_meter entry */
 	    unschedule_bw_meter(x);
 	    schedule_bw_meter(x, nowp);
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should restart the measuring interval
 	 */
 	if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
 	     x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
 	    (x->bm_flags & BW_METER_UNIT_BYTES &&
 	     x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
 	    /* Don't restart the measuring interval */
 	} else {
 	    /* Do restart the measuring interval */
 	    /*
 	     * XXX: note that we don't unschedule and schedule, because this
 	     * might be too much overhead per packet. Instead, when we process
 	     * all entries for a given timer hash bin, we check whether it is
 	     * really a timeout. If not, we reschedule at that time.
 	     */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
     }
 }
 
 /*
  * Prepare a bandwidth-related upcall
  */
 static void
 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 {
     struct timeval delta;
     struct bw_upcall *u;
 
     MFC_LOCK_ASSERT();
 
     /*
      * Compute the measured time interval
      */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     /*
      * If there are too many pending upcalls, deliver them now
      */
     if (V_bw_upcalls_n >= BW_UPCALLS_MAX)
 	bw_upcalls_send();
 
     /*
      * Set the bw_upcall entry
      */
     u = &V_bw_upcalls[V_bw_upcalls_n++];
     u->bu_src = x->bm_mfc->mfc_origin;
     u->bu_dst = x->bm_mfc->mfc_mcastgrp;
     u->bu_threshold.b_time = x->bm_threshold.b_time;
     u->bu_threshold.b_packets = x->bm_threshold.b_packets;
     u->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
     u->bu_measured.b_time = delta;
     u->bu_measured.b_packets = x->bm_measured.b_packets;
     u->bu_measured.b_bytes = x->bm_measured.b_bytes;
     u->bu_flags = 0;
     if (x->bm_flags & BW_METER_UNIT_PACKETS)
 	u->bu_flags |= BW_UPCALL_UNIT_PACKETS;
     if (x->bm_flags & BW_METER_UNIT_BYTES)
 	u->bu_flags |= BW_UPCALL_UNIT_BYTES;
     if (x->bm_flags & BW_METER_GEQ)
 	u->bu_flags |= BW_UPCALL_GEQ;
     if (x->bm_flags & BW_METER_LEQ)
 	u->bu_flags |= BW_UPCALL_LEQ;
 }
 
 /*
  * Send the pending bandwidth-related upcalls
  */
 static void
 bw_upcalls_send(void)
 {
     struct mbuf *m;
     int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]);
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
     static struct igmpmsg igmpmsg = { 0,		/* unused1 */
 				      0,		/* unused2 */
 				      IGMPMSG_BW_UPCALL,/* im_msgtype */
 				      0,		/* im_mbz  */
 				      0,		/* im_vif  */
 				      0,		/* unused3 */
 				      { 0 },		/* im_src  */
 				      { 0 } };		/* im_dst  */
 
     MFC_LOCK_ASSERT();
 
     if (V_bw_upcalls_n == 0)
 	return;			/* No pending upcalls */
 
     V_bw_upcalls_n = 0;
 
     /*
      * Allocate a new mbuf, initialize it with the header and
      * the payload for the pending calls.
      */
     m = m_gethdr(M_NOWAIT, MT_DATA);
     if (m == NULL) {
 	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
 	return;
     }
 
     m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
     m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
 
     /*
      * Send the upcalls
      * XXX do we need to set the address in k_igmpsrc ?
      */
     MRTSTAT_INC(mrts_upcalls);
     if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) {
 	log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
 	MRTSTAT_INC(mrts_upq_sockfull);
     }
 }
 
 /*
  * Compute the timeout hash value for the bw_meter entries
  */
 #define	BW_METER_TIMEHASH(bw_meter, hash)				\
     do {								\
 	struct timeval next_timeval = (bw_meter)->bm_start_time;	\
 									\
 	BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
 	(hash) = next_timeval.tv_sec;					\
 	if (next_timeval.tv_usec)					\
 	    (hash)++; /* XXX: make sure we don't timeout early */	\
 	(hash) %= BW_METER_BUCKETS;					\
     } while (0)
 
 /*
  * Schedule a timer to process periodically bw_meter entry of type "<="
  * by linking the entry in the proper hash bucket.
  */
 static void
 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
 {
     int time_hash;
 
     MFC_LOCK_ASSERT();
 
     if (!(x->bm_flags & BW_METER_LEQ))
 	return;		/* XXX: we schedule timers only for "<=" entries */
 
     /*
      * Reset the bw_meter entry
      */
     x->bm_start_time = *nowp;
     x->bm_measured.b_packets = 0;
     x->bm_measured.b_bytes = 0;
     x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 
     /*
      * Compute the timeout hash value and insert the entry
      */
     BW_METER_TIMEHASH(x, time_hash);
     x->bm_time_next = V_bw_meter_timers[time_hash];
     V_bw_meter_timers[time_hash] = x;
     x->bm_time_hash = time_hash;
 }
 
 /*
  * Unschedule the periodic timer that processes bw_meter entry of type "<="
  * by removing the entry from the proper hash bucket.
  */
 static void
 unschedule_bw_meter(struct bw_meter *x)
 {
     int time_hash;
     struct bw_meter *prev, *tmp;
 
     MFC_LOCK_ASSERT();
 
     if (!(x->bm_flags & BW_METER_LEQ))
 	return;		/* XXX: we schedule timers only for "<=" entries */
 
     /*
      * Compute the timeout hash value and delete the entry
      */
     time_hash = x->bm_time_hash;
     if (time_hash >= BW_METER_BUCKETS)
 	return;		/* Entry was not scheduled */
 
     for (prev = NULL, tmp = V_bw_meter_timers[time_hash];
 	     tmp != NULL; prev = tmp, tmp = tmp->bm_time_next)
 	if (tmp == x)
 	    break;
 
     if (tmp == NULL)
 	panic("unschedule_bw_meter: bw_meter entry not found");
 
     if (prev != NULL)
 	prev->bm_time_next = x->bm_time_next;
     else
 	V_bw_meter_timers[time_hash] = x->bm_time_next;
 
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 }
 
 
 /*
  * Process all "<=" type of bw_meter that should be processed now,
  * and for each entry prepare an upcall if necessary. Each processed
  * entry is rescheduled again for the (periodic) processing.
  *
  * This is run periodically (once per second normally). On each round,
  * all the potentially matching entries are in the hash slot that we are
  * looking at.
  */
 static void
 bw_meter_process()
 {
     uint32_t loops;
     int i;
     struct timeval now, process_endtime;
 
     microtime(&now);
     if (V_last_tv_sec == now.tv_sec)
 	return;		/* nothing to do */
 
     loops = now.tv_sec - V_last_tv_sec;
     V_last_tv_sec = now.tv_sec;
     if (loops > BW_METER_BUCKETS)
 	loops = BW_METER_BUCKETS;
 
     MFC_LOCK();
     /*
      * Process all bins of bw_meter entries from the one after the last
      * processed to the current one. On entry, i points to the last bucket
      * visited, so we need to increment i at the beginning of the loop.
      */
     for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
 	struct bw_meter *x, *tmp_list;
 
 	if (++i >= BW_METER_BUCKETS)
 	    i = 0;
 
 	/* Disconnect the list of bw_meter entries from the bin */
 	tmp_list = V_bw_meter_timers[i];
 	V_bw_meter_timers[i] = NULL;
 
 	/* Process the list of bw_meter entries */
 	while (tmp_list != NULL) {
 	    x = tmp_list;
 	    tmp_list = tmp_list->bm_time_next;
 
 	    /* Test if the time interval is over */
 	    process_endtime = x->bm_start_time;
 	    BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
 	    if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
 		/* Not yet: reschedule, but don't reset */
 		int time_hash;
 
 		BW_METER_TIMEHASH(x, time_hash);
 		if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
 		    /*
 		     * XXX: somehow the bin processing is a bit ahead of time.
 		     * Put the entry in the next bin.
 		     */
 		    if (++time_hash >= BW_METER_BUCKETS)
 			time_hash = 0;
 		}
 		x->bm_time_next = V_bw_meter_timers[time_hash];
 		V_bw_meter_timers[time_hash] = x;
 		x->bm_time_hash = time_hash;
 
 		continue;
 	    }
 
 	    /*
 	     * Test if we should deliver an upcall
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, &now);
 	    }
 
 	    /*
 	     * Reschedule for next processing
 	     */
 	    schedule_bw_meter(x, &now);
 	}
     }
 
     /* Send all upcalls that are pending delivery */
     bw_upcalls_send();
 
     MFC_UNLOCK();
 }
 
 /*
  * A periodic function for sending all upcalls that are pending delivery
  */
 static void
 expire_bw_upcalls_send(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     MFC_LOCK();
     bw_upcalls_send();
     MFC_UNLOCK();
 
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * A periodic function for periodic scanning of the multicast forwarding
  * table for processing all "<=" bw_meter entries.
  */
 static void
 expire_bw_meter_process(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     if (V_mrt_api_config & MRT_MFC_BW_UPCALL)
 	bw_meter_process();
 
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
 	curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * End of bandwidth monitoring code
  */
 
 /*
  * Send the packet up to the user daemon, or eventually do kernel encapsulation
  *
  */
 static int
 pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
     struct mfc *rt)
 {
     struct mbuf *mb_copy, *mm;
 
     /*
      * Do not send IGMP_WHOLEPKT notifications to userland, if the
      * rendezvous point was unspecified, and we were told not to.
      */
     if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) &&
 	in_nullhost(rt->mfc_rp))
 	return 0;
 
     mb_copy = pim_register_prepare(ip, m);
     if (mb_copy == NULL)
 	return ENOBUFS;
 
     /*
      * Send all the fragments. Note that the mbuf for each fragment
      * is freed by the sending machinery.
      */
     for (mm = mb_copy; mm; mm = mb_copy) {
 	mb_copy = mm->m_nextpkt;
 	mm->m_nextpkt = 0;
 	mm = m_pullup(mm, sizeof(struct ip));
 	if (mm != NULL) {
 	    ip = mtod(mm, struct ip *);
 	    if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) {
 		pim_register_send_rp(ip, vifp, mm, rt);
 	    } else {
 		pim_register_send_upcall(ip, vifp, mm, rt);
 	    }
 	}
     }
 
     return 0;
 }
 
 /*
  * Return a copy of the data packet that is ready for PIM Register
  * encapsulation.
  * XXX: Note that in the returned copy the IP header is a valid one.
  */
 static struct mbuf *
 pim_register_prepare(struct ip *ip, struct mbuf *m)
 {
     struct mbuf *mb_copy = NULL;
     int mtu;
 
     /* Take care of delayed checksums */
     if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 	in_delayed_cksum(m);
 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
     }
 
     /*
      * Copy the old packet & pullup its IP header into the
      * new mbuf so we can modify it.
      */
     mb_copy = m_copypacket(m, M_NOWAIT);
     if (mb_copy == NULL)
 	return NULL;
     mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
     if (mb_copy == NULL)
 	return NULL;
 
     /* take care of the TTL */
     ip = mtod(mb_copy, struct ip *);
     --ip->ip_ttl;
 
     /* Compute the MTU after the PIM Register encapsulation */
     mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
 
     if (ntohs(ip->ip_len) <= mtu) {
 	/* Turn the IP header into a valid one */
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
     } else {
 	/* Fragment the packet */
 	mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
 	if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
 	    m_freem(mb_copy);
 	    return NULL;
 	}
     }
     return mb_copy;
 }
 
 /*
  * Send an upcall with the data packet to the user-level process.
  */
 static int
 pim_register_send_upcall(struct ip *ip, struct vif *vifp,
     struct mbuf *mb_copy, struct mfc *rt)
 {
     struct mbuf *mb_first;
     int len = ntohs(ip->ip_len);
     struct igmpmsg *im;
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 
     VIF_LOCK_ASSERT();
 
     /*
      * Add a new mbuf with an upcall header
      */
     mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg);
     mb_first->m_len = sizeof(struct igmpmsg);
     mb_first->m_next = mb_copy;
 
     /* Send message to routing daemon */
     im = mtod(mb_first, struct igmpmsg *);
     im->im_msgtype	= IGMPMSG_WHOLEPKT;
     im->im_mbz		= 0;
     im->im_vif		= vifp - V_viftable;
     im->im_src		= ip->ip_src;
     im->im_dst		= ip->ip_dst;
 
     k_igmpsrc.sin_addr	= ip->ip_src;
 
     MRTSTAT_INC(mrts_upcalls);
 
     if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) {
 	CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 	MRTSTAT_INC(mrts_upq_sockfull);
 	return ENOBUFS;
     }
 
     /* Keep statistics */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, len);
 
     return 0;
 }
 
 /*
  * Encapsulate the data packet in PIM Register message and send it to the RP.
  */
 static int
 pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
     struct mfc *rt)
 {
     struct mbuf *mb_first;
     struct ip *ip_outer;
     struct pim_encap_pimhdr *pimhdr;
     int len = ntohs(ip->ip_len);
     vifi_t vifi = rt->mfc_parent;
 
     VIF_LOCK_ASSERT();
 
     if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) {
 	m_freem(mb_copy);
 	return EADDRNOTAVAIL;		/* The iif vif is invalid */
     }
 
     /*
      * Add a new mbuf with the encapsulating header
      */
     mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
     mb_first->m_next = mb_copy;
 
     mb_first->m_pkthdr.len = len + mb_first->m_len;
 
     /*
      * Fill in the encapsulating IP and PIM header
      */
     ip_outer = mtod(mb_first, struct ip *);
     *ip_outer = pim_encap_iphdr;
     ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
 	sizeof(pim_encap_pimhdr));
     ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
     ip_outer->ip_dst = rt->mfc_rp;
     /*
      * Copy the inner header TOS to the outer header, and take care of the
      * IP_DF bit.
      */
     ip_outer->ip_tos = ip->ip_tos;
     if (ip->ip_off & htons(IP_DF))
 	ip_outer->ip_off |= htons(IP_DF);
     ip_fillid(ip_outer);
     pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
 					 + sizeof(pim_encap_iphdr));
     *pimhdr = pim_encap_pimhdr;
     /* If the iif crosses a border, set the Border-bit */
     if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config)
 	pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
 
     mb_first->m_data += sizeof(pim_encap_iphdr);
     pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
     mb_first->m_data -= sizeof(pim_encap_iphdr);
 
     send_packet(vifp, mb_first);
 
     /* Keep statistics */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, len);
 
     return 0;
 }
 
 /*
  * pim_encapcheck() is called by the encap4_input() path at runtime to
  * determine if a packet is for PIM; allowing PIM to be dynamically loaded
  * into the kernel.
  */
 static int
-pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim_encapcheck(const struct mbuf *m __unused, int off __unused,
+    int proto __unused, void *arg __unused)
 {
 
-#ifdef DIAGNOSTIC
     KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
-    if (proto != IPPROTO_PIM)
-	return 0;	/* not for us; reject the datagram. */
-
-    return 64;		/* claim the datagram. */
+    return (8);		/* claim the datagram. */
 }
 
 /*
  * PIM-SMv2 and PIM-DM messages processing.
  * Receives and verifies the PIM control messages, and passes them
  * up to the listening socket, using rip_input().
  * The only message with special processing is the PIM_REGISTER message
  * (used by PIM-SM): the PIM header is stripped off, and the inner packet
  * is passed to if_simloop().
  */
-int
-pim_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim_input(struct mbuf *m, int off, int proto, void *arg __unused)
 {
-    struct mbuf *m = *mp;
     struct ip *ip = mtod(m, struct ip *);
     struct pim *pim;
-    int iphlen = *offp;
+    int iphlen = off;
     int minlen;
     int datalen = ntohs(ip->ip_len) - iphlen;
     int ip_tos;
- 
-    *mp = NULL;
 
     /* Keep statistics */
     PIMSTAT_INC(pims_rcv_total_msgs);
     PIMSTAT_ADD(pims_rcv_total_bytes, datalen);
 
     /*
      * Validate lengths
      */
     if (datalen < PIM_MINLEN) {
 	PIMSTAT_INC(pims_rcv_tooshort);
 	CTR3(KTR_IPMF, "%s: short packet (%d) from 0x%08x",
 	    __func__, datalen, ntohl(ip->ip_src.s_addr));
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /*
      * If the packet is at least as big as a REGISTER, go agead
      * and grab the PIM REGISTER header size, to avoid another
      * possible m_pullup() later.
      *
      * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
      * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
      */
     minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
     /*
      * Get the IP and PIM headers in contiguous memory, and
      * possibly the PIM REGISTER header.
      */
     if (m->m_len < minlen && (m = m_pullup(m, minlen)) == NULL) {
 	CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
 	return (IPPROTO_DONE);
     }
 
     /* m_pullup() may have given us a new mbuf so reset ip. */
     ip = mtod(m, struct ip *);
     ip_tos = ip->ip_tos;
 
     /* adjust mbuf to point to the PIM header */
     m->m_data += iphlen;
     m->m_len  -= iphlen;
     pim = mtod(m, struct pim *);
 
     /*
      * Validate checksum. If PIM REGISTER, exclude the data packet.
      *
      * XXX: some older PIMv2 implementations don't make this distinction,
      * so for compatibility reason perform the checksum over part of the
      * message, and if error, then over the whole message.
      */
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
 	/* do nothing, checksum okay */
     } else if (in_cksum(m, datalen)) {
 	PIMSTAT_INC(pims_rcv_badsum);
 	CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* PIM version check */
     if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
 	PIMSTAT_INC(pims_rcv_badversion);
 	CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
 	    (int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* restore mbuf back to the outer IP */
     m->m_data -= iphlen;
     m->m_len  += iphlen;
 
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
 	/*
 	 * Since this is a REGISTER, we'll make a copy of the register
 	 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
 	 * routing daemon.
 	 */
 	struct sockaddr_in dst = { sizeof(dst), AF_INET };
 	struct mbuf *mcp;
 	struct ip *encap_ip;
 	u_int32_t *reghdr;
 	struct ifnet *vifp;
 
 	VIF_LOCK();
 	if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) {
 	    VIF_UNLOCK();
 	    CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
 		(int)V_reg_vif_num);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 	/* XXX need refcnt? */
 	vifp = V_viftable[V_reg_vif_num].v_ifp;
 	VIF_UNLOCK();
 
 	/*
 	 * Validate length
 	 */
 	if (datalen < PIM_REG_MINLEN) {
 	    PIMSTAT_INC(pims_rcv_tooshort);
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	reghdr = (u_int32_t *)(pim + 1);
 	encap_ip = (struct ip *)(reghdr + 1);
 
 	CTR3(KTR_IPMF, "%s: register: encap ip src 0x%08x len %d",
 	    __func__, ntohl(encap_ip->ip_src.s_addr),
 	    ntohs(encap_ip->ip_len));
 
 	/* verify the version number of the inner packet */
 	if (encap_ip->ip_v != IPVERSION) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* verify the inner packet is destined to a mcast group */
 	if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR2(KTR_IPMF, "%s: bad encap ip dest 0x%08x", __func__,
 		ntohl(encap_ip->ip_dst.s_addr));
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* If a NULL_REGISTER, pass it to the daemon */
 	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 	    goto pim_input_to_daemon;
 
 	/*
 	 * Copy the TOS from the outer IP header to the inner IP header.
 	 */
 	if (encap_ip->ip_tos != ip_tos) {
 	    /* Outer TOS -> inner TOS */
 	    encap_ip->ip_tos = ip_tos;
 	    /* Recompute the inner header checksum. Sigh... */
 
 	    /* adjust mbuf to point to the inner IP header */
 	    m->m_data += (iphlen + PIM_MINLEN);
 	    m->m_len  -= (iphlen + PIM_MINLEN);
 
 	    encap_ip->ip_sum = 0;
 	    encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
 
 	    /* restore mbuf to point back to the outer IP header */
 	    m->m_data -= (iphlen + PIM_MINLEN);
 	    m->m_len  += (iphlen + PIM_MINLEN);
 	}
 
 	/*
 	 * Decapsulate the inner IP packet and loopback to forward it
 	 * as a normal multicast packet. Also, make a copy of the
 	 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
 	 * to pass to the daemon later, so it can take the appropriate
 	 * actions (e.g., send back PIM_REGISTER_STOP).
 	 * XXX: here m->m_data points to the outer IP header.
 	 */
 	mcp = m_copym(m, 0, iphlen + PIM_REG_MINLEN, M_NOWAIT);
 	if (mcp == NULL) {
 	    CTR1(KTR_IPMF, "%s: m_copym() failed", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* Keep statistics */
 	/* XXX: registers_bytes include only the encap. mcast pkt */
 	PIMSTAT_INC(pims_rcv_registers_msgs);
 	PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len));
 
 	/*
 	 * forward the inner ip packet; point m_data at the inner ip.
 	 */
 	m_adj(m, iphlen + PIM_MINLEN);
 
 	CTR4(KTR_IPMF,
 	    "%s: forward decap'd REGISTER: src %lx dst %lx vif %d",
 	    __func__,
 	    (u_long)ntohl(encap_ip->ip_src.s_addr),
 	    (u_long)ntohl(encap_ip->ip_dst.s_addr),
 	    (int)V_reg_vif_num);
 
 	/* NB: vifp was collected above; can it change on us? */
 	if_simloop(vifp, m, dst.sin_family, 0);
 
 	/* prepare the register head to send to the mrouting daemon */
 	m = mcp;
     }
 
 pim_input_to_daemon:
     /*
      * Pass the PIM message up to the daemon; if it is a Register message,
      * pass the 'head' only up to the daemon. This includes the
      * outer IP header, PIM header, PIM-Register header and the
      * inner IP header.
      * XXX: the outer IP header pkt size of a Register is not adjust to
      * reflect the fact that the inner multicast data is truncated.
      */
-    *mp = m;
-    rip_input(mp, offp, proto);
-
-    return (IPPROTO_DONE);
+    return (rip_input(&m, &off, proto));
 }
 
 static int
 sysctl_mfctable(SYSCTL_HANDLER_ARGS)
 {
 	struct mfc	*rt;
 	int		 error, i;
 
 	if (req->newptr)
 		return (EPERM);
 	if (V_mfchashtbl == NULL)	/* XXX unlocked */
 		return (0);
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error)
 		return (error);
 
 	MFC_LOCK();
 	for (i = 0; i < mfchashsize; i++) {
 		LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) {
 			error = SYSCTL_OUT(req, rt, sizeof(struct mfc));
 			if (error)
 				goto out_locked;
 		}
 	}
 out_locked:
 	MFC_UNLOCK();
 	return (error);
 }
 
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
     sysctl_mfctable, "IPv4 Multicast Forwarding Table "
     "(struct *mfc[mfchashsize], netinet/ip_mroute.h)");
 
 static void
 vnet_mroute_init(const void *unused __unused)
 {
 
 	V_nexpire = malloc(mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
 	bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 	callout_init(&V_expire_upcalls_ch, 1);
 	callout_init(&V_bw_upcalls_ch, 1);
 	callout_init(&V_bw_meter_ch, 1);
 }
 
 VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init,
 	NULL);
 
 static void
 vnet_mroute_uninit(const void *unused __unused)
 {
 
 	free(V_nexpire, M_MRTABLE);
 	V_nexpire = NULL;
 }
 
 VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, 
 	vnet_mroute_uninit, NULL);
 
 static int
 ip_mroute_modevent(module_t mod, int type, void *unused)
 {
 
     switch (type) {
     case MOD_LOAD:
 	MROUTER_LOCK_INIT();
 
 	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 
 	    if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
 	if (if_detach_event_tag == NULL) {
 		printf("ip_mroute: unable to register "
 		    "ifnet_departure_event handler\n");
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	MFC_LOCK_INIT();
 	VIF_LOCK_INIT();
 
 	mfchashsize = MFCHASHSIZE;
 	if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
 	    !powerof2(mfchashsize)) {
 		printf("WARNING: %s not a power of 2; using default\n",
 		    "net.inet.ip.mfchashsize");
 		mfchashsize = MFCHASHSIZE;
 	}
 
 	pim_squelch_wholepkt = 0;
 	TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
 	    &pim_squelch_wholepkt);
 
-	pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
-	    pim_encapcheck, &in_pim_protosw, NULL);
+	pim_encap_cookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
 	if (pim_encap_cookie == NULL) {
 		printf("ip_mroute: unable to attach pim encap\n");
 		VIF_LOCK_DESTROY();
 		MFC_LOCK_DESTROY();
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	ip_mcast_src = X_ip_mcast_src;
 	ip_mforward = X_ip_mforward;
 	ip_mrouter_done = X_ip_mrouter_done;
 	ip_mrouter_get = X_ip_mrouter_get;
 	ip_mrouter_set = X_ip_mrouter_set;
 
 	ip_rsvp_force_done = X_ip_rsvp_force_done;
 	ip_rsvp_vif = X_ip_rsvp_vif;
 
 	legal_vif_num = X_legal_vif_num;
 	mrt_ioctl = X_mrt_ioctl;
 	rsvp_input_p = X_rsvp_input;
 	break;
 
     case MOD_UNLOAD:
 	/*
 	 * Typically module unload happens after the user-level
 	 * process has shutdown the kernel services (the check
 	 * below insures someone can't just yank the module out
 	 * from under a running process).  But if the module is
 	 * just loaded and then unloaded w/o starting up a user
 	 * process we still need to cleanup.
 	 */
 	MROUTER_LOCK();
 	if (ip_mrouter_cnt != 0) {
 	    MROUTER_UNLOCK();
 	    return (EINVAL);
 	}
 	ip_mrouter_unloading = 1;
 	MROUTER_UNLOCK();
 
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 
 	if (pim_encap_cookie) {
-	    encap_detach(pim_encap_cookie);
+	    ip_encap_detach(pim_encap_cookie);
 	    pim_encap_cookie = NULL;
 	}
 
 	ip_mcast_src = NULL;
 	ip_mforward = NULL;
 	ip_mrouter_done = NULL;
 	ip_mrouter_get = NULL;
 	ip_mrouter_set = NULL;
 
 	ip_rsvp_force_done = NULL;
 	ip_rsvp_vif = NULL;
 
 	legal_vif_num = NULL;
 	mrt_ioctl = NULL;
 	rsvp_input_p = NULL;
 
 	VIF_LOCK_DESTROY();
 	MFC_LOCK_DESTROY();
 	MROUTER_LOCK_DESTROY();
 	break;
 
     default:
 	return EOPNOTSUPP;
     }
     return 0;
 }
 
 static moduledata_t ip_mroutemod = {
     "ip_mroute",
     ip_mroute_modevent,
     0
 };
 
 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);
diff --git a/sys/netinet/pim_var.h b/sys/netinet/pim_var.h
index e6398a4dfa95..dfb06928dc52 100644
--- a/sys/netinet/pim_var.h
+++ b/sys/netinet/pim_var.h
@@ -1,81 +1,79 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1998-2000
  * University of Southern California/Information Sciences Institute.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NETINET_PIM_VAR_H_
 #define _NETINET_PIM_VAR_H_
 
 /*
  * Protocol Independent Multicast (PIM),
  * kernel variables and implementation-specific definitions.
  *
  * Written by George Edmond Eddy (Rusty), ISI, February 1998.
  * Modified by Pavlin Radoslavov, USC/ISI, May 1998, Aug 1999, October 2000.
  * Modified by Hitoshi Asaeda, WIDE, August 1998.
  */
 
 /*
  * PIM statistics kept in the kernel
  */
 struct pimstat {
 	uint64_t pims_rcv_total_msgs;	   /* total PIM messages received    */
 	uint64_t pims_rcv_total_bytes;	   /* total PIM bytes received	     */
 	uint64_t pims_rcv_tooshort;	   /* rcvd with too few bytes	     */
 	uint64_t pims_rcv_badsum;	   /* rcvd with bad checksum	     */
 	uint64_t pims_rcv_badversion;	   /* rcvd bad PIM version	     */
 	uint64_t pims_rcv_registers_msgs;  /* rcvd regs. msgs (data only)    */
 	uint64_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only)   */
 	uint64_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif     */
 	uint64_t pims_rcv_badregisters;	   /* rcvd invalid registers	     */
 	uint64_t pims_snd_registers_msgs;  /* sent regs. msgs (data only)    */
 	uint64_t pims_snd_registers_bytes; /* sent regs. bytes (data only)   */
 };
 
 #ifdef _KERNEL
 #define	PIMSTAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct pimstat, pimstat, name, (val))
 #define	PIMSTAT_INC(name)	PIMSTAT_ADD(name, 1)
 #endif
 
 /*
  * Identifiers for PIM sysctl nodes
  */
 #define PIMCTL_STATS		1	/* statistics (read-only) */
 
 #ifdef _KERNEL
-
-int pim_input(struct mbuf **, int *, int);
 SYSCTL_DECL(_net_inet_pim);
 #endif
 
 #endif /* _NETINET_PIM_VAR_H_ */
diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c
index e67d64eaf131..0fc0e66cab05 100644
--- a/sys/netinet6/in6_gif.c
+++ b/sys/netinet6/in6_gif.c
@@ -1,232 +1,221 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
-#include <sys/protosw.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET
 #include <netinet/ip.h>
 #endif
 #include <netinet/ip_encap.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #endif
 #include <netinet/ip_ecn.h>
 #ifdef INET6
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/in6_fib.h>
 #endif
 
 #include <net/if_gif.h>
 
 #define GIF_HLIM	30
 static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
 #define	V_ip6_gif_hlim			VNET(ip6_gif_hlim)
 
 SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW,
-    &VNET_NAME(ip6_gif_hlim), 0, "");
-
-static int in6_gif_input(struct mbuf **, int *, int);
-
-extern  struct domain inet6domain;
-static struct protosw in6_gif_protosw = {
-	.pr_type =	SOCK_RAW,
-	.pr_domain =	&inet6domain,
-	.pr_protocol =	0,			/* IPPROTO_IPV[46] */
-	.pr_flags =	PR_ATOMIC|PR_ADDR,
-	.pr_input =	in6_gif_input,
-	.pr_output =	rip6_output,
-	.pr_ctloutput =	rip6_ctloutput,
-	.pr_usrreqs =	&rip6_usrreqs
-};
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0,
+    "Default hop limit for encapsulated packets");
 
 int
 in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
 {
 	GIF_RLOCK_TRACKER;
 	struct gif_softc *sc = ifp->if_softc;
 	struct ip6_hdr *ip6;
 	int len;
 
 	/* prepend new IP header */
 	len = sizeof(struct ip6_hdr);
 #ifndef __NO_STRICT_ALIGNMENT
 	if (proto == IPPROTO_ETHERIP)
 		len += ETHERIP_ALIGN;
 #endif
 	M_PREPEND(m, len, M_NOWAIT);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifndef __NO_STRICT_ALIGNMENT
 	if (proto == IPPROTO_ETHERIP) {
 		len = mtod(m, vm_offset_t) & 3;
 		KASSERT(len == 0 || len == ETHERIP_ALIGN,
 		    ("in6_gif_output: unexpected misalignment"));
 		m->m_data += len;
 		m->m_len -= ETHERIP_ALIGN;
 	}
 #endif
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	GIF_RLOCK(sc);
 	if (sc->gif_family != AF_INET6) {
 		m_freem(m);
 		GIF_RUNLOCK(sc);
 		return (ENETDOWN);
 	}
 	bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
 	GIF_RUNLOCK(sc);
 
 	ip6->ip6_flow  |= htonl((uint32_t)ecn << 20);
 	ip6->ip6_nxt	= proto;
 	ip6->ip6_hlim	= V_ip6_gif_hlim;
 	/*
 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
 	 * it is too painful to ask for resend of inner packet, to achieve
 	 * path MTU discovery for encapsulated packets.
 	 */
 	return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
 }
 
 static int
-in6_gif_input(struct mbuf **mp, int *offp, int proto)
+in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
 {
-	struct mbuf *m = *mp;
+	struct gif_softc *sc = arg;
 	struct ifnet *gifp;
-	struct gif_softc *sc;
 	struct ip6_hdr *ip6;
 	uint8_t ecn;
 
-	sc = encap_getarg(m);
 	if (sc == NULL) {
 		m_freem(m);
 		IP6STAT_INC(ip6s_nogif);
 		return (IPPROTO_DONE);
 	}
 	gifp = GIF2IFP(sc);
 	if ((gifp->if_flags & IFF_UP) != 0) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
-		m_adj(m, *offp);
+		m_adj(m, off);
 		gif_input(m, gifp, proto, ecn);
 	} else {
 		m_freem(m);
 		IP6STAT_INC(ip6s_nogif);
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * we know that we are in IFF_UP, outer address available, and outer family
  * matched the physical addr family.  see gif_encapcheck().
  */
 int
 in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	const struct ip6_hdr *ip6;
 	struct gif_softc *sc;
 	int ret;
 
 	/* sanity check done in caller */
 	sc = (struct gif_softc *)arg;
 	GIF_RLOCK_ASSERT(sc);
 
 	/*
 	 * Check for address match.  Note that the check is for an incoming
 	 * packet.  We should compare the *source* address in our configuration
 	 * and the *destination* address of the packet, and vice versa.
 	 */
 	ip6 = mtod(m, const struct ip6_hdr *);
 	if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
 		return (0);
 	ret = 128;
 	if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
 		if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
 			return (0);
 	} else
 		ret += 128;
 
 	/* ingress filters on outer source */
 	if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
 		struct nhop6_basic nh6;
 
 		/* XXX empty scope id */
 		if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
 		    &nh6) != 0)
 			return (0);
 
 		if (nh6.nh_ifp != m->m_pkthdr.rcvif)
 			return (0);
 	}
 	return (ret);
 }
 
+static const struct encap_config ipv6_encap_cfg = {
+	.proto = -1,
+	.min_length = sizeof(struct ip6_hdr),
+	.exact_match = (sizeof(struct in6_addr) << 4) + 8,
+	.check = gif_encapcheck,
+	.input = in6_gif_input
+};
+
 int
 in6_gif_attach(struct gif_softc *sc)
 {
 
 	KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
-	sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
-	    (void *)&in6_gif_protosw, sc);
-	if (sc->gif_ecookie == NULL)
-		return (EEXIST);
+	sc->gif_ecookie = ip6_encap_attach(&ipv6_encap_cfg, sc, M_WAITOK);
 	return (0);
 }
diff --git a/sys/netinet6/ip6_gre.c b/sys/netinet6/ip6_gre.c
index 095a1deeb633..4519582c525b 100644
--- a/sys/netinet6/ip6_gre.c
+++ b/sys/netinet6/ip6_gre.c
@@ -1,146 +1,138 @@
 /*-
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
-#include <sys/protosw.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #endif
 #include <netinet/ip_encap.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <net/if_gre.h>
 
-extern  struct domain inet6domain;
-struct protosw in6_gre_protosw = {
-	.pr_type =	SOCK_RAW,
-	.pr_domain =	&inet6domain,
-	.pr_protocol =	IPPROTO_GRE,
-	.pr_flags =	PR_ATOMIC|PR_ADDR,
-	.pr_input =	gre_input,
-	.pr_output =	rip6_output,
-	.pr_ctloutput =	rip6_ctloutput,
-	.pr_usrreqs =	&rip6_usrreqs
-};
-
 VNET_DEFINE(int, ip6_gre_hlim) = IPV6_DEFHLIM;
 #define	V_ip6_gre_hlim		VNET(ip6_gre_hlim)
 
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets");
 
 static int
 in6_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct ip6_hdr *ip6;
 
 	sc = (struct gre_softc *)arg;
 	if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
 		return (0);
 
 	M_ASSERTPKTHDR(m);
 	/*
 	 * We expect that payload contains at least IPv4
 	 * or IPv6 packet.
 	 */
 	if (m->m_pkthdr.len < sizeof(struct greip6) +
 #ifdef INET
 	    sizeof(struct ip))
 #else
 	    sizeof(struct ip6_hdr))
 #endif
 		return (0);
 
 	GRE_RLOCK(sc);
 	if (sc->gre_family == 0)
 		goto bad;
 
 	KASSERT(sc->gre_family == AF_INET6,
 	    ("wrong gre_family: %d", sc->gre_family));
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (!IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, &ip6->ip6_dst) ||
 	    !IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, &ip6->ip6_src))
 		goto bad;
 
 	GRE_RUNLOCK(sc);
-	return (128 * 2);
+	return (128 * 2 + 32);
 bad:
 	GRE_RUNLOCK(sc);
 	return (0);
 }
 
 int
 in6_gre_output(struct mbuf *m, int af, int hlen)
 {
 	struct greip6 *gi6;
 
 	gi6 = mtod(m, struct greip6 *);
 	gi6->gi6_ip6.ip6_hlim = V_ip6_gre_hlim;
 	return (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, NULL));
 }
 
+static const struct encap_config ipv6_encap_cfg = {
+	.proto = IPPROTO_GRE,
+	.min_length = sizeof(struct greip6) + sizeof(struct ip),
+	.exact_match = (sizeof(struct in6_addr) << 4) + 32,
+	.check = in6_gre_encapcheck,
+	.input = gre_input
+};
+
 int
 in6_gre_attach(struct gre_softc *sc)
 {
 
 	KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
-	sc->gre_ecookie = encap_attach_func(AF_INET6, IPPROTO_GRE,
-	    in6_gre_encapcheck, &in6_gre_protosw, sc);
-	if (sc->gre_ecookie == NULL)
-		return (EEXIST);
+	sc->gre_ecookie = ip6_encap_attach(&ipv6_encap_cfg, sc, M_WAITOK);
 	return (0);
 }
diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c
index 524d35a27d99..b4a7a3adda5b 100644
--- a/sys/netinet6/ip6_mroute.c
+++ b/sys/netinet6/ip6_mroute.c
@@ -1,1972 +1,1963 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_mroute.c	8.2 (Berkeley) 11/15/93
  *	BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1994
  *
  * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/callout.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
-#include <net/raw_cb.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_encap.h>
 
 #include <netinet/ip6.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/pim6.h>
 #include <netinet6/pim6_var.h>
 
 static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
 
 static int	ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
 static void	phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
 static int	register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
 static int	set_pim6(int *);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in6 *);
 
 extern int in6_mcast_loop;
 extern struct domain inet6domain;
 
 static const struct encaptab *pim6_encap_cookie;
-static const struct protosw in6_pim_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inet6domain,
-	.pr_protocol =		IPPROTO_PIM,
-	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
-	.pr_input =		pim6_input,
-	.pr_output =		rip6_output,
-	.pr_ctloutput =		rip6_ctloutput,
-	.pr_usrreqs =		&rip6_usrreqs
-};
 static int pim6_encapcheck(const struct mbuf *, int, int, void *);
+static int pim6_input(struct mbuf *, int, int, void *);
+
+static const struct encap_config ipv6_encap_cfg = {
+	.proto = IPPROTO_PIM,
+	.min_length = sizeof(struct ip6_hdr) + PIM_MINLEN,
+	.exact_match = 8,
+	.check = pim6_encapcheck,
+	.input = pim6_input
+};
+
 
 static VNET_DEFINE(int, ip6_mrouter_ver) = 0;
 #define	V_ip6_mrouter_ver	VNET(ip6_mrouter_ver)
 
 SYSCTL_DECL(_net_inet6);
 SYSCTL_DECL(_net_inet6_ip6);
 static SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 
 static struct mrt6stat mrt6stat;
 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
     &mrt6stat, mrt6stat,
     "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
 
 #define	MRT6STAT_INC(name)	mrt6stat.name += 1
 #define NO_RTE_FOUND	0x1
 #define RTE_FOUND	0x2
 
 static struct mtx mrouter6_mtx;
 #define	MROUTER6_LOCK()		mtx_lock(&mrouter6_mtx)
 #define	MROUTER6_UNLOCK()	mtx_unlock(&mrouter6_mtx)
 #define	MROUTER6_LOCK_ASSERT()	do {					\
 	mtx_assert(&mrouter6_mtx, MA_OWNED);				\
 	NET_ASSERT_GIANT();						\
 } while (0)
 #define	MROUTER6_LOCK_INIT()	\
 	mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER6_LOCK_DESTROY()	mtx_destroy(&mrouter6_mtx)
 
 static struct mf6c *mf6ctable[MF6CTBLSIZ];
 SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
     &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
     "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
     "netinet6/ip6_mroute.h)");
 
 static struct mtx mfc6_mtx;
 #define	MFC6_LOCK()		mtx_lock(&mfc6_mtx)
 #define	MFC6_UNLOCK()		mtx_unlock(&mfc6_mtx)
 #define	MFC6_LOCK_ASSERT()	do {					\
 	mtx_assert(&mfc6_mtx, MA_OWNED);				\
 	NET_ASSERT_GIANT();						\
 } while (0)
 #define	MFC6_LOCK_INIT()		\
 	mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF)
 #define	MFC6_LOCK_DESTROY()	mtx_destroy(&mfc6_mtx)
 
 static u_char n6expire[MF6CTBLSIZ];
 
 static struct mif6 mif6table[MAXMIFS];
 static int
 sysctl_mif6table(SYSCTL_HANDLER_ARGS)
 {
 	struct mif6_sctl *out;
 	int error;
 
 	out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, M_WAITOK);
 	for (int i = 0; i < MAXMIFS; i++) {
 		out[i].m6_flags		= mif6table[i].m6_flags;
 		out[i].m6_rate_limit	= mif6table[i].m6_rate_limit;
 		out[i].m6_lcl_addr	= mif6table[i].m6_lcl_addr;
 		if (mif6table[i].m6_ifp != NULL)
 			out[i].m6_ifp	= mif6table[i].m6_ifp->if_index;
 		else
 			out[i].m6_ifp	= 0;
 		out[i].m6_pkt_in	= mif6table[i].m6_pkt_in;
 		out[i].m6_pkt_out	= mif6table[i].m6_pkt_out;
 		out[i].m6_bytes_in	= mif6table[i].m6_bytes_in;
 		out[i].m6_bytes_out	= mif6table[i].m6_bytes_out;
 	}
 	error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS);
 	free(out, M_TEMP);
 	return (error);
 }
 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD,
     NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]",
     "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], "
     "netinet6/ip6_mroute.h)");
 
 static struct mtx mif6_mtx;
 #define	MIF6_LOCK()		mtx_lock(&mif6_mtx)
 #define	MIF6_UNLOCK()		mtx_unlock(&mif6_mtx)
 #define	MIF6_LOCK_ASSERT()	mtx_assert(&mif6_mtx, MA_OWNED)
 #define	MIF6_LOCK_INIT()	\
 	mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF)
 #define	MIF6_LOCK_DESTROY()	mtx_destroy(&mif6_mtx)
 
 #ifdef MRT6DEBUG
 static VNET_DEFINE(u_int, mrt6debug) = 0;	/* debug level */
 #define	V_mrt6debug		VNET(mrt6debug)
 #define DEBUG_MFC	0x02
 #define DEBUG_FORWARD	0x04
 #define DEBUG_EXPIRE	0x08
 #define DEBUG_XMIT	0x10
 #define DEBUG_REG	0x20
 #define DEBUG_PIM	0x40
 #define	DEBUG_ERR	0x80
 #define	DEBUG_ANY	0x7f
 #define	MRT6_DLOG(m, fmt, ...)	\
 	if (V_mrt6debug & (m))	\
 		log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \
 		    "%s: " fmt "\n", __func__, ##__VA_ARGS__)
 #else
 #define	MRT6_DLOG(m, fmt, ...)
 #endif
 
 static void	expire_upcalls(void *);
 #define	EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
 #define	UPCALL_EXPIRE	6		/* number of timeouts */
 
 /*
  * XXX TODO: maintain a count to if_allmulti() calls in struct ifnet.
  */
 
 /*
  * 'Interfaces' associated with decapsulator (so we can tell
  * packets that went through it from ones that get reflected
  * by a broken gateway).  Different from IPv4 register_if,
  * these interfaces are linked into the system ifnet list,
  * because per-interface IPv6 statistics are maintained in
  * ifp->if_afdata.  But it does not have any routes point
  * to them.  I.e., packets can't be sent this way.  They
  * only exist as a placeholder for multicast source
  * verification.
  */
 static struct ifnet *multicast_register_if6;
 
 #define ENCAP_HOPS 64
 
 /*
  * Private variables.
  */
 static mifi_t nummifs = 0;
 static mifi_t reg_mif_num = (mifi_t)-1;
 
 static struct pim6stat pim6stat;
 SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
     &pim6stat, pim6stat,
     "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
 
 #define	PIM6STAT_INC(name)	pim6stat.name += 1
 static VNET_DEFINE(int, pim6);
 #define	V_pim6		VNET(pim6)
 
 /*
  * Hash function for a source, group entry
  */
 #define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \
 				   (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \
 				   (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \
 				   (g).s6_addr32[2] ^ (g).s6_addr32[3])
 
 /*
  * Find a route for a given origin IPv6 address and Multicast group address.
  */
 #define MF6CFIND(o, g, rt) do { \
 	struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
 	rt = NULL; \
 	while (_rt) { \
 		if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
 		    IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
 		    (_rt->mf6c_stall == NULL)) { \
 			rt = _rt; \
 			break; \
 		} \
 		_rt = _rt->mf6c_next; \
 	} \
 	if (rt == NULL) { \
 		MRT6STAT_INC(mrt6s_mfc_misses); \
 	} \
 } while (/*CONSTCOND*/ 0)
 
 /*
  * Macros to compute elapsed time efficiently
  * Borrowed from Van Jacobson's scheduling code
  * XXX: replace with timersub() ?
  */
 #define TV_DELTA(a, b, delta) do { \
 	    int xxs; \
 		\
 	    delta = (a).tv_usec - (b).tv_usec; \
 	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
 	       switch (xxs) { \
 		      case 2: \
 			  delta += 1000000; \
 			      /* FALLTHROUGH */ \
 		      case 1: \
 			  delta += 1000000; \
 			  break; \
 		      default: \
 			  delta += (1000000 * xxs); \
 	       } \
 	    } \
 } while (/*CONSTCOND*/ 0)
 
 /* XXX: replace with timercmp(a, b, <) ? */
 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 
 #ifdef UPCALL_TIMING
 #define UPCALL_MAX	50
 static u_long upcall_data[UPCALL_MAX + 1];
 static void collate();
 #endif /* UPCALL_TIMING */
 
 static int ip6_mrouter_init(struct socket *, int, int);
 static int add_m6fc(struct mf6cctl *);
 static int add_m6if(struct mif6ctl *);
 static int del_m6fc(struct mf6cctl *);
 static int del_m6if(mifi_t *);
 static int del_m6if_locked(mifi_t *);
 static int get_mif6_cnt(struct sioc_mif_req6 *);
 static int get_sg_cnt(struct sioc_sg_req6 *);
 
 static struct callout expire_upcalls_ch;
 
 int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 int X_ip6_mrouter_done(void);
 int X_ip6_mrouter_set(struct socket *, struct sockopt *);
 int X_ip6_mrouter_get(struct socket *, struct sockopt *);
 int X_mrt6_ioctl(u_long, caddr_t);
 
 /*
  * Handle MRT setsockopt commands to modify the multicast routing tables.
  */
 int
 X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
 	int error = 0;
 	int optval;
 	struct mif6ctl mifc;
 	struct mf6cctl mfcc;
 	mifi_t mifi;
 
 	if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT)
 		return (EPERM);
 
 	switch (sopt->sopt_name) {
 	case MRT6_INIT:
 #ifdef MRT6_OINIT
 	case MRT6_OINIT:
 #endif
 		error = sooptcopyin(sopt, &optval, sizeof(optval),
 		    sizeof(optval));
 		if (error)
 			break;
 		error = ip6_mrouter_init(so, optval, sopt->sopt_name);
 		break;
 	case MRT6_DONE:
 		error = X_ip6_mrouter_done();
 		break;
 	case MRT6_ADD_MIF:
 		error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
 		if (error)
 			break;
 		error = add_m6if(&mifc);
 		break;
 	case MRT6_ADD_MFC:
 		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
 		if (error)
 			break;
 		error = add_m6fc(&mfcc);
 		break;
 	case MRT6_DEL_MFC:
 		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
 		if (error)
 			break;
 		error = del_m6fc(&mfcc);
 		break;
 	case MRT6_DEL_MIF:
 		error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi));
 		if (error)
 			break;
 		error = del_m6if(&mifi);
 		break;
 	case MRT6_PIM:
 		error = sooptcopyin(sopt, &optval, sizeof(optval),
 		    sizeof(optval));
 		if (error)
 			break;
 		error = set_pim6(&optval);
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Handle MRT getsockopt commands
  */
 int
 X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
 	int error = 0;
 
 	if (so != V_ip6_mrouter)
 		return (EACCES);
 
 	switch (sopt->sopt_name) {
 		case MRT6_PIM:
 			error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6));
 			break;
 	}
 	return (error);
 }
 
 /*
  * Handle ioctl commands to obtain information from the cache
  */
 int
 X_mrt6_ioctl(u_long cmd, caddr_t data)
 {
 	int ret;
 
 	ret = EINVAL;
 
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 		ret = get_sg_cnt((struct sioc_sg_req6 *)data);
 		break;
 
 	case SIOCGETMIFCNT_IN6:
 		ret = get_mif6_cnt((struct sioc_mif_req6 *)data);
 		break;
 
 	default:
 		break;
 	}
 
 	return (ret);
 }
 
 /*
  * returns the packet, byte, rpf-failure count for the source group provided
  */
 static int
 get_sg_cnt(struct sioc_sg_req6 *req)
 {
 	struct mf6c *rt;
 	int ret;
 
 	ret = 0;
 
 	MFC6_LOCK();
 
 	MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt);
 	if (rt == NULL) {
 		ret = ESRCH;
 	} else {
 		req->pktcnt = rt->mf6c_pkt_cnt;
 		req->bytecnt = rt->mf6c_byte_cnt;
 		req->wrong_if = rt->mf6c_wrong_if;
 	}
 
 	MFC6_UNLOCK();
 
 	return (ret);
 }
 
 /*
  * returns the input and output packet and byte counts on the mif provided
  */
 static int
 get_mif6_cnt(struct sioc_mif_req6 *req)
 {
 	mifi_t mifi;
 	int ret;
 
 	ret = 0;
 	mifi = req->mifi;
 
 	MIF6_LOCK();
 
 	if (mifi >= nummifs) {
 		ret = EINVAL;
 	} else {
 		req->icount = mif6table[mifi].m6_pkt_in;
 		req->ocount = mif6table[mifi].m6_pkt_out;
 		req->ibytes = mif6table[mifi].m6_bytes_in;
 		req->obytes = mif6table[mifi].m6_bytes_out;
 	}
 
 	MIF6_UNLOCK();
 
 	return (ret);
 }
 
 static int
 set_pim6(int *i)
 {
 	if ((*i != 1) && (*i != 0))
 		return (EINVAL);
 
 	V_pim6 = *i;
 
 	return (0);
 }
 
 /*
  * Enable multicast routing
  */
 static int
 ip6_mrouter_init(struct socket *so, int v, int cmd)
 {
 
 	MRT6_DLOG(DEBUG_ANY, "so_type = %d, pr_protocol = %d",
 	    so->so_type, so->so_proto->pr_protocol);
 
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_ICMPV6)
 		return (EOPNOTSUPP);
 
 	if (v != 1)
 		return (ENOPROTOOPT);
 
 	MROUTER6_LOCK();
 
 	if (V_ip6_mrouter != NULL) {
 		MROUTER6_UNLOCK();
 		return (EADDRINUSE);
 	}
 
 	V_ip6_mrouter = so;
 	V_ip6_mrouter_ver = cmd;
 
 	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
 	bzero((caddr_t)n6expire, sizeof(n6expire));
 
 	V_pim6 = 0;/* used for stubbing out/in pim stuff */
 
 	callout_init(&expire_upcalls_ch, 0);
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
 
 	MROUTER6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "finished");
 
 	return (0);
 }
 
 /*
  * Disable IPv6 multicast forwarding.
  */
 int
 X_ip6_mrouter_done(void)
 {
 	mifi_t mifi;
 	u_long i;
 	struct mf6c *rt;
 	struct rtdetq *rte;
 
 	MROUTER6_LOCK();
 
 	if (V_ip6_mrouter == NULL) {
 		MROUTER6_UNLOCK();
 		return (EINVAL);
 	}
 
 	/*
 	 * For each phyint in use, disable promiscuous reception of all IPv6
 	 * multicasts.
 	 */
 	for (mifi = 0; mifi < nummifs; mifi++) {
 		if (mif6table[mifi].m6_ifp &&
 		    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
 			if_allmulti(mif6table[mifi].m6_ifp, 0);
 		}
 	}
 	bzero((caddr_t)mif6table, sizeof(mif6table));
 	nummifs = 0;
 
 	V_pim6 = 0; /* used to stub out/in pim specific code */
 
 	callout_stop(&expire_upcalls_ch);
 
 	/*
 	 * Free all multicast forwarding cache entries.
 	 */
 	MFC6_LOCK();
 	for (i = 0; i < MF6CTBLSIZ; i++) {
 		rt = mf6ctable[i];
 		while (rt) {
 			struct mf6c *frt;
 
 			for (rte = rt->mf6c_stall; rte != NULL; ) {
 				struct rtdetq *n = rte->next;
 
 				m_freem(rte->m);
 				free(rte, M_MRTABLE6);
 				rte = n;
 			}
 			frt = rt;
 			rt = rt->mf6c_next;
 			free(frt, M_MRTABLE6);
 		}
 	}
 	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
 	MFC6_UNLOCK();
 
 	/*
 	 * Reset register interface
 	 */
 	if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) {
 		if_detach(multicast_register_if6);
 		if_free(multicast_register_if6);
 		reg_mif_num = (mifi_t)-1;
 		multicast_register_if6 = NULL;
 	}
 
 	V_ip6_mrouter = NULL;
 	V_ip6_mrouter_ver = 0;
 
 	MROUTER6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "finished");
 
 	return (0);
 }
 
 static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
 
 /*
  * Add a mif to the mif table
  */
 static int
 add_m6if(struct mif6ctl *mifcp)
 {
 	struct mif6 *mifp;
 	struct ifnet *ifp;
 	int error;
 
 	MIF6_LOCK();
 
 	if (mifcp->mif6c_mifi >= MAXMIFS) {
 		MIF6_UNLOCK();
 		return (EINVAL);
 	}
 	mifp = mif6table + mifcp->mif6c_mifi;
 	if (mifp->m6_ifp != NULL) {
 		MIF6_UNLOCK();
 		return (EADDRINUSE); /* XXX: is it appropriate? */
 	}
 	if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) {
 		MIF6_UNLOCK();
 		return (ENXIO);
 	}
 
 	ifp = ifnet_byindex(mifcp->mif6c_pifi);
 
 	if (mifcp->mif6c_flags & MIFF_REGISTER) {
 		if (reg_mif_num == (mifi_t)-1) {
 			ifp = if_alloc(IFT_OTHER);
 
 			if_initname(ifp, "register_mif", 0);
 			ifp->if_flags |= IFF_LOOPBACK;
 			if_attach(ifp);
 			multicast_register_if6 = ifp;
 			reg_mif_num = mifcp->mif6c_mifi;
 			/*
 			 * it is impossible to guess the ifindex of the
 			 * register interface.  So mif6c_pifi is automatically
 			 * calculated.
 			 */
 			mifcp->mif6c_pifi = ifp->if_index;
 		} else {
 			ifp = multicast_register_if6;
 		}
 	} else {
 		/* Make sure the interface supports multicast */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 			MIF6_UNLOCK();
 			return (EOPNOTSUPP);
 		}
 
 		error = if_allmulti(ifp, 1);
 		if (error) {
 			MIF6_UNLOCK();
 			return (error);
 		}
 	}
 
 	mifp->m6_flags     = mifcp->mif6c_flags;
 	mifp->m6_ifp       = ifp;
 
 	/* initialize per mif pkt counters */
 	mifp->m6_pkt_in    = 0;
 	mifp->m6_pkt_out   = 0;
 	mifp->m6_bytes_in  = 0;
 	mifp->m6_bytes_out = 0;
 
 	/* Adjust nummifs up if the mifi is higher than nummifs */
 	if (nummifs <= mifcp->mif6c_mifi)
 		nummifs = mifcp->mif6c_mifi + 1;
 
 	MIF6_UNLOCK();
 	MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi,
 	    if_name(ifp));
 
 	return (0);
 }
 
 /*
  * Delete a mif from the mif table
  */
 static int
 del_m6if_locked(mifi_t *mifip)
 {
 	struct mif6 *mifp = mif6table + *mifip;
 	mifi_t mifi;
 	struct ifnet *ifp;
 
 	MIF6_LOCK_ASSERT();
 
 	if (*mifip >= nummifs)
 		return (EINVAL);
 	if (mifp->m6_ifp == NULL)
 		return (EINVAL);
 
 	if (!(mifp->m6_flags & MIFF_REGISTER)) {
 		/* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. */
 		ifp = mifp->m6_ifp;
 		if_allmulti(ifp, 0);
 	} else {
 		if (reg_mif_num != (mifi_t)-1 &&
 		    multicast_register_if6 != NULL) {
 			if_detach(multicast_register_if6);
 			if_free(multicast_register_if6);
 			reg_mif_num = (mifi_t)-1;
 			multicast_register_if6 = NULL;
 		}
 	}
 
 	bzero((caddr_t)mifp, sizeof(*mifp));
 
 	/* Adjust nummifs down */
 	for (mifi = nummifs; mifi > 0; mifi--)
 		if (mif6table[mifi - 1].m6_ifp)
 			break;
 	nummifs = mifi;
 	MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", *mifip, nummifs);
 
 	return (0);
 }
 
 static int
 del_m6if(mifi_t *mifip)
 {
 	int cc;
 
 	MIF6_LOCK();
 	cc = del_m6if_locked(mifip);
 	MIF6_UNLOCK();
 
 	return (cc);
 }
 
 /*
  * Add an mfc entry
  */
 static int
 add_m6fc(struct mf6cctl *mfccp)
 {
 	struct mf6c *rt;
 	u_long hash;
 	struct rtdetq *rte;
 	u_short nstl;
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 
 	MFC6_LOCK();
 
 	MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
 		 mfccp->mf6cc_mcastgrp.sin6_addr, rt);
 
 	/* If an entry already exists, just update the fields */
 	if (rt) {
 		MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x",
 		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
 		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
 		    mfccp->mf6cc_parent);
 
 		rt->mf6c_parent = mfccp->mf6cc_parent;
 		rt->mf6c_ifset = mfccp->mf6cc_ifset;
 
 		MFC6_UNLOCK();
 		return (0);
 	}
 
 	/*
 	 * Find the entry for which the upcall was made and update
 	 */
 	hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
 			mfccp->mf6cc_mcastgrp.sin6_addr);
 	for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) {
 		if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
 				       &mfccp->mf6cc_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
 				       &mfccp->mf6cc_mcastgrp.sin6_addr) &&
 		    (rt->mf6c_stall != NULL)) {
 
 			if (nstl++)
 				log(LOG_ERR,
 				    "add_m6fc: %s o %s g %s p %x dbx %p\n",
 				    "multiple kernel entries",
 				    ip6_sprintf(ip6bufo,
 					    &mfccp->mf6cc_origin.sin6_addr),
 				    ip6_sprintf(ip6bufg,
 					    &mfccp->mf6cc_mcastgrp.sin6_addr),
 				    mfccp->mf6cc_parent, rt->mf6c_stall);
 
 			MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p",
 			    ip6_sprintf(ip6bufo,
 			    &mfccp->mf6cc_origin.sin6_addr),
 			    ip6_sprintf(ip6bufg,
 				&mfccp->mf6cc_mcastgrp.sin6_addr),
 			    mfccp->mf6cc_parent, rt->mf6c_stall);
 
 			rt->mf6c_origin     = mfccp->mf6cc_origin;
 			rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 			rt->mf6c_parent     = mfccp->mf6cc_parent;
 			rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 			/* initialize pkt counters per src-grp */
 			rt->mf6c_pkt_cnt    = 0;
 			rt->mf6c_byte_cnt   = 0;
 			rt->mf6c_wrong_if   = 0;
 
 			rt->mf6c_expire = 0;	/* Don't clean this guy up */
 			n6expire[hash]--;
 
 			/* free packets Qed at the end of this entry */
 			for (rte = rt->mf6c_stall; rte != NULL; ) {
 				struct rtdetq *n = rte->next;
 				ip6_mdq(rte->m, rte->ifp, rt);
 				m_freem(rte->m);
 #ifdef UPCALL_TIMING
 				collate(&(rte->t));
 #endif /* UPCALL_TIMING */
 				free(rte, M_MRTABLE6);
 				rte = n;
 			}
 			rt->mf6c_stall = NULL;
 		}
 	}
 
 	/*
 	 * It is possible that an entry is being inserted without an upcall
 	 */
 	if (nstl == 0) {
 		MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash,
 		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
 		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
 		    mfccp->mf6cc_parent);
 
 		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
 
 			if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
 					       &mfccp->mf6cc_origin.sin6_addr)&&
 			    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
 					       &mfccp->mf6cc_mcastgrp.sin6_addr)) {
 
 				rt->mf6c_origin     = mfccp->mf6cc_origin;
 				rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 				rt->mf6c_parent     = mfccp->mf6cc_parent;
 				rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 				/* initialize pkt counters per src-grp */
 				rt->mf6c_pkt_cnt    = 0;
 				rt->mf6c_byte_cnt   = 0;
 				rt->mf6c_wrong_if   = 0;
 
 				if (rt->mf6c_expire)
 					n6expire[hash]--;
 				rt->mf6c_expire	   = 0;
 			}
 		}
 		if (rt == NULL) {
 			/* no upcall, so make a new entry */
 			rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
 						  M_NOWAIT);
 			if (rt == NULL) {
 				MFC6_UNLOCK();
 				return (ENOBUFS);
 			}
 
 			/* insert new entry at head of hash chain */
 			rt->mf6c_origin     = mfccp->mf6cc_origin;
 			rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
 			rt->mf6c_parent     = mfccp->mf6cc_parent;
 			rt->mf6c_ifset	    = mfccp->mf6cc_ifset;
 			/* initialize pkt counters per src-grp */
 			rt->mf6c_pkt_cnt    = 0;
 			rt->mf6c_byte_cnt   = 0;
 			rt->mf6c_wrong_if   = 0;
 			rt->mf6c_expire     = 0;
 			rt->mf6c_stall = NULL;
 
 			/* link into table */
 			rt->mf6c_next  = mf6ctable[hash];
 			mf6ctable[hash] = rt;
 		}
 	}
 
 	MFC6_UNLOCK();
 	return (0);
 }
 
 #ifdef UPCALL_TIMING
 /*
  * collect delay statistics on the upcalls
  */
 static void
 collate(struct timeval *t)
 {
 	u_long d;
 	struct timeval tp;
 	u_long delta;
 
 	GET_TIME(tp);
 
 	if (TV_LT(*t, tp))
 	{
 		TV_DELTA(tp, *t, delta);
 
 		d = delta >> 10;
 		if (d > UPCALL_MAX)
 			d = UPCALL_MAX;
 
 		++upcall_data[d];
 	}
 }
 #endif /* UPCALL_TIMING */
 
 /*
  * Delete an mfc entry
  */
 static int
 del_m6fc(struct mf6cctl *mfccp)
 {
 #ifdef MRT6DEBUG
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 #endif
 	struct sockaddr_in6	origin;
 	struct sockaddr_in6	mcastgrp;
 	struct mf6c		*rt;
 	struct mf6c		**nptr;
 	u_long		hash;
 
 	origin = mfccp->mf6cc_origin;
 	mcastgrp = mfccp->mf6cc_mcastgrp;
 	hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
 
 	MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s",
 	    ip6_sprintf(ip6bufo, &origin.sin6_addr),
 	    ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
 
 	MFC6_LOCK();
 
 	nptr = &mf6ctable[hash];
 	while ((rt = *nptr) != NULL) {
 		if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr,
 				       &rt->mf6c_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr,
 				       &rt->mf6c_mcastgrp.sin6_addr) &&
 		    rt->mf6c_stall == NULL)
 			break;
 
 		nptr = &rt->mf6c_next;
 	}
 	if (rt == NULL) {
 		MFC6_UNLOCK();
 		return (EADDRNOTAVAIL);
 	}
 
 	*nptr = rt->mf6c_next;
 	free(rt, M_MRTABLE6);
 
 	MFC6_UNLOCK();
 
 	return (0);
 }
 
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
 {
 
 	if (s) {
 		if (sbappendaddr(&s->so_rcv,
 				 (struct sockaddr *)src,
 				 mm, (struct mbuf *)0) != 0) {
 			sorwakeup(s);
 			return (0);
 		}
 	}
 	m_freem(mm);
 	return (-1);
 }
 
 /*
  * IPv6 multicast forwarding function. This function assumes that the packet
  * pointed to by "ip6" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IPv6 multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  *
  * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff
  * this function is called in the originating context (i.e., not when
  * forwarding a packet from other node).  ip6_output(), which is currently the
  * only function that calls this function is called in the originating context,
  * explicitly ensures this condition.  It is caller's responsibility to ensure
  * that if this function is called from somewhere else in the originating
  * context in the future.
  */
 int
 X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
 {
 	struct rtdetq *rte;
 	struct mbuf *mb0;
 	struct mf6c *rt;
 	struct mif6 *mifp;
 	struct mbuf *mm;
 	u_long hash;
 	mifi_t mifi;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #ifdef UPCALL_TIMING
 	struct timeval tp;
 
 	GET_TIME(tp);
 #endif /* UPCALL_TIMING */
 
 	MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index);
 
 	/*
 	 * Don't forward a packet with Hop limit of zero or one,
 	 * or a packet destined to a local-only group.
 	 */
 	if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) ||
 	    IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
 		return (0);
 	ip6->ip6_hlim--;
 
 	/*
 	 * Source address check: do not forward packets with unspecified
 	 * source. It was discussed in July 2000, on ipngwg mailing list.
 	 * This is rather more serious than unicast cases, because some
 	 * MLD packets can be sent with the unspecified source address
 	 * (although such packets must normally set 1 to the hop limit field).
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		IP6STAT_INC(ip6s_cantforward);
 		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
 			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif));
 		}
 		return (0);
 	}
 
 	MFC6_LOCK();
 
 	/*
 	 * Determine forwarding mifs from the forwarding cache table
 	 */
 	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
 	MRT6STAT_INC(mrt6s_mfc_lookups);
 
 	/* Entry exists, so forward if necessary */
 	if (rt) {
 		MFC6_UNLOCK();
 		return (ip6_mdq(m, ifp, rt));
 	}
 
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon.
 	 */
 	MRT6STAT_INC(mrt6s_no_route);
 	MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we
 	 * are just going to fail anyway.
 	 */
 	rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT);
 	if (rte == NULL) {
 		MFC6_UNLOCK();
 		return (ENOBUFS);
 	}
 	mb0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 	/*
 	 * Pullup packet header if needed before storing it,
 	 * as other references may modify it in the meantime.
 	 */
 	if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
 		mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
 	if (mb0 == NULL) {
 		free(rte, M_MRTABLE6);
 		MFC6_UNLOCK();
 		return (ENOBUFS);
 	}
 
 	/* is there an upcall waiting for this packet? */
 	hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
 	for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
 		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
 		    &rt->mf6c_origin.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
 		    &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL))
 			break;
 	}
 
 	if (rt == NULL) {
 		struct mrt6msg *im;
 #ifdef MRT6_OINIT
 		struct omrt6msg *oim;
 #endif
 		/* no upcall, so make a new entry */
 		rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT);
 		if (rt == NULL) {
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 		/*
 		 * Make a copy of the header to send to the user
 		 * level process
 		 */
 		mm = m_copym(mb0, 0, sizeof(struct ip6_hdr), M_NOWAIT);
 		if (mm == NULL) {
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 
 		/*
 		 * Send message to routing daemon
 		 */
 		sin6.sin6_addr = ip6->ip6_src;
 		im = NULL;
 #ifdef MRT6_OINIT
 		oim = NULL;
 #endif
 		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 		case MRT6_OINIT:
 			oim = mtod(mm, struct omrt6msg *);
 			oim->im6_msgtype = MRT6MSG_NOCACHE;
 			oim->im6_mbz = 0;
 			break;
 #endif
 		case MRT6_INIT:
 			im = mtod(mm, struct mrt6msg *);
 			im->im6_msgtype = MRT6MSG_NOCACHE;
 			im->im6_mbz = 0;
 			break;
 		default:
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (EINVAL);
 		}
 
 		MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel");
 		for (mifp = mif6table, mifi = 0;
 		    mifi < nummifs && mifp->m6_ifp != ifp; mifp++, mifi++)
 				;
 
 		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 		case MRT6_OINIT:
 			oim->im6_mif = mifi;
 			break;
 #endif
 		case MRT6_INIT:
 			im->im6_mif = mifi;
 			break;
 		}
 
 		if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 			log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
 			    "socket queue full\n");
 			MRT6STAT_INC(mrt6s_upq_sockfull);
 			free(rte, M_MRTABLE6);
 			m_freem(mb0);
 			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 
 		MRT6STAT_INC(mrt6s_upcalls);
 
 		/* insert new entry at head of hash chain */
 		bzero(rt, sizeof(*rt));
 		rt->mf6c_origin.sin6_family = AF_INET6;
 		rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
 		rt->mf6c_origin.sin6_addr = ip6->ip6_src;
 		rt->mf6c_mcastgrp.sin6_family = AF_INET6;
 		rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
 		rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
 		rt->mf6c_expire = UPCALL_EXPIRE;
 		n6expire[hash]++;
 		rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
 
 		/* link into table */
 		rt->mf6c_next  = mf6ctable[hash];
 		mf6ctable[hash] = rt;
 		/* Add this entry to the end of the queue */
 		rt->mf6c_stall = rte;
 	} else {
 		/* determine if q has overflowed */
 		struct rtdetq **p;
 		int npkts = 0;
 
 		for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
 			if (++npkts > MAX_UPQ6) {
 				MRT6STAT_INC(mrt6s_upq_ovflw);
 				free(rte, M_MRTABLE6);
 				m_freem(mb0);
 				MFC6_UNLOCK();
 				return (0);
 			}
 
 		/* Add this entry to the end of the queue */
 		*p = rte;
 	}
 
 	rte->next = NULL;
 	rte->m = mb0;
 	rte->ifp = ifp;
 #ifdef UPCALL_TIMING
 	rte->t = tp;
 #endif /* UPCALL_TIMING */
 
 	MFC6_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Clean up cache entries if upcalls are not serviced
  * Call from the Slow Timeout mechanism, every half second.
  */
 static void
 expire_upcalls(void *unused)
 {
 #ifdef MRT6DEBUG
 	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
 #endif
 	struct rtdetq *rte;
 	struct mf6c *mfc, **nptr;
 	u_long i;
 
 	MFC6_LOCK();
 	for (i = 0; i < MF6CTBLSIZ; i++) {
 		if (n6expire[i] == 0)
 			continue;
 		nptr = &mf6ctable[i];
 		while ((mfc = *nptr) != NULL) {
 			rte = mfc->mf6c_stall;
 			/*
 			 * Skip real cache entries
 			 * Make sure it wasn't marked to not expire (shouldn't happen)
 			 * If it expires now
 			 */
 			if (rte != NULL &&
 			    mfc->mf6c_expire != 0 &&
 			    --mfc->mf6c_expire == 0) {
 				MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)",
 				    ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
 				    ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
 				/*
 				 * drop all the packets
 				 * free the mbuf with the pkt, if, timing info
 				 */
 				do {
 					struct rtdetq *n = rte->next;
 					m_freem(rte->m);
 					free(rte, M_MRTABLE6);
 					rte = n;
 				} while (rte != NULL);
 				MRT6STAT_INC(mrt6s_cache_cleanups);
 				n6expire[i]--;
 
 				*nptr = mfc->mf6c_next;
 				free(mfc, M_MRTABLE6);
 			} else {
 				nptr = &mfc->mf6c_next;
 			}
 		}
 	}
 	MFC6_UNLOCK();
 	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
 	    expire_upcalls, NULL);
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made
  */
 static int
 ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	mifi_t mifi, iif;
 	struct mif6 *mifp;
 	int plen = m->m_pkthdr.len;
 	struct in6_addr src0, dst0; /* copies for local work */
 	u_int32_t iszone, idzone, oszone, odzone;
 	int error = 0;
 
 /*
  * Macro to send packet on mif.  Since RSVP packets don't get counted on
  * input, they shouldn't get counted on output, so statistics keeping is
  * separate.
  */
 
 #define MC6_SEND(ip6, mifp, m) do {				\
 	if ((mifp)->m6_flags & MIFF_REGISTER)			\
 		register_send((ip6), (mifp), (m));		\
 	else							\
 		phyint_send((ip6), (mifp), (m));		\
 } while (/*CONSTCOND*/ 0)
 
 	/*
 	 * Don't forward if it didn't arrive from the parent mif
 	 * for its origin.
 	 */
 	mifi = rt->mf6c_parent;
 	if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) {
 		/* came in the wrong interface */
 		MRT6_DLOG(DEBUG_FORWARD,
 		    "wrong if: ifid %d mifi %d mififid %x", ifp->if_index,
 		    mifi, mif6table[mifi].m6_ifp->if_index);
 		MRT6STAT_INC(mrt6s_wrong_if);
 		rt->mf6c_wrong_if++;
 		/*
 		 * If we are doing PIM processing, and we are forwarding
 		 * packets on this interface, send a message to the
 		 * routing daemon.
 		 */
 		/* have to make sure this is a valid mif */
 		if (mifi < nummifs && mif6table[mifi].m6_ifp)
 			if (V_pim6 && (m->m_flags & M_LOOP) == 0) {
 				/*
 				 * Check the M_LOOP flag to avoid an
 				 * unnecessary PIM assert.
 				 * XXX: M_LOOP is an ad-hoc hack...
 				 */
 				static struct sockaddr_in6 sin6 =
 				{ sizeof(sin6), AF_INET6 };
 
 				struct mbuf *mm;
 				struct mrt6msg *im;
 #ifdef MRT6_OINIT
 				struct omrt6msg *oim;
 #endif
 
 				mm = m_copym(m, 0, sizeof(struct ip6_hdr),
 				    M_NOWAIT);
 				if (mm &&
 				    (!M_WRITABLE(mm) ||
 				     mm->m_len < sizeof(struct ip6_hdr)))
 					mm = m_pullup(mm, sizeof(struct ip6_hdr));
 				if (mm == NULL)
 					return (ENOBUFS);
 
 #ifdef MRT6_OINIT
 				oim = NULL;
 #endif
 				im = NULL;
 				switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 				case MRT6_OINIT:
 					oim = mtod(mm, struct omrt6msg *);
 					oim->im6_msgtype = MRT6MSG_WRONGMIF;
 					oim->im6_mbz = 0;
 					break;
 #endif
 				case MRT6_INIT:
 					im = mtod(mm, struct mrt6msg *);
 					im->im6_msgtype = MRT6MSG_WRONGMIF;
 					im->im6_mbz = 0;
 					break;
 				default:
 					m_freem(mm);
 					return (EINVAL);
 				}
 
 				for (mifp = mif6table, iif = 0;
 				     iif < nummifs && mifp &&
 					     mifp->m6_ifp != ifp;
 				     mifp++, iif++)
 					;
 
 				switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
 				case MRT6_OINIT:
 					oim->im6_mif = iif;
 					sin6.sin6_addr = oim->im6_src;
 					break;
 #endif
 				case MRT6_INIT:
 					im->im6_mif = iif;
 					sin6.sin6_addr = im->im6_src;
 					break;
 				}
 
 				MRT6STAT_INC(mrt6s_upcalls);
 
 				if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 					MRT6_DLOG(DEBUG_ANY,
 					    "ip6_mrouter socket queue full");
 					MRT6STAT_INC(mrt6s_upq_sockfull);
 					return (ENOBUFS);
 				}	/* if socket Q full */
 			}		/* if PIM */
 		return (0);
 	}			/* if wrong iif */
 
 	/* If I sourced this packet, it counts as output, else it was input. */
 	if (m->m_pkthdr.rcvif == NULL) {
 		/* XXX: is rcvif really NULL when output?? */
 		mif6table[mifi].m6_pkt_out++;
 		mif6table[mifi].m6_bytes_out += plen;
 	} else {
 		mif6table[mifi].m6_pkt_in++;
 		mif6table[mifi].m6_bytes_in += plen;
 	}
 	rt->mf6c_pkt_cnt++;
 	rt->mf6c_byte_cnt += plen;
 
 	/*
 	 * For each mif, forward a copy of the packet if there are group
 	 * members downstream on the interface.
 	 */
 	src0 = ip6->ip6_src;
 	dst0 = ip6->ip6_dst;
 	if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 ||
 	    (error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
 		IP6STAT_INC(ip6s_badscope);
 		return (error);
 	}
 	for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
 		if (IF_ISSET(mifi, &rt->mf6c_ifset)) {
 			/*
 			 * check if the outgoing packet is going to break
 			 * a scope boundary.
 			 * XXX For packets through PIM register tunnel
 			 * interface, we believe a routing daemon.
 			 */
 			if (!(mif6table[rt->mf6c_parent].m6_flags &
 			      MIFF_REGISTER) &&
 			    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
 				if (in6_setscope(&src0, mif6table[mifi].m6_ifp,
 				    &oszone) ||
 				    in6_setscope(&dst0, mif6table[mifi].m6_ifp,
 				    &odzone) ||
 				    iszone != oszone ||
 				    idzone != odzone) {
 					IP6STAT_INC(ip6s_badscope);
 					continue;
 				}
 			}
 
 			mifp->m6_pkt_out++;
 			mifp->m6_bytes_out += plen;
 			MC6_SEND(ip6, mifp, m);
 		}
 	}
 	return (0);
 }
 
 static void
 phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
 {
 #ifdef MRT6DEBUG
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 	struct mbuf *mb_copy;
 	struct ifnet *ifp = mifp->m6_ifp;
 	int error = 0;
 	u_long linkmtu;
 
 	/*
 	 * Make a new reference to the packet; make sure that
 	 * the IPv6 header is actually copied, not just referenced,
 	 * so that ip6_output() only scribbles on the copy.
 	 */
 	mb_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 	if (mb_copy &&
 	    (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
 		mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
 	if (mb_copy == NULL) {
 		return;
 	}
 	/* set MCAST flag to the outgoing packet */
 	mb_copy->m_flags |= M_MCAST;
 
 	/*
 	 * If we sourced the packet, call ip6_output since we may devide
 	 * the packet into fragments when the packet is too big for the
 	 * outgoing interface.
 	 * Otherwise, we can simply send the packet to the interface
 	 * sending queue.
 	 */
 	if (m->m_pkthdr.rcvif == NULL) {
 		struct ip6_moptions im6o;
 
 		im6o.im6o_multicast_ifp = ifp;
 		/* XXX: ip6_output will override ip6->ip6_hlim */
 		im6o.im6o_multicast_hlim = ip6->ip6_hlim;
 		im6o.im6o_multicast_loop = 1;
 		error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o,
 		    NULL, NULL);
 
 		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
 		    (uint16_t)(mifp - mif6table), error);
 		return;
 	}
 
 	/*
 	 * If configured to loop back multicasts by default,
 	 * loop back a copy now.
 	 */
 	if (in6_mcast_loop)
 		ip6_mloopback(ifp, m);
 
 	/*
 	 * Put the packet into the sending queue of the outgoing interface
 	 * if it would fit in the MTU of the interface.
 	 */
 	linkmtu = IN6_LINKMTU(ifp);
 	if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) {
 		struct sockaddr_in6 dst6;
 
 		bzero(&dst6, sizeof(dst6));
 		dst6.sin6_len = sizeof(struct sockaddr_in6);
 		dst6.sin6_family = AF_INET6;
 		dst6.sin6_addr = ip6->ip6_dst;
 
 		IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
 		/*
 		 * We just call if_output instead of nd6_output here, since
 		 * we need no ND for a multicast forwarded packet...right?
 		 */
 		m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 		error = (*ifp->if_output)(ifp, mb_copy,
 		    (struct sockaddr *)&dst6, NULL);
 		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
 		    (uint16_t)(mifp - mif6table), error);
 	} else {
 		/*
 		 * pMTU discovery is intentionally disabled by default, since
 		 * various router may notify pMTU in multicast, which can be
 		 * a DDoS to a router
 		 */
 		if (V_ip6_mcast_pmtu)
 			icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
 		else {
 			MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s "
 			    "g %s size %d (discarded)", if_name(ifp),
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    mb_copy->m_pkthdr.len);
 			m_freem(mb_copy); /* simply discard the packet */
 		}
 	}
 }
 
 static int
 register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
 {
 #ifdef MRT6DEBUG
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 	struct mbuf *mm;
 	int i, len = m->m_pkthdr.len;
 	static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
 	struct mrt6msg *im6;
 
 	MRT6_DLOG(DEBUG_ANY, "src %s dst %s",
 	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
 	PIM6STAT_INC(pim6s_snd_registers);
 
 	/* Make a copy of the packet to send to the user level process. */
 	mm = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mm == NULL)
 		return (ENOBUFS);
 	mm->m_data += max_linkhdr;
 	mm->m_len = sizeof(struct ip6_hdr);
 
 	if ((mm->m_next = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 		m_freem(mm);
 		return (ENOBUFS);
 	}
 	i = MHLEN - M_LEADINGSPACE(mm);
 	if (i > len)
 		i = len;
 	mm = m_pullup(mm, i);
 	if (mm == NULL)
 		return (ENOBUFS);
 /* TODO: check it! */
 	mm->m_pkthdr.len = len + sizeof(struct ip6_hdr);
 
 	/*
 	 * Send message to routing daemon
 	 */
 	sin6.sin6_addr = ip6->ip6_src;
 
 	im6 = mtod(mm, struct mrt6msg *);
 	im6->im6_msgtype      = MRT6MSG_WHOLEPKT;
 	im6->im6_mbz          = 0;
 
 	im6->im6_mif = mif - mif6table;
 
 	/* iif info is not given for reg. encap.n */
 	MRT6STAT_INC(mrt6s_upcalls);
 
 	if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
 		MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full");
 		MRT6STAT_INC(mrt6s_upq_sockfull);
 		return (ENOBUFS);
 	}
 	return (0);
 }
 
 /*
  * pim6_encapcheck() is called by the encap6_input() path at runtime to
  * determine if a packet is for PIM; allowing PIM to be dynamically loaded
  * into the kernel.
  */
 static int
-pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim6_encapcheck(const struct mbuf *m __unused, int off __unused,
+    int proto __unused, void *arg __unused)
 {
 
-#ifdef DIAGNOSTIC
     KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
-    if (proto != IPPROTO_PIM)
-	return 0;	/* not for us; reject the datagram. */
-
-    return 64;		/* claim the datagram. */
+    return (8);		/* claim the datagram. */
 }
 
 /*
  * PIM sparse mode hook
  * Receives the pim control messages, and passes them up to the listening
  * socket, using rip6_input.
  * The only message processed is the REGISTER pim message; the pim header
  * is stripped off, and the inner packet is passed to register_mforward.
  */
-int
-pim6_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim6_input(struct mbuf *m, int off, int proto, void *arg __unused)
 {
 	struct pim *pim; /* pointer to a pim struct */
 	struct ip6_hdr *ip6;
 	int pimlen;
-	struct mbuf *m = *mp;
 	int minlen;
-	int off = *offp;
 
 	PIM6STAT_INC(pim6s_rcv_total);
 
 	ip6 = mtod(m, struct ip6_hdr *);
-	pimlen = m->m_pkthdr.len - *offp;
+	pimlen = m->m_pkthdr.len - off;
 
 	/*
 	 * Validate lengths
 	 */
 	if (pimlen < PIM_MINLEN) {
 		PIM6STAT_INC(pim6s_rcv_tooshort);
 		MRT6_DLOG(DEBUG_PIM, "PIM packet too short");
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * if the packet is at least as big as a REGISTER, go ahead
 	 * and grab the PIM REGISTER header size, to avoid another
 	 * possible m_pullup() later.
 	 *
 	 * PIM_MINLEN       == pimhdr + u_int32 == 8
 	 * PIM6_REG_MINLEN   == pimhdr + reghdr + eip6hdr == 4 + 4 + 40
 	 */
 	minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN;
 
 	/*
 	 * Make sure that the IP6 and PIM headers in contiguous memory, and
 	 * possibly the PIM REGISTER header
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE);
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* adjust mbuf to point to the PIM header */
 	pim = (struct pim *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
 	if (pim == NULL) {
 		PIM6STAT_INC(pim6s_rcv_tooshort);
 		return (IPPROTO_DONE);
 	}
 #endif
 
 #define PIM6_CHECKSUM
 #ifdef PIM6_CHECKSUM
 	{
 		int cksumlen;
 
 		/*
 		 * Validate checksum.
 		 * If PIM REGISTER, exclude the data packet
 		 */
 		if (pim->pim_type == PIM_REGISTER)
 			cksumlen = PIM_MINLEN;
 		else
 			cksumlen = pimlen;
 
 		if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
 			PIM6STAT_INC(pim6s_rcv_badsum);
 			MRT6_DLOG(DEBUG_PIM, "invalid checksum");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 #endif /* PIM_CHECKSUM */
 
 	/* PIM version check */
 	if (pim->pim_ver != PIM_VERSION) {
 		PIM6STAT_INC(pim6s_rcv_badversion);
 		MRT6_DLOG(DEBUG_ANY | DEBUG_ERR,
 		    "incorrect version %d, expecting %d",
 		    pim->pim_ver, PIM_VERSION);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	if (pim->pim_type == PIM_REGISTER) {
 		/*
 		 * since this is a REGISTER, we'll make a copy of the register
 		 * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the
 		 * routing daemon.
 		 */
 		static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 };
 
 		struct mbuf *mcp;
 		struct ip6_hdr *eip6;
 		u_int32_t *reghdr;
 		int rc;
 #ifdef MRT6DEBUG
 		char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 
 		PIM6STAT_INC(pim6s_rcv_registers);
 
 		if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
 			MRT6_DLOG(DEBUG_PIM, "register mif not set: %d",
 			    reg_mif_num);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		reghdr = (u_int32_t *)(pim + 1);
 
 		if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 			goto pim6_input_to_daemon;
 
 		/*
 		 * Validate length
 		 */
 		if (pimlen < PIM6_REG_MINLEN) {
 			PIM6STAT_INC(pim6s_rcv_tooshort);
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet "
 			    "size too small %d from %s",
 			    pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		eip6 = (struct ip6_hdr *) (reghdr + 1);
 		MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d",
 		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
 		    ntohs(eip6->ip6_plen));
 
 		/* verify the version number of the inner packet */
 		if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) "
 			    "of the inner packet",
 			    (eip6->ip6_vfc & IPV6_VERSION));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/* verify the inner packet is destined to a mcast group */
 		if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
 			PIM6STAT_INC(pim6s_rcv_badregisters);
 			MRT6_DLOG(DEBUG_PIM, "inner packet of register "
 			    "is not multicast %s",
 			    ip6_sprintf(ip6bufd, &eip6->ip6_dst));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/*
 		 * make a copy of the whole header to pass to the daemon later.
 		 */
 		mcp = m_copym(m, 0, off + PIM6_REG_MINLEN, M_NOWAIT);
 		if (mcp == NULL) {
 			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: "
 			    "could not copy register head");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		/*
 		 * forward the inner ip6 packet; point m_data at the inner ip6.
 		 */
 		m_adj(m, off + PIM_MINLEN);
 		MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: "
 		    "src %s, dst %s, mif %d",
 		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &eip6->ip6_dst), reg_mif_num);
 
 		rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m,
 				dst.sin6_family, 0);
 
 		/* prepare the register head to send to the mrouting daemon */
 		m = mcp;
 	}
 
 	/*
 	 * Pass the PIM message up to the daemon; if it is a register message
 	 * pass the 'head' only up to the daemon. This includes the
 	 * encapsulator ip6 header, pim header, register header and the
 	 * encapsulated ip6 header.
 	 */
   pim6_input_to_daemon:
-	rip6_input(&m, offp, proto);
-	return (IPPROTO_DONE);
+	return (rip6_input(&m, &off, proto));
 }
 
 static int
 ip6_mroute_modevent(module_t mod, int type, void *unused)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		MROUTER6_LOCK_INIT();
 		MFC6_LOCK_INIT();
 		MIF6_LOCK_INIT();
 
-		pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
-			pim6_encapcheck,
-			(const struct protosw *)&in6_pim_protosw, NULL);
+		pim6_encap_cookie = ip6_encap_attach(&ipv6_encap_cfg,
+		    NULL, M_WAITOK);
 		if (pim6_encap_cookie == NULL) {
 			printf("ip6_mroute: unable to attach pim6 encap\n");
 			MIF6_LOCK_DESTROY();
 			MFC6_LOCK_DESTROY();
 			MROUTER6_LOCK_DESTROY();
 			return (EINVAL);
 		}
 
 		ip6_mforward = X_ip6_mforward;
 		ip6_mrouter_done = X_ip6_mrouter_done;
 		ip6_mrouter_get = X_ip6_mrouter_get;
 		ip6_mrouter_set = X_ip6_mrouter_set;
 		mrt6_ioctl = X_mrt6_ioctl;
 		break;
 
 	case MOD_UNLOAD:
 		if (V_ip6_mrouter != NULL)
 			return EINVAL;
 
 		if (pim6_encap_cookie) {
-			encap_detach(pim6_encap_cookie);
+			ip6_encap_detach(pim6_encap_cookie);
 			pim6_encap_cookie = NULL;
 		}
 		X_ip6_mrouter_done();
 		ip6_mforward = NULL;
 		ip6_mrouter_done = NULL;
 		ip6_mrouter_get = NULL;
 		ip6_mrouter_set = NULL;
 		mrt6_ioctl = NULL;
 
 		MIF6_LOCK_DESTROY();
 		MFC6_LOCK_DESTROY();
 		MROUTER6_LOCK_DESTROY();
 		break;
 
 	default:
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 static moduledata_t ip6_mroutemod = {
 	"ip6_mroute",
 	ip6_mroute_modevent,
 	0
 };
 
 DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
diff --git a/sys/netinet6/pim6_var.h b/sys/netinet6/pim6_var.h
index 7afe89b9f844..7288c67eee22 100644
--- a/sys/netinet6/pim6_var.h
+++ b/sys/netinet6/pim6_var.h
@@ -1,65 +1,61 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $
  * $FreeBSD$
  */
 
 /*
  * Protocol Independent Multicast (PIM),
  * implementation-specific definitions.
  *
  * Written by George Edmond Eddy (Rusty), ISI, February 1998
  * Modified by Pavlin Ivanov Radoslavov, USC/ISI, May 1998
  */
 
 #ifndef _NETINET6_PIM6_VAR_H_
 #define _NETINET6_PIM6_VAR_H_
 
 struct pim6stat {
 	uint64_t pim6s_rcv_total;	/* total PIM messages received	*/
 	uint64_t pim6s_rcv_tooshort;	/* received with too few bytes	*/
 	uint64_t pim6s_rcv_badsum;	/* received with bad checksum	*/
 	uint64_t pim6s_rcv_badversion;	/* received bad PIM version	*/
 	uint64_t pim6s_rcv_registers;	/* received registers		*/
 	uint64_t pim6s_rcv_badregisters; /* received invalid registers	*/
 	uint64_t pim6s_snd_registers;	/* sent registers		*/
 };
 
-#if (defined(KERNEL)) || (defined(_KERNEL))
-int pim6_input(struct mbuf **, int*, int);
-#endif /* KERNEL */
-
 /*
  * Identifiers for PIM sysctl nodes
  */
 #define PIM6CTL_STATS		1	/* statistics (read-only) */
 
 #endif /* _NETINET6_PIM6_VAR_H_ */
diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c
index 7226e31dc191..7a4a630bb693 100644
--- a/sys/netipsec/xform_ipcomp.c
+++ b/sys/netipsec/xform_ipcomp.c
@@ -1,787 +1,777 @@
 /*	$FreeBSD$	*/
 /* $OpenBSD: ip_ipcomp.c,v 1.1 2001/07/05 12:08:52 jjbg Exp $ */
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001 Jean-Jacques Bernard-Gundol (jj@wabbitt.org)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *   notice, this list of conditions and the following disclaimer in the
  *   documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *   derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /* IP payload compression protocol (IPComp), see RFC 2393 */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netipsec/ipsec6.h>
 #endif
 
 #include <netipsec/ipcomp.h>
 #include <netipsec/ipcomp_var.h>
 
 #include <netipsec/key.h>
 #include <netipsec/key_debug.h>
 
 #include <opencrypto/cryptodev.h>
 #include <opencrypto/deflate.h>
 #include <opencrypto/xform.h>
 
 VNET_DEFINE(int, ipcomp_enable) = 1;
 VNET_PCPUSTAT_DEFINE(struct ipcompstat, ipcompstat);
 VNET_PCPUSTAT_SYSINIT(ipcompstat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(ipcompstat);
 #endif /* VIMAGE */
 
 SYSCTL_DECL(_net_inet_ipcomp);
 SYSCTL_INT(_net_inet_ipcomp, OID_AUTO, ipcomp_enable,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipcomp_enable), 0, "");
 SYSCTL_VNET_PCPUSTAT(_net_inet_ipcomp, IPSECCTL_STATS, stats,
     struct ipcompstat, ipcompstat,
     "IPCOMP statistics (struct ipcompstat, netipsec/ipcomp_var.h");
 
 static int ipcomp_input_cb(struct cryptop *crp);
 static int ipcomp_output_cb(struct cryptop *crp);
 
 /*
  * RFC 3173 p 2.2. Non-Expansion Policy:
  * If the total size of a compressed payload and the IPComp header, as
  * defined in section 3, is not smaller than the size of the original
  * payload, the IP datagram MUST be sent in the original non-compressed
  * form.
  *
  * When we use IPComp in tunnel mode, for small packets we will receive
  * encapsulated IP-IP datagrams without any compression and without IPComp
  * header.
  */
 static int
 ipcomp_encapcheck(union sockaddr_union *src, union sockaddr_union *dst)
 {
 	struct secasvar *sav;
 
 	sav = key_allocsa_tunnel(src, dst, IPPROTO_IPCOMP);
 	if (sav == NULL)
 		return (0);
 	key_freesav(&sav);
 
 	if (src->sa.sa_family == AF_INET)
 		return (sizeof(struct in_addr) << 4);
 	else
 		return (sizeof(struct in6_addr) << 4);
 }
 
 static int
-ipcomp_nonexp_input(struct mbuf **mp, int *offp, int proto)
+ipcomp_nonexp_input(struct mbuf *m, int off, int proto, void *arg __unused)
 {
 	int isr;
 
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		IPCOMPSTAT_INC(ipcomps_nopf);
-		m_freem(*mp);
+		m_freem(m);
 		return (IPPROTO_DONE);
 	}
-	m_adj(*mp, *offp);
-	IPCOMPSTAT_ADD(ipcomps_ibytes, (*mp)->m_pkthdr.len);
+	m_adj(m, off);
+	IPCOMPSTAT_ADD(ipcomps_ibytes, m->m_pkthdr.len);
 	IPCOMPSTAT_INC(ipcomps_input);
-	netisr_dispatch(isr, *mp);
+	netisr_dispatch(isr, m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * ipcomp_init() is called when an CPI is being set up.
  */
 static int
 ipcomp_init(struct secasvar *sav, struct xformsw *xsp)
 {
 	const struct comp_algo *tcomp;
 	struct cryptoini cric;
 
 	/* NB: algorithm really comes in alg_enc and not alg_comp! */
 	tcomp = comp_algorithm_lookup(sav->alg_enc);
 	if (tcomp == NULL) {
 		DPRINTF(("%s: unsupported compression algorithm %d\n", __func__,
 			 sav->alg_comp));
 		return EINVAL;
 	}
 	sav->alg_comp = sav->alg_enc;		/* set for doing histogram */
 	sav->tdb_xform = xsp;
 	sav->tdb_compalgxform = tcomp;
 
 	/* Initialize crypto session */
 	bzero(&cric, sizeof (cric));
 	cric.cri_alg = sav->tdb_compalgxform->type;
 
 	return crypto_newsession(&sav->tdb_cryptoid, &cric, V_crypto_support);
 }
 
 /*
  * ipcomp_zeroize() used when IPCA is deleted
  */
 static int
 ipcomp_zeroize(struct secasvar *sav)
 {
 	int err;
 
 	err = crypto_freesession(sav->tdb_cryptoid);
 	sav->tdb_cryptoid = 0;
 	return err;
 }
 
 /*
  * ipcomp_input() gets called to uncompress an input packet
  */
 static int
 ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
 {
 	struct xform_data *xd;
 	struct cryptodesc *crdc;
 	struct cryptop *crp;
 	struct ipcomp *ipcomp;
 	caddr_t addr;
 	int error, hlen = IPCOMP_HLENGTH;
 
 	/*
 	 * Check that the next header of the IPComp is not IPComp again, before
 	 * doing any real work.  Given it is not possible to do double
 	 * compression it means someone is playing tricks on us.
 	 */
 	error = ENOBUFS;
 	if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) {
 		IPCOMPSTAT_INC(ipcomps_hdrops);		/*XXX*/
 		DPRINTF(("%s: m_pullup failed\n", __func__));
 		key_freesav(&sav);
 		return (error);
 	}
 	addr = (caddr_t) mtod(m, struct ip *) + skip;
 	ipcomp = (struct ipcomp *)addr;
 	if (ipcomp->comp_nxt == IPPROTO_IPCOMP) {
 		IPCOMPSTAT_INC(ipcomps_pdrops);	/* XXX have our own stats? */
 		DPRINTF(("%s: recursive compression detected\n", __func__));
 		error = EINVAL;
 		goto bad;
 	}
 
 	/* Get crypto descriptors */
 	crp = crypto_getreq(1);
 	if (crp == NULL) {
 		DPRINTF(("%s: no crypto descriptors\n", __func__));
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		goto bad;
 	}
 	/* Get IPsec-specific opaque pointer */
 	xd = malloc(sizeof(*xd), M_XDATA, M_NOWAIT | M_ZERO);
 	if (xd == NULL) {
 		DPRINTF(("%s: cannot allocate xform_data\n", __func__));
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		crypto_freereq(crp);
 		goto bad;
 	}
 	crdc = crp->crp_desc;
 
 	crdc->crd_skip = skip + hlen;
 	crdc->crd_len = m->m_pkthdr.len - (skip + hlen);
 	crdc->crd_inject = skip;
 
 	/* Decompression operation */
 	crdc->crd_alg = sav->tdb_compalgxform->type;
 
 
 	/* Crypto operation descriptor */
 	crp->crp_ilen = m->m_pkthdr.len - (skip + hlen);
 	crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC;
 	crp->crp_buf = (caddr_t) m;
 	crp->crp_callback = ipcomp_input_cb;
 	crp->crp_opaque = (caddr_t) xd;
 
 	/* These are passed as-is to the callback */
 	xd->sav = sav;
 	xd->protoff = protoff;
 	xd->skip = skip;
 	xd->vnet = curvnet;
 
 	SECASVAR_LOCK(sav);
 	crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid;
 	SECASVAR_UNLOCK(sav);
 
 	return crypto_dispatch(crp);
 bad:
 	m_freem(m);
 	key_freesav(&sav);
 	return (error);
 }
 
 /*
  * IPComp input callback from the crypto driver.
  */
 static int
 ipcomp_input_cb(struct cryptop *crp)
 {
 	IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]);
 	struct xform_data *xd;
 	struct mbuf *m;
 	struct secasvar *sav;
 	struct secasindex *saidx;
 	caddr_t addr;
 	uint64_t cryptoid;
 	int hlen = IPCOMP_HLENGTH, error, clen;
 	int skip, protoff;
 	uint8_t nproto;
 
 	m = (struct mbuf *) crp->crp_buf;
 	xd = (struct xform_data *) crp->crp_opaque;
 	CURVNET_SET(xd->vnet);
 	sav = xd->sav;
 	skip = xd->skip;
 	protoff = xd->protoff;
 	cryptoid = xd->cryptoid;
 	saidx = &sav->sah->saidx;
 	IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET ||
 		saidx->dst.sa.sa_family == AF_INET6,
 		("unexpected protocol family %u", saidx->dst.sa.sa_family));
 
 	/* Check for crypto errors */
 	if (crp->crp_etype) {
 		if (crp->crp_etype == EAGAIN) {
 			/* Reset the session ID */
 			if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
 				crypto_freesession(cryptoid);
 			xd->cryptoid = crp->crp_sid;
 			CURVNET_RESTORE();
 			return (crypto_dispatch(crp));
 		}
 		IPCOMPSTAT_INC(ipcomps_noxform);
 		DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
 		error = crp->crp_etype;
 		goto bad;
 	}
 	/* Shouldn't happen... */
 	if (m == NULL) {
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		DPRINTF(("%s: null mbuf returned from crypto\n", __func__));
 		error = EINVAL;
 		goto bad;
 	}
 	IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]);
 
 	clen = crp->crp_olen;		/* Length of data after processing */
 
 	/* Release the crypto descriptors */
 	free(xd, M_XDATA), xd = NULL;
 	crypto_freereq(crp), crp = NULL;
 
 	/* In case it's not done already, adjust the size of the mbuf chain */
 	m->m_pkthdr.len = clen + hlen + skip;
 
 	if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) {
 		IPCOMPSTAT_INC(ipcomps_hdrops);		/*XXX*/
 		DPRINTF(("%s: m_pullup failed\n", __func__));
 		error = EINVAL;				/*XXX*/
 		goto bad;
 	}
 
 	/* Keep the next protocol field */
 	addr = (caddr_t) mtod(m, struct ip *) + skip;
 	nproto = ((struct ipcomp *) addr)->comp_nxt;
 
 	/* Remove the IPCOMP header */
 	error = m_striphdr(m, skip, hlen);
 	if (error) {
 		IPCOMPSTAT_INC(ipcomps_hdrops);
 		DPRINTF(("%s: bad mbuf chain, IPCA %s/%08lx\n", __func__,
 		    ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
 		    (u_long) ntohl(sav->spi)));
 		goto bad;
 	}
 
 	/* Restore the Next Protocol field */
 	m_copyback(m, protoff, sizeof (u_int8_t), (u_int8_t *) &nproto);
 
 	switch (saidx->dst.sa.sa_family) {
 #ifdef INET6
 	case AF_INET6:
 		error = ipsec6_common_input_cb(m, sav, skip, protoff);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		error = ipsec4_common_input_cb(m, sav, skip, protoff);
 		break;
 #endif
 	default:
 		panic("%s: Unexpected address family: %d saidx=%p", __func__,
 		    saidx->dst.sa.sa_family, saidx);
 	}
 	CURVNET_RESTORE();
 	return error;
 bad:
 	CURVNET_RESTORE();
 	if (sav != NULL)
 		key_freesav(&sav);
 	if (m != NULL)
 		m_freem(m);
 	if (xd != NULL)
 		free(xd, M_XDATA);
 	if (crp != NULL)
 		crypto_freereq(crp);
 	return error;
 }
 
 /*
  * IPComp output routine, called by ipsec[46]_perform_request()
  */
 static int
 ipcomp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
     u_int idx, int skip, int protoff)
 {
 	IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]);
 	const struct comp_algo *ipcompx;
 	struct cryptodesc *crdc;
 	struct cryptop *crp;
 	struct xform_data *xd;
 	int error, ralen, maxpacketsize;
 
 	IPSEC_ASSERT(sav != NULL, ("null SA"));
 	ipcompx = sav->tdb_compalgxform;
 	IPSEC_ASSERT(ipcompx != NULL, ("null compression xform"));
 
 	/*
 	 * Do not touch the packet in case our payload to compress
 	 * is lower than the minimal threshold of the compression
 	 * alogrithm.  We will just send out the data uncompressed.
 	 * See RFC 3173, 2.2. Non-Expansion Policy.
 	 */
 	if (m->m_pkthdr.len <= ipcompx->minlen) {
 		IPCOMPSTAT_INC(ipcomps_threshold);
 		return ipsec_process_done(m, sp, sav, idx);
 	}
 
 	ralen = m->m_pkthdr.len - skip;	/* Raw payload length before comp. */
 	IPCOMPSTAT_INC(ipcomps_output);
 
 	/* Check for maximum packet size violations. */
 	switch (sav->sah->saidx.dst.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		maxpacketsize = IP_MAXPACKET;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		maxpacketsize = IPV6_MAXPACKET;
 		break;
 #endif /* INET6 */
 	default:
 		IPCOMPSTAT_INC(ipcomps_nopf);
 		DPRINTF(("%s: unknown/unsupported protocol family %d, "
 		    "IPCA %s/%08lx\n", __func__,
 		    sav->sah->saidx.dst.sa.sa_family,
 		    ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
 		    (u_long) ntohl(sav->spi)));
 		error = EPFNOSUPPORT;
 		goto bad;
 	}
 	if (ralen + skip + IPCOMP_HLENGTH > maxpacketsize) {
 		IPCOMPSTAT_INC(ipcomps_toobig);
 		DPRINTF(("%s: packet in IPCA %s/%08lx got too big "
 		    "(len %u, max len %u)\n", __func__,
 		    ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
 		    (u_long) ntohl(sav->spi),
 		    ralen + skip + IPCOMP_HLENGTH, maxpacketsize));
 		error = EMSGSIZE;
 		goto bad;
 	}
 
 	/* Update the counters */
 	IPCOMPSTAT_ADD(ipcomps_obytes, m->m_pkthdr.len - skip);
 
 	m = m_unshare(m, M_NOWAIT);
 	if (m == NULL) {
 		IPCOMPSTAT_INC(ipcomps_hdrops);
 		DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n",
 		    __func__, ipsec_address(&sav->sah->saidx.dst, buf,
 		    sizeof(buf)), (u_long) ntohl(sav->spi)));
 		error = ENOBUFS;
 		goto bad;
 	}
 
 	/* Ok now, we can pass to the crypto processing. */
 
 	/* Get crypto descriptors */
 	crp = crypto_getreq(1);
 	if (crp == NULL) {
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__));
 		error = ENOBUFS;
 		goto bad;
 	}
 	crdc = crp->crp_desc;
 
 	/* Compression descriptor */
 	crdc->crd_skip = skip;
 	crdc->crd_len = ralen;
 	crdc->crd_flags = CRD_F_COMP;
 	crdc->crd_inject = skip;
 
 	/* Compression operation */
 	crdc->crd_alg = ipcompx->type;
 
 	/* IPsec-specific opaque crypto info */
 	xd =  malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO);
 	if (xd == NULL) {
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		DPRINTF(("%s: failed to allocate xform_data\n", __func__));
 		crypto_freereq(crp);
 		error = ENOBUFS;
 		goto bad;
 	}
 
 	xd->sp = sp;
 	xd->sav = sav;
 	xd->idx = idx;
 	xd->skip = skip;
 	xd->protoff = protoff;
 	xd->vnet = curvnet;
 
 	/* Crypto operation descriptor */
 	crp->crp_ilen = m->m_pkthdr.len;	/* Total input length */
 	crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC;
 	crp->crp_buf = (caddr_t) m;
 	crp->crp_callback = ipcomp_output_cb;
 	crp->crp_opaque = (caddr_t) xd;
 
 	SECASVAR_LOCK(sav);
 	crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid;
 	SECASVAR_UNLOCK(sav);
 
 	return crypto_dispatch(crp);
 bad:
 	if (m)
 		m_freem(m);
 	key_freesav(&sav);
 	key_freesp(&sp);
 	return (error);
 }
 
 /*
  * IPComp output callback from the crypto driver.
  */
 static int
 ipcomp_output_cb(struct cryptop *crp)
 {
 	IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]);
 	struct xform_data *xd;
 	struct secpolicy *sp;
 	struct secasvar *sav;
 	struct mbuf *m;
 	uint64_t cryptoid;
 	u_int idx;
 	int error, skip, protoff;
 
 	m = (struct mbuf *) crp->crp_buf;
 	xd = (struct xform_data *) crp->crp_opaque;
 	CURVNET_SET(xd->vnet);
 	idx = xd->idx;
 	sp = xd->sp;
 	sav = xd->sav;
 	skip = xd->skip;
 	protoff = xd->protoff;
 	cryptoid = xd->cryptoid;
 
 	/* Check for crypto errors */
 	if (crp->crp_etype) {
 		if (crp->crp_etype == EAGAIN) {
 			/* Reset the session ID */
 			if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
 				crypto_freesession(cryptoid);
 			xd->cryptoid = crp->crp_sid;
 			CURVNET_RESTORE();
 			return (crypto_dispatch(crp));
 		}
 		IPCOMPSTAT_INC(ipcomps_noxform);
 		DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
 		error = crp->crp_etype;
 		goto bad;
 	}
 	/* Shouldn't happen... */
 	if (m == NULL) {
 		IPCOMPSTAT_INC(ipcomps_crypto);
 		DPRINTF(("%s: bogus return buffer from crypto\n", __func__));
 		error = EINVAL;
 		goto bad;
 	}
 	IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]);
 
 	if (crp->crp_ilen - skip > crp->crp_olen) {
 		struct mbuf *mo;
 		struct ipcomp *ipcomp;
 		int roff;
 		uint8_t prot;
 
 		/* Compression helped, inject IPCOMP header. */
 		mo = m_makespace(m, skip, IPCOMP_HLENGTH, &roff);
 		if (mo == NULL) {
 			IPCOMPSTAT_INC(ipcomps_wrap);
 			DPRINTF(("%s: IPCOMP header inject failed "
 			    "for IPCA %s/%08lx\n",
 			    __func__, ipsec_address(&sav->sah->saidx.dst, buf,
 			    sizeof(buf)), (u_long) ntohl(sav->spi)));
 			error = ENOBUFS;
 			goto bad;
 		}
 		ipcomp = (struct ipcomp *)(mtod(mo, caddr_t) + roff);
 
 		/* Initialize the IPCOMP header */
 		/* XXX alignment always correct? */
 		switch (sav->sah->saidx.dst.sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			ipcomp->comp_nxt = mtod(m, struct ip *)->ip_p;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			ipcomp->comp_nxt = mtod(m, struct ip6_hdr *)->ip6_nxt;
 			break;
 #endif
 		}
 		ipcomp->comp_flags = 0;
 		ipcomp->comp_cpi = htons((u_int16_t) ntohl(sav->spi));
 
 		/* Fix Next Protocol in IPv4/IPv6 header */
 		prot = IPPROTO_IPCOMP;
 		m_copyback(m, protoff, sizeof(u_int8_t),
 		    (u_char *)&prot);
 
 		/* Adjust the length in the IP header */
 		switch (sav->sah->saidx.dst.sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			mtod(m, struct ip6_hdr *)->ip6_plen =
 				htons(m->m_pkthdr.len) - sizeof(struct ip6_hdr);
 			break;
 #endif /* INET6 */
 		default:
 			IPCOMPSTAT_INC(ipcomps_nopf);
 			DPRINTF(("%s: unknown/unsupported protocol "
 			    "family %d, IPCA %s/%08lx\n", __func__,
 			    sav->sah->saidx.dst.sa.sa_family,
 			    ipsec_address(&sav->sah->saidx.dst, buf,
 				sizeof(buf)), (u_long) ntohl(sav->spi)));
 			error = EPFNOSUPPORT;
 			goto bad;
 		}
 	} else {
 		/* Compression was useless, we have lost time. */
 		IPCOMPSTAT_INC(ipcomps_uncompr);
 		DPRINTF(("%s: compressions was useless %d - %d <= %d\n",
 		    __func__, crp->crp_ilen, skip, crp->crp_olen));
 		/* XXX remember state to not compress the next couple
 		 *     of packets, RFC 3173, 2.2. Non-Expansion Policy */
 	}
 
 	/* Release the crypto descriptor */
 	free(xd, M_XDATA);
 	crypto_freereq(crp);
 
 	/* NB: m is reclaimed by ipsec_process_done. */
 	error = ipsec_process_done(m, sp, sav, idx);
 	CURVNET_RESTORE();
 	return (error);
 bad:
 	if (m)
 		m_freem(m);
 	CURVNET_RESTORE();
 	free(xd, M_XDATA);
 	crypto_freereq(crp);
 	key_freesav(&sav);
 	key_freesp(&sp);
 	return (error);
 }
 
 #ifdef INET
-static const struct encaptab *ipe4_cookie = NULL;
-extern struct domain inetdomain;
-static struct protosw ipcomp4_protosw = {
-	.pr_type =	SOCK_RAW,
-	.pr_domain =	&inetdomain,
-	.pr_protocol =	0 /* IPPROTO_IPV[46] */,
-	.pr_flags =	PR_ATOMIC | PR_ADDR | PR_LASTHDR,
-	.pr_input =	ipcomp_nonexp_input,
-	.pr_output =	rip_output,
-	.pr_ctloutput =	rip_ctloutput,
-	.pr_usrreqs =	&rip_usrreqs
-};
-
 static int
 ipcomp4_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
     void *arg __unused)
 {
 	union sockaddr_union src, dst;
 	const struct ip *ip;
 
 	if (V_ipcomp_enable == 0)
 		return (0);
 	if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6)
 		return (0);
 	bzero(&src, sizeof(src));
 	bzero(&dst, sizeof(dst));
 	src.sa.sa_family = dst.sa.sa_family = AF_INET;
 	src.sin.sin_len = dst.sin.sin_len = sizeof(struct sockaddr_in);
 	ip = mtod(m, const struct ip *);
 	src.sin.sin_addr = ip->ip_src;
 	dst.sin.sin_addr = ip->ip_dst;
 	return (ipcomp_encapcheck(&src, &dst));
 }
+
+static const struct encaptab *ipe4_cookie = NULL;
+static const struct encap_config ipv4_encap_cfg = {
+	.proto = -1,
+	.min_length = sizeof(struct ip),
+	.exact_match = sizeof(in_addr_t) << 4,
+	.check = ipcomp4_nonexp_encapcheck,
+	.input = ipcomp_nonexp_input
+};
 #endif
 #ifdef INET6
-static const struct encaptab *ipe6_cookie = NULL;
-extern struct domain inet6domain;
-static struct protosw ipcomp6_protosw = {
-	.pr_type =	SOCK_RAW,
-	.pr_domain =	&inet6domain,
-	.pr_protocol =	0 /* IPPROTO_IPV[46] */,
-	.pr_flags =	PR_ATOMIC | PR_ADDR | PR_LASTHDR,
-	.pr_input =	ipcomp_nonexp_input,
-	.pr_output =	rip6_output,
-	.pr_ctloutput =	rip6_ctloutput,
-	.pr_usrreqs =	&rip6_usrreqs
-};
-
 static int
 ipcomp6_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
     void *arg __unused)
 {
 	union sockaddr_union src, dst;
 	const struct ip6_hdr *ip6;
 
 	if (V_ipcomp_enable == 0)
 		return (0);
 	if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6)
 		return (0);
 	bzero(&src, sizeof(src));
 	bzero(&dst, sizeof(dst));
 	src.sa.sa_family = dst.sa.sa_family = AF_INET;
 	src.sin6.sin6_len = dst.sin6.sin6_len = sizeof(struct sockaddr_in6);
 	ip6 = mtod(m, const struct ip6_hdr *);
 	src.sin6.sin6_addr = ip6->ip6_src;
 	dst.sin6.sin6_addr = ip6->ip6_dst;
 	if (IN6_IS_SCOPE_LINKLOCAL(&src.sin6.sin6_addr)) {
 		/* XXX: sa6_recoverscope() */
 		src.sin6.sin6_scope_id =
 		    ntohs(src.sin6.sin6_addr.s6_addr16[1]);
 		src.sin6.sin6_addr.s6_addr16[1] = 0;
 	}
 	if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6.sin6_addr)) {
 		/* XXX: sa6_recoverscope() */
 		dst.sin6.sin6_scope_id =
 		    ntohs(dst.sin6.sin6_addr.s6_addr16[1]);
 		dst.sin6.sin6_addr.s6_addr16[1] = 0;
 	}
 	return (ipcomp_encapcheck(&src, &dst));
 }
+
+static const struct encaptab *ipe6_cookie = NULL;
+static const struct encap_config ipv6_encap_cfg = {
+	.proto = -1,
+	.min_length = sizeof(struct ip6_hdr),
+	.exact_match = sizeof(struct in6_addr) << 4,
+	.check = ipcomp6_nonexp_encapcheck,
+	.input = ipcomp_nonexp_input
+};
 #endif
 
 static struct xformsw ipcomp_xformsw = {
 	.xf_type =	XF_IPCOMP,
 	.xf_name =	"IPcomp",
 	.xf_init =	ipcomp_init,
 	.xf_zeroize =	ipcomp_zeroize,
 	.xf_input =	ipcomp_input,
 	.xf_output =	ipcomp_output,
 };
 
 static void
 ipcomp_attach(void)
 {
 
 #ifdef INET
-	ipe4_cookie = encap_attach_func(AF_INET, -1,
-	    ipcomp4_nonexp_encapcheck, &ipcomp4_protosw, NULL);
+	ipe4_cookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
 #endif
 #ifdef INET6
-	ipe6_cookie = encap_attach_func(AF_INET6, -1,
-	    ipcomp6_nonexp_encapcheck, &ipcomp6_protosw, NULL);
+	ipe6_cookie = ip6_encap_attach(&ipv6_encap_cfg, NULL, M_WAITOK);
 #endif
 	xform_attach(&ipcomp_xformsw);
 }
 
 static void
 ipcomp_detach(void)
 {
 
 #ifdef INET
-	encap_detach(ipe4_cookie);
+	ip_encap_detach(ipe4_cookie);
 #endif
 #ifdef INET6
-	encap_detach(ipe6_cookie);
+	ip6_encap_detach(ipe6_cookie);
 #endif
 	xform_detach(&ipcomp_xformsw);
 }
 
 SYSINIT(ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
     ipcomp_attach, NULL);
 SYSUNINIT(ipcomp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
     ipcomp_detach, NULL);