diff --git a/sys/net/if.c b/sys/net/if.c
index 24a97329d232..906f2256dd54 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1,4786 +1,4772 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2010 Bjoern A. Zeeb <bz@FreeBSD.org>
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #include "opt_bpf.h"
 #include "opt_inet6.h"
 #include "opt_inet.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/domainset.h>
 #include <sys/sbuf.h>
 #include <sys/bus.h>
 #include <sys/epoch.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/refcount.h>
 #include <sys/module.h>
 #include <sys/nv.h>
 #include <sys/rwlock.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/taskqueue.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <sys/priv.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <machine/stdarg.h>
 #include <vm/uma.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_media.h>
 #include <net/if_mib.h>
 #include <net/if_vlan_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <net/route/route_ctl.h>
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_carp.h>
 #ifdef INET
 #include <net/debugnet.h>
 #include <netinet/if_ether.h>
 #endif /* INET */
 #ifdef INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #endif /* INET6 */
 #endif /* INET || INET6 */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name
  * and ifr_ifru when it is used in SIOCGIFCONF.
  */
 _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) ==
     offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru");
 
 __read_mostly epoch_t net_epoch_preempt;
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <compat/freebsd32/freebsd32.h>
 
 struct ifreq_buffer32 {
 	uint32_t	length;		/* (size_t) */
 	uint32_t	buffer;		/* (void *) */
 };
 
 /*
  * Interface request structure used for socket
  * ioctl's.  All interface ioctl's must have parameter
  * definitions which begin with ifr_name.  The
  * remainder may be interface specific.
  */
 struct ifreq32 {
 	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
 	union {
 		struct sockaddr	ifru_addr;
 		struct sockaddr	ifru_dstaddr;
 		struct sockaddr	ifru_broadaddr;
 		struct ifreq_buffer32 ifru_buffer;
 		short		ifru_flags[2];
 		short		ifru_index;
 		int		ifru_jid;
 		int		ifru_metric;
 		int		ifru_mtu;
 		int		ifru_phys;
 		int		ifru_media;
 		uint32_t	ifru_data;
 		int		ifru_cap[2];
 		u_int		ifru_fib;
 		u_char		ifru_vlan_pcp;
 	} ifr_ifru;
 };
 CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
 CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
     __offsetof(struct ifreq32, ifr_ifru));
 
 struct ifconf32 {
 	int32_t	ifc_len;
 	union {
 		uint32_t	ifcu_buf;
 		uint32_t	ifcu_req;
 	} ifc_ifcu;
 };
 #define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
 
 struct ifdrv32 {
 	char		ifd_name[IFNAMSIZ];
 	uint32_t	ifd_cmd;
 	uint32_t	ifd_len;
 	uint32_t	ifd_data;
 };
 #define SIOCSDRVSPEC32	_IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32)
 #define SIOCGDRVSPEC32	_IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32)
 
 struct ifgroupreq32 {
 	char	ifgr_name[IFNAMSIZ];
 	u_int	ifgr_len;
 	union {
 		char		ifgru_group[IFNAMSIZ];
 		uint32_t	ifgru_groups;
 	} ifgr_ifgru;
 };
 #define	SIOCAIFGROUP32	_IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32)
 #define	SIOCGIFGROUP32	_IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32)
 #define	SIOCDIFGROUP32	_IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32)
 #define	SIOCGIFGMEMB32	_IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32)
 
 struct ifmediareq32 {
 	char		ifm_name[IFNAMSIZ];
 	int		ifm_current;
 	int		ifm_mask;
 	int		ifm_status;
 	int		ifm_active;
 	int		ifm_count;
 	uint32_t	ifm_ulist;	/* (int *) */
 };
 #define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
 #define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
 #endif /* COMPAT_FREEBSD32 */
 
 union ifreq_union {
 	struct ifreq	ifr;
 #ifdef COMPAT_FREEBSD32
 	struct ifreq32	ifr32;
 #endif
 };
 
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Generic link-management");
 
 SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
     &ifqmaxlen, 0, "max send queue size");
 
 /* Log link state change events */
 static int log_link_state_change = 1;
 
 SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
 	&log_link_state_change, 0,
 	"log interface link state change events");
 
 /* Log promiscuous mode change events */
 static int log_promisc_mode_change = 1;
 
 SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
 	&log_promisc_mode_change, 1,
 	"log promiscuous mode change events");
 
 /* Interface description */
 static unsigned int ifdescr_maxlen = 1024;
 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
 	&ifdescr_maxlen, 0,
 	"administrative maximum length for interface description");
 
 static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
 
 /* global sx for non-critical path ifdescr */
 static struct sx ifdescr_sx;
 SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
 
 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
 void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 /* These are external hooks for CARP. */
 void	(*carp_linkstate_p)(struct ifnet *ifp);
 void	(*carp_demote_adj_p)(int, char *);
 int	(*carp_master_p)(struct ifaddr *);
 #if defined(INET) || defined(INET6)
 int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
 int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
     const struct sockaddr *sa);
 int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);   
 int	(*carp_attach_p)(struct ifaddr *, int);
 void	(*carp_detach_p)(struct ifaddr *, bool);
 #endif
 #ifdef INET
 int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
 #endif
 #ifdef INET6
 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
 caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
     const struct in6_addr *taddr);
 #endif
 
 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
 
 /*
  * XXX: Style; these should be sorted alphabetically, and unprototyped
  * static functions should be prototyped. Currently they are sorted by
  * declaration order.
  */
 static void	if_attachdomain(void *);
 static void	if_attachdomain1(struct ifnet *);
 static int	ifconf(u_long, caddr_t);
 static void	if_input_default(struct ifnet *, struct mbuf *);
 static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
 static void	if_route(struct ifnet *, int flag, int fam);
 static int	if_setflag(struct ifnet *, int, int, int *, int);
 static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
 static void	if_unroute(struct ifnet *, int flag, int fam);
 static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
 static void	do_link_state_change(void *, int);
 static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
 static int	if_getgroupmembers(struct ifgroupreq *);
 static void	if_delgroups(struct ifnet *);
 static void	if_attach_internal(struct ifnet *, bool);
 static int	if_detach_internal(struct ifnet *, bool);
 static void	if_siocaddmulti(void *, int);
 static void	if_link_ifnet(struct ifnet *);
 static bool	if_unlink_ifnet(struct ifnet *, bool);
 #ifdef VIMAGE
 static int	if_vmove(struct ifnet *, struct vnet *);
 #endif
 
 #ifdef INET6
 /*
  * XXX: declare here to avoid to include many inet6 related files..
  * should be more generalized?
  */
 extern void	nd6_setmtu(struct ifnet *);
 #endif
 
 /* ipsec helper hooks */
 VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
 VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
 
 int	ifqmaxlen = IFQ_MAXLEN;
 VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
 VNET_DEFINE(struct ifgrouphead, ifg_head);
 
 /* Table of ifnet by index. */
 static int if_index;
 static int if_indexlim = 8;
 static struct ifindex_entry {
 	struct ifnet	*ife_ifnet;
 	uint16_t	ife_gencnt;
 } *ifindex_table;
 
 SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Variables global to all interfaces");
 static int
 sysctl_ifcount(SYSCTL_HANDLER_ARGS)
 {
 	int rv = 0;
 
 	IFNET_RLOCK();
 	for (int i = 1; i <= if_index; i++)
 		if (ifindex_table[i].ife_ifnet != NULL &&
 		    ifindex_table[i].ife_ifnet->if_vnet == curvnet)
 			rv = i;
 	IFNET_RUNLOCK();
 
 	return (sysctl_handle_int(oidp, &rv, 0, req));
 }
 SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
     CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I",
     "Maximum known interface index");
 
 /*
  * The global network interface list (V_ifnet) and related state (such as
  * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
  * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
  */
 struct sx ifnet_sxlock;
 SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
 
 struct sx ifnet_detach_sxlock;
 SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
     SX_RECURSE);
 
 #ifdef VIMAGE
 #define	VNET_IS_SHUTTING_DOWN(_vnet)					\
     ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
 #endif
 
 static	if_com_alloc_t *if_com_alloc[256];
 static	if_com_free_t *if_com_free[256];
 
 static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
 
 struct ifnet *
 ifnet_byindex(u_int idx)
 {
 	struct ifnet *ifp;
 
 	NET_EPOCH_ASSERT();
 
 	if (__predict_false(idx > if_index))
 		return (NULL);
 
 	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
 
 	if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
 		ifp = NULL;
 
 	return (ifp);
 }
 
 struct ifnet *
 ifnet_byindex_ref(u_int idx)
 {
 	struct ifnet *ifp;
 
 	ifp = ifnet_byindex(idx);
 	if (ifp == NULL || (ifp->if_flags & IFF_DYING))
 		return (NULL);
 	if (!if_try_ref(ifp))
 		return (NULL);
 	return (ifp);
 }
 
 struct ifnet *
 ifnet_byindexgen(uint16_t idx, uint16_t gen)
 {
 	struct ifnet *ifp;
 
 	NET_EPOCH_ASSERT();
 
 	if (__predict_false(idx > if_index))
 		return (NULL);
 
 	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
 
 	if (ifindex_table[idx].ife_gencnt == gen)
 		return (ifp);
 	else
 		return (NULL);
 }
 
 /*
  * Network interface utility routines.
  *
  * Routines with ifa_ifwith* names take sockaddr *'s as
  * parameters.
  */
 
 static void
 if_init(void *arg __unused)
 {
 
 	ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table),
 	    M_IFNET, M_WAITOK | M_ZERO);
 }
 SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
 
 static void
 vnet_if_init(const void *unused __unused)
 {
 
 	CK_STAILQ_INIT(&V_ifnet);
 	CK_STAILQ_INIT(&V_ifg_head);
 	vnet_if_clone_init();
 }
 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
     NULL);
 
 static void
 if_link_ifnet(struct ifnet *ifp)
 {
 
 	IFNET_WLOCK();
 	CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
 #ifdef VIMAGE
 	curvnet->vnet_ifcnt++;
 #endif
 	IFNET_WUNLOCK();
 }
 
 static bool
 if_unlink_ifnet(struct ifnet *ifp, bool vmove)
 {
 	struct ifnet *iter;
 	int found = 0;
 
 	IFNET_WLOCK();
 	CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
 		if (iter == ifp) {
 			CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
 			if (!vmove)
 				ifp->if_flags |= IFF_DYING;
 			found = 1;
 			break;
 		}
 #ifdef VIMAGE
 	curvnet->vnet_ifcnt--;
 #endif
 	IFNET_WUNLOCK();
 
 	return (found);
 }
 
 #ifdef VIMAGE
 static void
 vnet_if_return(const void *unused __unused)
 {
 	struct ifnet *ifp, *nifp;
 	struct ifnet **pending;
 	int found __diagused;
 	int i;
 
 	i = 0;
 
 	/*
 	 * We need to protect our access to the V_ifnet tailq. Ordinarily we'd
 	 * enter NET_EPOCH, but that's not possible, because if_vmove() calls
 	 * if_detach_internal(), which waits for NET_EPOCH callbacks to
 	 * complete. We can't do that from within NET_EPOCH.
 	 *
 	 * However, we can also use the IFNET_xLOCK, which is the V_ifnet
 	 * read/write lock. We cannot hold the lock as we call if_vmove()
 	 * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib
 	 * ctx lock.
 	 */
 	IFNET_WLOCK();
 
 	pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt,
 	    M_IFNET, M_WAITOK | M_ZERO);
 
 	/* Return all inherited interfaces to their parent vnets. */
 	CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
 		if (ifp->if_home_vnet != ifp->if_vnet) {
 			found = if_unlink_ifnet(ifp, true);
 			MPASS(found);
 
 			pending[i++] = ifp;
 		}
 	}
 	IFNET_WUNLOCK();
 
 	for (int j = 0; j < i; j++) {
 		sx_xlock(&ifnet_detach_sxlock);
 		if_vmove(pending[j], pending[j]->if_home_vnet);
 		sx_xunlock(&ifnet_detach_sxlock);
 	}
 
 	free(pending, M_IFNET);
 }
 VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
     vnet_if_return, NULL);
 #endif
 
 /*
  * Allocate a struct ifnet and an index for an interface.  A layer 2
  * common structure will also be allocated if an allocation routine is
  * registered for the passed type.
  */
 static struct ifnet *
 if_alloc_domain(u_char type, int numa_domain)
 {
 	struct ifnet *ifp;
 	u_short idx;
 
 	KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
 	if (numa_domain == IF_NODOM)
 		ifp = malloc(sizeof(struct ifnet), M_IFNET,
 		    M_WAITOK | M_ZERO);
 	else
 		ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
 		    DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
 	ifp->if_type = type;
 	ifp->if_alloctype = type;
 	ifp->if_numa_domain = numa_domain;
 #ifdef VIMAGE
 	ifp->if_vnet = curvnet;
 #endif
 	if (if_com_alloc[type] != NULL) {
 		ifp->if_l2com = if_com_alloc[type](type, ifp);
 		KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__,
 		    type));
 	}
 
 	IF_ADDR_LOCK_INIT(ifp);
 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
 	TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
 	ifp->if_afdata_initialized = 0;
 	IF_AFDATA_LOCK_INIT(ifp);
 	CK_STAILQ_INIT(&ifp->if_addrhead);
 	CK_STAILQ_INIT(&ifp->if_multiaddrs);
 	CK_STAILQ_INIT(&ifp->if_groups);
 #ifdef MAC
 	mac_ifnet_init(ifp);
 #endif
 	ifq_init(&ifp->if_snd, ifp);
 
 	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
 	for (int i = 0; i < IFCOUNTERS; i++)
 		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
 	ifp->if_get_counter = if_get_counter_default;
 	ifp->if_pcp = IFNET_PCP_NONE;
 
 	/* Allocate an ifindex array entry. */
 	IFNET_WLOCK();
 	/*
 	 * Try to find an empty slot below if_index.  If we fail, take the
 	 * next slot.
 	 */
 	for (idx = 1; idx <= if_index; idx++) {
 		if (ifindex_table[idx].ife_ifnet == NULL)
 			break;
 	}
 
 	/* Catch if_index overflow. */
 	if (idx >= if_indexlim) {
 		struct ifindex_entry *new, *old;
 		int newlim;
 
 		newlim = if_indexlim * 2;
 		new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
 		memcpy(new, ifindex_table, if_indexlim * sizeof(*new));
 		old = ifindex_table;
 		ck_pr_store_ptr(&ifindex_table, new);
 		if_indexlim = newlim;
 		epoch_wait_preempt(net_epoch_preempt);
 		free(old, M_IFNET);
 	}
 	if (idx > if_index)
 		if_index = idx;
 
 	ifp->if_index = idx;
 	ifp->if_idxgen = ifindex_table[idx].ife_gencnt;
 	ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp);
 	IFNET_WUNLOCK();
 
 	return (ifp);
 }
 
 struct ifnet *
 if_alloc_dev(u_char type, device_t dev)
 {
 	int numa_domain;
 
 	if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
 		return (if_alloc_domain(type, IF_NODOM));
 	return (if_alloc_domain(type, numa_domain));
 }
 
 struct ifnet *
 if_alloc(u_char type)
 {
 
 	return (if_alloc_domain(type, IF_NODOM));
 }
 /*
  * Do the actual work of freeing a struct ifnet, and layer 2 common
  * structure.  This call is made when the network epoch guarantees
  * us that nobody holds a pointer to the interface.
  */
 static void
 if_free_deferred(epoch_context_t ctx)
 {
 	struct ifnet *ifp = __containerof(ctx, struct ifnet, if_epoch_ctx);
 
 	KASSERT((ifp->if_flags & IFF_DYING),
 	    ("%s: interface not dying", __func__));
 
 	if (if_com_free[ifp->if_alloctype] != NULL)
 		if_com_free[ifp->if_alloctype](ifp->if_l2com,
 		    ifp->if_alloctype);
 
 #ifdef MAC
 	mac_ifnet_destroy(ifp);
 #endif /* MAC */
 	IF_AFDATA_DESTROY(ifp);
 	IF_ADDR_LOCK_DESTROY(ifp);
 	ifq_delete(&ifp->if_snd);
 
 	for (int i = 0; i < IFCOUNTERS; i++)
 		counter_u64_free(ifp->if_counters[i]);
 
 	free(ifp->if_description, M_IFDESCR);
 	free(ifp->if_hw_addr, M_IFADDR);
 	free(ifp, M_IFNET);
 }
 
 /*
  * Deregister an interface and free the associated storage.
  */
 void
 if_free(struct ifnet *ifp)
 {
 
 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
 
 	/*
 	 * XXXGL: An interface index is really an alias to ifp pointer.
 	 * Why would we clear the alias now, and not in the deferred
 	 * context?  Indeed there is nothing wrong with some network
 	 * thread obtaining ifp via ifnet_byindex() inside the network
 	 * epoch and then dereferencing ifp while we perform if_free(),
 	 * and after if_free() finished, too.
 	 *
 	 * This early index freeing was important back when ifindex was
 	 * virtualized and interface would outlive the vnet.
 	 */
 	IFNET_WLOCK();
 	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
 	ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL);
 	ifindex_table[ifp->if_index].ife_gencnt++;
 	while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL)
 		if_index--;
 	IFNET_WUNLOCK();
 
 	if (refcount_release(&ifp->if_refcount))
 		NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
 }
 
 /*
  * Interfaces to keep an ifnet type-stable despite the possibility of the
  * driver calling if_free().  If there are additional references, we defer
  * freeing the underlying data structure.
  */
 void
 if_ref(struct ifnet *ifp)
 {
 	u_int old __diagused;
 
 	/* We don't assert the ifnet list lock here, but arguably should. */
 	old = refcount_acquire(&ifp->if_refcount);
 	KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp));
 }
 
 bool
 if_try_ref(struct ifnet *ifp)
 {
 	NET_EPOCH_ASSERT();
 	return (refcount_acquire_if_not_zero(&ifp->if_refcount));
 }
 
 void
 if_rele(struct ifnet *ifp)
 {
 
 	if (!refcount_release(&ifp->if_refcount))
 		return;
 	NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
 }
 
 void
 ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
 {
 
 	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
 
 	if (ifq->ifq_maxlen == 0) 
 		ifq->ifq_maxlen = ifqmaxlen;
 
 	ifq->altq_type = 0;
 	ifq->altq_disc = NULL;
 	ifq->altq_flags &= ALTQF_CANTCHANGE;
 	ifq->altq_tbr  = NULL;
 	ifq->altq_ifp  = ifp;
 }
 
 void
 ifq_delete(struct ifaltq *ifq)
 {
 	mtx_destroy(&ifq->ifq_mtx);
 }
 
 /*
  * Perform generic interface initialization tasks and attach the interface
  * to the list of "active" interfaces.  If vmove flag is set on entry
  * to if_attach_internal(), perform only a limited subset of initialization
  * tasks, given that we are moving from one vnet to another an ifnet which
  * has already been fully initialized.
  *
  * Note that if_detach_internal() removes group membership unconditionally
  * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
  * Thus, when if_vmove() is applied to a cloned interface, group membership
  * is lost while a cloned one always joins a group whose name is
  * ifc->ifc_name.  To recover this after if_detach_internal() and
  * if_attach_internal(), the cloner should be specified to
  * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
  * attempts to join a group whose name is ifc->ifc_name.
  *
  * XXX:
  *  - The decision to return void and thus require this function to
  *    succeed is questionable.
  *  - We should probably do more sanity checking.  For instance we don't
  *    do anything to insure if_xname is unique or non-empty.
  */
 void
 if_attach(struct ifnet *ifp)
 {
 
 	if_attach_internal(ifp, false);
 }
 
 /*
  * Compute the least common TSO limit.
  */
 void
 if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
 {
 	/*
 	 * 1) If there is no limit currently, take the limit from
 	 * the network adapter.
 	 *
 	 * 2) If the network adapter has a limit below the current
 	 * limit, apply it.
 	 */
 	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
 	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
 		pmax->tsomaxbytes = ifp->if_hw_tsomax;
 	}
 	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
 	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
 		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 	}
 	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
 	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
 		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 	}
 }
 
 /*
  * Update TSO limit of a network adapter.
  *
  * Returns zero if no change. Else non-zero.
  */
 int
 if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
 {
 	int retval = 0;
 	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
 		ifp->if_hw_tsomax = pmax->tsomaxbytes;
 		retval++;
 	}
 	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
 		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
 		retval++;
 	}
 	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
 		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
 		retval++;
 	}
 	return (retval);
 }
 
 static void
 if_attach_internal(struct ifnet *ifp, bool vmove)
 {
 	unsigned socksize, ifasize;
 	int namelen, masklen;
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 
 	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
 
 #ifdef VIMAGE
 	ifp->if_vnet = curvnet;
 	if (ifp->if_home_vnet == NULL)
 		ifp->if_home_vnet = curvnet;
 #endif
 
 	if_addgroup(ifp, IFG_ALL);
 
 #ifdef VIMAGE
 	/* Restore group membership for cloned interface. */
 	if (vmove)
 		if_clone_restoregroup(ifp);
 #endif
 
 	getmicrotime(&ifp->if_lastchange);
 	ifp->if_epoch = time_uptime;
 
 	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
 	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
 	    ("transmit and qflush must both either be set or both be NULL"));
 	if (ifp->if_transmit == NULL) {
 		ifp->if_transmit = if_transmit;
 		ifp->if_qflush = if_qflush;
 	}
 	if (ifp->if_input == NULL)
 		ifp->if_input = if_input_default;
 
 	if (ifp->if_requestencap == NULL)
 		ifp->if_requestencap = if_requestencap_default;
 
 	if (!vmove) {
 #ifdef MAC
 		mac_ifnet_create(ifp);
 #endif
 
 		/*
 		 * Create a Link Level name for this device.
 		 */
 		namelen = strlen(ifp->if_xname);
 		/*
 		 * Always save enough space for any possiable name so we
 		 * can do a rename in place later.
 		 */
 		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
 		socksize = masklen + ifp->if_addrlen;
 		if (socksize < sizeof(*sdl))
 			socksize = sizeof(*sdl);
 		socksize = roundup2(socksize, sizeof(long));
 		ifasize = sizeof(*ifa) + 2 * socksize;
 		ifa = ifa_alloc(ifasize, M_WAITOK);
 		sdl = (struct sockaddr_dl *)(ifa + 1);
 		sdl->sdl_len = socksize;
 		sdl->sdl_family = AF_LINK;
 		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
 		sdl->sdl_nlen = namelen;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = ifp->if_type;
 		ifp->if_addr = ifa;
 		ifa->ifa_ifp = ifp;
 		ifa->ifa_addr = (struct sockaddr *)sdl;
 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
 		ifa->ifa_netmask = (struct sockaddr *)sdl;
 		sdl->sdl_len = masklen;
 		while (namelen != 0)
 			sdl->sdl_data[--namelen] = 0xff;
 		CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
 		/* Reliably crash if used uninitialized. */
 		ifp->if_broadcastaddr = NULL;
 
 		if (ifp->if_type == IFT_ETHER) {
 			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
 			    M_WAITOK | M_ZERO);
 		}
 
 #if defined(INET) || defined(INET6)
 		/* Use defaults for TSO, if nothing is set */
 		if (ifp->if_hw_tsomax == 0 &&
 		    ifp->if_hw_tsomaxsegcount == 0 &&
 		    ifp->if_hw_tsomaxsegsize == 0) {
 			/*
 			 * The TSO defaults needs to be such that an
 			 * NFS mbuf list of 35 mbufs totalling just
 			 * below 64K works and that a chain of mbufs
 			 * can be defragged into at most 32 segments:
 			 */
 			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
 			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
 			ifp->if_hw_tsomaxsegcount = 35;
 			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
 
 			/* XXX some drivers set IFCAP_TSO after ethernet attach */
 			if (ifp->if_capabilities & IFCAP_TSO) {
 				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
 				    ifp->if_hw_tsomax,
 				    ifp->if_hw_tsomaxsegcount,
 				    ifp->if_hw_tsomaxsegsize);
 			}
 		}
 #endif
 	}
 #ifdef VIMAGE
 	else {
 		/*
 		 * Update the interface index in the link layer address
 		 * of the interface.
 		 */
 		for (ifa = ifp->if_addr; ifa != NULL;
 		    ifa = CK_STAILQ_NEXT(ifa, ifa_link)) {
 			if (ifa->ifa_addr->sa_family == AF_LINK) {
 				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 				sdl->sdl_index = ifp->if_index;
 			}
 		}
 	}
 #endif
 
 	if_link_ifnet(ifp);
 
 	if (domain_init_status >= 2)
 		if_attachdomain1(ifp);
 
 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
 
 	/* Announce the interface. */
 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 }
 
 static void
 if_epochalloc(void *dummy __unused)
 {
 
 	net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT);
 }
 SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);
 
 static void
 if_attachdomain(void *dummy)
 {
 	struct ifnet *ifp;
 
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
 		if_attachdomain1(ifp);
 }
 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
     if_attachdomain, NULL);
 
 static void
 if_attachdomain1(struct ifnet *ifp)
 {
 	struct domain *dp;
 
 	/*
 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
 	 * cannot lock ifp->if_afdata initialization, entirely.
 	 */
 	IF_AFDATA_LOCK(ifp);
 	if (ifp->if_afdata_initialized >= domain_init_status) {
 		IF_AFDATA_UNLOCK(ifp);
 		log(LOG_WARNING, "%s called more than once on %s\n",
 		    __func__, ifp->if_xname);
 		return;
 	}
 	ifp->if_afdata_initialized = domain_init_status;
 	IF_AFDATA_UNLOCK(ifp);
 
 	/* address family dependent data region */
 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
 	for (dp = domains; dp; dp = dp->dom_next) {
 		if (dp->dom_ifattach)
 			ifp->if_afdata[dp->dom_family] =
 			    (*dp->dom_ifattach)(ifp);
 	}
 }
 
 /*
  * Remove any unicast or broadcast network addresses from an interface.
  */
 void
 if_purgeaddrs(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 
 #ifdef INET6
 	/*
 	 * Need to leave multicast addresses of proxy NDP llentries
 	 * before in6_purgeifaddr() because the llentries are keys
 	 * for in6_multi objects of proxy NDP entries.
 	 * in6_purgeifaddr()s clean up llentries including proxy NDPs
 	 * then we would lose the keys if they are called earlier.
 	 */
 	in6_purge_proxy_ndp(ifp);
 #endif
 	while (1) {
 		struct epoch_tracker et;
 
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_LINK)
 				break;
 		}
 		NET_EPOCH_EXIT(et);
 
 		if (ifa == NULL)
 			break;
 #ifdef INET
 		/* XXX: Ugly!! ad hoc just for INET */
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			struct ifaliasreq ifr;
 
 			bzero(&ifr, sizeof(ifr));
 			ifr.ifra_addr = *ifa->ifa_addr;
 			if (ifa->ifa_dstaddr)
 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
 			    NULL) == 0)
 				continue;
 		}
 #endif /* INET */
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6) {
 			in6_purgeifaddr((struct in6_ifaddr *)ifa);
 			/* ifp_addrhead is already updated */
 			continue;
 		}
 #endif /* INET6 */
 		IF_ADDR_WLOCK(ifp);
 		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
 		IF_ADDR_WUNLOCK(ifp);
 		ifa_free(ifa);
 	}
 }
 
 /*
  * Remove any multicast network addresses from an interface when an ifnet
  * is going away.
  */
 static void
 if_purgemaddrs(struct ifnet *ifp)
 {
 	struct ifmultiaddr *ifma;
 
 	IF_ADDR_WLOCK(ifp);
 	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
 		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 		if_delmulti_locked(ifp, ifma, 1);
 	}
 	IF_ADDR_WUNLOCK(ifp);
 }
 
 /*
  * Detach an interface, removing it from the list of "active" interfaces.
  * If vmove flag is set on entry to if_detach_internal(), perform only a
  * limited subset of cleanup tasks, given that we are moving an ifnet from
  * one vnet to another, where it must be fully operational.
  *
  * XXXRW: There are some significant questions about event ordering, and
  * how to prevent things from starting to use the interface during detach.
  */
 void
 if_detach(struct ifnet *ifp)
 {
 	bool found;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	found = if_unlink_ifnet(ifp, false);
 	if (found) {
 		sx_xlock(&ifnet_detach_sxlock);
 		if_detach_internal(ifp, false);
 		sx_xunlock(&ifnet_detach_sxlock);
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * The vmove flag, if set, indicates that we are called from a callpath
  * that is moving an interface to a different vnet instance.
  *
  * The shutdown flag, if set, indicates that we are called in the
  * process of shutting down a vnet instance.  Currently only the
  * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
  * on a vnet instance shutdown without this flag being set, e.g., when
  * the cloned interfaces are destoyed as first thing of teardown.
  */
 static int
 if_detach_internal(struct ifnet *ifp, bool vmove)
 {
 	struct ifaddr *ifa;
 	int i;
 	struct domain *dp;
 #ifdef VIMAGE
 	bool shutdown;
 
 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
 #endif
 
 	/*
 	 * At this point we know the interface still was on the ifnet list
 	 * and we removed it so we are in a stable state.
 	 */
 	epoch_wait_preempt(net_epoch_preempt);
 
 	/*
 	 * Ensure all pending EPOCH(9) callbacks have been executed. This
 	 * fixes issues about late destruction of multicast options
 	 * which lead to leave group calls, which in turn access the
 	 * belonging ifnet structure:
 	 */
 	NET_EPOCH_DRAIN_CALLBACKS();
 
 	/*
 	 * In any case (destroy or vmove) detach us from the groups
 	 * and remove/wait for pending events on the taskq.
 	 * XXX-BZ in theory an interface could still enqueue a taskq change?
 	 */
 	if_delgroups(ifp);
 
 	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
 	taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask);
 
 	if_down(ifp);
 
 #ifdef VIMAGE
 	/*
 	 * On VNET shutdown abort here as the stack teardown will do all
 	 * the work top-down for us.
 	 */
 	if (shutdown) {
 		/* Give interface users the chance to clean up. */
 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 
 		/*
 		 * In case of a vmove we are done here without error.
 		 * If we would signal an error it would lead to the same
 		 * abort as if we did not find the ifnet anymore.
 		 * if_detach() calls us in void context and does not care
 		 * about an early abort notification, so life is splendid :)
 		 */
 		goto finish_vnet_shutdown;
 	}
 #endif
 
 	/*
 	 * At this point we are not tearing down a VNET and are either
 	 * going to destroy or vmove the interface and have to cleanup
 	 * accordingly.
 	 */
 
 	/*
 	 * Remove routes and flush queues.
 	 */
 #ifdef ALTQ
 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
 		altq_disable(&ifp->if_snd);
 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
 		altq_detach(&ifp->if_snd);
 #endif
 
 	if_purgeaddrs(ifp);
 
 #ifdef INET
 	in_ifdetach(ifp);
 #endif
 
 #ifdef INET6
 	/*
 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
 	 * before removing routing entries below, since IPv6 interface direct
 	 * routes are expected to be removed by the IPv6-specific kernel API.
 	 * Otherwise, the kernel will detect some inconsistency and bark it.
 	 */
 	in6_ifdetach(ifp);
 #endif
 	if_purgemaddrs(ifp);
 
 	/* Announce that the interface is gone. */
 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
 
 	if (!vmove) {
 		/*
 		 * Prevent further calls into the device driver via ifnet.
 		 */
 		if_dead(ifp);
 
 		/*
 		 * Clean up all addresses.
 		 */
 		IF_ADDR_WLOCK(ifp);
 		if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) {
 			ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
 			CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
 			IF_ADDR_WUNLOCK(ifp);
 			ifa_free(ifa);
 		} else
 			IF_ADDR_WUNLOCK(ifp);
 	}
 
 	rt_flushifroutes(ifp);
 
 #ifdef VIMAGE
 finish_vnet_shutdown:
 #endif
 	/*
 	 * We cannot hold the lock over dom_ifdetach calls as they might
 	 * sleep, for example trying to drain a callout, thus open up the
 	 * theoretical race with re-attaching.
 	 */
 	IF_AFDATA_LOCK(ifp);
 	i = ifp->if_afdata_initialized;
 	ifp->if_afdata_initialized = 0;
 	IF_AFDATA_UNLOCK(ifp);
 	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
 			(*dp->dom_ifdetach)(ifp,
 			    ifp->if_afdata[dp->dom_family]);
 			ifp->if_afdata[dp->dom_family] = NULL;
 		}
 	}
 
 	return (0);
 }
 
 #ifdef VIMAGE
 /*
  * if_vmove() performs a limited version of if_detach() in current
  * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
  */
 static int
 if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
 {
 #ifdef DEV_BPF
 	u_int bif_dlt, bif_hdrlen;
 #endif
 	int rc;
 
 #ifdef DEV_BPF
  	/*
 	 * if_detach_internal() will call the eventhandler to notify
 	 * interface departure.  That will detach if_bpf.  We need to
 	 * safe the dlt and hdrlen so we can re-attach it later.
 	 */
 	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
 #endif
 
 	/*
 	 * Detach from current vnet, but preserve LLADDR info, do not
 	 * mark as dead etc. so that the ifnet can be reattached later.
 	 * If we cannot find it, we lost the race to someone else.
 	 */
 	rc = if_detach_internal(ifp, true);
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * Perform interface-specific reassignment tasks, if provided by
 	 * the driver.
 	 */
 	if (ifp->if_reassign != NULL)
 		ifp->if_reassign(ifp, new_vnet, NULL);
 
 	/*
 	 * Switch to the context of the target vnet.
 	 */
 	CURVNET_SET_QUIET(new_vnet);
 	if_attach_internal(ifp, true);
 
 #ifdef DEV_BPF
 	if (ifp->if_bpf == NULL)
 		bpfattach(ifp, bif_dlt, bif_hdrlen);
 #endif
 
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Move an ifnet to or from another child prison/vnet, specified by the jail id.
  */
 static int
 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
 {
 	struct prison *pr;
 	struct ifnet *difp;
 	int error;
 	bool found __diagused;
 	bool shutdown;
 
 	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
 
 	/* Try to find the prison within our visibility. */
 	sx_slock(&allprison_lock);
 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
 	sx_sunlock(&allprison_lock);
 	if (pr == NULL)
 		return (ENXIO);
 	prison_hold_locked(pr);
 	mtx_unlock(&pr->pr_mtx);
 
 	/* Do not try to move the iface from and to the same prison. */
 	if (pr->pr_vnet == ifp->if_vnet) {
 		prison_free(pr);
 		return (EEXIST);
 	}
 
 	/* Make sure the named iface does not exists in the dst. prison/vnet. */
 	/* XXX Lock interfaces to avoid races. */
 	CURVNET_SET_QUIET(pr->pr_vnet);
 	difp = ifunit(ifname);
 	if (difp != NULL) {
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (EEXIST);
 	}
 	sx_xlock(&ifnet_detach_sxlock);
 
 	/* Make sure the VNET is stable. */
 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
 	if (shutdown) {
 		sx_xunlock(&ifnet_detach_sxlock);
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (EBUSY);
 	}
 	CURVNET_RESTORE();
 
 	found = if_unlink_ifnet(ifp, true);
 	if (! found) {
 		sx_xunlock(&ifnet_detach_sxlock);
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (ENODEV);
 	}
 
 	/* Move the interface into the child jail/vnet. */
 	error = if_vmove(ifp, pr->pr_vnet);
 
 	/* Report the new if_xname back to the userland on success. */
 	if (error == 0)
 		sprintf(ifname, "%s", ifp->if_xname);
 
 	sx_xunlock(&ifnet_detach_sxlock);
 
 	prison_free(pr);
 	return (error);
 }
 
 static int
 if_vmove_reclaim(struct thread *td, char *ifname, int jid)
 {
 	struct prison *pr;
 	struct vnet *vnet_dst;
 	struct ifnet *ifp;
 	int error, found __diagused;
  	bool shutdown;
 
 	/* Try to find the prison within our visibility. */
 	sx_slock(&allprison_lock);
 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
 	sx_sunlock(&allprison_lock);
 	if (pr == NULL)
 		return (ENXIO);
 	prison_hold_locked(pr);
 	mtx_unlock(&pr->pr_mtx);
 
 	/* Make sure the named iface exists in the source prison/vnet. */
 	CURVNET_SET(pr->pr_vnet);
 	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
 	if (ifp == NULL) {
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (ENXIO);
 	}
 
 	/* Do not try to move the iface from and to the same prison. */
 	vnet_dst = TD_TO_VNET(td);
 	if (vnet_dst == ifp->if_vnet) {
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (EEXIST);
 	}
 
 	/* Make sure the VNET is stable. */
 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
 	if (shutdown) {
 		CURVNET_RESTORE();
 		prison_free(pr);
 		return (EBUSY);
 	}
 
 	/* Get interface back from child jail/vnet. */
 	found = if_unlink_ifnet(ifp, true);
 	MPASS(found);
 	sx_xlock(&ifnet_detach_sxlock);
 	error = if_vmove(ifp, vnet_dst);
 	sx_xunlock(&ifnet_detach_sxlock);
 	CURVNET_RESTORE();
 
 	/* Report the new if_xname back to the userland on success. */
 	if (error == 0)
 		sprintf(ifname, "%s", ifp->if_xname);
 
 	prison_free(pr);
 	return (error);
 }
 #endif /* VIMAGE */
 
 /*
  * Add a group to an interface
  */
 int
 if_addgroup(struct ifnet *ifp, const char *groupname)
 {
 	struct ifg_list		*ifgl;
 	struct ifg_group	*ifg = NULL;
 	struct ifg_member	*ifgm;
 	int 			 new = 0;
 
 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
 	    groupname[strlen(groupname) - 1] <= '9')
 		return (EINVAL);
 
 	IFNET_WLOCK();
 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
 			IFNET_WUNLOCK();
 			return (EEXIST);
 		}
 
 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
 	    	IFNET_WUNLOCK();
 		return (ENOMEM);
 	}
 
 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
 		free(ifgl, M_TEMP);
 		IFNET_WUNLOCK();
 		return (ENOMEM);
 	}
 
 	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
 		if (!strcmp(ifg->ifg_group, groupname))
 			break;
 
 	if (ifg == NULL) {
 		if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
 			free(ifgl, M_TEMP);
 			free(ifgm, M_TEMP);
 			IFNET_WUNLOCK();
 			return (ENOMEM);
 		}
 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
 		ifg->ifg_refcnt = 0;
 		CK_STAILQ_INIT(&ifg->ifg_members);
 		CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
 		new = 1;
 	}
 
 	ifg->ifg_refcnt++;
 	ifgl->ifgl_group = ifg;
 	ifgm->ifgm_ifp = ifp;
 
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
 	CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
 	IF_ADDR_WUNLOCK(ifp);
 
 	IFNET_WUNLOCK();
 
 	if (new)
 		EVENTHANDLER_INVOKE(group_attach_event, ifg);
 	EVENTHANDLER_INVOKE(group_change_event, groupname);
 
 	return (0);
 }
 
 /*
  * Helper function to remove a group out of an interface.  Expects the global
  * ifnet lock to be write-locked, and drops it before returning.
  */
 static void
 _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
     const char *groupname)
 {
 	struct ifg_member *ifgm;
 	bool freeifgl;
 
 	IFNET_WLOCK_ASSERT();
 
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
 	IF_ADDR_WUNLOCK(ifp);
 
 	CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
 		if (ifgm->ifgm_ifp == ifp) {
 			CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
 			    ifg_member, ifgm_next);
 			break;
 		}
 	}
 
 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
 		CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
 		    ifg_next);
 		freeifgl = true;
 	} else {
 		freeifgl = false;
 	}
 	IFNET_WUNLOCK();
 
 	epoch_wait_preempt(net_epoch_preempt);
 	if (freeifgl) {
 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
 		free(ifgl->ifgl_group, M_TEMP);
 	}
 	free(ifgm, M_TEMP);
 	free(ifgl, M_TEMP);
 
 	EVENTHANDLER_INVOKE(group_change_event, groupname);
 }
 
 /*
  * Remove a group from an interface
  */
 int
 if_delgroup(struct ifnet *ifp, const char *groupname)
 {
 	struct ifg_list *ifgl;
 
 	IFNET_WLOCK();
 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
 			break;
 	if (ifgl == NULL) {
 		IFNET_WUNLOCK();
 		return (ENOENT);
 	}
 
 	_if_delgroup_locked(ifp, ifgl, groupname);
 
 	return (0);
 }
 
 /*
  * Remove an interface from all groups
  */
 static void
 if_delgroups(struct ifnet *ifp)
 {
 	struct ifg_list *ifgl;
 	char groupname[IFNAMSIZ];
 
 	IFNET_WLOCK();
 	while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
 		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
 		_if_delgroup_locked(ifp, ifgl, groupname);
 		IFNET_WLOCK();
 	}
 	IFNET_WUNLOCK();
 }
 
 /*
  * Stores all groups from an interface in memory pointed to by ifgr.
  */
 static int
 if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
 {
 	int			 len, error;
 	struct ifg_list		*ifgl;
 	struct ifg_req		 ifgrq, *ifgp;
 
 	NET_EPOCH_ASSERT();
 
 	if (ifgr->ifgr_len == 0) {
 		CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 			ifgr->ifgr_len += sizeof(struct ifg_req);
 		return (0);
 	}
 
 	len = ifgr->ifgr_len;
 	ifgp = ifgr->ifgr_groups;
 	/* XXX: wire */
 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 		if (len < sizeof(ifgrq))
 			return (EINVAL);
 		bzero(&ifgrq, sizeof ifgrq);
 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
 		    sizeof(ifgrq.ifgrq_group));
 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req))))
 			return (error);
 		len -= sizeof(ifgrq);
 		ifgp++;
 	}
 
 	return (0);
 }
 
 /*
  * Stores all members of a group in memory pointed to by igfr
  */
 static int
 if_getgroupmembers(struct ifgroupreq *ifgr)
 {
 	struct ifg_group	*ifg;
 	struct ifg_member	*ifgm;
 	struct ifg_req		 ifgrq, *ifgp;
 	int			 len, error;
 
 	IFNET_RLOCK();
 	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
 		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
 			break;
 	if (ifg == NULL) {
 		IFNET_RUNLOCK();
 		return (ENOENT);
 	}
 
 	if (ifgr->ifgr_len == 0) {
 		CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
 			ifgr->ifgr_len += sizeof(ifgrq);
 		IFNET_RUNLOCK();
 		return (0);
 	}
 
 	len = ifgr->ifgr_len;
 	ifgp = ifgr->ifgr_groups;
 	CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
 		if (len < sizeof(ifgrq)) {
 			IFNET_RUNLOCK();
 			return (EINVAL);
 		}
 		bzero(&ifgrq, sizeof ifgrq);
 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
 		    sizeof(ifgrq.ifgrq_member));
 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
 			IFNET_RUNLOCK();
 			return (error);
 		}
 		len -= sizeof(ifgrq);
 		ifgp++;
 	}
 	IFNET_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Return counter values from counter(9)s stored in ifnet.
  */
 uint64_t
 if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
 {
 
 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 
 	return (counter_u64_fetch(ifp->if_counters[cnt]));
 }
 
 /*
  * Increase an ifnet counter. Usually used for counters shared
  * between the stack and a driver, but function supports them all.
  */
 void
 if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
 {
 
 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 
 	counter_u64_add(ifp->if_counters[cnt], inc);
 }
 
 /*
  * Copy data from ifnet to userland API structure if_data.
  */
 void
 if_data_copy(struct ifnet *ifp, struct if_data *ifd)
 {
 
 	ifd->ifi_type = ifp->if_type;
 	ifd->ifi_physical = 0;
 	ifd->ifi_addrlen = ifp->if_addrlen;
 	ifd->ifi_hdrlen = ifp->if_hdrlen;
 	ifd->ifi_link_state = ifp->if_link_state;
 	ifd->ifi_vhid = 0;
 	ifd->ifi_datalen = sizeof(struct if_data);
 	ifd->ifi_mtu = ifp->if_mtu;
 	ifd->ifi_metric = ifp->if_metric;
 	ifd->ifi_baudrate = ifp->if_baudrate;
 	ifd->ifi_hwassist = ifp->if_hwassist;
 	ifd->ifi_epoch = ifp->if_epoch;
 	ifd->ifi_lastchange = ifp->if_lastchange;
 
 	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
 	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
 	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
 	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
 	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
 	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
 	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
 	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
 	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
 	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
 	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
 	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
 }
 
 /*
  * Initialization, destruction and refcounting functions for ifaddrs.
  */
 struct ifaddr *
 ifa_alloc(size_t size, int flags)
 {
 	struct ifaddr *ifa;
 
 	KASSERT(size >= sizeof(struct ifaddr),
 	    ("%s: invalid size %zu", __func__, size));
 
 	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
 	if (ifa == NULL)
 		return (NULL);
 
 	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
 		goto fail;
 	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
 		goto fail;
 	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
 		goto fail;
 	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
 		goto fail;
 
 	refcount_init(&ifa->ifa_refcnt, 1);
 
 	return (ifa);
 
 fail:
 	/* free(NULL) is okay */
 	counter_u64_free(ifa->ifa_opackets);
 	counter_u64_free(ifa->ifa_ipackets);
 	counter_u64_free(ifa->ifa_obytes);
 	counter_u64_free(ifa->ifa_ibytes);
 	free(ifa, M_IFADDR);
 
 	return (NULL);
 }
 
 void
 ifa_ref(struct ifaddr *ifa)
 {
 	u_int old __diagused;
 
 	old = refcount_acquire(&ifa->ifa_refcnt);
 	KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa));
 }
 
 int
 ifa_try_ref(struct ifaddr *ifa)
 {
 
 	NET_EPOCH_ASSERT();
 	return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt));
 }
 
 static void
 ifa_destroy(epoch_context_t ctx)
 {
 	struct ifaddr *ifa;
 
 	ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx);
 	counter_u64_free(ifa->ifa_opackets);
 	counter_u64_free(ifa->ifa_ipackets);
 	counter_u64_free(ifa->ifa_obytes);
 	counter_u64_free(ifa->ifa_ibytes);
 	free(ifa, M_IFADDR);
 }
 
 void
 ifa_free(struct ifaddr *ifa)
 {
 
 	if (refcount_release(&ifa->ifa_refcnt))
 		NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx);
 }
 
 /*
  * XXX: Because sockaddr_dl has deeper structure than the sockaddr
  * structs used to represent other address families, it is necessary
  * to perform a different comparison.
  */
 
 #define	sa_dl_equal(a1, a2)	\
 	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
 	 ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
 	 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
 	       CLLADDR((const struct sockaddr_dl *)(a2)),		\
 	       ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
 
 /*
  * Locate an interface based on a complete address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithaddr(const struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (sa_equal(addr, ifa->ifa_addr)) {
 				goto done;
 			}
 			/* IP6 doesn't have broadcast */
 			if ((ifp->if_flags & IFF_BROADCAST) &&
 			    ifa->ifa_broadaddr &&
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    sa_equal(ifa->ifa_broadaddr, addr)) {
 				goto done;
 			}
 		}
 	}
 	ifa = NULL;
 done:
 	return (ifa);
 }
 
 int
 ifa_ifwithaddr_check(const struct sockaddr *addr)
 {
 	struct epoch_tracker et;
 	int rc;
 
 	NET_EPOCH_ENTER(et);
 	rc = (ifa_ifwithaddr(addr) != NULL);
 	NET_EPOCH_EXIT(et);
 	return (rc);
 }
 
 /*
  * Locate an interface based on the broadcast address.
  */
 /* ARGSUSED */
 struct ifaddr *
 ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ASSERT();
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
 			continue;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if ((ifp->if_flags & IFF_BROADCAST) &&
 			    ifa->ifa_broadaddr &&
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    sa_equal(ifa->ifa_broadaddr, addr)) {
 				goto done;
 			}
 		}
 	}
 	ifa = NULL;
 done:
 	return (ifa);
 }
 
 /*
  * Locate the point to point interface with a given destination address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ASSERT();
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			continue;
 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
 			continue;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (ifa->ifa_dstaddr != NULL &&
 			    sa_equal(addr, ifa->ifa_dstaddr)) {
 				goto done;
 			}
 		}
 	}
 	ifa = NULL;
 done:
 	return (ifa);
 }
 
 /*
  * Find an interface on a specific network.  If many, choice
  * is most specific found.
  */
 struct ifaddr *
 ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifaddr *ifa_maybe = NULL;
 	u_int af = addr->sa_family;
 	const char *addr_data = addr->sa_data, *cplim;
 
 	NET_EPOCH_ASSERT();
 	/*
 	 * AF_LINK addresses can be looked up directly by their index number,
 	 * so do that if we can.
 	 */
 	if (af == AF_LINK) {
 		ifp = ifnet_byindex(
 		    ((const struct sockaddr_dl *)addr)->sdl_index);
 		return (ifp ? ifp->if_addr : NULL);
 	}
 
 	/*
 	 * Scan though each interface, looking for ones that have addresses
 	 * in this address family and the requested fib.
 	 */
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
 			continue;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			const char *cp, *cp2, *cp3;
 
 			if (ifa->ifa_addr->sa_family != af)
 next:				continue;
 			if (af == AF_INET && 
 			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
 				/*
 				 * This is a bit broken as it doesn't
 				 * take into account that the remote end may
 				 * be a single node in the network we are
 				 * looking for.
 				 * The trouble is that we don't know the
 				 * netmask for the remote end.
 				 */
 				if (ifa->ifa_dstaddr != NULL &&
 				    sa_equal(addr, ifa->ifa_dstaddr)) {
 					goto done;
 				}
 			} else {
 				/*
 				 * Scan all the bits in the ifa's address.
 				 * If a bit dissagrees with what we are
 				 * looking for, mask it with the netmask
 				 * to see if it really matters.
 				 * (A byte at a time)
 				 */
 				if (ifa->ifa_netmask == 0)
 					continue;
 				cp = addr_data;
 				cp2 = ifa->ifa_addr->sa_data;
 				cp3 = ifa->ifa_netmask->sa_data;
 				cplim = ifa->ifa_netmask->sa_len
 					+ (char *)ifa->ifa_netmask;
 				while (cp3 < cplim)
 					if ((*cp++ ^ *cp2++) & *cp3++)
 						goto next; /* next address! */
 				/*
 				 * If the netmask of what we just found
 				 * is more specific than what we had before
 				 * (if we had one), or if the virtual status
 				 * of new prefix is better than of the old one,
 				 * then remember the new one before continuing
 				 * to search for an even better one.
 				 */
 				if (ifa_maybe == NULL ||
 				    ifa_preferred(ifa_maybe, ifa) ||
 				    rn_refines((caddr_t)ifa->ifa_netmask,
 				    (caddr_t)ifa_maybe->ifa_netmask)) {
 					ifa_maybe = ifa;
 				}
 			}
 		}
 	}
 	ifa = ifa_maybe;
 	ifa_maybe = NULL;
 done:
 	return (ifa);
 }
 
 /*
  * Find an interface address specific to an interface best matching
  * a given address.
  */
 struct ifaddr *
 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	const char *cp, *cp2, *cp3;
 	char *cplim;
 	struct ifaddr *ifa_maybe = NULL;
 	u_int af = addr->sa_family;
 
 	if (af >= AF_MAX)
 		return (NULL);
 
 	NET_EPOCH_ASSERT();
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != af)
 			continue;
 		if (ifa_maybe == NULL)
 			ifa_maybe = ifa;
 		if (ifa->ifa_netmask == 0) {
 			if (sa_equal(addr, ifa->ifa_addr) ||
 			    (ifa->ifa_dstaddr &&
 			    sa_equal(addr, ifa->ifa_dstaddr)))
 				goto done;
 			continue;
 		}
 		if (ifp->if_flags & IFF_POINTOPOINT) {
 			if (sa_equal(addr, ifa->ifa_dstaddr))
 				goto done;
 		} else {
 			cp = addr->sa_data;
 			cp2 = ifa->ifa_addr->sa_data;
 			cp3 = ifa->ifa_netmask->sa_data;
 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 			for (; cp3 < cplim; cp3++)
 				if ((*cp++ ^ *cp2++) & *cp3)
 					break;
 			if (cp3 == cplim)
 				goto done;
 		}
 	}
 	ifa = ifa_maybe;
 done:
 	return (ifa);
 }
 
 /*
  * See whether new ifa is better than current one:
  * 1) A non-virtual one is preferred over virtual.
  * 2) A virtual in master state preferred over any other state.
  *
  * Used in several address selecting functions.
  */
 int
 ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
 {
 
 	return (cur->ifa_carp && (!next->ifa_carp ||
 	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
 }
 
 struct sockaddr_dl *
 link_alloc_sdl(size_t size, int flags)
 {
 
 	return (malloc(size, M_TEMP, flags));
 }
 
 void
 link_free_sdl(struct sockaddr *sa)
 {
 	free(sa, M_TEMP);
 }
 
 /*
  * Fills in given sdl with interface basic info.
  * Returns pointer to filled sdl.
  */
 struct sockaddr_dl *
 link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
 {
 	struct sockaddr_dl *sdl;
 
 	sdl = (struct sockaddr_dl *)paddr;
 	memset(sdl, 0, sizeof(struct sockaddr_dl));
 	sdl->sdl_len = sizeof(struct sockaddr_dl);
 	sdl->sdl_family = AF_LINK;
 	sdl->sdl_index = ifp->if_index;
 	sdl->sdl_type = iftype;
 
 	return (sdl);
 }
 
 /*
  * Mark an interface down and notify protocols of
  * the transition.
  */
 static void
 if_unroute(struct ifnet *ifp, int flag, int fam)
 {
-	struct ifaddr *ifa;
-	struct epoch_tracker et;
 
 	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
 
 	ifp->if_flags &= ~flag;
 	getmicrotime(&ifp->if_lastchange);
-	NET_EPOCH_ENTER(et);
-	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
-		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
-			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
-	NET_EPOCH_EXIT(et);
 	ifp->if_qflush(ifp);
 
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
 	rt_ifmsg(ifp);
 }
 
 /*
  * Mark an interface up and notify protocols of
  * the transition.
  */
 static void
 if_route(struct ifnet *ifp, int flag, int fam)
 {
-	struct ifaddr *ifa;
-	struct epoch_tracker et;
 
 	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
 
 	ifp->if_flags |= flag;
 	getmicrotime(&ifp->if_lastchange);
-	NET_EPOCH_ENTER(et);
-	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
-		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
-			pfctlinput(PRC_IFUP, ifa->ifa_addr);
-	NET_EPOCH_EXIT(et);
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
 	rt_ifmsg(ifp);
 #ifdef INET6
 	in6_if_up(ifp);
 #endif
 }
 
 void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
 void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
 struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
 struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
 int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
 int	(*vlan_pcp_p)(struct ifnet *, uint16_t *);
 int	(*vlan_setcookie_p)(struct ifnet *, void *);
 void	*(*vlan_cookie_p)(struct ifnet *);
 
 /*
  * Handle a change in the interface link state. To avoid LORs
  * between driver lock and upper layer locks, as well as possible
  * recursions, we post event to taskqueue, and all job
  * is done in static do_link_state_change().
  */
 void
 if_link_state_change(struct ifnet *ifp, int link_state)
 {
 	/* Return if state hasn't changed. */
 	if (ifp->if_link_state == link_state)
 		return;
 
 	ifp->if_link_state = link_state;
 
 	/* XXXGL: reference ifp? */
 	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
 }
 
 static void
 do_link_state_change(void *arg, int pending)
 {
 	struct ifnet *ifp;
 	int link_state;
 
 	ifp = arg;
 	link_state = ifp->if_link_state;
 
 	CURVNET_SET(ifp->if_vnet);
 	rt_ifmsg(ifp);
 	if (ifp->if_vlantrunk != NULL)
 		(*vlan_link_state_p)(ifp);
 
 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
 	    ifp->if_l2com != NULL)
 		(*ng_ether_link_state_p)(ifp, link_state);
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
 	if (ifp->if_bridge)
 		ifp->if_bridge_linkstate(ifp);
 	if (ifp->if_lagg)
 		(*lagg_linkstate_p)(ifp, link_state);
 
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("IFNET", ifp->if_xname,
 		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
 		    NULL);
 	if (pending > 1)
 		if_printf(ifp, "%d link states coalesced\n", pending);
 	if (log_link_state_change)
 		if_printf(ifp, "link state changed to %s\n",
 		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
 	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
 	CURVNET_RESTORE();
 }
 
 /*
  * Mark an interface down and notify protocols of
  * the transition.
  */
 void
 if_down(struct ifnet *ifp)
 {
 
 	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
 }
 
 /*
  * Mark an interface up and notify protocols of
  * the transition.
  */
 void
 if_up(struct ifnet *ifp)
 {
 
 	if_route(ifp, IFF_UP, AF_UNSPEC);
 	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
 }
 
 /*
  * Flush an interface queue.
  */
 void
 if_qflush(struct ifnet *ifp)
 {
 	struct mbuf *m, *n;
 	struct ifaltq *ifq;
 
 	ifq = &ifp->if_snd;
 	IFQ_LOCK(ifq);
 #ifdef ALTQ
 	if (ALTQ_IS_ENABLED(ifq))
 		ALTQ_PURGE(ifq);
 #endif
 	n = ifq->ifq_head;
 	while ((m = n) != NULL) {
 		n = m->m_nextpkt;
 		m_freem(m);
 	}
 	ifq->ifq_head = 0;
 	ifq->ifq_tail = 0;
 	ifq->ifq_len = 0;
 	IFQ_UNLOCK(ifq);
 }
 
 /*
  * Map interface name to interface structure pointer, with or without
  * returning a reference.
  */
 struct ifnet *
 ifunit_ref(const char *name)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp;
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
 		    !(ifp->if_flags & IFF_DYING))
 			break;
 	}
 	if (ifp != NULL) {
 		if_ref(ifp);
 		MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
 	}
 
 	NET_EPOCH_EXIT(et);
 	return (ifp);
 }
 
 struct ifnet *
 ifunit(const char *name)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp;
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
 			break;
 	}
 	NET_EPOCH_EXIT(et);
 	return (ifp);
 }
 
 void *
 ifr_buffer_get_buffer(void *data)
 {
 	union ifreq_union *ifrup;
 
 	ifrup = data;
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		return ((void *)(uintptr_t)
 		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
 #endif
 	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
 }
 
 static void
 ifr_buffer_set_buffer_null(void *data)
 {
 	union ifreq_union *ifrup;
 
 	ifrup = data;
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
 	else
 #endif
 		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
 }
 
 size_t
 ifr_buffer_get_length(void *data)
 {
 	union ifreq_union *ifrup;
 
 	ifrup = data;
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
 #endif
 	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
 }
 
 static void
 ifr_buffer_set_length(void *data, size_t len)
 {
 	union ifreq_union *ifrup;
 
 	ifrup = data;
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
 	else
 #endif
 		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
 }
 
 void *
 ifr_data_get_ptr(void *ifrp)
 {
 	union ifreq_union *ifrup;
 
 	ifrup = ifrp;
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		return ((void *)(uintptr_t)
 		    ifrup->ifr32.ifr_ifru.ifru_data);
 #endif
 		return (ifrup->ifr.ifr_ifru.ifru_data);
 }
 
 struct ifcap_nv_bit_name {
 	int cap_bit;
 	const char *cap_name;
 };
 #define CAPNV(x) {.cap_bit = IFCAP_##x, \
     .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
 const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
 	CAPNV(RXCSUM),
 	CAPNV(TXCSUM),
 	CAPNV(NETCONS),
 	CAPNV(VLAN_MTU),
 	CAPNV(VLAN_HWTAGGING),
 	CAPNV(JUMBO_MTU),
 	CAPNV(POLLING),
 	CAPNV(VLAN_HWCSUM),
 	CAPNV(TSO4),
 	CAPNV(TSO6),
 	CAPNV(LRO),
 	CAPNV(WOL_UCAST),
 	CAPNV(WOL_MCAST),
 	CAPNV(WOL_MAGIC),
 	CAPNV(TOE4),
 	CAPNV(TOE6),
 	CAPNV(VLAN_HWFILTER),
 	CAPNV(VLAN_HWTSO),
 	CAPNV(LINKSTATE),
 	CAPNV(NETMAP),
 	CAPNV(RXCSUM_IPV6),
 	CAPNV(TXCSUM_IPV6),
 	CAPNV(HWSTATS),
 	CAPNV(TXRTLMT),
 	CAPNV(HWRXTSTMP),
 	CAPNV(MEXTPG),
 	CAPNV(TXTLS4),
 	CAPNV(TXTLS6),
 	CAPNV(VXLAN_HWCSUM),
 	CAPNV(VXLAN_HWTSO),
 	CAPNV(TXTLS_RTLMT),
 	{0, NULL}
 };
 #define CAP2NV(x) {.cap_bit = IFCAP2_##x, \
     .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
 const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
 	CAP2NV(RXTLS4),
 	CAP2NV(RXTLS6),
 	{0, NULL}
 };
 #undef CAPNV
 #undef CAP2NV
 
 int
 if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
     const struct ifcap_nv_bit_name *nn, bool all)
 {
 	int i, res;
 
 	res = 0;
 	for (i = 0; nn[i].cap_name != NULL; i++) {
 		if (nvlist_exists_bool(nv, nn[i].cap_name)) {
 			if (all || nvlist_get_bool(nv, nn[i].cap_name))
 				res |= nn[i].cap_bit;
 		} else {
 			res |= *old_cap & nn[i].cap_bit;
 		}
 	}
 	return (res);
 }
 
 void
 if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
     int ifr_cap, int ifr_req)
 {
 	int i;
 
 	for (i = 0; nn[i].cap_name != NULL; i++) {
 		if ((nn[i].cap_bit & ifr_cap) != 0) {
 			nvlist_add_bool(nv, nn[i].cap_name,
 			    (nn[i].cap_bit & ifr_req) != 0);
 		}
 	}
 }
 
 /*
  * Hardware specific interface ioctls.
  */
 int
 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 {
 	struct ifreq *ifr;
 	int error = 0, do_ifup = 0;
 	int new_flags, temp_flags;
 	size_t namelen, onamelen;
 	size_t descrlen, nvbuflen;
 	char *descrbuf, *odescrbuf;
 	char new_name[IFNAMSIZ];
 	char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8];
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	void *buf;
 	nvlist_t *nvcap;
 	struct siocsifcapnv_driver_data drv_ioctl_data;
 
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
 	case SIOCGIFINDEX:
 		ifr->ifr_index = ifp->if_index;
 		break;
 
 	case SIOCGIFFLAGS:
 		temp_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifr->ifr_flags = temp_flags & 0xffff;
 		ifr->ifr_flagshigh = temp_flags >> 16;
 		break;
 
 	case SIOCGIFCAP:
 		ifr->ifr_reqcap = ifp->if_capabilities;
 		ifr->ifr_curcap = ifp->if_capenable;
 		break;
 
 	case SIOCGIFCAPNV:
 		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
 			error = EINVAL;
 			break;
 		}
 		buf = NULL;
 		nvcap = nvlist_create(0);
 		for (;;) {
 			if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
 			    ifp->if_capabilities, ifp->if_capenable);
 			if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
 			    ifp->if_capabilities2, ifp->if_capenable2);
 			error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
 			    __DECONST(caddr_t, nvcap));
 			if (error != 0) {
 				if_printf(ifp,
 			    "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
 				    error);
 				break;
 			}
 			buf = nvlist_pack(nvcap, &nvbuflen);
 			if (buf == NULL) {
 				error = nvlist_error(nvcap);
 				if (error == 0)
 					error = EDOOFUS;
 				break;
 			}
 			if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
 				ifr->ifr_cap_nv.length = nvbuflen;
 				ifr->ifr_cap_nv.buffer = NULL;
 				error = EFBIG;
 				break;
 			}
 			ifr->ifr_cap_nv.length = nvbuflen;
 			error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
 			break;
 		}
 		free(buf, M_NVLIST);
 		nvlist_destroy(nvcap);
 		break;
 
 	case SIOCGIFDATA:
 	{
 		struct if_data ifd;
 
 		/* Ensure uninitialised padding is not leaked. */
 		memset(&ifd, 0, sizeof(ifd));
 
 		if_data_copy(ifp, &ifd);
 		error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
 		break;
 	}
 
 #ifdef MAC
 	case SIOCGIFMAC:
 		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCGIFMETRIC:
 		ifr->ifr_metric = ifp->if_metric;
 		break;
 
 	case SIOCGIFMTU:
 		ifr->ifr_mtu = ifp->if_mtu;
 		break;
 
 	case SIOCGIFPHYS:
 		/* XXXGL: did this ever worked? */
 		ifr->ifr_phys = 0;
 		break;
 
 	case SIOCGIFDESCR:
 		error = 0;
 		sx_slock(&ifdescr_sx);
 		if (ifp->if_description == NULL)
 			error = ENOMSG;
 		else {
 			/* space for terminating nul */
 			descrlen = strlen(ifp->if_description) + 1;
 			if (ifr_buffer_get_length(ifr) < descrlen)
 				ifr_buffer_set_buffer_null(ifr);
 			else
 				error = copyout(ifp->if_description,
 				    ifr_buffer_get_buffer(ifr), descrlen);
 			ifr_buffer_set_length(ifr, descrlen);
 		}
 		sx_sunlock(&ifdescr_sx);
 		break;
 
 	case SIOCSIFDESCR:
 		error = priv_check(td, PRIV_NET_SETIFDESCR);
 		if (error)
 			return (error);
 
 		/*
 		 * Copy only (length-1) bytes to make sure that
 		 * if_description is always nul terminated.  The
 		 * length parameter is supposed to count the
 		 * terminating nul in.
 		 */
 		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
 			return (ENAMETOOLONG);
 		else if (ifr_buffer_get_length(ifr) == 0)
 			descrbuf = NULL;
 		else {
 			descrbuf = malloc(ifr_buffer_get_length(ifr),
 			    M_IFDESCR, M_WAITOK | M_ZERO);
 			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
 			    ifr_buffer_get_length(ifr) - 1);
 			if (error) {
 				free(descrbuf, M_IFDESCR);
 				break;
 			}
 		}
 
 		sx_xlock(&ifdescr_sx);
 		odescrbuf = ifp->if_description;
 		ifp->if_description = descrbuf;
 		sx_xunlock(&ifdescr_sx);
 
 		getmicrotime(&ifp->if_lastchange);
 		free(odescrbuf, M_IFDESCR);
 		break;
 
 	case SIOCGIFFIB:
 		ifr->ifr_fib = ifp->if_fib;
 		break;
 
 	case SIOCSIFFIB:
 		error = priv_check(td, PRIV_NET_SETIFFIB);
 		if (error)
 			return (error);
 		if (ifr->ifr_fib >= rt_numfibs)
 			return (EINVAL);
 
 		ifp->if_fib = ifr->ifr_fib;
 		break;
 
 	case SIOCSIFFLAGS:
 		error = priv_check(td, PRIV_NET_SETIFFLAGS);
 		if (error)
 			return (error);
 		/*
 		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
 		 * check, so we don't need special handling here yet.
 		 */
 		new_flags = (ifr->ifr_flags & 0xffff) |
 		    (ifr->ifr_flagshigh << 16);
 		if (ifp->if_flags & IFF_UP &&
 		    (new_flags & IFF_UP) == 0) {
 			if_down(ifp);
 		} else if (new_flags & IFF_UP &&
 		    (ifp->if_flags & IFF_UP) == 0) {
 			do_ifup = 1;
 		}
 		/* See if permanently promiscuous mode bit is about to flip */
 		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
 			if (new_flags & IFF_PPROMISC)
 				ifp->if_flags |= IFF_PROMISC;
 			else if (ifp->if_pcount == 0)
 				ifp->if_flags &= ~IFF_PROMISC;
 			if (log_promisc_mode_change)
                                 if_printf(ifp, "permanently promiscuous mode %s\n",
                                     ((new_flags & IFF_PPROMISC) ?
                                      "enabled" : "disabled"));
 		}
 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 			(new_flags &~ IFF_CANTCHANGE);
 		if (ifp->if_ioctl) {
 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
 		}
 		if (do_ifup)
 			if_up(ifp);
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFCAP:
 		error = priv_check(td, PRIV_NET_SETIFCAP);
 		if (error != 0)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
 			return (EINVAL);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFCAPNV:
 		error = priv_check(td, PRIV_NET_SETIFCAP);
 		if (error != 0)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		if ((ifp->if_capabilities & IFCAP_NV) == 0)
 			return (EINVAL);
 		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
 			return (EINVAL);
 		nvcap = NULL;
 		buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
 		for (;;) {
 			error = copyin(ifr->ifr_cap_nv.buffer, buf,
 			    ifr->ifr_cap_nv.length);
 			if (error != 0)
 				break;
 			nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
 			if (nvcap == NULL) {
 				error = EINVAL;
 				break;
 			}
 			drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
 			    &ifp->if_capenable, ifcap_nv_bit_names, false);
 			if ((drv_ioctl_data.reqcap &
 			    ~ifp->if_capabilities) != 0) {
 				error = EINVAL;
 				break;
 			}
 			drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
 			    &ifp->if_capenable2, ifcap2_nv_bit_names, false);
 			if ((drv_ioctl_data.reqcap2 &
 			    ~ifp->if_capabilities2) != 0) {
 				error = EINVAL;
 				break;
 			}
 			drv_ioctl_data.nvcap = nvcap;
 			error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
 			    (caddr_t)&drv_ioctl_data);
 			break;
 		}
 		nvlist_destroy(nvcap);
 		free(buf, M_TEMP);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 #ifdef MAC
 	case SIOCSIFMAC:
 		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCSIFNAME:
 		error = priv_check(td, PRIV_NET_SETIFNAME);
 		if (error)
 			return (error);
 		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
 		    NULL);
 		if (error != 0)
 			return (error);
 		if (new_name[0] == '\0')
 			return (EINVAL);
 		if (strcmp(new_name, ifp->if_xname) == 0)
 			break;
 		if (ifunit(new_name) != NULL)
 			return (EEXIST);
 
 		/*
 		 * XXX: Locking.  Nothing else seems to lock if_flags,
 		 * and there are numerous other races with the
 		 * ifunit() checks not being atomic with namespace
 		 * changes (renames, vmoves, if_attach, etc).
 		 */
 		ifp->if_flags |= IFF_RENAMING;
 		
 		/* Announce the departure of the interface. */
 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 
 		if_printf(ifp, "changing name to '%s'\n", new_name);
 
 		IF_ADDR_WLOCK(ifp);
 		strlcpy(old_name, ifp->if_xname, sizeof(old_name));
 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
 		ifa = ifp->if_addr;
 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 		namelen = strlen(new_name);
 		onamelen = sdl->sdl_nlen;
 		/*
 		 * Move the address if needed.  This is safe because we
 		 * allocate space for a name of length IFNAMSIZ when we
 		 * create this in if_attach().
 		 */
 		if (namelen != onamelen) {
 			bcopy(sdl->sdl_data + onamelen,
 			    sdl->sdl_data + namelen, sdl->sdl_alen);
 		}
 		bcopy(new_name, sdl->sdl_data, namelen);
 		sdl->sdl_nlen = namelen;
 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
 		bzero(sdl->sdl_data, onamelen);
 		while (namelen != 0)
 			sdl->sdl_data[--namelen] = 0xff;
 		IF_ADDR_WUNLOCK(ifp);
 
 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
 		/* Announce the return of the interface. */
 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 
 		ifp->if_flags &= ~IFF_RENAMING;
 
 		snprintf(strbuf, sizeof(strbuf), "name=%s", new_name);
 		devctl_notify("IFNET", old_name, "RENAME", strbuf);
 		break;
 
 #ifdef VIMAGE
 	case SIOCSIFVNET:
 		error = priv_check(td, PRIV_NET_SETIFVNET);
 		if (error)
 			return (error);
 		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
 		break;
 #endif
 
 	case SIOCSIFMETRIC:
 		error = priv_check(td, PRIV_NET_SETIFMETRIC);
 		if (error)
 			return (error);
 		ifp->if_metric = ifr->ifr_metric;
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYS:
 		error = priv_check(td, PRIV_NET_SETIFPHYS);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFMTU:
 	{
 		u_long oldmtu = ifp->if_mtu;
 
 		error = priv_check(td, PRIV_NET_SETIFMTU);
 		if (error)
 			return (error);
 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
 			return (EINVAL);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		/* Disallow MTU changes on bridge member interfaces. */
 		if (ifp->if_bridge)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0) {
 			getmicrotime(&ifp->if_lastchange);
 			rt_ifmsg(ifp);
 #ifdef INET
 			DEBUGNET_NOTIFY_MTU(ifp);
 #endif
 		}
 		/*
 		 * If the link MTU changed, do network layer specific procedure.
 		 */
 		if (ifp->if_mtu != oldmtu) {
 #ifdef INET6
 			nd6_setmtu(ifp);
 #endif
 			rt_updatemtu(ifp);
 		}
 		break;
 	}
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (cmd == SIOCADDMULTI)
 			error = priv_check(td, PRIV_NET_ADDMULTI);
 		else
 			error = priv_check(td, PRIV_NET_DELMULTI);
 		if (error)
 			return (error);
 
 		/* Don't allow group membership on non-multicast interfaces. */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
 			return (EOPNOTSUPP);
 
 		/* Don't let users screw up protocols' entries. */
 		if (ifr->ifr_addr.sa_family != AF_LINK)
 			return (EINVAL);
 
 		if (cmd == SIOCADDMULTI) {
 			struct epoch_tracker et;
 			struct ifmultiaddr *ifma;
 
 			/*
 			 * Userland is only permitted to join groups once
 			 * via the if_addmulti() KPI, because it cannot hold
 			 * struct ifmultiaddr * between calls. It may also
 			 * lose a race while we check if the membership
 			 * already exists.
 			 */
 			NET_EPOCH_ENTER(et);
 			ifma = if_findmulti(ifp, &ifr->ifr_addr);
 			NET_EPOCH_EXIT(et);
 			if (ifma != NULL)
 				error = EADDRINUSE;
 			else
 				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 		} else {
 			error = if_delmulti(ifp, &ifr->ifr_addr);
 		}
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYADDR:
 	case SIOCDIFPHYADDR:
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif
 	case SIOCSIFMEDIA:
 	case SIOCSIFGENERIC:
 		error = priv_check(td, PRIV_NET_HWIOCTL);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCGIFSTATUS:
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 	case SIOCGIFMEDIA:
 	case SIOCGIFXMEDIA:
 	case SIOCGIFGENERIC:
 	case SIOCGIFRSSKEY:
 	case SIOCGIFRSSHASH:
 	case SIOCGIFDOWNREASON:
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		break;
 
 	case SIOCSIFLLADDR:
 		error = priv_check(td, PRIV_NET_SETLLADDR);
 		if (error)
 			return (error);
 		error = if_setlladdr(ifp,
 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
 		break;
 
 	case SIOCGHWADDR:
 		error = if_gethwaddr(ifp, ifr);
 		break;
 
 	case SIOCAIFGROUP:
 		error = priv_check(td, PRIV_NET_ADDIFGROUP);
 		if (error)
 			return (error);
 		error = if_addgroup(ifp,
 		    ((struct ifgroupreq *)data)->ifgr_group);
 		if (error != 0)
 			return (error);
 		break;
 
 	case SIOCGIFGROUP:
 	{
 		struct epoch_tracker et;
 
 		NET_EPOCH_ENTER(et);
 		error = if_getgroup((struct ifgroupreq *)data, ifp);
 		NET_EPOCH_EXIT(et);
 		break;
 	}
 
 	case SIOCDIFGROUP:
 		error = priv_check(td, PRIV_NET_DELIFGROUP);
 		if (error)
 			return (error);
 		error = if_delgroup(ifp,
 		    ((struct ifgroupreq *)data)->ifgr_group);
 		if (error != 0)
 			return (error);
 		break;
 
 	default:
 		error = ENOIOCTL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Interface ioctls.
  */
 int
 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 {
 #ifdef COMPAT_FREEBSD32
 	union {
 		struct ifconf ifc;
 		struct ifdrv ifd;
 		struct ifgroupreq ifgr;
 		struct ifmediareq ifmr;
 	} thunk;
 	u_long saved_cmd;
 	struct ifconf32 *ifc32;
 	struct ifdrv32 *ifd32;
 	struct ifgroupreq32 *ifgr32;
 	struct ifmediareq32 *ifmr32;
 #endif
 	struct ifnet *ifp;
 	struct ifreq *ifr;
 	int error;
 	int oif_flags;
 #ifdef VIMAGE
 	bool shutdown;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 #ifdef VIMAGE
 	/* Make sure the VNET is stable. */
 	shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
 	if (shutdown) {
 		CURVNET_RESTORE();
 		return (EBUSY);
 	}
 #endif
 
 #ifdef COMPAT_FREEBSD32
 	saved_cmd = cmd;
 	switch (cmd) {
 	case SIOCGIFCONF32:
 		ifc32 = (struct ifconf32 *)data;
 		thunk.ifc.ifc_len = ifc32->ifc_len;
 		thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
 		data = (caddr_t)&thunk.ifc;
 		cmd = SIOCGIFCONF;
 		break;
 	case SIOCGDRVSPEC32:
 	case SIOCSDRVSPEC32:
 		ifd32 = (struct ifdrv32 *)data;
 		memcpy(thunk.ifd.ifd_name, ifd32->ifd_name,
 		    sizeof(thunk.ifd.ifd_name));
 		thunk.ifd.ifd_cmd = ifd32->ifd_cmd;
 		thunk.ifd.ifd_len = ifd32->ifd_len;
 		thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data);
 		data = (caddr_t)&thunk.ifd;
 		cmd = _IOC_NEWTYPE(cmd, struct ifdrv);
 		break;
 	case SIOCAIFGROUP32:
 	case SIOCGIFGROUP32:
 	case SIOCDIFGROUP32:
 	case SIOCGIFGMEMB32:
 		ifgr32 = (struct ifgroupreq32 *)data;
 		memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name,
 		    sizeof(thunk.ifgr.ifgr_name));
 		thunk.ifgr.ifgr_len = ifgr32->ifgr_len;
 		switch (cmd) {
 		case SIOCAIFGROUP32:
 		case SIOCDIFGROUP32:
 			memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group,
 			    sizeof(thunk.ifgr.ifgr_group));
 			break;
 		case SIOCGIFGROUP32:
 		case SIOCGIFGMEMB32:
 			thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups);
 			break;
 		}
 		data = (caddr_t)&thunk.ifgr;
 		cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq);
 		break;
 	case SIOCGIFMEDIA32:
 	case SIOCGIFXMEDIA32:
 		ifmr32 = (struct ifmediareq32 *)data;
 		memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name,
 		    sizeof(thunk.ifmr.ifm_name));
 		thunk.ifmr.ifm_current = ifmr32->ifm_current;
 		thunk.ifmr.ifm_mask = ifmr32->ifm_mask;
 		thunk.ifmr.ifm_status = ifmr32->ifm_status;
 		thunk.ifmr.ifm_active = ifmr32->ifm_active;
 		thunk.ifmr.ifm_count = ifmr32->ifm_count;
 		thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist);
 		data = (caddr_t)&thunk.ifmr;
 		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
 		break;
 	}
 #endif
 
 	switch (cmd) {
 	case SIOCGIFCONF:
 		error = ifconf(cmd, data);
 		goto out_noref;
 	}
 
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
 #ifdef VIMAGE
 	case SIOCSIFRVNET:
 		error = priv_check(td, PRIV_NET_SETIFVNET);
 		if (error == 0)
 			error = if_vmove_reclaim(td, ifr->ifr_name,
 			    ifr->ifr_jid);
 		goto out_noref;
 #endif
 	case SIOCIFCREATE:
 	case SIOCIFCREATE2:
 		error = priv_check(td, PRIV_NET_IFCREATE);
 		if (error == 0)
 			error = if_clone_create(ifr->ifr_name,
 			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
 			    ifr_data_get_ptr(ifr) : NULL);
 		goto out_noref;
 	case SIOCIFDESTROY:
 		error = priv_check(td, PRIV_NET_IFDESTROY);
 
 		if (error == 0) {
 			sx_xlock(&ifnet_detach_sxlock);
 			error = if_clone_destroy(ifr->ifr_name);
 			sx_xunlock(&ifnet_detach_sxlock);
 		}
 		goto out_noref;
 
 	case SIOCIFGCLONERS:
 		error = if_clone_list((struct if_clonereq *)data);
 		goto out_noref;
 
 	case SIOCGIFGMEMB:
 		error = if_getgroupmembers((struct ifgroupreq *)data);
 		goto out_noref;
 
 #if defined(INET) || defined(INET6)
 	case SIOCSVH:
 	case SIOCGVH:
 		if (carp_ioctl_p == NULL)
 			error = EPROTONOSUPPORT;
 		else
 			error = (*carp_ioctl_p)(ifr, cmd, td);
 		goto out_noref;
 #endif
 	}
 
 	ifp = ifunit_ref(ifr->ifr_name);
 	if (ifp == NULL) {
 		error = ENXIO;
 		goto out_noref;
 	}
 
 	error = ifhwioctl(cmd, ifp, data, td);
 	if (error != ENOIOCTL)
 		goto out_ref;
 
 	oif_flags = ifp->if_flags;
 	if (so->so_proto == NULL) {
 		error = EOPNOTSUPP;
 		goto out_ref;
 	}
 
 	/*
 	 * Pass the request on to the socket control method, and if the
 	 * latter returns EOPNOTSUPP, directly to the interface.
 	 *
 	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
 	 * trust SIOCSIFADDR et al to come from an already privileged
 	 * layer, and do not perform any credentials checks or input
 	 * validation.
 	 */
 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
 	    ifp, td));
 	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
 	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
 	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 
 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 #ifdef INET6
 		if (ifp->if_flags & IFF_UP)
 			in6_if_up(ifp);
 #endif
 	}
 
 out_ref:
 	if_rele(ifp);
 out_noref:
 	CURVNET_RESTORE();
 #ifdef COMPAT_FREEBSD32
 	if (error != 0)
 		return (error);
 	switch (saved_cmd) {
 	case SIOCGIFCONF32:
 		ifc32->ifc_len = thunk.ifc.ifc_len;
 		break;
 	case SIOCGDRVSPEC32:
 		/*
 		 * SIOCGDRVSPEC is IOWR, but nothing actually touches
 		 * the struct so just assert that ifd_len (the only
 		 * field it might make sense to update) hasn't
 		 * changed.
 		 */
 		KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len,
 		    ("ifd_len was updated %u -> %zu", ifd32->ifd_len,
 			thunk.ifd.ifd_len));
 		break;
 	case SIOCGIFGROUP32:
 	case SIOCGIFGMEMB32:
 		ifgr32->ifgr_len = thunk.ifgr.ifgr_len;
 		break;
 	case SIOCGIFMEDIA32:
 	case SIOCGIFXMEDIA32:
 		ifmr32->ifm_current = thunk.ifmr.ifm_current;
 		ifmr32->ifm_mask = thunk.ifmr.ifm_mask;
 		ifmr32->ifm_status = thunk.ifmr.ifm_status;
 		ifmr32->ifm_active = thunk.ifmr.ifm_active;
 		ifmr32->ifm_count = thunk.ifmr.ifm_count;
 		break;
 	}
 #endif
 	return (error);
 }
 
 /*
  * The code common to handling reference counted flags,
  * e.g., in ifpromisc() and if_allmulti().
  * The "pflag" argument can specify a permanent mode flag to check,
  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
  *
  * Only to be used on stack-owned flags, not driver-owned flags.
  */
 static int
 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
 {
 	struct ifreq ifr;
 	int error;
 	int oldflags, oldcount;
 
 	/* Sanity checks to catch programming errors */
 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
 	    ("%s: setting driver-owned flag %d", __func__, flag));
 
 	if (onswitch)
 		KASSERT(*refcount >= 0,
 		    ("%s: increment negative refcount %d for flag %d",
 		    __func__, *refcount, flag));
 	else
 		KASSERT(*refcount > 0,
 		    ("%s: decrement non-positive refcount %d for flag %d",
 		    __func__, *refcount, flag));
 
 	/* In case this mode is permanent, just touch refcount */
 	if (ifp->if_flags & pflag) {
 		*refcount += onswitch ? 1 : -1;
 		return (0);
 	}
 
 	/* Save ifnet parameters for if_ioctl() may fail */
 	oldcount = *refcount;
 	oldflags = ifp->if_flags;
 
 	/*
 	 * See if we aren't the only and touching refcount is enough.
 	 * Actually toggle interface flag if we are the first or last.
 	 */
 	if (onswitch) {
 		if ((*refcount)++)
 			return (0);
 		ifp->if_flags |= flag;
 	} else {
 		if (--(*refcount))
 			return (0);
 		ifp->if_flags &= ~flag;
 	}
 
 	/* Call down the driver since we've changed interface flags */
 	if (ifp->if_ioctl == NULL) {
 		error = EOPNOTSUPP;
 		goto recover;
 	}
 	ifr.ifr_flags = ifp->if_flags & 0xffff;
 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 	if (error)
 		goto recover;
 	/* Notify userland that interface flags have changed */
 	rt_ifmsg(ifp);
 	return (0);
 
 recover:
 	/* Recover after driver error */
 	*refcount = oldcount;
 	ifp->if_flags = oldflags;
 	return (error);
 }
 
 /*
  * Set/clear promiscuous mode on interface ifp based on the truth value
  * of pswitch.  The calls are reference counted so that only the first
  * "on" request actually has an effect, as does the final "off" request.
  * Results are undefined if the "off" and "on" requests are not matched.
  */
 int
 ifpromisc(struct ifnet *ifp, int pswitch)
 {
 	int error;
 	int oldflags = ifp->if_flags;
 
 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
 			   &ifp->if_pcount, pswitch);
 	/* If promiscuous mode status has changed, log a message */
 	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
             log_promisc_mode_change)
 		if_printf(ifp, "promiscuous mode %s\n",
 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
 	return (error);
 }
 
 /*
  * Return interface configuration
  * of system.  List may be used
  * in later ioctl's (above) to get
  * other information.
  */
 /*ARGSUSED*/
 static int
 ifconf(u_long cmd, caddr_t data)
 {
 	struct ifconf *ifc = (struct ifconf *)data;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 	struct sbuf *sb;
 	int error, full = 0, valid_len, max_len;
 
 	/* Limit initial buffer size to maxphys to avoid DoS from userspace. */
 	max_len = maxphys - 1;
 
 	/* Prevent hostile input from being able to crash the system */
 	if (ifc->ifc_len <= 0)
 		return (EINVAL);
 
 again:
 	if (ifc->ifc_len <= max_len) {
 		max_len = ifc->ifc_len;
 		full = 1;
 	}
 	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
 	max_len = 0;
 	valid_len = 0;
 
 	IFNET_RLOCK();
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		struct epoch_tracker et;
 		int addrs;
 
 		/*
 		 * Zero the ifr to make sure we don't disclose the contents
 		 * of the stack.
 		 */
 		memset(&ifr, 0, sizeof(ifr));
 
 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
 		    >= sizeof(ifr.ifr_name)) {
 			sbuf_delete(sb);
 			IFNET_RUNLOCK();
 			return (ENAMETOOLONG);
 		}
 
 		addrs = 0;
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa = ifa->ifa_addr;
 
 			if (prison_if(curthread->td_ucred, sa) != 0)
 				continue;
 			addrs++;
 			if (sa->sa_len <= sizeof(*sa)) {
 				if (sa->sa_len < sizeof(*sa)) {
 					memset(&ifr.ifr_ifru.ifru_addr, 0,
 					    sizeof(ifr.ifr_ifru.ifru_addr));
 					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
 					    sa->sa_len);
 				} else
 					ifr.ifr_ifru.ifru_addr = *sa;
 				sbuf_bcat(sb, &ifr, sizeof(ifr));
 				max_len += sizeof(ifr);
 			} else {
 				sbuf_bcat(sb, &ifr,
 				    offsetof(struct ifreq, ifr_addr));
 				max_len += offsetof(struct ifreq, ifr_addr);
 				sbuf_bcat(sb, sa, sa->sa_len);
 				max_len += sa->sa_len;
 			}
 
 			if (sbuf_error(sb) == 0)
 				valid_len = sbuf_len(sb);
 		}
 		NET_EPOCH_EXIT(et);
 		if (addrs == 0) {
 			sbuf_bcat(sb, &ifr, sizeof(ifr));
 			max_len += sizeof(ifr);
 
 			if (sbuf_error(sb) == 0)
 				valid_len = sbuf_len(sb);
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/*
 	 * If we didn't allocate enough space (uncommon), try again.  If
 	 * we have already allocated as much space as we are allowed,
 	 * return what we've got.
 	 */
 	if (valid_len != max_len && !full) {
 		sbuf_delete(sb);
 		goto again;
 	}
 
 	ifc->ifc_len = valid_len;
 	sbuf_finish(sb);
 	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
 	sbuf_delete(sb);
 	return (error);
 }
 
 /*
  * Just like ifpromisc(), but for all-multicast-reception mode.
  */
 int
 if_allmulti(struct ifnet *ifp, int onswitch)
 {
 
 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
 }
 
 struct ifmultiaddr *
 if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
 {
 	struct ifmultiaddr *ifma;
 
 	IF_ADDR_LOCK_ASSERT(ifp);
 
 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (sa->sa_family == AF_LINK) {
 			if (sa_dl_equal(ifma->ifma_addr, sa))
 				break;
 		} else {
 			if (sa_equal(ifma->ifma_addr, sa))
 				break;
 		}
 	}
 
 	return ifma;
 }
 
 /*
  * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
  * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
  * the ifnet multicast address list here, so the caller must do that and
  * other setup work (such as notifying the device driver).  The reference
  * count is initialized to 1.
  */
 static struct ifmultiaddr *
 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
     int mflags)
 {
 	struct ifmultiaddr *ifma;
 	struct sockaddr *dupsa;
 
 	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
 	    M_ZERO);
 	if (ifma == NULL)
 		return (NULL);
 
 	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
 	if (dupsa == NULL) {
 		free(ifma, M_IFMADDR);
 		return (NULL);
 	}
 	bcopy(sa, dupsa, sa->sa_len);
 	ifma->ifma_addr = dupsa;
 
 	ifma->ifma_ifp = ifp;
 	ifma->ifma_refcount = 1;
 	ifma->ifma_protospec = NULL;
 
 	if (llsa == NULL) {
 		ifma->ifma_lladdr = NULL;
 		return (ifma);
 	}
 
 	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
 	if (dupsa == NULL) {
 		free(ifma->ifma_addr, M_IFMADDR);
 		free(ifma, M_IFMADDR);
 		return (NULL);
 	}
 	bcopy(llsa, dupsa, llsa->sa_len);
 	ifma->ifma_lladdr = dupsa;
 
 	return (ifma);
 }
 
 /*
  * if_freemulti: free ifmultiaddr structure and possibly attached related
  * addresses.  The caller is responsible for implementing reference
  * counting, notifying the driver, handling routing messages, and releasing
  * any dependent link layer state.
  */
 #ifdef MCAST_VERBOSE
 extern void kdb_backtrace(void);
 #endif
 static void
 if_freemulti_internal(struct ifmultiaddr *ifma)
 {
 
 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
 	    ifma->ifma_refcount));
 
 	if (ifma->ifma_lladdr != NULL)
 		free(ifma->ifma_lladdr, M_IFMADDR);
 #ifdef MCAST_VERBOSE
 	kdb_backtrace();
 	printf("%s freeing ifma: %p\n", __func__, ifma);
 #endif
 	free(ifma->ifma_addr, M_IFMADDR);
 	free(ifma, M_IFMADDR);
 }
 
 static void
 if_destroymulti(epoch_context_t ctx)
 {
 	struct ifmultiaddr *ifma;
 
 	ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx);
 	if_freemulti_internal(ifma);
 }
 
 void
 if_freemulti(struct ifmultiaddr *ifma)
 {
 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
 	    ifma->ifma_refcount));
 
 	NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx);
 }
 
 /*
  * Register an additional multicast address with a network interface.
  *
  * - If the address is already present, bump the reference count on the
  *   address and return.
  * - If the address is not link-layer, look up a link layer address.
  * - Allocate address structures for one or both addresses, and attach to the
  *   multicast address list on the interface.  If automatically adding a link
  *   layer address, the protocol address will own a reference to the link
  *   layer address, to be freed when it is freed.
  * - Notify the network device driver of an addition to the multicast address
  *   list.
  *
  * 'sa' points to caller-owned memory with the desired multicast address.
  *
  * 'retifma' will be used to return a pointer to the resulting multicast
  * address reference, if desired.
  */
 int
 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
     struct ifmultiaddr **retifma)
 {
 	struct ifmultiaddr *ifma, *ll_ifma;
 	struct sockaddr *llsa;
 	struct sockaddr_dl sdl;
 	int error;
 
 #ifdef INET
 	IN_MULTI_LIST_UNLOCK_ASSERT();
 #endif
 #ifdef INET6
 	IN6_MULTI_LIST_UNLOCK_ASSERT();
 #endif
 	/*
 	 * If the address is already present, return a new reference to it;
 	 * otherwise, allocate storage and set up a new address.
 	 */
 	IF_ADDR_WLOCK(ifp);
 	ifma = if_findmulti(ifp, sa);
 	if (ifma != NULL) {
 		ifma->ifma_refcount++;
 		if (retifma != NULL)
 			*retifma = ifma;
 		IF_ADDR_WUNLOCK(ifp);
 		return (0);
 	}
 
 	/*
 	 * The address isn't already present; resolve the protocol address
 	 * into a link layer address, and then look that up, bump its
 	 * refcount or allocate an ifma for that also.
 	 * Most link layer resolving functions returns address data which
 	 * fits inside default sockaddr_dl structure. However callback
 	 * can allocate another sockaddr structure, in that case we need to
 	 * free it later.
 	 */
 	llsa = NULL;
 	ll_ifma = NULL;
 	if (ifp->if_resolvemulti != NULL) {
 		/* Provide called function with buffer size information */
 		sdl.sdl_len = sizeof(sdl);
 		llsa = (struct sockaddr *)&sdl;
 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
 		if (error)
 			goto unlock_out;
 	}
 
 	/*
 	 * Allocate the new address.  Don't hook it up yet, as we may also
 	 * need to allocate a link layer multicast address.
 	 */
 	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
 	if (ifma == NULL) {
 		error = ENOMEM;
 		goto free_llsa_out;
 	}
 
 	/*
 	 * If a link layer address is found, we'll need to see if it's
 	 * already present in the address list, or allocate is as well.
 	 * When this block finishes, the link layer address will be on the
 	 * list.
 	 */
 	if (llsa != NULL) {
 		ll_ifma = if_findmulti(ifp, llsa);
 		if (ll_ifma == NULL) {
 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
 			if (ll_ifma == NULL) {
 				--ifma->ifma_refcount;
 				if_freemulti(ifma);
 				error = ENOMEM;
 				goto free_llsa_out;
 			}
 			ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
 			CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
 			    ifma_link);
 		} else
 			ll_ifma->ifma_refcount++;
 		ifma->ifma_llifma = ll_ifma;
 	}
 
 	/*
 	 * We now have a new multicast address, ifma, and possibly a new or
 	 * referenced link layer address.  Add the primary address to the
 	 * ifnet address list.
 	 */
 	ifma->ifma_flags |= IFMA_F_ENQUEUED;
 	CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 
 	if (retifma != NULL)
 		*retifma = ifma;
 
 	/*
 	 * Must generate the message while holding the lock so that 'ifma'
 	 * pointer is still valid.
 	 */
 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 	IF_ADDR_WUNLOCK(ifp);
 
 	/*
 	 * We are certain we have added something, so call down to the
 	 * interface to let them know about it.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		if (THREAD_CAN_SLEEP())
 			(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
 		else
 			taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask);
 	}
 
 	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
 		link_free_sdl(llsa);
 
 	return (0);
 
 free_llsa_out:
 	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
 		link_free_sdl(llsa);
 
 unlock_out:
 	IF_ADDR_WUNLOCK(ifp);
 	return (error);
 }
 
 static void
 if_siocaddmulti(void *arg, int pending)
 {
 	struct ifnet *ifp;
 
 	ifp = arg;
 #ifdef DIAGNOSTIC
 	if (pending > 1)
 		if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending);
 #endif
 	CURVNET_SET(ifp->if_vnet);
 	(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
 	CURVNET_RESTORE();
 }
 
 /*
  * Delete a multicast group membership by network-layer group address.
  *
  * Returns ENOENT if the entry could not be found. If ifp no longer
  * exists, results are undefined. This entry point should only be used
  * from subsystems which do appropriate locking to hold ifp for the
  * duration of the call.
  * Network-layer protocol domains must use if_delmulti_ifma().
  */
 int
 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 {
 	struct ifmultiaddr *ifma;
 	int lastref;
 
 	KASSERT(ifp, ("%s: NULL ifp", __func__));
 
 	IF_ADDR_WLOCK(ifp);
 	lastref = 0;
 	ifma = if_findmulti(ifp, sa);
 	if (ifma != NULL)
 		lastref = if_delmulti_locked(ifp, ifma, 0);
 	IF_ADDR_WUNLOCK(ifp);
 
 	if (ifma == NULL)
 		return (ENOENT);
 
 	if (lastref && ifp->if_ioctl != NULL) {
 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 	}
 
 	return (0);
 }
 
 /*
  * Delete all multicast group membership for an interface.
  * Should be used to quickly flush all multicast filters.
  */
 void
 if_delallmulti(struct ifnet *ifp)
 {
 	struct ifmultiaddr *ifma;
 	struct ifmultiaddr *next;
 
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 		if_delmulti_locked(ifp, ifma, 0);
 	IF_ADDR_WUNLOCK(ifp);
 }
 
 void
 if_delmulti_ifma(struct ifmultiaddr *ifma)
 {
 	if_delmulti_ifma_flags(ifma, 0);
 }
 
 /*
  * Delete a multicast group membership by group membership pointer.
  * Network-layer protocol domains must use this routine.
  *
  * It is safe to call this routine if the ifp disappeared.
  */
 void
 if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
 {
 	struct ifnet *ifp;
 	int lastref;
 	MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma);
 #ifdef INET
 	IN_MULTI_LIST_UNLOCK_ASSERT();
 #endif
 	ifp = ifma->ifma_ifp;
 #ifdef DIAGNOSTIC
 	if (ifp == NULL) {
 		printf("%s: ifma_ifp seems to be detached\n", __func__);
 	} else {
 		struct epoch_tracker et;
 		struct ifnet *oifp;
 
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
 			if (ifp == oifp)
 				break;
 		NET_EPOCH_EXIT(et);
 		if (ifp != oifp)
 			ifp = NULL;
 	}
 #endif
 	/*
 	 * If and only if the ifnet instance exists: Acquire the address lock.
 	 */
 	if (ifp != NULL)
 		IF_ADDR_WLOCK(ifp);
 
 	lastref = if_delmulti_locked(ifp, ifma, flags);
 
 	if (ifp != NULL) {
 		/*
 		 * If and only if the ifnet instance exists:
 		 *  Release the address lock.
 		 *  If the group was left: update the hardware hash filter.
 		 */
 		IF_ADDR_WUNLOCK(ifp);
 		if (lastref && ifp->if_ioctl != NULL) {
 			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 		}
 	}
 }
 
 /*
  * Perform deletion of network-layer and/or link-layer multicast address.
  *
  * Return 0 if the reference count was decremented.
  * Return 1 if the final reference was released, indicating that the
  * hardware hash filter should be reprogrammed.
  */
 static int
 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
 {
 	struct ifmultiaddr *ll_ifma;
 
 	if (ifp != NULL && ifma->ifma_ifp != NULL) {
 		KASSERT(ifma->ifma_ifp == ifp,
 		    ("%s: inconsistent ifp %p", __func__, ifp));
 		IF_ADDR_WLOCK_ASSERT(ifp);
 	}
 
 	ifp = ifma->ifma_ifp;
 	MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : "");
 
 	/*
 	 * If the ifnet is detaching, null out references to ifnet,
 	 * so that upper protocol layers will notice, and not attempt
 	 * to obtain locks for an ifnet which no longer exists. The
 	 * routing socket announcement must happen before the ifnet
 	 * instance is detached from the system.
 	 */
 	if (detaching) {
 #ifdef DIAGNOSTIC
 		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
 #endif
 		/*
 		 * ifp may already be nulled out if we are being reentered
 		 * to delete the ll_ifma.
 		 */
 		if (ifp != NULL) {
 			rt_newmaddrmsg(RTM_DELMADDR, ifma);
 			ifma->ifma_ifp = NULL;
 		}
 	}
 
 	if (--ifma->ifma_refcount > 0)
 		return 0;
 
 	if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 	}
 	/*
 	 * If this ifma is a network-layer ifma, a link-layer ifma may
 	 * have been associated with it. Release it first if so.
 	 */
 	ll_ifma = ifma->ifma_llifma;
 	if (ll_ifma != NULL) {
 		KASSERT(ifma->ifma_lladdr != NULL,
 		    ("%s: llifma w/o lladdr", __func__));
 		if (detaching)
 			ll_ifma->ifma_ifp = NULL;	/* XXX */
 		if (--ll_ifma->ifma_refcount == 0) {
 			if (ifp != NULL) {
 				if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
 					CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
 						ifma_link);
 					ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 				}
 			}
 			if_freemulti(ll_ifma);
 		}
 	}
 #ifdef INVARIANTS
 	if (ifp) {
 		struct ifmultiaddr *ifmatmp;
 
 		CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link)
 			MPASS(ifma != ifmatmp);
 	}
 #endif
 	if_freemulti(ifma);
 	/*
 	 * The last reference to this instance of struct ifmultiaddr
 	 * was released; the hardware should be notified of this change.
 	 */
 	return 1;
 }
 
 /*
  * Set the link layer address on an interface.
  *
  * At this time we only support certain types of interfaces,
  * and we don't allow the length of the address to change.
  *
  * Set noinline to be dtrace-friendly
  */
 __noinline int
 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 
 	ifa = ifp->if_addr;
 	if (ifa == NULL)
 		return (EINVAL);
 
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	if (sdl == NULL)
 		return (EINVAL);
 
 	if (len != sdl->sdl_alen)	/* don't allow length to change */
 		return (EINVAL);
 
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_XETHER:
 	case IFT_L2VLAN:
 	case IFT_BRIDGE:
 	case IFT_IEEE8023ADLAG:
 		bcopy(lladdr, LLADDR(sdl), len);
 		break;
 	default:
 		return (ENODEV);
 	}
 
 	/*
 	 * If the interface is already up, we need
 	 * to re-init it in order to reprogram its
 	 * address filter.
 	 */
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		if (ifp->if_ioctl) {
 			ifp->if_flags &= ~IFF_UP;
 			ifr.ifr_flags = ifp->if_flags & 0xffff;
 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 			ifp->if_flags |= IFF_UP;
 			ifr.ifr_flags = ifp->if_flags & 0xffff;
 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 		}
 	}
 	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 
 	return (0);
 }
 
 /*
  * Compat function for handling basic encapsulation requests.
  * Not converted stacks (FDDI, IB, ..) supports traditional
  * output model: ARP (and other similar L2 protocols) are handled
  * inside output routine, arpresolve/nd6_resolve() returns MAC
  * address instead of full prepend.
  *
  * This function creates calculated header==MAC for IPv4/IPv6 and
  * returns EAFNOSUPPORT (which is then handled in ARP code) for other
  * address families.
  */
 static int
 if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
 {
 
 	if (req->rtype != IFENCAP_LL)
 		return (EOPNOTSUPP);
 
 	if (req->bufsize < req->lladdr_len)
 		return (ENOMEM);
 
 	switch (req->family) {
 	case AF_INET:
 	case AF_INET6:
 		break;
 	default:
 		return (EAFNOSUPPORT);
 	}
 
 	/* Copy lladdr to storage as is */
 	memmove(req->buf, req->lladdr, req->lladdr_len);
 	req->bufsize = req->lladdr_len;
 	req->lladdr_off = 0;
 
 	return (0);
 }
 
 /*
  * Tunnel interfaces can nest, also they may cause infinite recursion
  * calls when misconfigured. We'll prevent this by detecting loops.
  * High nesting level may cause stack exhaustion. We'll prevent this
  * by introducing upper limit.
  *
  * Return 0, if tunnel nesting count is equal or less than limit.
  */
 int
 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
     int limit)
 {
 	struct m_tag *mtag;
 	int count;
 
 	count = 1;
 	mtag = NULL;
 	while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) {
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
 			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
 			return (EIO);
 		}
 		count++;
 	}
 	if (count > limit) {
 		log(LOG_NOTICE,
 		    "%s: if_output recursively called too many times(%d)\n",
 		    if_name(ifp), count);
 		return (EIO);
 	}
 	mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT);
 	if (mtag == NULL)
 		return (ENOMEM);
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
 	return (0);
 }
 
 /*
  * Get the link layer address that was read from the hardware at attach.
  *
  * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
  * their component interfaces as IFT_IEEE8023ADLAG.
  */
 int
 if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
 {
 
 	if (ifp->if_hw_addr == NULL)
 		return (ENODEV);
 
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_IEEE8023ADLAG:
 		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
 		return (0);
 	default:
 		return (ENODEV);
 	}
 }
 
 /*
  * The name argument must be a pointer to storage which will last as
  * long as the interface does.  For physical devices, the result of
  * device_get_name(dev) is a good choice and for pseudo-devices a
  * static string works well.
  */
 void
 if_initname(struct ifnet *ifp, const char *name, int unit)
 {
 	ifp->if_dname = name;
 	ifp->if_dunit = unit;
 	if (unit != IF_DUNIT_NONE)
 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
 	else
 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
 }
 
 static int
 if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap)
 {
 	char if_fmt[256];
 
 	snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt);
 	vlog(pri, if_fmt, ap);
 	return (0);
 }
 
 
 int
 if_printf(struct ifnet *ifp, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 	if_vlog(ifp, LOG_INFO, fmt, ap);
 	va_end(ap);
 	return (0);
 }
 
 int
 if_log(struct ifnet *ifp, int pri, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 	if_vlog(ifp, pri, fmt, ap);
 	va_end(ap);
 	return (0);
 }
 
 void
 if_start(struct ifnet *ifp)
 {
 
 	(*(ifp)->if_start)(ifp);
 }
 
 /*
  * Backwards compatibility interface for drivers 
  * that have not implemented it
  */
 static int
 if_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	int error;
 
 	IFQ_HANDOFF(ifp, m, error);
 	return (error);
 }
 
 static void
 if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
 {
 
 	m_freem(m);
 }
 
 int
 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
 {
 	int active = 0;
 
 	IF_LOCK(ifq);
 	if (_IF_QFULL(ifq)) {
 		IF_UNLOCK(ifq);
 		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 		m_freem(m);
 		return (0);
 	}
 	if (ifp != NULL) {
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
 		if (m->m_flags & (M_BCAST|M_MCAST))
 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
 	}
 	_IF_ENQUEUE(ifq, m);
 	IF_UNLOCK(ifq);
 	if (ifp != NULL && !active)
 		(*(ifp)->if_start)(ifp);
 	return (1);
 }
 
 void
 if_register_com_alloc(u_char type,
     if_com_alloc_t *a, if_com_free_t *f)
 {
 
 	KASSERT(if_com_alloc[type] == NULL,
 	    ("if_register_com_alloc: %d already registered", type));
 	KASSERT(if_com_free[type] == NULL,
 	    ("if_register_com_alloc: %d free already registered", type));
 
 	if_com_alloc[type] = a;
 	if_com_free[type] = f;
 }
 
 void
 if_deregister_com_alloc(u_char type)
 {
 
 	KASSERT(if_com_alloc[type] != NULL,
 	    ("if_deregister_com_alloc: %d not registered", type));
 	KASSERT(if_com_free[type] != NULL,
 	    ("if_deregister_com_alloc: %d free not registered", type));
 
 	/*
 	 * Ensure all pending EPOCH(9) callbacks have been executed. This
 	 * fixes issues about late invocation of if_destroy(), which leads
 	 * to memory leak from if_com_alloc[type] allocated if_l2com.
 	 */
 	NET_EPOCH_DRAIN_CALLBACKS();
 
 	if_com_alloc[type] = NULL;
 	if_com_free[type] = NULL;
 }
 
 /* API for driver access to network stack owned ifnet.*/
 uint64_t
 if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
 {
 	uint64_t oldbrate;
 
 	oldbrate = ifp->if_baudrate;
 	ifp->if_baudrate = baudrate;
 	return (oldbrate);
 }
 
 uint64_t
 if_getbaudrate(if_t ifp)
 {
 
 	return (((struct ifnet *)ifp)->if_baudrate);
 }
 
 int
 if_setcapabilities(if_t ifp, int capabilities)
 {
 	((struct ifnet *)ifp)->if_capabilities = capabilities;
 	return (0);
 }
 
 int
 if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
 {
 	((struct ifnet *)ifp)->if_capabilities |= setbit;
 	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
 
 	return (0);
 }
 
 int
 if_getcapabilities(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_capabilities;
 }
 
 int 
 if_setcapenable(if_t ifp, int capabilities)
 {
 	((struct ifnet *)ifp)->if_capenable = capabilities;
 	return (0);
 }
 
 int 
 if_setcapenablebit(if_t ifp, int setcap, int clearcap)
 {
 	if(setcap) 
 		((struct ifnet *)ifp)->if_capenable |= setcap;
 	if(clearcap)
 		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
 
 	return (0);
 }
 
 const char *
 if_getdname(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_dname;
 }
 
 int 
 if_togglecapenable(if_t ifp, int togglecap)
 {
 	((struct ifnet *)ifp)->if_capenable ^= togglecap;
 	return (0);
 }
 
 int
 if_getcapenable(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_capenable;
 }
 
 /*
  * This is largely undesirable because it ties ifnet to a device, but does
  * provide flexiblity for an embedded product vendor. Should be used with
  * the understanding that it violates the interface boundaries, and should be
  * a last resort only.
  */
 int
 if_setdev(if_t ifp, void *dev)
 {
 	return (0);
 }
 
 int
 if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
 {
 	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
 	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
 
 	return (0);
 }
 
 int
 if_getdrvflags(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_drv_flags;
 }
 
 int
 if_setdrvflags(if_t ifp, int flags)
 {
 	((struct ifnet *)ifp)->if_drv_flags = flags;
 	return (0);
 }
 
 int
 if_setflags(if_t ifp, int flags)
 {
 
 	ifp->if_flags = flags;
 	return (0);
 }
 
 int
 if_setflagbits(if_t ifp, int set, int clear)
 {
 	((struct ifnet *)ifp)->if_flags |= set;
 	((struct ifnet *)ifp)->if_flags &= ~clear;
 
 	return (0);
 }
 
 int
 if_getflags(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_flags;
 }
 
 int
 if_clearhwassist(if_t ifp)
 {
 	((struct ifnet *)ifp)->if_hwassist = 0;
 	return (0);
 }
 
 int
 if_sethwassistbits(if_t ifp, int toset, int toclear)
 {
 	((struct ifnet *)ifp)->if_hwassist |= toset;
 	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
 
 	return (0);
 }
 
 int
 if_sethwassist(if_t ifp, int hwassist_bit)
 {
 	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
 	return (0);
 }
 
 int
 if_gethwassist(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_hwassist;
 }
 
 int
 if_setmtu(if_t ifp, int mtu)
 {
 	((struct ifnet *)ifp)->if_mtu = mtu;
 	return (0);
 }
 
 int
 if_getmtu(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_mtu;
 }
 
 int
 if_getmtu_family(if_t ifp, int family)
 {
 	struct domain *dp;
 
 	for (dp = domains; dp; dp = dp->dom_next) {
 		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
 			return (dp->dom_ifmtu((struct ifnet *)ifp));
 	}
 
 	return (((struct ifnet *)ifp)->if_mtu);
 }
 
 /*
  * Methods for drivers to access interface unicast and multicast
  * link level addresses.  Driver shall not know 'struct ifaddr' neither
  * 'struct ifmultiaddr'.
  */
 u_int
 if_lladdr_count(if_t ifp)
 {
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	u_int count;
 
 	count = 0;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_LINK)
 			count++;
 	NET_EPOCH_EXIT(et);
 
 	return (count);
 }
 
 u_int
 if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
 {
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	u_int count;
 
 	MPASS(cb);
 
 	count = 0;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_LINK)
 			continue;
 		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr,
 		    count);
 	}
 	NET_EPOCH_EXIT(et);
 
 	return (count);
 }
 
 u_int
 if_llmaddr_count(if_t ifp)
 {
 	struct epoch_tracker et;
 	struct ifmultiaddr *ifma;
 	int count;
 
 	count = 0;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 		if (ifma->ifma_addr->sa_family == AF_LINK)
 			count++;
 	NET_EPOCH_EXIT(et);
 
 	return (count);
 }
 
 u_int
 if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
 {
 	struct epoch_tracker et;
 	struct ifmultiaddr *ifma;
 	u_int count;
 
 	MPASS(cb);
 
 	count = 0;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr,
 		    count);
 	}
 	NET_EPOCH_EXIT(et);
 
 	return (count);
 }
 
 int
 if_setsoftc(if_t ifp, void *softc)
 {
 	((struct ifnet *)ifp)->if_softc = softc;
 	return (0);
 }
 
 void *
 if_getsoftc(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_softc;
 }
 
 void 
 if_setrcvif(struct mbuf *m, if_t ifp)
 {
 
 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
 }
 
 void 
 if_setvtag(struct mbuf *m, uint16_t tag)
 {
 	m->m_pkthdr.ether_vtag = tag;	
 }
 
 uint16_t
 if_getvtag(struct mbuf *m)
 {
 
 	return (m->m_pkthdr.ether_vtag);
 }
 
 int
 if_sendq_empty(if_t ifp)
 {
 	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
 }
 
 struct ifaddr *
 if_getifaddr(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_addr;
 }
 
 int
 if_getamcount(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_amcount;
 }
 
 int
 if_setsendqready(if_t ifp)
 {
 	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
 	return (0);
 }
 
 int
 if_setsendqlen(if_t ifp, int tx_desc_count)
 {
 	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
 	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
 
 	return (0);
 }
 
 int
 if_vlantrunkinuse(if_t ifp)
 {
 	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
 }
 
 int
 if_input(if_t ifp, struct mbuf* sendmp)
 {
 	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
 	return (0);
 
 }
 
 struct mbuf *
 if_dequeue(if_t ifp)
 {
 	struct mbuf *m;
 	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
 
 	return (m);
 }
 
 int
 if_sendq_prepend(if_t ifp, struct mbuf *m)
 {
 	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
 	return (0);
 }
 
 int
 if_setifheaderlen(if_t ifp, int len)
 {
 	((struct ifnet *)ifp)->if_hdrlen = len;
 	return (0);
 }
 
 caddr_t
 if_getlladdr(if_t ifp)
 {
 	return (IF_LLADDR((struct ifnet *)ifp));
 }
 
 void *
 if_gethandle(u_char type)
 {
 	return (if_alloc(type));
 }
 
 void
 if_bpfmtap(if_t ifh, struct mbuf *m)
 {
 	struct ifnet *ifp = (struct ifnet *)ifh;
 
 	BPF_MTAP(ifp, m);
 }
 
 void
 if_etherbpfmtap(if_t ifh, struct mbuf *m)
 {
 	struct ifnet *ifp = (struct ifnet *)ifh;
 
 	ETHER_BPF_MTAP(ifp, m);
 }
 
 void
 if_vlancap(if_t ifh)
 {
 	struct ifnet *ifp = (struct ifnet *)ifh;
 	VLAN_CAPABILITIES(ifp);
 }
 
 int
 if_sethwtsomax(if_t ifp, u_int if_hw_tsomax)
 {
 
 	((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax;
         return (0);
 }
 
 int
 if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount)
 {
 
 	((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount;
         return (0);
 }
 
 int
 if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
 {
 
 	((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
         return (0);
 }
 
 u_int
 if_gethwtsomax(if_t ifp)
 {
 
 	return (((struct ifnet *)ifp)->if_hw_tsomax);
 }
 
 u_int
 if_gethwtsomaxsegcount(if_t ifp)
 {
 
 	return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount);
 }
 
 u_int
 if_gethwtsomaxsegsize(if_t ifp)
 {
 
 	return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize);
 }
 
 void
 if_setinitfn(if_t ifp, void (*init_fn)(void *))
 {
 	((struct ifnet *)ifp)->if_init = init_fn;
 }
 
 void
 if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
 {
 	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
 }
 
 void
 if_setstartfn(if_t ifp, void (*start_fn)(if_t))
 {
 	((struct ifnet *)ifp)->if_start = (void *)start_fn;
 }
 
 void
 if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
 {
 	((struct ifnet *)ifp)->if_transmit = start_fn;
 }
 
 void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
 {
 	((struct ifnet *)ifp)->if_qflush = flush_fn;
 
 }
 
 void
 if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
 {
 
 	ifp->if_get_counter = fn;
 }
 
 #ifdef DDB
 static void
 if_show_ifnet(struct ifnet *ifp)
 {
 
 	if (ifp == NULL)
 		return;
 	db_printf("%s:\n", ifp->if_xname);
 #define	IF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, ifp->e);
 	IF_DB_PRINTF("%s", if_dname);
 	IF_DB_PRINTF("%d", if_dunit);
 	IF_DB_PRINTF("%s", if_description);
 	IF_DB_PRINTF("%u", if_index);
 	IF_DB_PRINTF("%d", if_idxgen);
 	IF_DB_PRINTF("%u", if_refcount);
 	IF_DB_PRINTF("%p", if_softc);
 	IF_DB_PRINTF("%p", if_l2com);
 	IF_DB_PRINTF("%p", if_llsoftc);
 	IF_DB_PRINTF("%d", if_amcount);
 	IF_DB_PRINTF("%p", if_addr);
 	IF_DB_PRINTF("%p", if_broadcastaddr);
 	IF_DB_PRINTF("%p", if_afdata);
 	IF_DB_PRINTF("%d", if_afdata_initialized);
 	IF_DB_PRINTF("%u", if_fib);
 	IF_DB_PRINTF("%p", if_vnet);
 	IF_DB_PRINTF("%p", if_home_vnet);
 	IF_DB_PRINTF("%p", if_vlantrunk);
 	IF_DB_PRINTF("%p", if_bpf);
 	IF_DB_PRINTF("%u", if_pcount);
 	IF_DB_PRINTF("%p", if_bridge);
 	IF_DB_PRINTF("%p", if_lagg);
 	IF_DB_PRINTF("%p", if_pf_kif);
 	IF_DB_PRINTF("%p", if_carp);
 	IF_DB_PRINTF("%p", if_label);
 	IF_DB_PRINTF("%p", if_netmap);
 	IF_DB_PRINTF("0x%08x", if_flags);
 	IF_DB_PRINTF("0x%08x", if_drv_flags);
 	IF_DB_PRINTF("0x%08x", if_capabilities);
 	IF_DB_PRINTF("0x%08x", if_capenable);
 	IF_DB_PRINTF("%p", if_snd.ifq_head);
 	IF_DB_PRINTF("%p", if_snd.ifq_tail);
 	IF_DB_PRINTF("%d", if_snd.ifq_len);
 	IF_DB_PRINTF("%d", if_snd.ifq_maxlen);
 	IF_DB_PRINTF("%p", if_snd.ifq_drv_head);
 	IF_DB_PRINTF("%p", if_snd.ifq_drv_tail);
 	IF_DB_PRINTF("%d", if_snd.ifq_drv_len);
 	IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen);
 	IF_DB_PRINTF("%d", if_snd.altq_type);
 	IF_DB_PRINTF("%x", if_snd.altq_flags);
 #undef IF_DB_PRINTF
 }
 
 DB_SHOW_COMMAND(ifnet, db_show_ifnet)
 {
 
 	if (!have_addr) {
 		db_printf("usage: show ifnet <struct ifnet *>\n");
 		return;
 	}
 
 	if_show_ifnet((struct ifnet *)addr);
 }
 
 DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
 {
 	struct ifnet *ifp;
 	u_short idx;
 
 	for (idx = 1; idx <= if_index; idx++) {
 		ifp = ifindex_table[idx].ife_ifnet;
 		if (ifp == NULL)
 			continue;
 		db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
 		if (db_pager_quit)
 			break;
 	}
 }
 #endif	/* DDB */
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 1c44623bdec1..e394af68ac23 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -1,1797 +1,1852 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (C) 2001 WIDE Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.4 (Berkeley) 1/9/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #define IN_HISTORICAL_NETS		/* include class masks */
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 #include <sys/socket.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/sx.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/route/route_ctl.h>
 #include <net/vnet.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_carp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
 static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
 static int in_gifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
 
 static void	in_socktrim(struct sockaddr_in *);
 static void	in_purgemaddrs(struct ifnet *);
 
 static bool	ia_need_loopback_route(const struct in_ifaddr *);
 
 VNET_DEFINE_STATIC(int, nosameprefix);
 #define	V_nosameprefix			VNET(nosameprefix)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(nosameprefix), 0,
 	"Refuse to create same prefixes on different interfaces");
 
 VNET_DEFINE_STATIC(bool, broadcast_lowest);
 #define	V_broadcast_lowest		VNET(broadcast_lowest)
 SYSCTL_BOOL(_net_inet_ip, OID_AUTO, broadcast_lowest, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(broadcast_lowest), 0,
 	"Treat lowest address on a subnet (host 0) as broadcast");
 
 VNET_DEFINE(bool, ip_allow_net240) = false;
 #define	V_ip_allow_net240		VNET(ip_allow_net240)
 SYSCTL_BOOL(_net_inet_ip, OID_AUTO, allow_net240,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_allow_net240), 0,
 	"Allow use of Experimental addresses, aka Class E (240/4)");
 /* see https://datatracker.ietf.org/doc/draft-schoen-intarea-unicast-240 */
 
 VNET_DEFINE(bool, ip_allow_net0) = false;
 SYSCTL_BOOL(_net_inet_ip, OID_AUTO, allow_net0,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_allow_net0), 0,
 	"Allow use of addresses in network 0/8");
 /* see https://datatracker.ietf.org/doc/draft-schoen-intarea-unicast-0 */
 
 VNET_DEFINE(uint32_t, in_loopback_mask) = IN_LOOPBACK_MASK_DFLT;
 #define	V_in_loopback_mask	VNET(in_loopback_mask)
 static int sysctl_loopback_prefixlen(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, loopback_prefixlen,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	NULL, 0, sysctl_loopback_prefixlen, "I",
 	"Prefix length of address space reserved for loopback");
 /* see https://datatracker.ietf.org/doc/draft-schoen-intarea-unicast-127 */
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
 static struct sx in_control_sx;
 SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");
 
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).
  */
 int
 in_localaddr(struct in_addr in)
 {
 	u_long i = ntohl(in.s_addr);
 	struct in_ifaddr *ia;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if ((i & ia->ia_subnetmask) == ia->ia_subnet)
 			return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Return 1 if an internet address is for the local host and configured
  * on one of its interfaces.
  */
 bool
 in_localip(struct in_addr in)
 {
 	struct in_ifaddr *ia;
 
 	NET_EPOCH_ASSERT();
 
 	CK_LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash)
 		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr)
 			return (true);
 
 	return (false);
 }
 
 /*
  * Like in_localip(), but FIB-aware.
  */
 bool
 in_localip_fib(struct in_addr in, uint16_t fib)
 {
 	struct in_ifaddr *ia;
 
 	NET_EPOCH_ASSERT();
 
 	CK_LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash)
 		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr &&
 		    ia->ia_ifa.ifa_ifp->if_fib == fib)
 			return (true);
 
 	return (false);
 }
 
 /*
  * Return 1 if an internet address is configured on an interface.
  */
 int
 in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
 {
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		ia = (struct in_ifaddr *)ifa;
 		if (ia->ia_addr.sin_addr.s_addr == in.s_addr)
 			return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Return a reference to the interface address which is different to
  * the supplied one but with same IP address value.
  */
 static struct in_ifaddr *
 in_localip_more(struct in_ifaddr *original_ia)
 {
 	struct epoch_tracker et;
 	in_addr_t original_addr = IA_SIN(original_ia)->sin_addr.s_addr;
 	uint32_t original_fib = original_ia->ia_ifa.ifa_ifp->if_fib;
 	struct in_ifaddr *ia;
 
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(ia, INADDR_HASH(original_addr), ia_hash) {
 		in_addr_t addr = IA_SIN(ia)->sin_addr.s_addr;
 		uint32_t fib = ia->ia_ifa.ifa_ifp->if_fib;
 		if (!V_rt_add_addr_allfibs && (original_fib != fib))
 			continue;
 		if ((original_ia != ia) && (original_addr == addr)) {
 			ifa_ref(&ia->ia_ifa);
 			NET_EPOCH_EXIT(et);
 			return (ia);
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	return (NULL);
 }
 
 /*
  * Tries to find first IPv4 address in the provided fib.
  * Prefers non-loopback addresses and return loopback IFF
  * @loopback_ok is set.
  *
  * Returns ifa or NULL.
  */
 struct in_ifaddr *
 in_findlocal(uint32_t fibnum, bool loopback_ok)
 {
 	struct in_ifaddr *ia = NULL, *ia_lo = NULL;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		uint32_t ia_fib = ia->ia_ifa.ifa_ifp->if_fib;
 		if (!V_rt_add_addr_allfibs && (fibnum != ia_fib))
 			continue;
 
 		if (!IN_LOOPBACK(ntohl(IA_SIN(ia)->sin_addr.s_addr)))
 			break;
 		if (loopback_ok)
 			ia_lo = ia;
 	}
 
 	if (ia == NULL)
 		ia = ia_lo;
 
 	return (ia);
 }
 
 /*
  * Determine whether an IP address is in a reserved set of addresses
  * that may not be forwarded, or whether datagrams to that destination
  * may be forwarded.
  */
 int
 in_canforward(struct in_addr in)
 {
 	u_long i = ntohl(in.s_addr);
 
 	if (IN_MULTICAST(i) || IN_LINKLOCAL(i) || IN_LOOPBACK(i))
 		return (0);
 	if (IN_EXPERIMENTAL(i) && !V_ip_allow_net240)
 		return (0);
 	if (IN_ZERONET(i) && !V_ip_allow_net0)
 		return (0);
 	return (1);
 }
 
 /*
  * Sysctl to manage prefix of reserved loopback network; translate
  * to/from mask.  The mask is always contiguous high-order 1 bits
  * followed by all 0 bits.
  */
 static int
 sysctl_loopback_prefixlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, preflen;
 
 	/* ffs is 1-based; compensate. */
 	preflen = 33 - ffs(V_in_loopback_mask);
 	error = sysctl_handle_int(oidp, &preflen, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (preflen < 8 || preflen > 31)
 		return (EINVAL);
 	V_in_loopback_mask = 0xffffffff << (32 - preflen);
 	return (0);
 }
 
 /*
  * Trim a mask in a sockaddr
  */
 static void
 in_socktrim(struct sockaddr_in *ap)
 {
     char *cplim = (char *) &ap->sin_addr;
     char *cp = (char *) (&ap->sin_addr + 1);
 
     ap->sin_len = 0;
     while (--cp >= cplim)
 	if (*cp) {
 	    (ap)->sin_len = cp - (char *) (ap) + 1;
 	    break;
 	}
 }
 
 /*
  * Generic internet control operations (ioctl's).
  */
 int
 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct thread *td)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	int error;
 
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Filter out 4 ioctls we implement directly.  Forward the rest
 	 * to specific functions and ifp->if_ioctl().
 	 */
 	switch (cmd) {
 	case SIOCGIFADDR:
 	case SIOCGIFBRDADDR:
 	case SIOCGIFDSTADDR:
 	case SIOCGIFNETMASK:
 		break;
 	case SIOCGIFALIAS:
 		sx_xlock(&in_control_sx);
 		error = in_gifaddr_ioctl(cmd, data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case SIOCDIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_difaddr_ioctl(cmd, data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case OSIOCAIFADDR:	/* 9.x compat */
 	case SIOCAIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_aifaddr_ioctl(cmd, data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/* We no longer support that old commands. */
 		return (EINVAL);
 	default:
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		return ((*ifp->if_ioctl)(ifp, cmd, data));
 	}
 
 	if (addr->sin_addr.s_addr != INADDR_ANY &&
 	    prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Find address for this interface, if it exists.  If an
 	 * address was specified, find that one instead of the
 	 * first one on the interface, if possible.
 	 */
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		ia = (struct in_ifaddr *)ifa;
 		if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr)
 			break;
 	}
 	if (ifa == NULL)
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 			if (ifa->ifa_addr->sa_family == AF_INET) {
 				ia = (struct in_ifaddr *)ifa;
 				if (prison_check_ip4(td->td_ucred,
 				    &ia->ia_addr.sin_addr) == 0)
 					break;
 			}
 
 	if (ifa == NULL) {
 		NET_EPOCH_EXIT(et);
 		return (EADDRNOTAVAIL);
 	}
 
 	error = 0;
 	switch (cmd) {
 	case SIOCGIFADDR:
 		*addr = ia->ia_addr;
 		break;
 
 	case SIOCGIFBRDADDR:
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_broadaddr;
 		break;
 
 	case SIOCGIFDSTADDR:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_dstaddr;
 		break;
 
 	case SIOCGIFNETMASK:
 		*addr = ia->ia_sockmask;
 		break;
 	}
 
 	NET_EPOCH_EXIT(et);
 
 	return (error);
 }
 
 static int
 in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct in_aliasreq *ifra = (struct in_aliasreq *)data;
 	const struct sockaddr_in *addr = &ifra->ifra_addr;
 	const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr;
 	const struct sockaddr_in *mask = &ifra->ifra_mask;
 	const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
 	const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool iaIsFirst;
 	int error = 0;
 
 	error = priv_check(td, PRIV_NET_ADDIFADDR);
 	if (error)
 		return (error);
 
 	/*
 	 * ifra_addr must be present and be of INET family.
 	 * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional.
 	 */
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		return (EINVAL);
 	if (broadaddr->sin_len != 0 &&
 	    (broadaddr->sin_len != sizeof(struct sockaddr_in) ||
 	    broadaddr->sin_family != AF_INET))
 		return (EINVAL);
 	if (mask->sin_len != 0 &&
 	    (mask->sin_len != sizeof(struct sockaddr_in) ||
 	    mask->sin_family != AF_INET))
 		return (EINVAL);
 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
 	    (dstaddr->sin_len != sizeof(struct sockaddr_in) ||
 	     dstaddr->sin_addr.s_addr == INADDR_ANY))
 		return (EDESTADDRREQ);
 	if (vhid != 0 && carp_attach_p == NULL)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * See whether address already exist.
 	 */
 	iaIsFirst = true;
 	ia = NULL;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
 			ia = it;
 		else
 			iaIsFirst = false;
 	}
 	NET_EPOCH_EXIT(et);
 
 	if (ia != NULL)
 		(void )in_difaddr_ioctl(cmd, data, ifp, td);
 
 	ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK);
 	ia = (struct in_ifaddr *)ifa;
 	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
 	callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock,
 	    CALLOUT_RETURNUNLOCKED);
 
 	ia->ia_ifp = ifp;
 	ia->ia_addr = *addr;
 	if (mask->sin_len != 0) {
 		ia->ia_sockmask = *mask;
 		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
 	} else {
 		in_addr_t i = ntohl(addr->sin_addr.s_addr);
 
 		/*
 	 	 * If netmask isn't supplied, use historical default.
 		 * This is deprecated for interfaces other than loopback
 		 * or point-to-point; warn in other cases.  In the future
 		 * we should return an error rather than warning.
 	 	 */
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0)
 			printf("%s: set address: WARNING: network mask "
 			     "should be specified; using historical default\n",
 			     ifp->if_xname);
 		if (IN_CLASSA(i))
 			ia->ia_subnetmask = IN_CLASSA_NET;
 		else if (IN_CLASSB(i))
 			ia->ia_subnetmask = IN_CLASSB_NET;
 		else
 			ia->ia_subnetmask = IN_CLASSC_NET;
 		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
 	}
 	ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask;
 	in_socktrim(&ia->ia_sockmask);
 
 	if (ifp->if_flags & IFF_BROADCAST) {
 		if (broadaddr->sin_len != 0) {
 			ia->ia_broadaddr = *broadaddr;
 		} else if (ia->ia_subnetmask == IN_RFC3021_MASK) {
 			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		} else {
 			ia->ia_broadaddr.sin_addr.s_addr =
 			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		}
 	}
 
 	if (ifp->if_flags & IFF_POINTOPOINT)
 		ia->ia_dstaddr = *dstaddr;
 
 	if (vhid != 0) {
 		error = (*carp_attach_p)(&ia->ia_ifa, vhid);
 		if (error)
 			return (error);
 	}
 
 	/* if_addrhead is already referenced by ifa_alloc() */
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	ifa_ref(ifa);			/* in_ifaddrhead */
 	sx_assert(&in_control_sx, SA_XLOCKED);
 	CK_STAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
 	CK_LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia,
 	    ia_hash);
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add route for the network.
 	 */
 	if (vhid == 0) {
 		error = in_addprefix(ia);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add a loopback route to self.
 	 */
 	if (vhid == 0 && ia_need_loopback_route(ia)) {
 		struct in_ifaddr *eia;
 
 		eia = in_localip_more(ia);
 
 		if (eia == NULL) {
 			error = ifa_add_loopback_route((struct ifaddr *)ia,
 			    (struct sockaddr *)&ia->ia_addr);
 			if (error)
 				goto fail2;
 		} else
 			ifa_free(&eia->ia_ifa);
 	}
 
 	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_addr allhosts_addr;
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
 
 		error = in_joingroup(ifp, &allhosts_addr, NULL,
 			&ii->ii_allhosts);
 	}
 
 	/*
 	 * Note: we don't need extra reference for ifa, since we called
 	 * with sx lock held, and ifaddr can not be deleted in concurrent
 	 * thread.
 	 */
 	EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, ifa, IFADDR_EVENT_ADD);
 
 	return (error);
 
 fail2:
 	if (vhid == 0)
 		(void )in_scrubprefix(ia, LLE_STATIC);
 
 fail1:
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa, false);
 
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	sx_assert(&in_control_sx, SA_XLOCKED);
 	CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link);
 	CK_LIST_REMOVE(ia, ia_hash);
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (error);
 }
 
 static int
 in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct ifreq *ifr = (struct ifreq *)data;
 	const struct sockaddr_in *addr = (const struct sockaddr_in *)
 	    &ifr->ifr_addr;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool deleteAny, iaIsLast;
 	int error;
 
 	if (td != NULL) {
 		error = priv_check(td, PRIV_NET_DELIFADDR);
 		if (error)
 			return (error);
 	}
 
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		deleteAny = true;
 	else
 		deleteAny = false;
 
 	iaIsLast = true;
 	ia = NULL;
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		if (deleteAny && ia == NULL && (td == NULL ||
 		    prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0))
 			ia = it;
 
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    (td == NULL || prison_check_ip4(td->td_ucred,
 		    &addr->sin_addr) == 0))
 			ia = it;
 
 		if (it != ia)
 			iaIsLast = false;
 	}
 
 	if (ia == NULL) {
 		IF_ADDR_WUNLOCK(ifp);
 		return (EADDRNOTAVAIL);
 	}
 
 	CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	sx_assert(&in_control_sx, SA_XLOCKED);
 	CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link);
 	CK_LIST_REMOVE(ia, ia_hash);
 
 	/*
 	 * in_scrubprefix() kills the interface route.
 	 */
 	in_scrubprefix(ia, LLE_STATIC);
 
 	/*
 	 * in_ifadown gets rid of all the rest of
 	 * the routes.  This is not quite the right
 	 * thing to do, but at least if we are running
 	 * a routing process they will come back.
 	 */
 	in_ifadown(&ia->ia_ifa, 1);
 
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa, cmd == SIOCAIFADDR);
 
 	/*
 	 * If this is the last IPv4 address configured on this
 	 * interface, leave the all-hosts group.
 	 * No state-change report need be transmitted.
 	 */
 	if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		if (ii->ii_allhosts) {
 			(void)in_leavegroup(ii->ii_allhosts, NULL);
 			ii->ii_allhosts = NULL;
 		}
 	}
 
 	IF_ADDR_WLOCK(ifp);
 	if (callout_stop(&ia->ia_garp_timer) == 1) {
 		ifa_free(&ia->ia_ifa);
 	}
 	IF_ADDR_WUNLOCK(ifp);
 
 	EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa,
 	    IFADDR_EVENT_DEL);
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (0);
 }
 
 static int
 in_gifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
 	const struct sockaddr_in *addr = &ifra->ifra_addr;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 
 	/*
 	 * ifra_addr must be present and be of INET family.
 	 */
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		return (EINVAL);
 
 	/*
 	 * See whether address exist.
 	 */
 	ia = NULL;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) {
 			ia = it;
 			break;
 		}
 	}
 	if (ia == NULL) {
 		NET_EPOCH_EXIT(et);
 		return (EADDRNOTAVAIL);
 	}
 
 	ifra->ifra_mask = ia->ia_sockmask;
 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
 	    ia->ia_dstaddr.sin_family == AF_INET)
 		ifra->ifra_dstaddr = ia->ia_dstaddr;
 	else if ((ifp->if_flags & IFF_BROADCAST) &&
 	    ia->ia_broadaddr.sin_family == AF_INET)
 		ifra->ifra_broadaddr = ia->ia_broadaddr;
 	else
 		memset(&ifra->ifra_broadaddr, 0,
 		    sizeof(ifra->ifra_broadaddr));
 
 	NET_EPOCH_EXIT(et);
 	return (0);
 }
 
 static int
 in_match_ifaddr(const struct rtentry *rt, const struct nhop_object *nh, void *arg)
 {
 
 	if (nh->nh_ifa == (struct ifaddr *)arg)
 		return (1);
 
 	return (0);
 }
 
 static int
 in_handle_prefix_route(uint32_t fibnum, int cmd,
     struct sockaddr_in *dst, struct sockaddr_in *netmask, struct ifaddr *ifa,
     struct ifnet *ifp)
 {
 
 	NET_EPOCH_ASSERT();
 
 	/* Prepare gateway */
 	struct sockaddr_dl_short sdl = {
 		.sdl_family = AF_LINK,
 		.sdl_len = sizeof(struct sockaddr_dl_short),
 		.sdl_type = ifa->ifa_ifp->if_type,
 		.sdl_index = ifa->ifa_ifp->if_index,
 	};
 
 	struct rt_addrinfo info = {
 		.rti_ifa = ifa,
 		.rti_ifp = ifp,
 		.rti_flags = RTF_PINNED | ((netmask != NULL) ? 0 : RTF_HOST),
 		.rti_info = {
 			[RTAX_DST] = (struct sockaddr *)dst,
 			[RTAX_NETMASK] = (struct sockaddr *)netmask,
 			[RTAX_GATEWAY] = (struct sockaddr *)&sdl,
 		},
 		/* Ensure we delete the prefix IFF prefix ifa matches */
 		.rti_filter = in_match_ifaddr,
 		.rti_filterdata = ifa,
 	};
 
 	return (rib_handle_ifaddr_info(fibnum, cmd, &info));
 }
 
 /*
  * Routing table interaction with interface addresses.
  *
  * In general, two types of routes needs to be installed:
  * a) "interface" or "prefix" route, telling user that the addresses
  *   behind the ifa prefix are reached directly.
  * b) "loopback" route installed for the ifa address, telling user that
  *   the address belongs to local system.
  *
  * Handling for (a) and (b) differs in multi-fib aspects, hence they
  *  are implemented in different functions below.
  *
  * The cases above may intersect - /32 interface aliases results in
  *  the same prefix produced by (a) and (b). This blurs the definition
  *  of the "loopback" route and complicate interactions. The interaction
  *  table is defined below. The case numbers are used in the multiple
  *  functions below to refer to the particular test case.
  *
  * There can be multiple options:
  * 1) Adding address with prefix on non-p2p/non-loopback interface.
  *  Example: 192.0.2.1/24. Action:
  *  * add "prefix" route towards 192.0.2.0/24 via @ia interface,
  *    using @ia as an address source.
  *  * add "loopback" route towards 192.0.2.1 via V_loif, saving
  *   @ia ifp in the gateway and using @ia as an address source.
  *
  * 2) Adding address with /32 mask to non-p2p/non-loopback interface.
  *  Example: 192.0.2.2/32. Action:
  *  * add "prefix" host route via V_loif, using @ia as an address source.
  *
  * 3) Adding address with or without prefix to p2p interface.
  *  Example: 10.0.0.1/24->10.0.0.2. Action:
  *  * add "prefix" host route towards 10.0.0.2 via this interface, using @ia
  *    as an address source. Note: no sense in installing full /24 as the interface
  *    is point-to-point.
  *  * add "loopback" route towards 10.0.9.1 via V_loif, saving
  *   @ia ifp in the gateway and using @ia as an address source.
  *
  * 4) Adding address with or without prefix to loopback interface.
  *  Example: 192.0.2.1/24. Action:
  *  * add "prefix" host route via @ia interface, using @ia as an address source.
  *    Note: Skip installing /24 prefix as it would introduce TTL loop
  *    for the traffic destined to these addresses.
  */
 
 /*
  * Checks if @ia needs to install loopback route to @ia address via
  *  ifa_maintain_loopback_route().
  *
  * Return true on success.
  */
 static bool
 ia_need_loopback_route(const struct in_ifaddr *ia)
 {
 	struct ifnet *ifp = ia->ia_ifp;
 
 	/* Case 4: Skip loopback interfaces */
 	if ((ifp->if_flags & IFF_LOOPBACK) ||
 	    (ia->ia_addr.sin_addr.s_addr == INADDR_ANY))
 		return (false);
 
 	/* Clash avoidance: Skip p2p interfaces with both addresses are equal */
 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
 	    ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
 		return (false);
 
 	/* Case 2: skip /32 prefixes */
 	if (!(ifp->if_flags & IFF_POINTOPOINT) &&
 	    (ia->ia_sockmask.sin_addr.s_addr == INADDR_BROADCAST))
 		return (false);
 
 	return (true);
 }
 
 /*
  * Calculate "prefix" route corresponding to @ia.
  */
 static void
 ia_getrtprefix(const struct in_ifaddr *ia, struct in_addr *prefix, struct in_addr *mask)
 {
 
 	if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) {
 		/* Case 3: return host route for dstaddr */
 		*prefix = ia->ia_dstaddr.sin_addr;
 		mask->s_addr = INADDR_BROADCAST;
 	} else if (ia->ia_ifp->if_flags & IFF_LOOPBACK) {
 		/* Case 4: return host route for ifaddr */
 		*prefix = ia->ia_addr.sin_addr;
 		mask->s_addr = INADDR_BROADCAST;
 	} else {
 		/* Cases 1,2: return actual ia prefix */
 		*prefix = ia->ia_addr.sin_addr;
 		*mask = ia->ia_sockmask.sin_addr;
 		prefix->s_addr &= mask->s_addr;
 	}
 }
 
 /*
  * Adds or delete interface "prefix" route corresponding to @ifa.
  *  Returns 0 on success or errno.
  */
-int
+static int
 in_handle_ifaddr_route(int cmd, struct in_ifaddr *ia)
 {
 	struct ifaddr *ifa = &ia->ia_ifa;
 	struct in_addr daddr, maddr;
 	struct sockaddr_in *pmask;
 	struct epoch_tracker et;
 	int error;
 
 	ia_getrtprefix(ia, &daddr, &maddr);
 
 	struct sockaddr_in mask = {
 		.sin_family = AF_INET,
 		.sin_len = sizeof(struct sockaddr_in),
 		.sin_addr = maddr,
 	};
 
 	pmask = (maddr.s_addr != INADDR_BROADCAST) ? &mask : NULL;
 
 	struct sockaddr_in dst = {
 		.sin_family = AF_INET,
 		.sin_len = sizeof(struct sockaddr_in),
 		.sin_addr.s_addr = daddr.s_addr & maddr.s_addr,
 	};
 
 	struct ifnet *ifp = ia->ia_ifp;
 
 	if ((maddr.s_addr == INADDR_BROADCAST) &&
 	    (!(ia->ia_ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)))) {
 		/* Case 2: host route on broadcast interface */
 		ifp = V_loif;
 	}
 
 	uint32_t fibnum = ifa->ifa_ifp->if_fib;
 	NET_EPOCH_ENTER(et);
 	error = in_handle_prefix_route(fibnum, cmd, &dst, pmask, ifa, ifp);
 	NET_EPOCH_EXIT(et);
 
 	return (error);
 }
 
 /*
  * Check if we have a route for the given prefix already.
  */
 static bool
 in_hasrtprefix(struct in_ifaddr *target)
 {
 	struct epoch_tracker et;
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	bool result = false;
 
 	ia_getrtprefix(target, &prefix, &mask);
 
 	/* Look for an existing address with the same prefix, mask, and fib */
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		ia_getrtprefix(ia, &p, &m);
 
 		if (prefix.s_addr != p.s_addr ||
 		    mask.s_addr != m.s_addr)
 			continue;
 
 		if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
 			continue;
 
 		/*
 		 * If we got a matching prefix route inserted by other
 		 * interface address, we are done here.
 		 */
 		if (ia->ia_flags & IFA_ROUTE) {
 			result = true;
 			break;
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	return (result);
 }
 
 int
 in_addprefix(struct in_ifaddr *target)
 {
 	int error;
 
 	if (in_hasrtprefix(target)) {
 		if (V_nosameprefix)
 			return (EEXIST);
 		else {
 			rt_addrmsg(RTM_ADD, &target->ia_ifa,
 			    target->ia_ifp->if_fib);
 			return (0);
 		}
 	}
 
 	/*
 	 * No-one seem to have this prefix route, so we try to insert it.
 	 */
 	rt_addrmsg(RTM_ADD, &target->ia_ifa, target->ia_ifp->if_fib);
 	error = in_handle_ifaddr_route(RTM_ADD, target);
 	if (!error)
 		target->ia_flags |= IFA_ROUTE;
 	return (error);
 }
 
 /*
  * Removes either all lle entries for given @ia, or lle
  * corresponding to @ia address.
  */
 static void
 in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags)
 {
 	struct sockaddr_in addr, mask;
 	struct sockaddr *saddr, *smask;
 	struct ifnet *ifp;
 
 	saddr = (struct sockaddr *)&addr;
 	bzero(&addr, sizeof(addr));
 	addr.sin_len = sizeof(addr);
 	addr.sin_family = AF_INET;
 	smask = (struct sockaddr *)&mask;
 	bzero(&mask, sizeof(mask));
 	mask.sin_len = sizeof(mask);
 	mask.sin_family = AF_INET;
 	mask.sin_addr.s_addr = ia->ia_subnetmask;
 	ifp = ia->ia_ifp;
 
 	if (all) {
 		/*
 		 * Remove all L2 entries matching given prefix.
 		 * Convert address to host representation to avoid
 		 * doing this on every callback. ia_subnetmask is already
 		 * stored in host representation.
 		 */
 		addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr);
 		lltable_prefix_free(AF_INET, saddr, smask, flags);
 	} else {
 		/* Remove interface address only */
 		addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr;
 		lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr);
 	}
 }
 
 /*
  * If there is no other address in the system that can serve a route to the
  * same prefix, remove the route.  Hand over the route to the new address
  * otherwise.
  */
 int
 in_scrubprefix(struct in_ifaddr *target, u_int flags)
 {
 	struct epoch_tracker et;
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	int error = 0;
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 */
 	if (ia_need_loopback_route(target) && (flags & LLE_STATIC)) {
 		struct in_ifaddr *eia;
 
 		eia = in_localip_more(target);
 
 		if (eia != NULL) {
 			error = ifa_switch_loopback_route((struct ifaddr *)eia,
 			    (struct sockaddr *)&target->ia_addr);
 			ifa_free(&eia->ia_ifa);
 		} else {
 			error = ifa_del_loopback_route((struct ifaddr *)target,
 			    (struct sockaddr *)&target->ia_addr);
 		}
 	}
 
 	ia_getrtprefix(target, &prefix, &mask);
 
 	if ((target->ia_flags & IFA_ROUTE) == 0) {
 		rt_addrmsg(RTM_DELETE, &target->ia_ifa, target->ia_ifp->if_fib);
 
 		/*
 		 * Removing address from !IFF_UP interface or
 		 * prefix which exists on other interface (along with route).
 		 * No entries should exist here except target addr.
 		 * Given that, delete this entry only.
 		 */
 		in_scrubprefixlle(target, 0, flags);
 		return (0);
 	}
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		ia_getrtprefix(ia, &p, &m);
 
 		if (prefix.s_addr != p.s_addr ||
 		    mask.s_addr != m.s_addr)
 			continue;
 
 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
 			continue;
 
 		/*
 		 * If we got a matching prefix address, move IFA_ROUTE and
 		 * the route itself to it.  Make sure that routing daemons
 		 * get a heads-up.
 		 */
 		if ((ia->ia_flags & IFA_ROUTE) == 0) {
 			ifa_ref(&ia->ia_ifa);
 			NET_EPOCH_EXIT(et);
 			error = in_handle_ifaddr_route(RTM_DELETE, target);
 			if (error == 0)
 				target->ia_flags &= ~IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
 					error);
 			/* Scrub all entries IFF interface is different */
 			in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp,
 			    flags);
 			error = in_handle_ifaddr_route(RTM_ADD, ia);
 			if (error == 0)
 				ia->ia_flags |= IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
 					error);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	/*
 	 * remove all L2 entries on the given prefix
 	 */
 	in_scrubprefixlle(target, 1, flags);
 
 	/*
 	 * As no-one seem to have this prefix, we can remove the route.
 	 */
 	rt_addrmsg(RTM_DELETE, &target->ia_ifa, target->ia_ifp->if_fib);
 	error = in_handle_ifaddr_route(RTM_DELETE, target);
 	if (error == 0)
 		target->ia_flags &= ~IFA_ROUTE;
 	else
 		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
 	return (error);
 }
 
 void
 in_ifscrub_all(void)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa, *nifa;
 	struct ifaliasreq ifr;
 
 	IFNET_RLOCK();
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		/* Cannot lock here - lock recursion. */
 		/* NET_EPOCH_ENTER(et); */
 		CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 
 			/*
 			 * This is ugly but the only way for legacy IP to
 			 * cleanly remove addresses and everything attached.
 			 */
 			bzero(&ifr, sizeof(ifr));
 			ifr.ifra_addr = *ifa->ifa_addr;
 			if (ifa->ifa_dstaddr)
 			ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 			(void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
 			    ifp, NULL);
 		}
 		/* NET_EPOCH_EXIT(et); */
 		in_purgemaddrs(ifp);
 		igmp_domifdetach(ifp);
 	}
 	IFNET_RUNLOCK();
 }
 
 int
 in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
 {
 
 	return ((in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
 	     /*
 	      * Optionally check for old-style (host 0) broadcast, but
 	      * taking into account that RFC 3021 obsoletes it.
 	      */
 	    (V_broadcast_lowest && ia->ia_subnetmask != IN_RFC3021_MASK &&
 	    ntohl(in.s_addr) == ia->ia_subnet)) &&
 	     /*
 	      * Check for an all one subnetmask. These
 	      * only exist when an interface gets a secondary
 	      * address.
 	      */
 	    ia->ia_subnetmask != (u_long)0xffffffff);
 }
 
 /*
  * Return 1 if the address might be a local broadcast address.
  */
 int
 in_broadcast(struct in_addr in, struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	int found;
 
 	NET_EPOCH_ASSERT();
 
 	if (in.s_addr == INADDR_BROADCAST ||
 	    in.s_addr == INADDR_ANY)
 		return (1);
 	if ((ifp->if_flags & IFF_BROADCAST) == 0)
 		return (0);
 	found = 0;
 	/*
 	 * Look through the list of addresses for a match
 	 * with a broadcast address.
 	 */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) {
 			found = 1;
 			break;
 		}
 	return (found);
 }
 
 /*
  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
  */
 void
 in_ifdetach(struct ifnet *ifp)
 {
 	IN_MULTI_LOCK();
 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
 	in_pcbpurgeif0(&V_udbinfo, ifp);
 	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in_purgemaddrs(ifp);
 	IN_MULTI_UNLOCK();
 
 	/*
 	 * Make sure all multicast deletions invoking if_ioctl() are
 	 * completed before returning. Else we risk accessing a freed
 	 * ifnet structure pointer.
 	 */
 	inm_release_wait(NULL);
 }
 
+static void
+in_ifnet_event(void *arg __unused, struct ifnet *ifp, int event)
+{
+	struct epoch_tracker et;
+	struct ifaddr *ifa;
+	struct in_ifaddr *ia;
+	int error;
+
+	NET_EPOCH_ENTER(et);
+	switch (event) {
+	case IFNET_EVENT_DOWN:
+		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != AF_INET)
+				continue;
+			ia = (struct in_ifaddr *)ifa;
+			if ((ia->ia_flags & IFA_ROUTE) == 0)
+				continue;
+			ifa_ref(ifa);
+			/*
+			 * in_scrubprefix() kills the interface route.
+			 */
+			in_scrubprefix(ia, 0);
+			/*
+			 * in_ifadown gets rid of all the rest of the
+			 * routes.  This is not quite the right thing
+			 * to do, but at least if we are running a
+			 * routing process they will come back.
+			 */
+			in_ifadown(ifa, 0);
+			ifa_free(ifa);
+		}
+		break;
+
+	case IFNET_EVENT_UP:
+		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != AF_INET)
+				continue;
+			ia = (struct in_ifaddr *)ifa;
+			if (ia->ia_flags & IFA_ROUTE)
+				continue;
+			ifa_ref(ifa);
+			error = ifa_del_loopback_route(ifa, ifa->ifa_addr);
+			rt_addrmsg(RTM_ADD, ifa, ifa->ifa_ifp->if_fib);
+			error = in_handle_ifaddr_route(RTM_ADD, ia);
+			if (error == 0)
+				ia->ia_flags |= IFA_ROUTE;
+			error = ifa_add_loopback_route(ifa, ifa->ifa_addr);
+			ifa_free(ifa);
+		}
+		break;
+	}
+	NET_EPOCH_EXIT(et);
+}
+EVENTHANDLER_DEFINE(ifnet_event, in_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
+
 /*
  * Delete all IPv4 multicast address records, and associated link-layer
  * multicast address records, associated with ifp.
  * XXX It looks like domifdetach runs AFTER the link layer cleanup.
  * XXX This should not race with ifma_protospec being set during
  * a new allocation, if it does, we have bigger problems.
  */
 static void
 in_purgemaddrs(struct ifnet *ifp)
 {
 	struct in_multi_head purgeinms;
 	struct in_multi		*inm;
 	struct ifmultiaddr	*ifma, *next;
 
 	SLIST_INIT(&purgeinms);
 	IN_MULTI_LIST_LOCK();
 
 	/*
 	 * Extract list of in_multi associated with the detaching ifp
 	 * which the PF_INET layer is about to release.
 	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
 	 * by code further down.
 	 */
 	IF_ADDR_WLOCK(ifp);
  restart:
 	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		inm_rele_locked(&purgeinms, inm);
 		if (__predict_false(ifma_restart)) {
 			ifma_restart = true;
 			goto restart;
 		}
 	}
 	IF_ADDR_WUNLOCK(ifp);
 
 	inm_release_list_deferred(&purgeinms);
 	igmp_ifdetach(ifp);
 	IN_MULTI_LIST_UNLOCK();
 }
 
 struct in_llentry {
 	struct llentry		base;
 };
 
 #define	IN_LLTBL_DEFAULT_HSIZE	32
 #define	IN_LLTBL_HASH(k, h) \
 	(((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1))
 
 /*
  * Do actual deallocation of @lle.
  */
 static void
 in_lltable_destroy_lle_unlocked(epoch_context_t ctx)
 {
 	struct llentry *lle;
 
 	lle = __containerof(ctx, struct llentry, lle_epoch_ctx);
 	LLE_LOCK_DESTROY(lle);
 	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Called by LLE_FREE_LOCKED when number of references
  * drops to zero.
  */
 static void
 in_lltable_destroy_lle(struct llentry *lle)
 {
 
 	LLE_WUNLOCK(lle);
 	NET_EPOCH_CALL(in_lltable_destroy_lle_unlocked, &lle->lle_epoch_ctx);
 }
 
 static struct llentry *
 in_lltable_new(struct in_addr addr4, u_int flags)
 {
 	struct in_llentry *lle;
 
 	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
 	/*
 	 * For IPv4 this will trigger "arpresolve" to generate
 	 * an ARP request.
 	 */
 	lle->base.la_expire = time_uptime; /* mark expired */
 	lle->base.r_l3addr.addr4 = addr4;
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in_lltable_destroy_lle;
 	LLE_LOCK_INIT(&lle->base);
 	LLE_REQ_INIT(&lle->base);
 	callout_init(&lle->base.lle_timer, 1);
 
 	return (&lle->base);
 }
 
 #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)	(		\
 	((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 )
 
 static int
 in_lltable_match_prefix(const struct sockaddr *saddr,
     const struct sockaddr *smask, u_int flags, struct llentry *lle)
 {
 	struct in_addr addr, mask, lle_addr;
 
 	addr = ((const struct sockaddr_in *)saddr)->sin_addr;
 	mask = ((const struct sockaddr_in *)smask)->sin_addr;
 	lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
 
 	if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0)
 		return (0);
 
 	if (lle->la_flags & LLE_IFADDR) {
 		/*
 		 * Delete LLE_IFADDR records IFF address & flag matches.
 		 * Note that addr is the interface address within prefix
 		 * being matched.
 		 * Note also we should handle 'ifdown' cases without removing
 		 * ifaddr macs.
 		 */
 		if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0)
 			return (1);
 		return (0);
 	}
 
 	/* flags & LLE_STATIC means deleting both dynamic and static entries */
 	if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))
 		return (1);
 
 	return (0);
 }
 
 static void
 in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
 {
 	size_t pkts_dropped;
 
 	LLE_WLOCK_ASSERT(lle);
 	KASSERT(llt != NULL, ("lltable is NULL"));
 
 	/* Unlink entry from table if not already */
 	if ((lle->la_flags & LLE_LINKED) != 0) {
 		IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
 		lltable_unlink_entry(llt, lle);
 	}
 
 	/* Drop hold queue */
 	pkts_dropped = llentry_free(lle);
 	ARPSTAT_ADD(dropped, pkts_dropped);
 }
 
 static int
 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
 {
 	struct nhop_object *nh;
 	struct in_addr addr;
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	addr = ((const struct sockaddr_in *)l3addr)->sin_addr;
 
 	nh = fib4_lookup(ifp->if_fib, addr, 0, NHR_NONE, 0);
 	if (nh == NULL)
 		return (EINVAL);
 
 	/*
 	 * If the gateway for an existing host route matches the target L3
 	 * address, which is a special route inserted by some implementation
 	 * such as MANET, and the interface is of the correct type, then
 	 * allow for ARP to proceed.
 	 */
 	if (nh->nh_flags & NHF_GATEWAY) {
 		if (!(nh->nh_flags & NHF_HOST) || nh->nh_ifp->if_type != IFT_ETHER ||
 		    (nh->nh_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
 		    memcmp(nh->gw_sa.sa_data, l3addr->sa_data,
 		    sizeof(in_addr_t)) != 0) {
 			return (EINVAL);
 		}
 	}
 
 	/*
 	 * Make sure that at least the destination address is covered
 	 * by the route. This is for handling the case where 2 or more
 	 * interfaces have the same prefix. An incoming packet arrives
 	 * on one interface and the corresponding outgoing packet leaves
 	 * another interface.
 	 */
 	if ((nh->nh_ifp != ifp) && (nh->nh_flags & NHF_HOST) == 0) {
 		struct in_ifaddr *ia = (struct in_ifaddr *)ifaof_ifpforaddr(l3addr, ifp);
 		struct in_addr dst_addr, mask_addr;
 
 		if (ia == NULL)
 			return (EINVAL);
 
 		/*
 		 * ifaof_ifpforaddr() returns _best matching_ IFA.
 		 * It is possible that ifa prefix does not cover our address.
 		 * Explicitly verify and fail if that's the case.
 		 */
 		dst_addr = IA_SIN(ia)->sin_addr;
 		mask_addr.s_addr = htonl(ia->ia_subnetmask);
 
 		if (!IN_ARE_MASKED_ADDR_EQUAL(dst_addr, addr, mask_addr))
 			return (EINVAL);
 	}
 
 	return (0);
 }
 
 static inline uint32_t
 in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
 {
 
 	return (IN_LLTBL_HASH(dst.s_addr, hsize));
 }
 
 static uint32_t
 in_lltable_hash(const struct llentry *lle, uint32_t hsize)
 {
 
 	return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize));
 }
 
 static void
 in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
 {
 	struct sockaddr_in *sin;
 
 	sin = (struct sockaddr_in *)sa;
 	bzero(sin, sizeof(*sin));
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = lle->r_l3addr.addr4;
 }
 
 static inline struct llentry *
 in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
 {
 	struct llentry *lle;
 	struct llentries *lleh;
 	u_int hashidx;
 
 	hashidx = in_lltable_hash_dst(dst, llt->llt_hsize);
 	lleh = &llt->lle_head[hashidx];
 	CK_LIST_FOREACH(lle, lleh, lle_next) {
 		if (lle->la_flags & LLE_DELETED)
 			continue;
 		if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
 			break;
 	}
 
 	return (lle);
 }
 
 static void
 in_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	lle->la_flags |= LLE_DELETED;
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 	llentry_free(lle);
 }
 
 static struct llentry *
 in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/*
 	 * A route that covers the given address must have
 	 * been installed 1st because we are doing a resolution,
 	 * verify this.
 	 */
 	if (!(flags & LLE_IFADDR) &&
 	    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
 		return (NULL);
 
 	lle = in_lltable_new(sin->sin_addr, flags);
 	if (lle == NULL) {
 		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 		return (NULL);
 	}
 	lle->la_flags = flags;
 	if (flags & LLE_STATIC)
 		lle->r_flags |= RLLE_VALID;
 	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
 		linkhdrsize = LLE_MAX_LINKHDR;
 		if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
 			in_lltable_free_entry(llt, lle);
 			return (NULL);
 		}
 		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
 		    lladdr_off);
 		lle->la_flags |= LLE_STATIC;
 		lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
 	}
 
 	return (lle);
 }
 
 /*
  * Return NULL if not found or marked for deletion.
  * If found return lle read locked.
  */
 static struct llentry *
 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
 	struct llentry *lle;
 
 	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 	KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) !=
 	    (LLE_UNLOCKED | LLE_EXCLUSIVE),
 	    ("wrong lle request flags: %#x", flags));
 
 	lle = in_lltable_find_dst(llt, sin->sin_addr);
 	if (lle == NULL)
 		return (NULL);
 	if (flags & LLE_UNLOCKED)
 		return (lle);
 
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WLOCK(lle);
 	else
 		LLE_RLOCK(lle);
 
 	/*
 	 * If the afdata lock is not held, the LLE may have been unlinked while
 	 * we were blocked on the LLE lock.  Check for this case.
 	 */
 	if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) {
 		if (flags & LLE_EXCLUSIVE)
 			LLE_WUNLOCK(lle);
 		else
 			LLE_RUNLOCK(lle);
 		return (NULL);
 	}
 	return (lle);
 }
 
 static int
 in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
     struct sysctl_req *wr)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in	sin;
 		struct sockaddr_dl	sdl;
 	} arpc;
 	struct sockaddr_dl *sdl;
 	int error;
 
 	bzero(&arpc, sizeof(arpc));
 	/* skip deleted entries */
 	if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
 		return (0);
 	/* Skip if jailed and not a valid IP of the prison. */
 	lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin);
 	if (prison_if(wr->td->td_ucred, (struct sockaddr *)&arpc.sin) != 0)
 		return (0);
 	/*
 	 * produce a msg made of:
 	 *  struct rt_msghdr;
 	 *  struct sockaddr_in; (IPv4)
 	 *  struct sockaddr_dl;
 	 */
 	arpc.rtm.rtm_msglen = sizeof(arpc);
 	arpc.rtm.rtm_version = RTM_VERSION;
 	arpc.rtm.rtm_type = RTM_GET;
 	arpc.rtm.rtm_flags = RTF_UP;
 	arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 
 	/* publish */
 	if (lle->la_flags & LLE_PUB)
 		arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 	sdl = &arpc.sdl;
 	sdl->sdl_family = AF_LINK;
 	sdl->sdl_len = sizeof(*sdl);
 	sdl->sdl_index = ifp->if_index;
 	sdl->sdl_type = ifp->if_type;
 	if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
 		sdl->sdl_alen = ifp->if_addrlen;
 		bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
 	} else {
 		sdl->sdl_alen = 0;
 		bzero(LLADDR(sdl), ifp->if_addrlen);
 	}
 
 	arpc.rtm.rtm_rmx.rmx_expire =
 	    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
 	arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 	if (lle->la_flags & LLE_STATIC)
 		arpc.rtm.rtm_flags |= RTF_STATIC;
 	if (lle->la_flags & LLE_IFADDR)
 		arpc.rtm.rtm_flags |= RTF_PINNED;
 	arpc.rtm.rtm_index = ifp->if_index;
 	error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
 
 	return (error);
 }
 
 static void
 in_lltable_post_resolved(struct lltable *llt, struct llentry *lle)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 
 	/* gratuitous ARP */
 	if ((lle->la_flags & LLE_PUB) != 0)
 		arprequest(ifp, &lle->r_l3addr.addr4, &lle->r_l3addr.addr4,
 		    lle->ll_addr);
 }
 
 static struct lltable *
 in_lltattach(struct ifnet *ifp)
 {
 	struct lltable *llt;
 
 	llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
  	llt->llt_af = AF_INET;
  	llt->llt_ifp = ifp;
 
 	llt->llt_lookup = in_lltable_lookup;
 	llt->llt_alloc_entry = in_lltable_alloc;
 	llt->llt_delete_entry = in_lltable_delete_entry;
 	llt->llt_dump_entry = in_lltable_dump_entry;
 	llt->llt_hash = in_lltable_hash;
 	llt->llt_fill_sa_entry = in_lltable_fill_sa_entry;
 	llt->llt_free_entry = in_lltable_free_entry;
 	llt->llt_match_prefix = in_lltable_match_prefix;
 	llt->llt_mark_used = llentry_mark_used;
 	llt->llt_post_resolved = in_lltable_post_resolved;
  	lltable_link(llt);
 
 	return (llt);
 }
 
 struct lltable *
 in_lltable_get(struct ifnet *ifp)
 {
 	struct lltable *llt = NULL;
 
 	void *afdata_ptr = ifp->if_afdata[AF_INET];
 	if (afdata_ptr != NULL)
 		llt = ((struct in_ifinfo *)afdata_ptr)->ii_llt;
 	return (llt);
 }
 
 void *
 in_domifattach(struct ifnet *ifp)
 {
 	struct in_ifinfo *ii;
 
 	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
 
 	ii->ii_llt = in_lltattach(ifp);
 	ii->ii_igmp = igmp_domifattach(ifp);
 
 	return (ii);
 }
 
 void
 in_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
 
 	igmp_domifdetach(ifp);
 	lltable_free(ii->ii_llt);
 	free(ii, M_IFADDR);
 }
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
index a6902159e739..c3f936b444dc 100644
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@@ -1,469 +1,468 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1985, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_var.h	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_VAR_H_
 #define _NETINET_IN_VAR_H_
 
 /*
  * Argument structure for SIOCAIFADDR.
  */
 struct	in_aliasreq {
 	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
 	struct	sockaddr_in ifra_addr;
 	struct	sockaddr_in ifra_broadaddr;
 #define ifra_dstaddr ifra_broadaddr
 	struct	sockaddr_in ifra_mask;
 	int	ifra_vhid;
 };
 
 #ifdef _KERNEL
 #include <sys/queue.h>
 #include <sys/fnv_hash.h>
 #include <sys/tree.h>
 
 struct igmp_ifsoftc;
 struct in_multi;
 struct lltable;
 SLIST_HEAD(in_multi_head, in_multi);
 
 /*
  * IPv4 per-interface state.
  */
 struct in_ifinfo {
 	struct lltable		*ii_llt;	/* ARP state */
 	struct igmp_ifsoftc	*ii_igmp;	/* IGMP state */
 	struct in_multi		*ii_allhosts;	/* 224.0.0.1 membership */
 };
 
 /*
  * Interface address, Internet version.  One of these structures
  * is allocated for each Internet address on an interface.
  * The ifaddr structure contains the protocol-independent part
  * of the structure and is assumed to be first.
  */
 struct in_ifaddr {
 	struct	ifaddr ia_ifa;		/* protocol-independent info */
 #define	ia_ifp		ia_ifa.ifa_ifp
 #define ia_flags	ia_ifa.ifa_flags
 					/* ia_subnet{,mask} in host order */
 	u_long	ia_subnet;		/* subnet address */
 	u_long	ia_subnetmask;		/* mask of subnet */
 	CK_LIST_ENTRY(in_ifaddr) ia_hash;	/* hash of internet addresses */
 	CK_STAILQ_ENTRY(in_ifaddr) ia_link;	/* list of internet addresses */
 	struct	sockaddr_in ia_addr;	/* reserve space for interface name */
 	struct	sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
 #define	ia_broadaddr	ia_dstaddr
 	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
 	struct	callout ia_garp_timer;	/* timer for retransmitting GARPs */
 	int	ia_garp_count;		/* count of retransmitted GARPs */
 };
 
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
  */
 #define IA_SIN(ia)    (&(((struct in_ifaddr *)(ia))->ia_addr))
 #define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
 #define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask))
 
 #define IN_LNAOF(in, ifa) \
 	((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
 
 extern	u_char	inetctlerrmap[];
 
 #define LLTABLE(ifp)	\
 	((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt
 /*
  * Hash table for IP addresses.
  */
 CK_STAILQ_HEAD(in_ifaddrhead, in_ifaddr);
 CK_LIST_HEAD(in_ifaddrhashhead, in_ifaddr);
 
 VNET_DECLARE(struct in_ifaddrhashhead *, in_ifaddrhashtbl);
 VNET_DECLARE(struct in_ifaddrhead, in_ifaddrhead);
 VNET_DECLARE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
 #define	V_in_ifaddrhashtbl	VNET(in_ifaddrhashtbl)
 #define	V_in_ifaddrhead		VNET(in_ifaddrhead)
 #define	V_in_ifaddrhmask	VNET(in_ifaddrhmask)
 
 #define INADDR_NHASH_LOG2       9
 #define INADDR_NHASH		(1 << INADDR_NHASH_LOG2)
 #define INADDR_HASHVAL(x)	fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT)
 #define INADDR_HASH(x) \
 	(&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask])
 
 /*
  * Macro for finding the internet address structure (in_ifaddr)
  * corresponding to one of our IP addresses (in_addr).
  */
 #define INADDR_TO_IFADDR(addr, ia) \
 	/* struct in_addr addr; */ \
 	/* struct in_ifaddr *ia; */ \
 do {									\
 	NET_EPOCH_ASSERT();						\
 	CK_LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash)	\
 		if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr)	\
 			break;						\
 } while (0)
 
 /*
  * Macro for finding the interface (ifnet structure) corresponding to one
  * of our IP addresses.
  */
 #define INADDR_TO_IFP(addr, ifp) \
 	/* struct in_addr addr; */ \
 	/* struct ifnet *ifp; */ \
 { \
 	struct in_ifaddr *ia; \
 \
 	INADDR_TO_IFADDR(addr, ia); \
 	(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
 }
 
 /*
  * Macro for finding the internet address structure (in_ifaddr) corresponding
  * to a given interface (ifnet structure).
  */
 #define IFP_TO_IA(ifp, ia)						\
 	/* struct ifnet *ifp; */					\
 	/* struct in_ifaddr *ia; */					\
 do {									\
 	NET_EPOCH_ASSERT();						\
 	for ((ia) = CK_STAILQ_FIRST(&V_in_ifaddrhead);			\
 	    (ia) != NULL && (ia)->ia_ifp != (ifp);			\
 	    (ia) = CK_STAILQ_NEXT((ia), ia_link))			\
 		continue;						\
 } while (0)
 
 /*
  * Legacy IPv4 IGMP per-link structure.
  */
 struct router_info {
 	struct ifnet *rti_ifp;
 	int    rti_type; /* type of router which is querier on this interface */
 	int    rti_time; /* # of slow timeouts since last old query */
 	SLIST_ENTRY(router_info) rti_list;
 };
 
 /*
  * IPv4 multicast IGMP-layer source entry.
  */
 struct ip_msource {
 	RB_ENTRY(ip_msource)	ims_link;	/* RB tree links */
 	in_addr_t		ims_haddr;	/* host byte order */
 	struct ims_st {
 		uint16_t	ex;		/* # of exclusive members */
 		uint16_t	in;		/* # of inclusive members */
 	}			ims_st[2];	/* state at t0, t1 */
 	uint8_t			ims_stp;	/* pending query */
 };
 
 /*
  * IPv4 multicast PCB-layer source entry.
  */
 struct in_msource {
 	RB_ENTRY(ip_msource)	ims_link;	/* RB tree links */
 	in_addr_t		ims_haddr;	/* host byte order */
 	uint8_t			imsl_st[2];	/* state before/at commit */
 };
 
 RB_HEAD(ip_msource_tree, ip_msource);	/* define struct ip_msource_tree */
 
 static __inline int
 ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b)
 {
 
 	if (a->ims_haddr < b->ims_haddr)
 		return (-1);
 	if (a->ims_haddr == b->ims_haddr)
 		return (0);
 	return (1);
 }
 RB_PROTOTYPE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
 
 /*
  * IPv4 multicast PCB-layer group filter descriptor.
  */
 struct in_mfilter {
 	struct ip_msource_tree	imf_sources; /* source list for (S,G) */
 	u_long			imf_nsrc;    /* # of source entries */
 	uint8_t			imf_st[2];   /* state before/at commit */
 	struct in_multi	       *imf_inm;     /* associated multicast address */
 	STAILQ_ENTRY(in_mfilter) imf_entry;  /* list entry */
 };
 
 /*
  * Helper types and functions for IPv4 multicast filters.
  */
 STAILQ_HEAD(ip_mfilter_head, in_mfilter);
 
 struct in_mfilter *ip_mfilter_alloc(int mflags, int st0, int st1);
 void ip_mfilter_free(struct in_mfilter *);
 
 static inline void
 ip_mfilter_init(struct ip_mfilter_head *head)
 {
 
 	STAILQ_INIT(head);
 }
 
 static inline struct in_mfilter *
 ip_mfilter_first(const struct ip_mfilter_head *head)
 {
 
 	return (STAILQ_FIRST(head));
 }
 
 static inline void
 ip_mfilter_insert(struct ip_mfilter_head *head, struct in_mfilter *imf)
 {
 
 	STAILQ_INSERT_TAIL(head, imf, imf_entry);
 }
 
 static inline void
 ip_mfilter_remove(struct ip_mfilter_head *head, struct in_mfilter *imf)
 {
 
 	STAILQ_REMOVE(head, imf, in_mfilter, imf_entry);
 }
 
 #define	IP_MFILTER_FOREACH(imf, head) \
 	STAILQ_FOREACH(imf, head, imf_entry)
 
 static inline size_t
 ip_mfilter_count(struct ip_mfilter_head *head)
 {
 	struct in_mfilter *imf;
 	size_t num = 0;
 
 	STAILQ_FOREACH(imf, head, imf_entry)
 		num++;
 	return (num);
 }
 
 /*
  * IPv4 group descriptor.
  *
  * For every entry on an ifnet's if_multiaddrs list which represents
  * an IP multicast group, there is one of these structures.
  *
  * If any source filters are present, then a node will exist in the RB-tree
  * to permit fast lookup by source whenever an operation takes place.
  * This permits pre-order traversal when we issue reports.
  * Source filter trees are kept separately from the socket layer to
  * greatly simplify locking.
  *
  * When IGMPv3 is active, inm_timer is the response to group query timer.
  * The state-change timer inm_sctimer is separate; whenever state changes
  * for the group the state change record is generated and transmitted,
  * and kept if retransmissions are necessary.
  *
  * FUTURE: inm_link is now only used when groups are being purged
  * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but
  * because it is at the very start of the struct, we can't do this
  * w/o breaking the ABI for ifmcstat.
  */
 struct in_multi {
 	LIST_ENTRY(in_multi) inm_link;	/* to-be-released by in_ifdetach */
 	struct	in_addr inm_addr;	/* IP multicast address, convenience */
 	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
 	struct	ifmultiaddr *inm_ifma;	/* back pointer to ifmultiaddr */
 	u_int	inm_timer;		/* IGMPv1/v2 group / v3 query timer */
 	u_int	inm_state;		/* state of the membership */
 	void	*inm_rti;		/* unused, legacy field */
 	u_int	inm_refcount;		/* reference count */
 
 	/* New fields for IGMPv3 follow. */
 	struct igmp_ifsoftc	*inm_igi;	/* IGMP info */
 	SLIST_ENTRY(in_multi)	 inm_nrele;	/* to-be-released by IGMP */
 	struct ip_msource_tree	 inm_srcs;	/* tree of sources */
 	u_long			 inm_nsrc;	/* # of tree entries */
 
 	struct mbufq		 inm_scq;	/* queue of pending
 						 * state-change packets */
 	struct timeval		 inm_lastgsrtv;	/* Time of last G-S-R query */
 	uint16_t		 inm_sctimer;	/* state-change timer */
 	uint16_t		 inm_scrv;	/* state-change rexmit count */
 
 	/*
 	 * SSM state counters which track state at T0 (the time the last
 	 * state-change report's RV timer went to zero) and T1
 	 * (time of pending report, i.e. now).
 	 * Used for computing IGMPv3 state-change reports. Several refcounts
 	 * are maintained here to optimize for common use-cases.
 	 */
 	struct inm_st {
 		uint16_t	iss_fmode;	/* IGMP filter mode */
 		uint16_t	iss_asm;	/* # of ASM listeners */
 		uint16_t	iss_ex;		/* # of exclusive members */
 		uint16_t	iss_in;		/* # of inclusive members */
 		uint16_t	iss_rec;	/* # of recorded sources */
 	}			inm_st[2];	/* state at t0, t1 */
 };
 
 /*
  * Helper function to derive the filter mode on a source entry
  * from its internal counters. Predicates are:
  *  A source is only excluded if all listeners exclude it.
  *  A source is only included if no listeners exclude it,
  *  and at least one listener includes it.
  * May be used by ifmcstat(8).
  */
 static __inline uint8_t
 ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims,
     uint8_t t)
 {
 
 	t = !!t;
 	if (inm->inm_st[t].iss_ex > 0 &&
 	    inm->inm_st[t].iss_ex == ims->ims_st[t].ex)
 		return (MCAST_EXCLUDE);
 	else if (ims->ims_st[t].in > 0 && ims->ims_st[t].ex == 0)
 		return (MCAST_INCLUDE);
 	return (MCAST_UNDEFINED);
 }
 
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_raw);
 #endif
 
 /*
  * Lock macros for IPv4 layer multicast address lists.  IPv4 lock goes
  * before link layer multicast locks in the lock order.  In most cases,
  * consumers of IN_*_MULTI() macros should acquire the locks before
  * calling them; users of the in_{add,del}multi() functions should not.
  */
 extern struct mtx in_multi_list_mtx;
 extern struct sx in_multi_sx;
 
 #define	IN_MULTI_LIST_LOCK()		mtx_lock(&in_multi_list_mtx)
 #define	IN_MULTI_LIST_UNLOCK()	mtx_unlock(&in_multi_list_mtx)
 #define	IN_MULTI_LIST_LOCK_ASSERT()	mtx_assert(&in_multi_list_mtx, MA_OWNED)
 #define	IN_MULTI_LIST_UNLOCK_ASSERT() mtx_assert(&in_multi_list_mtx, MA_NOTOWNED)
 
 #define	IN_MULTI_LOCK()		sx_xlock(&in_multi_sx)
 #define	IN_MULTI_UNLOCK()	sx_xunlock(&in_multi_sx)
 #define	IN_MULTI_LOCK_ASSERT()	sx_assert(&in_multi_sx, SA_XLOCKED)
 #define	IN_MULTI_UNLOCK_ASSERT() sx_assert(&in_multi_sx, SA_XUNLOCKED)
 
 void inm_disconnect(struct in_multi *inm);
 extern int ifma_restart;
 
 /* Acquire an in_multi record. */
 static __inline void
 inm_acquire_locked(struct in_multi *inm)
 {
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 	++inm->inm_refcount;
 }
 
 static __inline void
 inm_acquire(struct in_multi *inm)
 {
 	IN_MULTI_LIST_LOCK();
 	inm_acquire_locked(inm);
 	IN_MULTI_LIST_UNLOCK();
 }
 
 static __inline void
 inm_rele_locked(struct in_multi_head *inmh, struct in_multi *inm)
 {
 	MPASS(inm->inm_refcount > 0);
 	IN_MULTI_LIST_LOCK_ASSERT();
 
 	if (--inm->inm_refcount == 0) {
 		MPASS(inmh != NULL);
 		inm_disconnect(inm);
 		inm->inm_ifma->ifma_protospec = NULL;
 		SLIST_INSERT_HEAD(inmh, inm, inm_nrele);
 	}
 }
 
 /*
  * Return values for imo_multi_filter().
  */
 #define MCAST_PASS		0	/* Pass */
 #define MCAST_NOTGMEMBER	1	/* This host not a member of group */
 #define MCAST_NOTSMEMBER	2	/* This host excluded source */
 #define MCAST_MUTED		3	/* [deprecated] */
 
 struct rib_head;
 struct	ip_moptions;
 
 struct in_multi *inm_lookup_locked(struct ifnet *, const struct in_addr);
 struct in_multi *inm_lookup(struct ifnet *, const struct in_addr);
 int	imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
 	    const struct sockaddr *, const struct sockaddr *);
 void	inm_commit(struct in_multi *);
 void	inm_clear_recorded(struct in_multi *);
 void	inm_print(const struct in_multi *);
 int	inm_record_source(struct in_multi *inm, const in_addr_t);
 void	inm_release_deferred(struct in_multi *);
 void	inm_release_list_deferred(struct in_multi_head *);
 void	inm_release_wait(void *);
 int	in_joingroup(struct ifnet *, const struct in_addr *,
 	    /*const*/ struct in_mfilter *, struct in_multi **);
 int	in_joingroup_locked(struct ifnet *, const struct in_addr *,
 	    /*const*/ struct in_mfilter *, struct in_multi **);
 int	in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *);
 int	in_leavegroup_locked(struct in_multi *,
 	    /*const*/ struct in_mfilter *);
 int	in_control(struct socket *, u_long, caddr_t, struct ifnet *,
 	    struct thread *);
 int	in_addprefix(struct in_ifaddr *);
 int	in_scrubprefix(struct in_ifaddr *, u_int);
 void	in_ifscrub_all(void);
-int	in_handle_ifaddr_route(int, struct in_ifaddr *);
 void	ip_input(struct mbuf *);
 void	ip_direct_input(struct mbuf *);
 void	in_ifadown(struct ifaddr *ifa, int);
 struct	mbuf	*ip_tryforward(struct mbuf *);
 void	*in_domifattach(struct ifnet *);
 void	in_domifdetach(struct ifnet *, void *);
 struct rib_head *in_inithead(uint32_t fibnum);
 #ifdef VIMAGE
 void	in_detachhead(struct rib_head *rh);
 #endif
 
 #endif /* _KERNEL */
 
 /* INET6 stuff */
 #include <netinet6/in6_var.h>
 
 #endif /* _NETINET_IN_VAR_H_ */
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 1f631e108a49..0bd874c717e6 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -1,1157 +1,1104 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/route/route_ctl.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_icmp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
     "Maximum TTL on IP packets");
 
 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo		VNET(ripcbinfo)
 
 /*
  * Control and data hooks for ipfw, dummynet, divert and so on.
  * The data hooks are not used here but it is convenient
  * to keep them all in one place.
  */
 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
 
 int	(*ip_dn_ctl_ptr)(struct sockopt *);
 int	(*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
 void	(*ip_divert_ptr)(struct mbuf *, bool);
 int	(*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
 
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip_mrouter);
 
 /*
  * The various mrouter and rsvp functions.
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 #endif /* INET */
 
 extern	struct protosw inetsw[];
 
 u_long	rip_sendspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 
 u_long	rip_recvspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 /*
  * Hash functions
  */
 
 #define INP_PCBHASH_RAW_SIZE	256
 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
         (((proto) + (laddr) + (faddr)) % (mask) + 1)
 
 #ifdef INET
 static void
 rip_inshash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *pcbhash;
 	int hash;
 
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if (inp->inp_ip_p != 0 &&
 	    inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != INADDR_ANY) {
 		hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
 		    inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
 	} else
 		hash = 0;
 	pcbhash = &pcbinfo->ipi_hashbase[hash];
 	CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 }
 
 static void
 rip_delhash(struct inpcb *inp)
 {
 
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CK_LIST_REMOVE(inp, inp_hash);
 }
 #endif /* INET */
 
 INPCBSTORAGE_DEFINE(ripcbstor, "rawinp", "ripcb", "rip", "riphash");
 
 static void
 rip_init(void *arg __unused)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, &ripcbstor, INP_PCBHASH_RAW_SIZE, 1);
 }
 VNET_SYSINIT(rip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rip_init, NULL);
 
 #ifdef VIMAGE
 static void
 rip_destroy(void *unused __unused)
 {
 
 	in_pcbinfo_destroy(&V_ripcbinfo);
 }
 VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
 #endif
 
 #ifdef INET
 static int
 rip_append(struct inpcb *inp, struct ip *ip, struct mbuf *m,
     struct sockaddr_in *ripsrc)
 {
 	struct socket *so = inp->inp_socket;
 	struct mbuf *n, *opts = NULL;
 
 	INP_LOCK_ASSERT(inp);
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* check AH/ESP integrity. */
 	if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, m, inp) != 0)
 		return (0);
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, m) != 0)
 		return (0);
 #endif
 	/* Check the minimum TTL for socket. */
 	if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
 		return (0);
 
 	if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 		return (0);
 
 	if ((inp->inp_flags & INP_CONTROLOPTS) ||
 	    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
 		ip_savecontrol(inp, &opts, ip, n);
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv,
 	    (struct sockaddr *)ripsrc, n, opts) == 0) {
 		soroverflow_locked(so);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
 		return (0);
 	}
 	sorwakeup_locked(so);
 
 	return (1);
 }
 
 struct rip_inp_match_ctx {
 	struct ip *ip;
 	int proto;
 };
 
 static bool
 rip_inp_match1(const struct inpcb *inp, void *v)
 {
 	struct rip_inp_match_ctx *ctx = v;
 
 	if (inp->inp_ip_p != ctx->proto)
 		return (false);
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (inp->inp_laddr.s_addr != ctx->ip->ip_dst.s_addr)
 		return (false);
 	if (inp->inp_faddr.s_addr != ctx->ip->ip_src.s_addr)
 		return (false);
 	return (true);
 }
 
 static bool
 rip_inp_match2(const struct inpcb *inp, void *v)
 {
 	struct rip_inp_match_ctx *ctx = v;
 
 	if (inp->inp_ip_p && inp->inp_ip_p != ctx->proto)
 		return (false);
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (!in_nullhost(inp->inp_laddr) &&
 	    !in_hosteq(inp->inp_laddr, ctx->ip->ip_dst))
 		return (false);
 	if (!in_nullhost(inp->inp_faddr) &&
 	    !in_hosteq(inp->inp_faddr, ctx->ip->ip_src))
 		return (false);
 	return (true);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  */
 int
 rip_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct rip_inp_match_ctx ctx = {
 		.ip = mtod(*mp, struct ip *),
 		.proto = proto,
 	};
 	struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
 	    INPLOOKUP_RLOCKPCB, rip_inp_match1, &ctx);
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	struct inpcb *inp;
 	struct sockaddr_in ripsrc;
 	int appended;
 
 	*mp = NULL;
 	appended = 0;
 
 	bzero(&ripsrc, sizeof(ripsrc));
 	ripsrc.sin_len = sizeof(ripsrc);
 	ripsrc.sin_family = AF_INET;
 	ripsrc.sin_addr = ctx.ip->ip_src;
 
 	ifp = m->m_pkthdr.rcvif;
 
 	inpi.hash = INP_PCBHASH_RAW(proto, ctx.ip->ip_src.s_addr,
 	    ctx.ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (jailed_without_vnet(inp->inp_cred) &&
 		    prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0) {
 			/*
 			 * XXX: If faddr was bound to multicast group,
 			 * jailed raw socket will drop datagram.
 			 */
 			continue;
 		}
 		appended += rip_append(inp, ctx.ip, m, &ripsrc);
 	}
 
 	inpi.hash = 0;
 	inpi.match = rip_inp_match2;
 	MPASS(inpi.inp == NULL);
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (jailed_without_vnet(inp->inp_cred) &&
 		    !IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr)) &&
 		    prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0)
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			continue;
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->inp_moptions != NULL &&
 		    IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr))) {
 			/*
 			 * If the incoming datagram is for IGMP, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * In the case of IGMPv2, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. imo_multi_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if (proto != IPPROTO_IGMP) {
 				struct sockaddr_in group;
 
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ctx.ip->ip_dst;
 
 				blocked = imo_multi_filter(inp->inp_moptions,
 				    ifp,
 				    (struct sockaddr *)&group,
 				    (struct sockaddr *)&ripsrc);
 			}
 
 			if (blocked != MCAST_PASS) {
 				IPSTAT_INC(ips_notmember);
 				continue;
 			}
 		}
 		appended += rip_append(inp, ctx.ip, m, &ripsrc);
 	}
 	if (appended == 0 &&
 	    inetsw[ip_protox[ctx.ip->ip_p]].pr_input == rip_input) {
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
 	} else
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.  Tack on options user may
  * have setup with control call.
  */
 int
 rip_output(struct mbuf *m, struct socket *so, ...)
 {
 	struct epoch_tracker et;
 	struct ip *ip;
 	int error;
 	struct inpcb *inp = sotoinpcb(so);
 	va_list ap;
 	u_long dst;
 	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 	int cnt, hlen;
 	u_char opttype, optlen, *cp;
 
 	va_start(ap, so);
 	dst = va_arg(ap, u_long);
 	va_end(ap);
 
 	/*
 	 * If the user handed us a complete IP packet, use it.  Otherwise,
 	 * allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = htons(IP_DF);
 		else
 			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = dst;
 #ifdef ROUTE_MPATH
 		if (CALC_FLOWID_OUTBOUND) {
 			uint32_t hash_type, hash_val;
 
 			hash_val = fib4_calc_software_hash(ip->ip_src,
 			    ip->ip_dst, 0, 0, ip->ip_p, &hash_type);
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 			flags |= IP_NODEFAULTFLOWID;
 		}
 #endif
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
 			 * want to see from jails.
 			 */
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				NET_EPOCH_ENTER(et);
 				error = in_pcbladdr(inp, &ip->ip_dst,
 				    &ip->ip_src, inp->inp_cred);
 				NET_EPOCH_EXIT(et);
 			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
 			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (error);
 			}
 		}
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return (EMSGSIZE);
 		}
 		if (m->m_pkthdr.len < sizeof(*ip)) {
 			m_freem(m);
 			return (EINVAL);
 		}
 		m = m_pullup(m, sizeof(*ip));
 		if (m == NULL)
 			return (ENOMEM);
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		if (m->m_len < hlen) {
 			m = m_pullup(m, hlen);
 			if (m == NULL)
 				return (EINVAL);
 			ip = mtod(m, struct ip *);
 		}
 #ifdef ROUTE_MPATH
 		if (CALC_FLOWID_OUTBOUND) {
 			uint32_t hash_type, hash_val;
 
 			hash_val = fib4_calc_software_hash(ip->ip_dst,
 			    ip->ip_src, 0, 0, ip->ip_p, &hash_type);
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 			flags |= IP_NODEFAULTFLOWID;
 		}
 #endif
 		INP_RLOCK(inp);
 		/*
 		 * Don't allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash.
 		 */
 		if ((hlen < sizeof (*ip))
 		    || ((hlen > sizeof (*ip)) && inp->inp_options)
 		    || (ntohs(ip->ip_len) != m->m_pkthdr.len)) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (EINVAL);
 		}
 		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
 		if (error != 0) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 		/*
 		 * Don't allow IP options which do not have the required
 		 * structure as specified in section 3.1 of RFC 791 on
 		 * pages 15-23.
 		 */
 		cp = (u_char *)(ip + 1);
 		cnt = hlen - sizeof (struct ip);
 		for (; cnt > 0; cnt -= optlen, cp += optlen) {
 			opttype = cp[IPOPT_OPTVAL];
 			if (opttype == IPOPT_EOL)
 				break;
 			if (opttype == IPOPT_NOP) {
 				optlen = 1;
 				continue;
 			}
 			if (cnt < IPOPT_OLEN + sizeof(u_char)) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(u_char) ||
 			    optlen > cnt) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 		}
 		/*
 		 * This doesn't allow application to specify ID of zero,
 		 * but we got this limitation from the beginning of history.
 		 */
 		if (ip->ip_id == 0)
 			ip_fillid(ip);
 
 		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
 		IPSTAT_INC(ips_rawout);
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	NET_EPOCH_ENTER(et);
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	NET_EPOCH_EXIT(et);
 	INP_RUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  *
  * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_GET:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
-/*
- * This function exists solely to receive the PRC_IFDOWN messages which are
- * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
- * in_ifadown() to remove all routes corresponding to that address.  It also
- * receives the PRC_IFUP messages from if_up() and reinstalls the interface
- * routes.
- */
 void
 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
-	struct in_ifaddr *ia;
-	int err;
-
-	NET_EPOCH_ASSERT();
 
 	switch (cmd) {
-	case PRC_IFDOWN:
-		CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
-			if (ia->ia_ifa.ifa_addr == sa
-			    && (ia->ia_flags & IFA_ROUTE)) {
-				ifa_ref(&ia->ia_ifa);
-				/*
-				 * in_scrubprefix() kills the interface route.
-				 */
-				in_scrubprefix(ia, 0);
-				/*
-				 * in_ifadown gets rid of all the rest of the
-				 * routes.  This is not quite the right thing
-				 * to do, but at least if we are running a
-				 * routing process they will come back.
-				 */
-				in_ifadown(&ia->ia_ifa, 0);
-				ifa_free(&ia->ia_ifa);
-				break;
-			}
-		}
-		break;
-
-	case PRC_IFUP:
-		CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
-			if (ia->ia_ifa.ifa_addr == sa)
-				break;
-		}
-		if (ia == NULL || (ia->ia_flags & IFA_ROUTE))
-			return;
-		ifa_ref(&ia->ia_ifa);
-
-		err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
-
-		rt_addrmsg(RTM_ADD, &ia->ia_ifa, ia->ia_ifp->if_fib);
-		err = in_handle_ifaddr_route(RTM_ADD, ia);
-		if (err == 0)
-			ia->ia_flags |= IFA_ROUTE;
-
-		err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
-
-		ifa_free(&ia->ia_ifa);
-		break;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	case PRC_MSGSIZE:
 		if (IPSEC_ENABLED(ipv4))
 			IPSEC_CTLINPUT(ipv4, cmd, sa, vip);
 		break;
 #endif
 	}
 }
 
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
 
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return (error);
 	if (proto >= IPPROTO_MAX || proto < 0)
 		return EPROTONOSUPPORT;
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return (error);
 	error = in_pcballoc(so, &V_ripcbinfo);
 	if (error)
 		return (error);
 	inp = (struct inpcb *)so->so_pcb;
 	inp->inp_ip_p = proto;
 	inp->inp_ip_ttl = V_ip_defttl;
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 	    ("rip_detach: not closed"));
 
 	/* Disable mrouter first */
 	if (so == V_ip_mrouter && ip_mrouter_done)
 		ip_mrouter_done();
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	if (so == V_ip_rsvpd)
 		ip_rsvp_done();
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = inp->inp_pcbinfo;
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	rip_delhash(inp);
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 }
 
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 }
 
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_close: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 }
 
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 	return (0);
 }
 
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 	int error;
 
 	if (nam->sa_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 
 	error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
 	if (error != 0)
 		return (error);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
 
 	if (CK_STAILQ_EMPTY(&V_ifnet) ||
 	    (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
 	    (addr->sin_addr.s_addr &&
 	     (inp->inp_flags & INP_BINDANY) == 0 &&
 	     ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
 		return (EADDRNOTAVAIL);
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	inp->inp_laddr = addr->sin_addr;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if (CK_STAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
 		return (EAFNOSUPPORT);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	inp->inp_faddr = addr->sin_addr;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	soisconnected(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
 
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	u_long dst;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_send: inp == NULL"));
 
 	if (control != NULL) {
 		m_freem(control);
 		control = NULL;
 	}
 
 	/*
 	 * Note: 'dst' reads below are unlocked.
 	 */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			error = EISCONN;
 			goto release;
 		}
 		dst = inp->inp_faddr.s_addr;	/* Unlocked read. */
 	} else {
 		error = 0;
 		if (nam == NULL)
 			error = ENOTCONN;
 		else if (nam->sa_family != AF_INET)
 			error = EAFNOSUPPORT;
 		else if (nam->sa_len != sizeof(struct sockaddr_in))
 			error = EINVAL;
 		if (error != 0)
 			goto release;
 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 	}
 	return (rip_output(m, so, dst));
 
 release:
 	m_freem(m);
 	return (error);
 }
 #endif /* INET */
 
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_ripcbinfo,
 	    INPLOOKUP_RLOCKPCB);
 	struct xinpgen xig;
 	struct inpcb *inp;
 	int error;
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	if (req->oldptr == 0) {
 		int n;
 
 		n = V_ripcbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (error);
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = V_ripcbinfo.ipi_count;
 	xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		if (inp->inp_gencnt <= xig.xig_gen &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			struct xinpcb xi;
 
 			in_pcbtoxinpcb(inp, &xi);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 			if (error) {
 				INP_RUNLOCK(inp);
 				break;
 			}
 		}
 	}
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_ripcbinfo.ipi_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     rip_pcblist, "S,xinpcb",
     "List of active raw IP sockets");
 
 #ifdef INET
 struct pr_usrreqs rip_usrreqs = {
 	.pru_abort =		rip_abort,
 	.pru_attach =		rip_attach,
 	.pru_bind =		rip_bind,
 	.pru_connect =		rip_connect,
 	.pru_control =		in_control,
 	.pru_detach =		rip_detach,
 	.pru_disconnect =	rip_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		rip_send,
 	.pru_shutdown =		rip_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		rip_close,
 };
 #endif /* INET */
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index 26cd1bc3fc16..85761583c30a 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -1,357 +1,355 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)protosw.h	8.1 (Berkeley) 6/2/93
  * $FreeBSD$
  */
 
 #ifndef _SYS_PROTOSW_H_
 #define _SYS_PROTOSW_H_
 
 #include <sys/queue.h>
 
 /* Forward declare these structures referenced from prototypes below. */
 struct kaiocb;
 struct mbuf;
 struct thread;
 struct sockaddr;
 struct socket;
 struct sockopt;
 
 /*#ifdef _KERNEL*/
 /*
  * Protocol switch table.
  *
  * Each protocol has a handle initializing one of these structures,
  * which is used for protocol-protocol and system-protocol communication.
  *
  * Thereafter it is called every 200ms through the pr_fasttimo entry and
  * every 500ms through the pr_slowtimo for timer based actions.
  * The system will call the pr_drain entry if it is low on space and
  * this should throw away any non-critical data.
  *
  * Protocols pass data between themselves as chains of mbufs using
  * the pr_input and pr_output hooks.  Pr_input passes data up (towards
  * the users) and pr_output passes it down (towards the interfaces); control
  * information passes up and down on pr_ctlinput and pr_ctloutput.
  * The protocol is responsible for the space occupied by any the
  * arguments to these entries and must dispose it.
  *
  * In retrospect, it would be a lot nicer to use an interface
  * similar to the vnode VOP interface.
  */
 /* USE THESE FOR YOUR PROTOTYPES ! */
 typedef int	pr_input_t (struct mbuf **, int*, int);
 typedef int	pr_output_t (struct mbuf *, struct socket *, ...);
 typedef void	pr_ctlinput_t (int, struct sockaddr *, void *);
 typedef int	pr_ctloutput_t (struct socket *, struct sockopt *);
 typedef	void	pr_fasttimo_t (void);
 typedef	void	pr_slowtimo_t (void);
 typedef	void	pr_drain_t (void);
 
 struct protosw {
 	short	pr_type;		/* socket type used for */
 	struct	domain *pr_domain;	/* domain protocol a member of */
 	short	pr_protocol;		/* protocol number */
 	short	pr_flags;		/* see below */
 /* protocol-protocol hooks */
 	pr_input_t *pr_input;		/* input to protocol (from below) */
 	pr_output_t *pr_output;		/* output to protocol (from above) */
 	pr_ctlinput_t *pr_ctlinput;	/* control input (from below) */
 	pr_ctloutput_t *pr_ctloutput;	/* control output (from above) */
 /* utility hooks */
 	pr_fasttimo_t *pr_fasttimo;	/* fast timeout (200ms) */
 	pr_slowtimo_t *pr_slowtimo;	/* slow timeout (500ms) */
 	pr_drain_t *pr_drain;		/* flush any excess space possible */
 
 	struct	pr_usrreqs *pr_usrreqs;	/* user-protocol hook */
 	LIST_ENTRY(protosw)  pr_fasttimos;
 	LIST_ENTRY(protosw)  pr_slowtimos;
 };
 /*#endif*/
 
 #define	PR_SLOWHZ	2		/* 2 slow timeouts per second */
 #define	PR_FASTHZ	5		/* 5 fast timeouts per second */
 
 /*
  * This number should be defined again within each protocol family to avoid
  * confusion.
  */
 #define	PROTO_SPACER	32767		/* spacer for loadable protocols */
 
 /*
  * Values for pr_flags.
  * PR_ADDR requires PR_ATOMIC;
  * PR_ADDR and PR_CONNREQUIRED are mutually exclusive.
  * PR_IMPLOPCL means that the protocol allows sendto without prior connect,
  *	and the protocol understands the MSG_EOF flag.  The first property is
  *	is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed
  *	anyhow).
  * PR_SOCKBUF requires protocol to initialize and destroy its socket buffers
  * in its pr_attach and pr_detach.
  */
 #define	PR_ATOMIC	0x01		/* exchange atomic messages only */
 #define	PR_ADDR		0x02		/* addresses given with messages */
 #define	PR_CONNREQUIRED	0x04		/* connection required by protocol */
 #define	PR_WANTRCVD	0x08		/* want PRU_RCVD calls */
 #define	PR_RIGHTS	0x10		/* passes capabilities */
 #define PR_IMPLOPCL	0x20		/* implied open/close */
 #define	PR_LASTHDR	0x40		/* enforce ipsec policy; last header */
 #define	PR_CAPATTACH	0x80		/* socket can attach in cap mode */
 #define	PR_SOCKBUF	0x100		/* private implementation of buffers */
 
 /*
  * In earlier BSD network stacks, a single pr_usrreq() function pointer was
  * invoked with an operation number indicating what operation was desired.
  * We now provide individual function pointers which protocols can implement,
  * which offers a number of benefits (such as type checking for arguments).
  * These older constants are still present in order to support TCP debugging.
  */
 #define	PRU_ATTACH		0	/* attach protocol to up */
 #define	PRU_DETACH		1	/* detach protocol from up */
 #define	PRU_BIND		2	/* bind socket to address */
 #define	PRU_LISTEN		3	/* listen for connection */
 #define	PRU_CONNECT		4	/* establish connection to peer */
 #define	PRU_ACCEPT		5	/* accept connection from peer */
 #define	PRU_DISCONNECT		6	/* disconnect from peer */
 #define	PRU_SHUTDOWN		7	/* won't send any more data */
 #define	PRU_RCVD		8	/* have taken data; more room now */
 #define	PRU_SEND		9	/* send this data */
 #define	PRU_ABORT		10	/* abort (fast DISCONNECT, DETATCH) */
 #define	PRU_CONTROL		11	/* control operations on protocol */
 #define	PRU_SENSE		12	/* return status into m */
 #define	PRU_RCVOOB		13	/* retrieve out of band data */
 #define	PRU_SENDOOB		14	/* send out of band data */
 #define	PRU_SOCKADDR		15	/* fetch socket's address */
 #define	PRU_PEERADDR		16	/* fetch peer's address */
 #define	PRU_CONNECT2		17	/* connect two sockets */
 /* begin for protocols internal use */
 #define	PRU_FASTTIMO		18	/* 200ms timeout */
 #define	PRU_SLOWTIMO		19	/* 500ms timeout */
 #define	PRU_PROTORCV		20	/* receive from below */
 #define	PRU_PROTOSEND		21	/* send to below */
 /* end for protocol's internal use */
 #define PRU_SEND_EOF		22	/* send and close */
 #define	PRU_SOSETLABEL		23	/* MAC label change */
 #define	PRU_CLOSE		24	/* socket close */
 #define	PRU_FLUSH		25	/* flush the socket */
 #define	PRU_NREQ		25
 
 #ifdef PRUREQUESTS
 const char *prurequests[] = {
 	"ATTACH",	"DETACH",	"BIND",		"LISTEN",
 	"CONNECT",	"ACCEPT",	"DISCONNECT",	"SHUTDOWN",
 	"RCVD",		"SEND",		"ABORT",	"CONTROL",
 	"SENSE",	"RCVOOB",	"SENDOOB",	"SOCKADDR",
 	"PEERADDR",	"CONNECT2",	"FASTTIMO",	"SLOWTIMO",
 	"PROTORCV",	"PROTOSEND",	"SEND_EOF",	"SOSETLABEL",
 	"CLOSE",	"FLUSH",
 };
 #endif
 
 #ifdef	_KERNEL			/* users shouldn't see this decl */
 
 struct ifnet;
 struct stat;
 struct ucred;
 struct uio;
 
 /*
  * If the ordering here looks odd, that's because it's alphabetical.  These
  * should eventually be merged back into struct protosw.
  *
  * Some fields initialized to defaults if they are NULL.
  */
 struct pr_usrreqs {
 	void	(*pru_abort)(struct socket *so);
 	int	(*pru_accept)(struct socket *so, struct sockaddr **nam);
 	int	(*pru_attach)(struct socket *so, int proto, struct thread *td);
 	int	(*pru_bind)(struct socket *so, struct sockaddr *nam,
 		    struct thread *td);
 	int	(*pru_connect)(struct socket *so, struct sockaddr *nam,
 		    struct thread *td);
 	int	(*pru_connect2)(struct socket *so1, struct socket *so2);
 	int	(*pru_control)(struct socket *so, u_long cmd, caddr_t data,
 		    struct ifnet *ifp, struct thread *td);
 	void	(*pru_detach)(struct socket *so);
 	int	(*pru_disconnect)(struct socket *so);
 	int	(*pru_listen)(struct socket *so, int backlog,
 		    struct thread *td);
 	int	(*pru_peeraddr)(struct socket *so, struct sockaddr **nam);
 	int	(*pru_rcvd)(struct socket *so, int flags);
 	int	(*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags);
 	int	(*pru_send)(struct socket *so, int flags, struct mbuf *m,
 		    struct sockaddr *addr, struct mbuf *control,
 		    struct thread *td);
 #define	PRUS_OOB	0x1
 #define	PRUS_EOF	0x2
 #define	PRUS_MORETOCOME	0x4
 #define	PRUS_NOTREADY	0x8
 #define	PRUS_IPV6	0x10
 	int	(*pru_ready)(struct socket *so, struct mbuf *m, int count);
 	int	(*pru_sense)(struct socket *so, struct stat *sb);
 	int	(*pru_shutdown)(struct socket *so);
 	int	(*pru_flush)(struct socket *so, int direction);
 	int	(*pru_sockaddr)(struct socket *so, struct sockaddr **nam);
 	int	(*pru_sosend)(struct socket *so, struct sockaddr *addr,
 		    struct uio *uio, struct mbuf *top, struct mbuf *control,
 		    int flags, struct thread *td);
 	int	(*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
 		    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 		    int *flagsp);
 	int	(*pru_sopoll)(struct socket *so, int events,
 		    struct ucred *cred, struct thread *td);
 	void	(*pru_sosetlabel)(struct socket *so);
 	void	(*pru_close)(struct socket *so);
 	int	(*pru_bindat)(int fd, struct socket *so, struct sockaddr *nam,
 		    struct thread *td);
 	int	(*pru_connectat)(int fd, struct socket *so,
 		    struct sockaddr *nam, struct thread *td);
 	int	(*pru_aio_queue)(struct socket *so, struct kaiocb *job);
 };
 
 /*
  * All nonvoid pru_*() functions below return EOPNOTSUPP.
  */
 int	pru_accept_notsupp(struct socket *so, struct sockaddr **nam);
 int	pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job);
 int	pru_attach_notsupp(struct socket *so, int proto, struct thread *td);
 int	pru_bind_notsupp(struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	pru_connect_notsupp(struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	pru_connect2_notsupp(struct socket *so1, struct socket *so2);
 int	pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
 	    struct ifnet *ifp, struct thread *td);
 int	pru_disconnect_notsupp(struct socket *so);
 int	pru_listen_notsupp(struct socket *so, int backlog, struct thread *td);
 int	pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam);
 int	pru_rcvd_notsupp(struct socket *so, int flags);
 int	pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags);
 int	pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
 	    struct sockaddr *addr, struct mbuf *control, struct thread *td);
 int	pru_ready_notsupp(struct socket *so, struct mbuf *m, int count);
 int	pru_sense_null(struct socket *so, struct stat *sb);
 int	pru_shutdown_notsupp(struct socket *so);
 int	pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam);
 int	pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
 int	pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
 	    struct thread *td);
 
 #endif /* _KERNEL */
 
 /*
  * The arguments to the ctlinput routine are
  *	(*protosw[].pr_ctlinput)(cmd, sa, arg);
  * where cmd is one of the commands below, sa is a pointer to a sockaddr,
  * and arg is a `void *' argument used within a protocol family.
  */
-#define	PRC_IFDOWN		0	/* interface transition */
 #define	PRC_ROUTEDEAD		1	/* select new route if possible ??? */
-#define	PRC_IFUP		2	/* interface has come back up */
 /* was	PRC_QUENCH2		3	DEC congestion bit says slow down */
 /* was	PRC_QUENCH		4	Deprecated by RFC 6633 */
 #define	PRC_MSGSIZE		5	/* message size forced drop */
 #define	PRC_HOSTDEAD		6	/* host appears to be down */
 #define	PRC_HOSTUNREACH		7	/* deprecated (use PRC_UNREACH_HOST) */
 #define	PRC_UNREACH_NET		8	/* no route to network */
 #define	PRC_UNREACH_HOST	9	/* no route to host */
 #define	PRC_UNREACH_PROTOCOL	10	/* dst says bad protocol */
 #define	PRC_UNREACH_PORT	11	/* bad port # */
 /* was	PRC_UNREACH_NEEDFRAG	12	   (use PRC_MSGSIZE) */
 #define	PRC_UNREACH_SRCFAIL	13	/* source route failed */
 #define	PRC_REDIRECT_NET	14	/* net routing redirect */
 #define	PRC_REDIRECT_HOST	15	/* host routing redirect */
 #define	PRC_REDIRECT_TOSNET	16	/* redirect for type of service & net */
 #define	PRC_REDIRECT_TOSHOST	17	/* redirect for tos & host */
 #define	PRC_TIMXCEED_INTRANS	18	/* packet lifetime expired in transit */
 #define	PRC_TIMXCEED_REASS	19	/* lifetime expired on reass q */
 #define	PRC_PARAMPROB		20	/* header incorrect */
 #define	PRC_UNREACH_ADMIN_PROHIB	21	/* packet administrativly prohibited */
 
 #define	PRC_NCMDS		22
 
 #define	PRC_IS_REDIRECT(cmd)	\
 	((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST)
 
 #ifdef PRCREQUESTS
 char	*prcrequests[] = {
 	"IFDOWN", "ROUTEDEAD", "IFUP", "DEC-BIT-QUENCH2",
 	"QUENCH", "MSGSIZE", "HOSTDEAD", "#7",
 	"NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH",
 	"#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT",
 	"TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS",
 	"PARAMPROB", "ADMIN-UNREACH"
 };
 #endif
 
 /*
  * The arguments to ctloutput are:
  *	(*protosw[].pr_ctloutput)(req, so, level, optname, optval, p);
  * req is one of the actions listed below, so is a (struct socket *),
  * level is an indication of which protocol layer the option is intended.
  * optname is a protocol dependent socket option request,
  * optval is a pointer to a mbuf-chain pointer, for value-return results.
  * The protocol is responsible for disposal of the mbuf chain *optval
  * if supplied,
  * the caller is responsible for any space held by *optval, when returned.
  * A non-zero return from ctloutput gives an
  * UNIX error number which should be passed to higher level software.
  */
 #define	PRCO_GETOPT	0
 #define	PRCO_SETOPT	1
 
 #define	PRCO_NCMDS	2
 
 #ifdef PRCOREQUESTS
 char	*prcorequests[] = {
 	"GETOPT", "SETOPT",
 };
 #endif
 
 #ifdef _KERNEL
 void	pfctlinput(int, struct sockaddr *);
 struct domain *pffinddomain(int family);
 struct protosw *pffindproto(int family, int protocol, int type);
 struct protosw *pffindtype(int family, int type);
 int	pf_proto_register(int family, struct protosw *npr);
 int	pf_proto_unregister(int family, int protocol, int type);
 #endif
 
 #endif