No OneTemporary
Actions

Size

350 KB

Referenced Files

None

Subscribers

None

View Options

This file is larger than 256 KB, so syntax highlighting was skipped.

	Index: stable/8/sys/amd64/include/xen
	===================================================================
	--- stable/8/sys/amd64/include/xen (revision 209276)
	+++ stable/8/sys/amd64/include/xen (revision 209277)

	Property changes on: stable/8/sys/amd64/include/xen
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/amd64/include/xen:r208553
	Index: stable/8/sys/cddl/contrib/opensolaris
	===================================================================
	--- stable/8/sys/cddl/contrib/opensolaris (revision 209276)
	+++ stable/8/sys/cddl/contrib/opensolaris (revision 209277)

	Property changes on: stable/8/sys/cddl/contrib/opensolaris
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/cddl/contrib/opensolaris:r208553
	Index: stable/8/sys/contrib/dev/acpica
	===================================================================
	--- stable/8/sys/contrib/dev/acpica (revision 209276)
	+++ stable/8/sys/contrib/dev/acpica (revision 209277)

	Property changes on: stable/8/sys/contrib/dev/acpica
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/contrib/dev/acpica:r208553
	Index: stable/8/sys/contrib/pf
	===================================================================
	--- stable/8/sys/contrib/pf (revision 209276)
	+++ stable/8/sys/contrib/pf (revision 209277)

	Property changes on: stable/8/sys/contrib/pf
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/contrib/pf:r208553
	Index: stable/8/sys/dev/xen/xenpci
	===================================================================
	--- stable/8/sys/dev/xen/xenpci (revision 209276)
	+++ stable/8/sys/dev/xen/xenpci (revision 209277)

	Property changes on: stable/8/sys/dev/xen/xenpci
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/dev/xen/xenpci:r208553
	Index: stable/8/sys/geom/sched
	===================================================================
	--- stable/8/sys/geom/sched (revision 209276)
	+++ stable/8/sys/geom/sched (revision 209277)

	Property changes on: stable/8/sys/geom/sched
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys/geom/sched:r208553
	Index: stable/8/sys/net/if.c
	===================================================================
	--- stable/8/sys/net/if.c (revision 209276)
	+++ stable/8/sys/net/if.c (revision 209277)
	@@ -1,3484 +1,3485 @@
	/*-
	* Copyright (c) 1980, 1986, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)if.c 8.5 (Berkeley) 1/9/95
	* $FreeBSD$
	*/

	#include "opt_compat.h"
	#include "opt_inet6.h"
	#include "opt_inet.h"
	#include "opt_carp.h"
	#include "opt_ddb.h"

	#include <sys/param.h>
	#include <sys/types.h>
	#include <sys/conf.h>
	#include <sys/malloc.h>
	#include <sys/sbuf.h>
	#include <sys/bus.h>
	#include <sys/mbuf.h>
	#include <sys/systm.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/protosw.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/refcount.h>
	#include <sys/module.h>
	#include <sys/rwlock.h>
	#include <sys/sockio.h>
	#include <sys/syslog.h>
	#include <sys/sysctl.h>
	#include <sys/taskqueue.h>
	#include <sys/domain.h>
	#include <sys/jail.h>
	#include <machine/stdarg.h>
	#include <vm/uma.h>

	#ifdef DDB
	#include <ddb/ddb.h>
	#endif

	#include <net/if.h>
	#include <net/if_arp.h>
	#include <net/if_clone.h>
	#include <net/if_dl.h>
	#include <net/if_types.h>
	#include <net/if_var.h>
	#include <net/radix.h>
	#include <net/route.h>
	#include <net/vnet.h>

	/*
	 * Protocol-specific headers, pulled in only for the address families
	 * that are configured into the kernel.
	 */
	#if defined(INET) || defined(INET6)
	/*XXX*/
	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#ifdef INET6
	#include <netinet6/in6_var.h>
	#include <netinet6/in6_ifattach.h>
	#endif /* INET6 */
	#endif /* INET || INET6 */
	#ifdef INET
	#include <netinet/if_ether.h>
	#endif /* INET */
	#if defined(INET) || defined(INET6)
	#ifdef DEV_CARP
	#include <netinet/ip_carp.h>
	#endif /* DEV_CARP */
	#endif /* INET || INET6 */

	#include <security/mac/mac_framework.h>

	/*
	 * One slot of the interface index table.  In this file a slot holds NULL
	 * when free, IFNET_HOLD while an index is reserved by if_alloc(), or the
	 * ifnet that owns the index.
	 */
	struct ifindex_entry {
	struct ifnet *ife_ifnet;
	};

	/*
	 * Set once (see if_attach_internal()) when an interface that still uses
	 * if_watchdog attaches; if_check() tests it to decide whether to start
	 * if_slowtimo().
	 */
	static int slowtimo_started;

	/* sysctl tree nodes: net.link and net.link.generic. */
	SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
	SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

	/* Log link state change events */
	static int log_link_state_change = 1;

	SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

	/* Interface description */
	static unsigned int ifdescr_maxlen = 1024;
	SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

	/* malloc type used for if_description strings (freed in if_free_internal()). */
	MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

	/* global sx for non-critical path ifdescr */
	static struct sx ifdescr_sx;
	SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");

	/*
	 * Link-state notification hooks.  These are function pointers that are
	 * left NULL here; presumably the bridge STP, netgraph ether and lagg
	 * code install them when loaded (not visible in this part of the file).
	 */
	void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
	void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
	void (*lagg_linkstate_p)(struct ifnet *ifp, int state);

	/* Token-bucket-regulator dequeue hook; NULL until something installs it. */
	struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

	/*
	 * Forward declarations for file-local functions.
	 *
	 * XXX: Style; these should be sorted alphabetically, and unprototyped
	 * static functions should be prototyped.  Currently they are sorted by
	 * declaration order.
	 */
	static void if_attachdomain(void *);
	static void if_attachdomain1(struct ifnet *);
	static int ifconf(u_long, caddr_t);
	static void if_freemulti(struct ifmultiaddr *);
	static void if_init(void *);
	static void if_grow(void);
	static void if_check(void *);
	static void if_route(struct ifnet *, int flag, int fam);
	static int if_setflag(struct ifnet *, int, int, int *, int);
	static void if_slowtimo(void *);
	static int if_transmit(struct ifnet *ifp, struct mbuf *m);
	static void if_unroute(struct ifnet *, int flag, int fam);
	static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
	static int if_rtdel(struct radix_node *, void *);
	static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
	static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
	static void do_link_state_change(void *, int);
	static int if_getgroup(struct ifgroupreq *, struct ifnet *);
	static int if_getgroupmembers(struct ifgroupreq *);
	static void if_delgroups(struct ifnet *);
	static void if_attach_internal(struct ifnet *, int);
	static void if_detach_internal(struct ifnet *, int);

	#ifdef INET6
	/*
	* XXX: declare here to avoid to include many inet6 related files..
	* should be more generalized?
	*/
	extern void nd6_setmtu(struct ifnet *);
	#endif

	/* Highest interface index currently in use (see ifindex_alloc_locked()). */
	VNET_DEFINE(int, if_index);
	/* Default send-queue length applied by ifq_init() when none is set. */
	int ifqmaxlen = IFQ_MAXLEN;
	VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
	VNET_DEFINE(struct ifgrouphead, ifg_head);

	/*
	 * Number of slots in ifindex_table.  if_grow() doubles this value before
	 * allocating, so the first table created has 16 slots.
	 */
	static VNET_DEFINE(int, if_indexlim) = 8;

	/* Table of ifnet by index. */
	static VNET_DEFINE(struct ifindex_entry *, ifindex_table);

	/* Accessors for the two static virtualized variables above. */
	#define V_if_indexlim VNET(if_indexlim)
	#define V_ifindex_table VNET(ifindex_table)

	/*
	* The global network interface list (V_ifnet) and related state (such as
	* if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
	* an rwlock. Either may be acquired shared to stabilize the list, but both
	* must be acquired writable to modify the list. This model allows us to
	* both stabilize the interface list during interrupt thread processing, but
	* also to stabilize it over long-running ioctls, without introducing priority
	* inversions and deadlocks.
	*/
	struct rwlock ifnet_rwlock;
	struct sx ifnet_sxlock;

	/*
	* The allocation of network interfaces is a rather non-atomic affair; we
	* need to select an index before we are ready to expose the interface for
	* use, so will use this pointer value to indicate reservation.
	*/
	#define IFNET_HOLD (void *)(uintptr_t)(-1)

	/*
	 * Per-interface-type layer 2 allocation and free routines, indexed by the
	 * u_char type passed to if_alloc(); a NULL entry means no layer 2 common
	 * structure is allocated for that type.
	 */
	static if_com_alloc_t *if_com_alloc[256];
	static if_com_free_t *if_com_free[256];

	/*
	* System initialization
	*/
	SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);

	/* malloc types used for ifnets, interface addresses and multicast addresses. */
	MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
	MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
	MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

	/*
	 * Return the ifnet stored at index idx, or NULL when idx is beyond the
	 * highest index in use or the slot is only reserved (IFNET_HOLD).  A free
	 * slot also yields NULL because that is what the table holds.  No lock is
	 * taken and no reference is acquired here.
	 */
	struct ifnet *
	ifnet_byindex_locked(u_short idx)
	{
		struct ifnet *slot;

		if (idx > V_if_index)
			return (NULL);

		slot = V_ifindex_table[idx].ife_ifnet;
		return (slot == IFNET_HOLD ? NULL : slot);
	}

	/*
	 * Locked wrapper around ifnet_byindex_locked(): takes the ifnet read lock
	 * for the duration of the lookup only.  The returned pointer carries no
	 * reference, so nothing here keeps it valid after the lock is dropped.
	 */
	struct ifnet *
	ifnet_byindex(u_short idx)
	{
		struct ifnet *found;

		IFNET_RLOCK_NOSLEEP();
		found = ifnet_byindex_locked(idx);
		IFNET_RUNLOCK_NOSLEEP();

		return (found);
	}

	/*
	 * Look up the interface at index idx and return it with an additional
	 * reference (if_ref()) taken while the ifnet read lock is held.  Returns
	 * NULL when there is no interface at that index or when the interface is
	 * already marked IFF_DYING.  The caller presumably drops the reference
	 * with if_rele().
	 */
	struct ifnet *
	ifnet_byindex_ref(u_short idx)
	{
		struct ifnet *ifp;

		IFNET_RLOCK_NOSLEEP();
		ifp = ifnet_byindex_locked(idx);
		if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
			IFNET_RUNLOCK_NOSLEEP();
			return (NULL);
		}
		if_ref(ifp);
		IFNET_RUNLOCK_NOSLEEP();
		return (ifp);
	}

	/*
	 * Pick an interface index and store it in *idxp.  The lowest free slot at
	 * or below V_if_index is preferred; otherwise the slot just past it is
	 * used, V_if_index is raised, and the table is grown when the limit is
	 * reached.  Returns 0 on success or ENOSPC when the u_short index wrapped.
	 * The ifnet write lock must be held.
	 */
	static int
	ifindex_alloc_locked(u_short *idxp)
	{
		u_short slot;

		IFNET_WLOCK_ASSERT();

		/* Scan upward from 1 for the first empty slot. */
		slot = 1;
		while (slot <= V_if_index &&
		    V_ifindex_table[slot].ife_ifnet != NULL)
			slot++;

		/* A wrapped counter lands on 0: the index space is exhausted. */
		if (slot == 0)
			return (ENOSPC);

		if (slot > V_if_index)
			V_if_index = slot;
		if (V_if_index >= V_if_indexlim)
			if_grow();

		*idxp = slot;
		return (0);
	}

	/*
	 * Release index idx by clearing its table slot, then lower V_if_index
	 * past any trailing empty slots.  The ifnet write lock must be held.
	 */
	static void
	ifindex_free_locked(u_short idx)
	{

		IFNET_WLOCK_ASSERT();

		V_ifindex_table[idx].ife_ifnet = NULL;

		for (; V_if_index > 0; V_if_index--) {
			if (V_ifindex_table[V_if_index].ife_ifnet != NULL)
				break;
		}
	}

	/*
	 * Release index idx; wrapper that takes the ifnet write lock around
	 * ifindex_free_locked().
	 */
	static void
	ifindex_free(u_short idx)
	{

	IFNET_WLOCK();
	ifindex_free_locked(idx);
	IFNET_WUNLOCK();
	}

	/*
	 * Store ifp (an ifnet, IFNET_HOLD, or NULL) in the table slot for idx.
	 * The ifnet write lock must already be held; idx is not range-checked.
	 */
	static void
	ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
	{

	IFNET_WLOCK_ASSERT();

	V_ifindex_table[idx].ife_ifnet = ifp;
	}

	/*
	 * Store ifp in the table slot for idx; wrapper that takes the ifnet write
	 * lock around ifnet_setbyindex_locked().
	 */
	static void
	ifnet_setbyindex(u_short idx, struct ifnet *ifp)
	{

	IFNET_WLOCK();
	ifnet_setbyindex_locked(idx, ifp);
	IFNET_WUNLOCK();
	}

	/*
	 * Return the link-level address (if_addr) of the interface at index idx
	 * with a reference taken via ifa_ref(), or NULL when there is no
	 * interface at that index or it has no if_addr.
	 *
	 * ifnet_byindex_locked() returns NULL for an unused, reserved or
	 * out-of-range index, so its result must be checked before it is
	 * dereferenced.
	 */
	struct ifaddr *
	ifaddr_byindex(u_short idx)
	{
		struct ifnet *ifp;
		struct ifaddr *ifa = NULL;

		IFNET_RLOCK_NOSLEEP();
		ifp = ifnet_byindex_locked(idx);
		if (ifp != NULL) {
			ifa = ifp->if_addr;
			if (ifa != NULL)
				ifa_ref(ifa);
		}
		IFNET_RUNLOCK_NOSLEEP();
		return (ifa);
	}

	/*
	* Network interface utility routines.
	*
	* Routines with ifa_ifwith* names take sockaddr *'s as
	* parameters.
	*/

	/*
	 * Per-vnet initialization: set up the empty interface and interface-group
	 * lists, allocate the first index table, and initialize interface cloning
	 * for this vnet.  Registered with VNET_SYSINIT at SI_SUB_INIT_IF,
	 * SI_ORDER_FIRST.
	 */
	static void
	vnet_if_init(const void *unused __unused)
	{

	TAILQ_INIT(&V_ifnet);
	TAILQ_INIT(&V_ifg_head);
	if_grow(); /* create initial table */
	vnet_if_clone_init();
	}
	VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_init,
	NULL);

	/*
	 * Global (non-virtualized) initialization: set up the ifnet locks and the
	 * interface cloning framework.  Registered with SYSINIT at
	 * SI_SUB_INIT_IF, SI_ORDER_SECOND.
	 */
	/* ARGSUSED*/
	static void
	if_init(void *dummy __unused)
	{

	IFNET_LOCK_INIT();
	if_clone_init();
	}
	SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);


	#ifdef VIMAGE
	/*
	 * Per-vnet teardown: assert that no interfaces or interface groups remain
	 * in this vnet, then free its index table.
	 */
	static void
	vnet_if_uninit(const void *unused __unused)
	{

	VNET_ASSERT(TAILQ_EMPTY(&V_ifnet));
	VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head));

	free((caddr_t)V_ifindex_table, M_IFNET);
	}
	VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
	vnet_if_uninit, NULL);
	#endif

	static void
	if_grow(void)
	{
	u_int n;
	struct ifindex_entry *e;

	V_if_indexlim <<= 1;
	n = V_if_indexlim * sizeof(*e);
	e = malloc(n, M_IFNET, M_WAITOK \| M_ZERO);
	if (V_ifindex_table != NULL) {
	memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
	free((caddr_t)V_ifindex_table, M_IFNET);
	}
	V_ifindex_table = e;
	}

	/*
	 * Boot-time hook (registered via SYSINIT as interface_check): start the
	 * slow timer if some interface attached during boot asked for it by
	 * using if_watchdog.
	 */
	static void
	if_check(void *dummy __unused)
	{

		/* Nothing to do unless an if_watchdog user attached during boot. */
		if (!slowtimo_started)
			return;

		if_slowtimo(0);
	}

	/*
	 * Allocate a struct ifnet and an index for an interface.  A layer 2
	 * common structure will also be allocated if an allocation routine is
	 * registered for the passed type.
	 *
	 * The index slot is first reserved with IFNET_HOLD so that lookups do not
	 * see the half-built ifnet; the real pointer is published only once
	 * initialization is complete.  Returns the new ifnet holding one
	 * reference (the index reference), or NULL when no index is available or
	 * the layer 2 allocation fails.
	 */
	struct ifnet *
	if_alloc(u_char type)
	{
		struct ifnet *ifp;
		u_short idx;

		ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO);

		/* Reserve an index before the ifnet is visible to anyone. */
		IFNET_WLOCK();
		if (ifindex_alloc_locked(&idx) != 0) {
			IFNET_WUNLOCK();
			free(ifp, M_IFNET);
			return (NULL);
		}
		ifnet_setbyindex_locked(idx, IFNET_HOLD);
		IFNET_WUNLOCK();

		ifp->if_index = idx;
		ifp->if_type = type;
		ifp->if_alloctype = type;
		if (if_com_alloc[type] != NULL) {
			ifp->if_l2com = if_com_alloc[type](type, ifp);
			if (ifp->if_l2com == NULL) {
				free(ifp, M_IFNET);
				ifindex_free(idx);
				return (NULL);
			}
		}

		IF_ADDR_LOCK_INIT(ifp);
		TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
		ifp->if_afdata_initialized = 0;
		IF_AFDATA_LOCK_INIT(ifp);
		TAILQ_INIT(&ifp->if_addrhead);
		TAILQ_INIT(&ifp->if_prefixhead);
		TAILQ_INIT(&ifp->if_multiaddrs);
		TAILQ_INIT(&ifp->if_groups);
	#ifdef MAC
		mac_ifnet_init(ifp);
	#endif
		ifq_init(&ifp->if_snd, ifp);

		refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
		/* Replace the IFNET_HOLD reservation with the real ifnet. */
		ifnet_setbyindex(ifp->if_index, ifp);
		return (ifp);
	}

	/*
	* Do the actual work of freeing a struct ifnet, associated index, and layer
	* 2 common structure. This call is made when the last reference to an
	* interface is released.
	*/
	static void
	if_free_internal(struct ifnet *ifp)
	{

	KASSERT((ifp->if_flags & IFF_DYING),
	("if_free_internal: interface not dying"));

	IFNET_WLOCK();
	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
	("%s: freeing unallocated ifnet", ifp->if_xname));

	ifindex_free_locked(ifp->if_index);
	IFNET_WUNLOCK();

	if (if_com_free[ifp->if_alloctype] != NULL)
	if_com_free[ifp->if_alloctype](ifp->if_l2com,
	ifp->if_alloctype);

	#ifdef MAC
	mac_ifnet_destroy(ifp);
	#endif /* MAC */
	if (ifp->if_description != NULL)
	free(ifp->if_description, M_IFDESCR);
	IF_AFDATA_DESTROY(ifp);
	IF_ADDR_LOCK_DESTROY(ifp);
	ifq_delete(&ifp->if_snd);
	free(ifp, M_IFNET);
	}

	/*
	 * This version should only be called by interfaces that switch their type
	 * after calling if_alloc().  if_free_type() will go away again now that we
	 * have if_alloctype to cache the original allocation type.  For now, assert
	 * that they match, since we require that in practice.
	 *
	 * Marks the interface IFF_DYING and drops one reference; the ifnet is
	 * actually freed only when that was the last reference.
	 */
	void
	if_free_type(struct ifnet *ifp, u_char type)
	{

		KASSERT(ifp->if_alloctype == type,
		    ("if_free_type: type (%d) != alloctype (%d)", type,
		    ifp->if_alloctype));

		ifp->if_flags |= IFF_DYING;	/* XXX: Locking */
		if (!refcount_release(&ifp->if_refcount))
			return;
		if_free_internal(ifp);
	}

	/*
	* This is the normal version of if_free(), used by device drivers to free a
	* detached network interface. The contents of if_free_type() will move into
	* here when if_free_type() goes away.
	*
	* Currently it just forwards to if_free_type() with the type recorded at
	* allocation time.
	*/
	void
	if_free(struct ifnet *ifp)
	{

	if_free_type(ifp, ifp->if_alloctype);
	}

	/*
	* Interfaces to keep an ifnet type-stable despite the possibility of the
	* driver calling if_free(). If there are additional references, we defer
	* freeing the underlying data structure.
	*
	* if_ref() takes one additional reference on ifp.
	*/
	void
	if_ref(struct ifnet *ifp)
	{

	/* We don't assert the ifnet list lock here, but arguably should. */
	refcount_acquire(&ifp->if_refcount);
	}

	/*
	 * Drop one reference on ifp; when it was the last one, tear the ifnet
	 * down with if_free_internal().
	 */
	void
	if_rele(struct ifnet *ifp)
	{

		if (refcount_release(&ifp->if_refcount))
			if_free_internal(ifp);
	}

	void
	ifq_init(struct ifaltq ifq, struct ifnet ifp)
	{

	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	if (ifq->ifq_maxlen == 0)
	ifq->ifq_maxlen = ifqmaxlen;

	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	}

	/*
	 * Counterpart of ifq_init(): destroy the send queue's mutex.  Nothing
	 * else is released here.
	 */
	void
	ifq_delete(struct ifaltq *ifq)
	{
	mtx_destroy(&ifq->ifq_mtx);
	}

	/*
	* Perform generic interface initialization tasks and attach the interface
	* to the list of "active" interfaces. If vmove flag is set on entry
	* to if_attach_internal(), perform only a limited subset of initialization
	* tasks, given that we are moving from one vnet to another an ifnet which
	* has already been fully initialized.
	*
	* XXX:
	* - The decision to return void and thus require this function to
	* succeed is questionable.
	* - We should probably do more sanity checking. For instance we don't
	* do anything to ensure if_xname is unique or non-empty.
	*/
	void
	if_attach(struct ifnet *ifp)
	{

	/* vmove == 0: full initialization of a freshly allocated ifnet. */
	if_attach_internal(ifp, 0);
	}

	static void
	if_attach_internal(struct ifnet *ifp, int vmove)
	{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	if (ifp->if_index == 0 \|\| ifp != ifnet_byindex(ifp->if_index))
	panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
	ifp->if_xname);

	#ifdef VIMAGE
	ifp->if_vnet = curvnet;
	if (ifp->if_home_vnet == NULL)
	ifp->if_home_vnet = curvnet;
	#endif

	if_addgroup(ifp, IFG_ALL);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_data.ifi_epoch = time_uptime;
	ifp->if_data.ifi_datalen = sizeof(struct if_data);

	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) \|\|
	(ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	("transmit and qflush must both either be set or both be NULL"));
	if (ifp->if_transmit == NULL) {
	ifp->if_transmit = if_transmit;
	ifp->if_qflush = if_qflush;
	}

	if (!vmove) {
	#ifdef MAC
	mac_ifnet_create(ifp);
	#endif

	/*
	* Create a Link Level name for this device.
	*/
	namelen = strlen(ifp->if_xname);
	/*
	* Always save enough space for any possiable name so we
	* can do a rename in place later.
	*/
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
	socksize = sizeof(*sdl);
	socksize = roundup2(socksize, sizeof(long));
	ifasize = sizeof(ifa) + 2 socksize;
	ifa = malloc(ifasize, M_IFADDR, M_WAITOK \| M_ZERO);
	ifa_init(ifa);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_addr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
	sdl->sdl_data[--namelen] = 0xff;
	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
	/* Reliably crash if used uninitialized. */
	ifp->if_broadcastaddr = NULL;
	}
	#ifdef VIMAGE
	else {
	/*
	* Update the interface index in the link layer address
	* of the interface.
	*/
	for (ifa = ifp->if_addr; ifa != NULL;
	ifa = TAILQ_NEXT(ifa, ifa_link)) {
	if (ifa->ifa_addr->sa_family == AF_LINK) {
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	sdl->sdl_index = ifp->if_index;
	}
	}
	}
	#endif

	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
	#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
	#endif
	IFNET_WUNLOCK();

	if (domain_init_status >= 2)
	if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	if (!vmove && ifp->if_watchdog != NULL) {
	if_printf(ifp,
	"WARNING: using obsoleted if_watchdog interface\n");

	/*
	* Note that we need if_slowtimo(). If this happens after
	* boot, then call if_slowtimo() directly.
	*/
	if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold)
	if_slowtimo(0);
	}
	}

	/*
	 * SYSINIT hook (SI_SUB_PROTO_IFATTACHDOMAIN): run if_attachdomain1() on
	 * every interface currently on V_ifnet.  The dummy argument is unused.
	 */
	static void
	if_attachdomain(void *dummy)
	{
	struct ifnet *ifp;
	int s;

	s = splnet();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
	if_attachdomain1(ifp);
	splx(s);
	}
	SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
	if_attachdomain, NULL);

	/*
	 * Attach per-address-family data to ifp: for every domain that provides
	 * dom_ifattach, store its result in ifp->if_afdata[dom_family].
	 *
	 * Returns silently when the afdata lock cannot be taken without waiting,
	 * and prints a message and returns when if_afdata_initialized shows the
	 * work was already done for the current domain_init_status.
	 */
	static void
	if_attachdomain1(struct ifnet *ifp)
	{
	struct domain *dp;
	int s;

	s = splnet();

	/*
	* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	* cannot lock ifp->if_afdata initialization, entirely.
	*/
	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
	splx(s);
	return;
	}
	if (ifp->if_afdata_initialized >= domain_init_status) {
	IF_AFDATA_UNLOCK(ifp);
	splx(s);
	printf("if_attachdomain called more than once on %s\n",
	ifp->if_xname);
	return;
	}
	/* Record the initialization level while the lock is still held. */
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (dp = domains; dp; dp = dp->dom_next) {
	if (dp->dom_ifattach)
	ifp->if_afdata[dp->dom_family] =
	(*dp->dom_ifattach)(ifp);
	}

	splx(s);
	}

	/*
	 * Remove any unicast or broadcast network addresses from an interface.
	 *
	 * AF_LINK addresses are left alone.  AF_INET addresses are deleted via
	 * in_control(SIOCDIFADDR) and AF_INET6 addresses via in6_purgeaddr();
	 * anything else (or an AF_INET address whose in_control() call failed)
	 * is unlinked from if_addrhead and released with ifa_free().
	 */
	void
	if_purgeaddrs(struct ifnet *ifp)
	{
		struct ifaddr *ifa, *next;

		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
			if (ifa->ifa_addr->sa_family == AF_LINK)
				continue;
	#ifdef INET
			/* XXX: Ugly!! ad hoc just for INET */
			if (ifa->ifa_addr->sa_family == AF_INET) {
				struct ifaliasreq ifr;

				bzero(&ifr, sizeof(ifr));
				ifr.ifra_addr = *ifa->ifa_addr;
				if (ifa->ifa_dstaddr)
					ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
				if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				    NULL) == 0)
					continue;
			}
	#endif /* INET */
	#ifdef INET6
			if (ifa->ifa_addr->sa_family == AF_INET6) {
				in6_purgeaddr(ifa);
				/* ifp_addrhead is already updated */
				continue;
			}
	#endif /* INET6 */
			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
			ifa_free(ifa);
		}
	}

	/*
	* Remove any multicast network addresses from an interface when an ifnet
	* is going away.
	*/
	static void
	if_purgemaddrs(struct ifnet *ifp)
	{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
	if_delmulti_locked(ifp, ifma, 1);
	IF_ADDR_UNLOCK(ifp);
	}

	/*
	* Detach an interface, removing it from the list of "active" interfaces.
	* If vmove flag is set on entry to if_detach_internal(), perform only a
	* limited subset of cleanup tasks, given that we are moving an ifnet from
	* one vnet to another, where it must be fully operational.
	*
	* XXXRW: There are some significant questions about event ordering, and
	* how to prevent things from starting to use the interface during detach.
	*/
	void
	if_detach(struct ifnet *ifp)
	{

	/* vmove == 0: full detach, including if_dead() and link address free. */
	if_detach_internal(ifp, 0);
	}

	/*
	 * Common detach path for if_detach() (vmove == 0) and if_vmove()
	 * (vmove != 0).
	 *
	 * Unlinks ifp from V_ifnet; if it was not on the list this panics when
	 * vmove is set and otherwise returns without doing anything.  Then, in
	 * order: drains the pending link-state task, brings the interface down,
	 * tears down ALTQ, purges unicast/IPv4/IPv6/multicast addresses, (only
	 * when !vmove) marks the ifnet dead and frees the link-level ifaddr,
	 * deletes routes that reference the interface from every routing table,
	 * announces the departure, removes the interface from all groups, and
	 * finally calls each domain's dom_ifdetach.
	 */
	static void
	if_detach_internal(struct ifnet *ifp, int vmove)
	{
	struct ifaddr *ifa;
	struct radix_node_head *rnh;
	int i, j;
	struct domain *dp;
	struct ifnet *iter;
	int found = 0;

	/* Only unlink ifp if it is really on this vnet's interface list. */
	IFNET_WLOCK();
	TAILQ_FOREACH(iter, &V_ifnet, if_link)
	if (iter == ifp) {
	TAILQ_REMOVE(&V_ifnet, ifp, if_link);
	found = 1;
	break;
	}
	#ifdef VIMAGE
	if (found)
	curvnet->vnet_ifcnt--;
	#endif
	IFNET_WUNLOCK();
	if (!found) {
	if (vmove)
	panic("%s: ifp=%p not on the ifnet tailq %p",
	__func__, ifp, &V_ifnet);
	else
	return; /* XXX this should panic as well? */
	}

	/*
	* Remove/wait for pending events.
	*/
	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

	/*
	* Remove routes and flush queues.
	*/
	if_down(ifp);
	#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
	altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
	altq_detach(&ifp->if_snd);
	#endif

	if_purgeaddrs(ifp);

	#ifdef INET
	in_ifdetach(ifp);
	#endif

	#ifdef INET6
	/*
	* Remove all IPv6 kernel structs related to ifp. This should be done
	* before removing routing entries below, since IPv6 interface direct
	* routes are expected to be removed by the IPv6-specific kernel API.
	* Otherwise, the kernel will detect some inconsistency and bark it.
	*/
	in6_ifdetach(ifp);
	#endif
	if_purgemaddrs(ifp);

	if (!vmove) {
	/*
	* Prevent further calls into the device driver via ifnet.
	*/
	if_dead(ifp);

	/*
	* Remove link ifaddr pointer and maybe decrement if_index.
	* Clean up all addresses.
	*/
	ifp->if_addr = NULL;

	/* We can now free link ifaddr. */
	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
	ifa_free(ifa);
	}
	}

	/*
	* Delete all remaining routes using this interface
	* Unfortunately the only way to do this is to slog through
	* the entire routing table looking for routes which point
	* to this interface...oh well...
	*/
	for (i = 1; i <= AF_MAX; i++) {
	for (j = 0; j < rt_numfibs; j++) {
	rnh = rt_tables_get_rnh(j, i);
	if (rnh == NULL)
	continue;
	RADIX_NODE_HEAD_LOCK(rnh);
	(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	}
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
	if_delgroups(ifp);

	/*
	* We cannot hold the lock over dom_ifdetach calls as they might
	* sleep, for example trying to drain a callout, thus open up the
	* theoretical race with re-attaching.
	*/
	IF_AFDATA_LOCK(ifp);
	i = ifp->if_afdata_initialized;
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_UNLOCK(ifp);
	/* i > 0 means afdata had been initialized before it was cleared above. */
	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
	if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
	(*dp->dom_ifdetach)(ifp,
	ifp->if_afdata[dp->dom_family]);
	}
	}

	#ifdef VIMAGE
	/*
	* if_vmove() performs a limited version of if_detach() in current
	* vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
	* An attempt is made to shrink if_index in current vnet, find an
	* unused if_index in target vnet and calls if_grow() if necessary,
	* and finally find an unused if_xname for the target vnet.
	*/
	void
	if_vmove(struct ifnet ifp, struct vnet new_vnet)
	{
	u_short idx;

	/*
	* Detach from current vnet, but preserve LLADDR info, do not
	* mark as dead etc. so that the ifnet can be reattached later.
	*/
	if_detach_internal(ifp, 1);

	/*
	* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
	* the if_index for that vnet if possible.
	*
	* NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
	* or we'd lock on one vnet and unlock on another.
	*/
	IFNET_WLOCK();
	ifindex_free_locked(ifp->if_index);

	/*
	* Switch to the context of the target vnet.
	*/
	CURVNET_SET_QUIET(new_vnet);

	if (ifindex_alloc_locked(&idx) != 0) {
	IFNET_WUNLOCK();
	panic("if_index overflow");
	}
	ifp->if_index = idx;
	ifnet_setbyindex_locked(ifp->if_index, ifp);
	IFNET_WUNLOCK();

	if_attach_internal(ifp, 1);

	CURVNET_RESTORE();
	}

	/*
	* Move an ifnet to or from another child prison/vnet, specified by the jail id.
	*/
	static int
	if_vmove_loan(struct thread td, struct ifnet ifp, char *ifname, int jid)
	{
	struct prison *pr;
	struct ifnet *difp;

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
	return (ENXIO);
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Do not try to move the iface from and to the same prison. */
	if (pr->pr_vnet == ifp->if_vnet) {
	prison_free(pr);
	return (EEXIST);
	}

	/* Make sure the named iface does not exists in the dst. prison/vnet. */
	/* XXX Lock interfaces to avoid races. */
	CURVNET_SET_QUIET(pr->pr_vnet);
	difp = ifunit(ifname);
	CURVNET_RESTORE();
	if (difp != NULL) {
	prison_free(pr);
	return (EEXIST);
	}

	/* Move the interface into the child jail/vnet. */
	if_vmove(ifp, pr->pr_vnet);

	/* Report the new if_xname back to the userland. */
	sprintf(ifname, "%s", ifp->if_xname);

	prison_free(pr);
	return (0);
	}

	static int
	if_vmove_reclaim(struct thread td, char ifname, int jid)
	{
	struct prison *pr;
	struct vnet *vnet_dst;
	struct ifnet *ifp;

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
	return (ENXIO);
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Make sure the named iface exists in the source prison/vnet. */
	CURVNET_SET(pr->pr_vnet);
	ifp = ifunit(ifname); /* XXX Lock to avoid races. */
	if (ifp == NULL) {
	CURVNET_RESTORE();
	prison_free(pr);
	return (ENXIO);
	}

	/* Do not try to move the iface from and to the same prison. */
	vnet_dst = TD_TO_VNET(td);
	if (vnet_dst == ifp->if_vnet) {
	CURVNET_RESTORE();
	prison_free(pr);
	return (EEXIST);
	}

	/* Get interface back from child jail/vnet. */
	if_vmove(ifp, vnet_dst);
	CURVNET_RESTORE();

	/* Report the new if_xname back to the userland. */
	sprintf(ifname, "%s", ifp->if_xname);

	prison_free(pr);
	return (0);
	}
	#endif /* VIMAGE */

	/*
	* Add a group to an interface
	*/
	int
	if_addgroup(struct ifnet ifp, const char groupname)
	{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	groupname[strlen(groupname) - 1] <= '9')
	return (EINVAL);

	IFNET_WLOCK();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
	IFNET_WUNLOCK();
	return (EEXIST);
	}

	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
	M_NOWAIT)) == NULL) {
	IFNET_WUNLOCK();
	return (ENOMEM);
	}

	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
	M_TEMP, M_NOWAIT)) == NULL) {
	free(ifgl, M_TEMP);
	IFNET_WUNLOCK();
	return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
	if (!strcmp(ifg->ifg_group, groupname))
	break;

	if (ifg == NULL) {
	if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
	M_TEMP, M_NOWAIT)) == NULL) {
	free(ifgl, M_TEMP);
	free(ifgm, M_TEMP);
	IFNET_WUNLOCK();
	return (ENOMEM);
	}
	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	TAILQ_INIT(&ifg->ifg_members);
	EVENTHANDLER_INVOKE(group_attach_event, ifg);
	TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	IF_ADDR_LOCK(ifp);
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_UNLOCK(ifp);

	IFNET_WUNLOCK();

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
	}

	/*
	* Remove a group from an interface
	*/
	int
	if_delgroup(struct ifnet ifp, const char groupname)
	{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	IFNET_WLOCK();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
	break;
	if (ifgl == NULL) {
	IFNET_WUNLOCK();
	return (ENOENT);
	}

	IF_ADDR_LOCK(ifp);
	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_UNLOCK(ifp);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
	if (ifgm->ifgm_ifp == ifp)
	break;

	if (ifgm != NULL) {
	TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
	free(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
	TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
	EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
	free(ifgl->ifgl_group, M_TEMP);
	}
	IFNET_WUNLOCK();

	free(ifgl, M_TEMP);

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
	}

	/*
	* Remove an interface from all groups
	*/
	static void
	if_delgroups(struct ifnet *ifp)
	{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;
	char groupname[IFNAMSIZ];

	IFNET_WLOCK();
	while (!TAILQ_EMPTY(&ifp->if_groups)) {
	ifgl = TAILQ_FIRST(&ifp->if_groups);

	strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);

	IF_ADDR_LOCK(ifp);
	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_UNLOCK(ifp);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
	if (ifgm->ifgm_ifp == ifp)
	break;

	if (ifgm != NULL) {
	TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
	ifgm_next);
	free(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
	TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
	EVENTHANDLER_INVOKE(group_detach_event,
	ifgl->ifgl_group);
	free(ifgl->ifgl_group, M_TEMP);
	}
	IFNET_WUNLOCK();

	free(ifgl, M_TEMP);

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	IFNET_WLOCK();
	}
	IFNET_WUNLOCK();
	}

	/*
	* Stores all groups from an interface in memory pointed
	* to by data
	*/
	static int
	if_getgroup(struct ifgroupreq data, struct ifnet ifp)
	{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = data;

	if (ifgr->ifgr_len == 0) {
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	ifgr->ifgr_len += sizeof(struct ifg_req);
	IF_ADDR_UNLOCK(ifp);
	return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	/* XXX: wire */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
	if (len < sizeof(ifgrq)) {
	IF_ADDR_UNLOCK(ifp);
	return (EINVAL);
	}
	bzero(&ifgrq, sizeof ifgrq);
	strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
	sizeof(ifgrq.ifgrq_group));
	if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
	IF_ADDR_UNLOCK(ifp);
	return (error);
	}
	len -= sizeof(ifgrq);
	ifgp++;
	}
	IF_ADDR_UNLOCK(ifp);

	return (0);
	}

	/*
	 * Copy the names of all interfaces belonging to the group named in
	 * data->ifgr_name into the buffer referenced by data->ifgr_groups.
	 *
	 * A zero ifgr_len on entry turns the call into a size query: ifgr_len
	 * is set to the number of bytes needed and nothing is copied.
	 *
	 * Returns 0 on success, ENOENT if no such group exists, EINVAL if the
	 * buffer is too small, or the error reported by copyout().
	 */
	static int
	if_getgroupmembers(struct ifgroupreq *data)
	{
		struct ifgroupreq *ifgr = data;
		struct ifg_group *ifg;
		struct ifg_member *ifgm;
		struct ifg_req ifgrq, *ifgp;
		int len, error;

		error = 0;
		IFNET_RLOCK();

		/* Look the group up by name. */
		TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) {
			if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
				break;
		}
		if (ifg == NULL) {
			error = ENOENT;
			goto out;
		}

		/* Size query only. */
		if (ifgr->ifgr_len == 0) {
			TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
				ifgr->ifgr_len += sizeof(ifgrq);
			goto out;
		}

		len = ifgr->ifgr_len;
		ifgp = ifgr->ifgr_groups;
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
			if (len < sizeof(ifgrq)) {
				error = EINVAL;
				goto out;
			}
			bzero(&ifgrq, sizeof ifgrq);
			strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
			    sizeof(ifgrq.ifgrq_member));
			error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req));
			if (error != 0)
				goto out;
			len -= sizeof(ifgrq);
			ifgp++;
		}

	out:
		IFNET_RUNLOCK();
		return (error);
	}

	/*
	* Delete Routes for a Network Interface
	*
	* Called for each routing entry via the rnh->rnh_walktree() call above
	* to delete all route entries referencing a detaching network interface.
	*
	* Arguments:
	* rn pointer to node in the routing table
	* arg argument passed to rnh->rnh_walktree() - detaching interface
	*
	* Returns:
	* 0 successful
	* errno failed - reason indicated
	*
	*/
	static int
	if_rtdel(struct radix_node rn, void arg)
	{
	struct rtentry rt = (struct rtentry )rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {

	/*
	* Protect (sorta) against walktree recursion problems
	* with cloned routes
	*/
	if ((rt->rt_flags & RTF_UP) == 0)
	return (0);

	err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	rt_mask(rt), rt->rt_flags\|RTF_RNH_LOCKED,
	(struct rtentry **) NULL, rt->rt_fibnum);
	if (err) {
	log(LOG_WARNING, "if_rtdel: error %d\n", err);
	}
	}

	return (0);
	}

	/*
	 * Wrapper functions for struct ifnet address list locking macros. These are
	 * used by kernel modules to avoid encoding programming interface or binary
	 * interface assumptions that may be violated when kernel-internal locking
	 * approaches change.
	 */

	/*
	 * Acquire the address list lock of ifp by invoking IF_ADDR_LOCK().
	 */
	void
	if_addr_rlock(struct ifnet *ifp)
	{

	IF_ADDR_LOCK(ifp);
	}

	/*
	 * Release the address list lock of ifp by invoking IF_ADDR_UNLOCK().
	 */
	void
	if_addr_runlock(struct ifnet *ifp)
	{

	IF_ADDR_UNLOCK(ifp);
	}

	/*
	 * Lock wrapper named for multicast address list users.  It invokes the
	 * same IF_ADDR_LOCK() macro as if_addr_rlock().
	 */
	void
	if_maddr_rlock(struct ifnet *ifp)
	{

	IF_ADDR_LOCK(ifp);
	}

	/*
	 * Unlock wrapper named for multicast address list users.  It invokes the
	 * same IF_ADDR_UNLOCK() macro as if_addr_runlock().
	 */
	void
	if_maddr_runlock(struct ifnet *ifp)
	{

	IF_ADDR_UNLOCK(ifp);
	}

	/*
	 * Reference count functions for ifaddrs.
	 */

	/*
	 * Initialize the mutex of a new ifaddr and set its reference count to 1.
	 */
	void
	ifa_init(struct ifaddr *ifa)
	{

	mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
	refcount_init(&ifa->ifa_refcnt, 1);
	}

	/*
	 * Take an additional reference on ifa.
	 */
	void
	ifa_ref(struct ifaddr *ifa)
	{

	refcount_acquire(&ifa->ifa_refcnt);
	}

	/*
	 * Drop one reference on ifa.  When refcount_release() reports that
	 * this was the final reference, the mutex is destroyed and the
	 * structure is returned to the M_IFADDR malloc type.
	 */
	void
	ifa_free(struct ifaddr *ifa)
	{

		/* Other holders remain: nothing more to do. */
		if (!refcount_release(&ifa->ifa_refcnt))
			return;

		mtx_destroy(&ifa->ifa_mtx);
		free(ifa, M_IFADDR);
	}

	int
	ifa_add_loopback_route(struct ifaddr ifa, struct sockaddr ia)
	{
	int error = 0;
	struct rtentry *rt = NULL;
	struct rt_addrinfo info;
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};

	bzero(&info, sizeof(info));
	info.rti_ifp = V_loif;
	info.rti_flags = ifa->ifa_flags \| RTF_HOST \| RTF_STATIC;
	info.rti_info[RTAX_DST] = ia;
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
	error = rtrequest1_fib(RTM_ADD, &info, &rt, 0);

	if (error == 0 && rt != NULL) {
	RT_LOCK(rt);
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
	ifa->ifa_ifp->if_type;
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
	ifa->ifa_ifp->if_index;
	RT_REMREF(rt);
	RT_UNLOCK(rt);
	} else if (error != 0)
	log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");

	return (error);
	}

	int
	ifa_del_loopback_route(struct ifaddr ifa, struct sockaddr ia)
	{
	int error = 0;
	struct rt_addrinfo info;
	struct sockaddr_dl null_sdl;

	bzero(&null_sdl, sizeof(null_sdl));
	null_sdl.sdl_len = sizeof(null_sdl);
	null_sdl.sdl_family = AF_LINK;
	null_sdl.sdl_type = ifa->ifa_ifp->if_type;
	null_sdl.sdl_index = ifa->ifa_ifp->if_index;
	bzero(&info, sizeof(info));
	info.rti_flags = ifa->ifa_flags \| RTF_HOST \| RTF_STATIC;
	info.rti_info[RTAX_DST] = ia;
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
	error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);

	if (error != 0)
	log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");

	return (error);
	}

	/*
	 * Socket address comparison helpers.
	 *
	 * sa_equal() compares the first sa_len bytes (as given by a1) of two
	 * socket addresses.
	 *
	 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
	 * structs used to represent other address families, it is necessary
	 * to perform a different comparison: sa_dl_equal() requires matching
	 * sdl_len values and then compares only the link-level address bytes.
	 */

	#define sa_equal(a1, a2)						\
		(bcmp((a1), (a2), (a1)->sa_len) == 0)

	#define sa_dl_equal(a1, a2)						\
		(((struct sockaddr_dl *)(a1))->sdl_len ==			\
		    ((struct sockaddr_dl *)(a2))->sdl_len &&			\
		 bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
		    LLADDR((struct sockaddr_dl *)(a2)),				\
		    ((struct sockaddr_dl *)(a1))->sdl_alen) == 0)

	/*
	* Locate an interface based on a complete address.
	*/
	/ARGSUSED/
	static struct ifaddr *
	ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
	{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != addr->sa_family)
	continue;
	if (sa_equal(addr, ifa->ifa_addr)) {
	if (getref)
	ifa_ref(ifa);
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	/* IP6 doesn't have broadcast */
	if ((ifp->if_flags & IFF_BROADCAST) &&
	ifa->ifa_broadaddr &&
	ifa->ifa_broadaddr->sa_len != 0 &&
	sa_equal(ifa->ifa_broadaddr, addr)) {
	if (getref)
	ifa_ref(ifa);
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	}
	IF_ADDR_UNLOCK(ifp);
	}
	ifa = NULL;
	done:
	IFNET_RUNLOCK_NOSLEEP();
	return (ifa);
	}

	/*
	 * Look up the ifaddr matching addr and return it with a reference
	 * held (getref = 1), or NULL if no address matches.
	 */
	struct ifaddr *
	ifa_ifwithaddr(struct sockaddr *addr)
	{

	return (ifa_ifwithaddr_internal(addr, 1));
	}

	/*
	 * Report whether any interface address matches addr.  No reference is
	 * taken (getref = 0); the result is non-zero on a match and 0 otherwise.
	 */
	int
	ifa_ifwithaddr_check(struct sockaddr *addr)
	{

	return (ifa_ifwithaddr_internal(addr, 0) != NULL);
	}

	/*
	 * Find the interface address whose broadcast address equals addr.
	 *
	 * Only addresses of the same family on IFF_BROADCAST interfaces with a
	 * non-empty broadcast address are considered.  The result is returned
	 * with a reference held; NULL is returned when nothing matches.
	 */
	/* ARGSUSED */
	struct ifaddr *
	ifa_ifwithbroadaddr(struct sockaddr *addr)
	{
		struct ifnet *ifp;
		struct ifaddr *ifa;

		IFNET_RLOCK_NOSLEEP();
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				if (ifa->ifa_addr->sa_family != addr->sa_family)
					continue;
				/* Skip anything that cannot be a broadcast match. */
				if ((ifp->if_flags & IFF_BROADCAST) == 0 ||
				    !ifa->ifa_broadaddr ||
				    ifa->ifa_broadaddr->sa_len == 0 ||
				    !sa_equal(ifa->ifa_broadaddr, addr))
					continue;

				/* Pin the result before the list lock goes away. */
				ifa_ref(ifa);
				IF_ADDR_UNLOCK(ifp);
				goto done;
			}
			IF_ADDR_UNLOCK(ifp);
		}
		ifa = NULL;
	done:
		IFNET_RUNLOCK_NOSLEEP();
		return (ifa);
	}

	/*
	* Locate the point to point interface with a given destination address.
	*/
	/ARGSUSED/
	struct ifaddr *
	ifa_ifwithdstaddr(struct sockaddr *addr)
	{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
	continue;
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != addr->sa_family)
	continue;
	if (ifa->ifa_dstaddr != NULL &&
	sa_equal(addr, ifa->ifa_dstaddr)) {
	ifa_ref(ifa);
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	}
	IF_ADDR_UNLOCK(ifp);
	}
	ifa = NULL;
	done:
	IFNET_RUNLOCK_NOSLEEP();
	return (ifa);
	}

	/*
	 * Find an interface on a specific network. If many, choice
	 * is most specific found.
	 *
	 * Arguments:
	 *	addr		address whose network is being looked up
	 *	ignore_ptp	if non-zero, AF_INET point-to-point interfaces
	 *			are not matched by destination address but go
	 *			through the generic netmask comparison instead
	 *
	 * Returns the selected ifaddr with a reference held, or NULL.
	 */
	struct ifaddr *
	-ifa_ifwithnet(struct sockaddr *addr)
	+ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
	{
		struct ifnet *ifp;
		struct ifaddr *ifa;
		struct ifaddr *ifa_maybe = NULL;
		u_int af = addr->sa_family;
		char *addr_data = addr->sa_data, *cplim;

		/*
		 * AF_LINK addresses can be looked up directly by their index number,
		 * so do that if we can.
		 */
		if (af == AF_LINK) {
			struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
			if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
				return (ifaddr_byindex(sdl->sdl_index));
		}

		/*
		 * Scan through each interface, looking for ones that have addresses
		 * in this address family. Maintain a reference on ifa_maybe once
		 * we find one, as we release the IF_ADDR_LOCK() that kept it stable
		 * when we move onto the next interface.
		 */
		IFNET_RLOCK_NOSLEEP();
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				char *cp, *cp2, *cp3;

				if (ifa->ifa_addr->sa_family != af)
	next:				continue;
	-			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
	+			if (af == AF_INET &&
	+			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
					/*
					 * This is a bit broken as it doesn't
					 * take into account that the remote end may
					 * be a single node in the network we are
					 * looking for.
					 * The trouble is that we don't know the
					 * netmask for the remote end.
					 */
					if (ifa->ifa_dstaddr != NULL &&
					    sa_equal(addr, ifa->ifa_dstaddr)) {
						ifa_ref(ifa);
						IF_ADDR_UNLOCK(ifp);
						goto done;
					}
				} else {
					/*
					 * if we have a special address handler,
					 * then use it instead of the generic one.
					 */
					if (ifa->ifa_claim_addr) {
						if ((*ifa->ifa_claim_addr)(ifa, addr)) {
							ifa_ref(ifa);
							IF_ADDR_UNLOCK(ifp);
							goto done;
						}
						continue;
					}

					/*
					 * Scan all the bits in the ifa's address.
					 * If a bit disagrees with what we are
					 * looking for, mask it with the netmask
					 * to see if it really matters.
					 * (A byte at a time)
					 */
					if (ifa->ifa_netmask == 0)
						continue;
					cp = addr_data;
					cp2 = ifa->ifa_addr->sa_data;
					cp3 = ifa->ifa_netmask->sa_data;
					cplim = ifa->ifa_netmask->sa_len
					    + (char *)ifa->ifa_netmask;
					while (cp3 < cplim)
						if ((*cp++ ^ *cp2++) & *cp3++)
							goto next; /* next address! */
					/*
					 * If the netmask of what we just found
					 * is more specific than what we had before
					 * (if we had one) then remember the new one
					 * before continuing to search
					 * for an even better one.
					 */
					if (ifa_maybe == NULL ||
					    rn_refines((caddr_t)ifa->ifa_netmask,
					    (caddr_t)ifa_maybe->ifa_netmask)) {
						if (ifa_maybe != NULL)
							ifa_free(ifa_maybe);
						ifa_maybe = ifa;
						ifa_ref(ifa_maybe);
					}
				}
			}
			IF_ADDR_UNLOCK(ifp);
		}
		/* No exact hit: hand the best candidate's reference to the caller. */
		ifa = ifa_maybe;
		ifa_maybe = NULL;
	done:
		IFNET_RUNLOCK_NOSLEEP();
		/* An exact hit supersedes any candidate; drop the candidate's ref. */
		if (ifa_maybe != NULL)
			ifa_free(ifa_maybe);
		return (ifa);
	}

	/*
	* Find an interface address specific to an interface best matching
	* a given address.
	*/
	struct ifaddr *
	ifaof_ifpforaddr(struct sockaddr addr, struct ifnet ifp)
	{
	struct ifaddr *ifa;
	char cp, cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
	return (0);
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != af)
	continue;
	if (ifa_maybe == NULL)
	ifa_maybe = ifa;
	if (ifa->ifa_netmask == 0) {
	if (sa_equal(addr, ifa->ifa_addr) \|\|
	(ifa->ifa_dstaddr &&
	sa_equal(addr, ifa->ifa_dstaddr)))
	goto done;
	continue;
	}
	if (ifp->if_flags & IFF_POINTOPOINT) {
	if (sa_equal(addr, ifa->ifa_dstaddr))
	goto done;
	} else {
	cp = addr->sa_data;
	cp2 = ifa->ifa_addr->sa_data;
	cp3 = ifa->ifa_netmask->sa_data;
	cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
	for (; cp3 < cplim; cp3++)
	if ((cp++ ^ cp2++) & *cp3)
	break;
	if (cp3 == cplim)
	goto done;
	}
	}
	ifa = ifa_maybe;
	done:
	if (ifa != NULL)
	ifa_ref(ifa);
	IF_ADDR_UNLOCK(ifp);
	return (ifa);
	}

	#include <net/if_llatbl.h>

	/*
	* Default action when installing a route with a Link Level gateway.
	* Lookup an appropriate real ifa to point to.
	* This should be moved to /sys/net/link.c eventually.
	*/
	static void
	link_rtrequest(int cmd, struct rtentry rt, struct rt_addrinfo info)
	{
	struct ifaddr ifa, oifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	RT_LOCK_ASSERT(rt);

	if (cmd != RTM_ADD \|\| ((ifa = rt->rt_ifa) == 0) \|\|
	((ifp = ifa->ifa_ifp) == 0) \|\| ((dst = rt_key(rt)) == 0))
	return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa) {
	oifa = rt->rt_ifa;
	rt->rt_ifa = ifa;
	ifa_free(oifa);
	if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
	ifa->ifa_rtrequest(cmd, rt, info);
	}
	}

	/*
	 * Mark an interface down and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * Arguments:
	 *	ifp	interface being marked down
	 *	flag	flag to clear; must be IFF_UP (asserted)
	 *	fam	address family to notify, or PF_UNSPEC for all
	 *
	 * Besides clearing the flag this flushes the send queue via
	 * ifp->if_qflush and emits a routing socket interface message.
	 */
	static void
	if_unroute(struct ifnet *ifp, int flag, int fam)
	{
		struct ifaddr *ifa;

		KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));

		ifp->if_flags &= ~flag;
		getmicrotime(&ifp->if_lastchange);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
				pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
		ifp->if_qflush(ifp);

	#if defined(INET) || defined(INET6)
	#ifdef DEV_CARP
		if (ifp->if_carp)
			carp_carpdev_state(ifp->if_carp);
	#endif
	#endif
		rt_ifmsg(ifp);
	}

	/*
	 * Mark an interface up and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * Arguments:
	 *	ifp	interface being marked up
	 *	flag	flag to set; must be IFF_UP (asserted)
	 *	fam	address family to notify, or PF_UNSPEC for all
	 *
	 * A routing socket interface message is emitted and, with INET6,
	 * in6_if_up() is called afterwards.
	 */
	static void
	if_route(struct ifnet *ifp, int flag, int fam)
	{
		struct ifaddr *ifa;

		KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));

		ifp->if_flags |= flag;
		getmicrotime(&ifp->if_lastchange);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
				pfctlinput(PRC_IFUP, ifa->ifa_addr);
	#if defined(INET) || defined(INET6)
	#ifdef DEV_CARP
		if (ifp->if_carp)
			carp_carpdev_state(ifp->if_carp);
	#endif
	#endif
		rt_ifmsg(ifp);
	#ifdef INET6
		in6_if_up(ifp);
	#endif
	}

	/*
	 * Function pointer hooks into if_vlan.  do_link_state_change() calls
	 * through vlan_link_state_p when the interface has a vlan trunk.
	 */
	void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
	void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */

	/*
	 * Record a new link state for an interface and schedule the
	 * notification work.
	 *
	 * To avoid LORs between the driver lock and upper layer locks, as well
	 * as possible recursion, nothing is delivered from here: the work is
	 * queued on taskqueue_swi and carried out by do_link_state_change().
	 * A call that does not change the state is a no-op.
	 */
	void
	if_link_state_change(struct ifnet *ifp, int link_state)
	{
		/* Act only on a real transition. */
		if (ifp->if_link_state != link_state) {
			ifp->if_link_state = link_state;

			taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
		}
	}

	static void
	do_link_state_change(void *arg, int pending)
	{
	struct ifnet ifp = (struct ifnet )arg;
	int link_state = ifp->if_link_state;
	CURVNET_SET(ifp->if_vnet);

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);
	if (ifp->if_vlantrunk != NULL)
	(*vlan_link_state_p)(ifp, 0);

	if ((ifp->if_type == IFT_ETHER \|\| ifp->if_type == IFT_L2VLAN) &&
	IFP2AC(ifp)->ac_netgraph != NULL)
	(*ng_ether_link_state_p)(ifp, link_state);
	#if defined(INET) \|\| defined(INET6)
	#ifdef DEV_CARP
	if (ifp->if_carp)
	carp_carpdev_state(ifp->if_carp);
	#endif
	#endif
	if (ifp->if_bridge) {
	KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
	(*bstp_linkstate_p)(ifp, link_state);
	}
	if (ifp->if_lagg) {
	KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
	(*lagg_linkstate_p)(ifp, link_state);
	}

	if (IS_DEFAULT_VNET(curvnet))
	devctl_notify("IFNET", ifp->if_xname,
	(link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
	NULL);
	if (pending > 1)
	if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
	log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
	(link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
	CURVNET_RESTORE();
	}

	/*
	 * Mark an interface down and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * Thin wrapper: clears IFF_UP through if_unroute() for all address
	 * families (AF_UNSPEC).
	 */
	void
	if_down(struct ifnet *ifp)
	{

	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	}

	/*
	 * Mark an interface up and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * Thin wrapper: sets IFF_UP through if_route() for all address
	 * families (AF_UNSPEC).
	 */
	void
	if_up(struct ifnet *ifp)
	{

	if_route(ifp, IFF_UP, AF_UNSPEC);
	}

	/*
	* Flush an interface queue.
	*/
	void
	if_qflush(struct ifnet *ifp)
	{
	struct mbuf m, n;
	struct ifaltq *ifq;

	ifq = &ifp->if_snd;
	IFQ_LOCK(ifq);
	#ifdef ALTQ
	if (ALTQ_IS_ENABLED(ifq))
	ALTQ_PURGE(ifq);
	#endif
	n = ifq->ifq_head;
	while ((m = n) != 0) {
	n = m->m_act;
	m_freem(m);
	}
	ifq->ifq_head = 0;
	ifq->ifq_tail = 0;
	ifq->ifq_len = 0;
	IFQ_UNLOCK(ifq);
	}

	/*
	 * Handle interface watchdog timer routines. Called
	 * from softclock, we decrement timers (if set) and
	 * call the appropriate interface routine on expiration.
	 *
	 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
	 * holding Giant.
	 *
	 * The function re-arms itself with timeout() every hz / IFNET_SLOWHZ
	 * ticks; arg is unused.
	 */
	static void
	if_slowtimo(void *arg)
	{
		VNET_ITERATOR_DECL(vnet_iter);
		struct ifnet *ifp;
		int s = splimp();

		VNET_LIST_RLOCK_NOSLEEP();
		IFNET_RLOCK_NOSLEEP();
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);
			TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
				/* Skip unarmed timers and timers that have not expired. */
				if (ifp->if_timer == 0 || --ifp->if_timer)
					continue;
				if (ifp->if_watchdog)
					(*ifp->if_watchdog)(ifp);
			}
			CURVNET_RESTORE();
		}
		IFNET_RUNLOCK_NOSLEEP();
		VNET_LIST_RUNLOCK_NOSLEEP();
		splx(s);
		timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
	}

	/*
	 * Map interface name to interface structure pointer, with or without
	 * returning a reference.
	 *
	 * ifunit_ref() is the referencing variant: interfaces flagged
	 * IFF_DYING are skipped, and the interface found is returned with a
	 * reference taken via if_ref().  NULL is returned if no interface has
	 * the given name.
	 */
	struct ifnet *
	ifunit_ref(const char *name)
	{
		struct ifnet *ifp;

		IFNET_RLOCK_NOSLEEP();
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			if (strncmp(name, ifp->if_xname, IFNAMSIZ) != 0)
				continue;
			if (ifp->if_flags & IFF_DYING)
				continue;
			/* Take the reference while the list lock is still held. */
			if_ref(ifp);
			break;
		}
		IFNET_RUNLOCK_NOSLEEP();
		return (ifp);
	}

	struct ifnet *
	ifunit(const char *name)
	{
	struct ifnet *ifp;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
	break;
	}
	IFNET_RUNLOCK_NOSLEEP();
	return (ifp);
	}

	/*
	 * Hardware specific interface ioctls.
	 *
	 * Arguments:
	 *	cmd	ioctl command
	 *	ifp	interface the request applies to
	 *	data	request argument; interpreted as a struct ifreq (or, for
	 *		individual commands, a struct ifstat / struct ifgroupreq)
	 *	td	calling thread, used for privilege checks
	 *
	 * Returns:
	 *	0		success
	 *	ENOIOCTL	cmd is not handled here; the caller should try
	 *			the protocol and driver handlers
	 *	errno		failure - reason indicated
	 */
	static int
	ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
	{
		struct ifreq *ifr;
		struct ifstat *ifs;
		int error = 0;
		int new_flags, temp_flags;
		size_t namelen, onamelen;
		size_t descrlen;
		char *descrbuf, *odescrbuf;
		char new_name[IFNAMSIZ];
		struct ifaddr *ifa;
		struct sockaddr_dl *sdl;

		ifr = (struct ifreq *)data;
		switch (cmd) {
		case SIOCGIFINDEX:
			ifr->ifr_index = ifp->if_index;
			break;

		case SIOCGIFFLAGS:
			/* Stack and driver flags are reported as one 32-bit set. */
			temp_flags = ifp->if_flags | ifp->if_drv_flags;
			ifr->ifr_flags = temp_flags & 0xffff;
			ifr->ifr_flagshigh = temp_flags >> 16;
			break;

		case SIOCGIFCAP:
			ifr->ifr_reqcap = ifp->if_capabilities;
			ifr->ifr_curcap = ifp->if_capenable;
			break;

	#ifdef MAC
		case SIOCGIFMAC:
			error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
			break;
	#endif

		case SIOCGIFMETRIC:
			ifr->ifr_metric = ifp->if_metric;
			break;

		case SIOCGIFMTU:
			ifr->ifr_mtu = ifp->if_mtu;
			break;

		case SIOCGIFPHYS:
			ifr->ifr_phys = ifp->if_physical;
			break;

		case SIOCGIFDESCR:
			error = 0;
			sx_slock(&ifdescr_sx);
			if (ifp->if_description == NULL)
				error = ENOMSG;
			else {
				/* space for terminating nul */
				descrlen = strlen(ifp->if_description) + 1;
				/*
				 * A too-small buffer is not an error: the buffer
				 * pointer is cleared and the needed length returned.
				 */
				if (ifr->ifr_buffer.length < descrlen)
					ifr->ifr_buffer.buffer = NULL;
				else
					error = copyout(ifp->if_description,
					    ifr->ifr_buffer.buffer, descrlen);
				ifr->ifr_buffer.length = descrlen;
			}
			sx_sunlock(&ifdescr_sx);
			break;

		case SIOCSIFDESCR:
			error = priv_check(td, PRIV_NET_SETIFDESCR);
			if (error)
				return (error);

			/*
			 * Copy only (length-1) bytes to make sure that
			 * if_description is always nul terminated.  The
			 * length parameter is supposed to count the
			 * terminating nul in.
			 */
			if (ifr->ifr_buffer.length > ifdescr_maxlen)
				return (ENAMETOOLONG);
			else if (ifr->ifr_buffer.length == 0)
				descrbuf = NULL;
			else {
				descrbuf = malloc(ifr->ifr_buffer.length, M_IFDESCR,
				    M_WAITOK | M_ZERO);
				error = copyin(ifr->ifr_buffer.buffer, descrbuf,
				    ifr->ifr_buffer.length - 1);
				if (error) {
					free(descrbuf, M_IFDESCR);
					break;
				}
			}

			/* Swap in the new description; free the old one unlocked. */
			sx_xlock(&ifdescr_sx);
			odescrbuf = ifp->if_description;
			ifp->if_description = descrbuf;
			sx_xunlock(&ifdescr_sx);

			getmicrotime(&ifp->if_lastchange);
			free(odescrbuf, M_IFDESCR);
			break;

		case SIOCSIFFLAGS:
			error = priv_check(td, PRIV_NET_SETIFFLAGS);
			if (error)
				return (error);
			/*
			 * Currently, no driver owned flags pass the IFF_CANTCHANGE
			 * check, so we don't need special handling here yet.
			 */
			new_flags = (ifr->ifr_flags & 0xffff) |
			    (ifr->ifr_flagshigh << 16);
			if (ifp->if_flags & IFF_SMART) {
				/* Smart drivers twiddle their own routes */
			} else if (ifp->if_flags & IFF_UP &&
			    (new_flags & IFF_UP) == 0) {
				int s = splimp();
				if_down(ifp);
				splx(s);
			} else if (new_flags & IFF_UP &&
			    (ifp->if_flags & IFF_UP) == 0) {
				int s = splimp();
				if_up(ifp);
				splx(s);
			}
			/* See if permanently promiscuous mode bit is about to flip */
			if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
				if (new_flags & IFF_PPROMISC)
					ifp->if_flags |= IFF_PROMISC;
				else if (ifp->if_pcount == 0)
					ifp->if_flags &= ~IFF_PROMISC;
				log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
				    ifp->if_xname,
				    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
			}
			ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			    (new_flags &~ IFF_CANTCHANGE);
			if (ifp->if_ioctl) {
				(void) (*ifp->if_ioctl)(ifp, cmd, data);
			}
			getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFCAP:
			error = priv_check(td, PRIV_NET_SETIFCAP);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			/* Refuse capabilities the hardware does not advertise. */
			if (ifr->ifr_reqcap & ~ifp->if_capabilities)
				return (EINVAL);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

	#ifdef MAC
		case SIOCSIFMAC:
			error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
			break;
	#endif

		case SIOCSIFNAME:
			error = priv_check(td, PRIV_NET_SETIFNAME);
			if (error)
				return (error);
			error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
			if (error != 0)
				return (error);
			if (new_name[0] == '\0')
				return (EINVAL);
			if (ifunit(new_name) != NULL)
				return (EEXIST);

			/*
			 * XXX: Locking.  Nothing else seems to lock if_flags,
			 * and there are numerous other races with the
			 * ifunit() checks not being atomic with namespace
			 * changes (renames, vmoves, if_attach, etc).
			 */
			ifp->if_flags |= IFF_RENAMING;

			/* Announce the departure of the interface. */
			rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
			EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

			log(LOG_INFO, "%s: changing name to '%s'\n",
			    ifp->if_xname, new_name);

			strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
			ifa = ifp->if_addr;
			IFA_LOCK(ifa);
			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
			namelen = strlen(new_name);
			onamelen = sdl->sdl_nlen;
			/*
			 * Move the address if needed.  This is safe because we
			 * allocate space for a name of length IFNAMSIZ when we
			 * create this in if_attach().
			 */
			if (namelen != onamelen) {
				bcopy(sdl->sdl_data + onamelen,
				    sdl->sdl_data + namelen, sdl->sdl_alen);
			}
			bcopy(new_name, sdl->sdl_data, namelen);
			sdl->sdl_nlen = namelen;
			/* Rebuild the netmask so it covers exactly the new name. */
			sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
			bzero(sdl->sdl_data, onamelen);
			while (namelen != 0)
				sdl->sdl_data[--namelen] = 0xff;
			IFA_UNLOCK(ifa);

			EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
			/* Announce the return of the interface. */
			rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

			ifp->if_flags &= ~IFF_RENAMING;
			break;

	#ifdef VIMAGE
		case SIOCSIFVNET:
			error = priv_check(td, PRIV_NET_SETIFVNET);
			if (error)
				return (error);
			error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
			break;
	#endif

		case SIOCSIFMETRIC:
			error = priv_check(td, PRIV_NET_SETIFMETRIC);
			if (error)
				return (error);
			ifp->if_metric = ifr->ifr_metric;
			getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFPHYS:
			error = priv_check(td, PRIV_NET_SETIFPHYS);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFMTU:
		{
			u_long oldmtu = ifp->if_mtu;

			error = priv_check(td, PRIV_NET_SETIFMTU);
			if (error)
				return (error);
			if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
				return (EINVAL);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == 0) {
				getmicrotime(&ifp->if_lastchange);
				rt_ifmsg(ifp);
			}
			/*
			 * If the link MTU changed, do network layer specific procedure.
			 */
			if (ifp->if_mtu != oldmtu) {
	#ifdef INET6
				nd6_setmtu(ifp);
	#endif
			}
			break;
		}

		case SIOCADDMULTI:
		case SIOCDELMULTI:
			if (cmd == SIOCADDMULTI)
				error = priv_check(td, PRIV_NET_ADDMULTI);
			else
				error = priv_check(td, PRIV_NET_DELMULTI);
			if (error)
				return (error);

			/* Don't allow group membership on non-multicast interfaces. */
			if ((ifp->if_flags & IFF_MULTICAST) == 0)
				return (EOPNOTSUPP);

			/* Don't let users screw up protocols' entries. */
			if (ifr->ifr_addr.sa_family != AF_LINK)
				return (EINVAL);

			if (cmd == SIOCADDMULTI) {
				struct ifmultiaddr *ifma;

				/*
				 * Userland is only permitted to join groups once
				 * via the if_addmulti() KPI, because it cannot hold
				 * struct ifmultiaddr * between calls.  It may also
				 * lose a race while we check if the membership
				 * already exists.
				 */
				IF_ADDR_LOCK(ifp);
				ifma = if_findmulti(ifp, &ifr->ifr_addr);
				IF_ADDR_UNLOCK(ifp);
				if (ifma != NULL)
					error = EADDRINUSE;
				else
					error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
			} else {
				error = if_delmulti(ifp, &ifr->ifr_addr);
			}
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFPHYADDR:
		case SIOCDIFPHYADDR:
	#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
	#endif
		case SIOCSLIFPHYADDR:
		case SIOCSIFMEDIA:
		case SIOCSIFGENERIC:
			error = priv_check(td, PRIV_NET_HWIOCTL);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCGIFSTATUS:
			ifs = (struct ifstat *)data;
			ifs->ascii[0] = '\0';
			/* FALLTHROUGH */

		case SIOCGIFPSRCADDR:
		case SIOCGIFPDSTADDR:
		case SIOCGLIFPHYADDR:
		case SIOCGIFMEDIA:
		case SIOCGIFGENERIC:
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			break;

		case SIOCSIFLLADDR:
			error = priv_check(td, PRIV_NET_SETLLADDR);
			if (error)
				return (error);
			error = if_setlladdr(ifp,
			    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
			/* Announce the change only if the address was really set. */
			if (error == 0)
				EVENTHANDLER_INVOKE(iflladdr_event, ifp);
			break;

		case SIOCAIFGROUP:
		{
			struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

			error = priv_check(td, PRIV_NET_ADDIFGROUP);
			if (error)
				return (error);
			if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
				return (error);
			break;
		}

		case SIOCGIFGROUP:
			if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
				return (error);
			break;

		case SIOCDIFGROUP:
		{
			struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

			error = priv_check(td, PRIV_NET_DELIFGROUP);
			if (error)
				return (error);
			if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
				return (error);
			break;
		}

		default:
			/* Not ours: let the caller try other handlers. */
			error = ENOIOCTL;
			break;
		}
		return (error);
	}

	/*
	 * Interface ioctls.
	 *
	 * Arguments:
	 *	so	socket the ioctl was issued on
	 *	cmd	ioctl command
	 *	data	request argument (a struct ifreq for most commands)
	 *	td	calling thread, used for privilege checks
	 *
	 * Commands that need no interface (configuration listing, cloning,
	 * group member listing) are served first.  Otherwise the interface
	 * named in the request is looked up with a reference held and the
	 * command is offered to ifhwioctl(), then to the socket's protocol
	 * and finally to the driver's if_ioctl routine.
	 *
	 * Returns 0 on success, ENXIO if the named interface does not exist,
	 * EOPNOTSUPP if the socket has no protocol, or the error from the
	 * handler that processed the request.
	 */
	int
	ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
	{
		struct ifnet *ifp;
		struct ifreq *ifr;
		int error;
		int oif_flags;

		switch (cmd) {
		case SIOCGIFCONF:
		case OSIOCGIFCONF:
	#ifdef __amd64__
		case SIOCGIFCONF32:
	#endif
			return (ifconf(cmd, data));
		}
		ifr = (struct ifreq *)data;

		switch (cmd) {
	#ifdef VIMAGE
		case SIOCSIFRVNET:
			error = priv_check(td, PRIV_NET_SETIFVNET);
			if (error)
				return (error);
			return (if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid));
	#endif
		case SIOCIFCREATE:
		case SIOCIFCREATE2:
			error = priv_check(td, PRIV_NET_IFCREATE);
			if (error)
				return (error);
			return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
		case SIOCIFDESTROY:
			error = priv_check(td, PRIV_NET_IFDESTROY);
			if (error)
				return (error);
			return (if_clone_destroy(ifr->ifr_name));

		case SIOCIFGCLONERS:
			return (if_clone_list((struct if_clonereq *)data));
		case SIOCGIFGMEMB:
			return (if_getgroupmembers((struct ifgroupreq *)data));
		}

		/* Everything below operates on a referenced interface. */
		ifp = ifunit_ref(ifr->ifr_name);
		if (ifp == NULL)
			return (ENXIO);

		error = ifhwioctl(cmd, ifp, data, td);
		if (error != ENOIOCTL) {
			if_rele(ifp);
			return (error);
		}

		/* Remembered so an IFF_UP transition can be detected below. */
		oif_flags = ifp->if_flags;
		if (so->so_proto == NULL) {
			if_rele(ifp);
			return (EOPNOTSUPP);
		}
	#ifndef COMPAT_43
		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
		    data,
		    ifp, td));
		if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL)
			error = (*ifp->if_ioctl)(ifp, cmd, data);
	#else
		{
			u_long ocmd = cmd;

			/* Translate old-style requests before dispatching them. */
			switch (cmd) {

			case SIOCSIFDSTADDR:
			case SIOCSIFADDR:
			case SIOCSIFBRDADDR:
			case SIOCSIFNETMASK:
	#if BYTE_ORDER != BIG_ENDIAN
				if (ifr->ifr_addr.sa_family == 0 &&
				    ifr->ifr_addr.sa_len < 16) {
					ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
					ifr->ifr_addr.sa_len = 16;
				}
	#else
				if (ifr->ifr_addr.sa_len == 0)
					ifr->ifr_addr.sa_len = 16;
	#endif
				break;

			case OSIOCGIFADDR:
				cmd = SIOCGIFADDR;
				break;

			case OSIOCGIFDSTADDR:
				cmd = SIOCGIFDSTADDR;
				break;

			case OSIOCGIFBRDADDR:
				cmd = SIOCGIFBRDADDR;
				break;

			case OSIOCGIFNETMASK:
				cmd = SIOCGIFNETMASK;
			}
			error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
			    cmd,
			    data,
			    ifp, td));
			if (error == EOPNOTSUPP && ifp != NULL &&
			    ifp->if_ioctl != NULL)
				error = (*ifp->if_ioctl)(ifp, cmd, data);
			switch (ocmd) {

			case OSIOCGIFADDR:
			case OSIOCGIFDSTADDR:
			case OSIOCGIFBRDADDR:
			case OSIOCGIFNETMASK:
				/*
				 * Old callers expect the family stored as a
				 * u_short at the start of the address.
				 */
				*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;

			}
		}
	#endif /* COMPAT_43 */

		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
	#ifdef INET6
			if (ifp->if_flags & IFF_UP) {
				int s = splimp();
				in6_if_up(ifp);
				splx(s);
			}
	#endif
		}
		if_rele(ifp);
		return (error);
	}

	/*
	* The code common to handling reference counted flags,
	* e.g., in ifpromisc() and if_allmulti().
	* The "pflag" argument can specify a permanent mode flag to check,
	* such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
	*
	* Only to be used on stack-owned flags, not driver-owned flags.
	*/
	static int
	if_setflag(struct ifnet ifp, int flag, int pflag, int refcount, int onswitch)
	{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	/* Sanity checks to catch programming errors */
	KASSERT((flag & (IFF_DRV_OACTIVE\|IFF_DRV_RUNNING)) == 0,
	("%s: setting driver-owned flag %d", __func__, flag));

	if (onswitch)
	KASSERT(*refcount >= 0,
	("%s: increment negative refcount %d for flag %d",
	__func__, *refcount, flag));
	else
	KASSERT(*refcount > 0,
	("%s: decrement non-positive refcount %d for flag %d",
	__func__, *refcount, flag));

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
	*refcount += onswitch ? 1 : -1;
	return (0);
	}

	/* Save ifnet parameters for if_ioctl() may fail */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	* See if we aren't the only and touching refcount is enough.
	* Actually toggle interface flag if we are the first or last.
	*/
	if (onswitch) {
	if ((*refcount)++)
	return (0);
	ifp->if_flags \|= flag;
	} else {
	if (--(*refcount))
	return (0);
	ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	if (ifp->if_ioctl == NULL) {
	error = EOPNOTSUPP;
	goto recover;
	}
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	if (error)
	goto recover;
	/* Notify userland that interface flags have changed */
	rt_ifmsg(ifp);
	return (0);

	recover:
	/* Recover after driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
	}

	/*
	* Set/clear promiscuous mode on interface ifp based on the truth value
	* of pswitch. The calls are reference counted so that only the first
	* "on" request actually has an effect, as does the final "off" request.
	* Results are undefined if the "off" and "on" requests are not matched.
	*/
	int
	ifpromisc(struct ifnet *ifp, int pswitch)
	{
		const int flags_before = ifp->if_flags;
		int rc;

		rc = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount,
		    pswitch);
		if (rc != 0)
			return (rc);

		/* Only log when the IFF_PROMISC bit actually flipped. */
		if (((flags_before ^ ifp->if_flags) & IFF_PROMISC) != 0) {
			const char *state;

			if ((ifp->if_flags & IFF_PROMISC) != 0)
				state = "enabled";
			else
				state = "disabled";
			log(LOG_INFO, "%s: promiscuous mode %s\n",
			    ifp->if_xname, state);
		}
		return (0);
	}

	/*
	* Return interface configuration
	* of system. List may be used
	* in later ioctl's (above) to get
	* other information.
	*/
	/ARGSUSED/
	static int
	ifconf(u_long cmd, caddr_t data)
	{
	struct ifconf ifc = (struct ifconf )data;
	#ifdef __amd64__
	struct ifconf32 ifc32 = (struct ifconf32 )data;
	struct ifconf ifc_swab;
	#endif
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	#ifdef __amd64__
	if (cmd == SIOCGIFCONF32) {
	ifc_swab.ifc_len = ifc32->ifc_len;
	ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
	ifc = &ifc_swab;
	}
	#endif
	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
	return (EINVAL);

	again:
	if (ifc->ifc_len <= max_len) {
	max_len = ifc->ifc_len;
	full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	int addrs;

	/*
	* Zero the ifr_name buffer to make sure we don't
	* disclose the contents of the stack.
	*/
	memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));

	if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
	>= sizeof(ifr.ifr_name)) {
	sbuf_delete(sb);
	IFNET_RUNLOCK();
	return (ENAMETOOLONG);
	}

	addrs = 0;
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	struct sockaddr *sa = ifa->ifa_addr;

	if (prison_if(curthread->td_ucred, sa) != 0)
	continue;
	addrs++;
	#ifdef COMPAT_43
	if (cmd == OSIOCGIFCONF) {
	struct osockaddr *osa =
	(struct osockaddr *)&ifr.ifr_addr;
	ifr.ifr_addr = *sa;
	osa->sa_family = sa->sa_family;
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);
	} else
	#endif
	if (sa->sa_len <= sizeof(*sa)) {
	ifr.ifr_addr = *sa;
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);
	} else {
	sbuf_bcat(sb, &ifr,
	offsetof(struct ifreq, ifr_addr));
	max_len += offsetof(struct ifreq, ifr_addr);
	sbuf_bcat(sb, sa, sa->sa_len);
	max_len += sa->sa_len;
	}

	if (!sbuf_overflowed(sb))
	valid_len = sbuf_len(sb);
	}
	IF_ADDR_UNLOCK(ifp);
	if (addrs == 0) {
	bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);

	if (!sbuf_overflowed(sb))
	valid_len = sbuf_len(sb);
	}
	}
	IFNET_RUNLOCK();

	/*
	* If we didn't allocate enough space (uncommon), try again. If
	* we have already allocated as much space as we are allowed,
	* return what we've got.
	*/
	if (valid_len != max_len && !full) {
	sbuf_delete(sb);
	goto again;
	}

	ifc->ifc_len = valid_len;
	#ifdef __amd64__
	if (cmd == SIOCGIFCONF32)
	ifc32->ifc_len = valid_len;
	#endif
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
	}

	/*
	* Just like ifpromisc(), but for all-multicast-reception mode.
	*/
	int
	if_allmulti(struct ifnet *ifp, int onswitch)
	{
		int *countp = &ifp->if_amcount;

		/* No permanent-mode flag is passed for all-multicast (pflag 0). */
		return (if_setflag(ifp, IFF_ALLMULTI, 0, countp, onswitch));
	}

	struct ifmultiaddr *
	if_findmulti(struct ifnet ifp, struct sockaddr sa)
	{
	struct ifmultiaddr *ifma;

	IF_ADDR_LOCK_ASSERT(ifp);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
	if (sa->sa_family == AF_LINK) {
	if (sa_dl_equal(ifma->ifma_addr, sa))
	break;
	} else {
	if (sa_equal(ifma->ifma_addr, sa))
	break;
	}
	}

	return ifma;
	}

	/*
	* Allocate a new ifmultiaddr and initialize based on passed arguments. We
	* make copies of passed sockaddrs. The ifmultiaddr will not be added to
	* the ifnet multicast address list here, so the caller must do that and
	* other setup work (such as notifying the device driver). The reference
	* count is initialized to 1.
	*/
	/*
	 * ifp     interface recorded in the new entry.
	 * sa      address to duplicate into ifma_addr.
	 * llsa    optional link-layer address to duplicate into ifma_lladdr.
	 * mflags  malloc(9) flags used for every allocation.
	 *
	 * Returns the new entry, or NULL if any allocation fails; partial
	 * allocations are released before returning.
	 */
	static struct ifmultiaddr *
	if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
	    int mflags)
	{
		struct ifmultiaddr *ifma;
		struct sockaddr *dupsa;

		ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
		    M_ZERO);
		if (ifma == NULL)
			return (NULL);

		dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
		if (dupsa == NULL) {
			free(ifma, M_IFMADDR);
			return (NULL);
		}
		bcopy(sa, dupsa, sa->sa_len);
		ifma->ifma_addr = dupsa;

		ifma->ifma_ifp = ifp;
		ifma->ifma_refcount = 1;
		ifma->ifma_protospec = NULL;

		if (llsa == NULL) {
			ifma->ifma_lladdr = NULL;
			return (ifma);
		}

		dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
		if (dupsa == NULL) {
			/* Undo both earlier allocations. */
			free(ifma->ifma_addr, M_IFMADDR);
			free(ifma, M_IFMADDR);
			return (NULL);
		}
		bcopy(llsa, dupsa, llsa->sa_len);
		ifma->ifma_lladdr = dupsa;

		return (ifma);
	}

	/*
	* if_freemulti: free ifmultiaddr structure and possibly attached related
	* addresses. The caller is responsible for implementing reference
	* counting, notifying the driver, handling routing messages, and releasing
	* any dependent link layer state.
	*/
	static void
	if_freemulti(struct ifmultiaddr *ifma)
	{
		struct sockaddr *lladdr = ifma->ifma_lladdr;

		/* The entry must be unreferenced and detached from its protocol. */
		KASSERT(ifma->ifma_refcount == 0,
		    ("if_freemulti: refcount %d", ifma->ifma_refcount));
		KASSERT(ifma->ifma_protospec == NULL,
		    ("if_freemulti: protospec not NULL"));

		/* The link-layer copy is optional; the primary address is not. */
		if (lladdr != NULL) {
			free(lladdr, M_IFMADDR);
		}
		free(ifma->ifma_addr, M_IFMADDR);
		free(ifma, M_IFMADDR);
	}

	/*
	* Register an additional multicast address with a network interface.
	*
	* - If the address is already present, bump the reference count on the
	* address and return.
	* - If the address is not link-layer, look up a link layer address.
	* - Allocate address structures for one or both addresses, and attach to the
	* multicast address list on the interface. If automatically adding a link
	* layer address, the protocol address will own a reference to the link
	* layer address, to be freed when it is freed.
	* - Notify the network device driver of an addition to the multicast address
	* list.
	*
	* 'sa' points to caller-owned memory with the desired multicast address.
	*
	* 'retifma' will be used to return a pointer to the resulting multicast
	* address reference, if desired.
	*/
	int
	if_addmulti(struct ifnet ifp, struct sockaddr sa,
	struct ifmultiaddr **retifma)
	{
	struct ifmultiaddr ifma, ll_ifma;
	struct sockaddr *llsa;
	int error;

	/*
	* If the address is already present, return a new reference to it;
	* otherwise, allocate storage and set up a new address.
	*/
	IF_ADDR_LOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
	ifma->ifma_refcount++;
	if (retifma != NULL)
	*retifma = ifma;
	IF_ADDR_UNLOCK(ifp);
	return (0);
	}

	/*
	* The address isn't already present; resolve the protocol address
	* into a link layer address, and then look that up, bump its
	* refcount or allocate an ifma for that also. If 'llsa' was
	* returned, we will need to free it later.
	*/
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
	error = ifp->if_resolvemulti(ifp, &llsa, sa);
	if (error)
	goto unlock_out;
	}

	/*
	* Allocate the new address. Don't hook it up yet, as we may also
	* need to allocate a link layer multicast address.
	*/
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
	error = ENOMEM;
	goto free_llsa_out;
	}

	/*
	* If a link layer address is found, we'll need to see if it's
	* already present in the address list, or allocate is as well.
	* When this block finishes, the link layer address will be on the
	* list.
	*/
	if (llsa != NULL) {
	ll_ifma = if_findmulti(ifp, llsa);
	if (ll_ifma == NULL) {
	ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
	if (ll_ifma == NULL) {
	--ifma->ifma_refcount;
	if_freemulti(ifma);
	error = ENOMEM;
	goto free_llsa_out;
	}
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
	ifma_link);
	} else
	ll_ifma->ifma_refcount++;
	ifma->ifma_llifma = ll_ifma;
	}

	/*
	* We now have a new multicast address, ifma, and possibly a new or
	* referenced link layer address. Add the primary address to the
	* ifnet address list.
	*/
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
	*retifma = ifma;

	/*
	* Must generate the message while holding the lock so that 'ifma'
	* pointer is still valid.
	*/
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_UNLOCK(ifp);

	/*
	* We are certain we have added something, so call down to the
	* interface to let them know about it.
	*/
	if (ifp->if_ioctl != NULL) {
	(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
	}

	if (llsa != NULL)
	free(llsa, M_IFMADDR);

	return (0);

	free_llsa_out:
	if (llsa != NULL)
	free(llsa, M_IFMADDR);

	unlock_out:
	IF_ADDR_UNLOCK(ifp);
	return (error);
	}

	/*
	* Delete a multicast group membership by network-layer group address.
	*
	* Returns ENOENT if the entry could not be found. If ifp no longer
	* exists, results are undefined. This entry point should only be used
	* from subsystems which do appropriate locking to hold ifp for the
	* duration of the call.
	* Network-layer protocol domains must use if_delmulti_ifma().
	*/
	int
	if_delmulti(struct ifnet ifp, struct sockaddr sa)
	{
	struct ifmultiaddr *ifma;
	int lastref;
	#ifdef INVARIANTS
	struct ifnet *oifp;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
	if (ifp == oifp)
	break;
	if (ifp != oifp)
	ifp = NULL;
	IFNET_RUNLOCK_NOSLEEP();

	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
	#endif
	if (ifp == NULL)
	return (ENOENT);

	IF_ADDR_LOCK(ifp);
	lastref = 0;
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL)
	lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_UNLOCK(ifp);

	if (ifma == NULL)
	return (ENOENT);

	if (lastref && ifp->if_ioctl != NULL) {
	(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
	}

	return (0);
	}

	/*
	* Delete all multicast group membership for an interface.
	* Should be used to quickly flush all multicast filters.
	*/
	void
	if_delallmulti(struct ifnet *ifp)
	{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
	if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_UNLOCK(ifp);
	}

	/*
	* Delete a multicast group membership by group membership pointer.
	* Network-layer protocol domains must use this routine.
	*
	* It is safe to call this routine if the ifp disappeared.
	*/
	void
	if_delmulti_ifma(struct ifmultiaddr *ifma)
	{
		struct ifnet *owner = ifma->ifma_ifp;
		int released;

	#ifdef DIAGNOSTIC
		if (owner == NULL) {
			printf("%s: ifma_ifp seems to be detached\n", __func__);
		} else {
			struct ifnet *scan;

			/* Check that the owner is still on the interface list. */
			IFNET_RLOCK_NOSLEEP();
			TAILQ_FOREACH(scan, &V_ifnet, if_link) {
				if (scan == owner)
					break;
			}
			if (scan != owner) {
				printf("%s: ifnet %p disappeared\n", __func__, owner);
				owner = NULL;
			}
			IFNET_RUNLOCK_NOSLEEP();
		}
	#endif
		/*
		 * Without an ifnet there is no address lock to take and no
		 * driver to notify; just drop the reference.
		 */
		if (owner == NULL) {
			(void)if_delmulti_locked(NULL, ifma, 0);
			return;
		}

		IF_ADDR_LOCK(owner);
		released = if_delmulti_locked(owner, ifma, 0);
		IF_ADDR_UNLOCK(owner);

		/* The group was left: update the hardware hash filter. */
		if (released != 0 && owner->if_ioctl != NULL)
			(void)(*owner->if_ioctl)(owner, SIOCDELMULTI, 0);
	}

	/*
	* Perform deletion of network-layer and/or link-layer multicast address.
	*
	* Return 0 if the reference count was decremented.
	* Return 1 if the final reference was released, indicating that the
	* hardware hash filter should be reprogrammed.
	*/
	static int
	if_delmulti_locked(struct ifnet ifp, struct ifmultiaddr ifma, int detaching)
	{
	struct ifmultiaddr *ll_ifma;

	if (ifp != NULL && ifma->ifma_ifp != NULL) {
	KASSERT(ifma->ifma_ifp == ifp,
	("%s: inconsistent ifp %p", __func__, ifp));
	IF_ADDR_LOCK_ASSERT(ifp);
	}

	ifp = ifma->ifma_ifp;

	/*
	* If the ifnet is detaching, null out references to ifnet,
	* so that upper protocol layers will notice, and not attempt
	* to obtain locks for an ifnet which no longer exists. The
	* routing socket announcement must happen before the ifnet
	* instance is detached from the system.
	*/
	if (detaching) {
	#ifdef DIAGNOSTIC
	printf("%s: detaching ifnet instance %p\n", __func__, ifp);
	#endif
	/*
	* ifp may already be nulled out if we are being reentered
	* to delete the ll_ifma.
	*/
	if (ifp != NULL) {
	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	ifma->ifma_ifp = NULL;
	}
	}

	if (--ifma->ifma_refcount > 0)
	return 0;

	/*
	* If this ifma is a network-layer ifma, a link-layer ifma may
	* have been associated with it. Release it first if so.
	*/
	ll_ifma = ifma->ifma_llifma;
	if (ll_ifma != NULL) {
	KASSERT(ifma->ifma_lladdr != NULL,
	("%s: llifma w/o lladdr", __func__));
	if (detaching)
	ll_ifma->ifma_ifp = NULL; /* XXX */
	if (--ll_ifma->ifma_refcount == 0) {
	if (ifp != NULL) {
	TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
	ifma_link);
	}
	if_freemulti(ll_ifma);
	}
	}

	if (ifp != NULL)
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);

	if_freemulti(ifma);

	/*
	* The last reference to this instance of struct ifmultiaddr
	* was released; the hardware should be notified of this change.
	*/
	return 1;
	}

	/*
	* Set the link layer address on an interface.
	*
	* At this time we only support certain types of interfaces,
	* and we don't allow the length of the address to change.
	*/
	int
	if_setlladdr(struct ifnet ifp, const u_char lladdr, int len)
	{
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifreq ifr;

	IF_ADDR_LOCK(ifp);
	ifa = ifp->if_addr;
	if (ifa == NULL) {
	IF_ADDR_UNLOCK(ifp);
	return (EINVAL);
	}
	ifa_ref(ifa);
	IF_ADDR_UNLOCK(ifp);
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	if (sdl == NULL) {
	ifa_free(ifa);
	return (EINVAL);
	}
	if (len != sdl->sdl_alen) { /* don't allow length to change */
	ifa_free(ifa);
	return (EINVAL);
	}
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_XETHER:
	case IFT_ISO88025:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
	case IFT_ARCNET:
	case IFT_IEEE8023ADLAG:
	case IFT_IEEE80211:
	bcopy(lladdr, LLADDR(sdl), len);
	ifa_free(ifa);
	break;
	default:
	ifa_free(ifa);
	return (ENODEV);
	}

	/*
	* If the interface is already up, we need
	* to re-init it in order to reprogram its
	* address filter.
	*/
	if ((ifp->if_flags & IFF_UP) != 0) {
	if (ifp->if_ioctl) {
	ifp->if_flags &= ~IFF_UP;
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	ifp->if_flags \|= IFF_UP;
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	}
	#ifdef INET
	/*
	* Also send gratuitous ARPs to notify other nodes about
	* the address change.
	*/
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family == AF_INET)
	arp_ifinit(ifp, ifa);
	}
	#endif
	}
	return (0);
	}

	/*
	* The name argument must be a pointer to storage which will last as
	* long as the interface does. For physical devices, the result of
	* device_get_name(dev) is a good choice and for pseudo-devices a
	* static string works well.
	*/
	void
	if_initname(struct ifnet ifp, const char name, int unit)
	{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	}

	int
	if_printf(struct ifnet ifp, const char fmt, ...)
	{
	va_list ap;
	int retval;

	retval = printf("%s: ", ifp->if_xname);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
	}

	/*
	 * Invoke the interface's if_start handler.
	 */
	void
	if_start(struct ifnet *ifp)
	{
		void (*start_fn)(struct ifnet *) = ifp->if_start;

		start_fn(ifp);
	}

	/*
	* Backwards compatibility interface for drivers
	* that have not implemented it
	*/
	/*
	 * Hand 'm' to the interface through IFQ_HANDOFF() and return the
	 * error value that macro stores.
	 */
	static int
	if_transmit(struct ifnet *ifp, struct mbuf *m)
	{
		int error;

		IFQ_HANDOFF(ifp, m, error);
		return (error);
	}

	int
	if_handoff(struct ifqueue ifq, struct mbuf m, struct ifnet *ifp, int adjust)
	{
	int active = 0;

	IF_LOCK(ifq);
	if (_IF_QFULL(ifq)) {
	_IF_DROP(ifq);
	IF_UNLOCK(ifq);
	m_freem(m);
	return (0);
	}
	if (ifp != NULL) {
	ifp->if_obytes += m->m_pkthdr.len + adjust;
	if (m->m_flags & (M_BCAST\|M_MCAST))
	ifp->if_omcasts++;
	active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
	}
	_IF_ENQUEUE(ifq, m);
	IF_UNLOCK(ifq);
	if (ifp != NULL && !active)
	(*(ifp)->if_start)(ifp);
	return (1);
	}

	/*
	 * Install the allocate/free handlers for interface type 'type'.
	 * Both slots must currently be empty.
	 */
	void
	if_register_com_alloc(u_char type,
	    if_com_alloc_t a, if_com_free_t f)
	{
		KASSERT(if_com_alloc[type] == NULL,
		    ("if_register_com_alloc: %d already registered", type));
		KASSERT(if_com_free[type] == NULL,
		    ("if_register_com_alloc: %d free already registered", type));

		if_com_free[type] = f;
		if_com_alloc[type] = a;
	}

	/*
	 * Clear the allocate/free handlers for interface type 'type'.
	 * Both slots must currently be populated.
	 */
	void
	if_deregister_com_alloc(u_char type)
	{
		KASSERT(if_com_alloc[type] != NULL,
		    ("if_deregister_com_alloc: %d not registered", type));
		KASSERT(if_com_free[type] != NULL,
		    ("if_deregister_com_alloc: %d free not registered", type));

		if_com_free[type] = NULL;
		if_com_alloc[type] = NULL;
	}

	#ifdef DDB
	/*
	 * Debugger helper: print selected fields of 'ifp' with db_printf(),
	 * one "name = value" line per field.  Does nothing for a NULL pointer.
	 */
	static void
	if_show_ifnet(struct ifnet *ifp)
	{

		if (ifp == NULL)
			return;
		db_printf("%s:\n", ifp->if_xname);
		/*
		 * 'f' is the printf conversion for the field, 'e' the member
		 * expression; #e supplies the printed name.  The macro carries no
		 * trailing semicolon so each use below is a single statement.
		 */
	#define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e)
		IF_DB_PRINTF("%s", if_dname);
		IF_DB_PRINTF("%d", if_dunit);
		IF_DB_PRINTF("%s", if_description);
		IF_DB_PRINTF("%u", if_index);
		IF_DB_PRINTF("%u", if_refcount);
		IF_DB_PRINTF("%p", if_softc);
		IF_DB_PRINTF("%p", if_l2com);
		IF_DB_PRINTF("%p", if_vnet);
		IF_DB_PRINTF("%p", if_home_vnet);
		IF_DB_PRINTF("%p", if_addr);
		IF_DB_PRINTF("%p", if_llsoftc);
		IF_DB_PRINTF("%p", if_label);
		/* if_pcount is a signed int in struct ifnet. */
		IF_DB_PRINTF("%d", if_pcount);
		IF_DB_PRINTF("0x%08x", if_flags);
		IF_DB_PRINTF("0x%08x", if_drv_flags);
		IF_DB_PRINTF("0x%08x", if_capabilities);
		IF_DB_PRINTF("0x%08x", if_capenable);
		IF_DB_PRINTF("%p", if_snd.ifq_head);
		IF_DB_PRINTF("%p", if_snd.ifq_tail);
		IF_DB_PRINTF("%d", if_snd.ifq_len);
		IF_DB_PRINTF("%d", if_snd.ifq_maxlen);
		IF_DB_PRINTF("%d", if_snd.ifq_drops);
		IF_DB_PRINTF("%p", if_snd.ifq_drv_head);
		IF_DB_PRINTF("%p", if_snd.ifq_drv_tail);
		IF_DB_PRINTF("%d", if_snd.ifq_drv_len);
		IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen);
		IF_DB_PRINTF("%d", if_snd.altq_type);
		IF_DB_PRINTF("%x", if_snd.altq_flags);
	#undef IF_DB_PRINTF
	}

	/*
	 * "show ifnet <addr>": dump the struct ifnet at the given address.
	 */
	DB_SHOW_COMMAND(ifnet, db_show_ifnet)
	{
		if (have_addr) {
			if_show_ifnet((struct ifnet *)addr);
			return;
		}
		db_printf("usage: show ifnet <struct ifnet *>\n");
	}

	/*
	 * "show all ifnets": for every vnet, list the name and address of each
	 * ifnet found in that vnet's interface index table.
	 */
	DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
	{
		VNET_ITERATOR_DECL(vnet_iter);
		struct ifnet *cur;
		u_short slot;

		VNET_FOREACH(vnet_iter) {
			CURVNET_SET_QUIET(vnet_iter);
	#ifdef VIMAGE
			db_printf("vnet=%p\n", curvnet);
	#endif
			for (slot = 1; slot <= V_if_index; slot++) {
				cur = V_ifindex_table[slot].ife_ifnet;
				if (cur != NULL) {
					db_printf( "%20s ifp=%p\n", cur->if_xname, cur);
					/* Stops this vnet's listing only. */
					if (db_pager_quit)
						break;
				}
			}
			CURVNET_RESTORE();
		}
	}
	#endif
	Index: stable/8/sys/net/if_var.h
	===================================================================
	--- stable/8/sys/net/if_var.h (revision 209276)
	+++ stable/8/sys/net/if_var.h (revision 209277)
	@@ -1,904 +1,904 @@
	/*-
	* Copyright (c) 1982, 1986, 1989, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* From: @(#)if.h 8.1 (Berkeley) 6/10/93
	* $FreeBSD$
	*/

	#ifndef _NET_IF_VAR_H_
	#define _NET_IF_VAR_H_

	/*
	* Structures defining a network interface, providing a packet
	* transport mechanism (ala level 0 of the PUP protocols).
	*
	* Each interface accepts output datagrams of a specified maximum
	* length, and provides higher level routines with input datagrams
	* received from its medium.
	*
	* Output occurs when the routine if_output is called, with four parameters:
	* (*ifp->if_output)(ifp, m, dst, rt)
	* Here m is the mbuf chain to be sent, dst is the destination address,
	* and rt is a pointer to the struct route passed along with the request.
	* The output routine encapsulates the supplied datagram if necessary,
	* and then transmits it on its medium.
	*
	* On input, each interface unwraps the data received by it, and either
	* places it on the input queue of an internetwork datagram routine
	* and posts the associated software interrupt, or passes the datagram to a raw
	* packet input routine.
	*
	* Routines exist for locating interfaces by their addresses
	* or for locating an interface on a certain network, as well as more general
	* routing and gateway routines maintaining information used to locate
	* interfaces. These routines live in the files if.c and route.c
	*/

	#ifdef __STDC__
	/*
	* Forward structure declarations for function prototypes [sic].
	*/
	struct mbuf;
	struct thread;
	struct rtentry;
	struct rt_addrinfo;
	struct socket;
	struct ether_header;
	struct carp_if;
	struct ifvlantrunk;
	struct route;
	struct vnet;
	#endif

	#include <sys/queue.h> /* get TAILQ macros */

	#ifdef _KERNEL
	#include <sys/mbuf.h>
	#include <sys/eventhandler.h>
	#include <sys/buf_ring.h>
	#include <net/vnet.h>
	#endif /* _KERNEL */
	#include <sys/lock.h> /* XXX */
	#include <sys/mutex.h> /* XXX */
	#include <sys/rwlock.h> /* XXX */
	#include <sys/sx.h> /* XXX */
	#include <sys/event.h> /* XXX */
	#include <sys/_task.h>

	/* Unit value meaning "no unit number"; see the if_dunit member. */
	#define IF_DUNIT_NONE (-1)

	#include <altq/if_altq.h>

	/*
	 * Tail-queue head types, one per element type.  struct ifnet below
	 * uses ifaddrhead, ifprefixhead and ifmultihead for its per-interface
	 * lists.
	 */
	TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
	TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
	TAILQ_HEAD(ifprefixhead, ifprefix);
	TAILQ_HEAD(ifmultihead, ifmultiaddr);
	TAILQ_HEAD(ifgrouphead, ifg_group);

	/*
	* Structure defining a queue for a network interface.
	*/
	struct ifqueue {
	struct mbuf *ifq_head; /* first packet; _IF_DEQUEUE removes from here */
	struct mbuf *ifq_tail; /* last packet; _IF_ENQUEUE appends after it */
	int ifq_len; /* number of packets currently queued */
	int ifq_maxlen; /* _IF_QFULL reports full once ifq_len reaches this */
	int ifq_drops; /* incremented by _IF_DROP */
	struct mtx ifq_mtx; /* taken by IF_LOCK, released by IF_UNLOCK */
	};

	/*
	* Structure defining a network interface.
	*
	* (Would like to call this struct ``if'', but C isn't PL/1.)
	*/

	struct ifnet {
	void if_softc; / pointer to driver state */
	void if_l2com; / pointer to protocol bits */
	struct vnet if_vnet; / pointer to network stack instance */
	TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
	char if_xname[IFNAMSIZ]; /* external name (name + unit) */
	const char if_dname; / driver name */
	int if_dunit; /* unit or IF_DUNIT_NONE */
	u_int if_refcount; /* reference count */
	struct ifaddrhead if_addrhead; /* linked list of addresses per if */
	/*
	* if_addrhead is the list of all addresses associated to
	* an interface.
	* Some code in the kernel assumes that first element
	* of the list has type AF_LINK, and contains sockaddr_dl
	* addresses which store the link-level address and the name
	* of the interface.
	* However, access to the AF_LINK address through this
	* field is deprecated. Use if_addr or ifaddr_byindex() instead.
	*/
	int if_pcount; /* number of promiscuous listeners */
	struct carp_if if_carp; / carp interface structure */
	struct bpf_if if_bpf; / packet filter structure */
	u_short if_index; /* numeric abbreviation for this if */
	short if_timer; /* time 'til if_watchdog called */
	struct ifvlantrunk if_vlantrunk; / pointer to 802.1q data */
	int if_flags; /* up/down, broadcast, etc. */
	int if_capabilities; /* interface features & capabilities */
	int if_capenable; /* enabled features & capabilities */
	void if_linkmib; / link-type-specific MIB data */
	size_t if_linkmiblen; /* length of above data */
	struct if_data if_data;
	struct ifmultihead if_multiaddrs; /* multicast addresses configured */
	int if_amcount; /* number of all-multicast requests */
	/* procedure handles */
	int (if_output) / output routine (enqueue) */
	(struct ifnet , struct mbuf , struct sockaddr *,
	struct route *);
	void (if_input) / input routine (from h/w driver) */
	(struct ifnet , struct mbuf );
	void (if_start) / initiate output routine */
	(struct ifnet *);
	int (if_ioctl) / ioctl routine */
	(struct ifnet *, u_long, caddr_t);
	void (if_watchdog) / timer routine */
	(struct ifnet *);
	void (if_init) / Init routine */
	(void *);
	int (if_resolvemulti) / validate/resolve multicast */
	(struct ifnet , struct sockaddr , struct sockaddr );
	void (if_qflush) / flush any queues */
	(struct ifnet *);
	int (if_transmit) / initiate output routine */
	(struct ifnet , struct mbuf );
	void (if_reassign) / reassign to vnet routine */
	(struct ifnet , struct vnet , char *);
	struct vnet if_home_vnet; / where this ifnet originates from */
	struct ifaddr if_addr; / pointer to link-level address */
	void if_llsoftc; / link layer softc */
	int if_drv_flags; /* driver-managed status flags */
	struct ifaltq if_snd; /* output queue (includes altq) */
	const u_int8_t if_broadcastaddr; / linklevel broadcast bytestring */

	void if_bridge; / bridge glue */

	struct label if_label; / interface MAC label */

	/* these are only used by IPv6 */
	struct ifprefixhead if_prefixhead; /* list of prefixes per if */
	void *if_afdata[AF_MAX];
	int if_afdata_initialized;
	struct rwlock if_afdata_lock;
	struct task if_linktask; /* task for link change events */
	struct mtx if_addr_mtx; /* mutex to protect address lists */

	LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
	/* protected by if_addr_mtx */
	void *if_pf_kif;
	void if_lagg; / lagg glue */
	u_char if_alloctype; /* if_type at time of allocation */

	/*
	* Spare fields are added so that we can modify sensitive data
	* structures without changing the kernel binary interface, and must
	* be used with care where binary compatibility is required.
	*/
	char if_cspare[3];
	char if_description; / interface description */
	void *if_pspare[7];
	int if_ispare[4];
	};

	/* Function type with the same signature as the if_init member of struct ifnet. */
	typedef void if_init_f_t(void *);

	/*
	* XXX These aliases are terribly dangerous because they could apply
	* to anything.
	*/
	/*
	 * Each alias rewrites an if_* identifier into the matching member of
	 * the embedded struct if_data, so ifp->if_mtu means
	 * ifp->if_data.ifi_mtu.
	 */
	#define if_mtu if_data.ifi_mtu
	#define if_type if_data.ifi_type
	#define if_physical if_data.ifi_physical
	#define if_addrlen if_data.ifi_addrlen
	#define if_hdrlen if_data.ifi_hdrlen
	#define if_metric if_data.ifi_metric
	#define if_link_state if_data.ifi_link_state
	#define if_baudrate if_data.ifi_baudrate
	#define if_hwassist if_data.ifi_hwassist
	#define if_ipackets if_data.ifi_ipackets
	#define if_ierrors if_data.ifi_ierrors
	#define if_opackets if_data.ifi_opackets
	#define if_oerrors if_data.ifi_oerrors
	#define if_collisions if_data.ifi_collisions
	#define if_ibytes if_data.ifi_ibytes
	#define if_obytes if_data.ifi_obytes
	#define if_imcasts if_data.ifi_imcasts
	#define if_omcasts if_data.ifi_omcasts
	#define if_iqdrops if_data.ifi_iqdrops
	#define if_noproto if_data.ifi_noproto
	#define if_lastchange if_data.ifi_lastchange

	/* for compatibility with other BSDs */
	#define if_addrlist if_addrhead
	#define if_list if_link
	/* Yields the interface's external name buffer (if_xname). */
	#define if_name(ifp) ((ifp)->if_xname)

	/*
	* Locks for address lists on the network interface.
	*/
	/* Each macro takes a struct ifnet pointer and operates on its if_addr_mtx. */
	#define IF_ADDR_LOCK_INIT(ifp) \
		mtx_init(&(ifp)->if_addr_mtx, "if_addr_mtx", NULL, MTX_DEF)
	#define IF_ADDR_LOCK_DESTROY(ifp) \
		mtx_destroy(&(ifp)->if_addr_mtx)
	#define IF_ADDR_LOCK(ifp) \
		mtx_lock(&(ifp)->if_addr_mtx)
	#define IF_ADDR_UNLOCK(ifp) \
		mtx_unlock(&(ifp)->if_addr_mtx)
	#define IF_ADDR_LOCK_ASSERT(ifp) \
		mtx_assert(&(ifp)->if_addr_mtx, MA_OWNED)

	/*
	* Function variations on locking macros intended to be used by loadable
	* kernel modules in order to divorce them from the internals of address list
	* locking.
	*/
	void	if_addr_rlock(struct ifnet *ifp);	/* if_addrhead */
	void	if_addr_runlock(struct ifnet *ifp);	/* if_addrhead */
	void	if_maddr_rlock(struct ifnet *ifp);	/* if_multiaddrs */
	void	if_maddr_runlock(struct ifnet *ifp);	/* if_multiaddrs */

	/*
	* Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
	* are queues of messages stored on ifqueue structures
	* (defined above). Entries are added to and deleted from these structures
	* by these macros, which should be called with ipl raised to splimp().
	*/
	/* Acquire / release / assert ownership of the queue mutex ifq_mtx. */
	#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
	#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
	#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
	/* The underscore-prefixed forms below do no locking themselves. */
	#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
	#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
	#define _IF_QLEN(ifq) ((ifq)->ifq_len)

	#define _IF_ENQUEUE(ifq, m) do { \
	(m)->m_nextpkt = NULL; \
	if ((ifq)->ifq_tail == NULL) \
	(ifq)->ifq_head = m; \
	else \
	(ifq)->ifq_tail->m_nextpkt = m; \
	(ifq)->ifq_tail = m; \
	(ifq)->ifq_len++; \
	} while (0)

	#define IF_ENQUEUE(ifq, m) do { \
	IF_LOCK(ifq); \
	_IF_ENQUEUE(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	#define _IF_PREPEND(ifq, m) do { \
	(m)->m_nextpkt = (ifq)->ifq_head; \
	if ((ifq)->ifq_tail == NULL) \
	(ifq)->ifq_tail = (m); \
	(ifq)->ifq_head = (m); \
	(ifq)->ifq_len++; \
	} while (0)

	#define IF_PREPEND(ifq, m) do { \
	IF_LOCK(ifq); \
	_IF_PREPEND(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	#define _IF_DEQUEUE(ifq, m) do { \
	(m) = (ifq)->ifq_head; \
	if (m) { \
	if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
	(ifq)->ifq_tail = NULL; \
	(m)->m_nextpkt = NULL; \
	(ifq)->ifq_len--; \
	} \
	} while (0)

	/* _IF_DEQUEUE() performed with the queue mutex held. */
	#define IF_DEQUEUE(ifq, m) do { \
	IF_LOCK(ifq); \
	_IF_DEQUEUE(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Peek at the head packet without removing it; m receives ifq_head
	 * (NULL when the queue is empty).  IF_POLL is a plain alias and, unlike
	 * the other IF_* wrappers here, does not take the queue mutex.
	 */
	#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
	#define IF_POLL(ifq, m) _IF_POLL(ifq, m)

	/*
	 * Dequeue and m_freem() every packet on ifq until it is empty.  Does not
	 * take the queue mutex.  The expansion declares a local named 'm', so
	 * the ifq argument must not itself refer to a variable called m.
	 */
	#define _IF_DRAIN(ifq) do { \
	struct mbuf *m; \
	for (;;) { \
	_IF_DEQUEUE(ifq, m); \
	if (m == NULL) \
	break; \
	m_freem(m); \
	} \
	} while (0)

	/* _IF_DRAIN() performed with the queue mutex held. */
	#define IF_DRAIN(ifq) do { \
	IF_LOCK(ifq); \
	_IF_DRAIN(ifq); \
	IF_UNLOCK(ifq); \
	} while(0)

	#ifdef _KERNEL
	/*
	 * Event handler types for interface events.  Each handler receives an
	 * opaque void * first argument followed by the affected interface.
	 */
	/* interface link layer address change event */
	typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
	/* interface address change event */
	typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
	/* new interface arrival event */
	typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
	/* interface departure event */
	typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);

	/*
	* interface groups
	*/
	/* One interface group: a name plus the list of its members. */
	struct ifg_group {
	/* group name, at most IFNAMSIZ bytes */
	char ifg_group[IFNAMSIZ];
	/* reference count */
	u_int ifg_refcnt;
	/* opaque pointer; presumably owned by pf -- TODO confirm */
	void *ifg_pf_kif;
	/* list of ifg_member entries belonging to this group */
	TAILQ_HEAD(, ifg_member) ifg_members;
	/* linkage on a list of groups */
	TAILQ_ENTRY(ifg_group) ifg_next;
	};

	/* Membership record linking one interface into a group's member list. */
	struct ifg_member {
	/* linkage on ifg_group.ifg_members */
	TAILQ_ENTRY(ifg_member) ifgm_next;
	/* the member interface */
	struct ifnet *ifgm_ifp;
	};

	/*
	 * List element that points at a group.  Presumably kept per interface
	 * to record the groups it belongs to -- confirm against struct ifnet.
	 */
	struct ifg_list {
	/* the referenced group */
	struct ifg_group *ifgl_group;
	/* list linkage */
	TAILQ_ENTRY(ifg_list) ifgl_next;
	};

	/*
	 * Event handler types for interface group events.  Attach and detach
	 * handlers receive the group; the change handler receives a string.
	 */
	/* group attach event */
	typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
	EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
	/* group detach event */
	typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
	EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
	/* group change event */
	typedef void (*group_change_event_handler_t)(void *, const char *);
	EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);

	/* Initialize the per-interface if_afdata_lock reader/writer lock. */
	#define IF_AFDATA_LOCK_INIT(ifp) \
	rw_init(&(ifp)->if_afdata_lock, "if_afdata")

	/*
	 * Write/read acquire and release of if_afdata_lock.  The unqualified
	 * IF_AFDATA_LOCK/UNLOCK/TRYLOCK forms are the write-lock variants.
	 */
	#define IF_AFDATA_WLOCK(ifp) rw_wlock(&(ifp)->if_afdata_lock)
	#define IF_AFDATA_RLOCK(ifp) rw_rlock(&(ifp)->if_afdata_lock)
	#define IF_AFDATA_WUNLOCK(ifp) rw_wunlock(&(ifp)->if_afdata_lock)
	#define IF_AFDATA_RUNLOCK(ifp) rw_runlock(&(ifp)->if_afdata_lock)
	#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
	#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
	#define IF_AFDATA_TRYLOCK(ifp) rw_try_wlock(&(ifp)->if_afdata_lock)
	#define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock)

	/* Assert that if_afdata_lock is held (RA_LOCKED) or not held at all. */
	#define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
	#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)

	/*
	 * Hand packet m to queue ifq on behalf of interface ifp; 'adjust' is an
	 * extra amount passed through by IF_HANDOFF_ADJ (IF_HANDOFF passes 0).
	 * The macros cast their queue argument to struct ifqueue *, so the
	 * argument is parenthesized before the cast is applied.
	 */
	int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
	    int adjust);
	#define IF_HANDOFF(ifq, m, ifp) \
		if_handoff((struct ifqueue *)(ifq), m, ifp, 0)
	#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
		if_handoff((struct ifqueue *)(ifq), m, ifp, adj)

	void if_start(struct ifnet *);

	/*
	 * Enqueue m on ifq under the queue mutex and report the result in err.
	 * When ALTQ is enabled the packet goes through ALTQ_ENQUEUE(), which
	 * sets err.  Otherwise a full queue frees m and sets err to ENOBUFS,
	 * and a non-full queue appends m and sets err to 0.  Any non-zero err
	 * also increments ifq_drops.  err must be an lvalue.
	 */
	#define IFQ_ENQUEUE(ifq, m, err) \
	do { \
	IF_LOCK(ifq); \
	if (ALTQ_IS_ENABLED(ifq)) \
	ALTQ_ENQUEUE(ifq, m, NULL, err); \
	else { \
	if (_IF_QFULL(ifq)) { \
	m_freem(m); \
	(err) = ENOBUFS; \
	} else { \
	_IF_ENQUEUE(ifq, m); \
	(err) = 0; \
	} \
	} \
	if (err) \
	(ifq)->ifq_drops++; \
	IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Remove the next packet from ifq into m without taking the queue
	 * mutex.  The source is chosen in priority order: tbr_dequeue_ptr()
	 * when TBR is enabled, ALTQ_DEQUEUE() when ALTQ is enabled, otherwise
	 * the plain _IF_DEQUEUE().
	 */
	#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
	do { \
	if (TBR_IS_ENABLED(ifq)) \
	(m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
	else if (ALTQ_IS_ENABLED(ifq)) \
	ALTQ_DEQUEUE(ifq, m); \
	else \
	_IF_DEQUEUE(ifq, m); \
	} while (0)

	/* IFQ_DEQUEUE_NOLOCK() performed with the queue mutex held. */
	#define IFQ_DEQUEUE(ifq, m) \
	do { \
	IF_LOCK(ifq); \
	IFQ_DEQUEUE_NOLOCK(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Peek at the next packet of ifq into m without taking the queue mutex.
	 * Uses the same TBR / ALTQ / plain-queue selection as
	 * IFQ_DEQUEUE_NOLOCK, but with the polling variants.
	 */
	#define IFQ_POLL_NOLOCK(ifq, m) \
	do { \
	if (TBR_IS_ENABLED(ifq)) \
	(m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
	else if (ALTQ_IS_ENABLED(ifq)) \
	ALTQ_POLL(ifq, m); \
	else \
	_IF_POLL(ifq, m); \
	} while (0)

	/* IFQ_POLL_NOLOCK() performed with the queue mutex held. */
	#define IFQ_POLL(ifq, m) \
	do { \
	IF_LOCK(ifq); \
	IFQ_POLL_NOLOCK(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Discard everything queued on ifq without taking the queue mutex:
	 * ALTQ_PURGE() when ALTQ is enabled, otherwise _IF_DRAIN().
	 */
	#define IFQ_PURGE_NOLOCK(ifq) \
	do { \
	if (ALTQ_IS_ENABLED(ifq)) { \
	ALTQ_PURGE(ifq); \
	} else \
	_IF_DRAIN(ifq); \
	} while (0)

	/* IFQ_PURGE_NOLOCK() performed with the queue mutex held. */
	#define IFQ_PURGE(ifq) \
	do { \
	IF_LOCK(ifq); \
	IFQ_PURGE_NOLOCK(ifq); \
	IF_UNLOCK(ifq); \
	} while (0)

	/* Set the ALTQF_READY bit in the queue's altq_flags. */
	#define IFQ_SET_READY(ifq) \
		do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)

	/* IFQ_* spellings of the IF_* lock operations. */
	#define IFQ_LOCK(ifq) IF_LOCK(ifq)
	#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
	#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
	/* Direct accessors for ifq_len, ifq_drops and ifq_maxlen; none lock. */
	#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
	#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
	#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
	#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
	#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))

	/*
	* The IFF_DRV_OACTIVE test should really occur in the device driver, not in
	* the handoff logic, as that flag is locked by the device driver.
	*/
	/*
	 * Enqueue m on ifp's send queue (if_snd) and, on success, account for
	 * it and kick the driver.  The packet length and flags are captured
	 * before IFQ_ENQUEUE() because that macro may free m on failure.
	 * On success if_obytes grows by len + adj, if_omcasts is bumped for
	 * M_MCAST packets, and if_start() is called unless IFF_DRV_OACTIVE is
	 * set.  err receives the IFQ_ENQUEUE() result.
	 */
	#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
	do { \
	int len; \
	short mflags; \
	\
	len = (m)->m_pkthdr.len; \
	mflags = (m)->m_flags; \
	IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
	if ((err) == 0) { \
	(ifp)->if_obytes += len + (adj); \
	if (mflags & M_MCAST) \
	(ifp)->if_omcasts++; \
	if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
	if_start(ifp); \
	} \
	} while (0)

	/* IFQ_HANDOFF_ADJ() with a zero byte-count adjustment. */
	#define IFQ_HANDOFF(ifp, m, err) \
	IFQ_HANDOFF_ADJ(ifp, m, 0, err)

	/*
	 * Dequeue one packet into m, preferring the driver-side staging list
	 * (ifq_drv_head/ifq_drv_tail/ifq_drv_len).  The staging list is read
	 * without taking the queue mutex.  When it is empty, the queue mutex is
	 * taken once, one packet is dequeued for the caller, and further packets
	 * are moved onto the staging list until it holds ifq_drv_maxlen entries
	 * or the underlying queue runs dry.
	 */
	#define IFQ_DRV_DEQUEUE(ifq, m) \
	do { \
	(m) = (ifq)->ifq_drv_head; \
	if (m) { \
	if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
	(ifq)->ifq_drv_tail = NULL; \
	(m)->m_nextpkt = NULL; \
	(ifq)->ifq_drv_len--; \
	} else { \
	IFQ_LOCK(ifq); \
	IFQ_DEQUEUE_NOLOCK(ifq, m); \
	while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
	struct mbuf *m0; \
	IFQ_DEQUEUE_NOLOCK(ifq, m0); \
	if (m0 == NULL) \
	break; \
	m0->m_nextpkt = NULL; \
	if ((ifq)->ifq_drv_tail == NULL) \
	(ifq)->ifq_drv_head = m0; \
	else \
	(ifq)->ifq_drv_tail->m_nextpkt = m0; \
	(ifq)->ifq_drv_tail = m0; \
	(ifq)->ifq_drv_len++; \
	} \
	IFQ_UNLOCK(ifq); \
	} \
	} while (0)

	/*
	 * Put m back at the head of the driver-side staging list and increment
	 * ifq_drv_len; m also becomes the tail if the list was empty.  Takes no
	 * lock.
	 */
	#define IFQ_DRV_PREPEND(ifq, m) \
	do { \
	(m)->m_nextpkt = (ifq)->ifq_drv_head; \
	if ((ifq)->ifq_drv_tail == NULL) \
	(ifq)->ifq_drv_tail = (m); \
	(ifq)->ifq_drv_head = (m); \
	(ifq)->ifq_drv_len++; \
	} while (0)

	/* True only when both the staging list and the main queue are empty. */
	#define IFQ_DRV_IS_EMPTY(ifq) \
	(((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))

	/*
	 * Free every packet on the driver-side staging list, reset the list to
	 * empty, then purge the main queue with IFQ_PURGE().  m and n are
	 * pointers: n walks ahead so each packet's link is read before the
	 * packet is freed.
	 */
	#define IFQ_DRV_PURGE(ifq) \
	do { \
		struct mbuf *m, *n = (ifq)->ifq_drv_head; \
		while((m = n) != NULL) { \
			n = m->m_nextpkt; \
			m_freem(m); \
		} \
		(ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
		(ifq)->ifq_drv_len = 0; \
		IFQ_PURGE(ifq); \
	} while (0)

	#ifdef _KERNEL
	/*
	 * Add len to ifp->if_obytes and, for packets flagged M_MCAST, increment
	 * ifp->if_omcasts.  The body is compiled out when NO_SLOW_STATS is
	 * defined.
	 */
	static __inline void
	drbr_stats_update(struct ifnet *ifp, int len, int mflags)
	{
	#ifndef NO_SLOW_STATS
		if ((mflags & M_MCAST) != 0)
			ifp->if_omcasts++;
		ifp->if_obytes += len;
	#endif
	}

	static __inline int
	drbr_enqueue(struct ifnet ifp, struct buf_ring br, struct mbuf *m)
	{
	int error = 0;
	int len = m->m_pkthdr.len;
	int mflags = m->m_flags;

	#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
	IFQ_ENQUEUE(&ifp->if_snd, m, error);
	return (error);
	}
	#endif
	if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) {
	br->br_drops++;
	m_freem(m);
	} else
	drbr_stats_update(ifp, len, mflags);

	return (error);
	}

	/*
	 * Free every packet held in br.  With ALTQ compiled in, a non-NULL ifp
	 * whose if_snd has ALTQ enabled is purged first.  ifp may be NULL
	 * (drbr_free() passes NULL).
	 */
	static __inline void
	drbr_flush(struct ifnet *ifp, struct buf_ring *br)
	{
		struct mbuf *m;

	#ifdef ALTQ
		if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
			IFQ_PURGE(&ifp->if_snd);
	#endif
		while ((m = buf_ring_dequeue_sc(br)) != NULL)
			m_freem(m);
	}

	/*
	 * Drain br of any remaining packets and release the ring itself.
	 * 'type' is handed unchanged to buf_ring_free().
	 */
	static __inline void
	drbr_free(struct buf_ring *br, struct malloc_type *type)
	{

		drbr_flush(NULL, br);
		buf_ring_free(br, type);
	}

	/*
	 * Return the next packet to transmit, or NULL if none.  With ALTQ
	 * compiled in and enabled on if_snd the packet comes from
	 * IFQ_DEQUEUE(); otherwise it comes from buf_ring_dequeue_sc(br).
	 */
	static __inline struct mbuf *
	drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
	{
	#ifdef ALTQ
		struct mbuf *m;

		if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
			IFQ_DEQUEUE(&ifp->if_snd, m);
			return (m);
		}
	#endif
		return (buf_ring_dequeue_sc(br));
	}

	/*
	 * Dequeue the next packet only if the predicate accepts it.  The head
	 * packet is peeked and passed to func(m, arg); a return of 0 leaves it
	 * queued and yields NULL.  On the ring path an empty ring also yields
	 * NULL without calling func.  On the ALTQ path the peek and dequeue
	 * happen under the if_snd queue lock.
	 */
	static __inline struct mbuf *
	drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
	    int (*func) (struct mbuf *, void *), void *arg)
	{
		struct mbuf *m;
	#ifdef ALTQ
		if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_POLL_NOLOCK(&ifp->if_snd, m);
			if (m != NULL && func(m, arg) == 0) {
				IFQ_UNLOCK(&ifp->if_snd);
				return (NULL);
			}
			IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
			return (m);
		}
	#endif
		m = buf_ring_peek(br);
		if (m == NULL || func(m, arg) == 0)
			return (NULL);

		return (buf_ring_dequeue_sc(br));
	}

	/*
	 * Non-zero when there is nothing to transmit: checks if_snd when ALTQ
	 * is compiled in and enabled, otherwise the buf_ring br.
	 */
	static __inline int
	drbr_empty(struct ifnet *ifp, struct buf_ring *br)
	{
	#ifdef ALTQ
		if (ALTQ_IS_ENABLED(&ifp->if_snd))
			return (IFQ_IS_EMPTY(&ifp->if_snd));
	#endif
		return (buf_ring_empty(br));
	}

	/*
	 * Non-zero when a new packet must be queued rather than sent directly:
	 * always when ALTQ is compiled in and enabled on if_snd, otherwise
	 * whenever the ring br already holds packets.
	 */
	static __inline int
	drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
	{
	#ifdef ALTQ
		if (ALTQ_IS_ENABLED(&ifp->if_snd))
			return (1);
	#endif
		return (!buf_ring_empty(br));
	}

	/*
	 * Number of queued packets: if_snd.ifq_len when ALTQ is compiled in and
	 * enabled, otherwise buf_ring_count(br).
	 */
	static __inline int
	drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
	{
	#ifdef ALTQ
		if (ALTQ_IS_ENABLED(&ifp->if_snd))
			return (ifp->if_snd.ifq_len);
	#endif
		return (buf_ring_count(br));
	}
	#endif
	/*
	* 72 was chosen below because it is the size of a TCP/IP
	* header (40) + the minimum mss (32).
	*/
	#define IF_MINMTU 72
	#define IF_MAXMTU 65535

	#endif /* _KERNEL */

	/*
	* The ifaddr structure contains information about one address
	* of an interface. They are maintained by the different address families,
	* are allocated and attached when an address is set, and are linked
	* together so all addresses for an interface can be located.
	*
	* NOTE: a 'struct ifaddr' is always at the beginning of a larger
	* chunk of malloc'ed memory, where we store the three addresses
	* (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
	*/
	struct ifaddr {
	struct sockaddr ifa_addr; / address of interface */
	struct sockaddr ifa_dstaddr; / other end of p-to-p link */
	#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
	struct sockaddr ifa_netmask; / used to determine subnet */
	struct if_data if_data; /* not all members are meaningful */
	struct ifnet ifa_ifp; / back-pointer to interface */
	TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
	void (ifa_rtrequest) / check or clean routes (+ or -)'d */
	(int, struct rtentry , struct rt_addrinfo );
	u_short ifa_flags; /* mostly rt_flags for cloning */
	u_int ifa_refcnt; /* references to this structure */
	int ifa_metric; /* cost of going out this interface */
	int (ifa_claim_addr) / check if an addr goes to this if */
	(struct ifaddr , struct sockaddr );
	struct mtx ifa_mtx;
	};
	#define IFA_ROUTE RTF_UP /* route installed */
	#define IFA_RTSELF RTF_HOST /* loopback route to self installed */

	/* for compatibility with other BSDs */
	#define ifa_list ifa_link

	#ifdef _KERNEL
	#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
	#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)

	void ifa_free(struct ifaddr *ifa);
	void ifa_init(struct ifaddr *ifa);
	void ifa_ref(struct ifaddr *ifa);
	#endif

	/*
	* The prefix structure contains information about one prefix
	* of an interface. They are maintained by the different address families,
	* are allocated and attached when a prefix or an address is set,
	* and are linked together so all prefixes for an interface can be located.
	*/
	struct ifprefix {
	struct sockaddr ifpr_prefix; / prefix of interface */
	struct ifnet ifpr_ifp; / back-pointer to interface */
	TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
	u_char ifpr_plen; /* prefix length in bits */
	u_char ifpr_type; /* protocol dependent prefix type */
	};

	/*
	* Multicast address structure. This is analogous to the ifaddr
	* structure except that it keeps track of multicast addresses.
	*/
	struct ifmultiaddr {
		TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
		struct sockaddr *ifma_addr; /* address this membership is for */
		struct sockaddr *ifma_lladdr; /* link-layer translation, if any */
		struct ifnet *ifma_ifp; /* back-pointer to interface */
		u_int ifma_refcount; /* reference count */
		void *ifma_protospec; /* protocol-specific state, if any */
		struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
	};

	#ifdef _KERNEL

	extern struct rwlock ifnet_rwlock;
	extern struct sx ifnet_sxlock;

	/*
	 * Initialize both locks that together protect the ifnet list: the
	 * rwlock ifnet_rwlock and the sx lock ifnet_sxlock, both recursable.
	 */
	#define IFNET_LOCK_INIT() do { \
	rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
	sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
	} while(0)

	/* Write-lock the ifnet list: take the sx lock first, then the rwlock. */
	#define IFNET_WLOCK() do { \
	sx_xlock(&ifnet_sxlock); \
	rw_wlock(&ifnet_rwlock); \
	} while (0)

	/* Release the write lock in the reverse order of IFNET_WLOCK(). */
	#define IFNET_WUNLOCK() do { \
	rw_wunlock(&ifnet_rwlock); \
	sx_xunlock(&ifnet_sxlock); \
	} while (0)

	/*
	* To assert the ifnet lock, you must know not only whether it's for read or
	* write, but also whether it was acquired with sleep support or not.
	*/
	/* Read assertions check one lock each; the write assertion checks both. */
	#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
	#define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED)
	#define IFNET_WLOCK_ASSERT() do { \
	sx_assert(&ifnet_sxlock, SA_XLOCKED); \
	rw_assert(&ifnet_rwlock, RA_WLOCKED); \
	} while (0)

	/*
	 * Readers take only one of the two locks: the sx lock for the plain
	 * variant, the rwlock for the _NOSLEEP variant.
	 */
	#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
	#define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock)
	#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
	#define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock)

	/*
	* Look up an ifnet given its index; the _ref variant also acquires a
	* reference that must be freed using if_rele(). It is almost always a bug
	* to call ifnet_byindex() instead of ifnet_byindex_ref().
	*/
	struct ifnet *ifnet_byindex(u_short idx);
	struct ifnet *ifnet_byindex_locked(u_short idx);
	struct ifnet *ifnet_byindex_ref(u_short idx);

	/*
	* Given the index, ifaddr_byindex() returns the one and only
	* link-level ifaddr for the interface. You are not supposed to use
	* it to traverse the list of addresses associated to the interface.
	*/
	struct ifaddr *ifaddr_byindex(u_short idx);

	VNET_DECLARE(struct ifnethead, ifnet);
	VNET_DECLARE(struct ifgrouphead, ifg_head);
	VNET_DECLARE(int, if_index);
	VNET_DECLARE(struct ifnet , loif); / first loopback interface */
	VNET_DECLARE(int, useloopback);

	#define V_ifnet VNET(ifnet)
	#define V_ifg_head VNET(ifg_head)
	#define V_if_index VNET(if_index)
	#define V_loif VNET(loif)
	#define V_useloopback VNET(useloopback)

	extern int ifqmaxlen;

	int if_addgroup(struct ifnet , const char );
	int if_delgroup(struct ifnet , const char );
	int if_addmulti(struct ifnet , struct sockaddr , struct ifmultiaddr **);
	int if_allmulti(struct ifnet *, int);
	struct ifnet* if_alloc(u_char);
	void if_attach(struct ifnet *);
	void if_dead(struct ifnet *);
	int if_delmulti(struct ifnet , struct sockaddr );
	void if_delmulti_ifma(struct ifmultiaddr *);
	void if_detach(struct ifnet *);
	void if_vmove(struct ifnet , struct vnet );
	void if_purgeaddrs(struct ifnet *);
	void if_delallmulti(struct ifnet *);
	void if_down(struct ifnet *);
	struct ifmultiaddr *
	if_findmulti(struct ifnet , struct sockaddr );
	void if_free(struct ifnet *);
	void if_free_type(struct ifnet *, u_char);
	void if_initname(struct ifnet , const char , int);
	void if_link_state_change(struct ifnet *, int);
	int if_printf(struct ifnet , const char , ...) __printflike(2, 3);
	void if_qflush(struct ifnet *);
	void if_ref(struct ifnet *);
	void if_rele(struct ifnet *);
	int if_setlladdr(struct ifnet , const u_char , int);
	void if_up(struct ifnet *);
	int ifioctl(struct socket , u_long, caddr_t, struct thread );
	int ifpromisc(struct ifnet *, int);
	struct ifnet ifunit(const char );
	struct ifnet ifunit_ref(const char );

	/* Set up / tear down an ifaltq queue. */
	void ifq_init(struct ifaltq *, struct ifnet *ifp);
	void ifq_delete(struct ifaltq *);

	/* Add / remove a loopback route for an interface address. */
	int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
	int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);

	/*
	 * Interface-address lookups.  Each returns a pointer to the matching
	 * ifaddr, except ifa_ifwithaddr_check() which returns an int.
	 */
	struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
	int ifa_ifwithaddr_check(struct sockaddr *);
	struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
	struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
	-struct ifaddr *ifa_ifwithnet(struct sockaddr *);
	+struct ifaddr *ifa_ifwithnet(struct sockaddr *, int);
	struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
	struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);

	struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);

	int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);

	/*
	 * Per-interface-type allocation hooks.  if_com_alloc_t returns a
	 * void * for the given type and interface; if_com_free_t receives that
	 * pointer back.  Both are function types, so the registration call
	 * takes pointers to them.
	 */
	typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
	typedef void if_com_free_t(void *com, u_char type);
	void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
	void if_deregister_com_alloc(u_char type);

	#define IF_LLADDR(ifp) \
	LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))

	#ifdef DEVICE_POLLING
	enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };

	/*
	 * poll_handler_t is a function type, so ether_poll_register() takes a
	 * pointer to the handler along with the interface it serves.
	 */
	typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
	int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
	int ether_poll_deregister(struct ifnet *ifp);
	#endif /* DEVICE_POLLING */

	#endif /* _KERNEL */

	#endif /* !_NET_IF_VAR_H_ */
	Index: stable/8/sys/net/route.c
	===================================================================
	--- stable/8/sys/net/route.c (revision 209276)
	+++ stable/8/sys/net/route.c (revision 209277)
	@@ -1,1599 +1,1599 @@
	/*-
	* Copyright (c) 1980, 1986, 1991, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
	* $FreeBSD$
	*/
	/************************************************************************
	* Note: In this file a 'fib' is a "forwarding information base" *
	* Which is the new name for an in kernel routing (next hop) table. *
	***********************************************************************/

	#include "opt_inet.h"
	#include "opt_route.h"
	#include "opt_mrouting.h"
	#include "opt_mpath.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/syslog.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/socket.h>
	#include <sys/sysctl.h>
	#include <sys/syslog.h>
	#include <sys/sysproto.h>
	#include <sys/proc.h>
	#include <sys/domain.h>
	#include <sys/kernel.h>

	#include <net/if.h>
	#include <net/if_dl.h>
	#include <net/route.h>
	#include <net/vnet.h>
	#include <net/flowtable.h>

	#ifdef RADIX_MPATH
	#include <net/radix_mpath.h>
	#endif

	#include <netinet/in.h>
	#include <netinet/ip_mroute.h>

	#include <vm/uma.h>

	/* Number of FIBs in use; exported read-only as the sysctl net.fibs. */
	u_int rt_numfibs = RT_NUMFIBS;
	SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
	/*
	 * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
	 * We can't do more because storage is statically allocated for now.
	 * (for compatibility reasons.. this will change).
	 */
	TUNABLE_INT("net.fibs", &rt_numfibs);

	/*
	 * By default add routes to all fibs for new interfaces.
	 * Once this is set to 0 then only allocate routes on interface
	 * changes for the FIB of the caller when adding a new set of addresses
	 * to an interface. XXX this is a shotgun approach to a problem that needs
	 * a more fine grained solution.. that will come.
	 */
	u_int rt_add_addr_allfibs = 1;
	SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
	&rt_add_addr_allfibs, 0, "");
	TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);

	VNET_DEFINE(struct rtstat, rtstat);
	#define V_rtstat VNET(rtstat)

	VNET_DEFINE(struct radix_node_head *, rt_tables);
	#define V_rt_tables VNET(rt_tables)

	VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
	#define V_rttrash VNET(rttrash)


	/* compare two sockaddr structures */
	#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)

	/*
	* Convert a 'struct radix_node *' to a 'struct rtentry *'.
	* The operation can be done safely (in this code) because a
	* 'struct rtentry' starts with two 'struct radix_node''s, the first
	* one representing leaf nodes in the routing tree, which is
	* what the code in radix.c passes us as a 'struct radix_node'.
	*
	* But because there are a lot of assumptions in this conversion,
	* do not cast explicitly, but always use the macro below.
	*/
	#define RNTORT(p) ((struct rtentry *)(p))

	static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
	#define V_rtzone VNET(rtzone)

	#if 0
	/* default fib for tunnels to use */
	u_int tunnel_fib = 0;
	SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
	#endif

	/*
	 * Handler for net.my_fibnum: reports the p_fibnum of the calling
	 * thread's process.  The value is copied into a local so the sysctl
	 * machinery never gets a pointer to the proc field itself.  Returns
	 * the result of sysctl_handle_int().
	 */
	static int
	sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
	{
		int fibnum;
		int error;

		fibnum = curthread->td_proc->p_fibnum;
		error = sysctl_handle_int(oidp, &fibnum, 0, req);
		return (error);
	}

	SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
	    NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");

	/*
	 * Return the address of the radix head slot for (table, fam).
	 * V_rt_tables is treated as a flat array of head pointers laid out as
	 * [fib][address family], with AF_MAX+1 families per fib, so slot
	 * [0][0] is the array base.  Both indices are range-checked with
	 * KASSERT.
	 */
	static __inline struct radix_node_head **
	rt_tables_get_rnh_ptr(int table, int fam)
	{
		struct radix_node_head **base;

		KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
		    __func__));
		KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
		    __func__));

		base = (struct radix_node_head **)V_rt_tables;

		return (&base[table * (AF_MAX+1) + fam]);
	}

	/*
	 * Return the radix head stored for (table, fam); the stored pointer may
	 * be NULL, and callers in this file check for that.
	 */
	struct radix_node_head *
	rt_tables_get_rnh(int table, int fam)
	{
		struct radix_node_head **slot;

		slot = rt_tables_get_rnh_ptr(table, fam);
		return (*slot);
	}

	/*
	* route initialization must occur before ip6_init2(), which happenas at
	* SI_ORDER_MIDDLE.
	*/
	static void
	route_init(void)
	{
	struct domain *dom;
	int max_keylen = 0;

	/* whack the tunable ints into line. */
	if (rt_numfibs > RT_MAXFIBS)
	rt_numfibs = RT_MAXFIBS;
	if (rt_numfibs == 0)
	rt_numfibs = 1;

	for (dom = domains; dom; dom = dom->dom_next)
	if (dom->dom_maxrtkey > max_keylen)
	max_keylen = dom->dom_maxrtkey;

	rn_init(max_keylen); /* init all zeroes, all ones, mask table */
	}
	SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);

	/*
	 * Per-vnet routing setup: allocate the zeroed array of radix head
	 * pointers (rt_numfibs * (AF_MAX+1) entries), create the rtentry UMA
	 * zone, and let every domain with a dom_rtattach hook attach its
	 * tables.  AF_INET gets one table per fib; every other family gets
	 * only table 0.
	 */
	static void
	vnet_route_init(const void *unused __unused)
	{
		struct domain *dom;
		struct radix_node_head **rnh;
		int table;
		int fam;

		V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
		    sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);

		V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
		    NULL, NULL, UMA_ALIGN_PTR, 0);
		for (dom = domains; dom; dom = dom->dom_next) {
			if (dom->dom_rtattach) {
				for (table = 0; table < rt_numfibs; table++) {
					if ( (fam = dom->dom_family) == AF_INET ||
					    table == 0) {
						/* for now only AF_INET has > 1 table */
						/* XXX MRT
						 * rtattach will be also called
						 * from vfs_export.c but the
						 * offset will be 0
						 * (only for AF_INET and AF_INET6
						 * which don't need it anyhow)
						 */
						rnh = rt_tables_get_rnh_ptr(table, fam);
						if (rnh == NULL)
							panic("%s: rnh NULL", __func__);
						dom->dom_rtattach((void **)rnh,
						    dom->dom_rtoffset);
					} else {
						/* Non-INET family past table 0: done. */
						break;
					}
				}
			}
		}
	}
	VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
	vnet_route_init, 0);

	#ifdef VIMAGE
	/*
	 * Per-vnet routing teardown: for every domain with a dom_rtdetach
	 * hook, detach the same tables vnet_route_init() attached (all fibs
	 * for AF_INET, table 0 only for other families).
	 */
	static void
	vnet_route_uninit(const void *unused __unused)
	{
		int table;
		int fam;
		struct domain *dom;
		struct radix_node_head **rnh;

		for (dom = domains; dom; dom = dom->dom_next) {
			if (dom->dom_rtdetach) {
				for (table = 0; table < rt_numfibs; table++) {
					if ( (fam = dom->dom_family) == AF_INET ||
					    table == 0) {
						/* For now only AF_INET has > 1 tbl. */
						rnh = rt_tables_get_rnh_ptr(table, fam);
						if (rnh == NULL)
							panic("%s: rnh NULL", __func__);
						dom->dom_rtdetach((void **)rnh,
						    dom->dom_rtoffset);
					} else {
						break;
					}
				}
			}
		}
	}
	VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
	vnet_route_uninit, 0);
	#endif

	#ifndef _SYS_SYSPROTO_H_
	struct setfib_args {
	int fibnum;
	};
	#endif
	int
	setfib(struct thread td, struct setfib_args uap)
	{
	if (uap->fibnum < 0 \|\| uap->fibnum >= rt_numfibs)
	return EINVAL;
	td->td_proc->p_fibnum = uap->fibnum;
	return (0);
	}

	/*
	* Packet routing routines.
	*/
	/* rtalloc_ign_fib() with no ignore flags, on FIB 0. */
	void
	rtalloc(struct route *ro)
	{
	rtalloc_ign_fib(ro, 0UL, 0);
	}

	/* rtalloc_ign_fib() with no ignore flags, on the given FIB. */
	void
	rtalloc_fib(struct route *ro, u_int fibnum)
	{
	rtalloc_ign_fib(ro, 0UL, fibnum);
	}

	/*
	 * Make sure ro->ro_rt holds a usable route for ro->ro_dst, looking it
	 * up in FIB 0.  A cached route that has an interface and is RTF_UP is
	 * kept as is; any other cached route is released first.  The result of
	 * the fresh lookup is stored unlocked and may be NULL.
	 */
	void
	rtalloc_ign(struct route *ro, u_long ignore)
	{
		struct rtentry *cached;

		cached = ro->ro_rt;
		if (cached != NULL) {
			if (cached->rt_ifp != NULL && (cached->rt_flags & RTF_UP))
				return;
			RTFREE(cached);
			ro->ro_rt = NULL;
		}

		/* rtalloc1_fib() hands back a locked entry; drop that lock. */
		ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
	}

	/*
	 * Make sure ro->ro_rt holds a usable route for ro->ro_dst, looking it
	 * up in FIB 'fibnum'.  A cached route that has an interface and is
	 * RTF_UP is kept as is; any other cached route is released first.  The
	 * result of the fresh lookup is stored unlocked and may be NULL.
	 */
	void
	rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
	{
		struct rtentry *cached;

		cached = ro->ro_rt;
		if (cached != NULL) {
			if (cached->rt_ifp != NULL && (cached->rt_flags & RTF_UP))
				return;
			RTFREE(cached);
			ro->ro_rt = NULL;
		}

		/* rtalloc1_fib() hands back a locked entry; drop that lock. */
		ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
	}

	/*
	 * Look up the route that matches the address given
	 * Or, at least try.. Create a cloned route if needed.
	 *
	 * The returned route, if any, is locked.
	 *
	 * This variant always searches FIB 0; see rtalloc1_fib().
	 */
	struct rtentry *
	rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
	{
	return (rtalloc1_fib(dst, report, ignflags, 0));
	}

	/*
	 * Look up dst in the routing table of FIB 'fibnum' (forced to 0 for any
	 * family other than AF_INET).
	 *
	 * dst      - destination to match.
	 * report   - when non-zero, a miss is announced with rt_missmsg(RTM_MISS).
	 * ignflags - RTF_RNH_LOCKED means the caller already holds the radix
	 *            head lock, so it is neither taken nor released here.
	 * fibnum   - FIB to search; must be below rt_numfibs.
	 *
	 * Returns the matching rtentry locked and with an extra reference, or
	 * NULL when there is no table for the family or no non-root match.
	 */
	struct rtentry *
	rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
	u_int fibnum)
	{
	struct radix_node_head *rnh;
	struct rtentry *rt;
	struct radix_node *rn;
	struct rtentry *newrt;
	struct rt_addrinfo info;
	int err = 0, msgtype = RTM_MISS;
	int needlock;

	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
	if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
	fibnum = 0;
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	newrt = NULL;
	/*
	* Look up the address in the table for that Address Family
	*/
	if (rnh == NULL) {
	V_rtstat.rts_unreach++;
	goto miss;
	}
	needlock = !(ignflags & RTF_RNH_LOCKED);
	if (needlock)
	RADIX_NODE_HEAD_RLOCK(rnh);
	/* Under INVARIANTS the 'else' below pairs with the 'if' above. */
	#ifdef INVARIANTS
	else
	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
	#endif
	rn = rnh->rnh_matchaddr(dst, rnh);
	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
	/* Lock and reference the entry before the head lock is dropped. */
	newrt = rt = RNTORT(rn);
	RT_LOCK(newrt);
	RT_ADDREF(newrt);
	if (needlock)
	RADIX_NODE_HEAD_RUNLOCK(rnh);
	goto done;

	} else if (needlock)
	RADIX_NODE_HEAD_RUNLOCK(rnh);

	/*
	* Either we hit the root or couldn't find any match,
	* Which basically means
	* "caint get there frm here"
	*/
	V_rtstat.rts_unreach++;
	miss:
	if (report) {
	/*
	* If required, report the failure to the supervising
	* Authorities.
	* For a delete, this is not an error. (report == 0)
	*/
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	rt_missmsg(msgtype, &info, 0, err);
	}
	done:
	if (newrt)
	RT_LOCK_ASSERT(newrt);
	return (newrt);
	}

	/*
	 * Remove a reference count from an rtentry.
	 * If the count gets low enough, take it out of the routing table
	 *
	 * rt must be non-NULL and locked by the caller.  On return the entry
	 * has either been unlocked or, if it was the last reference to a route
	 * that is no longer RTF_UP, destroyed and returned to the rtentry zone.
	 */
	void
	rtfree(struct rtentry *rt)
	{
		struct radix_node_head *rnh;

		KASSERT(rt != NULL,("%s: NULL rt", __func__));
		rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
		KASSERT(rnh != NULL,("%s: NULL rnh", __func__));

		RT_LOCK_ASSERT(rt);

		/*
		 * The callers should use RTFREE_LOCKED() or RTFREE(), so
		 * we should come here exactly with the last reference.
		 */
		RT_REMREF(rt);
		if (rt->rt_refcnt > 0) {
			log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
			goto done;
		}

		/*
		 * On last reference give the "close method" a chance
		 * to cleanup private state.  This also permits (for
		 * IPv4 and IPv6) a chance to decide if the routing table
		 * entry should be purged immediately or at a later time.
		 * When an immediate purge is to happen the close routine
		 * typically calls rtexpunge which clears the RTF_UP flag
		 * on the entry so that the code below reclaims the storage.
		 */
		if (rt->rt_refcnt == 0 && rnh->rnh_close)
			rnh->rnh_close((struct radix_node *)rt, rnh);

		/*
		 * If we are no longer "up" (and ref == 0)
		 * then we can free the resources associated
		 * with the route.
		 */
		if ((rt->rt_flags & RTF_UP) == 0) {
			if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
				panic("rtfree 2");
			/*
			 * the rtentry must have been removed from the routing table
			 * so it is represented in rttrash.. remove that now.
			 */
			V_rttrash--;
	#ifdef DIAGNOSTIC
			if (rt->rt_refcnt < 0) {
				printf("rtfree: %p not freed (neg refs)\n", rt);
				goto done;
			}
	#endif
			/*
			 * release references on items we hold them on..
			 * e.g other routes and ifaddrs.
			 */
			if (rt->rt_ifa)
				ifa_free(rt->rt_ifa);
			/*
			 * The key is separately alloc'd so free it (see rt_setgate()).
			 * This also frees the gateway, as they are always malloc'd
			 * together.
			 */
			Free(rt_key(rt));

			/*
			 * and the rtentry itself of course
			 */
			RT_LOCK_DESTROY(rt);
			uma_zfree(V_rtzone, rt);
			return;
		}
	done:
		RT_UNLOCK(rt);
	}


	/*
	 * Force a routing table entry to the specified
	 * destination to go through the given gateway.
	 * Normally called as a result of a routing redirect
	 * message from the network layer.
	 *
	 * This variant always operates on FIB 0; see rtredirect_fib().
	 */
	void
	rtredirect(struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct sockaddr *src)
	{
	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
	}

	/*
	 * Apply a routing redirect in FIB 'fibnum': make traffic for dst go
	 * through gateway.
	 *
	 * dst, gateway, netmask - the redirected destination, its new next
	 *                         hop, and the mask used if a route is created.
	 * flags                 - RTF_* flags from the redirect; RTF_DONE skips
	 *                         the "came from our current router" check.
	 * src                   - sender of the redirect, reported as the author.
	 *
	 * Depending on the existing route this either creates a new dynamic
	 * gateway route or rewrites the gateway of the existing one.  The
	 * outcome is always announced with an RTM_REDIRECT routing message and
	 * counted in V_rtstat (rts_badredirect on error).  Nothing is returned.
	 */
	void
	rtredirect_fib(struct sockaddr *dst,
		struct sockaddr *gateway,
		struct sockaddr *netmask,
		int flags,
		struct sockaddr *src,
		u_int fibnum)
	{
		struct rtentry *rt, *rt0 = NULL;
		int error = 0;
		short *stat = NULL;
		struct rt_addrinfo info;
		struct ifaddr *ifa;
		struct radix_node_head *rnh;

		ifa = NULL;
		rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
		if (rnh == NULL) {
			error = EAFNOSUPPORT;
			goto out;
		}

		/* verify the gateway is directly reachable */
	- if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
	+ if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
			error = ENETUNREACH;
			goto out;
		}
		rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
		/*
		 * If the redirect isn't from our current router for this dst,
		 * it's either old or wrong.  If it redirects us to ourselves,
		 * we have a routing loop, perhaps as a result of an interface
		 * going down recently.
		 */
		if (!(flags & RTF_DONE) && rt &&
		    (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
			error = EINVAL;
		else if (ifa_ifwithaddr_check(gateway))
			error = EHOSTUNREACH;
		if (error)
			goto done;
		/*
		 * Create a new entry if we just got back a wildcard entry
		 * or the lookup failed.  This is necessary for hosts
		 * which use routing redirects generated by smart gateways
		 * to dynamically build the routing tables.
		 */
		if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
			goto create;
		/*
		 * Don't listen to the redirect if it's
		 * for a route to an interface.
		 */
		if (rt->rt_flags & RTF_GATEWAY) {
			if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
				/*
				 * Changing from route to net => route to host.
				 * Create new route, rather than smashing route to net.
				 */
			create:
				rt0 = rt;
				rt = NULL;

				flags |= RTF_GATEWAY | RTF_DYNAMIC;
				bzero((caddr_t)&info, sizeof(info));
				info.rti_info[RTAX_DST] = dst;
				info.rti_info[RTAX_GATEWAY] = gateway;
				info.rti_info[RTAX_NETMASK] = netmask;
				info.rti_ifa = ifa;
				info.rti_flags = flags;
				if (rt0 != NULL)
					RT_UNLOCK(rt0);	/* drop lock to avoid LOR with RNH */
				error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
				if (rt != NULL) {
					RT_LOCK(rt);
					if (rt0 != NULL)
						EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
					flags = rt->rt_flags;
				}
				if (rt0 != NULL)
					RTFREE(rt0);

				stat = &V_rtstat.rts_dynamic;
			} else {
				struct rtentry *gwrt;

				/*
				 * Smash the current notion of the gateway to
				 * this destination.  Should check about netmask!!!
				 */
				rt->rt_flags |= RTF_MODIFIED;
				flags |= RTF_MODIFIED;
				stat = &V_rtstat.rts_newgateway;
				/*
				 * add the key and gateway (in one malloc'd chunk).
				 */
				RT_UNLOCK(rt);
				RADIX_NODE_HEAD_LOCK(rnh);
				RT_LOCK(rt);
				rt_setgate(rt, rt_key(rt), gateway);
				gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
				RADIX_NODE_HEAD_UNLOCK(rnh);
				EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
				/*
				 * rtalloc1() returns NULL when no route to the
				 * gateway is found; only release what we got.
				 */
				if (gwrt != NULL)
					RTFREE_LOCKED(gwrt);
			}
		} else
			error = EHOSTUNREACH;
	done:
		if (rt)
			RTFREE_LOCKED(rt);
	out:
		if (error)
			V_rtstat.rts_badredirect++;
		else if (stat != NULL)
			(*stat)++;
		bzero((caddr_t)&info, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		info.rti_info[RTAX_GATEWAY] = gateway;
		info.rti_info[RTAX_NETMASK] = netmask;
		info.rti_info[RTAX_AUTHOR] = src;
		rt_missmsg(RTM_REDIRECT, &info, flags, error);
		if (ifa != NULL)
			ifa_free(ifa);
	}

	/*
	 * Routing ioctl entry point without an explicit FIB: forwards to
	 * rtioctl_fib() with fib number 0 and returns its result.
	 */
	int
	rtioctl(u_long req, caddr_t data)
	{
		int error;

		error = rtioctl_fib(req, data, 0);
		return (error);
	}

	/*
	 * Routing table ioctl interface.
	 *
	 * With INET compiled in, the request is handed to the mrt_ioctl hook
	 * when one is installed, otherwise EOPNOTSUPP is returned.  Without
	 * INET the call always fails with ENXIO.
	 */
	int
	rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
	{

		/*
		 * If more ioctl commands are added here, make sure the proper
		 * super-user checks are being performed because it is possible for
		 * prison-root to make it this far if raw sockets have been enabled
		 * in jails.
		 */
	#ifdef INET
		/* Multicast goop, grrr... */
		if (mrt_ioctl == NULL)
			return (EOPNOTSUPP);
		return (mrt_ioctl(req, data, fibnum));
	#else /* INET */
		return (ENXIO);
	#endif /* INET */
	}

	/*
	 * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
	 *
	 * Convenience wrapper that performs the lookup in fib 0.
	 *
	 * flags:   RTF_* flags of the route being set up.
	 * dst:     destination address of the route.
	 * gateway: gateway address of the route.
	 *
	 * Returns whatever ifa_ifwithroute_fib() returns (NULL when no
	 * suitable interface address is found).
	 */
	struct ifaddr *
	ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
	{
		return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
	}

	/*
	* Select the interface address to associate with a route to 'dst' via
	* 'gateway', using routing table 'fibnum' for the fallback lookup.
	*
	* The search order is: an address match (ifa_ifwithdstaddr() /
	* ifa_ifwithaddr()), then a network match (ifa_ifwithnet()), then a
	* routing lookup of the gateway. Returns NULL when nothing is found or
	* when the gateway resolves only through the default route.
	*
	* NOTE(review): 'dst' and 'gateway' are dereferenced as pointers below;
	* their '*' declarators appear to have been lost when this text was
	* rendered.
	*/
	struct ifaddr *
	ifa_ifwithroute_fib(int flags, struct sockaddr dst, struct sockaddr gateway,
	u_int fibnum)
	{
	register struct ifaddr *ifa;
	int not_found = 0;

	if ((flags & RTF_GATEWAY) == 0) {
	/*
	* If we are adding a route to an interface,
	* and the interface is a pt to pt link
	* we should search for the destination
	* as our clue to the interface. Otherwise
	* we can use the local address.
	*/
	ifa = NULL;
	if (flags & RTF_HOST)
	ifa = ifa_ifwithdstaddr(dst);
	if (ifa == NULL)
	ifa = ifa_ifwithaddr(gateway);
	} else {
	/*
	* If we are adding a route to a remote net
	* or host, the gateway may still be on the
	* other end of a pt to pt link.
	*/
	ifa = ifa_ifwithdstaddr(gateway);
	}
	if (ifa == NULL)
	- ifa = ifa_ifwithnet(gateway);
	+ ifa = ifa_ifwithnet(gateway, 0);
	if (ifa == NULL) {
	/* last resort: see which interface a route to the gateway uses */
	struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
	if (rt == NULL)
	return (NULL);
	/*
	* dismiss a gateway that is reachable only
	* through the default router
	*/
	switch (gateway->sa_family) {
	case AF_INET:
	if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
	not_found = 1;
	break;
	case AF_INET6:
	if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
	not_found = 1;
	break;
	default:
	break;
	}
	/* take our own reference on the ifa before releasing the route */
	if (!not_found && rt->rt_ifa != NULL) {
	ifa = rt->rt_ifa;
	ifa_ref(ifa);
	}
	RT_REMREF(rt);
	RT_UNLOCK(rt);
	if (not_found \|\| ifa == NULL)
	return (NULL);
	}
	/*
	* If the address found belongs to a different family than dst, prefer
	* an address on the same interface chosen by ifaof_ifpforaddr();
	* keep the original one if that lookup fails.
	*/
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
	struct ifaddr *oifa = ifa;
	ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
	if (ifa == NULL)
	ifa = oifa;
	else
	ifa_free(oifa);
	}
	return (ifa);
	}

	/*
	 * Do appropriate manipulations of a routing tree given
	 * all the bits of info needed.
	 *
	 * This variant forwards to rtrequest_fib() with fib number 0 and
	 * returns its error code unchanged.
	 */
	int
	rtrequest(int req,
	    struct sockaddr *dst,
	    struct sockaddr *gateway,
	    struct sockaddr *netmask,
	    int flags,
	    struct rtentry **ret_nrt)
	{
		int error;

		error = rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0);
		return (error);
	}

	/*
	 * Package the individual route arguments into a struct rt_addrinfo
	 * and pass the request on to rtrequest1_fib() for table 'fibnum'.
	 *
	 * Returns EINVAL when dst has a zero sa_len, otherwise the result of
	 * rtrequest1_fib().
	 */
	int
	rtrequest_fib(int req,
	    struct sockaddr *dst,
	    struct sockaddr *gateway,
	    struct sockaddr *netmask,
	    int flags,
	    struct rtentry **ret_nrt,
	    u_int fibnum)
	{
		struct rt_addrinfo rtinfo;

		/* A destination without a length cannot be entered in the tree. */
		if (dst->sa_len == 0)
			return (EINVAL);

		bzero(&rtinfo, sizeof(rtinfo));
		rtinfo.rti_info[RTAX_DST] = dst;
		rtinfo.rti_info[RTAX_GATEWAY] = gateway;
		rtinfo.rti_info[RTAX_NETMASK] = netmask;
		rtinfo.rti_flags = flags;

		return (rtrequest1_fib(req, &rtinfo, ret_nrt, fibnum));
	}

	/*
	* These (questionable) definitions of apparent local variables make
	* the bare names below expand to fields of a 'struct rt_addrinfo *info'
	* that must be in scope at the point of use. They stay defined for every
	* function that follows until the matching #undef lines. XXXXXX!!!
	*/
	#define dst info->rti_info[RTAX_DST]
	#define gateway info->rti_info[RTAX_GATEWAY]
	#define netmask info->rti_info[RTAX_NETMASK]
	#define ifaaddr info->rti_info[RTAX_IFA]
	#define ifpaddr info->rti_info[RTAX_IFP]
	#define flags info->rti_flags

	/*
	 * Resolve the interface address for 'info' using fib number 0;
	 * returns the error code produced by rt_getifa_fib().
	 */
	int
	rt_getifa(struct rt_addrinfo *info)
	{
		int error;

		error = rt_getifa_fib(info, 0);
		return (error);
	}

	/*
	* Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
	* it will be referenced so the caller must free it.
	*
	* Fills in info->rti_ifp and info->rti_ifa where they are not already set.
	* Returns 0 when an interface address ends up in info->rti_ifa, and
	* ENETUNREACH otherwise.
	*
	* The bare names dst, gateway, ifaaddr, ifpaddr and flags used here are
	* macros that expand to fields of 'info'.
	*/
	int
	rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
	{
	struct ifaddr *ifa;
	int error = 0;

	/*
	* ifp may be specified by sockaddr_dl
	* when protocol address is ambiguous.
	*/
	if (info->rti_ifp == NULL && ifpaddr != NULL &&
	ifpaddr->sa_family == AF_LINK &&
	- (ifa = ifa_ifwithnet(ifpaddr)) != NULL) {
	+ (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
	info->rti_ifp = ifa->ifa_ifp;
	/* only the ifp was wanted; drop the reference on the ifa */
	ifa_free(ifa);
	}
	if (info->rti_ifa == NULL && ifaaddr != NULL)
	info->rti_ifa = ifa_ifwithaddr(ifaaddr);
	if (info->rti_ifa == NULL) {
	struct sockaddr *sa;

	/* pick the most specific address supplied: IFA, then gateway, then dst */
	sa = ifaaddr != NULL ? ifaaddr :
	(gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
	info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
	else if (dst != NULL && gateway != NULL)
	info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
	fibnum);
	else if (sa != NULL)
	info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
	fibnum);
	}
	if ((ifa = info->rti_ifa) != NULL) {
	if (info->rti_ifp == NULL)
	info->rti_ifp = ifa->ifa_ifp;
	} else
	error = ENETUNREACH;
	return (error);
	}

	/*
	* Expunges references to a route that's about to be reclaimed.
	* The route must be locked.
	*
	* The radix node head for the route's fib and address family must also
	* be locked by the caller (both locks are asserted below).
	*
	* Returns 0 on success, EAFNOSUPPORT when no routing table exists for the
	* route's family, ESRCH (non-RADIX_MPATH build) when the entry is not in
	* the tree, or the error from rtrequest1_fib() (RADIX_MPATH build).
	*/
	int
	rtexpunge(struct rtentry *rt)
	{
	#if !defined(RADIX_MPATH)
	struct radix_node *rn;
	#else
	struct rt_addrinfo info;
	int fib;
	struct rtentry *rt0;
	#endif
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	int error = 0;

	/*
	* Find the correct routing tree to use for this Address Family
	*/
	rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
	RT_LOCK_ASSERT(rt);
	if (rnh == NULL)
	return (EAFNOSUPPORT);
	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);

	#ifdef RADIX_MPATH
	/*
	* Multipath build: delete through rtrequest1_fib(), telling it via
	* RTF_RNH_LOCKED that the radix head lock is already held.
	*/
	fib = rt->rt_fibnum;
	bzero(&info, sizeof(info));
	info.rti_ifp = rt->rt_ifp;
	info.rti_flags = RTF_RNH_LOCKED;
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;

	RT_UNLOCK(rt);
	error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);

	if (error == 0 && rt0 != NULL) {
	rt = rt0;
	RT_LOCK(rt);
	} else if (error != 0) {
	/* give the route back to the caller locked, as it was on entry */
	RT_LOCK(rt);
	return (error);
	}
	#else
	/*
	* Remove the item from the tree; it should be there,
	* but when callers invoke us blindly it may not (sigh).
	*/
	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
	if (rn == NULL) {
	error = ESRCH;
	goto bad;
	}
	KASSERT((rn->rn_flags & (RNF_ACTIVE \| RNF_ROOT)) == 0,
	("unexpected flags 0x%x", rn->rn_flags));
	KASSERT(rt == RNTORT(rn),
	("lookup mismatch, rt %p rn %p", rt, rn));
	#endif /* RADIX_MPATH */

	rt->rt_flags &= ~RTF_UP;

	/*
	* Give the protocol a chance to keep things in sync.
	*/
	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
	struct rt_addrinfo info;

	bzero((caddr_t)&info, sizeof(info));
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
	}

	/*
	* one more rtentry floating around that is not
	* linked to the routing table.
	*/
	V_rttrash++;
	#if !defined(RADIX_MPATH)
	bad:
	#endif
	return (error);
	}

	#ifdef RADIX_MPATH
	/*
	* Multipath helper for rtrequest1_fib(): find the route for the
	* destination in 'info' whose gateway matches, and unlink it from the
	* multipath chain.
	*
	* Returns 0 when the matching entry was handled here, ESRCH when no
	* matching route exists, and ENOENT when the match is the first entry of
	* its chain and the caller should run its normal delete code instead.
	* Any request other than RTM_DELETE that reaches the 'nondelete' label
	* panics.
	*
	* NOTE(review): 'rnh', 'ret_nrt', 'rt' and 'rto' are used as pointers
	* below (ret_nrt as a pointer to pointer); their '*' declarators appear
	* to have been lost when this text was rendered.
	*/
	static int
	rn_mpath_update(int req, struct rt_addrinfo *info,
	struct radix_node_head rnh, struct rtentry *ret_nrt)
	{
	/*
	* if we got multipath routes, we require users to specify
	* a matching RTAX_GATEWAY.
	*/
	struct rtentry rt, rto = NULL;
	register struct radix_node *rn;
	int error = 0;

	rn = rnh->rnh_matchaddr(dst, rnh);
	if (rn == NULL)
	return (ESRCH);
	rto = rt = RNTORT(rn);
	rt = rt_mpath_matchgate(rt, gateway);
	if (rt == NULL)
	return (ESRCH);
	/*
	* this is the first entry in the chain
	*/
	if (rto == rt) {
	rn = rn_mpath_next((struct radix_node *)rt);
	/*
	* there is another entry, now it's active
	*/
	if (rn) {
	rto = RNTORT(rn);
	RT_LOCK(rto);
	rto->rt_flags \|= RTF_UP;
	RT_UNLOCK(rto);
	} else if (rt->rt_flags & RTF_GATEWAY) {
	/*
	* For gateway routes, we need to
	* make sure that we are deleting
	* the correct gateway.
	* rt_mpath_matchgate() does not
	* check the case when there is only
	* one route in the chain.
	*/
	if (gateway &&
	(rt->rt_gateway->sa_len != gateway->sa_len \|\|
	memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
	error = ESRCH;
	else {
	/*
	* remove from tree before returning it
	* to the caller
	*/
	rn = rnh->rnh_deladdr(dst, netmask, rnh);
	KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
	goto gwdelete;
	}

	}
	/*
	* use the normal delete code to remove
	* the first entry
	*/
	if (req != RTM_DELETE)
	goto nondelete;

	/*
	* NOTE(review): an ESRCH set by the gateway check above is
	* overwritten by ENOENT here - confirm this is intended.
	*/
	error = ENOENT;
	goto done;
	}

	/*
	* if the entry is 2nd and on up
	*/
	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
	panic ("rtrequest1: rt_mpath_deldup");
	gwdelete:
	RT_LOCK(rt);
	RT_ADDREF(rt);
	if (req == RTM_DELETE) {
	rt->rt_flags &= ~RTF_UP;
	/*
	* One more rtentry floating around that is not
	* linked to the routing table. rttrash will be decremented
	* when RTFREE(rt) is eventually called.
	*/
	V_rttrash++;
	}

	nondelete:
	if (req != RTM_DELETE)
	panic("unrecognized request %d", req);


	/*
	* If the caller wants it, then it can have it,
	* but it's up to it to free the rtentry as we won't be
	* doing it.
	*/
	if (ret_nrt) {
	*ret_nrt = rt;
	RT_UNLOCK(rt);
	} else
	RTFREE_LOCKED(rt);
	done:
	return (error);
	}
	#endif

	/*
	* Carry out a routing table request described by 'info' on table
	* 'fibnum'.
	*
	* req: RTM_DELETE removes the matching entry, RTM_ADD creates one,
	* RTM_RESOLVE is accepted and does nothing; anything else yields
	* EOPNOTSUPP.
	* info: addresses, flags and optional ifa for the request. The bare names
	* dst, gateway, netmask and flags below are macros expanding to its
	* fields, so assignments to them modify 'info'.
	* ret_nrt: when non-NULL, receives the affected rtentry; the caller then
	* owns a reference to it.
	*
	* The radix node head lock is taken here unless RTF_RNH_LOCKED is set in
	* the request flags, in which case it is only asserted. Requests for
	* families other than AF_INET are forced onto fib 0.
	*
	* Returns 0 on success or an errno value.
	*
	* NOTE(review): 'info' is used as a pointer and 'ret_nrt' as a pointer to
	* pointer; '*' characters appear to have been lost from the signature and
	* from the FLOWTABLE mask comparison when this text was rendered.
	*/
	int
	rtrequest1_fib(int req, struct rt_addrinfo info, struct rtentry *ret_nrt,
	u_int fibnum)
	{
	int error = 0, needlock = 0;
	register struct rtentry *rt;
	#ifdef FLOWTABLE
	register struct rtentry *rt0;
	#endif
	register struct radix_node *rn;
	register struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	/* record the error and leave through the common unlock path at 'bad' */
	#define senderr(x) { error = x ; goto bad; }

	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
	if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
	fibnum = 0;
	/*
	* Find the correct routing tree to use for this Address Family
	*/
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
	return (EAFNOSUPPORT);
	needlock = ((flags & RTF_RNH_LOCKED) == 0);
	flags &= ~RTF_RNH_LOCKED;
	if (needlock)
	RADIX_NODE_HEAD_LOCK(rnh);
	else
	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
	/*
	* If we are adding a host route then we don't want to put
	* a netmask in the tree, nor do we want to clone it.
	*/
	if (flags & RTF_HOST)
	netmask = NULL;

	switch (req) {
	case RTM_DELETE:
	#ifdef RADIX_MPATH
	if (rn_mpath_capable(rnh)) {
	error = rn_mpath_update(req, info, rnh, ret_nrt);
	/*
	* "bad" holds true for the success case
	* as well
	*/
	if (error != ENOENT)
	goto bad;
	error = 0;
	}
	#endif
	/*
	* Remove the item from the tree and return it.
	* Complain if it is not there and do no more processing.
	*/
	rn = rnh->rnh_deladdr(dst, netmask, rnh);
	if (rn == NULL)
	senderr(ESRCH);
	if (rn->rn_flags & (RNF_ACTIVE \| RNF_ROOT))
	panic ("rtrequest delete");
	rt = RNTORT(rn);
	RT_LOCK(rt);
	RT_ADDREF(rt);
	rt->rt_flags &= ~RTF_UP;

	/*
	* give the protocol a chance to keep things in sync.
	*/
	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
	ifa->ifa_rtrequest(RTM_DELETE, rt, info);

	/*
	* One more rtentry floating around that is not
	* linked to the routing table. rttrash will be decremented
	* when RTFREE(rt) is eventually called.
	*/
	V_rttrash++;

	/*
	* If the caller wants it, then it can have it,
	* but it's up to it to free the rtentry as we won't be
	* doing it.
	*/
	if (ret_nrt) {
	*ret_nrt = rt;
	RT_UNLOCK(rt);
	} else
	RTFREE_LOCKED(rt);
	break;
	case RTM_RESOLVE:
	/*
	* resolve was only used for route cloning
	* here for compat
	*/
	break;
	case RTM_ADD:
	if ((flags & RTF_GATEWAY) && !gateway)
	senderr(EINVAL);
	if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
	(gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
	senderr(EINVAL);

	/* either way we end up holding one reference on info->rti_ifa */
	if (info->rti_ifa == NULL) {
	error = rt_getifa_fib(info, fibnum);
	if (error)
	senderr(error);
	} else
	ifa_ref(info->rti_ifa);
	ifa = info->rti_ifa;
	rt = uma_zalloc(V_rtzone, M_NOWAIT \| M_ZERO);
	if (rt == NULL) {
	if (ifa != NULL)
	ifa_free(ifa);
	senderr(ENOBUFS);
	}
	RT_LOCK_INIT(rt);
	rt->rt_flags = RTF_UP \| flags;
	rt->rt_fibnum = fibnum;
	/*
	* Add the gateway. Possibly re-malloc-ing the storage for it
	*
	*/
	RT_LOCK(rt);
	if ((error = rt_setgate(rt, dst, gateway)) != 0) {
	RT_LOCK_DESTROY(rt);
	if (ifa != NULL)
	ifa_free(ifa);
	uma_zfree(V_rtzone, rt);
	senderr(error);
	}

	/*
	* point to the (possibly newly malloc'd) dest address.
	*/
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	* make sure it contains the value we want (masked if needed).
	*/
	if (netmask) {
	rt_maskedcopy(dst, ndst, netmask);
	} else
	bcopy(dst, ndst, dst->sa_len);

	/*
	* We use the ifa reference returned by rt_getifa_fib().
	* This moved from below so that rnh->rnh_addaddr() can
	* examine the ifa and ifa->ifa_ifp if it so desires.
	*/
	rt->rt_ifa = ifa;
	rt->rt_ifp = ifa->ifa_ifp;
	rt->rt_rmx.rmx_weight = 1;

	#ifdef RADIX_MPATH
	/* do not permit exactly the same dst/mask/gw pair */
	if (rn_mpath_capable(rnh) &&
	rt_mpath_conflict(rnh, rt, netmask)) {
	if (rt->rt_ifa) {
	ifa_free(rt->rt_ifa);
	}
	Free(rt_key(rt));
	RT_LOCK_DESTROY(rt);
	uma_zfree(V_rtzone, rt);
	senderr(EEXIST);
	}
	#endif

	#ifdef FLOWTABLE
	rt0 = NULL;
	/* XXX
	* "flow-table" only support IPv4 at the moment.
	*/
	#ifdef INET
	if (dst->sa_family == AF_INET) {
	rn = rnh->rnh_matchaddr(dst, rnh);
	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
	struct sockaddr *mask;
	u_char m, n;
	int len;

	/*
	* compare mask to see if the new route is
	* more specific than the existing one
	*/
	rt0 = RNTORT(rn);
	RT_LOCK(rt0);
	RT_ADDREF(rt0);
	RT_UNLOCK(rt0);
	/*
	* A host route is already present, so
	* leave the flow-table entries as is.
	*/
	if (rt0->rt_flags & RTF_HOST) {
	RTFREE(rt0);
	rt0 = NULL;
	} else if (!(flags & RTF_HOST) && netmask) {
	/*
	* NOTE(review): m and n are assigned (u_char *) values and
	* advanced like pointers, so this presumably compared the
	* mask bytes (*n against *m) before rendering - confirm.
	*/
	mask = rt_mask(rt0);
	len = mask->sa_len;
	m = (u_char *)mask;
	n = (u_char *)netmask;
	while (len-- > 0) {
	if (n != m)
	break;
	n++;
	m++;
	}
	if (len == 0 \|\| (n < m)) {
	RTFREE(rt0);
	rt0 = NULL;
	}
	}
	}
	}
	#endif
	#endif

	/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
	rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
	/*
	* If it still failed to go into the tree,
	* then un-make it (this should be a function)
	*/
	if (rn == NULL) {
	if (rt->rt_ifa)
	ifa_free(rt->rt_ifa);
	Free(rt_key(rt));
	RT_LOCK_DESTROY(rt);
	uma_zfree(V_rtzone, rt);
	#ifdef FLOWTABLE
	if (rt0 != NULL)
	RTFREE(rt0);
	#endif
	senderr(EEXIST);
	}
	#ifdef FLOWTABLE
	else if (rt0 != NULL) {
	#ifdef INET
	flowtable_route_flush(V_ip_ft, rt0);
	#endif
	RTFREE(rt0);
	}
	#endif

	/*
	* If this protocol has something to add to this then
	* allow it to do that as well.
	*/
	if (ifa->ifa_rtrequest)
	ifa->ifa_rtrequest(req, rt, info);

	/*
	* actually return a resultant rtentry and
	* give the caller a single reference.
	*/
	if (ret_nrt) {
	*ret_nrt = rt;
	RT_ADDREF(rt);
	}
	RT_UNLOCK(rt);
	break;
	default:
	error = EOPNOTSUPP;
	}
	bad:
	/* common exit: reached on success as well as on error */
	if (needlock)
	RADIX_NODE_HEAD_UNLOCK(rnh);
	return (error);
	#undef senderr
	}

	#undef dst
	#undef gateway
	#undef netmask
	#undef ifaaddr
	#undef ifpaddr
	#undef flags

	int
	rt_setgate(struct rtentry rt, struct sockaddr dst, struct sockaddr *gate)
	{
	/* XXX dst may be overwritten, can we move this to below */
	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
	#ifdef INVARIANTS
	struct radix_node_head *rnh;

	rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
	#endif

	RT_LOCK_ASSERT(rt);
	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);

	/*
	* Prepare to store the gateway in rt->rt_gateway.
	* Both dst and gateway are stored one after the other in the same
	* malloc'd chunk. If we have room, we can reuse the old buffer,
	* rt_gateway already points to the right place.
	* Otherwise, malloc a new block and update the 'dst' address.
	*/
	if (rt->rt_gateway == NULL \|\| glen > SA_SIZE(rt->rt_gateway)) {
	caddr_t new;

	R_Malloc(new, caddr_t, dlen + glen);
	if (new == NULL)
	return ENOBUFS;
	/*
	* XXX note, we copy from dst and not rt_key(rt) because
	* rt_setgate() can be called to initialize a newly
	* allocated route entry, in which case rt_key(rt) == NULL
	* (and also rt->rt_gateway == NULL).
	* Free()/free() handle a NULL argument just fine.
	*/
	bcopy(dst, new, dlen);
	Free(rt_key(rt)); /* free old block, if any */
	rt_key(rt) = (struct sockaddr *)new;
	rt->rt_gateway = (struct sockaddr *)(new + dlen);
	}

	/*
	* Copy the new gateway value into the memory chunk.
	*/
	bcopy(gate, rt->rt_gateway, glen);

	return (0);
	}

	/*
	 * Copy the socket address 'src' into 'dst', ANDing its address bytes
	 * with 'netmask'.
	 *
	 * The first byte of each address is taken as that address's length.
	 * The two leading bytes of 'src' are copied unmasked; the following
	 * bytes are masked up to the shorter of the source and mask lengths,
	 * and any remaining bytes up to the source length are zeroed in 'dst'.
	 * 'dst' must have room for the source length.
	 */
	void
	rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
	{
		unsigned char *cp1 = (unsigned char *)src;
		unsigned char *cp2 = (unsigned char *)dst;
		unsigned char *cp3 = (unsigned char *)netmask;
		unsigned char *cplim = cp2 + *cp3;	/* end of the masked region */
		unsigned char *cplim2 = cp2 + *cp1;	/* end of the destination */

		*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
		cp3 += 2;
		/* never mask past the end of the source address */
		if (cplim > cplim2)
			cplim = cplim2;
		while (cp2 < cplim)
			*cp2++ = *cp1++ & *cp3++;
		/* bytes not covered by the mask are cleared */
		while (cp2 < cplim2)
			*cp2++ = 0;
	}

	/*
	* Set up a routing table entry, normally
	* for an interface.
	*
	* ifa: interface address the route belongs to.
	* cmd: RTM_ADD or RTM_DELETE.
	* flags: RTF_* flags; with RTF_HOST the route is keyed on ifa_dstaddr
	* with no netmask, otherwise on ifa_addr with ifa_netmask.
	* fibnum: table to operate on, or -1 to walk a range of tables: every
	* table, except that an add with rt_add_addr_allfibs == 0 touches only
	* the calling process's fib. Forced to 0 for non-AF_INET destinations.
	*
	* For RTM_DELETE the result is 0 if the route was removed from at least
	* one table, otherwise EHOSTUNREACH or ENETUNREACH. For other commands
	* the last error seen across the tables is returned (0 if none).
	*/
	#define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
	static inline int
	rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
	{
	struct sockaddr *dst;
	struct sockaddr *netmask;
	struct rtentry *rt = NULL;
	struct rt_addrinfo info;
	int error = 0;
	int startfib, endfib;
	char tempbuf[_SOCKADDR_TMPSIZE];
	int didwork = 0;
	int a_failure = 0;
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};

	if (flags & RTF_HOST) {
	dst = ifa->ifa_dstaddr;
	netmask = NULL;
	} else {
	dst = ifa->ifa_addr;
	netmask = ifa->ifa_netmask;
	}
	if ( dst->sa_family != AF_INET)
	fibnum = 0;
	if (fibnum == -1) {
	if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
	startfib = endfib = curthread->td_proc->p_fibnum;
	} else {
	startfib = 0;
	endfib = rt_numfibs - 1;
	}
	} else {
	KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
	startfib = fibnum;
	endfib = fibnum;
	}
	if (dst->sa_len == 0)
	return(EINVAL);

	/*
	* If it's a delete, check that if it exists,
	* it's on the correct interface or we might scrub
	* a route to another ifa which would
	* be confusing at best and possibly worse.
	*/
	if (cmd == RTM_DELETE) {
	/*
	* It's a delete, so it should already exist..
	* If it's a net, mask off the host bits
	* (Assuming we have a mask)
	* XXX this is kinda inet specific..
	*/
	if (netmask != NULL) {
	rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
	dst = (struct sockaddr *)tempbuf;
	}
	}
	/*
	* Now go through all the requested tables (fibs) and do the
	* requested action. Realistically, this will either be fib 0
	* for protocols that don't do multiple tables or all the
	* tables for those that do. XXX For this version only AF_INET.
	* When that changes code should be refactored to protocol
	* independent parts and protocol dependent parts.
	*/
	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
	if (cmd == RTM_DELETE) {
	struct radix_node_head *rnh;
	struct radix_node *rn;
	/*
	* Look up an rtentry that is in the routing tree and
	* contains the correct info.
	*/
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
	/* this table doesn't exist but others might */
	continue;
	RADIX_NODE_HEAD_LOCK(rnh);
	#ifdef RADIX_MPATH
	if (rn_mpath_capable(rnh)) {

	rn = rnh->rnh_matchaddr(dst, rnh);
	if (rn == NULL)
	error = ESRCH;
	else {
	rt = RNTORT(rn);
	/*
	* for interface route the
	* rt->rt_gateway is sockaddr_intf
	* for cloning ARP entries, so
	* rt_mpath_matchgate must use the
	* interface address
	*/
	rt = rt_mpath_matchgate(rt,
	ifa->ifa_addr);
	if (!rt)
	error = ESRCH;
	}
	}
	else
	#endif
	rn = rnh->rnh_lookup(dst, netmask, rnh);
	/* skip this table unless the entry is ours: non-root, same ifa, same key */
	error = (rn == NULL \|\|
	(rn->rn_flags & RNF_ROOT) \|\|
	RNTORT(rn)->rt_ifa != ifa \|\|
	!sa_equal((struct sockaddr *)rn->rn_key, dst));
	RADIX_NODE_HEAD_UNLOCK(rnh);
	if (error) {
	/* this is only an error if bad on ALL tables */
	continue;
	}
	}
	/*
	* Do the actual request
	*/
	bzero((caddr_t)&info, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags \| ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	/*
	* doing this for compatibility reasons
	*/
	if (cmd == RTM_ADD)
	info.rti_info[RTAX_GATEWAY] =
	(struct sockaddr *)&null_sdl;
	else
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_NETMASK] = netmask;
	error = rtrequest1_fib(cmd, &info, &rt, fibnum);
	if (error == 0 && rt != NULL) {
	/*
	* notify any listening routing agents of the change
	*/
	RT_LOCK(rt);
	#ifdef RADIX_MPATH
	/*
	* in case address alias finds the first address
	* e.g. ifconfig bge0 192.103.54.246/24
	* e.g. ifconfig bge0 192.103.54.247/24
	* the address set in the route is 192.103.54.246
	* so we need to replace it with 192.103.54.247
	*/
	if (memcmp(rt->rt_ifa->ifa_addr,
	ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
	ifa_free(rt->rt_ifa);
	ifa_ref(ifa);
	rt->rt_ifp = ifa->ifa_ifp;
	rt->rt_ifa = ifa;
	}
	#endif
	/*
	* doing this for compatibility reasons
	*/
	if (cmd == RTM_ADD) {
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
	rt->rt_ifp->if_type;
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
	rt->rt_ifp->if_index;
	}
	/* hold a temporary reference while the route is unlocked for the message */
	RT_ADDREF(rt);
	RT_UNLOCK(rt);
	rt_newaddrmsg(cmd, ifa, error, rt);
	RT_LOCK(rt);
	RT_REMREF(rt);
	if (cmd == RTM_DELETE) {
	/*
	* If we are deleting, and we found an entry,
	* then it's been removed from the tree..
	* now throw it away.
	*/
	RTFREE_LOCKED(rt);
	} else {
	if (cmd == RTM_ADD) {
	/*
	* We just wanted to add it..
	* we don't actually need a reference.
	*/
	RT_REMREF(rt);
	}
	RT_UNLOCK(rt);
	}
	didwork = 1;
	}
	if (error)
	a_failure = error;
	}
	if (cmd == RTM_DELETE) {
	if (didwork) {
	error = 0;
	} else {
	/* we only give an error if it wasn't in any table */
	error = ((flags & RTF_HOST) ?
	EHOSTUNREACH : ENETUNREACH);
	}
	} else {
	if (a_failure) {
	/* return an error if any of them failed */
	error = a_failure;
	}
	}
	return (error);
	}

	/*
	 * special one for inet internal use. may not use.
	 * Calls rtinit1() with a fib number of -1.
	 */
	int
	rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
	{
		int error;

		error = rtinit1(ifa, cmd, flags, -1);
		return (error);
	}

	/*
	 * Set up a routing table entry, normally
	 * for an interface.
	 *
	 * Chooses the fib argument for rtinit1() from the address family of
	 * the route's destination: -1 for AF_INET, 0 for everything else.
	 */
	int
	rtinit(struct ifaddr *ifa, int cmd, int flags)
	{
		struct sockaddr *dst;
		int fib;

		/* Host routes are keyed on the peer address, others on our own. */
		dst = (flags & RTF_HOST) ? ifa->ifa_dstaddr : ifa->ifa_addr;
		fib = (dst->sa_family == AF_INET) ? -1 : 0;

		return (rtinit1(ifa, cmd, flags, fib));
	}
	Index: stable/8/sys/net/rtsock.c
	===================================================================
	--- stable/8/sys/net/rtsock.c (revision 209276)
	+++ stable/8/sys/net/rtsock.c (revision 209277)
	@@ -1,1683 +1,1694 @@
	/*-
	* Copyright (c) 1988, 1991, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)rtsock.c 8.7 (Berkeley) 10/12/95
	* $FreeBSD$
	*/
	#include "opt_compat.h"
	#include "opt_sctp.h"
	#include "opt_mpath.h"
	#include "opt_inet.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/jail.h>
	#include <sys/kernel.h>
	#include <sys/domain.h>
	#include <sys/lock.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/protosw.h>
	#include <sys/rwlock.h>
	#include <sys/signalvar.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sysctl.h>
	#include <sys/systm.h>

	#include <net/if.h>
	#include <net/if_dl.h>
	#include <net/if_llatbl.h>
	+#include <net/if_types.h>
	#include <net/netisr.h>
	#include <net/raw_cb.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/if_ether.h>
	#ifdef INET6
	#include <netinet6/scope6_var.h>
	#endif

	#if defined(INET) \|\| defined(INET6)
	#ifdef SCTP
	extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
	#endif /* SCTP */
	#endif

	#ifdef COMPAT_FREEBSD32
	#include <sys/mount.h>
	#include <compat/freebsd32/freebsd32.h>

	/*
	* Interface data record built from fixed-width fields; only compiled
	* when COMPAT_FREEBSD32 is defined.
	* NOTE(review): presumably the 32-bit process view of struct if_data -
	* confirm the field order against that structure.
	*/
	struct if_data32 {
	uint8_t ifi_type;
	uint8_t ifi_physical;
	uint8_t ifi_addrlen;
	uint8_t ifi_hdrlen;
	uint8_t ifi_link_state;
	uint8_t ifi_spare_char1;
	uint8_t ifi_spare_char2;
	uint8_t ifi_datalen;
	uint32_t ifi_mtu;
	uint32_t ifi_metric;
	uint32_t ifi_baudrate;
	uint32_t ifi_ipackets;
	uint32_t ifi_ierrors;
	uint32_t ifi_opackets;
	uint32_t ifi_oerrors;
	uint32_t ifi_collisions;
	uint32_t ifi_ibytes;
	uint32_t ifi_obytes;
	uint32_t ifi_imcasts;
	uint32_t ifi_omcasts;
	uint32_t ifi_iqdrops;
	uint32_t ifi_noproto;
	uint32_t ifi_hwassist;
	int32_t ifi_epoch;
	struct timeval32 ifi_lastchange;
	};

	/*
	* Interface message header that embeds a struct if_data32; only compiled
	* when COMPAT_FREEBSD32 is defined.
	* NOTE(review): presumably the 32-bit process view of struct if_msghdr -
	* confirm against that structure.
	*/
	struct if_msghdr32 {
	uint16_t ifm_msglen;
	uint8_t ifm_version;
	uint8_t ifm_type;
	int32_t ifm_addrs;
	int32_t ifm_flags;
	uint16_t ifm_index;
	struct if_data32 ifm_data;
	};
	#endif

	MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");

	/* NB: these are not modified */
	/* route_src is the source address handed to raw_input() by rts_input() */
	static struct sockaddr route_src = { 2, PF_ROUTE, };
	static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };

	/*
	* Counts of attached routing sockets, broken down by the protocol they
	* were opened with. Updated in rts_attach() and rts_detach() with
	* rtsock_mtx held.
	*/
	static struct {
	int ip_count; /* attached w/ AF_INET */
	int ip6_count; /* attached w/ AF_INET6 */
	int ipx_count; /* attached w/ AF_IPX */
	int any_count; /* total attached */
	} route_cb;

	/* mutex taken around updates of route_cb */
	struct mtx rtsock_mtx;
	MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);

	#define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
	#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
	#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)

	/* parent sysctl node "net.route" */
	SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");

	/*
	* Argument block passed to rt_msg2() and the sysctl_iflist() /
	* sysctl_ifmalist() helpers declared below.
	* NOTE(review): field meanings are not visible in this part of the file;
	* presumably w_tmem/w_tmemsize describe a scratch buffer and w_req the
	* sysctl request being served - confirm against the users.
	*/
	struct walkarg {
	int w_tmemsize;
	int w_op, w_arg;
	caddr_t w_tmem;
	struct sysctl_req *w_req;
	};

	/* Forward declarations for the file-local helpers defined below. */
	static void rts_input(struct mbuf *m);
	static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
	static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
	    caddr_t cp, struct walkarg *w);
	static int rt_xaddrs(caddr_t cp, caddr_t cplim,
	    struct rt_addrinfo *rtinfo);
	static int sysctl_dumpentry(struct radix_node *rn, void *vw);
	static int sysctl_iflist(int af, struct walkarg *w);
	static int sysctl_ifmalist(int af, struct walkarg *w);
	static int route_output(struct mbuf *m, struct socket *so);
	static void rt_setmetrics(u_long which, const struct rt_metrics *in,
	    struct rt_metrics_lite *out);
	static void rt_getmetrics(const struct rt_metrics_lite *in,
	    struct rt_metrics *out);
	static void rt_dispatch(struct mbuf *, const struct sockaddr *);

	/*
	* netisr handler description for the routing socket: packets queued under
	* NETISR_ROUTE are delivered to rts_input(). Registered from rts_init();
	* its queue limit can be read and changed through
	* sysctl_route_netisr_maxqlen().
	*/
	static struct netisr_handler rtsock_nh = {
	.nh_name = "rtsock",
	.nh_handler = rts_input,
	.nh_proto = NETISR_ROUTE,
	.nh_policy = NETISR_POLICY_SOURCE,
	};

	/*
	 * Sysctl handler for net.route.netisr_maxqlen.
	 *
	 * Reports the current queue limit of the rtsock netisr handler and,
	 * when a new value is supplied, applies it.  Returns EINVAL for a new
	 * limit below 1; otherwise the error from sysctl_handle_int() or
	 * netisr_setqlimit().
	 */
	static int
	sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
	{
		int error, qlimit;

		netisr_getqlimit(&rtsock_nh, &qlimit);
		error = sysctl_handle_int(oidp, &qlimit, 0, req);
		/* Stop on error, or when the request only reads the value. */
		if (error || !req->newptr)
			return (error);
		if (qlimit < 1)
			return (EINVAL);
		return (netisr_setqlimit(&rtsock_nh, qlimit));
	}
	SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
	    0, 0, sysctl_route_netisr_maxqlen, "I",
	    "maximum routing socket dispatch queue length");

	/*
	 * Startup hook for the routing socket: apply the optional
	 * "net.route.netisr_maxqlen" tunable to the handler's queue limit,
	 * then register the handler with netisr.
	 */
	static void
	rts_init(void)
	{
		int qlimit;

		if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &qlimit)) {
			rtsock_nh.nh_qlimit = qlimit;
		}
		netisr_register(&rtsock_nh);
	}
	SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);

	/*
	 * netisr handler for routing messages: hand mbuf 'm' to raw_input()
	 * with a PF_ROUTE sockproto.  The protocol field is taken from the
	 * PACKET_TAG_RTSOCKFAM tag when the mbuf carries one (the tag is then
	 * removed), and is 0 otherwise.
	 */
	static void
	rts_input(struct mbuf *m)
	{
		struct sockproto route_proto;
		struct m_tag *tag;

		route_proto.sp_family = PF_ROUTE;
		route_proto.sp_protocol = 0;

		tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
		if (tag != NULL) {
			/* The family is stored immediately after the tag header. */
			route_proto.sp_protocol = *(unsigned short *)(tag + 1);
			m_tag_delete(m, tag);
		}

		raw_input(m, &route_proto, &route_src);
	}

	/*
	 * It really doesn't make any sense at all for this code to share much
	 * with raw_usrreq.c, since its functionality is so restricted. XXX
	 *
	 * pru_abort: delegates to the raw socket implementation.
	 */
	static void
	rts_abort(struct socket *so)
	{

		(*raw_usrreqs.pru_abort)(so);
	}

	/* pru_close: delegates to the raw socket implementation. */
	static void
	rts_close(struct socket *so)
	{

		(*raw_usrreqs.pru_close)(so);
	}

	/* pru_accept is EOPNOTSUPP */

	static int
	rts_attach(struct socket so, int proto, struct thread td)
	{
	struct rawcb *rp;
	int s, error;

	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));

	/* XXX */
	rp = malloc(sizeof *rp, M_PCB, M_WAITOK \| M_ZERO);
	if (rp == NULL)
	return ENOBUFS;

	/*
	* The splnet() is necessary to block protocols from sending
	* error notifications (like RTM_REDIRECT or RTM_LOSING) while
	* this PCB is extant but incompletely initialized.
	* Probably we should try to do more of this work beforehand and
	* eliminate the spl.
	*/
	s = splnet();
	so->so_pcb = (caddr_t)rp;
	so->so_fibnum = td->td_proc->p_fibnum;
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
	splx(s);
	so->so_pcb = NULL;
	free(rp, M_PCB);
	return error;
	}
	RTSOCK_LOCK();
	switch(rp->rcb_proto.sp_protocol) {
	case AF_INET:
	route_cb.ip_count++;
	break;
	case AF_INET6:
	route_cb.ip6_count++;
	break;
	case AF_IPX:
	route_cb.ipx_count++;
	break;
	}
	route_cb.any_count++;
	RTSOCK_UNLOCK();
	soisconnected(so);
	so->so_options \|= SO_USELOOPBACK;
	splx(s);
	return 0;
	}

	/*
	 * pru_bind: delegates to the raw socket implementation and returns
	 * its result.
	 */
	static int
	rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
	{

		return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
	}

	/*
	 * pru_connect: delegates to the raw socket implementation and returns
	 * its result.
	 */
	static int
	rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
	{

		return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
	}

	/* pru_connect2 is EOPNOTSUPP */
	/* pru_control is EOPNOTSUPP */

	/*
	 * pru_detach: undo the listener accounting done at attach time, then
	 * let the raw socket code release the control block.
	 */
	static void
	rts_detach(struct socket *so)
	{
		struct rawcb *rp;
		int family;

		rp = sotorawcb(so);
		KASSERT(rp != NULL, ("rts_detach: rp == NULL"));

		RTSOCK_LOCK();
		family = rp->rcb_proto.sp_protocol;
		if (family == AF_INET)
			route_cb.ip_count--;
		else if (family == AF_INET6)
			route_cb.ip6_count--;
		else if (family == AF_IPX)
			route_cb.ipx_count--;
		route_cb.any_count--;
		RTSOCK_UNLOCK();

		raw_usrreqs.pru_detach(so);
	}

	static int
	rts_disconnect(struct socket *so)
	{

	return (raw_usrreqs.pru_disconnect(so));
	}

	/* pru_listen is EOPNOTSUPP */

	/*
	 * pru_peeraddr handler: passes the request to the generic raw-socket
	 * implementation, which is handed the caller's nam pointer unchanged.
	 */
	static int
	rts_peeraddr(struct socket *so, struct sockaddr **nam)
	{

		return (raw_usrreqs.pru_peeraddr(so, nam));
	}

	/* pru_rcvd is EOPNOTSUPP */
	/* pru_rcvoob is EOPNOTSUPP */

	/*
	 * pru_send handler: passes every argument through to the generic
	 * raw-socket send routine and returns its result.
	 */
	static int
	rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
		struct mbuf *control, struct thread *td)
	{

		return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
	}

	/* pru_sense is null */

	static int
	rts_shutdown(struct socket *so)
	{

	return (raw_usrreqs.pru_shutdown(so));
	}

	/*
	 * pru_sockaddr handler: passes the request to the generic raw-socket
	 * implementation, which is handed the caller's nam pointer unchanged.
	 */
	static int
	rts_sockaddr(struct socket *so, struct sockaddr **nam)
	{

		return (raw_usrreqs.pru_sockaddr(so, nam));
	}

	/*
	 * User-request switch for routing sockets.  Each entry points at the
	 * rts_* wrapper of the same name; members not listed here are left to
	 * their zero-initialized default.
	 */
	static struct pr_usrreqs route_usrreqs = {
		.pru_abort =		rts_abort,
		.pru_attach =		rts_attach,
		.pru_bind =		rts_bind,
		.pru_close =		rts_close,
		.pru_connect =		rts_connect,
		.pru_detach =		rts_detach,
		.pru_disconnect =	rts_disconnect,
		.pru_peeraddr =		rts_peeraddr,
		.pru_send =		rts_send,
		.pru_shutdown =		rts_shutdown,
		.pru_sockaddr =		rts_sockaddr,
	};

	#ifndef _SOCKADDR_UNION_DEFINED
	#define _SOCKADDR_UNION_DEFINED
	/*
	* The union of all possible address formats we handle.
	*
	* The definition is skipped when _SOCKADDR_UNION_DEFINED is already
	* defined, so only one definition is ever seen per translation unit.
	*/
	union sockaddr_union {
	struct sockaddr sa;	/* generic sockaddr view */
	struct sockaddr_in sin;	/* AF_INET view */
	struct sockaddr_in6 sin6;	/* AF_INET6 view */
	};
	#endif /* _SOCKADDR_UNION_DEFINED */

	static int
	rtm_get_jailed(struct rt_addrinfo info, struct ifnet ifp,
	struct rtentry rt, union sockaddr_union saun, struct ucred *cred)
	{

	/* First, see if the returned address is part of the jail. */
	if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
	info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	return (0);
	}

	switch (info->rti_info[RTAX_DST]->sa_family) {
	#ifdef INET
	case AF_INET:
	{
	struct in_addr ia;
	struct ifaddr *ifa;
	int found;

	found = 0;
	/*
	* Try to find an address on the given outgoing interface
	* that belongs to the jail.
	*/
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	struct sockaddr *sa;
	sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET)
	continue;
	ia = ((struct sockaddr_in *)sa)->sin_addr;
	if (prison_check_ip4(cred, &ia) == 0) {
	found = 1;
	break;
	}
	}
	IF_ADDR_UNLOCK(ifp);
	if (!found) {
	/*
	* As a last resort return the 'default' jail address.
	*/
	ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
	sin_addr;
	if (prison_get_ip4(cred, &ia) != 0)
	return (ESRCH);
	}
	bzero(&saun->sin, sizeof(struct sockaddr_in));
	saun->sin.sin_len = sizeof(struct sockaddr_in);
	saun->sin.sin_family = AF_INET;
	saun->sin.sin_addr.s_addr = ia.s_addr;
	info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
	break;
	}
	#endif
	#ifdef INET6
	case AF_INET6:
	{
	struct in6_addr ia6;
	struct ifaddr *ifa;
	int found;

	found = 0;
	/*
	* Try to find an address on the given outgoing interface
	* that belongs to the jail.
	*/
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	struct sockaddr *sa;
	sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET6)
	continue;
	bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
	&ia6, sizeof(struct in6_addr));
	if (prison_check_ip6(cred, &ia6) == 0) {
	found = 1;
	break;
	}
	}
	IF_ADDR_UNLOCK(ifp);
	if (!found) {
	/*
	* As a last resort return the 'default' jail address.
	*/
	ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
	sin6_addr;
	if (prison_get_ip6(cred, &ia6) != 0)
	return (ESRCH);
	}
	bzero(&saun->sin6, sizeof(struct sockaddr_in6));
	saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
	saun->sin6.sin6_family = AF_INET6;
	bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
	if (sa6_recoverscope(&saun->sin6) != 0)
	return (ESRCH);
	info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
	break;
	}
	#endif
	default:
	return (ESRCH);
	}
	return (0);
	}

	/ARGSUSED/
	static int
	route_output(struct mbuf m, struct socket so)
	{
	#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
	struct rt_msghdr *rtm = NULL;
	struct rtentry *rt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	struct ifnet *ifp = NULL;
	union sockaddr_union saun;

	#define senderr(e) { error = e; goto flush;}
	if (m == NULL \|\| ((m->m_len < sizeof(long)) &&
	(m = m_pullup(m, sizeof(long))) == NULL))
	return (ENOBUFS);
	if ((m->m_flags & M_PKTHDR) == 0)
	panic("route_output");
	len = m->m_pkthdr.len;
	if (len < sizeof(*rtm) \|\|
	len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
	info.rti_info[RTAX_DST] = NULL;
	senderr(EINVAL);
	}
	R_Malloc(rtm, struct rt_msghdr *, len);
	if (rtm == NULL) {
	info.rti_info[RTAX_DST] = NULL;
	senderr(ENOBUFS);
	}
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
	info.rti_info[RTAX_DST] = NULL;
	senderr(EPROTONOSUPPORT);
	}
	rtm->rtm_pid = curproc->p_pid;
	bzero(&info, sizeof(info));
	info.rti_addrs = rtm->rtm_addrs;
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
	info.rti_info[RTAX_DST] = NULL;
	senderr(EINVAL);
	}
	info.rti_flags = rtm->rtm_flags;
	if (info.rti_info[RTAX_DST] == NULL \|\|
	info.rti_info[RTAX_DST]->sa_family >= AF_MAX \|\|
	(info.rti_info[RTAX_GATEWAY] != NULL &&
	info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
	senderr(EINVAL);
	/*
	* Verify that the caller has the appropriate privilege; RTM_GET
	* is the only operation the non-superuser is allowed.
	*/
	if (rtm->rtm_type != RTM_GET) {
	error = priv_check(curthread, PRIV_NET_ROUTE);
	if (error)
	senderr(error);
	}

	/*
	* The given gateway address may be an interface address.
	* For example, issuing a "route change" command on a route
	* entry that was created from a tunnel, and the gateway
	* address given is the local end point. In this case the
	* RTF_GATEWAY flag must be cleared or the destination will
	* not be reachable even though there is no error message.
	*/
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
	struct route gw_ro;

	bzero(&gw_ro, sizeof(gw_ro));
	gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY];
	rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum);
	/*
	* A host route through the loopback interface is
	* installed for each interface adddress. In pre 8.0
	* releases the interface address of a PPP link type
	* is not reachable locally. This behavior is fixed as
	* part of the new L2/L3 redesign and rewrite work. The
	* signature of this interface address route is the
	* AF_LINK sa_family type of the rt_gateway, and the
	* rt_ifp has the IFF_LOOPBACK flag set.
	*/
	if (gw_ro.ro_rt != NULL &&
	gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
	gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
	info.rti_flags &= ~RTF_GATEWAY;
	if (gw_ro.ro_rt != NULL)
	RTFREE(gw_ro.ro_rt);
	}

	switch (rtm->rtm_type) {
	struct rtentry *saved_nrt;

	case RTM_ADD:
	if (info.rti_info[RTAX_GATEWAY] == NULL)
	senderr(EINVAL);
	saved_nrt = NULL;

	/* support for new ARP code */
	if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
	(rtm->rtm_flags & RTF_LLDATA) != 0) {
	error = lla_rt_output(rtm, &info);
	break;
	}
	error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
	so->so_fibnum);
	if (error == 0 && saved_nrt) {
	RT_LOCK(saved_nrt);
	rt_setmetrics(rtm->rtm_inits,
	&rtm->rtm_rmx, &saved_nrt->rt_rmx);
	rtm->rtm_index = saved_nrt->rt_ifp->if_index;
	RT_REMREF(saved_nrt);
	RT_UNLOCK(saved_nrt);
	}
	break;

	case RTM_DELETE:
	saved_nrt = NULL;
	/* support for new ARP code */
	if (info.rti_info[RTAX_GATEWAY] &&
	(info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
	(rtm->rtm_flags & RTF_LLDATA) != 0) {
	error = lla_rt_output(rtm, &info);
	break;
	}
	error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
	so->so_fibnum);
	if (error == 0) {
	RT_LOCK(saved_nrt);
	rt = saved_nrt;
	goto report;
	}
	break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
	rnh = rt_tables_get_rnh(so->so_fibnum,
	info.rti_info[RTAX_DST]->sa_family);
	if (rnh == NULL)
	senderr(EAFNOSUPPORT);
	RADIX_NODE_HEAD_RLOCK(rnh);
	rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
	info.rti_info[RTAX_NETMASK], rnh);
	if (rt == NULL) { /* XXX looks bogus */
	RADIX_NODE_HEAD_RUNLOCK(rnh);
	senderr(ESRCH);
	}
	#ifdef RADIX_MPATH
	/*
	* for RTM_CHANGE/LOCK, if we got multipath routes,
	* we require users to specify a matching RTAX_GATEWAY.
	*
	* for RTM_GET, gate is optional even with multipath.
	* if gate == NULL the first match is returned.
	* (no need to call rt_mpath_matchgate if gate == NULL)
	*/
	if (rn_mpath_capable(rnh) &&
	(rtm->rtm_type != RTM_GET \|\| info.rti_info[RTAX_GATEWAY])) {
	rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
	if (!rt) {
	RADIX_NODE_HEAD_RUNLOCK(rnh);
	senderr(ESRCH);
	}
	}
	#endif
	/*
	* If performing proxied L2 entry insertion, and
	* the actual PPP host entry is found, perform
	* another search to retrieve the prefix route of
	* the local end point of the PPP link.
	*/
	- if ((rtm->rtm_flags & RTF_ANNOUNCE) &&
	- (rt->rt_ifp->if_flags & IFF_POINTOPOINT)) {
	+ if (rtm->rtm_flags & RTF_ANNOUNCE) {
	struct sockaddr laddr;
	- rt_maskedcopy(rt->rt_ifa->ifa_addr,
	- &laddr,
	- rt->rt_ifa->ifa_netmask);
	+
	+ if (rt->rt_ifp != NULL &&
	+ rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
	+ struct ifaddr *ifa;
	+
	+ ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1);
	+ if (ifa != NULL)
	+ rt_maskedcopy(ifa->ifa_addr,
	+ &laddr,
	+ ifa->ifa_netmask);
	+ } else
	+ rt_maskedcopy(rt->rt_ifa->ifa_addr,
	+ &laddr,
	+ rt->rt_ifa->ifa_netmask);
	/*
	* refactor rt and no lock operation necessary
	*/
	rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh);
	if (rt == NULL) {
	RADIX_NODE_HEAD_RUNLOCK(rnh);
	senderr(ESRCH);
	}
	}
	RT_LOCK(rt);
	RT_ADDREF(rt);
	RADIX_NODE_HEAD_RUNLOCK(rnh);

	/*
	* Fix for PR: 82974
	*
	* RTM_CHANGE/LOCK need a perfect match, rn_lookup()
	* returns a perfect match in case a netmask is
	* specified. For host routes only a longest prefix
	* match is returned so it is necessary to compare the
	* existence of the netmask. If both have a netmask
	* rnh_lookup() did a perfect match and if none of them
	* have a netmask both are host routes which is also a
	* perfect match.
	*/

	if (rtm->rtm_type != RTM_GET &&
	(!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
	RT_UNLOCK(rt);
	senderr(ESRCH);
	}

	switch(rtm->rtm_type) {

	case RTM_GET:
	report:
	RT_LOCK_ASSERT(rt);
	if ((rt->rt_flags & RTF_HOST) == 0
	? jailed_without_vnet(curthread->td_ucred)
	: prison_if(curthread->td_ucred,
	rt_key(rt)) != 0) {
	RT_UNLOCK(rt);
	senderr(ESRCH);
	}
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = 0;
	if (rtm->rtm_addrs & (RTA_IFP \| RTA_IFA)) {
	ifp = rt->rt_ifp;
	if (ifp) {
	info.rti_info[RTAX_IFP] =
	ifp->if_addr->ifa_addr;
	error = rtm_get_jailed(&info, ifp, rt,
	&saun, curthread->td_ucred);
	if (error != 0) {
	RT_UNLOCK(rt);
	senderr(error);
	}
	if (ifp->if_flags & IFF_POINTOPOINT)
	info.rti_info[RTAX_BRD] =
	rt->rt_ifa->ifa_dstaddr;
	rtm->rtm_index = ifp->if_index;
	} else {
	info.rti_info[RTAX_IFP] = NULL;
	info.rti_info[RTAX_IFA] = NULL;
	}
	} else if ((ifp = rt->rt_ifp) != NULL) {
	rtm->rtm_index = ifp->if_index;
	}
	len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
	if (len > rtm->rtm_msglen) {
	struct rt_msghdr *new_rtm;
	R_Malloc(new_rtm, struct rt_msghdr *, len);
	if (new_rtm == NULL) {
	RT_UNLOCK(rt);
	senderr(ENOBUFS);
	}
	bcopy(rtm, new_rtm, rtm->rtm_msglen);
	Free(rtm); rtm = new_rtm;
	}
	(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
	rtm->rtm_flags = rt->rt_flags;
	rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
	rtm->rtm_addrs = info.rti_addrs;
	break;

	case RTM_CHANGE:
	/*
	* New gateway could require new ifaddr, ifp;
	* flags may also be different; ifp may be specified
	* by ll sockaddr when protocol address is ambiguous
	*/
	if (((rt->rt_flags & RTF_GATEWAY) &&
	info.rti_info[RTAX_GATEWAY] != NULL) \|\|
	info.rti_info[RTAX_IFP] != NULL \|\|
	(info.rti_info[RTAX_IFA] != NULL &&
	!sa_equal(info.rti_info[RTAX_IFA],
	rt->rt_ifa->ifa_addr))) {
	RT_UNLOCK(rt);
	RADIX_NODE_HEAD_LOCK(rnh);
	error = rt_getifa_fib(&info, rt->rt_fibnum);
	/*
	* XXXRW: Really we should release this
	* reference later, but this maintains
	* historical behavior.
	*/
	if (info.rti_ifa != NULL)
	ifa_free(info.rti_ifa);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	if (error != 0)
	senderr(error);
	RT_LOCK(rt);
	}
	if (info.rti_ifa != NULL &&
	info.rti_ifa != rt->rt_ifa &&
	rt->rt_ifa != NULL &&
	rt->rt_ifa->ifa_rtrequest != NULL) {
	rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
	&info);
	ifa_free(rt->rt_ifa);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
	RT_UNLOCK(rt);
	RADIX_NODE_HEAD_LOCK(rnh);
	RT_LOCK(rt);

	error = rt_setgate(rt, rt_key(rt),
	info.rti_info[RTAX_GATEWAY]);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	if (error != 0) {
	RT_UNLOCK(rt);
	senderr(error);
	}
	rt->rt_flags \|= (RTF_GATEWAY & info.rti_flags);
	}
	if (info.rti_ifa != NULL &&
	info.rti_ifa != rt->rt_ifa) {
	ifa_ref(info.rti_ifa);
	rt->rt_ifa = info.rti_ifa;
	rt->rt_ifp = info.rti_ifp;
	}
	/* Allow some flags to be toggled on change. */
	rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) \|
	(rtm->rtm_flags & RTF_FMASK);
	rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
	&rt->rt_rmx);
	rtm->rtm_index = rt->rt_ifp->if_index;
	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
	rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
	/* FALLTHROUGH */
	case RTM_LOCK:
	/* We don't support locks anymore */
	break;
	}
	RT_UNLOCK(rt);
	break;

	default:
	senderr(EOPNOTSUPP);
	}

	flush:
	if (rtm) {
	if (error)
	rtm->rtm_errno = error;
	else
	rtm->rtm_flags \|= RTF_DONE;
	}
	if (rt) /* XXX can this be true? */
	RTFREE(rt);
	{
	struct rawcb *rp = NULL;
	/*
	* Check to see if we don't want our own messages.
	*/
	if ((so->so_options & SO_USELOOPBACK) == 0) {
	if (route_cb.any_count <= 1) {
	if (rtm)
	Free(rtm);
	m_freem(m);
	return (error);
	}
	/* There is another listener, so construct message */
	rp = sotorawcb(so);
	}
	if (rtm) {
	m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
	if (m->m_pkthdr.len < rtm->rtm_msglen) {
	m_freem(m);
	m = NULL;
	} else if (m->m_pkthdr.len > rtm->rtm_msglen)
	m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
	Free(rtm);
	}
	if (m) {
	if (rp) {
	/*
	* XXX insure we don't get a copy by
	* invalidating our protocol
	*/
	unsigned short family = rp->rcb_proto.sp_family;
	rp->rcb_proto.sp_family = 0;
	rt_dispatch(m, info.rti_info[RTAX_DST]);
	rp->rcb_proto.sp_family = family;
	} else
	rt_dispatch(m, info.rti_info[RTAX_DST]);
	}
	}
	return (error);
	#undef sa_equal
	}

	/*
	 * Copy the metrics selected by the "which" bitmask from a userland
	 * rt_metrics into the route's rt_metrics_lite.
	 *
	 * Only these are stored in the routing entry since introduction
	 * of tcp hostcache.  The rest is ignored.
	 */
	static void
	rt_setmetrics(u_long which, const struct rt_metrics *in,
		struct rt_metrics_lite *out)
	{

		if (which & (RTV_MTU))
			out->rmx_mtu = in->rmx_mtu;
		if (which & (RTV_WEIGHT))
			out->rmx_weight = in->rmx_weight;
		/* Userland -> kernel timebase conversion. */
		if (which & RTV_EXPIRE) {
			if (in->rmx_expire)
				out->rmx_expire =
				    in->rmx_expire - time_second + time_uptime;
			else
				out->rmx_expire = 0;
		}
	}

	static void
	rt_getmetrics(const struct rt_metrics_lite in, struct rt_metrics out)
	{
	#define metric(e) out->e = in->e;
	bzero(out, sizeof(*out));
	metric(rmx_mtu);
	metric(rmx_weight);
	/* Kernel -> userland timebase conversion. */
	out->rmx_expire = in->rmx_expire ?
	in->rmx_expire - time_uptime + time_second : 0;
	#undef metric
	}

	/*
	 * Extract the addresses of the passed sockaddrs.
	 * Do a little sanity checking so as to avoid bad memory references.
	 * This data is derived straight from userland.
	 *
	 * cp     - first byte after the message header.
	 * cplim  - one past the last byte of the message.
	 * rtinfo - rti_addrs says which slots are present; rti_info[] is
	 *          filled with pointers into the message.
	 *
	 * Returns 0, or EINVAL when a sockaddr would run past cplim.
	 */
	static int
	rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
	{
		struct sockaddr *addr;
		int slot;

		for (slot = 0; slot < RTAX_MAX; slot++) {
			if (cp >= cplim)
				break;
			if (!(rtinfo->rti_addrs & (1 << slot)))
				continue;
			addr = (struct sockaddr *)cp;
			/*
			 * It won't fit.
			 */
			if (cp + addr->sa_len > cplim)
				return (EINVAL);
			/*
			 * there are no more.. quit now
			 * If there are more bits, they are in error.
			 * I've seen this. route(1) can evidently generate these.
			 * This causes kernel to core dump.
			 * for compatibility, If we see this, point to a safe address.
			 */
			if (addr->sa_len == 0) {
				rtinfo->rti_info[slot] = &sa_zero;
				return (0); /* should be EINVAL but for compat */
			}
			/* accept it */
			rtinfo->rti_info[slot] = addr;
			cp += SA_SIZE(addr);
		}
		return (0);
	}

	/*
	 * Build a routing message of the given type in a freshly allocated
	 * mbuf: a zeroed header sized according to the type, followed by every
	 * non-NULL sockaddr in rtinfo->rti_info[].  rtinfo->rti_addrs gets a
	 * bit set for each sockaddr appended.
	 *
	 * Returns the mbuf, or NULL if allocation fails or the resulting
	 * packet length does not match the computed length.
	 */
	static struct mbuf *
	rt_msg1(int type, struct rt_addrinfo *rtinfo)
	{
		struct rt_msghdr *rtm;
		struct mbuf *m;
		int i;
		struct sockaddr *sa;
		int len, dlen;

		switch (type) {

		case RTM_DELADDR:
		case RTM_NEWADDR:
			len = sizeof(struct ifa_msghdr);
			break;

		case RTM_DELMADDR:
		case RTM_NEWMADDR:
			len = sizeof(struct ifma_msghdr);
			break;

		case RTM_IFINFO:
			len = sizeof(struct if_msghdr);
			break;

		case RTM_IFANNOUNCE:
		case RTM_IEEE80211:
			len = sizeof(struct if_announcemsghdr);
			break;

		default:
			len = sizeof(struct rt_msghdr);
		}
		if (len > MCLBYTES)
			panic("rt_msg1");
		m = m_gethdr(M_DONTWAIT, MT_DATA);
		if (m && len > MHLEN) {
			/* Header does not fit in the mbuf itself; get a cluster. */
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m = NULL;
			}
		}
		if (m == NULL)
			return (m);
		m->m_pkthdr.len = m->m_len = len;
		m->m_pkthdr.rcvif = NULL;
		rtm = mtod(m, struct rt_msghdr *);
		bzero((caddr_t)rtm, len);
		for (i = 0; i < RTAX_MAX; i++) {
			if ((sa = rtinfo->rti_info[i]) == NULL)
				continue;
			rtinfo->rti_addrs |= (1 << i);
			dlen = SA_SIZE(sa);
			m_copyback(m, len, dlen, (caddr_t)sa);
			len += dlen;
		}
		/* A length mismatch means an append above did not take effect. */
		if (m->m_pkthdr.len != len) {
			m_freem(m);
			return (NULL);
		}
		rtm->rtm_msglen = len;
		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		return (m);
	}

	/*
	 * Compute the length of a routing message of the given type and,
	 * when a buffer is available, write the message into it.
	 *
	 * type   - RTM_* message type; selects the header size.
	 * rtinfo - sockaddrs to append; rti_addrs is rebuilt from rti_info[].
	 * cp     - output buffer, or NULL to only size the message.
	 * w      - optional walkarg; when cp is NULL and w->w_req is set, the
	 *          walkarg's scratch buffer (w_tmem) is grown as needed and
	 *          used as the output buffer on a second pass.
	 *
	 * Returns the ALIGN()ed message length.
	 */
	static int
	rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
	{
		int i;
		int len, dlen, second_time = 0;
		caddr_t cp0;

		rtinfo->rti_addrs = 0;
	again:
		switch (type) {

		case RTM_DELADDR:
		case RTM_NEWADDR:
			len = sizeof(struct ifa_msghdr);
			break;

		case RTM_IFINFO:
	#ifdef COMPAT_FREEBSD32
			if (w != NULL && w->w_req->flags & SCTL_MASK32) {
				len = sizeof(struct if_msghdr32);
				break;
			}
	#endif
			len = sizeof(struct if_msghdr);
			break;

		case RTM_NEWMADDR:
			len = sizeof(struct ifma_msghdr);
			break;

		default:
			len = sizeof(struct rt_msghdr);
		}
		cp0 = cp;
		if (cp0)
			cp += len;
		for (i = 0; i < RTAX_MAX; i++) {
			struct sockaddr *sa;

			if ((sa = rtinfo->rti_info[i]) == NULL)
				continue;
			rtinfo->rti_addrs |= (1 << i);
			dlen = SA_SIZE(sa);
			if (cp) {
				bcopy((caddr_t)sa, cp, (unsigned)dlen);
				cp += dlen;
			}
			len += dlen;
		}
		len = ALIGN(len);
		if (cp == NULL && w != NULL && !second_time) {
			struct walkarg *rw = w;

			if (rw->w_req) {
				if (rw->w_tmemsize < len) {
					if (rw->w_tmem)
						free(rw->w_tmem, M_RTABLE);
					rw->w_tmem = (caddr_t)
						malloc(len, M_RTABLE, M_NOWAIT);
					if (rw->w_tmem)
						rw->w_tmemsize = len;
				}
				if (rw->w_tmem) {
					/* Redo the pass, this time writing into w_tmem. */
					cp = rw->w_tmem;
					second_time = 1;
					goto again;
				}
			}
		}
		if (cp) {
			struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;

			rtm->rtm_version = RTM_VERSION;
			rtm->rtm_type = type;
			rtm->rtm_msglen = len;
		}
		return (len);
	}

	/*
	 * This routine is called to generate a message from the routing
	 * socket indicating that a redirect has occurred, a routing lookup
	 * has failed, or that a protocol has detected timeouts to a particular
	 * destination.
	 *
	 * Nothing is sent when there are no routing-socket listeners or the
	 * message cannot be allocated.
	 */
	void
	rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
	{
		struct rt_msghdr *rtm;
		struct mbuf *m;
		struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];

		if (route_cb.any_count == 0)
			return;
		m = rt_msg1(type, rtinfo);
		if (m == NULL)
			return;
		rtm = mtod(m, struct rt_msghdr *);
		rtm->rtm_flags = RTF_DONE | flags;
		rtm->rtm_errno = error;
		rtm->rtm_addrs = rtinfo->rti_addrs;
		rt_dispatch(m, sa);
	}

	/*
	 * This routine is called to generate a message from the routing
	 * socket indicating that the status of a network interface has changed.
	 *
	 * The RTM_IFINFO message carries the interface index, the combined
	 * if_flags and if_drv_flags, and a copy of if_data; no sockaddrs are
	 * attached.  Nothing is sent when there are no listeners.
	 */
	void
	rt_ifmsg(struct ifnet *ifp)
	{
		struct if_msghdr *ifm;
		struct mbuf *m;
		struct rt_addrinfo info;

		if (route_cb.any_count == 0)
			return;
		bzero((caddr_t)&info, sizeof(info));
		m = rt_msg1(RTM_IFINFO, &info);
		if (m == NULL)
			return;
		ifm = mtod(m, struct if_msghdr *);
		ifm->ifm_index = ifp->if_index;
		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
		ifm->ifm_data = ifp->if_data;
		ifm->ifm_addrs = 0;
		rt_dispatch(m, NULL);
	}

	/*
	* This is called to generate messages from the routing socket
	* indicating a network interface has had addresses associated with it.
	* if we ever reverse the logic and replace messages TO the routing
	* socket indicate a request to configure interfaces, then it will
	* be unnecessary as the routing socket will automatically generate
	* copies of it.
	*/
	void
	rt_newaddrmsg(int cmd, struct ifaddr ifa, int error, struct rtentry rt)
	{
	struct rt_addrinfo info;
	struct sockaddr *sa = NULL;
	int pass;
	struct mbuf *m = NULL;
	struct ifnet *ifp = ifa->ifa_ifp;

	KASSERT(cmd == RTM_ADD \|\| cmd == RTM_DELETE,
	("unexpected cmd %u", cmd));
	#if defined(INET) \|\| defined(INET6)
	#ifdef SCTP
	/*
	* notify the SCTP stack
	* this will only get called when an address is added/deleted
	* XXX pass the ifaddr struct instead if ifa->ifa_addr...
	*/
	sctp_addr_change(ifa, cmd);
	#endif /* SCTP */
	#endif
	if (route_cb.any_count == 0)
	return;
	for (pass = 1; pass < 3; pass++) {
	bzero((caddr_t)&info, sizeof(info));
	if ((cmd == RTM_ADD && pass == 1) \|\|
	(cmd == RTM_DELETE && pass == 2)) {
	struct ifa_msghdr *ifam;
	int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
	if ((m = rt_msg1(ncmd, &info)) == NULL)
	continue;
	ifam = mtod(m, struct ifa_msghdr *);
	ifam->ifam_index = ifp->if_index;
	ifam->ifam_metric = ifa->ifa_metric;
	ifam->ifam_flags = ifa->ifa_flags;
	ifam->ifam_addrs = info.rti_addrs;
	}
	if ((cmd == RTM_ADD && pass == 2) \|\|
	(cmd == RTM_DELETE && pass == 1)) {
	struct rt_msghdr *rtm;

	if (rt == NULL)
	continue;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_DST] = sa = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if ((m = rt_msg1(cmd, &info)) == NULL)
	continue;
	rtm = mtod(m, struct rt_msghdr *);
	rtm->rtm_index = ifp->if_index;
	rtm->rtm_flags \|= rt->rt_flags;
	rtm->rtm_errno = error;
	rtm->rtm_addrs = info.rti_addrs;
	}
	rt_dispatch(m, sa);
	}
	}

	/*
	* This is the analogue to the rt_newaddrmsg which performs the same
	* function but for multicast group memberhips. This is easier since
	* there is no route state to worry about.
	*/
	void
	rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
	{
	struct rt_addrinfo info;
	struct mbuf *m = NULL;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;

	if (route_cb.any_count == 0)
	return;

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
	/*
	* If a link-layer address is present, present it as a ``gateway''
	* (similarly to how ARP entries, e.g., are presented).
	*/
	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
	m = rt_msg1(cmd, &info);
	if (m == NULL)
	return;
	ifmam = mtod(m, struct ifma_msghdr *);
	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
	__func__));
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	rt_dispatch(m, ifma->ifma_addr);
	}

	/*
	 * Build an interface-announcement style message of the given type for
	 * ifp.  *info is zeroed and used as the (empty) address list.
	 *
	 * Returns the new mbuf, or NULL when there are no routing-socket
	 * listeners or rt_msg1() fails.
	 */
	static struct mbuf *
	rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
		struct rt_addrinfo *info)
	{
		struct if_announcemsghdr *hdr;
		struct mbuf *m;

		if (route_cb.any_count == 0)
			return NULL;

		bzero((caddr_t)info, sizeof(*info));
		m = rt_msg1(type, info);
		if (m == NULL)
			return m;

		hdr = mtod(m, struct if_announcemsghdr *);
		hdr->ifan_index = ifp->if_index;
		strlcpy(hdr->ifan_name, ifp->if_xname, sizeof(hdr->ifan_name));
		hdr->ifan_what = what;
		return m;
	}

	/*
	 * This is called to generate routing socket messages indicating
	 * IEEE80211 wireless events.
	 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
	 *
	 * ifp      - interface the event belongs to.
	 * what     - event code stored in ifan_what.
	 * data     - event payload appended after the announce header.
	 * data_len - payload length in bytes.
	 *
	 * The message is dropped if no mbuf can be allocated for the payload
	 * or if the payload is too large for the single plain mbuf used to
	 * carry it.
	 */
	void
	rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
	{
		struct mbuf *m;
		struct rt_addrinfo info;

		m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
		if (m == NULL)
			return;
		/*
		 * Append the ieee80211 data.  Try to stick it in the
		 * mbuf containing the ifannounce msg; otherwise allocate
		 * a new mbuf and append.
		 *
		 * NB: we assume m is a single mbuf.
		 */
		if (data_len > M_TRAILINGSPACE(m)) {
			struct mbuf *n;

			/*
			 * The extra mbuf comes from m_get() without a cluster,
			 * so it can hold at most MLEN bytes; refuse anything
			 * larger instead of copying past its data area.
			 */
			if (data_len > MLEN) {
				m_freem(m);
				return;
			}
			n = m_get(M_NOWAIT, MT_DATA);
			if (n == NULL) {
				m_freem(m);
				return;
			}
			bcopy(data, mtod(n, void *), data_len);
			n->m_len = data_len;
			m->m_next = n;
		} else if (data_len > 0) {
			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
			m->m_len += data_len;
		}
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len += data_len;
		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
		rt_dispatch(m, NULL);
	}

	/*
	 * This is called to generate routing socket messages indicating
	 * network interface arrival and departure.
	 */
	void
	rt_ifannouncemsg(struct ifnet *ifp, int what)
	{
		struct rt_addrinfo info;
		struct mbuf *msg;

		msg = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
		if (msg == NULL)
			return;
		rt_dispatch(msg, NULL);
	}

	static void
	rt_dispatch(struct mbuf m, const struct sockaddr sa)
	{
	struct m_tag *tag;

	/*
	* Preserve the family from the sockaddr, if any, in an m_tag for
	* use when injecting the mbuf into the routing socket buffer from
	* the netisr.
	*/
	if (sa != NULL) {
	tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
	M_NOWAIT);
	if (tag == NULL) {
	m_freem(m);
	return;
	}
	(unsigned short )(tag + 1) = sa->sa_family;
	m_tag_prepend(m, tag);
	}
	#ifdef VIMAGE
	if (V_loif)
	m->m_pkthdr.rcvif = V_loif;
	else {
	m_freem(m);
	return;
	}
	#endif
	netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
	}

	/*
	* This is used in dumping the kernel table via sysctl().
	*/
	static int
	sysctl_dumpentry(struct radix_node rn, void vw)
	{
	struct walkarg *w = vw;
	struct rtentry rt = (struct rtentry )rn;
	int error = 0, size;
	struct rt_addrinfo info;

	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
	return 0;
	if ((rt->rt_flags & RTF_HOST) == 0
	? jailed_without_vnet(w->w_req->td->td_ucred)
	: prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
	return (0);
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = 0;
	if (rt->rt_ifp) {
	info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
	info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	size = rt_msg2(RTM_GET, &info, NULL, w);
	if (w->w_req && w->w_tmem) {
	struct rt_msghdr rtm = (struct rt_msghdr )w->w_tmem;

	rtm->rtm_flags = rt->rt_flags;
	/*
	* let's be honest about this being a retarded hack
	*/
	rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
	rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
	rtm->rtm_index = rt->rt_ifp->if_index;
	rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
	rtm->rtm_addrs = info.rti_addrs;
	error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
	return (error);
	}
	return (error);
	}

	#ifdef COMPAT_FREEBSD32
	/*
	 * Convert a native if_data into the if_data32 layout.  The
	 * destination is zeroed first, then each listed field is copied with
	 * CP() and the timestamp with TV_CP().
	 */
	static void
	copy_ifdata32(struct if_data *src, struct if_data32 *dst)
	{

		bzero(dst, sizeof(*dst));
		CP(*src, *dst, ifi_type);
		CP(*src, *dst, ifi_physical);
		CP(*src, *dst, ifi_addrlen);
		CP(*src, *dst, ifi_hdrlen);
		CP(*src, *dst, ifi_link_state);
		CP(*src, *dst, ifi_datalen);
		CP(*src, *dst, ifi_mtu);
		CP(*src, *dst, ifi_metric);
		CP(*src, *dst, ifi_baudrate);
		CP(*src, *dst, ifi_ipackets);
		CP(*src, *dst, ifi_ierrors);
		CP(*src, *dst, ifi_opackets);
		CP(*src, *dst, ifi_oerrors);
		CP(*src, *dst, ifi_collisions);
		CP(*src, *dst, ifi_ibytes);
		CP(*src, *dst, ifi_obytes);
		CP(*src, *dst, ifi_imcasts);
		CP(*src, *dst, ifi_omcasts);
		CP(*src, *dst, ifi_iqdrops);
		CP(*src, *dst, ifi_noproto);
		CP(*src, *dst, ifi_hwassist);
		CP(*src, *dst, ifi_epoch);
		TV_CP(*src, *dst, ifi_lastchange);
	}
	#endif

	/*
	 * NET_RT_IFLIST worker: for every interface (or only the one whose
	 * index equals w->w_arg, when non-zero) emit an RTM_IFINFO message
	 * followed by one RTM_NEWADDR message per address after the first on
	 * the interface's address list.
	 *
	 * af - when non-zero, only addresses of this family are reported.
	 * w  - walk state holding the sysctl request and scratch buffer.
	 *
	 * Addresses rejected by prison_if() are skipped.  Returns 0 or the
	 * first SYSCTL_OUT() error.
	 */
	static int
	sysctl_iflist(int af, struct walkarg *w)
	{
		struct ifnet *ifp;
		struct ifaddr *ifa;
		struct rt_addrinfo info;
		int len, error = 0;

		bzero((caddr_t)&info, sizeof(info));
		IFNET_RLOCK();
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			if (w->w_arg && w->w_arg != ifp->if_index)
				continue;
			ifa = ifp->if_addr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, w);
			info.rti_info[RTAX_IFP] = NULL;
			if (w->w_req && w->w_tmem) {
				struct if_msghdr *ifm;

	#ifdef COMPAT_FREEBSD32
				if (w->w_req->flags & SCTL_MASK32) {
					struct if_msghdr32 *ifm32;

					ifm32 = (struct if_msghdr32 *)w->w_tmem;
					ifm32->ifm_index = ifp->if_index;
					ifm32->ifm_flags = ifp->if_flags |
						ifp->if_drv_flags;
					copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
					ifm32->ifm_addrs = info.rti_addrs;
					error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
						len);
					goto sysctl_out;
				}
	#endif
				ifm = (struct if_msghdr *)w->w_tmem;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
				ifm->ifm_data = ifp->if_data;
				ifm->ifm_addrs = info.rti_addrs;
				error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
	#ifdef COMPAT_FREEBSD32
	sysctl_out:
	#endif
				if (error)
					goto done;
			}
			/* Walk the addresses that follow if_addr on the list. */
			while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
				if (af && af != ifa->ifa_addr->sa_family)
					continue;
				if (prison_if(w->w_req->td->td_ucred,
					ifa->ifa_addr) != 0)
					continue;
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
				if (w->w_req && w->w_tmem) {
					struct ifa_msghdr *ifam;

					ifam = (struct ifa_msghdr *)w->w_tmem;
					ifam->ifam_index = ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;
					error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
					if (error)
						goto done;
				}
			}
			/* Reset the per-address slots before the next interface. */
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
				info.rti_info[RTAX_BRD] = NULL;
		}
	done:
		IFNET_RUNLOCK();
		return (error);
	}

	/*
	 * NET_RT_IFMALIST worker: emit one RTM_NEWMADDR message per multicast
	 * address on every interface (or only the interface whose index equals
	 * w->w_arg, when non-zero).  When af is non-zero only addresses of
	 * that family are reported; addresses rejected by prison_if() are
	 * skipped.  Returns 0 or the first SYSCTL_OUT() error.
	 */
	static int
	sysctl_ifmalist(int af, struct walkarg *w)
	{
		struct rt_addrinfo info;
		struct ifmultiaddr *ifma;
		struct ifaddr *ifa;
		struct ifnet *ifp;
		int error, len;

		error = 0;
		bzero((caddr_t)&info, sizeof(info));
		IFNET_RLOCK();
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			if (w->w_arg && w->w_arg != ifp->if_index)
				continue;
			ifa = ifp->if_addr;
			if (ifa)
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			else
				info.rti_info[RTAX_IFP] = NULL;
			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
				struct ifma_msghdr *ifmam;

				if (af && af != ifma->ifma_addr->sa_family)
					continue;
				if (prison_if(w->w_req->td->td_ucred,
				    ifma->ifma_addr) != 0)
					continue;
				info.rti_info[RTAX_IFA] = ifma->ifma_addr;
				/* The link-layer address is shown as the gateway. */
				if (ifma->ifma_addr->sa_family != AF_LINK)
					info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
				else
					info.rti_info[RTAX_GATEWAY] = NULL;
				len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
				if (!(w->w_req && w->w_tmem))
					continue;

				ifmam = (struct ifma_msghdr *)w->w_tmem;
				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
				ifmam->ifmam_flags = 0;
				ifmam->ifmam_addrs = info.rti_addrs;
				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
				if (error) {
					IF_ADDR_UNLOCK(ifp);
					goto done;
				}
			}
			IF_ADDR_UNLOCK(ifp);
		}
	done:
		IFNET_RUNLOCK();
		return (error);
	}

	static int
	sysctl_rtsock(SYSCTL_HANDLER_ARGS)
	{
	int name = (int )arg1;
	u_int namelen = arg2;
	struct radix_node_head rnh = NULL; / silence compiler. */
	int i, lim, error = EINVAL;
	u_char af;
	struct walkarg w;

	name ++;
	namelen--;
	if (req->newptr)
	return (EPERM);
	if (namelen != 3)
	return ((namelen < 3) ? EISDIR : ENOTDIR);
	af = name[0];
	if (af > AF_MAX)
	return (EINVAL);
	bzero(&w, sizeof(w));
	w.w_op = name[1];
	w.w_arg = name[2];
	w.w_req = req;

	error = sysctl_wire_old_buffer(req, 0);
	if (error)
	return (error);
	switch (w.w_op) {

	case NET_RT_DUMP:
	case NET_RT_FLAGS:
	if (af == 0) { /* dump all tables */
	i = 1;
	lim = AF_MAX;
	} else /* dump only one table */
	i = lim = af;

	/*
	* take care of llinfo entries, the caller must
	* specify an AF
	*/
	if (w.w_op == NET_RT_FLAGS &&
	(w.w_arg == 0 \|\| w.w_arg & RTF_LLINFO)) {
	if (af != 0)
	error = lltable_sysctl_dumparp(af, w.w_req);
	else
	error = EINVAL;
	break;
	}
	/*
	* take care of routing entries
	*/
	for (error = 0; error == 0 && i <= lim; i++) {
	rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
	if (rnh != NULL) {
	RADIX_NODE_HEAD_LOCK(rnh);
	error = rnh->rnh_walktree(rnh,
	sysctl_dumpentry, &w);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	} else if (af != 0)
	error = EAFNOSUPPORT;
	}
	break;

	case NET_RT_IFLIST:
	error = sysctl_iflist(af, &w);
	break;

	case NET_RT_IFMALIST:
	error = sysctl_ifmalist(af, &w);
	break;
	}
	if (w.w_tmem)
	free(w.w_tmem, M_RTABLE);
	return (error);
	}

	/* Attach sysctl_rtsock() as the read-only handler of the PF_ROUTE node under _net. */
	SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");

	/*
	* Definitions of protocols supported in the ROUTE domain.
	*/

	static struct domain routedomain; /* or at least forward */

	/* The ROUTE domain offers a single SOCK_RAW protocol entry. */
	static struct protosw routesw[] = {
	{
	.pr_type = SOCK_RAW,
	.pr_domain = &routedomain,
	.pr_flags = PR_ATOMIC\|PR_ADDR,
	.pr_output = route_output,
	.pr_ctlinput = raw_ctlinput,
	.pr_init = raw_init,
	.pr_usrreqs = &route_usrreqs
	}
	};

	/* dom_protoswNPROTOSW points one element past the end of routesw[]. */
	static struct domain routedomain = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_protosw = routesw,
	.dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
	};

	VNET_DOMAIN_SET(route);
	Index: stable/8/sys/netinet/in.c
	===================================================================
	--- stable/8/sys/netinet/in.c (revision 209276)
	+++ stable/8/sys/netinet/in.c (revision 209277)
	@@ -1,1585 +1,1586 @@
	/*-
	* Copyright (c) 1982, 1986, 1991, 1993
	* The Regents of the University of California. All rights reserved.
	* Copyright (C) 2001 WIDE Project. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)in.c 8.4 (Berkeley) 1/9/95
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_carp.h"
	#include "opt_mpath.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/sockio.h>
	#include <sys/malloc.h>
	#include <sys/priv.h>
	#include <sys/socket.h>
	#include <sys/jail.h>
	#include <sys/kernel.h>
	#include <sys/proc.h>
	#include <sys/sysctl.h>
	#include <sys/syslog.h>

	#include <net/if.h>
	#include <net/if_var.h>
	#include <net/if_dl.h>
	#include <net/if_llatbl.h>
	#include <net/if_types.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <netinet/in_pcb.h>
	#include <netinet/ip_var.h>
	#include <netinet/igmp_var.h>
	#include <netinet/udp.h>
	#include <netinet/udp_var.h>

	/* Forward declarations of the file-local helpers defined below. */
	static int in_mask2len(struct in_addr *);
	static void in_len2mask(struct in_addr *, int);
	static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
		struct ifnet *, struct thread *);

	static int	in_addprefix(struct in_ifaddr *, int);
	static int	in_scrubprefix(struct in_ifaddr *);
	static void	in_socktrim(struct sockaddr_in *);
	static int	in_ifinit(struct ifnet *,
		    struct in_ifaddr *, struct sockaddr_in *, int);
	static void	in_purgemaddrs(struct ifnet *);

	/* Read/write knob consulted by in_localaddr() to pick net vs. subnet matching. */
	static VNET_DEFINE(int, subnetsarelocal);
	#define V_subnetsarelocal VNET(subnetsarelocal)
	SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
	&VNET_NAME(subnetsarelocal), 0,
	"Treat all subnets as directly connected");
	/* Read/write knob consulted by in_addprefix() when a prefix already has a route. */
	static VNET_DEFINE(int, sameprefixcarponly);
	#define V_sameprefixcarponly VNET(sameprefixcarponly)
	SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
	&VNET_NAME(sameprefixcarponly), 0,
	"Refuse to create same prefixes on different interfaces");

	/* Declared elsewhere; used by in_ifdetach() below. */
	VNET_DECLARE(struct inpcbinfo, ripcbinfo);
	#define V_ripcbinfo VNET(ripcbinfo)

	/*
	* Return 1 if an internet address is for a ``local'' host
	* (one to which we have a connection). If subnetsarelocal
	* is true, this includes other subnets of the local net.
	* Otherwise, it includes only the directly-connected (sub)nets.
	*/
	int
	in_localaddr(struct in_addr in)
	{
	register u_long i = ntohl(in.s_addr);
	register struct in_ifaddr *ia;

	IN_IFADDR_RLOCK();
	if (V_subnetsarelocal) {
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
	if ((i & ia->ia_netmask) == ia->ia_net) {
	IN_IFADDR_RUNLOCK();
	return (1);
	}
	}
	} else {
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
	if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
	IN_IFADDR_RUNLOCK();
	return (1);
	}
	}
	}
	IN_IFADDR_RUNLOCK();
	return (0);
	}

	/*
	 * Tell whether an internet address is configured on one of this
	 * host's interfaces.  Only the hash chain selected by INADDR_HASH()
	 * for the address is searched.  Returns 1 when found, else 0.
	 */
	int
	in_localip(struct in_addr in)
	{
		struct in_ifaddr *ia;
		int configured;

		configured = 0;
		IN_IFADDR_RLOCK();
		LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
			if (IA_SIN(ia)->sin_addr.s_addr != in.s_addr)
				continue;
			configured = 1;
			break;
		}
		IN_IFADDR_RUNLOCK();
		return (configured);
	}

	/*
	* Determine whether an IP address is in a reserved set of addresses
	* that may not be forwarded, or whether datagrams to that destination
	* may be forwarded.
	*/
	int
	in_canforward(struct in_addr in)
	{
	register u_long i = ntohl(in.s_addr);
	register u_long net;

	if (IN_EXPERIMENTAL(i) \|\| IN_MULTICAST(i) \|\| IN_LINKLOCAL(i))
	return (0);
	if (IN_CLASSA(i)) {
	net = i & IN_CLASSA_NET;
	if (net == 0 \|\| net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
	return (0);
	}
	return (1);
	}

	/*
	 * Trim a mask in a sockaddr: sin_len is set so that it covers the
	 * structure up to and including the last non-zero byte of sin_addr,
	 * or to 0 when every byte of sin_addr is zero.
	 */
	static void
	in_socktrim(struct sockaddr_in *ap)
	{
		char *cplim = (char *)&ap->sin_addr;
		char *cp = (char *)(&ap->sin_addr + 1);

		ap->sin_len = 0;
		/* Scan sin_addr backwards for its last non-zero byte. */
		while (--cp >= cplim) {
			if (*cp) {
				ap->sin_len = cp - (char *)ap + 1;
				break;
			}
		}
	}

	/*
	 * Convert a netmask into a prefix length: the number of leading one
	 * bits, counted byte by byte in memory order (i.e. network order).
	 * Counting stops at the first zero bit, so a non-contiguous mask
	 * yields the length of its leading run only.
	 */
	static int
	in_mask2len(struct in_addr *mask)
	{
		const unsigned char *p = (const unsigned char *)mask;
		size_t x;
		int y;

		/* Whole bytes that are fully set. */
		for (x = 0; x < sizeof(*mask); x++) {
			if (p[x] != 0xff)
				break;
		}
		/* Leading one bits of the first byte that is not 0xff. */
		y = 0;
		if (x < sizeof(*mask)) {
			for (y = 0; y < 8; y++) {
				if ((p[x] & (0x80 >> y)) == 0)
					break;
			}
		}
		return ((int)(x * 8) + y);
	}

	/*
	 * Build the netmask that has its `len' leading bits set, stored byte
	 * by byte in memory order (i.e. network order).
	 *
	 * `len' is clamped to the width of the mask: a value of zero or less
	 * produces an all-zero mask and anything above 32 produces an all-ones
	 * mask, so the function never writes outside *mask nor shifts by a
	 * negative count.
	 */
	static void
	in_len2mask(struct in_addr *mask, int len)
	{
		const int maxbits = (int)(sizeof(*mask) * 8);
		unsigned char *p = (unsigned char *)mask;
		int i;

		bzero(mask, sizeof(*mask));
		if (len <= 0)
			return;
		if (len > maxbits)
			len = maxbits;
		for (i = 0; i < len / 8; i++)
			p[i] = 0xff;
		/* Partial byte: keep only the top (len % 8) bits. */
		if (len % 8)
			p[i] = (0xff00 >> (len % 8)) & 0xff;
	}

	/*
	* Generic internet control operations (ioctl's).
	*
	* ifp is NULL if not an interface-specific ioctl.
	*
	* Outline of the function body:
	* 1. Commands not handled here are handed to in_lifaddr_ioctl()
	* (SIOC[ADG]LIFADDR) or to ifp->if_ioctl (everything else).
	* 2. Add/set/delete commands are checked with priv_check() when td
	* is not NULL.
	* 3. The in_ifaddr the request refers to is looked up and referenced.
	* 4. For SIOCAIFADDR, SIOCSIFADDR, SIOCSIFNETMASK and SIOCSIFDSTADDR
	* a new in_ifaddr is allocated and linked when none was found.
	* 5. The command itself is carried out.
	* 6. Paths that "break" out of the last switch unlink the address
	* from the interface list and from V_in_ifaddrhead.
	*
	* Returns 0 on success or an errno value.
	*
	* NOTE(review): several declarations and casts in this function read
	* as if a '*' went missing (e.g. "struct socket so",
	* "(struct ifreq )data"); confirm against the repository copy.
	*/
	/* ARGSUSED */
	int
	in_control(struct socket so, u_long cmd, caddr_t data, struct ifnet ifp,
	struct thread *td)
	{
	register struct ifreq ifr = (struct ifreq )data;
	register struct in_ifaddr ia, iap;
	register struct ifaddr *ifa;
	struct in_addr allhosts_addr;
	struct in_addr dst;
	struct in_ifinfo *ii;
	struct in_aliasreq ifra = (struct in_aliasreq )data;
	struct sockaddr_in oldaddr;
	int error, hostIsNew, iaIsNew, maskIsNew;
	int iaIsFirst;

	ia = NULL;
	iaIsFirst = 0;
	iaIsNew = 0;
	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);

	/*
	* Filter out ioctls we implement directly; forward the rest on to
	* in_lifaddr_ioctl() and ifp->if_ioctl().
	*/
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCDIFADDR:
	case SIOCGIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFDSTADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFNETMASK:
	break;

	case SIOCALIFADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_ADDIFADDR);
	if (error)
	return (error);
	}
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCDLIFADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_DELIFADDR);
	if (error)
	return (error);
	}
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCGLIFADDR:
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	default:
	if (ifp == NULL \|\| ifp->if_ioctl == NULL)
	return (EOPNOTSUPP);
	return ((*ifp->if_ioctl)(ifp, cmd, data));
	}

	/* Every command that reaches this point needs an interface. */
	if (ifp == NULL)
	return (EADDRNOTAVAIL);

	/*
	* Security checks before we get involved in any work.
	*/
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_ADDIFADDR);
	if (error)
	return (error);
	}
	break;

	case SIOCDIFADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_DELIFADDR);
	if (error)
	return (error);
	}
	break;
	}

	/*
	* Find address for this interface, if it exists.
	*
	* If an alias address was specified, find that one instead of the
	* first one on the interface, if possible.
	*/
	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
	IN_IFADDR_RLOCK();
	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
	if (iap->ia_ifp == ifp &&
	iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
	if (td == NULL \|\| prison_check_ip4(td->td_ucred,
	&dst) == 0)
	ia = iap;
	break;
	}
	}
	/* Hold a reference across the unlock; it is dropped at "out". */
	if (ia != NULL)
	ifa_ref(&ia->ia_ifa);
	IN_IFADDR_RUNLOCK();
	if (ia == NULL) {
	/*
	* No exact match: fall back to the first AF_INET address on the
	* interface that passes prison_check_ip4() (when td is set).
	*/
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	iap = ifatoia(ifa);
	if (iap->ia_addr.sin_family == AF_INET) {
	if (td != NULL &&
	prison_check_ip4(td->td_ucred,
	&iap->ia_addr.sin_addr) != 0)
	continue;
	ia = iap;
	break;
	}
	}
	if (ia != NULL)
	ifa_ref(&ia->ia_ifa);
	IF_ADDR_UNLOCK(ifp);
	}
	if (ia == NULL)
	iaIsFirst = 1;

	error = 0;
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCDIFADDR:
	if (ifra->ifra_addr.sin_family == AF_INET) {
	struct in_ifaddr *oia;

	/*
	* Walk forward from the address found above looking for the
	* exact address named in the request, moving the held
	* reference from the old candidate (oia) to the new one.
	*/
	IN_IFADDR_RLOCK();
	for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
	if (ia->ia_ifp == ifp &&
	ia->ia_addr.sin_addr.s_addr ==
	ifra->ifra_addr.sin_addr.s_addr)
	break;
	}
	if (ia != NULL && ia != oia)
	ifa_ref(&ia->ia_ifa);
	if (oia != NULL && ia != oia)
	ifa_free(&oia->ia_ifa);
	IN_IFADDR_RUNLOCK();
	if ((ifp->if_flags & IFF_POINTOPOINT)
	&& (cmd == SIOCAIFADDR)
	&& (ifra->ifra_dstaddr.sin_addr.s_addr
	== INADDR_ANY)) {
	error = EDESTADDRREQ;
	goto out;
	}
	}
	if (cmd == SIOCDIFADDR && ia == NULL) {
	error = EADDRNOTAVAIL;
	goto out;
	}
	/* FALLTHROUGH */
	case SIOCSIFADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
	if (ia == NULL) {
	ia = (struct in_ifaddr *)
	malloc(sizeof *ia, M_IFADDR, M_NOWAIT \|
	M_ZERO);
	if (ia == NULL) {
	error = ENOBUFS;
	goto out;
	}

	ifa = &ia->ia_ifa;
	ifa_init(ifa);
	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;

	ia->ia_sockmask.sin_len = 8;
	ia->ia_sockmask.sin_family = AF_INET;
	if (ifp->if_flags & IFF_BROADCAST) {
	ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
	ia->ia_broadaddr.sin_family = AF_INET;
	}
	ia->ia_ifp = ifp;

	/* One reference is taken for each list the entry is linked on. */
	ifa_ref(ifa); /* if_addrhead */
	IF_ADDR_LOCK(ifp);
	TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
	IF_ADDR_UNLOCK(ifp);
	ifa_ref(ifa); /* in_ifaddrhead */
	IN_IFADDR_WLOCK();
	TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
	IN_IFADDR_WUNLOCK();
	iaIsNew = 1;
	}
	break;

	case SIOCSIFBRDADDR:
	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
	if (ia == NULL) {
	error = EADDRNOTAVAIL;
	goto out;
	}
	break;
	}

	/*
	* Most paths in this switch return directly or via out. Only paths
	* that remove the address break in order to hit common removal code.
	*/
	switch (cmd) {
	case SIOCGIFADDR:
	((struct sockaddr_in )&ifr->ifr_addr) = ia->ia_addr;
	goto out;

	case SIOCGIFBRDADDR:
	if ((ifp->if_flags & IFF_BROADCAST) == 0) {
	error = EINVAL;
	goto out;
	}
	((struct sockaddr_in )&ifr->ifr_dstaddr) = ia->ia_broadaddr;
	goto out;

	case SIOCGIFDSTADDR:
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
	error = EINVAL;
	goto out;
	}
	((struct sockaddr_in )&ifr->ifr_dstaddr) = ia->ia_dstaddr;
	goto out;

	case SIOCGIFNETMASK:
	((struct sockaddr_in )&ifr->ifr_addr) = ia->ia_sockmask;
	goto out;

	case SIOCSIFDSTADDR:
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
	error = EINVAL;
	goto out;
	}
	oldaddr = ia->ia_dstaddr;
	ia->ia_dstaddr = (struct sockaddr_in )&ifr->ifr_dstaddr;
	if (ifp->if_ioctl != NULL) {
	error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
	(caddr_t)ia);
	if (error) {
	ia->ia_dstaddr = oldaddr;
	goto out;
	}
	}
	if (ia->ia_flags & IFA_ROUTE) {
	/*
	* Delete the host route using the old destination, then add
	* one for the new destination.
	*/
	ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
	ia->ia_ifa.ifa_dstaddr =
	(struct sockaddr *)&ia->ia_dstaddr;
	rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST\|RTF_UP);
	}
	goto out;

	case SIOCSIFBRDADDR:
	if ((ifp->if_flags & IFF_BROADCAST) == 0) {
	error = EINVAL;
	goto out;
	}
	ia->ia_broadaddr = (struct sockaddr_in )&ifr->ifr_broadaddr;
	goto out;

	case SIOCSIFADDR:
	error = in_ifinit(ifp, ia,
	(struct sockaddr_in *) &ifr->ifr_addr, 1);
	/* A freshly created entry that failed to initialise is unlinked below. */
	if (error != 0 && iaIsNew)
	break;
	if (error == 0) {
	ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
	if (iaIsFirst &&
	(ifp->if_flags & IFF_MULTICAST) != 0) {
	error = in_joingroup(ifp, &allhosts_addr,
	NULL, &ii->ii_allhosts);
	}
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	}
	/* Any error from in_ifinit() or in_joingroup() is discarded here. */
	error = 0;
	goto out;

	case SIOCSIFNETMASK:
	ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
	ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
	goto out;

	case SIOCAIFADDR:
	maskIsNew = 0;
	hostIsNew = 1;
	error = 0;
	if (ia->ia_addr.sin_family == AF_INET) {
	if (ifra->ifra_addr.sin_len == 0) {
	ifra->ifra_addr = ia->ia_addr;
	hostIsNew = 0;
	} else if (ifra->ifra_addr.sin_addr.s_addr ==
	ia->ia_addr.sin_addr.s_addr)
	hostIsNew = 0;
	}
	if (ifra->ifra_mask.sin_len) {
	/*
	* QL: XXX
	* Need to scrub the prefix here in case
	* the issued command is SIOCAIFADDR with
	* the same address, but with a different
	* prefix length. And if the prefix length
	* is the same as before, then the call is
	* un-necessarily executed here.
	*/
	in_ifscrub(ifp, ia);
	ia->ia_sockmask = ifra->ifra_mask;
	ia->ia_sockmask.sin_family = AF_INET;
	ia->ia_subnetmask =
	ntohl(ia->ia_sockmask.sin_addr.s_addr);
	maskIsNew = 1;
	}
	if ((ifp->if_flags & IFF_POINTOPOINT) &&
	(ifra->ifra_dstaddr.sin_family == AF_INET)) {
	in_ifscrub(ifp, ia);
	ia->ia_dstaddr = ifra->ifra_dstaddr;
	maskIsNew = 1; /* We lie; but the effect's the same */
	}
	if (ifra->ifra_addr.sin_family == AF_INET &&
	(hostIsNew \|\| maskIsNew))
	error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
	/*
	* NOTE(review): unlike SIOCSIFADDR above, which breaks to the
	* removal code, a new entry that fails in_ifinit() leaves via
	* "out" here and so stays linked on both lists; confirm this
	* is intended.
	*/
	if (error != 0 && iaIsNew)
	goto out;

	if ((ifp->if_flags & IFF_BROADCAST) &&
	(ifra->ifra_broadaddr.sin_family == AF_INET))
	ia->ia_broadaddr = ifra->ifra_broadaddr;
	if (error == 0) {
	ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
	if (iaIsFirst &&
	(ifp->if_flags & IFF_MULTICAST) != 0) {
	error = in_joingroup(ifp, &allhosts_addr,
	NULL, &ii->ii_allhosts);
	}
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	}
	goto out;

	case SIOCDIFADDR:
	/*
	* in_ifscrub kills the interface route.
	*/
	in_ifscrub(ifp, ia);

	/*
	* in_ifadown gets rid of all the rest of
	* the routes. This is not quite the right
	* thing to do, but at least if we are running
	* a routing process they will come back.
	*/
	in_ifadown(&ia->ia_ifa, 1);
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	error = 0;
	break;

	default:
	panic("in_control: unsupported ioctl");
	}

	/*
	* Common removal code: unlink the address from the per-interface
	* list and from the global list, dropping the reference each list
	* held.
	*/
	IF_ADDR_LOCK(ifp);
	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
	IF_ADDR_UNLOCK(ifp);
	ifa_free(&ia->ia_ifa); /* if_addrhead */

	IN_IFADDR_WLOCK();
	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
	if (ia->ia_addr.sin_family == AF_INET) {
	struct in_ifaddr *if_ia;

	LIST_REMOVE(ia, ia_hash);
	IN_IFADDR_WUNLOCK();
	/*
	* If this is the last IPv4 address configured on this
	* interface, leave the all-hosts group.
	* No state-change report need be transmitted.
	*/
	if_ia = NULL;
	IFP_TO_IA(ifp, if_ia);
	if (if_ia == NULL) {
	ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
	IN_MULTI_LOCK();
	if (ii->ii_allhosts) {
	(void)in_leavegroup_locked(ii->ii_allhosts,
	NULL);
	ii->ii_allhosts = NULL;
	}
	IN_MULTI_UNLOCK();
	} else
	ifa_free(&if_ia->ia_ifa);
	} else
	IN_IFADDR_WUNLOCK();
	ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
	out:
	/* Drop the reference taken when the address was looked up. */
	if (ia != NULL)
	ifa_free(&ia->ia_ifa);
	return (error);
	}

	/*
	 * SIOC[GAD]LIFADDR.
	 *	SIOCGLIFADDR: get first address. (?!?)
	 *	SIOCGLIFADDR with IFLR_PREFIX:
	 *		get first address that matches the specified prefix.
	 *	SIOCALIFADDR: add the specified address.
	 *	SIOCALIFADDR with IFLR_PREFIX:
	 *		EINVAL since we can't deduce hostid part of the address.
	 *	SIOCDLIFADDR: delete the specified address.
	 *	SIOCDLIFADDR with IFLR_PREFIX:
	 *		delete the first address that matches the specified prefix.
	 * return values:
	 *	EINVAL on invalid parameters
	 *	EADDRNOTAVAIL on prefix match failed/specified address not found
	 *	EOPNOTSUPP for any other command
	 *	other values may be returned from in_control()
	 *
	 * `data' points to a struct if_laddrreq; both `data' and `ifp' must be
	 * non-NULL (the function panics otherwise).  Add and delete requests
	 * are translated into a struct in_aliasreq and passed to in_control().
	 */
	static int
	in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
	    struct ifnet *ifp, struct thread *td)
	{
		struct if_laddrreq *iflr = (struct if_laddrreq *)data;
		struct ifaddr *ifa;

		/* sanity checks */
		if (data == NULL || ifp == NULL) {
			panic("invalid argument to in_lifaddr_ioctl");
			/*NOTREACHED*/
		}

		switch (cmd) {
		case SIOCGLIFADDR:
			/* address must be specified on GET with IFLR_PREFIX */
			if ((iflr->flags & IFLR_PREFIX) == 0)
				break;
			/*FALLTHROUGH*/
		case SIOCALIFADDR:
		case SIOCDLIFADDR:
			/* address must be specified on ADD and DELETE */
			if (iflr->addr.ss_family != AF_INET)
				return (EINVAL);
			if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
				return (EINVAL);
			/* XXX need improvement */
			if (iflr->dstaddr.ss_family
			 && iflr->dstaddr.ss_family != AF_INET)
				return (EINVAL);
			if (iflr->dstaddr.ss_family
			 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
				return (EINVAL);
			break;
		default: /*shouldn't happen*/
			return (EOPNOTSUPP);
		}
		if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
			return (EINVAL);

		switch (cmd) {
		case SIOCALIFADDR:
		    {
			struct in_aliasreq ifra;

			if (iflr->flags & IFLR_PREFIX)
				return (EINVAL);

			/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name,
				sizeof(ifra.ifra_name));

			bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);

			if (iflr->dstaddr.ss_family) {	/*XXX*/
				bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
					iflr->dstaddr.ss_len);
			}

			ifra.ifra_mask.sin_family = AF_INET;
			ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
			in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);

			return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
		    }
		case SIOCGLIFADDR:
		case SIOCDLIFADDR:
		    {
			struct in_ifaddr *ia;
			struct in_addr mask, candidate, match;
			struct sockaddr_in *sin;

			bzero(&mask, sizeof(mask));
			bzero(&match, sizeof(match));
			if (iflr->flags & IFLR_PREFIX) {
				/* lookup a prefix rather than address. */
				in_len2mask(&mask, iflr->prefixlen);

				sin = (struct sockaddr_in *)&iflr->addr;
				match.s_addr = sin->sin_addr.s_addr;
				match.s_addr &= mask.s_addr;

				/* if you set extra bits, that's wrong */
				if (match.s_addr != sin->sin_addr.s_addr)
					return (EINVAL);

			} else {
				/* on getting an address, take the 1st match */
				/* on deleting an address, do exact match */
				if (cmd != SIOCGLIFADDR) {
					in_len2mask(&mask, 32);
					sin = (struct sockaddr_in *)&iflr->addr;
					match.s_addr = sin->sin_addr.s_addr;
				}
			}

			/*
			 * NOTE(review): this walk of if_addrhead is not covered
			 * by IF_ADDR_LOCK(), unlike the one in in_control();
			 * confirm the caller provides the needed protection.
			 */
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				/* Only IPv4 addresses are candidates. */
				if (ifa->ifa_addr->sa_family != AF_INET)
					continue;
				/* A zero match means "take the first one". */
				if (match.s_addr == 0)
					break;
				/* ifa_addr is itself the sockaddr pointer. */
				sin = (struct sockaddr_in *)ifa->ifa_addr;
				candidate.s_addr = sin->sin_addr.s_addr;
				candidate.s_addr &= mask.s_addr;
				if (candidate.s_addr == match.s_addr)
					break;
			}
			if (ifa == NULL)
				return (EADDRNOTAVAIL);
			ia = (struct in_ifaddr *)ifa;

			if (cmd == SIOCGLIFADDR) {
				/* fill in the if_laddrreq structure */
				bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);

				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
						ia->ia_dstaddr.sin_len);
				} else
					bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

				iflr->prefixlen =
					in_mask2len(&ia->ia_sockmask.sin_addr);

				iflr->flags = 0;	/*XXX*/

				return (0);
			} else {
				struct in_aliasreq ifra;

				/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
				bzero(&ifra, sizeof(ifra));
				bcopy(iflr->iflr_name, ifra.ifra_name,
					sizeof(ifra.ifra_name));

				bcopy(&ia->ia_addr, &ifra.ifra_addr,
					ia->ia_addr.sin_len);
				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
						ia->ia_dstaddr.sin_len);
				}
				/* The netmask belongs in ifra_mask, not ifra_dstaddr. */
				bcopy(&ia->ia_sockmask, &ifra.ifra_mask,
					ia->ia_sockmask.sin_len);

				return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
				    ifp, td));
			}
		    }
		}

		return (EOPNOTSUPP);	/*just for safety*/
	}

	/*
	 * Delete any existing route for an interface address.
	 *
	 * Thin wrapper around in_scrubprefix(); `ifp' is accepted for the
	 * benefit of existing callers but is not used.
	 */
	void
	in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
	{

		in_scrubprefix(ia);
	}

	/*
	* Initialize an interface's internet address
	* and routing table entry.
	*
	* ifp is the interface, ia the address record to (re)initialise and
	* sin the new address.  When scrub is non-zero the route belonging to
	* the previous address is removed through in_ifscrub().
	*
	* Returns 0 on success, otherwise the error reported by
	* ifp->if_ioctl(SIOCSIFADDR), in_addprefix() or
	* ifa_add_loopback_route().
	*/
	static int
	in_ifinit(struct ifnet ifp, struct in_ifaddr ia, struct sockaddr_in *sin,
	int scrub)
	{
	register u_long i = ntohl(sin->sin_addr.s_addr);
	struct sockaddr_in oldaddr;
	int s = splimp(), flags = RTF_UP, error = 0;

	/*
	* Re-hash ia under its new address.
	* NOTE(review): the LIST_REMOVE() below runs without
	* IN_IFADDR_WLOCK() while the insert that follows takes it; confirm
	* the callers provide the required protection.
	*/
	oldaddr = ia->ia_addr;
	if (oldaddr.sin_family == AF_INET)
	LIST_REMOVE(ia, ia_hash);
	ia->ia_addr = *sin;
	if (ia->ia_addr.sin_family == AF_INET) {
	IN_IFADDR_WLOCK();
	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
	ia, ia_hash);
	IN_IFADDR_WUNLOCK();
	}
	/*
	* Give the interface a chance to initialize
	* if this is its first address,
	* and to validate the address if necessary.
	*/
	if (ifp->if_ioctl != NULL) {
	error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
	if (error) {
	splx(s);
	/* LIST_REMOVE(ia, ia_hash) is done in in_control */
	/*
	* Roll back to the previous address.
	* NOTE(review): ia may already be linked under the new address
	* by the insert above; confirm the re-insert below cannot link
	* it a second time.
	*/
	ia->ia_addr = oldaddr;
	IN_IFADDR_WLOCK();
	if (ia->ia_addr.sin_family == AF_INET)
	LIST_INSERT_HEAD(INADDR_HASH(
	ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
	else
	/*
	* If oldaddr family is not AF_INET (e.g.
	* interface has been just created) in_control
	* does not call LIST_REMOVE, and we end up
	* with bogus ia entries in hash
	*/
	LIST_REMOVE(ia, ia_hash);
	IN_IFADDR_WUNLOCK();
	return (error);
	}
	}
	splx(s);
	if (scrub) {
	/* Point ifa_addr at the old address while its route is scrubbed. */
	ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
	in_ifscrub(ifp, ia);
	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
	}
	/* Derive the classful network mask from the new address. */
	if (IN_CLASSA(i))
	ia->ia_netmask = IN_CLASSA_NET;
	else if (IN_CLASSB(i))
	ia->ia_netmask = IN_CLASSB_NET;
	else
	ia->ia_netmask = IN_CLASSC_NET;
	/*
	* The subnet mask usually includes at least the standard network part,
	* but may be smaller in the case of supernetting.
	* If it is set, we believe it.
	*/
	if (ia->ia_subnetmask == 0) {
	ia->ia_subnetmask = ia->ia_netmask;
	ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
	} else
	ia->ia_netmask &= ia->ia_subnetmask;
	/* i is in host byte order, so ia_net and ia_subnet are too. */
	ia->ia_net = i & ia->ia_netmask;
	ia->ia_subnet = i & ia->ia_subnetmask;
	in_socktrim(&ia->ia_sockmask);
	#ifdef DEV_CARP
	/*
	* XXX: carp(4) does not have interface route
	*/
	if (ifp->if_type == IFT_CARP)
	return (0);
	#endif
	/*
	* Add route for the network.
	*/
	ia->ia_ifa.ifa_metric = ifp->if_metric;
	if (ifp->if_flags & IFF_BROADCAST) {
	ia->ia_broadaddr.sin_addr.s_addr =
	htonl(ia->ia_subnet \| ~ia->ia_subnetmask);
	ia->ia_netbroadcast.s_addr =
	htonl(ia->ia_net \| ~ ia->ia_netmask);
	} else if (ifp->if_flags & IFF_LOOPBACK) {
	ia->ia_dstaddr = ia->ia_addr;
	flags \|= RTF_HOST;
	} else if (ifp->if_flags & IFF_POINTOPOINT) {
	/* No route is added until a destination address is set. */
	if (ia->ia_dstaddr.sin_family != AF_INET)
	return (0);
	flags \|= RTF_HOST;
	}
	if ((error = in_addprefix(ia, flags)) != 0)
	return (error);

	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
	return (0);

	if (ifp->if_flags & IFF_POINTOPOINT) {
	if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
	return (0);
	}


	/*
	* add a loopback route to self
	*/
	if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
	struct route ia_ro;

	bzero(&ia_ro, sizeof(ia_ro));
	((struct sockaddr_in )(&ia_ro.ro_dst)) = ia->ia_addr;
	rtalloc_ign_fib(&ia_ro, 0, 0);
	/*
	* If a route through V_loif already exists for this address,
	* take an extra reference on it; otherwise install a new
	* loopback route.
	*/
	if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
	(ia_ro.ro_rt->rt_ifp == V_loif)) {
	RT_LOCK(ia_ro.ro_rt);
	RT_ADDREF(ia_ro.ro_rt);
	RTFREE_LOCKED(ia_ro.ro_rt);
	} else
	error = ifa_add_loopback_route((struct ifaddr *)ia,
	(struct sockaddr *)&ia->ia_addr);
	if (error == 0)
	ia->ia_flags \|= IFA_RTSELF;
	if (ia_ro.ro_rt != NULL)
	RTFREE(ia_ro.ro_rt);
	}

	return (error);
	}

	/*
	* Route flags for address x: RTF_HOST when its interface is a loopback
	* or point-to-point interface, 0 otherwise.
	*/
	#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK \| IFF_POINTOPOINT)) != 0) \
	? RTF_HOST : 0)

	/*
	* Generate a routing message when inserting or deleting
	* an interface address alias.
	*/
	static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
	struct in_ifaddr *target)
	{
	struct route pfx_ro;
	struct sockaddr_in *pfx_addr;
	struct rtentry msg_rt;

	/* QL: XXX
	* This is a bit questionable because there is no
	* additional route entry added/deleted for an address
	* alias. Therefore this route report is inaccurate.
	*/
	bzero(&pfx_ro, sizeof(pfx_ro));
	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
	pfx_addr->sin_len = sizeof(*pfx_addr);
	pfx_addr->sin_family = AF_INET;
	pfx_addr->sin_addr = *prefix;
	rtalloc_ign_fib(&pfx_ro, 0, 0);
	if (pfx_ro.ro_rt != NULL) {
	msg_rt = *pfx_ro.ro_rt;

	/* QL: XXX
	* Point the gateway to the new interface
	* address as if a new prefix route entry has
	* been added through the new address alias.
	* All other parts of the rtentry is accurate,
	* e.g., rt_key, rt_mask, rt_ifp etc.
	*/
	msg_rt.rt_gateway =
	(struct sockaddr *)&target->ia_addr;
	rt_newaddrmsg(cmd,
	(struct ifaddr *)target,
	0, &msg_rt);
	RTFREE(pfx_ro.ro_rt);
	}
	return;
	}

	/*
	 * Check if we have a route for the given prefix already or add one
	 * accordingly.
	 *
	 * `flags' are the route flags to use; with RTF_HOST the prefix is the
	 * destination address of `target', otherwise its address masked with
	 * its netmask.
	 *
	 * Returns 0 when a route already exists or was installed, EEXIST when
	 * an existing route forbids the new address, or the error returned by
	 * rtinit().  IN_IFADDR_RLOCK is released on every return path.
	 */
	static int
	in_addprefix(struct in_ifaddr *target, int flags)
	{
		struct in_ifaddr *ia;
		struct in_addr prefix, mask, p, m;
		int error;

		if ((flags & RTF_HOST) != 0) {
			prefix = target->ia_dstaddr.sin_addr;
			mask.s_addr = 0;
		} else {
			prefix = target->ia_addr.sin_addr;
			mask = target->ia_sockmask.sin_addr;
			prefix.s_addr &= mask.s_addr;
		}

		IN_IFADDR_RLOCK();
		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
			if (rtinitflags(ia)) {
				p = ia->ia_addr.sin_addr;

				if (prefix.s_addr != p.s_addr)
					continue;
			} else {
				p = ia->ia_addr.sin_addr;
				m = ia->ia_sockmask.sin_addr;
				p.s_addr &= m.s_addr;

				if (prefix.s_addr != p.s_addr ||
				    mask.s_addr != m.s_addr)
					continue;
			}

			/*
			 * If we got a matching prefix route inserted by other
			 * interface address, we are done here.
			 */
			if (ia->ia_flags & IFA_ROUTE) {
	#ifdef RADIX_MPATH
				if (ia->ia_addr.sin_addr.s_addr ==
				    target->ia_addr.sin_addr.s_addr) {
					/* Do not leak the read lock on this exit. */
					IN_IFADDR_RUNLOCK();
					return (EEXIST);
				} else
					break;
	#endif
				if (V_sameprefixcarponly &&
				    target->ia_ifp->if_type != IFT_CARP &&
				    ia->ia_ifp->if_type != IFT_CARP) {
					IN_IFADDR_RUNLOCK();
					return (EEXIST);
				} else {
					in_addralias_rtmsg(RTM_ADD, &prefix, target);
					IN_IFADDR_RUNLOCK();
					return (0);
				}
			}
		}
		IN_IFADDR_RUNLOCK();

		/*
		 * No one seems to have this prefix route, so we try to insert it.
		 */
		error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
		if (!error)
			target->ia_flags |= IFA_ROUTE;
		return (error);
	}

	extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);

	/*
	* If there is no other address in the system that can serve a route to the
	* same prefix, remove the route. Hand over the route to the new address
	* otherwise.
	*
	* Returns 0, or the error from rtinit() when re-adding the route on
	* behalf of another address fails.
	*/
	static int
	in_scrubprefix(struct in_ifaddr *target)
	{
	struct in_ifaddr *ia;
	struct in_addr prefix, mask, p;
	int error = 0;
	struct sockaddr_in prefix0, mask0;

	/*
	* Remove the loopback route to the interface address.
	* The "useloopback" setting is not consulted because if the
	* user configures an interface address, turns off this
	* setting, and then tries to delete that interface address,
	* checking the current setting of "useloopback" would leave
	* that interface address loopback route untouched, which
	* would be wrong. Therefore the interface address loopback route
	* deletion is unconditional.
	*/
	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
	!(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
	(target->ia_flags & IFA_RTSELF)) {
	struct route ia_ro;
	int freeit = 0;

	bzero(&ia_ro, sizeof(ia_ro));
	((struct sockaddr_in )(&ia_ro.ro_dst)) = target->ia_addr;
	rtalloc_ign_fib(&ia_ro, 0, 0);
	/*
	* When the route goes through V_loif: delete it if its reference
	* count is at most 1, otherwise only drop one reference.
	*/
	if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
	(ia_ro.ro_rt->rt_ifp == V_loif)) {
	RT_LOCK(ia_ro.ro_rt);
	if (ia_ro.ro_rt->rt_refcnt <= 1)
	freeit = 1;
	else
	RT_REMREF(ia_ro.ro_rt);
	RTFREE_LOCKED(ia_ro.ro_rt);
	}
	if (freeit)
	error = ifa_del_loopback_route((struct ifaddr *)target,
	(struct sockaddr *)&target->ia_addr);
	if (error == 0)
	target->ia_flags &= ~IFA_RTSELF;
	/* remove arp cache */
	arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
	}

	/* Work out the prefix this address provides a route for. */
	if (rtinitflags(target))
	prefix = target->ia_dstaddr.sin_addr;
	else {
	prefix = target->ia_addr.sin_addr;
	mask = target->ia_sockmask.sin_addr;
	prefix.s_addr &= mask.s_addr;
	}

	/* target does not own the prefix route: only report the alias removal. */
	if ((target->ia_flags & IFA_ROUTE) == 0) {
	in_addralias_rtmsg(RTM_DELETE, &prefix, target);
	return (0);
	}

	IN_IFADDR_RLOCK();
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
	if (rtinitflags(ia))
	p = ia->ia_dstaddr.sin_addr;
	else {
	p = ia->ia_addr.sin_addr;
	p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
	}

	if (prefix.s_addr != p.s_addr)
	continue;

	/*
	* If we got a matching prefix address, move IFA_ROUTE and
	* the route itself to it. Make sure that routing daemons
	* get a heads-up.
	*
	* XXX: a special case for carp(4) interface
	*/
	if ((ia->ia_flags & IFA_ROUTE) == 0
	#ifdef DEV_CARP
	&& (ia->ia_ifp->if_type != IFT_CARP)
	#endif
	) {
	IN_IFADDR_RUNLOCK();
	rtinit(&(target->ia_ifa), (int)RTM_DELETE,
	rtinitflags(target));
	target->ia_flags &= ~IFA_ROUTE;

	error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
	rtinitflags(ia) \| RTF_UP);
	if (error == 0)
	ia->ia_flags \|= IFA_ROUTE;
	return (error);
	}
	}
	IN_IFADDR_RUNLOCK();

	/*
	* remove all L2 entries on the given prefix
	*
	* NOTE(review): in_ifinit() computes ia_subnet and ia_subnetmask from
	* an ntohl()'d address, i.e. in host byte order, and they are stored
	* into s_addr here without htonl(); confirm that is what
	* lltable_prefix_free() expects.
	*/
	bzero(&prefix0, sizeof(prefix0));
	prefix0.sin_len = sizeof(prefix0);
	prefix0.sin_family = AF_INET;
	prefix0.sin_addr.s_addr = target->ia_subnet;
	bzero(&mask0, sizeof(mask0));
	mask0.sin_len = sizeof(mask0);
	mask0.sin_family = AF_INET;
	mask0.sin_addr.s_addr = target->ia_subnetmask;
	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
	(struct sockaddr *)&mask0);

	/*
	* As no-one seem to have this prefix, we can remove the route.
	*/
	rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
	target->ia_flags &= ~IFA_ROUTE;
	return (0);
	}

	#undef rtinitflags

	/*
	* Return 1 if the address might be a local broadcast address.
	*/
	int
	in_broadcast(struct in_addr in, struct ifnet *ifp)
	{
	register struct ifaddr *ifa;
	u_long t;

	if (in.s_addr == INADDR_BROADCAST \|\|
	in.s_addr == INADDR_ANY)
	return (1);
	if ((ifp->if_flags & IFF_BROADCAST) == 0)
	return (0);
	t = ntohl(in.s_addr);
	/*
	* Look through the list of addresses for a match
	* with a broadcast address.
	*/
	#define ia ((struct in_ifaddr *)ifa)
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
	if (ifa->ifa_addr->sa_family == AF_INET &&
	(in.s_addr == ia->ia_broadaddr.sin_addr.s_addr \|\|
	in.s_addr == ia->ia_netbroadcast.s_addr \|\|
	/*
	* Check for old-style (host 0) broadcast.
	*/
	t == ia->ia_subnet \|\| t == ia->ia_net) &&
	/*
	* Check for an all one subnetmask. These
	* only exist when an interface gets a secondary
	* address.
	*/
	ia->ia_subnetmask != (u_long)0xffffffff)
	return (1);
	return (0);
	#undef ia
	}

	/*
	 * On interface removal, clean up IPv4 data structures hung off of the ifnet.
	 *
	 * Calls in_pcbpurgeif0() on V_ripcbinfo and on V_udbinfo (presumably the
	 * raw-IP and UDP PCB tables -- confirm against their definitions), then
	 * in_purgemaddrs() to release the interface's IPv4 multicast records.
	 */
	void
	in_ifdetach(struct ifnet *ifp)
	{

	in_pcbpurgeif0(&V_ripcbinfo, ifp);
	in_pcbpurgeif0(&V_udbinfo, ifp);
	in_purgemaddrs(ifp);
	}

	/*
	 * Delete all IPv4 multicast address records, and associated link-layer
	 * multicast address records, associated with ifp.
	 * XXX It looks like domifdetach runs AFTER the link layer cleanup.
	 * XXX This should not race with ifma_protospec being set during
	 * a new allocation, if it does, we have bigger problems.
	 *
	 * Runs entirely under IN_MULTI_LOCK(); IF_ADDR_LOCK(ifp) is held only
	 * while the candidate records are collected onto a private list.
	 */
	static void
	in_purgemaddrs(struct ifnet *ifp)
	{
		LIST_HEAD(,in_multi) purgeinms;
		struct in_multi *inm, *tinm;
		struct ifmultiaddr *ifma;

		LIST_INIT(&purgeinms);
		IN_MULTI_LOCK();

		/*
		 * Extract list of in_multi associated with the detaching ifp
		 * which the PF_INET layer is about to release.
		 * We need to do this as IF_ADDR_LOCK() may be re-acquired
		 * by code further down.
		 */
		IF_ADDR_LOCK(ifp);
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_INET ||
			    ifma->ifma_protospec == NULL)
				continue;
	#if 0
			KASSERT(ifma->ifma_protospec != NULL,
			    ("%s: ifma_protospec is NULL", __func__));
	#endif
			inm = (struct in_multi *)ifma->ifma_protospec;
			LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
		}
		IF_ADDR_UNLOCK(ifp);

		/* Release each collected record now that IF_ADDR_LOCK is dropped. */
		LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
			LIST_REMOVE(inm, inm_link);
			inm_release_locked(inm);
		}
		igmp_ifdetach(ifp);

		IN_MULTI_UNLOCK();
	}

	#include <net/if_dl.h>
	#include <netinet/if_ether.h>

	/*
	 * IPv4 link-layer table entry: the generic llentry followed by the
	 * IPv4 (L3) address it describes.  in_lltable_new() allocates one of
	 * these and hands out a pointer to the embedded "base" member, so
	 * "base" has to stay the first field.
	 */
	struct in_llentry {
	struct llentry base;		/* generic entry; returned to callers */
	struct sockaddr_in l3_addr4;	/* copy of the IPv4 address looked up */
	};

	/*
	 * Allocate and initialise a new IPv4 link-layer entry for l3addr.
	 *
	 * The entry is zero-filled, its timer callout is initialised, it is
	 * marked as already expired, it carries a copy of the sockaddr_in that
	 * l3addr points to and it starts with a reference count of 1.
	 * "flags" is not used here.
	 *
	 * Returns a pointer to the embedded generic llentry, or NULL when the
	 * non-sleeping allocation fails.
	 */
	static struct llentry *
	in_lltable_new(const struct sockaddr *l3addr, u_int flags)
	{
		struct in_llentry *lle;

		lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
		if (lle == NULL)		/* NB: caller generates msg */
			return NULL;

		callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
		/*
		 * For IPv4 this will trigger "arpresolve" to generate
		 * an ARP request.
		 */
		lle->base.la_expire = time_second; /* mark expired */
		lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
		lle->base.lle_refcnt = 1;
		LLE_LOCK_INIT(&lle->base);
		return &lle->base;
	}

	/*
	 * Deletes an address from the address table.
	 * This function is called by the timer functions
	 * such as arptimer() and nd6_llinfo_timer(), and
	 * the caller does the locking.
	 *
	 * The entry's write lock is released here, so it must be write-locked
	 * on entry; the lock is then destroyed and the memory freed.  "llt" is
	 * not used.
	 */
	static void
	in_lltable_free(struct lltable *llt, struct llentry *lle)
	{
		LLE_WUNLOCK(lle);
		LLE_LOCK_DESTROY(lle);
		free(lle, M_LLTABLE);
	}


	/*
	 * True when address "d" and prefix "a" agree on every bit set in mask
	 * "m".  All three arguments are pointers to struct sockaddr_in.
	 *
	 * Only "d" is passed through ntohl(); "a" and "m" are used as stored.
	 * NOTE(review): this presumably expects "a" and "m" to hold host-order
	 * values (as when filled from ia_subnet / ia_subnetmask) -- confirm
	 * against every caller before reusing the macro elsewhere.
	 */
	#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
	(((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )

	/*
	 * Free every entry of the table whose L3 address falls inside
	 * prefix/mask (compared with IN_ARE_MASKED_ADDR_EQUAL()).
	 *
	 * For each match the entry's timer is drained first; if
	 * callout_drain() reports that a pending callout was cancelled, the
	 * reference held on behalf of that callout is dropped before the entry
	 * is passed, write-locked, to llentry_free().
	 */
	static void
	in_lltable_prefix_free(struct lltable *llt,
	    const struct sockaddr *prefix,
	    const struct sockaddr *mask)
	{
		const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
		const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
		struct llentry *lle, *next;
		int i;

		for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
			/* _SAFE variant: llentry_free() is called on the current entry. */
			LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {

				if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
				    pfx, msk)) {
					int canceled;

					canceled = callout_drain(&lle->la_timer);
					LLE_WLOCK(lle);
					if (canceled)
						LLE_REMREF(lle);
					llentry_free(lle);
				}
			}
		}
	}


	/*
	 * Check that a route covering l3addr exists and is acceptable for
	 * creating a link-layer entry on ifp.
	 *
	 * Returns EINVAL when rtalloc1() finds no route.  With the "+" side of
	 * the change below, a route that is a gateway route or that points at a
	 * different interface is rejected only when LLE_PUB is not set in
	 * "flags"; the "-" side rejected gateway routes unconditionally.
	 * Returns 0 otherwise.  The route obtained from rtalloc1() is released
	 * with RTFREE_LOCKED() on every path where it is non-NULL.
	 */
	static int
	in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
	{
		struct rtentry *rt;

		KASSERT(l3addr->sa_family == AF_INET,
		    ("sin_family %d", l3addr->sa_family));

		/* XXX rtalloc1 should take a const param */
		rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
	-	if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) ||
	-	    ((rt->rt_ifp != ifp) && !(flags & LLE_PUB))) {
	+	if (rt == NULL || (!(flags & LLE_PUB) &&
	+	    ((rt->rt_flags & RTF_GATEWAY) ||
	+	    (rt->rt_ifp != ifp)))) {
	#ifdef DIAGNOSTIC
			log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
			    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
	#endif
			if (rt != NULL)
				RTFREE_LOCKED(rt);
			return (EINVAL);
		}
		RTFREE_LOCKED(rt);
		return 0;
	}

	/*
	 * Return NULL if not found or marked for deletion.
	 * If found return lle read locked.
	 *
	 * Look up, and depending on "flags" create or delete, the entry for
	 * l3addr in the interface's IPv4 link-layer table:
	 *
	 *  - no match and LLE_CREATE clear: returns NULL;
	 *  - no match and LLE_CREATE set: unless LLE_IFADDR is set, the address
	 *    must pass in_lltable_rtcheck(); a new entry is then allocated and
	 *    inserted at the head of its hash chain.  With LLE_IFADDR the
	 *    interface's own link-layer address is copied in and the entry is
	 *    marked LLE_VALID | LLE_STATIC.  Returns NULL if the route check or
	 *    the allocation fails;
	 *  - match and LLE_DELETE set: the entry is flagged LLE_DELETED (an
	 *    LLE_IFADDR entry only when the caller also passed LLE_IFADDR) and
	 *    (void *)-1 is returned;
	 *  - otherwise the matching entry is returned.
	 *
	 * An entry accepted by LLE_IS_VALID() is returned write-locked when
	 * LLE_EXCLUSIVE is set and read-locked otherwise.
	 */
	static struct llentry *
	in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
	{
		const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
		struct ifnet *ifp = llt->llt_ifp;
		struct llentry *lle;
		struct llentries *lleh;
		u_int hashkey;

		IF_AFDATA_LOCK_ASSERT(ifp);
		KASSERT(l3addr->sa_family == AF_INET,
		    ("sin_family %d", l3addr->sa_family));

		hashkey = sin->sin_addr.s_addr;
		lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
		/* lle is NULL after the loop when no live entry matched. */
		LIST_FOREACH(lle, lleh, lle_next) {
			struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
			if (lle->la_flags & LLE_DELETED)
				continue;
			if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
				break;
		}
		if (lle == NULL) {
	#ifdef DIAGNOSTIC
			if (flags & LLE_DELETE)
				log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
	#endif
			if (!(flags & LLE_CREATE))
				return (NULL);
			/*
			 * A route that covers the given address must have
			 * been installed 1st because we are doing a resolution,
			 * verify this.
			 */
			if (!(flags & LLE_IFADDR) &&
			    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
				goto done;

			lle = in_lltable_new(l3addr, flags);
			if (lle == NULL) {
				log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
				goto done;
			}
			lle->la_flags = flags & ~LLE_CREATE;
			if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
				bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
				lle->la_flags |= (LLE_VALID | LLE_STATIC);
			}

			lle->lle_tbl = llt;
			lle->lle_head = lleh;
			LIST_INSERT_HEAD(lleh, lle, lle_next);
		} else if (flags & LLE_DELETE) {
			if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
				LLE_WLOCK(lle);
				lle->la_flags = LLE_DELETED;
				LLE_WUNLOCK(lle);
	#ifdef DIAGNOSTIC
				log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
	#endif
			}
			/* Sentinel: tells the caller a delete was processed. */
			lle = (void *)-1;

		}
		if (LLE_IS_VALID(lle)) {
			if (flags & LLE_EXCLUSIVE)
				LLE_WLOCK(lle);
			else
				LLE_RLOCK(lle);
		}
	done:
		return (lle);
	}

	static int
	in_lltable_dump(struct lltable llt, struct sysctl_req wr)
	{
	#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle))
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	/* XXX stack use */
	struct {
	struct rt_msghdr rtm;
	struct sockaddr_inarp sin;
	struct sockaddr_dl sdl;
	} arpc;
	int error, i;

	LLTABLE_LOCK_ASSERT();

	error = 0;
	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
	LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
	struct sockaddr_dl *sdl;

	/* skip deleted entries */
	if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
	continue;
	/* Skip if jailed and not a valid IP of the prison. */
	if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
	continue;
	/*
	* produce a msg made of:
	* struct rt_msghdr;
	* struct sockaddr_inarp; (IPv4)
	* struct sockaddr_dl;
	*/
	bzero(&arpc, sizeof(arpc));
	arpc.rtm.rtm_msglen = sizeof(arpc);
	arpc.rtm.rtm_version = RTM_VERSION;
	arpc.rtm.rtm_type = RTM_GET;
	arpc.rtm.rtm_flags = RTF_UP;
	arpc.rtm.rtm_addrs = RTA_DST \| RTA_GATEWAY;
	arpc.sin.sin_family = AF_INET;
	arpc.sin.sin_len = sizeof(arpc.sin);
	arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;

	/* publish */
	if (lle->la_flags & LLE_PUB) {
	arpc.rtm.rtm_flags \|= RTF_ANNOUNCE;
	/* proxy only */
	if (lle->la_flags & LLE_PROXY)
	arpc.sin.sin_other = SIN_PROXY;
	}

	sdl = &arpc.sdl;
	sdl->sdl_family = AF_LINK;
	sdl->sdl_len = sizeof(*sdl);
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
	sdl->sdl_alen = ifp->if_addrlen;
	bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
	} else {
	sdl->sdl_alen = 0;
	bzero(LLADDR(sdl), ifp->if_addrlen);
	}

	arpc.rtm.rtm_rmx.rmx_expire =
	lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
	arpc.rtm.rtm_flags \|= (RTF_HOST \| RTF_LLDATA);
	if (lle->la_flags & LLE_STATIC)
	arpc.rtm.rtm_flags \|= RTF_STATIC;
	arpc.rtm.rtm_index = ifp->if_index;
	error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
	if (error)
	break;
	}
	}
	return error;
	#undef SIN
	}

	/*
	 * Allocate and set up the per-interface IPv4 state for ifp.
	 *
	 * A zeroed struct in_ifinfo is allocated with M_WAITOK.  A link-layer
	 * table is created with lltable_init(); when that succeeds its method
	 * pointers are set to the in_lltable_*() routines of this file.  The
	 * table pointer is stored in ii_llt even when it is NULL.  The value
	 * returned by igmp_domifattach() is stored in ii_igmp.
	 *
	 * Returns the new in_ifinfo as an opaque pointer.
	 */
	void *
	in_domifattach(struct ifnet *ifp)
	{
		struct in_ifinfo *ii;
		struct lltable *llt;

		ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK | M_ZERO);

		llt = lltable_init(ifp, AF_INET);
		if (llt != NULL) {
			llt->llt_new = in_lltable_new;
			llt->llt_free = in_lltable_free;
			llt->llt_prefix_free = in_lltable_prefix_free;
			llt->llt_rtcheck = in_lltable_rtcheck;
			llt->llt_lookup = in_lltable_lookup;
			llt->llt_dump = in_lltable_dump;
		}
		ii->ii_llt = llt;

		ii->ii_igmp = igmp_domifattach(ifp);

		return ii;
	}

	void
	in_domifdetach(struct ifnet ifp, void aux)
	{
	struct in_ifinfo ii = (struct in_ifinfo )aux;

	igmp_domifdetach(ifp);
	lltable_free(ii->ii_llt);
	free(ii, M_IFADDR);
	}
	Index: stable/8/sys/netinet/in_pcb.c
	===================================================================
	--- stable/8/sys/netinet/in_pcb.c (revision 209276)
	+++ stable/8/sys/netinet/in_pcb.c (revision 209277)
	@@ -1,1957 +1,1957 @@
	/*-
	* Copyright (c) 1982, 1986, 1991, 1993, 1995
	* The Regents of the University of California.
	* Copyright (c) 2007-2009 Robert N. M. Watson
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_ddb.h"
	#include "opt_ipsec.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/domain.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/jail.h>
	#include <sys/kernel.h>
	#include <sys/sysctl.h>

	#ifdef DDB
	#include <ddb/ddb.h>
	#endif

	#include <vm/uma.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_pcb.h>
	#include <netinet/in_var.h>
	#include <netinet/ip_var.h>
	#include <netinet/tcp_var.h>
	#include <netinet/udp.h>
	#include <netinet/udp_var.h>
	#ifdef INET6
	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#endif /* INET6 */


	#ifdef IPSEC
	#include <netipsec/ipsec.h>
	#include <netipsec/key.h>
	#endif /* IPSEC */

	#include <security/mac/mac_framework.h>

	/*
	 * These configure the range of local port addresses assigned to
	 * "unspecified" outgoing connections/packets/whatever.
	 *
	 * Three first/last pairs exist: the low (reserved) range, the default
	 * range and the high range.  A pair may be stored with first > last
	 * (the low range is by default); in_pcbbind_setup() swaps the two
	 * values before searching.
	 */
	VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */
	VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */
	VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */
	VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */
	VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */
	VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */

	/*
	 * Reserved ports accessible only to root. There are significant
	 * security considerations that must be accounted for when changing these,
	 * but the security benefits can be great. Please be careful.
	 *
	 * in_pcbbind_setup() demands PRIV_NETINET_RESERVEDPORT for an explicit
	 * bind to any port in [ipport_reservedlow, ipport_reservedhigh].
	 * ipport_reservedlow has no initializer here.
	 */
	VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */
	VNET_DEFINE(int, ipport_reservedlow);

	/* Variables dealing with random ephemeral port allocation. */
	VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */
	VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
	VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
	VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
	/* Incremented by in_pcbbind_setup() for every non-UDP automatic allocation. */
	VNET_DEFINE(int, ipport_tcpallocs);
	static VNET_DEFINE(int, ipport_tcplastcount);

	/* Per-vnet accessor for the file-local counter above. */
	#define V_ipport_tcplastcount VNET(ipport_tcplastcount)

	/*
	 * Clamp "var" into the closed range [min, max].
	 *
	 * "var" must be an lvalue; it may be evaluated more than once.  The
	 * body is wrapped in do { } while (0) so that the macro behaves as a
	 * single statement, e.g. as the unbraced body of an if/else.
	 */
	#define RANGECHK(var, min, max) do { \
		if ((var) < (min)) { (var) = (min); } \
		else if ((var) > (max)) { (var) = (max); } \
	} while (0)

	static void in_pcbremlists(struct inpcb *inp);

	/*
	 * Sysctl handler shared by the port-range knobs.
	 *
	 * The integer is read or written through the regular int handler (the
	 * vnet-aware one under VIMAGE).  When that succeeds, all six automatic
	 * port-range limits are clamped back into their legal ranges, whichever
	 * of them was the one touched.  Returns the handler's error code.
	 */
	static int
	sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
	{
		int error;

	#ifdef VIMAGE
		error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
	#else
		error = sysctl_handle_int(oidp, arg1, arg2, req);
	#endif
		if (error != 0)
			return (error);

		/* Low (reserved) range stays below IPPORT_RESERVED. */
		RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		/* Default and high ranges stay at or above IPPORT_RESERVED. */
		RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
		return (error);
	}

	#undef RANGECHK

	/* net.inet.ip.portrange.*: tunables for automatic local port selection. */
	SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");

	/*
	 * The six range limits share sysctl_net_ipport_check(), which clamps
	 * them after every successful access.
	 */
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	    CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
	    &sysctl_net_ipport_check, "I", "");
	/* The reserved-port bounds are plain integers flagged CTLFLAG_SECURE. */
	SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
	    CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
	SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
	    CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
	SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
	    &VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
	SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
	    &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
	    "allocations before switching to a sequental one");
	SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
	    &VNET_NAME(ipport_randomtime), 0,
	    "Minimum time to keep sequental port "
	    "allocation before switching to a random one");

	/*
	* in_pcb.c: manage the Protocol Control Blocks.
	*
	* NOTE: It is assumed that most of these functions will be called with
	* the pcbinfo lock held, and often, the inpcb lock held, as these utility
	* functions often modify hash chains or addresses in pcbs.
	*/

	/*
	* Allocate a PCB and associate it with the socket.
	* On success return with the PCB locked.
	*/
	int
	in_pcballoc(struct socket so, struct inpcbinfo pcbinfo)
	{
	struct inpcb *inp;
	int error;

	INP_INFO_WLOCK_ASSERT(pcbinfo);
	error = 0;
	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
	if (inp == NULL)
	return (ENOBUFS);
	bzero(inp, inp_zero_size);
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	inp->inp_cred = crhold(so->so_cred);
	inp->inp_inc.inc_fibnum = so->so_fibnum;
	#ifdef MAC
	error = mac_inpcb_init(inp, M_NOWAIT);
	if (error != 0)
	goto out;
	mac_inpcb_create(so, inp);
	#endif
	#ifdef IPSEC
	error = ipsec_init_policy(so, &inp->inp_sp);
	if (error != 0) {
	#ifdef MAC
	mac_inpcb_destroy(inp);
	#endif
	goto out;
	}
	#endif /IPSEC/
	#ifdef INET6
	if (INP_SOCKAF(so) == AF_INET6) {
	inp->inp_vflag \|= INP_IPV6PROTO;
	if (V_ip6_v6only)
	inp->inp_flags \|= IN6P_IPV6_V6ONLY;
	}
	#endif
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	so->so_pcb = (caddr_t)inp;
	#ifdef INET6
	if (V_ip6_auto_flowlabel)
	inp->inp_flags \|= IN6P_AUTOFLOWLABEL;
	#endif
	INP_WLOCK(inp);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_refcount = 1; /* Reference from the inpcbinfo */
	#if defined(IPSEC) \|\| defined(MAC)
	out:
	if (error != 0) {
	crfree(inp->inp_cred);
	uma_zfree(pcbinfo->ipi_zone, inp);
	}
	#endif
	return (error);
	}

	int
	in_pcbbind(struct inpcb inp, struct sockaddr nam, struct ucred *cred)
	{
	int anonport, error;

	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
	INP_WLOCK_ASSERT(inp);

	if (inp->inp_lport != 0 \|\| inp->inp_laddr.s_addr != INADDR_ANY)
	return (EINVAL);
	anonport = inp->inp_lport == 0 && (nam == NULL \|\|
	((struct sockaddr_in *)nam)->sin_port == 0);
	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
	&inp->inp_lport, cred);
	if (error)
	return (error);
	if (in_pcbinshash(inp) != 0) {
	inp->inp_laddr.s_addr = INADDR_ANY;
	inp->inp_lport = 0;
	return (EAGAIN);
	}
	if (anonport)
	inp->inp_flags \|= INP_ANONPORT;
	return (0);
	}

	/*
	 * Set up a bind operation on a PCB, performing port allocation
	 * as required, but do not actually modify the PCB. Callers can
	 * either complete the bind by setting inp_laddr/inp_lport and
	 * calling in_pcbinshash(), or they can just use the resulting
	 * port and address to authorise the sending of a once-off packet.
	 *
	 * On error, the values of *laddrp and *lportp are not changed.
	 *
	 * inp     PCB being bound; the pcbinfo lock and the inpcb lock are held.
	 * nam     requested local sockaddr_in, or NULL to pick address and port.
	 * laddrp  in: current local address; out: address to bind to.
	 * lportp  in: current local port; out: port to bind to (network order).
	 * cred    credential used for the jail and privilege checks.
	 *
	 * Returns 0 or an errno value (EADDRNOTAVAIL, EINVAL, EACCES,
	 * EADDRINUSE, or an error from the prison/priv checks).
	 */
	int
	in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
	    u_short *lportp, struct ucred *cred)
	{
		struct socket *so = inp->inp_socket;
		unsigned short *lastport;
		struct sockaddr_in *sin;
		struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
		struct in_addr laddr;
		u_short lport = 0;
		int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
		int error;
		int dorandom;

		/*
		 * Because no actual state changes occur here, a global write lock on
		 * the pcbinfo isn't required.
		 */
		INP_INFO_LOCK_ASSERT(pcbinfo);
		INP_LOCK_ASSERT(inp);

		if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
			return (EADDRNOTAVAIL);
		laddr.s_addr = *laddrp;
		if (nam != NULL && laddr.s_addr != INADDR_ANY)
			return (EINVAL);
		if ((so->so_options & (SO_REUSEADDR | SO_REUSEPORT)) == 0)
			wild = INPLOOKUP_WILDCARD;
		if (nam == NULL) {
			if ((error = prison_local_ip4(cred, &laddr)) != 0)
				return (error);
		} else {
			sin = (struct sockaddr_in *)nam;
			if (nam->sa_len != sizeof (*sin))
				return (EINVAL);
	#ifdef notdef
			/*
			 * We should check the family, but old programs
			 * incorrectly fail to initialize it.
			 */
			if (sin->sin_family != AF_INET)
				return (EAFNOSUPPORT);
	#endif
			error = prison_local_ip4(cred, &sin->sin_addr);
			if (error)
				return (error);
			if (sin->sin_port != *lportp) {
				/* Don't allow the port to change. */
				if (*lportp != 0)
					return (EINVAL);
				lport = sin->sin_port;
			}
			/* NB: lport is left as 0 if the port isn't being changed. */
			if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
				/*
				 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
				 * allow complete duplication of binding if
				 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
				 * and a multicast address is bound on both
				 * new and duplicated sockets.
				 */
				if (so->so_options & SO_REUSEADDR)
					reuseport = SO_REUSEADDR | SO_REUSEPORT;
			} else if (sin->sin_addr.s_addr != INADDR_ANY) {
				sin->sin_port = 0;		/* yech... */
				bzero(&sin->sin_zero, sizeof(sin->sin_zero));
				/*
				 * Is the address a local IP address?
				 * If INP_BINDANY is set, then the socket may be bound
				 * to any endpoint address, local or not.
				 */
				if ((inp->inp_flags & INP_BINDANY) == 0 &&
				    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
					return (EADDRNOTAVAIL);
			}
			laddr = sin->sin_addr;
			if (lport) {
				struct inpcb *t;
				struct tcptw *tw;

				/* GROSS */
				if (ntohs(lport) <= V_ipport_reservedhigh &&
				    ntohs(lport) >= V_ipport_reservedlow &&
				    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
				    0))
					return (EACCES);
				if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
				    priv_check_cred(inp->inp_cred,
				    PRIV_NETINET_REUSEPORT, 0) != 0) {
					t = in_pcblookup_local(pcbinfo, sin->sin_addr,
					    lport, INPLOOKUP_WILDCARD, cred);
		/*
		 * XXX
		 * This entire block sorely needs a rewrite.
		 */
					if (t &&
					    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
					    (so->so_type != SOCK_STREAM ||
					     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
					    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
					     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
					     (t->inp_socket->so_options &
					     SO_REUSEPORT) == 0) &&
					    (inp->inp_cred->cr_uid !=
					     t->inp_cred->cr_uid))
						return (EADDRINUSE);
				}
				t = in_pcblookup_local(pcbinfo, sin->sin_addr,
				    lport, wild, cred);
				if (t && (t->inp_flags & INP_TIMEWAIT)) {
					/*
					 * XXXRW: If an incpb has had its timewait
					 * state recycled, we treat the address as
					 * being in use (for now).  This is better
					 * than a panic, but not desirable.
					 *
					 * The TIME_WAIT state belongs to the
					 * conflicting PCB "t", not to the PCB
					 * being bound.
					 */
					tw = intotw(t);
					if (tw == NULL ||
					    (reuseport & tw->tw_so_options) == 0)
						return (EADDRINUSE);
				} else if (t &&
				    (reuseport & t->inp_socket->so_options) == 0) {
	#ifdef INET6
					if (ntohl(sin->sin_addr.s_addr) !=
					    INADDR_ANY ||
					    ntohl(t->inp_laddr.s_addr) !=
					    INADDR_ANY ||
					    INP_SOCKAF(so) ==
					    INP_SOCKAF(t->inp_socket))
	#endif
					return (EADDRINUSE);
				}
			}
		}
		if (*lportp != 0)
			lport = *lportp;
		if (lport == 0) {
			u_short first, last, aux;
			int count;

			if (inp->inp_flags & INP_HIGHPORT) {
				first = V_ipport_hifirstauto;	/* sysctl */
				last  = V_ipport_hilastauto;
				lastport = &pcbinfo->ipi_lasthi;
			} else if (inp->inp_flags & INP_LOWPORT) {
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				if (error)
					return error;
				first = V_ipport_lowfirstauto;	/* 1023 */
				last  = V_ipport_lowlastauto;	/* 600 */
				lastport = &pcbinfo->ipi_lastlow;
			} else {
				first = V_ipport_firstauto;	/* sysctl */
				last  = V_ipport_lastauto;
				lastport = &pcbinfo->ipi_lastport;
			}
			/*
			 * For UDP, use random port allocation as long as the user
			 * allows it.  For TCP (and as of yet unknown) connections,
			 * use random port allocation only if the user allows it AND
			 * ipport_tick() allows it.
			 */
			if (V_ipport_randomized &&
			    (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
				dorandom = 1;
			else
				dorandom = 0;
			/*
			 * It makes no sense to do random port allocation if
			 * we have the only port available.
			 */
			if (first == last)
				dorandom = 0;
			/* Make sure to not include UDP packets in the count. */
			if (pcbinfo != &V_udbinfo)
				V_ipport_tcpallocs++;
			/*
			 * Instead of having two loops further down counting up or down
			 * make sure that first is always <= last and go with only one
			 * code path implementing all logic.
			 */
			if (first > last) {
				aux = first;
				first = last;
				last = aux;
			}

			/* first != last here whenever dorandom is set. */
			if (dorandom)
				*lastport = first +
				    (arc4random() % (last - first));

			count = last - first;

			do {
				if (count-- < 0)	/* completely used? */
					return (EADDRNOTAVAIL);
				++*lastport;
				/* Wrap the cursor back into [first, last]. */
				if (*lastport < first || *lastport > last)
					*lastport = first;
				lport = htons(*lastport);
			} while (in_pcblookup_local(pcbinfo, laddr,
			    lport, wild, cred));
		}
		*laddrp = laddr.s_addr;
		*lportp = lport;
		return (0);
	}

	/*
	* Connect from a socket to a specified address.
	* Both address and port must be specified in argument sin.
	* If don't have a local address for this socket yet,
	* then pick one.
	*/
	int
	in_pcbconnect(struct inpcb inp, struct sockaddr nam, struct ucred *cred)
	{
	u_short lport, fport;
	in_addr_t laddr, faddr;
	int anonport, error;

	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
	INP_WLOCK_ASSERT(inp);

	lport = inp->inp_lport;
	laddr = inp->inp_laddr.s_addr;
	anonport = (lport == 0);
	error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
	NULL, cred);
	if (error)
	return (error);

	/* Do the initial binding of the local address if required. */
	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
	inp->inp_lport = lport;
	inp->inp_laddr.s_addr = laddr;
	if (in_pcbinshash(inp) != 0) {
	inp->inp_laddr.s_addr = INADDR_ANY;
	inp->inp_lport = 0;
	return (EAGAIN);
	}
	}

	/* Commit the remaining changes. */
	inp->inp_lport = lport;
	inp->inp_laddr.s_addr = laddr;
	inp->inp_faddr.s_addr = faddr;
	inp->inp_fport = fport;
	in_pcbrehash(inp);

	if (anonport)
	inp->inp_flags \|= INP_ANONPORT;
	return (0);
	}

	/*
	* Do proper source address selection on an unbound socket in case
	* of connect. Take jails into account as well.
	*/
	static int
	in_pcbladdr(struct inpcb inp, struct in_addr faddr, struct in_addr *laddr,
	struct ucred *cred)
	{
	struct ifaddr *ifa;
	struct sockaddr *sa;
	struct sockaddr_in *sin;
	struct route sro;
	int error;

	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));

	/*
	* Bypass source address selection and use the primary jail IP
	* if requested.
	*/
	if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
	return (0);

	error = 0;
	bzero(&sro, sizeof(sro));

	sin = (struct sockaddr_in *)&sro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(struct sockaddr_in);
	sin->sin_addr.s_addr = faddr->s_addr;

	/*
	* If route is known our src addr is taken from the i/f,
	* else punt.
	*
	* Find out route to destination.
	*/
	if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
	in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);

	/*
	* If we found a route, use the address corresponding to
	* the outgoing interface.
	*
	* Otherwise assume faddr is reachable on a directly connected
	* network and try to find a corresponding interface to take
	* the source address from.
	*/
	if (sro.ro_rt == NULL \|\| sro.ro_rt->rt_ifp == NULL) {
	struct in_ifaddr *ia;
	struct ifnet *ifp;

	ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
	if (ia == NULL)
	- ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin));
	+ ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0));
	if (ia == NULL) {
	error = ENETUNREACH;
	goto done;
	}

	if (cred == NULL \|\| !prison_flag(cred, PR_IP4)) {
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	ifa_free(&ia->ia_ifa);
	goto done;
	}

	ifp = ia->ia_ifp;
	ifa_free(&ia->ia_ifa);
	ia = NULL;
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {

	sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET)
	continue;
	sin = (struct sockaddr_in *)sa;
	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
	ia = (struct in_ifaddr *)ifa;
	break;
	}
	}
	if (ia != NULL) {
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	IF_ADDR_UNLOCK(ifp);

	/* 3. As a last resort return the 'default' jail address. */
	error = prison_get_ip4(cred, laddr);
	goto done;
	}

	/*
	* If the outgoing interface on the route found is not
	* a loopback interface, use the address from that interface.
	* In case of jails do those three steps:
	* 1. check if the interface address belongs to the jail. If so use it.
	* 2. check if we have any address on the outgoing interface
	* belonging to this jail. If so use it.
	* 3. as a last resort return the 'default' jail address.
	*/
	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
	struct in_ifaddr *ia;
	struct ifnet *ifp;

	/* If not jailed, use the default returned. */
	if (cred == NULL \|\| !prison_flag(cred, PR_IP4)) {
	ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	goto done;
	}

	/* Jailed. */
	/* 1. Check if the iface address belongs to the jail. */
	sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
	ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	goto done;
	}

	/*
	* 2. Check if we have any address on the outgoing interface
	* belonging to this jail.
	*/
	ia = NULL;
	ifp = sro.ro_rt->rt_ifp;
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET)
	continue;
	sin = (struct sockaddr_in *)sa;
	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
	ia = (struct in_ifaddr *)ifa;
	break;
	}
	}
	if (ia != NULL) {
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	IF_ADDR_UNLOCK(ifp);

	/* 3. As a last resort return the 'default' jail address. */
	error = prison_get_ip4(cred, laddr);
	goto done;
	}

	/*
	* The outgoing interface is marked with 'loopback net', so a route
	* to ourselves is here.
	* Try to find the interface of the destination address and then
	* take the address from there. That interface is not necessarily
	* a loopback interface.
	* In case of jails, check that it is an address of the jail
	* and if we cannot find, fall back to the 'default' jail address.
	*/
	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
	struct sockaddr_in sain;
	struct in_ifaddr *ia;

	bzero(&sain, sizeof(struct sockaddr_in));
	sain.sin_family = AF_INET;
	sain.sin_len = sizeof(struct sockaddr_in);
	sain.sin_addr.s_addr = faddr->s_addr;

	ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
	if (ia == NULL)
	- ia = ifatoia(ifa_ifwithnet(sintosa(&sain)));
	+ ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
	if (ia == NULL)
	ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));

	if (cred == NULL \|\| !prison_flag(cred, PR_IP4)) {
	if (ia == NULL) {
	error = ENETUNREACH;
	goto done;
	}
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	ifa_free(&ia->ia_ifa);
	goto done;
	}

	/* Jailed. */
	if (ia != NULL) {
	struct ifnet *ifp;

	ifp = ia->ia_ifp;
	ifa_free(&ia->ia_ifa);
	ia = NULL;
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {

	sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET)
	continue;
	sin = (struct sockaddr_in *)sa;
	if (prison_check_ip4(cred,
	&sin->sin_addr) == 0) {
	ia = (struct in_ifaddr *)ifa;
	break;
	}
	}
	if (ia != NULL) {
	laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
	IF_ADDR_UNLOCK(ifp);
	goto done;
	}
	IF_ADDR_UNLOCK(ifp);
	}

	/* 3. As a last resort return the 'default' jail address. */
	error = prison_get_ip4(cred, laddr);
	goto done;
	}

	done:
	if (sro.ro_rt != NULL)
	RTFREE(sro.ro_rt);
	return (error);
	}

	/*
	 * Set up for a connect from a socket to the specified address.
	 * On entry, *laddrp and *lportp should contain the current local
	 * address and port for the PCB; these are updated to the values
	 * that should be placed in inp_laddr and inp_lport to complete
	 * the connect.
	 *
	 * On success, *faddrp and *fportp will be set to the remote address
	 * and port.  These are not updated in the error case.
	 *
	 * If the operation fails because the connection already exists,
	 * *oinpp will be set to the PCB of that connection so that the
	 * caller can decide to override it.  In all other cases, *oinpp
	 * is set to NULL.  oinpp itself may be NULL.
	 *
	 * Returns 0 on success.  Fails with EINVAL if nam has the wrong length,
	 * EAFNOSUPPORT if it is not an AF_INET address, EADDRNOTAVAIL if the
	 * destination port is zero or the selected multicast interface has no
	 * entry on the IPv4 address list, EADDRINUSE if the resulting 4-tuple
	 * is already hashed, or with whatever prison_get_ip4(), in_pcbladdr()
	 * or in_pcbbind_setup() return.
	 */
	int
	in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
	    in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
	    struct inpcb **oinpp, struct ucred *cred)
	{
		struct sockaddr_in *sin = (struct sockaddr_in *)nam;
		struct in_ifaddr *ia;
		struct inpcb *oinp;
		struct in_addr laddr, faddr;
		u_short lport, fport;
		int error;

		/*
		 * Because a global state change doesn't actually occur here, a read
		 * lock is sufficient.
		 */
		INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
		INP_LOCK_ASSERT(inp);

		if (oinpp != NULL)
			*oinpp = NULL;
		if (nam->sa_len != sizeof (*sin))
			return (EINVAL);
		if (sin->sin_family != AF_INET)
			return (EAFNOSUPPORT);
		if (sin->sin_port == 0)
			return (EADDRNOTAVAIL);

		/* Work on local copies; the caller's values change only on success. */
		laddr.s_addr = *laddrp;
		lport = *lportp;
		faddr = sin->sin_addr;
		fport = sin->sin_port;

		if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
			/*
			 * If the destination address is INADDR_ANY,
			 * use the primary local address.
			 * If the supplied address is INADDR_BROADCAST,
			 * and the primary interface supports broadcast,
			 * choose the broadcast address for that interface.
			 */
			if (faddr.s_addr == INADDR_ANY) {
				IN_IFADDR_RLOCK();
				faddr =
				    IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
				IN_IFADDR_RUNLOCK();
				if (cred != NULL &&
				    (error = prison_get_ip4(cred, &faddr)) != 0)
					return (error);
			} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
				IN_IFADDR_RLOCK();
				if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
				    IFF_BROADCAST)
					faddr = satosin(&TAILQ_FIRST(
					    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
				IN_IFADDR_RUNLOCK();
			}
		}
		if (laddr.s_addr == INADDR_ANY) {
			/* No local address bound yet: pick one for this destination. */
			error = in_pcbladdr(inp, &faddr, &laddr, cred);
			if (error)
				return (error);

			/*
			 * If the destination address is multicast and an outgoing
			 * interface has been set as a multicast option, use the
			 * address of that interface as our source address.
			 */
			if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
			    inp->inp_moptions != NULL) {
				struct ip_moptions *imo;
				struct ifnet *ifp;

				imo = inp->inp_moptions;
				if (imo->imo_multicast_ifp != NULL) {
					ifp = imo->imo_multicast_ifp;
					IN_IFADDR_RLOCK();
					TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
						if (ia->ia_ifp == ifp)
							break;
					if (ia == NULL) {
						IN_IFADDR_RUNLOCK();
						return (EADDRNOTAVAIL);
					}
					laddr = ia->ia_addr.sin_addr;
					IN_IFADDR_RUNLOCK();
				}
			}
		}

		/* Refuse to create a second PCB with the same exact 4-tuple. */
		oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
		    0, NULL);
		if (oinp != NULL) {
			if (oinpp != NULL)
				*oinpp = oinp;
			return (EADDRINUSE);
		}
		if (lport == 0) {
			/* No local port yet: let the bind code choose one. */
			error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
			    cred);
			if (error)
				return (error);
		}
		*laddrp = laddr.s_addr;
		*lportp = lport;
		*faddrp = faddr.s_addr;
		*fportp = fport;
		return (0);
	}

	/*
	* Disconnect an inpcb from its foreign endpoint: clear the foreign address
	* and port, then rehash the inpcb. Both the pcbinfo write lock and the
	* inpcb write lock are asserted.
	*/
	void
	in_pcbdisconnect(struct inpcb *inp)
	{

	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
	INP_WLOCK_ASSERT(inp);

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	/* faddr and fport feed the hash key, so move to the matching bucket. */
	in_pcbrehash(inp);
	}

	/*
	* in_pcbdetach() is responsible for disassociating a socket from an inpcb.
	* For most protocols, this will be invoked immediately prior to calling
	* in_pcbfree(). However, with TCP the inpcb may significantly outlive the
	* socket, in which case in_pcbfree() is deferred.
	*
	* The inpcb must still have a socket attached; this is asserted.
	*/
	void
	in_pcbdetach(struct inpcb *inp)
	{

	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));

	/* Break the link in both directions: socket -> inpcb, then inpcb -> socket. */
	inp->inp_socket->so_pcb = NULL;
	inp->inp_socket = NULL;
	}

	/*
	* in_pcbfree_internal() frees an inpcb that has been detached from its
	* socket, and whose reference count has reached 0. It will also remove the
	* inpcb from any global lists it might remain on.
	*
	* Both the pcbinfo write lock and the inpcb write lock are asserted on
	* entry. The inpcb lock is released here, immediately before the inpcb is
	* handed back to its zone; the pcbinfo lock is not released here.
	*/
	static void
	in_pcbfree_internal(struct inpcb *inp)
	{
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
	KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));

	INP_INFO_WLOCK_ASSERT(ipi);
	INP_WLOCK_ASSERT(inp);

	#ifdef IPSEC
	if (inp->inp_sp != NULL)
	ipsec_delete_pcbpolicy(inp);
	#endif /* IPSEC */
	/*
	* Stamp the inpcb with a fresh generation number. Note that
	* in_pcbremlists() below performs the same update again.
	*/
	inp->inp_gencnt = ++ipi->ipi_gencnt;
	in_pcbremlists(inp);
	#ifdef INET6
	if (inp->inp_vflag & INP_IPV6PROTO) {
	ip6_freepcbopts(inp->in6p_outputopts);
	if (inp->in6p_moptions != NULL)
	ip6_freemoptions(inp->in6p_moptions);
	}
	#endif
	/* Release the IPv4 options mbuf and multicast options, if any. */
	if (inp->inp_options)
	(void)m_free(inp->inp_options);
	if (inp->inp_moptions != NULL)
	inp_freemoptions(inp->inp_moptions);
	inp->inp_vflag = 0;
	/* Drop the credential reference held by the inpcb. */
	crfree(inp->inp_cred);

	#ifdef MAC
	mac_inpcb_destroy(inp);
	#endif
	/* Unlock before the memory goes back to the zone. */
	INP_WUNLOCK(inp);
	uma_zfree(ipi->ipi_zone, inp);
	}

	/*
	* in_pcbref() bumps the reference count on an inpcb in order to maintain
	* stability of an inpcb pointer despite the inpcb lock being released. This
	* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
	* but where the inpcb lock is already held.
	*
	* While the inpcb will not be freed, releasing the inpcb lock means that the
	* connection's state may change, so the caller should be careful to
	* revalidate any cached state on reacquiring the lock. Drop the reference
	* using in_pcbrele().
	*/
	void
	in_pcbref(struct inpcb *inp)
	{

	/* The count is only modified with the inpcb write lock held. */
	INP_WLOCK_ASSERT(inp);

	/*
	* A temporary reference may only be stacked on top of an existing one;
	* in_pcbfree_internal() asserts that the count is 0 when it frees.
	*/
	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));

	inp->inp_refcount++;
	}

	/*
	 * Release a reference previously taken with in_pcbref().  Because
	 * in_pcbfree() may have run while the extra reference was held, the
	 * release may be the one that actually destroys the inpcb.
	 *
	 * Returns 0 if references remain; the inpcb is still valid and is
	 * returned with its lock held.  Returns 1 if this was the last
	 * reference; the inpcb has been handed to in_pcbfree_internal() and
	 * must not be touched again by the caller.
	 *
	 * The pcbinfo write lock and the inpcb write lock must be held.
	 */
	int
	in_pcbrele(struct inpcb *inp)
	{
	#ifdef INVARIANTS
		struct inpcbinfo *ipi = inp->inp_pcbinfo;
	#endif

		KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));

		INP_INFO_WLOCK_ASSERT(ipi);
		INP_WLOCK_ASSERT(inp);

		inp->inp_refcount--;
		if (!(inp->inp_refcount > 0)) {
			/* Last reference gone: tear the inpcb down now. */
			in_pcbfree_internal(inp);
			return (1);
		}
		return (0);
	}

	/*
	* Unconditionally schedule an inpcb to be freed by decrementing its
	* reference count, which should occur only after the inpcb has been detached
	* from its socket. If another thread holds a temporary reference (acquired
	* using in_pcbref()) then the free is deferred until that reference is
	* released using in_pcbrele(), but the inpcb is still unlocked.
	*/
	void
	in_pcbfree(struct inpcb *inp)
	{
	#ifdef INVARIANTS
	struct inpcbinfo *ipi = inp->inp_pcbinfo;
	#endif

	/* The inpcb must already have been detached from its socket. */
	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
	__func__));

	INP_INFO_WLOCK_ASSERT(ipi);
	INP_WLOCK_ASSERT(inp);

	/*
	* in_pcbrele() returns 1 when it freed the inpcb (which also released
	* the inpcb lock); otherwise another reference remains and the lock is
	* dropped here.
	*/
	if (!in_pcbrele(inp))
	INP_WUNLOCK(inp);
	}

	/*
	* in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
	* port reservation, and preventing it from being returned by inpcb lookups.
	*
	* It is used by TCP to mark an inpcb as unused and avoid future packet
	* delivery or event notification when a socket remains open but TCP has
	* closed. This might occur as a result of a shutdown()-initiated TCP close
	* or a RST on the wire, and allows the port binding to be reused while still
	* maintaining the invariant that so_pcb always points to a valid inpcb until
	* in_pcbdetach().
	*
	* XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
	* lists, but can lead to confusing netstat output, as open sockets with
	* closed TCP connections will no longer appear to have their bound port
	* number. An explicit flag would be better, as it would allow us to leave
	* the port number intact after the connection is dropped.
	*
	* XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
	* in_pcbnotifyall() and in_pcbpurgeif0()?
	*/
	void
	in_pcbdrop(struct inpcb *inp)
	{

		INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
		INP_WLOCK_ASSERT(inp);

		/* Mark the inpcb as dropped before taking it off the hash lists. */
		inp->inp_flags |= INP_DROPPED;
		if (inp->inp_flags & INP_INHASHLIST) {
			struct inpcbport *phd = inp->inp_phd;

			LIST_REMOVE(inp, inp_hash);
			LIST_REMOVE(inp, inp_portlist);
			/* Free the per-port head once its last inpcb is gone. */
			if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
				LIST_REMOVE(phd, phd_hash);
				free(phd, M_PCB);
			}
			inp->inp_flags &= ~INP_INHASHLIST;
		}
	}

	/*
	* Common routines to return the socket addresses associated with inpcbs.
	*/
	/*
	 * Allocate and fill in an AF_INET sockaddr for the given port and
	 * address.  The storage comes from M_SONAME and is zeroed first; the
	 * allocation uses M_WAITOK.  The result is returned as a generic
	 * struct sockaddr pointer.
	 */
	struct sockaddr *
	in_sockaddr(in_port_t port, struct in_addr *addr_p)
	{
		struct sockaddr_in *sin;

		sin = malloc(sizeof *sin, M_SONAME,
		    M_WAITOK | M_ZERO);
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = *addr_p;
		sin->sin_port = port;

		return (struct sockaddr *)sin;
	}

	/*
	 * Return the local address and port of the socket's inpcb as a newly
	 * allocated sockaddr stored in *nam.  The values are copied out under
	 * the inpcb read lock; the allocation happens after the lock is
	 * dropped.  Always returns 0.
	 */
	int
	in_getsockaddr(struct socket *so, struct sockaddr **nam)
	{
		struct inpcb *inp;
		struct in_addr addr;
		in_port_t port;

		inp = sotoinpcb(so);
		KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));

		INP_RLOCK(inp);
		port = inp->inp_lport;
		addr = inp->inp_laddr;
		INP_RUNLOCK(inp);

		*nam = in_sockaddr(port, &addr);
		return 0;
	}

	/*
	 * Return the foreign address and port of the socket's inpcb as a newly
	 * allocated sockaddr stored in *nam.  The values are copied out under
	 * the inpcb read lock; the allocation happens after the lock is
	 * dropped.  Always returns 0.
	 */
	int
	in_getpeeraddr(struct socket *so, struct sockaddr **nam)
	{
		struct inpcb *inp;
		struct in_addr addr;
		in_port_t port;

		inp = sotoinpcb(so);
		KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));

		INP_RLOCK(inp);
		port = inp->inp_fport;
		addr = inp->inp_faddr;
		INP_RUNLOCK(inp);

		*nam = in_sockaddr(port, &addr);
		return 0;
	}

	/*
	 * Invoke notify(inp, errno) on every IPv4 inpcb on pcbinfo's list whose
	 * foreign address equals faddr and which still has a socket attached.
	 *
	 * The pcbinfo write lock is held across the walk and each inpcb is
	 * write-locked before it is examined.  The callback runs with the inpcb
	 * locked; when it returns non-NULL the inpcb is unlocked here.
	 * NOTE(review): a NULL return presumably means the callback already
	 * released or freed the inpcb -- confirm against the notify
	 * implementations.
	 */
	void
	in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
	    struct inpcb *(*notify)(struct inpcb *, int))
	{
		struct inpcb *inp, *inp_temp;

		INP_INFO_WLOCK(pcbinfo);
		/* _SAFE variant: the next pointer is fetched before the body runs. */
		LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
			INP_WLOCK(inp);
	#ifdef INET6
			if ((inp->inp_vflag & INP_IPV4) == 0) {
				INP_WUNLOCK(inp);
				continue;
			}
	#endif
			if (inp->inp_faddr.s_addr != faddr.s_addr ||
			    inp->inp_socket == NULL) {
				INP_WUNLOCK(inp);
				continue;
			}
			if ((*notify)(inp, errno))
				INP_WUNLOCK(inp);
		}
		INP_INFO_WUNLOCK(pcbinfo);
	}

	void
	in_pcbpurgeif0(struct inpcbinfo pcbinfo, struct ifnet ifp)
	{
	struct inpcb *inp;
	struct ip_moptions *imo;
	int i, gap;

	INP_INFO_RLOCK(pcbinfo);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	if ((inp->inp_vflag & INP_IPV4) &&
	imo != NULL) {
	/*
	* Unselect the outgoing interface if it is being
	* detached.
	*/
	if (imo->imo_multicast_ifp == ifp)
	imo->imo_multicast_ifp = NULL;

	/*
	* Drop multicast group membership if we joined
	* through the interface being detached.
	*/
	for (i = 0, gap = 0; i < imo->imo_num_memberships;
	i++) {
	if (imo->imo_membership[i]->inm_ifp == ifp) {
	in_delmulti(imo->imo_membership[i]);
	gap++;
	} else if (gap != 0)
	imo->imo_membership[i - gap] =
	imo->imo_membership[i];
	}
	imo->imo_num_memberships -= gap;
	}
	INP_WUNLOCK(inp);
	}
	INP_INFO_RUNLOCK(pcbinfo);
	}

	/*
	* Lookup a PCB based on the local address and port.
	*/
	#define INP_LOOKUP_MAPPED_PCB_COST	3
	/*
	 * Find an inpcb bound to the given local address and port.
	 *
	 * With wild_okay == 0 only an exact match is accepted: the foreign
	 * address must be INADDR_ANY and the local address and port must equal
	 * laddr and lport.  With wild_okay != 0 every inpcb on the port is
	 * scored by how many of its fields are "wild" relative to the request
	 * and the lowest score wins; a score of 0 ends the search early.
	 *
	 * When cred is non-NULL, inpcbs for which prison_equal_ip4() fails
	 * against cred's prison are ignored.  Returns NULL if nothing matches.
	 * The pcbinfo lock must be held.
	 */
	struct inpcb *
	in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
	    u_short lport, int wild_okay, struct ucred *cred)
	{
		struct inpcb *inp;
	#ifdef INET6
		int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
	#else
		int matchwild = 3;
	#endif
		int wildcard;

		INP_INFO_LOCK_ASSERT(pcbinfo);

		if (!wild_okay) {
			struct inpcbhead *head;
			/*
			 * Look for an unconnected (wildcard foreign addr) PCB that
			 * matches the local address and port we're looking for.
			 */
			head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
			    0, pcbinfo->ipi_hashmask)];
			LIST_FOREACH(inp, head, inp_hash) {
	#ifdef INET6
				/* XXX inp locking */
				if ((inp->inp_vflag & INP_IPV4) == 0)
					continue;
	#endif
				if (inp->inp_faddr.s_addr == INADDR_ANY &&
				    inp->inp_laddr.s_addr == laddr.s_addr &&
				    inp->inp_lport == lport) {
					/*
					 * Found?
					 */
					if (cred == NULL ||
					    prison_equal_ip4(cred->cr_prison,
						inp->inp_cred->cr_prison))
						return (inp);
				}
			}
			/*
			 * Not found.
			 */
			return (NULL);
		} else {
			struct inpcbporthead *porthash;
			struct inpcbport *phd;
			struct inpcb *match = NULL;
			/*
			 * Best fit PCB lookup.
			 *
			 * First see if this local port is in use by looking on the
			 * port hash list.
			 */
			porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
			    pcbinfo->ipi_porthashmask)];
			LIST_FOREACH(phd, porthash, phd_hash) {
				if (phd->phd_port == lport)
					break;
			}
			if (phd != NULL) {
				/*
				 * Port is in use by one or more PCBs. Look for best
				 * fit.
				 */
				LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
					wildcard = 0;
					if (cred != NULL &&
					    !prison_equal_ip4(inp->inp_cred->cr_prison,
						cred->cr_prison))
						continue;
	#ifdef INET6
					/* XXX inp locking */
					if ((inp->inp_vflag & INP_IPV4) == 0)
						continue;
					/*
					 * We never select the PCB that has
					 * INP_IPV6 flag and is bound to :: if
					 * we have another PCB which is bound
					 * to 0.0.0.0.  If a PCB has the
					 * INP_IPV6 flag, then we set its cost
					 * higher than IPv4 only PCBs.
					 *
					 * Note that the case only happens
					 * when a socket is bound to ::, under
					 * the condition that the use of the
					 * mapped address is allowed.
					 */
					if ((inp->inp_vflag & INP_IPV6) != 0)
						wildcard += INP_LOOKUP_MAPPED_PCB_COST;
	#endif
					if (inp->inp_faddr.s_addr != INADDR_ANY)
						wildcard++;
					if (inp->inp_laddr.s_addr != INADDR_ANY) {
						if (laddr.s_addr == INADDR_ANY)
							wildcard++;
						else if (inp->inp_laddr.s_addr != laddr.s_addr)
							continue;
					} else {
						if (laddr.s_addr != INADDR_ANY)
							wildcard++;
					}
					if (wildcard < matchwild) {
						match = inp;
						matchwild = wildcard;
						if (matchwild == 0)
							break;
					}
				}
			}
			return (match);
		}
	}
	#undef INP_LOOKUP_MAPPED_PCB_COST

	/*
	* Lookup PCB in hash list.
	*/
	/*
	 * Look up an IPv4 inpcb by 4-tuple.
	 *
	 * An exact match on { faddr, fport, laddr, lport } is tried first; among
	 * exact matches an inpcb whose credential has PR_IP4 set is returned
	 * immediately, otherwise the first exact match found is used.
	 *
	 * If no exact match exists and wildcard == INPLOOKUP_WILDCARD, unconnected
	 * inpcbs on lport are considered and the result is chosen in the order
	 * documented inside the function.  When ifp is an IFT_FAITH interface
	 * only inpcbs with INP_FAITH set are eligible for the wildcard match.
	 *
	 * Returns NULL if nothing matches.  The pcbinfo lock must be held.
	 */
	struct inpcb *
	in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
	    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
	    struct ifnet *ifp)
	{
		struct inpcbhead *head;
		struct inpcb *inp, *tmpinp;
		u_short fport = fport_arg, lport = lport_arg;

		INP_INFO_LOCK_ASSERT(pcbinfo);

		/*
		 * First look for an exact match.
		 */
		tmpinp = NULL;
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
	#ifdef INET6
			/* XXX inp locking */
			if ((inp->inp_vflag & INP_IPV4) == 0)
				continue;
	#endif
			if (inp->inp_faddr.s_addr == faddr.s_addr &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_fport == fport &&
			    inp->inp_lport == lport) {
				/*
				 * XXX We should be able to directly return
				 * the inp here, without any checks.
				 * Well unless both bound with SO_REUSEPORT?
				 */
				if (prison_flag(inp->inp_cred, PR_IP4))
					return (inp);
				if (tmpinp == NULL)
					tmpinp = inp;
			}
		}
		if (tmpinp != NULL)
			return (tmpinp);

		/*
		 * Then look for a wildcard match, if requested.
		 */
		if (wildcard == INPLOOKUP_WILDCARD) {
			struct inpcb *local_wild = NULL, *local_exact = NULL;
	#ifdef INET6
			struct inpcb *local_wild_mapped = NULL;
	#endif
			struct inpcb *jail_wild = NULL;
			int injail;

			/*
			 * Order of socket selection - we always prefer jails.
			 *      1. jailed, non-wild.
			 *      2. jailed, wild.
			 *      3. non-jailed, non-wild.
			 *      4. non-jailed, wild.
			 */

			head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
			    0, pcbinfo->ipi_hashmask)];
			LIST_FOREACH(inp, head, inp_hash) {
	#ifdef INET6
				/* XXX inp locking */
				if ((inp->inp_vflag & INP_IPV4) == 0)
					continue;
	#endif
				if (inp->inp_faddr.s_addr != INADDR_ANY ||
				    inp->inp_lport != lport)
					continue;

				/* XXX inp locking */
				if (ifp && ifp->if_type == IFT_FAITH &&
				    (inp->inp_flags & INP_FAITH) == 0)
					continue;

				injail = prison_flag(inp->inp_cred, PR_IP4);
				if (injail) {
					if (prison_check_ip4(inp->inp_cred,
					    &laddr) != 0)
						continue;
				} else {
					/*
					 * A non-jailed exact match is already
					 * recorded; only jailed candidates can
					 * still beat it.
					 */
					if (local_exact != NULL)
						continue;
				}

				if (inp->inp_laddr.s_addr == laddr.s_addr) {
					if (injail)
						return (inp);
					else
						local_exact = inp;
				} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
	#ifdef INET6
					/* XXX inp locking, NULL check */
					if (inp->inp_vflag & INP_IPV6PROTO)
						local_wild_mapped = inp;
					else
	#endif /* INET6 */
					if (injail)
						jail_wild = inp;
					else
						local_wild = inp;
				}
			} /* LIST_FOREACH */
			if (jail_wild != NULL)
				return (jail_wild);
			if (local_exact != NULL)
				return (local_exact);
			if (local_wild != NULL)
				return (local_wild);
	#ifdef INET6
			if (local_wild_mapped != NULL)
				return (local_wild_mapped);
	#endif /* defined(INET6) */
		} /* if (wildcard == INPLOOKUP_WILDCARD) */

		return (NULL);
	}

	/*
	 * Insert PCB onto various hash lists: the connection hash keyed on
	 * { faddr, lport, fport } and the per-port list keyed on lport.  A
	 * per-port head is allocated on demand with M_NOWAIT.
	 *
	 * Returns 0 on success or ENOBUFS if the per-port head cannot be
	 * allocated, in which case the inpcb is left off both lists.  The
	 * pcbinfo and inpcb write locks must be held and the inpcb must not
	 * already be hashed.
	 */
	int
	in_pcbinshash(struct inpcb *inp)
	{
		struct inpcbhead *pcbhash;
		struct inpcbporthead *pcbporthash;
		struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
		struct inpcbport *phd;
		u_int32_t hashkey_faddr;

		INP_INFO_WLOCK_ASSERT(pcbinfo);
		INP_WLOCK_ASSERT(inp);
		KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
		    ("in_pcbinshash: INP_INHASHLIST"));

		/* For IPv6 inpcbs only the last 32 bits of the address are hashed. */
	#ifdef INET6
		if (inp->inp_vflag & INP_IPV6)
			hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
		else
	#endif /* INET6 */
		hashkey_faddr = inp->inp_faddr.s_addr;

		pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
		    inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];

		pcbporthash = &pcbinfo->ipi_porthashbase[
		    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];

		/*
		 * Go through port list and look for a head for this lport.
		 */
		LIST_FOREACH(phd, pcbporthash, phd_hash) {
			if (phd->phd_port == inp->inp_lport)
				break;
		}
		/*
		 * If none exists, malloc one and tack it on.
		 */
		if (phd == NULL) {
			phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
			if (phd == NULL) {
				return (ENOBUFS); /* XXX */
			}
			phd->phd_port = inp->inp_lport;
			LIST_INIT(&phd->phd_pcblist);
			LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
		}
		inp->inp_phd = phd;
		LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
		LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
		inp->inp_flags |= INP_INHASHLIST;
		return (0);
	}

	/*
	* Move PCB to the proper hash bucket when { faddr, fport } have been
	* changed. NOTE: This does not handle the case of the lport changing (the
	* hashed port list would have to be updated as well), so the lport must
	* not change after in_pcbinshash() has been called.
	*/
	void
	in_pcbrehash(struct inpcb *inp)
	{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

	INP_INFO_WLOCK_ASSERT(pcbinfo);
	INP_WLOCK_ASSERT(inp);
	/* Only an inpcb that is already hashed can be moved. */
	KASSERT(inp->inp_flags & INP_INHASHLIST,
	("in_pcbrehash: !INP_INHASHLIST"));

	/* Same key derivation as in_pcbinshash(). */
	#ifdef INET6
	if (inp->inp_vflag & INP_IPV6)
	hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	else
	#endif /* INET6 */
	hashkey_faddr = inp->inp_faddr.s_addr;

	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
	inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];

	/* Unlink from the old bucket and insert at the head of the new one. */
	LIST_REMOVE(inp, inp_hash);
	LIST_INSERT_HEAD(head, inp, inp_hash);
	}

	/*
	* Remove PCB from various lists.
	*/
	static void
	in_pcbremlists(struct inpcb *inp)
	{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;

	INP_INFO_WLOCK_ASSERT(pcbinfo);
	INP_WLOCK_ASSERT(inp);

	/* Advance the global generation count and stamp this inpcb with it. */
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	/* Hash lists: only if the inpcb is still on them. */
	if (inp->inp_flags & INP_INHASHLIST) {
	struct inpcbport *phd = inp->inp_phd;

	LIST_REMOVE(inp, inp_hash);
	LIST_REMOVE(inp, inp_portlist);
	/* Free the per-port head once its last inpcb is gone. */
	if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
	LIST_REMOVE(phd, phd_hash);
	free(phd, M_PCB);
	}
	inp->inp_flags &= ~INP_INHASHLIST;
	}
	/* The global list removal and count update are unconditional. */
	LIST_REMOVE(inp, inp_list);
	pcbinfo->ipi_count--;
	}

	/*
	* A set label operation has occurred at the socket layer, propagate the
	* label change into the in_pcb for the socket.
	*/
	void
	in_pcbsosetlabel(struct socket *so)
	{
	/* Without MAC compiled in this function is an empty stub. */
	#ifdef MAC
	struct inpcb *inp;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	/* Lock order used here: inpcb first, then the socket. */
	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
	#endif
	}

	/*
	 * Periodic callout that decides whether random port allocation may
	 * continue.  It re-arms itself every hz ticks.
	 *
	 * For each vnet: if more than ipport_randomcps ports were allocated
	 * since the previous run, ipport_stoprandom is reloaded with
	 * ipport_randomtime; otherwise it is counted down towards zero.  The
	 * allocation counter is then snapshotted for the next comparison.
	 */
	void
	ipport_tick(void *xtp)
	{
		VNET_ITERATOR_DECL(vnet_iter);

		VNET_LIST_RLOCK_NOSLEEP();
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);	/* XXX appease INVARIANTS here */
			if (V_ipport_tcpallocs >
			    V_ipport_tcplastcount + V_ipport_randomcps) {
				/* Too many allocations: restart the hold-off. */
				V_ipport_stoprandom = V_ipport_randomtime;
			} else if (V_ipport_stoprandom > 0) {
				V_ipport_stoprandom--;
			}
			V_ipport_tcplastcount = V_ipport_tcpallocs;
			CURVNET_RESTORE();
		}
		VNET_LIST_RUNLOCK_NOSLEEP();
		callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
	}

	/*
	* Function wrapper around the INP_WLOCK() macro.
	*/
	void
	inp_wlock(struct inpcb *inp)
	{

	INP_WLOCK(inp);
	}

	/*
	* Function wrapper around the INP_WUNLOCK() macro.
	*/
	void
	inp_wunlock(struct inpcb *inp)
	{

	INP_WUNLOCK(inp);
	}

	/*
	* Function wrapper around the INP_RLOCK() macro.
	*/
	void
	inp_rlock(struct inpcb *inp)
	{

	INP_RLOCK(inp);
	}

	/*
	* Function wrapper around the INP_RUNLOCK() macro.
	*/
	void
	inp_runlock(struct inpcb *inp)
	{

	INP_RUNLOCK(inp);
	}

	#ifdef INVARIANTS
	/*
	* Function wrapper around INP_WLOCK_ASSERT(). Note that despite the
	* generic name it is the write-lock assertion that is applied. Only
	* compiled when INVARIANTS is defined.
	*/
	void
	inp_lock_assert(struct inpcb *inp)
	{

	INP_WLOCK_ASSERT(inp);
	}

	/*
	* Function wrapper around INP_UNLOCK_ASSERT(). Only compiled when
	* INVARIANTS is defined.
	*/
	void
	inp_unlock_assert(struct inpcb *inp)
	{

	INP_UNLOCK_ASSERT(inp);
	}
	#endif

	/*
	 * Call func(inp, arg) for every inpcb on the TCP pcbinfo list
	 * (V_tcbinfo).  The list is walked under the pcbinfo read lock and each
	 * inpcb is write-locked around its callback, so func runs with the
	 * inpcb locked and must leave it locked.
	 */
	void
	inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
	{
		struct inpcb *inp;

		INP_INFO_RLOCK(&V_tcbinfo);
		LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
			INP_WLOCK(inp);
			func(inp, arg);
			INP_WUNLOCK(inp);
		}
		INP_INFO_RUNLOCK(&V_tcbinfo);
	}

	/*
	* Accessor returning the socket pointer stored in the inpcb. The inpcb
	* write lock is asserted.
	*/
	struct socket *
	inp_inpcbtosocket(struct inpcb *inp)
	{

	INP_WLOCK_ASSERT(inp);
	return (inp->inp_socket);
	}

	/*
	* Accessor returning inp_ppcb cast to a struct tcpcb pointer. The inpcb
	* write lock is asserted. No check is made here that the inpcb actually
	* belongs to TCP.
	*/
	struct tcpcb *
	inp_inpcbtotcpcb(struct inpcb *inp)
	{

	INP_WLOCK_ASSERT(inp);
	return ((struct tcpcb *)inp->inp_ppcb);
	}

	/*
	* Accessor returning the inpcb's inp_ip_tos field. No lock is asserted
	* here.
	*/
	int
	inp_ip_tos_get(const struct inpcb *inp)
	{

	return (inp->inp_ip_tos);
	}

	/*
	* Store val into the inpcb's inp_ip_tos field. No lock is asserted and no
	* range check is applied here.
	*/
	void
	inp_ip_tos_set(struct inpcb *inp, int val)
	{

	inp->inp_ip_tos = val;
	}

	/*
	 * Copy the inpcb's connection 4-tuple into the caller's variables:
	 * local address and port into *laddr and *lp, foreign address and port
	 * into *faddr and *fp.  The values are copied as stored, with no
	 * byte-order conversion.  The inpcb lock must be held.
	 */
	void
	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
	    uint32_t *faddr, uint16_t *fp)
	{

		INP_LOCK_ASSERT(inp);
		*laddr = inp->inp_laddr.s_addr;
		*faddr = inp->inp_faddr.s_addr;
		*lp = inp->inp_lport;
		*fp = inp->inp_fport;
	}

	/*
	* Function wrapper around the sotoinpcb() macro.
	*/
	struct inpcb *
	so_sotoinpcb(struct socket *so)
	{

	return (sotoinpcb(so));
	}

	/*
	* Function wrapper around the sototcpcb() macro.
	*/
	struct tcpcb *
	so_sototcpcb(struct socket *so)
	{

	return (sototcpcb(so));
	}

	#ifdef DDB
	/*
	 * Emit 'indent' spaces on the debugger console.  Nothing is printed for
	 * a zero or negative indent.
	 */
	static void
	db_print_indent(int indent)
	{

		while (indent-- > 0)
			db_printf(" ");
	}

	static void
	db_print_inconninfo(struct in_conninfo inc, const char name, int indent)
	{
	char faddr_str[48], laddr_str[48];

	db_print_indent(indent);
	db_printf("%s at %p\n", name, inc);

	indent += 2;

	#ifdef INET6
	if (inc->inc_flags & INC_ISIPV6) {
	/* IPv6. */
	ip6_sprintf(laddr_str, &inc->inc6_laddr);
	ip6_sprintf(faddr_str, &inc->inc6_faddr);
	} else {
	#endif
	/* IPv4. */
	inet_ntoa_r(inc->inc_laddr, laddr_str);
	inet_ntoa_r(inc->inc_faddr, faddr_str);
	#ifdef INET6
	}
	#endif
	db_print_indent(indent);
	db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
	ntohs(inc->inc_lport));
	db_print_indent(indent);
	db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
	ntohs(inc->inc_fport));
	}

	static void
	db_print_inpflags(int inp_flags)
	{
	int comma;

	comma = 0;
	if (inp_flags & INP_RECVOPTS) {
	db_printf("%sINP_RECVOPTS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_RECVRETOPTS) {
	db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_RECVDSTADDR) {
	db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_HDRINCL) {
	db_printf("%sINP_HDRINCL", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_HIGHPORT) {
	db_printf("%sINP_HIGHPORT", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_LOWPORT) {
	db_printf("%sINP_LOWPORT", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_ANONPORT) {
	db_printf("%sINP_ANONPORT", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_RECVIF) {
	db_printf("%sINP_RECVIF", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_MTUDISC) {
	db_printf("%sINP_MTUDISC", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_FAITH) {
	db_printf("%sINP_FAITH", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_RECVTTL) {
	db_printf("%sINP_RECVTTL", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_DONTFRAG) {
	db_printf("%sINP_DONTFRAG", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_IPV6_V6ONLY) {
	db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_PKTINFO) {
	db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_HOPLIMIT) {
	db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_HOPOPTS) {
	db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_DSTOPTS) {
	db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_RTHDR) {
	db_printf("%sIN6P_RTHDR", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_RTHDRDSTOPTS) {
	db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_TCLASS) {
	db_printf("%sIN6P_TCLASS", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_AUTOFLOWLABEL) {
	db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_TIMEWAIT) {
	db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_ONESBCAST) {
	db_printf("%sINP_ONESBCAST", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_DROPPED) {
	db_printf("%sINP_DROPPED", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & INP_SOCKREF) {
	db_printf("%sINP_SOCKREF", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_RFC2292) {
	db_printf("%sIN6P_RFC2292", comma ? ", " : "");
	comma = 1;
	}
	if (inp_flags & IN6P_MTU) {
	db_printf("IN6P_MTU%s", comma ? ", " : "");
	comma = 1;
	}
	}

	/*
	 * Print the symbolic names of the bits set in inp_vflag (INP_IPV4,
	 * INP_IPV6, INP_IPV6PROTO) as a comma-separated list on the debugger
	 * console.
	 */
	static void
	db_print_inpvflag(u_char inp_vflag)
	{
		const char *sep;

		/* Empty before the first name, ", " before every later one. */
		sep = "";
		if (inp_vflag & INP_IPV4) {
			db_printf("%sINP_IPV4", sep);
			sep = ", ";
		}
		if (inp_vflag & INP_IPV6) {
			db_printf("%sINP_IPV6", sep);
			sep = ", ";
		}
		if (inp_vflag & INP_IPV6PROTO) {
			db_printf("%sINP_IPV6PROTO", sep);
			sep = ", ";
		}
	}

	static void
	db_print_inpcb(struct inpcb inp, const char name, int indent)
	{

	db_print_indent(indent);
	db_printf("%s at %p\n", name, inp);

	indent += 2;

	db_print_indent(indent);
	db_printf("inp_flow: 0x%x\n", inp->inp_flow);

	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);

	db_print_indent(indent);
	db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
	inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);

	db_print_indent(indent);
	db_printf("inp_label: %p inp_flags: 0x%x (",
	inp->inp_label, inp->inp_flags);
	db_print_inpflags(inp->inp_flags);
	db_printf(")\n");

	db_print_indent(indent);
	db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
	inp->inp_vflag);
	db_print_inpvflag(inp->inp_vflag);
	db_printf(")\n");

	db_print_indent(indent);
	db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
	inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);

	db_print_indent(indent);
	#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
	db_printf("in6p_options: %p in6p_outputopts: %p "
	"in6p_moptions: %p\n", inp->in6p_options,
	inp->in6p_outputopts, inp->in6p_moptions);
	db_printf("in6p_icmp6filt: %p in6p_cksum %d "
	"in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
	inp->in6p_hops);
	} else
	#endif
	{
	db_printf("inp_ip_tos: %d inp_ip_options: %p "
	"inp_ip_moptions: %p\n", inp->inp_ip_tos,
	inp->inp_options, inp->inp_moptions);
	}

	db_print_indent(indent);
	db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
	(uintmax_t)inp->inp_gencnt);
	}

	/*
	* DDB "show inpcb <addr>" command: treat addr as a pointer to a struct
	* inpcb and dump it with db_print_inpcb(). A usage message is printed
	* when no address was supplied. The address is not validated.
	*/
	DB_SHOW_COMMAND(inpcb, db_show_inpcb)
	{
	struct inpcb *inp;

	if (!have_addr) {
	db_printf("usage: show inpcb <addr>\n");
	return;
	}
	inp = (struct inpcb *)addr;

	db_print_inpcb(inp, "inpcb", 0);
	}
	#endif
	Index: stable/8/sys/netinet/ip_options.c
	===================================================================
	--- stable/8/sys/netinet/ip_options.c (revision 209276)
	+++ stable/8/sys/netinet/ip_options.c (revision 209277)
	@@ -1,745 +1,745 @@
	/*
	* Copyright (c) 1982, 1986, 1988, 1993
	* The Regents of the University of California.
	* Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_ipstealth.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mbuf.h>
	#include <sys/domain.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/syslog.h>
	#include <sys/sysctl.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/if_var.h>
	#include <net/if_dl.h>
	#include <net/route.h>
	#include <net/netisr.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_systm.h>
	#include <netinet/in_var.h>
	#include <netinet/ip.h>
	#include <netinet/in_pcb.h>
	#include <netinet/ip_var.h>
	#include <netinet/ip_options.h>
	#include <netinet/ip_icmp.h>
	#include <machine/in_cksum.h>

	#include <sys/socketvar.h>

	#include <security/mac/mac_framework.h>

	/* net.inet.ip.sourceroute: forward source routed packets when non-zero. */
	static int ip_dosourceroute = 0;
	SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
	    &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");

	/* net.inet.ip.accept_sourceroute: accept source routed packets when non-zero. */
	static int ip_acceptsourceroute = 0;
	SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
	    CTLFLAG_RW, &ip_acceptsourceroute, 0,
	    "Enable accepting source routed IP packets");

	/* net.inet.ip.process_options: global policy for packets carrying options. */
	int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
	SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
	    &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");

	/*
	 * Called from ip_dooptions() with the packet, a pointer to the source
	 * route option inside it, and the packet's source address.
	 */
	static void	save_rte(struct mbuf *m, u_char *, struct in_addr);

	/*
	* Do option processing on a datagram, possibly discarding it if bad options
	* are encountered, or forwarding it if source-routed.
	*
	* The pass argument is used when operating in the IPSTEALTH mode to tell
	* what options to process: [LS]SRR (pass 0) or the others (pass 1). The
	* reason for as many as two passes is that when doing IPSTEALTH, non-routing
	* options should be processed only if the packet is for us.
	*
	* Returns 1 if packet has been forwarded/freed, 0 if the packet should be
	* processed further.
	*/
	int
	ip_dooptions(struct mbuf *m, int pass)
	{
	struct ip ip = mtod(m, struct ip );
	u_char *cp;
	struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr *sin, dst;
	uint32_t ntime;
	struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };

	/* Ignore or reject packets with IP options. */
	if (ip_doopts == 0)
	return 0;
	else if (ip_doopts == 2) {
	type = ICMP_UNREACH;
	code = ICMP_UNREACH_FILTER_PROHIB;
	goto bad;
	}

	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
	opt = cp[IPOPT_OPTVAL];
	if (opt == IPOPT_EOL)
	break;
	if (opt == IPOPT_NOP)
	optlen = 1;
	else {
	if (cnt < IPOPT_OLEN + sizeof(*cp)) {
	code = &cp[IPOPT_OLEN] - (u_char *)ip;
	goto bad;
	}
	optlen = cp[IPOPT_OLEN];
	if (optlen < IPOPT_OLEN + sizeof(*cp) \|\| optlen > cnt) {
	code = &cp[IPOPT_OLEN] - (u_char *)ip;
	goto bad;
	}
	}
	switch (opt) {

	default:
	break;

	/*
	* Source routing with record. Find interface with current
	* destination address. If none on this machine then drop if
	* strictly routed, or do nothing if loosely routed. Record
	* interface address and bring up next address component. If
	* strictly routed make sure next address is on directly
	* accessible net.
	*/
	case IPOPT_LSRR:
	case IPOPT_SSRR:
	#ifdef IPSTEALTH
	if (V_ipstealth && pass > 0)
	break;
	#endif
	if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
	code = &cp[IPOPT_OLEN] - (u_char *)ip;
	goto bad;
	}
	if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	ipaddr.sin_addr = ip->ip_dst;
	if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr)
	== 0) {
	if (opt == IPOPT_SSRR) {
	type = ICMP_UNREACH;
	code = ICMP_UNREACH_SRCFAIL;
	goto bad;
	}
	if (!ip_dosourceroute)
	goto nosourcerouting;
	/*
	* Loose routing, and not at next destination
	* yet; nothing to do except forward.
	*/
	break;
	}
	off--; /* 0 origin */
	if (off > optlen - (int)sizeof(struct in_addr)) {
	/*
	* End of source route. Should be for us.
	*/
	if (!ip_acceptsourceroute)
	goto nosourcerouting;
	save_rte(m, cp, ip->ip_src);
	break;
	}
	#ifdef IPSTEALTH
	if (V_ipstealth)
	goto dropit;
	#endif
	if (!ip_dosourceroute) {
	if (V_ipforwarding) {
	char buf[16]; /* aaa.bbb.ccc.ddd\0 */
	/*
	* Acting as a router, so generate
	* ICMP
	*/
	nosourcerouting:
	strcpy(buf, inet_ntoa(ip->ip_dst));
	log(LOG_WARNING,
	"attempted source route from %s to %s\n",
	inet_ntoa(ip->ip_src), buf);
	type = ICMP_UNREACH;
	code = ICMP_UNREACH_SRCFAIL;
	goto bad;
	} else {
	/*
	* Not acting as a router, so
	* silently drop.
	*/
	#ifdef IPSTEALTH
	dropit:
	#endif
	IPSTAT_INC(ips_cantforward);
	m_freem(m);
	return (1);
	}
	}

	/*
	* locate outgoing interface
	*/
	(void)memcpy(&ipaddr.sin_addr, cp + off,
	sizeof(ipaddr.sin_addr));

	if (opt == IPOPT_SSRR) {
	#define INA struct in_ifaddr *
	#define SA struct sockaddr *
	if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
	- ia = (INA)ifa_ifwithnet((SA)&ipaddr);
	+ ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0);
	} else
	/* XXX MRT 0 for routing */
	ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
	if (ia == NULL) {
	type = ICMP_UNREACH;
	code = ICMP_UNREACH_SRCFAIL;
	goto bad;
	}
	ip->ip_dst = ipaddr.sin_addr;
	(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
	sizeof(struct in_addr));
	ifa_free(&ia->ia_ifa);
	cp[IPOPT_OFFSET] += sizeof(struct in_addr);
	/*
	* Let ip_intr's mcast routing check handle mcast pkts
	*/
	forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
	break;

	case IPOPT_RR:
	#ifdef IPSTEALTH
	if (V_ipstealth && pass == 0)
	break;
	#endif
	if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	/*
	* If no space remains, ignore.
	*/
	off--; /* 0 origin */
	if (off > optlen - (int)sizeof(struct in_addr))
	break;
	(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
	sizeof(ipaddr.sin_addr));
	/*
	* Locate outgoing interface; if we're the
	* destination, use the incoming interface (should be
	* same).
	*/
	if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
	(ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
	type = ICMP_UNREACH;
	code = ICMP_UNREACH_HOST;
	goto bad;
	}
	(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
	sizeof(struct in_addr));
	ifa_free(&ia->ia_ifa);
	cp[IPOPT_OFFSET] += sizeof(struct in_addr);
	break;

	case IPOPT_TS:
	#ifdef IPSTEALTH
	if (V_ipstealth && pass == 0)
	break;
	#endif
	code = cp - (u_char *)ip;
	if (optlen < 4 || optlen > 40) {
	code = &cp[IPOPT_OLEN] - (u_char *)ip;
	goto bad;
	}
	if ((off = cp[IPOPT_OFFSET]) < 5) {
	code = &cp[IPOPT_OLEN] - (u_char *)ip;
	goto bad;
	}
	if (off > optlen - (int)sizeof(int32_t)) {
	cp[IPOPT_OFFSET + 1] += (1 << 4);
	if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	break;
	}
	off--; /* 0 origin */
	sin = (struct in_addr *)(cp + off);
	switch (cp[IPOPT_OFFSET + 1] & 0x0f) {

	case IPOPT_TS_TSONLY:
	break;

	case IPOPT_TS_TSANDADDR:
	if (off + sizeof(uint32_t) +
	sizeof(struct in_addr) > optlen) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	ipaddr.sin_addr = dst;
	ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
	m->m_pkthdr.rcvif);
	if (ia == NULL)
	continue;
	(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
	sizeof(struct in_addr));
	ifa_free(&ia->ia_ifa);
	cp[IPOPT_OFFSET] += sizeof(struct in_addr);
	off += sizeof(struct in_addr);
	break;

	case IPOPT_TS_PRESPEC:
	if (off + sizeof(uint32_t) +
	sizeof(struct in_addr) > optlen) {
	code = &cp[IPOPT_OFFSET] - (u_char *)ip;
	goto bad;
	}
	(void)memcpy(&ipaddr.sin_addr, sin,
	sizeof(struct in_addr));
	if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
	continue;
	cp[IPOPT_OFFSET] += sizeof(struct in_addr);
	off += sizeof(struct in_addr);
	break;

	default:
	code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
	goto bad;
	}
	ntime = iptime();
	(void)memcpy(cp + off, &ntime, sizeof(uint32_t));
	cp[IPOPT_OFFSET] += sizeof(uint32_t);
	}
	}
	if (forward && V_ipforwarding) {
	ip_forward(m, 1);
	return (1);
	}
	return (0);
	bad:
	icmp_error(m, type, code, 0, 0);
	IPSTAT_INC(ips_badoptions);
	return (1);
	}

	/*
	 * Save incoming source route for use in replies, to be picked up later by
	 * ip_srcroute if the receiver is interested.
	 *
	 * m      - packet that receives the PACKET_TAG_IPOPTIONS tag.
	 * option - start of the source-route option; option[IPOPT_OLEN] holds its
	 *          length in bytes.
	 * dst    - address recorded in the tag as ip_srcrt.dst.
	 *
	 * Nothing is saved (and no error is reported) when the tag cannot be
	 * allocated or when the option is too long for the tag's ip_srcrt area.
	 */
	static void
	save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
	{
		unsigned olen;
		struct ipopt_tag *opts;

		/* M_NOWAIT allocation: a NULL result simply drops the route. */
		opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
		    sizeof(struct ipopt_tag), M_NOWAIT);
		if (opts == NULL)
			return;

		olen = option[IPOPT_OLEN];
		/*
		 * Reject options that do not fit once one byte plus sizeof(dst)
		 * are reserved (presumably ip_srcrt.nop and ip_srcrt.dst --
		 * confirm against struct ipopt_tag).
		 */
		if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
			m_tag_free((struct m_tag *)opts);
			return;
		}
		bcopy(option, opts->ip_srcrt.srcopt, olen);
		/* Hop count: bytes after the option header, in in_addr units. */
		opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
		opts->ip_srcrt.dst = dst;
		m_tag_prepend(m, (struct m_tag *)opts);
	}

	/*
	* Retrieve incoming source route for use in replies, in the same form used
	* by setsockopt. The first hop is placed before the options, will be
	* removed later.
	*/
	struct mbuf *
	ip_srcroute(struct mbuf *m0)
	{
	struct in_addr p, q;
	struct mbuf *m;
	struct ipopt_tag *opts;

	opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
	if (opts == NULL)
	return (NULL);

	if (opts->ip_nhops == 0)
	return (NULL);
	m = m_get(M_DONTWAIT, MT_DATA);
	if (m == NULL)
	return (NULL);

	#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))

	/* length is (nhops+1)sizeof(addr) + sizeof(nop + srcrt header) /
	m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
	sizeof(struct in_addr) + OPTSIZ;

	/*
	* First, save first hop for return route.
	*/
	p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
	(mtod(m, struct in_addr )) = *p--;

	/*
	* Copy option fields and padding (nop) to mbuf.
	*/
	opts->ip_srcrt.nop = IPOPT_NOP;
	opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
	&(opts->ip_srcrt.nop), OPTSIZ);
	q = (struct in_addr *)(mtod(m, caddr_t) +
	sizeof(struct in_addr) + OPTSIZ);
	#undef OPTSIZ
	/*
	* Record return path as an IP source route, reversing the path
	* (pointers are now aligned).
	*/
	while (p >= opts->ip_srcrt.route) {
	q++ = p--;
	}
	/*
	* Last hop goes to final destination.
	*/
	*q = opts->ip_srcrt.dst;
	m_tag_delete(m0, (struct m_tag *)opts);
	return (m);
	}

	/*
	* Strip out IP options, at higher level protocol in the kernel. Second
	* argument is buffer to which options will be moved, and return value is
	* their length.
	*
	* XXX should be deleted; last arg currently ignored.
	*/
	void
	ip_stripoptions(struct mbuf m, struct mbuf mopt)
	{
	int i;
	struct ip ip = mtod(m, struct ip );
	caddr_t opts;
	int olen;

	olen = (ip->ip_hl << 2) - sizeof (struct ip);
	opts = (caddr_t)(ip + 1);
	i = m->m_len - (sizeof (struct ip) + olen);
	bcopy(opts + olen, opts, (unsigned)i);
	m->m_len -= olen;
	if (m->m_flags & M_PKTHDR)
	m->m_pkthdr.len -= olen;
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(struct ip) >> 2;
	}

	/*
	* Insert IP options into preformed packet. Adjust IP destination as
	* required for IP source routing, as indicated by a non-zero in_addr at the
	* start of the options.
	*
	* XXX This routine assumes that the packet has no options in place.
	*/
	struct mbuf *
	ip_insertoptions(struct mbuf m, struct mbuf opt, int *phlen)
	{
	struct ipoption p = mtod(opt, struct ipoption );
	struct mbuf *n;
	struct ip ip = mtod(m, struct ip );
	unsigned optlen;

	optlen = opt->m_len - sizeof(p->ipopt_dst);
	if (optlen + ip->ip_len > IP_MAXPACKET) {
	*phlen = 0;
	return (m); /* XXX should fail */
	}
	if (p->ipopt_dst.s_addr)
	ip->ip_dst = p->ipopt_dst;
	if (m->m_flags & M_EXT \|\| m->m_data - optlen < m->m_pktdat) {
	MGETHDR(n, M_DONTWAIT, MT_DATA);
	if (n == NULL) {
	*phlen = 0;
	return (m);
	}
	M_MOVE_PKTHDR(n, m);
	n->m_pkthdr.rcvif = NULL;
	n->m_pkthdr.len += optlen;
	m->m_len -= sizeof(struct ip);
	m->m_data += sizeof(struct ip);
	n->m_next = m;
	m = n;
	m->m_len = optlen + sizeof(struct ip);
	m->m_data += max_linkhdr;
	bcopy(ip, mtod(m, void *), sizeof(struct ip));
	} else {
	m->m_data -= optlen;
	m->m_len += optlen;
	m->m_pkthdr.len += optlen;
	bcopy(ip, mtod(m, void *), sizeof(struct ip));
	}
	ip = mtod(m, struct ip *);
	bcopy(p->ipopt_list, ip + 1, optlen);
	*phlen = sizeof(struct ip) + optlen;
	ip->ip_v = IPVERSION;
	ip->ip_hl = *phlen >> 2;
	ip->ip_len += optlen;
	return (m);
	}

	/*
	* Copy options from ip to jp, omitting those not copied during
	* fragmentation.
	*/
	int
	ip_optcopy(struct ip ip, struct ip jp)
	{
	u_char cp, dp;
	int opt, optlen, cnt;

	cp = (u_char *)(ip + 1);
	dp = (u_char *)(jp + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
	opt = cp[0];
	if (opt == IPOPT_EOL)
	break;
	if (opt == IPOPT_NOP) {
	/* Preserve for IP mcast tunnel's LSRR alignment. */
	*dp++ = IPOPT_NOP;
	optlen = 1;
	continue;
	}

	KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
	("ip_optcopy: malformed ipv4 option"));
	optlen = cp[IPOPT_OLEN];
	KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
	("ip_optcopy: malformed ipv4 option"));

	/* Bogus lengths should have been caught by ip_dooptions. */
	if (optlen > cnt)
	optlen = cnt;
	if (IPOPT_COPIED(opt)) {
	bcopy(cp, dp, optlen);
	dp += optlen;
	}
	}
	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
	*dp++ = IPOPT_EOL;
	return (optlen);
	}

	/*
	* Set up IP options in pcb for insertion in output packets. Store in mbuf
	* with pointer in pcbopt, adding pseudo-option with destination address if
	* source routed.
	*/
	int
	ip_pcbopts(struct inpcb inp, int optname, struct mbuf m)
	{
	int cnt, optlen;
	u_char *cp;
	struct mbuf **pcbopt;
	u_char opt;

	INP_WLOCK_ASSERT(inp);

	pcbopt = &inp->inp_options;

	/* turn off any old options */
	if (*pcbopt)
	(void)m_free(*pcbopt);
	*pcbopt = 0;
	if (m == NULL \|\| m->m_len == 0) {
	/*
	* Only turning off any previous options.
	*/
	if (m != NULL)
	(void)m_free(m);
	return (0);
	}

	if (m->m_len % sizeof(int32_t))
	goto bad;
	/*
	* IP first-hop destination address will be stored before actual
	* options; move other options back and clear it when none present.
	*/
	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
	goto bad;
	cnt = m->m_len;
	m->m_len += sizeof(struct in_addr);
	cp = mtod(m, u_char *) + sizeof(struct in_addr);
	bcopy(mtod(m, void *), cp, (unsigned)cnt);
	bzero(mtod(m, void *), sizeof(struct in_addr));

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
	opt = cp[IPOPT_OPTVAL];
	if (opt == IPOPT_EOL)
	break;
	if (opt == IPOPT_NOP)
	optlen = 1;
	else {
	if (cnt < IPOPT_OLEN + sizeof(*cp))
	goto bad;
	optlen = cp[IPOPT_OLEN];
	if (optlen < IPOPT_OLEN + sizeof(*cp) \|\| optlen > cnt)
	goto bad;
	}
	switch (opt) {

	default:
	break;

	case IPOPT_LSRR:
	case IPOPT_SSRR:
	/*
	* User process specifies route as:
	*
	* ->A->B->C->D
	*
	* D must be our final destination (but we can't
	* check that since we may not have connected yet).
	* A is first hop destination, which doesn't appear
	* in actual IP option, but is stored before the
	* options.
	*/
	/* XXX-BZ PRIV_NETINET_SETHDROPTS? */
	if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
	goto bad;
	m->m_len -= sizeof(struct in_addr);
	cnt -= sizeof(struct in_addr);
	optlen -= sizeof(struct in_addr);
	cp[IPOPT_OLEN] = optlen;
	/*
	* Move first hop before start of options.
	*/
	bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
	sizeof(struct in_addr));
	/*
	* Then copy rest of options back
	* to close up the deleted entry.
	*/
	bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
	&cp[IPOPT_OFFSET+1],
	(unsigned)cnt - (IPOPT_MINOFF - 1));
	break;
	}
	}
	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
	goto bad;
	*pcbopt = m;
	return (0);

	bad:
	(void)m_free(m);
	return (EINVAL);
	}

	/*
	* Check for the presence of the IP Router Alert option [RFC2113]
	* in the header of an IPv4 datagram.
	*
	* This call is not intended for use from the forwarding path; it is here
	* so that protocol domains may check for the presence of the option.
	* Given how FreeBSD's IPv4 stack is currently structured, the Router Alert
	* option does not have much relevance to the implementation, though this
	* may change in future.
	* Router alert options SHOULD be passed if running in IPSTEALTH mode and
	* we are not the endpoint.
	* Length checks on individual options should already have been peformed
	* by ip_dooptions() therefore they are folded under INVARIANTS here.
	*
	* Return zero if not present or options are invalid, non-zero if present.
	*/
	int
	ip_checkrouteralert(struct mbuf *m)
	{
	struct ip ip = mtod(m, struct ip );
	u_char *cp;
	int opt, optlen, cnt, found_ra;

	found_ra = 0;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
	opt = cp[IPOPT_OPTVAL];
	if (opt == IPOPT_EOL)
	break;
	if (opt == IPOPT_NOP)
	optlen = 1;
	else {
	#ifdef INVARIANTS
	if (cnt < IPOPT_OLEN + sizeof(*cp))
	break;
	#endif
	optlen = cp[IPOPT_OLEN];
	#ifdef INVARIANTS
	if (optlen < IPOPT_OLEN + sizeof(*cp) \|\| optlen > cnt)
	break;
	#endif
	}
	switch (opt) {
	case IPOPT_RA:
	#ifdef INVARIANTS
	if (optlen != IPOPT_OFFSET + sizeof(uint16_t) \|\|
	(((uint16_t )&cp[IPOPT_OFFSET]) != 0))
	break;
	else
	#endif
	found_ra = 1;
	break;
	default:
	break;
	}
	}

	return (found_ra);
	}
	Index: stable/8/sys/netinet/ip_output.c
	===================================================================
	--- stable/8/sys/netinet/ip_output.c (revision 209276)
	+++ stable/8/sys/netinet/ip_output.c (revision 209277)
	@@ -1,1278 +1,1278 @@
	/*-
	* Copyright (c) 1982, 1986, 1988, 1990, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_ipfw.h"
	#include "opt_ipsec.h"
	#include "opt_route.h"
	#include "opt_mbuf_stress_test.h"
	#include "opt_mpath.h"
	#include "opt_sctp.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/kernel.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sysctl.h>
	#include <sys/ucred.h>

	#include <net/if.h>
	#include <net/if_llatbl.h>
	#include <net/netisr.h>
	#include <net/pfil.h>
	#include <net/route.h>
	#include <net/flowtable.h>
	#ifdef RADIX_MPATH
	#include <net/radix_mpath.h>
	#endif
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_systm.h>
	#include <netinet/ip.h>
	#include <netinet/in_pcb.h>
	#include <netinet/in_var.h>
	#include <netinet/ip_var.h>
	#include <netinet/ip_options.h>
	#ifdef SCTP
	#include <netinet/sctp.h>
	#include <netinet/sctp_crc32.h>
	#endif

	#ifdef IPSEC
	#include <netinet/ip_ipsec.h>
	#include <netipsec/ipsec.h>
	#endif /* IPSEC*/

	#include <machine/in_cksum.h>

	#include <security/mac/mac_framework.h>

	/*
	 * Print string x, then the IPv4 address in a.s_addr (network byte order)
	 * as a dotted quad, then string y.
	 *
	 * Arguments are parenthesized so that expressions such as *p can be
	 * passed for a.  NOTE: a is evaluated four times, and the expansion
	 * already ends in a semicolon.
	 */
	#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
	(x), (ntohl((a).s_addr)>>24)&0xFF,\
	(ntohl((a).s_addr)>>16)&0xFF,\
	(ntohl((a).s_addr)>>8)&0xFF,\
	(ntohl((a).s_addr))&0xFF, (y));

	VNET_DEFINE(u_short, ip_id);

	#ifdef MBUF_STRESS_TEST
	int mbuf_frag_size = 0;
	SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
	#endif

	static void ip_mloopback
	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);


	extern int in_mcast_loop;
	extern struct protosw inetsw[];

	/*
	* IP output. The packet in mbuf chain m contains a skeletal IP
	* header (with len, off, ttl, proto, tos, src, dst).
	* The mbuf chain containing the packet will be freed.
	* The mbuf opt, if present, will not be freed.
	* In the IP forwarding case, the packet will arrive with options already
	* inserted, so must have a NULL opt pointer.
	*/
	int
	ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
	struct ip_moptions *imo, struct inpcb *inp)
	{
	struct ip *ip;
	struct ifnet *ifp = NULL; /* keep compiler happy */
	struct mbuf *m0;
	int hlen = sizeof (struct ip);
	int mtu;
	int len, error = 0;
	int nortfree = 0;
	struct sockaddr_in *dst = NULL; /* keep compiler happy */
	struct in_ifaddr *ia = NULL;
	int isbroadcast, sw_csum;
	struct route iproute;
	struct rtentry *rte; /* cache for ro->ro_rt */
	struct in_addr odst;
	#ifdef IPFIREWALL_FORWARD
	struct m_tag *fwd_tag = NULL;
	#endif
	#ifdef IPSEC
	int no_route_but_check_spd = 0;
	#endif
	M_ASSERTPKTHDR(m);

	if (inp != NULL) {
	INP_LOCK_ASSERT(inp);
	M_SETFIB(m, inp->inp_inc.inc_fibnum);
	if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
	m->m_pkthdr.flowid = inp->inp_flowid;
	m->m_flags |= M_FLOWID;
	}
	}

	if (ro == NULL) {
	ro = &iproute;
	bzero(ro, sizeof (*ro));

	#ifdef FLOWTABLE
	{
	struct flentry *fle;

	/*
	* The flow table returns route entries valid for up to 30
	* seconds; we rely on the remainder of ip_output() taking no
	* longer than that long for the stability of ro_rt. The
	* flow ID assignment must have happened before this point.
	*/
	if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
	flow_to_route(fle, ro);
	nortfree = 1;
	}
	}
	#endif
	}

	if (opt) {
	len = 0;
	m = ip_insertoptions(m, opt, &len);
	if (len != 0)
	hlen = len;
	}
	ip = mtod(m, struct ip *);

	/*
	* Fill in IP header. If we are not allowing fragmentation,
	* then the ip_id field is meaningless, but we don't set it
	* to zero. Doing so causes various problems when devices along
	* the path (routers, load balancers, firewalls, etc.) illegally
	* disable DF on our packet. Note that a 16-bit counter
	* will wrap around in less than 10 seconds at 100 Mbit/s on a
	* medium with MTU 1500. See Steven M. Bellovin, "A Technique
	* for Counting NATted Hosts", Proc. IMW'02, available at
	* <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
	*/
	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
	ip->ip_v = IPVERSION;
	ip->ip_hl = hlen >> 2;
	ip->ip_id = ip_newid();
	IPSTAT_INC(ips_localout);
	} else {
	hlen = ip->ip_hl << 2;
	}

	dst = (struct sockaddr_in *)&ro->ro_dst;
	again:
	/*
	* If there is a cached route,
	* check that it is to the same destination
	* and is still up. If not, free it and try again.
	* The address family should also be checked in case of sharing the
	* cache with IPv6.
	*/
	rte = ro->ro_rt;
	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
	rte->rt_ifp == NULL ||
	!RT_LINK_IS_UP(rte->rt_ifp) ||
	dst->sin_family != AF_INET ||
	dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
	if (!nortfree)
	RTFREE(rte);
	rte = ro->ro_rt = (struct rtentry *)NULL;
	ro->ro_lle = (struct llentry *)NULL;
	}
	#ifdef IPFIREWALL_FORWARD
	if (rte == NULL && fwd_tag == NULL) {
	#else
	if (rte == NULL) {
	#endif
	bzero(dst, sizeof(*dst));
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;
	}
	/*
	* If routing to interface only, short circuit routing lookup.
	* The use of an all-ones broadcast address implies this; an
	* interface is specified by the broadcast address of an interface,
	* or the destination address of a ptp interface.
	*/
	if (flags & IP_SENDONES) {
	if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
	(ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
	IPSTAT_INC(ips_noroute);
	error = ENETUNREACH;
	goto bad;
	}
	ip->ip_dst.s_addr = INADDR_BROADCAST;
	dst->sin_addr = ip->ip_dst;
	ifp = ia->ia_ifp;
	ip->ip_ttl = 1;
	isbroadcast = 1;
	} else if (flags & IP_ROUTETOIF) {
	if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
	- (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
	+ (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0))) == NULL) {
	IPSTAT_INC(ips_noroute);
	error = ENETUNREACH;
	goto bad;
	}
	ifp = ia->ia_ifp;
	ip->ip_ttl = 1;
	isbroadcast = in_broadcast(dst->sin_addr, ifp);
	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
	imo != NULL && imo->imo_multicast_ifp != NULL) {
	/*
	* Bypass the normal routing lookup for multicast
	* packets if the interface is specified.
	*/
	ifp = imo->imo_multicast_ifp;
	IFP_TO_IA(ifp, ia);
	isbroadcast = 0; /* fool gcc */
	} else {
	/*
	* We want to do any cloning requested by the link layer,
	* as this is probably required in all cases for correct
	* operation (as it is for ARP).
	*/
	if (rte == NULL) {
	#ifdef RADIX_MPATH
	rtalloc_mpath_fib(ro,
	ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
	inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
	#else
	in_rtalloc_ign(ro, 0,
	inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
	#endif
	rte = ro->ro_rt;
	}
	if (rte == NULL ||
	rte->rt_ifp == NULL ||
	!RT_LINK_IS_UP(rte->rt_ifp)) {
	#ifdef IPSEC
	/*
	* There is no route for this packet, but it is
	* possible that a matching SPD entry exists.
	*/
	no_route_but_check_spd = 1;
	mtu = 0; /* Silence GCC warning. */
	goto sendit;
	#endif
	IPSTAT_INC(ips_noroute);
	error = EHOSTUNREACH;
	goto bad;
	}
	ia = ifatoia(rte->rt_ifa);
	ifa_ref(&ia->ia_ifa);
	ifp = rte->rt_ifp;
	rte->rt_rmx.rmx_pksent++;
	if (rte->rt_flags & RTF_GATEWAY)
	dst = (struct sockaddr_in *)rte->rt_gateway;
	if (rte->rt_flags & RTF_HOST)
	isbroadcast = (rte->rt_flags & RTF_BROADCAST);
	else
	isbroadcast = in_broadcast(dst->sin_addr, ifp);
	}
	/*
	* Calculate MTU. If we have a route that is up, use that,
	* otherwise use the interface's MTU.
	*/
	if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) {
	/*
	* This case can happen if the user changed the MTU
	* of an interface after enabling IP on it. Because
	* most netifs don't keep track of routes pointing to
	* them, there is no way for one to update all its
	* routes when the MTU is changed.
	*/
	if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
	rte->rt_rmx.rmx_mtu = ifp->if_mtu;
	mtu = rte->rt_rmx.rmx_mtu;
	} else {
	mtu = ifp->if_mtu;
	}
	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
	m->m_flags |= M_MCAST;
	/*
	* IP destination address is multicast. Make sure "dst"
	* still points to the address in "ro". (It may have been
	* changed to point to a gateway address, above.)
	*/
	dst = (struct sockaddr_in *)&ro->ro_dst;
	/*
	* See if the caller provided any multicast options
	*/
	if (imo != NULL) {
	ip->ip_ttl = imo->imo_multicast_ttl;
	if (imo->imo_multicast_vif != -1)
	ip->ip_src.s_addr =
	ip_mcast_src ?
	ip_mcast_src(imo->imo_multicast_vif) :
	INADDR_ANY;
	} else
	ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
	/*
	* Confirm that the outgoing interface supports multicast.
	*/
	if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
	IPSTAT_INC(ips_noroute);
	error = ENETUNREACH;
	goto bad;
	}
	}
	/*
	* If source address not specified yet, use address
	* of outgoing interface.
	*/
	if (ip->ip_src.s_addr == INADDR_ANY) {
	/* Interface may have no addresses. */
	if (ia != NULL)
	ip->ip_src = IA_SIN(ia)->sin_addr;
	}

	if ((imo == NULL && in_mcast_loop) ||
	(imo && imo->imo_multicast_loop)) {
	/*
	* Loop back multicast datagram if not expressly
	* forbidden to do so, even if we are not a member
	* of the group; ip_input() will filter it later,
	* thus deferring a hash lookup and mutex acquisition
	* at the expense of a cheap copy using m_copym().
	*/
	ip_mloopback(ifp, m, dst, hlen);
	} else {
	/*
	* If we are acting as a multicast router, perform
	* multicast forwarding as if the packet had just
	* arrived on the interface to which we are about
	* to send. The multicast forwarding function
	* recursively calls this function, using the
	* IP_FORWARDING flag to prevent infinite recursion.
	*
	* Multicasts that are looped back by ip_mloopback(),
	* above, will be forwarded by the ip_input() routine,
	* if necessary.
	*/
	if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
	/*
	* If rsvp daemon is not running, do not
	* set ip_moptions. This ensures that the packet
	* is multicast and not just sent down one link
	* as prescribed by rsvpd.
	*/
	if (!V_rsvp_on)
	imo = NULL;
	if (ip_mforward &&
	ip_mforward(ip, ifp, m, imo) != 0) {
	m_freem(m);
	goto done;
	}
	}
	}

	/*
	* Multicasts with a time-to-live of zero may be looped-
	* back, above, but must not be transmitted on a network.
	* Also, multicasts addressed to the loopback interface
	* are not sent -- the above call to ip_mloopback() will
	* loop back a copy. ip_input() will drop the copy if
	* this host does not belong to the destination group on
	* the loopback interface.
	*/
	if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
	m_freem(m);
	goto done;
	}

	goto sendit;
	}

	/*
	* If the source address is not specified yet, use the address
	* of the outgoing interface.
	*/
	if (ip->ip_src.s_addr == INADDR_ANY) {
	/* Interface may have no addresses. */
	if (ia != NULL) {
	ip->ip_src = IA_SIN(ia)->sin_addr;
	}
	}

	/*
	* Verify that we have any chance at all of being able to queue the
	* packet or packet fragments, unless ALTQ is enabled on the given
	* interface in which case packetdrop should be done by queueing.
	*/
	#ifdef ALTQ
	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
	((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
	ifp->if_snd.ifq_maxlen))
	#else
	if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
	ifp->if_snd.ifq_maxlen)
	#endif /* ALTQ */
	{
	error = ENOBUFS;
	IPSTAT_INC(ips_odropped);
	ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
	goto bad;
	}

	/*
	* Look for broadcast address and
	* verify user is allowed to send
	* such a packet.
	*/
	if (isbroadcast) {
	if ((ifp->if_flags & IFF_BROADCAST) == 0) {
	error = EADDRNOTAVAIL;
	goto bad;
	}
	if ((flags & IP_ALLOWBROADCAST) == 0) {
	error = EACCES;
	goto bad;
	}
	/* don't allow broadcast messages to be fragmented */
	if (ip->ip_len > mtu) {
	error = EMSGSIZE;
	goto bad;
	}
	m->m_flags |= M_BCAST;
	} else {
	m->m_flags &= ~M_BCAST;
	}

	sendit:
	#ifdef IPSEC
	switch(ip_ipsec_output(&m, inp, &flags, &error, &ifp)) {
	case 1:
	goto bad;
	case -1:
	goto done;
	case 0:
	default:
	break; /* Continue with packet processing. */
	}
	/*
	* Check if there was a route for this packet; return error if not.
	*/
	if (no_route_but_check_spd) {
	IPSTAT_INC(ips_noroute);
	error = EHOSTUNREACH;
	goto bad;
	}
	/* Update variables that are affected by ipsec4_output(). */
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;
	#endif /* IPSEC */

	/* Jump over all PFIL processing if hooks are not active. */
	if (!PFIL_HOOKED(&V_inet_pfil_hook))
	goto passout;

	/* Run through list of hooks for output packets. */
	odst.s_addr = ip->ip_dst.s_addr;
	error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
	if (error != 0 || m == NULL)
	goto done;

	ip = mtod(m, struct ip *);

	/* See if destination IP address was changed by packet filter. */
	if (odst.s_addr != ip->ip_dst.s_addr) {
	m->m_flags |= M_SKIP_FIREWALL;
	/* If destination is now ourself drop to ip_input(). */
	if (in_localip(ip->ip_dst)) {
	m->m_flags |= M_FASTFWD_OURS;
	if (m->m_pkthdr.rcvif == NULL)
	m->m_pkthdr.rcvif = V_loif;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	m->m_pkthdr.csum_flags |=
	CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xffff;
	}
	m->m_pkthdr.csum_flags |=
	CSUM_IP_CHECKED | CSUM_IP_VALID;
	#ifdef SCTP
	if (m->m_pkthdr.csum_flags & CSUM_SCTP)
	m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
	#endif
	error = netisr_queue(NETISR_IP, m);
	goto done;
	} else
	goto again; /* Redo the routing table lookup. */
	}

	#ifdef IPFIREWALL_FORWARD
	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
	if (m->m_flags & M_FASTFWD_OURS) {
	if (m->m_pkthdr.rcvif == NULL)
	m->m_pkthdr.rcvif = V_loif;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	m->m_pkthdr.csum_flags |=
	CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xffff;
	}
	#ifdef SCTP
	if (m->m_pkthdr.csum_flags & CSUM_SCTP)
	m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
	#endif
	m->m_pkthdr.csum_flags |=
	CSUM_IP_CHECKED | CSUM_IP_VALID;

	error = netisr_queue(NETISR_IP, m);
	goto done;
	}
	/* Or forward to some other address? */
	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
	if (fwd_tag) {
	dst = (struct sockaddr_in *)&ro->ro_dst;
	bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
	m->m_flags |= M_SKIP_FIREWALL;
	m_tag_delete(m, fwd_tag);
	goto again;
	}
	#endif /* IPFIREWALL_FORWARD */

	passout:
	/* 127/8 must not appear on wire - RFC1122. */
	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
	(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
	if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
	IPSTAT_INC(ips_badaddr);
	error = EADDRNOTAVAIL;
	goto bad;
	}
	}

	m->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
	in_delayed_cksum(m);
	sw_csum &= ~CSUM_DELAY_DATA;
	}
	#ifdef SCTP
	if (sw_csum & CSUM_SCTP) {
	sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
	sw_csum &= ~CSUM_SCTP;
	}
	#endif
	m->m_pkthdr.csum_flags &= ifp->if_hwassist;

	/*
	* If small enough for interface, or the interface will take
	* care of the fragmentation for us, we can just send directly.
	*/
	if (ip->ip_len <= mtu ||
	(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
	((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
	ip->ip_len = htons(ip->ip_len);
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
	ip->ip_sum = in_cksum(m, hlen);

	/*
	* Record statistics for this interface address.
	* With CSUM_TSO the byte/packet count will be slightly
	* incorrect because we count the IP+TCP headers only
	* once instead of for every generated packet.
	*/
	if (!(flags & IP_FORWARDING) && ia) {
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
	ia->ia_ifa.if_opackets +=
	m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
	else
	ia->ia_ifa.if_opackets++;
	ia->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	#ifdef MBUF_STRESS_TEST
	if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
	m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
	#endif
	/*
	* Reset layer specific mbuf flags
	* to avoid confusing lower layers.
	*/
	m->m_flags &= ~(M_PROTOFLAGS);
	error = (*ifp->if_output)(ifp, m,
	(struct sockaddr *)dst, ro);
	goto done;
	}

	/* Balk when DF bit is set or the interface didn't support TSO. */
	if ((ip->ip_off & IP_DF) \|\| (m->m_pkthdr.csum_flags & CSUM_TSO)) {
	error = EMSGSIZE;
	IPSTAT_INC(ips_cantfrag);
	goto bad;
	}

	/*
	* Too large for interface; fragment if possible. If successful,
	* on return, m will point to a list of packets to be sent.
	*/
	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
	if (error)
	goto bad;
	for (; m; m = m0) {
	m0 = m->m_nextpkt;
	m->m_nextpkt = 0;
	if (error == 0) {
	/* Record statistics for this interface address. */
	if (ia != NULL) {
	ia->ia_ifa.if_opackets++;
	ia->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	/*
	* Reset layer specific mbuf flags
	* to avoid confusing upper layers.
	*/
	m->m_flags &= ~(M_PROTOFLAGS);

	error = (*ifp->if_output)(ifp, m,
	(struct sockaddr *)dst, ro);
	} else
	m_freem(m);
	}

	if (error == 0)
	IPSTAT_INC(ips_fragmented);

	done:
	if (ro == &iproute && ro->ro_rt && !nortfree) {
	RTFREE(ro->ro_rt);
	}
	if (ia != NULL)
	ifa_free(&ia->ia_ifa);
	return (error);
	bad:
	m_freem(m);
	goto done;
	}

	/*
	 * Create a chain of fragments which fit the given mtu.  *m_frag points to
	 * the mbuf to be fragmented; on return it points to the chain with the
	 * fragments, linked through m_nextpkt with the (trimmed) original packet
	 * serving as the first fragment.
	 *
	 * Returns 0 on success.  Returns EMSGSIZE if IP_DF is set in ip->ip_off or
	 * if fewer than 8 payload bytes would fit in a fragment, and ENOBUFS if an
	 * mbuf header or a payload copy cannot be allocated.  On error, *m_frag may
	 * contain a partially built chain of fragments that should be freed by the
	 * caller.
	 *
	 * ip->ip_len and ip->ip_off are read as-is on entry and are stored through
	 * htons() in every fragment header, including the first, on success.
	 *
	 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
	 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
	 */
	int
	ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
	    u_long if_hwassist_flags, int sw_csum)
	{
		int error = 0;
		int hlen = ip->ip_hl << 2;
		int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
		int off;
		struct mbuf *m0 = *m_frag;	/* the original packet */
		int firstlen;
		struct mbuf **mnext;
		int nfrags;

		if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
			IPSTAT_INC(ips_cantfrag);
			return EMSGSIZE;
		}

		/*
		 * Must be able to put at least 8 bytes per fragment.
		 */
		if (len < 8)
			return EMSGSIZE;

		/*
		 * If the interface will not calculate checksums on
		 * fragmented packets, then do it here.
		 */
		if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
		    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
			in_delayed_cksum(m0);
			m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
		}
	#ifdef SCTP
		if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
		    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
			sctp_delayed_cksum(m0, hlen);
			m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
		}
	#endif
		if (len > PAGE_SIZE) {
			/*
			 * Fragment large datagrams such that each segment
			 * contains a multiple of PAGE_SIZE amount of data,
			 * plus headers. This enables a receiver to perform
			 * page-flipping zero-copy optimizations.
			 *
			 * XXX When does this help given that sender and receiver
			 * could have different page sizes, and also mtu could
			 * be less than the receiver's page size ?
			 */
			int newlen;
			struct mbuf *m;

			/* Sum the leading mbufs that fit entirely within mtu. */
			for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
				off += m->m_len;

			/*
			 * firstlen (off - hlen) must be aligned on an
			 * 8-byte boundary
			 */
			if (off < hlen)
				goto smart_frag_failure;
			off = ((off - hlen) & ~7) + hlen;
			newlen = (~PAGE_MASK) & mtu;
			if ((newlen + sizeof (struct ip)) > mtu) {
				/* we failed, go back the default */
	smart_frag_failure:
				newlen = len;
				off = hlen + len;
			}
			len = newlen;

		} else {
			off = hlen + len;
		}

		firstlen = off - hlen;
		mnext = &m0->m_nextpkt;		/* pointer to next packet */

		/*
		 * Loop through length of segment after first fragment,
		 * make new header and copy data of each part and link onto chain.
		 * Here, m0 is the original packet, m is the fragment being created.
		 * The fragments are linked off the m_nextpkt of the original
		 * packet, which after processing serves as the first fragment.
		 */
		for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
			struct ip *mhip;	/* ip header on the fragment */
			struct mbuf *m;
			int mhlen = sizeof (struct ip);

			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				IPSTAT_INC(ips_odropped);
				goto done;
			}
			m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
			/*
			 * In the first mbuf, leave room for the link header, then
			 * copy the original IP header including options. The payload
			 * goes into an additional mbuf chain returned by m_copym().
			 */
			m->m_data += max_linkhdr;
			mhip = mtod(m, struct ip *);
			*mhip = *ip;
			if (hlen > sizeof (struct ip)) {
				mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
				mhip->ip_v = IPVERSION;
				mhip->ip_hl = mhlen >> 2;
			}
			m->m_len = mhlen;
			/* XXX do we need to add ip->ip_off below ? */
			mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
			if (off + len >= ip->ip_len) {	/* last fragment */
				len = ip->ip_len - off;
				m->m_flags |= M_LASTFRAG;
			} else
				mhip->ip_off |= IP_MF;
			mhip->ip_len = htons((u_short)(len + mhlen));
			m->m_next = m_copym(m0, off, len, M_DONTWAIT);
			if (m->m_next == NULL) {	/* copy failed */
				m_free(m);
				error = ENOBUFS;	/* ??? */
				IPSTAT_INC(ips_odropped);
				goto done;
			}
			m->m_pkthdr.len = mhlen + len;
			m->m_pkthdr.rcvif = NULL;
	#ifdef MAC
			mac_netinet_fragment(m0, m);
	#endif
			m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
			mhip->ip_off = htons(mhip->ip_off);
			mhip->ip_sum = 0;
			if (sw_csum & CSUM_DELAY_IP)
				mhip->ip_sum = in_cksum(m, mhlen);
			/* Append the finished fragment to the m_nextpkt chain. */
			*mnext = m;
			mnext = &m->m_nextpkt;
		}
		IPSTAT_ADD(ips_ofragments, nfrags);

		/* set first marker for fragment chain */
		m0->m_flags |= M_FIRSTFRAG | M_FRAG;
		m0->m_pkthdr.csum_data = nfrags;

		/*
		 * Update first fragment by trimming what's been copied out
		 * and updating header.  The m_adj() argument is negative here
		 * (the loop above only runs when ip_len exceeds hlen + firstlen).
		 */
		m_adj(m0, hlen + firstlen - ip->ip_len);
		m0->m_pkthdr.len = hlen + firstlen;
		ip->ip_len = htons((u_short)m0->m_pkthdr.len);
		ip->ip_off |= IP_MF;
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP)
			ip->ip_sum = in_cksum(m0, hlen);

	done:
		*m_frag = m0;
		return error;
	}

	void
	in_delayed_cksum(struct mbuf *m)
	{
	struct ip *ip;
	u_short csum, offset;

	ip = mtod(m, struct ip *);
	offset = ip->ip_hl << 2 ;
	csum = in_cksum_skip(m, ip->ip_len, offset);
	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
	csum = 0xffff;
	offset += m->m_pkthdr.csum_data; /* checksum offset */

	if (offset + sizeof(u_short) > m->m_len) {
	printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
	m->m_len, offset, ip->ip_p);
	/*
	* XXX
	* this shouldn't happen, but if it does, the
	* correct behavior may be to insert the checksum
	* in the appropriate next mbuf in the chain.
	*/
	return;
	}
	(u_short )(m->m_data + offset) = csum;
	}

	/*
	* IP socket option processing.
	*/
	int
	ip_ctloutput(struct socket so, struct sockopt sopt)
	{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	error = optval = 0;
	if (sopt->sopt_level != IPPROTO_IP) {
	if ((sopt->sopt_level == SOL_SOCKET) &&
	(sopt->sopt_name == SO_SETFIB)) {
	inp->inp_inc.inc_fibnum = so->so_fibnum;
	return (0);
	}
	return (EINVAL);
	}

	switch (sopt->sopt_dir) {
	case SOPT_SET:
	switch (sopt->sopt_name) {
	case IP_OPTIONS:
	#ifdef notyet
	case IP_RETOPTS:
	#endif
	{
	struct mbuf *m;
	if (sopt->sopt_valsize > MLEN) {
	error = EMSGSIZE;
	break;
	}
	MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == NULL) {
	error = ENOBUFS;
	break;
	}
	m->m_len = sopt->sopt_valsize;
	error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
	m->m_len);
	if (error) {
	m_free(m);
	break;
	}
	INP_WLOCK(inp);
	error = ip_pcbopts(inp, sopt->sopt_name, m);
	INP_WUNLOCK(inp);
	return (error);
	}

	case IP_BINDANY:
	if (sopt->sopt_td != NULL) {
	error = priv_check(sopt->sopt_td,
	PRIV_NETINET_BINDANY);
	if (error)
	break;
	}
	/* FALLTHROUGH */
	case IP_TOS:
	case IP_TTL:
	case IP_MINTTL:
	case IP_RECVOPTS:
	case IP_RECVRETOPTS:
	case IP_RECVDSTADDR:
	case IP_RECVTTL:
	case IP_RECVIF:
	case IP_FAITH:
	case IP_ONESBCAST:
	case IP_DONTFRAG:
	error = sooptcopyin(sopt, &optval, sizeof optval,
	sizeof optval);
	if (error)
	break;

	switch (sopt->sopt_name) {
	case IP_TOS:
	inp->inp_ip_tos = optval;
	break;

	case IP_TTL:
	inp->inp_ip_ttl = optval;
	break;

	case IP_MINTTL:
	if (optval >= 0 && optval <= MAXTTL)
	inp->inp_ip_minttl = optval;
	else
	error = EINVAL;
	break;

	#define OPTSET(bit) do { \
	INP_WLOCK(inp); \
	if (optval) \
	inp->inp_flags \|= bit; \
	else \
	inp->inp_flags &= ~bit; \
	INP_WUNLOCK(inp); \
	} while (0)

	case IP_RECVOPTS:
	OPTSET(INP_RECVOPTS);
	break;

	case IP_RECVRETOPTS:
	OPTSET(INP_RECVRETOPTS);
	break;

	case IP_RECVDSTADDR:
	OPTSET(INP_RECVDSTADDR);
	break;

	case IP_RECVTTL:
	OPTSET(INP_RECVTTL);
	break;

	case IP_RECVIF:
	OPTSET(INP_RECVIF);
	break;

	case IP_FAITH:
	OPTSET(INP_FAITH);
	break;

	case IP_ONESBCAST:
	OPTSET(INP_ONESBCAST);
	break;
	case IP_DONTFRAG:
	OPTSET(INP_DONTFRAG);
	break;
	case IP_BINDANY:
	OPTSET(INP_BINDANY);
	break;
	}
	break;
	#undef OPTSET

	/*
	* Multicast socket options are processed by the in_mcast
	* module.
	*/
	case IP_MULTICAST_IF:
	case IP_MULTICAST_VIF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_LOOP:
	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	error = inp_setmoptions(inp, sopt);
	break;

	case IP_PORTRANGE:
	error = sooptcopyin(sopt, &optval, sizeof optval,
	sizeof optval);
	if (error)
	break;

	INP_WLOCK(inp);
	switch (optval) {
	case IP_PORTRANGE_DEFAULT:
	inp->inp_flags &= ~(INP_LOWPORT);
	inp->inp_flags &= ~(INP_HIGHPORT);
	break;

	case IP_PORTRANGE_HIGH:
	inp->inp_flags &= ~(INP_LOWPORT);
	inp->inp_flags \|= INP_HIGHPORT;
	break;

	case IP_PORTRANGE_LOW:
	inp->inp_flags &= ~(INP_HIGHPORT);
	inp->inp_flags \|= INP_LOWPORT;
	break;

	default:
	error = EINVAL;
	break;
	}
	INP_WUNLOCK(inp);
	break;

	#ifdef IPSEC
	case IP_IPSEC_POLICY:
	{
	caddr_t req;
	struct mbuf *m;

	if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
	break;
	if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
	break;
	req = mtod(m, caddr_t);
	error = ipsec_set_policy(inp, sopt->sopt_name, req,
	m->m_len, (sopt->sopt_td != NULL) ?
	sopt->sopt_td->td_ucred : NULL);
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;

	case SOPT_GET:
	switch (sopt->sopt_name) {
	case IP_OPTIONS:
	case IP_RETOPTS:
	if (inp->inp_options)
	error = sooptcopyout(sopt,
	mtod(inp->inp_options,
	char *),
	inp->inp_options->m_len);
	else
	sopt->sopt_valsize = 0;
	break;

	case IP_TOS:
	case IP_TTL:
	case IP_MINTTL:
	case IP_RECVOPTS:
	case IP_RECVRETOPTS:
	case IP_RECVDSTADDR:
	case IP_RECVTTL:
	case IP_RECVIF:
	case IP_PORTRANGE:
	case IP_FAITH:
	case IP_ONESBCAST:
	case IP_DONTFRAG:
	switch (sopt->sopt_name) {

	case IP_TOS:
	optval = inp->inp_ip_tos;
	break;

	case IP_TTL:
	optval = inp->inp_ip_ttl;
	break;

	case IP_MINTTL:
	optval = inp->inp_ip_minttl;
	break;

	#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)

	case IP_RECVOPTS:
	optval = OPTBIT(INP_RECVOPTS);
	break;

	case IP_RECVRETOPTS:
	optval = OPTBIT(INP_RECVRETOPTS);
	break;

	case IP_RECVDSTADDR:
	optval = OPTBIT(INP_RECVDSTADDR);
	break;

	case IP_RECVTTL:
	optval = OPTBIT(INP_RECVTTL);
	break;

	case IP_RECVIF:
	optval = OPTBIT(INP_RECVIF);
	break;

	case IP_PORTRANGE:
	if (inp->inp_flags & INP_HIGHPORT)
	optval = IP_PORTRANGE_HIGH;
	else if (inp->inp_flags & INP_LOWPORT)
	optval = IP_PORTRANGE_LOW;
	else
	optval = 0;
	break;

	case IP_FAITH:
	optval = OPTBIT(INP_FAITH);
	break;

	case IP_ONESBCAST:
	optval = OPTBIT(INP_ONESBCAST);
	break;
	case IP_DONTFRAG:
	optval = OPTBIT(INP_DONTFRAG);
	break;
	}
	error = sooptcopyout(sopt, &optval, sizeof optval);
	break;

	/*
	* Multicast socket options are processed by the in_mcast
	* module.
	*/
	case IP_MULTICAST_IF:
	case IP_MULTICAST_VIF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_LOOP:
	case IP_MSFILTER:
	error = inp_getmoptions(inp, sopt);
	break;

	#ifdef IPSEC
	case IP_IPSEC_POLICY:
	{
	struct mbuf *m = NULL;
	caddr_t req = NULL;
	size_t len = 0;

	if (m != 0) {
	req = mtod(m, caddr_t);
	len = m->m_len;
	}
	error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
	if (error == 0)
	error = soopt_mcopyout(sopt, m); /* XXX */
	if (error == 0)
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;
	}
	return (error);
	}

	/*
	 * Routine called from ip_output() to loop back a copy of an IP multicast
	 * packet to the input queue of a specified interface.  Note that this
	 * calls the output routine of the loopback "driver", but with an interface
	 * pointer that might NOT be a loopback interface -- evil, but easier than
	 * replicating that code here.
	 *
	 * ifp is the interface handed to if_simloop(), m the packet to duplicate
	 * (it is not modified here; only the copy is), dst the destination whose
	 * sin_family is passed on, and hlen the IP header length in bytes used
	 * for the pullup and the header checksum.  If the copy or the pullup
	 * fails, the function returns without looping anything back.
	 */
	static void
	ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
	    int hlen)
	{
		register struct ip *ip;
		struct mbuf *copym;

		/*
		 * Make a deep copy of the packet because we're going to
		 * modify the pack in order to generate checksums.
		 */
		copym = m_dup(m, M_DONTWAIT);
		if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
			copym = m_pullup(copym, hlen);
		if (copym != NULL) {
			/* If needed, compute the checksum and mark it as valid. */
			if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
				in_delayed_cksum(copym);
				copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
				copym->m_pkthdr.csum_flags |=
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
				copym->m_pkthdr.csum_data = 0xffff;
			}
			/*
			 * We don't bother to fragment if the IP length is greater
			 * than the interface's MTU.  Can this possibly matter?
			 */
			ip = mtod(copym, struct ip *);
			ip->ip_len = htons(ip->ip_len);
			ip->ip_off = htons(ip->ip_off);
			ip->ip_sum = 0;
			ip->ip_sum = in_cksum(copym, hlen);
	#if 1 /* XXX */
			/* Complain about, then repair, an unexpected family. */
			if (dst->sin_family != AF_INET) {
				printf("ip_mloopback: bad address family %d\n",
				    dst->sin_family);
				dst->sin_family = AF_INET;
			}
	#endif
			if_simloop(ifp, copym, dst->sin_family, 0);
		}
	}
	Index: stable/8/sys
	===================================================================
	--- stable/8/sys (revision 209276)
	+++ stable/8/sys (revision 209277)

	Property changes on: stable/8/sys
	___________________________________________________________________
	Modified: svn:mergeinfo
	## -0,0 +0,1 ##
	Merged /head/sys:r208553

File Metadata

Mime Type: text/x-c
Expires: Sun, Mar 29, 12:56 AM (2 d)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 30450978
Default Alt Text: (350 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions