No OneTemporary
Actions

Size

712 KB

Referenced Files

None

Subscribers

None

View Options

This file is larger than 256 KB, so syntax highlighting was skipped.

	Index: projects/arpv2_merge_1/sys/net/if.c
	===================================================================
	--- projects/arpv2_merge_1/sys/net/if.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/net/if.c (revision 185839)
	@@ -1,2882 +1,2884 @@
	/*-
	* Copyright (c) 1980, 1986, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)if.c 8.5 (Berkeley) 1/9/95
	* $FreeBSD$
	*/

	#include "opt_compat.h"
	#include "opt_inet6.h"
	#include "opt_inet.h"
	#include "opt_mac.h"
	#include "opt_carp.h"

	#include <sys/param.h>
	#include <sys/types.h>
	#include <sys/conf.h>
	#include <sys/malloc.h>
	#include <sys/sbuf.h>
	#include <sys/bus.h>
	#include <sys/mbuf.h>
	#include <sys/systm.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/protosw.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/rwlock.h>
	#include <sys/sockio.h>
	#include <sys/syslog.h>
	#include <sys/sysctl.h>
	#include <sys/taskqueue.h>
	#include <sys/domain.h>
	#include <sys/jail.h>
	#include <sys/vimage.h>
	#include <machine/stdarg.h>
	#include <vm/uma.h>

	#include <net/if.h>
	#include <net/if_arp.h>
	#include <net/if_clone.h>
	#include <net/if_dl.h>
	#include <net/if_types.h>
	#include <net/if_var.h>
	#include <net/radix.h>
	#include <net/route.h>
	#include <net/vnet.h>
	-#include <net/if_llatbl.h>

	#if defined(INET) || defined(INET6)
	/*XXX*/
	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#ifdef INET6
	#include <netinet6/in6_var.h>
	#include <netinet6/in6_ifattach.h>
	#endif
	#endif
	#ifdef INET
	#include <netinet/if_ether.h>
	#include <netinet/vinet.h>
	#endif
	#ifdef DEV_CARP
	#include <netinet/ip_carp.h>
	#endif

	#include <security/mac/mac_framework.h>

	/* Sysctl nodes "link" (under _net) and "generic" (under _net_link). */
	SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
	SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

	/* Log link state change events */
	static int log_link_state_change = 1;

	/* Read-write sysctl knob under _net_link backing the flag above. */
	SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

	/*
	 * Link-state notification hooks.  Each is a function pointer taking the
	 * interface and its new link state.  They are not assigned in this part
	 * of the file; presumably the bridge, netgraph and lagg code install
	 * them (verify against those modules).
	 */
	void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
	void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
	void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);

	/* Dequeue hook taking an ifaltq and an int; starts out unset (NULL). */
	struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

	/*
	 * XXX: Style; these should be sorted alphabetically, and unprototyped
	 * static functions should be prototyped. Currently they are sorted by
	 * declaration order.
	 */
	static void	if_attachdomain(void *);
	static void	if_attachdomain1(struct ifnet *);
	static int	ifconf(u_long, caddr_t);
	static void	if_freemulti(struct ifmultiaddr *);
	static void	if_grow(void);
	static void	if_init(void *);
	static void	if_qflush(struct ifnet *);
	static void	if_route(struct ifnet *, int flag, int fam);
	static int	if_setflag(struct ifnet *, int, int, int *, int);
	static void	if_slowtimo(void *);
	static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
	static void	if_unroute(struct ifnet *, int flag, int fam);
	static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
	static int	if_rtdel(struct radix_node *, void *);
	static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
	static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
	static void	if_start_deferred(void *context, int pending);
	static void	do_link_state_change(void *, int);
	static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
	static int	if_getgroupmembers(struct ifgroupreq *);

	#ifdef INET6
	/*
	 * XXX: declared here to avoid including many inet6-related headers.
	 * Should this be more generalized?
	 */
	extern void nd6_setmtu(struct ifnet *);
	#endif

	/*
	 * Interface-layer state that is only defined as plain globals when
	 * VIMAGE_GLOBALS is set; the code below reaches it through V_ names.
	 */
	#ifdef VIMAGE_GLOBALS
	struct ifnethead ifnet; /* depend on static init XXX */
	struct ifgrouphead ifg_head;
	int if_index;
	static int if_indexlim;
	/* Table of ifnet/cdev by index. Locked with ifnet_lock. */
	static struct ifindex_entry *ifindex_table;
	static struct knlist ifklist;
	#endif

	/* Default send-queue length; applied by ifq_attach() when ifq_maxlen is 0. */
	int ifqmaxlen = IFQ_MAXLEN;
	struct mtx ifnet_lock;
	/* Per-type layer 2 allocate/free callbacks, indexed by the u_char type. */
	static if_com_alloc_t *if_com_alloc[256];
	static if_com_free_t *if_com_free[256];

	static void filt_netdetach(struct knote *kn);
	static int filt_netdev(struct knote *kn, long hint);

	/*
	 * Filter operations installed by netkqfilter() for EVFILT_NETDEV knotes.
	 * NOTE(review): positional initializer; presumably the fields are
	 * f_isfd, f_attach, f_detach, f_event -- confirm against sys/event.h.
	 */
	static struct filterops netdev_filtops =
	{ 1, NULL, filt_netdetach, filt_netdev };

	/*
	 * System initialization: register if_init() at SI_SUB_INIT_IF and
	 * if_slowtimo() at SI_SUB_PROTO_IF, both with SI_ORDER_FIRST.
	 */
	SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
	SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_slowtimo, NULL);

	/* Kernel malloc types defined by this file. */
	MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
	MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
	MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

	/*
	 * Return the interface recorded in slot idx of the interface index
	 * table.  The slot is read under the ifnet read lock; an empty slot
	 * yields NULL.  idx is not range-checked here.
	 */
	struct ifnet *
	ifnet_byindex(u_short idx)
	{
		INIT_VNET_NET(curvnet);
		struct ifnet *found;

		IFNET_RLOCK();
		found = V_ifindex_table[idx].ife_ifnet;
		IFNET_RUNLOCK();

		return (found);
	}

	/*
	 * Record ifp (which may be NULL to clear the slot) as the interface for
	 * index idx.  The caller must already hold the ifnet write lock.
	 */
	static void
	ifnet_setbyindex(u_short idx, struct ifnet *ifp)
	{
		INIT_VNET_NET(curvnet);
		struct ifindex_entry *slot;

		IFNET_WLOCK_ASSERT();

		slot = &V_ifindex_table[idx];
		slot->ife_ifnet = ifp;
	}

	/*
	 * Return the link-level address (if_addr) of the interface at index idx,
	 * or NULL when no interface occupies that slot.
	 *
	 * The slot can legitimately be empty: if_free_type() clears entries, and
	 * ifa_ifwithnet() passes any index up to V_if_index.  The original code
	 * dereferenced the lookup result unconditionally.
	 */
	struct ifaddr *
	ifaddr_byindex(u_short idx)
	{
		struct ifnet *ifp;
		struct ifaddr *ifa;

		IFNET_RLOCK();
		ifp = ifnet_byindex(idx);
		ifa = (ifp != NULL) ? ifp->if_addr : NULL;
		IFNET_RUNLOCK();
		return (ifa);
	}

	/*
	 * Return the character device recorded in slot idx of the interface
	 * index table, read under the ifnet read lock.
	 */
	struct cdev *
	ifdev_byindex(u_short idx)
	{
		INIT_VNET_NET(curvnet);
		struct cdev *dev;

		IFNET_RLOCK();
		dev = V_ifindex_table[idx].ife_dev;
		IFNET_RUNLOCK();

		return (dev);
	}

	/*
	 * Record cdev as the character device for index idx.  Unlike
	 * ifnet_setbyindex(), this takes the ifnet write lock itself.
	 */
	static void
	ifdev_setbyindex(u_short idx, struct cdev *cdev)
	{
		INIT_VNET_NET(curvnet);
		struct ifindex_entry *slot;

		IFNET_WLOCK();
		slot = &V_ifindex_table[idx];
		slot->ife_dev = cdev;
		IFNET_WUNLOCK();
	}

	/* Entry points of the "net" character device, defined below. */
	static d_open_t netopen;
	static d_close_t netclose;
	static d_ioctl_t netioctl;
	static d_kqfilter_t netkqfilter;

	/*
	 * Device switch for the "net" devices.  if_init() creates unit 0 from
	 * it and if_attach() creates one device per interface index.
	 */
	static struct cdevsw net_cdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_NEEDGIANT,
	.d_open = netopen,
	.d_close = netclose,
	.d_ioctl = netioctl,
	.d_name = "net",
	.d_kqfilter = netkqfilter,
	};

	/* Open handler for the "net" devices: always succeeds, does nothing. */
	static int
	netopen(struct cdev *dev, int flag, int mode, struct thread *td)
	{
		return (0);
	}

	/* Close handler for the "net" devices: always succeeds, does nothing. */
	static int
	netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
	{
		return (0);
	}

	/*
	 * ioctl handler for the "net" devices.
	 *
	 * Only ioctls in group 'i' are accepted.  Unit 0 is the special network
	 * device and handles only SIOCGIFCONF (plus SIOCGIFCONF32 on amd64).
	 * Any other unit is treated as an interface index and the request is
	 * passed to ifhwioctl().
	 *
	 * Returns EOPNOTSUPP for unsupported requests (including ENOIOCTL from
	 * ifhwioctl()), ENXIO when no interface has that index, otherwise the
	 * result of the underlying handler.
	 */
	static int
	netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
	{
		struct ifnet *ifp;
		int error, idx;

		/* only support interface specific ioctls */
		if (IOCGROUP(cmd) != 'i')
			return (EOPNOTSUPP);
		idx = dev2unit(dev);
		if (idx == 0) {
			/*
			 * special network device, not interface.
			 */
			if (cmd == SIOCGIFCONF)
				return (ifconf(cmd, data));	/* XXX remove cmd */
	#ifdef __amd64__
			if (cmd == SIOCGIFCONF32)
				return (ifconf(cmd, data));	/* XXX remove cmd */
	#endif
			return (EOPNOTSUPP);
		}

		ifp = ifnet_byindex(idx);
		if (ifp == NULL)
			return (ENXIO);

		error = ifhwioctl(cmd, ifp, data, td);
		if (error == ENOIOCTL)
			error = EOPNOTSUPP;
		return (error);
	}

	static int
	netkqfilter(struct cdev dev, struct knote kn)
	{
	INIT_VNET_NET(curvnet);
	struct knlist *klist;
	struct ifnet *ifp;
	int idx;

	switch (kn->kn_filter) {
	case EVFILT_NETDEV:
	kn->kn_fop = &netdev_filtops;
	break;
	default:
	return (EINVAL);
	}

	idx = dev2unit(dev);
	if (idx == 0) {
	klist = &V_ifklist;
	} else {
	ifp = ifnet_byindex(idx);
	if (ifp == NULL)
	return (1);
	klist = &ifp->if_klist;
	}

	kn->kn_hook = (caddr_t)klist;

	knlist_add(klist, kn, 0);

	return (0);
	}

	/*
	 * Detach a knote: remove it from the knlist that netkqfilter() stored
	 * in kn_hook.
	 */
	static void
	filt_netdetach(struct knote *kn)
	{
		struct knlist *klist = (struct knlist *)kn->kn_hook;

		knlist_remove(klist, kn, 0);
	}

	/*
	 * Event filter for EVFILT_NETDEV knotes.
	 *
	 * A NOTE_EXIT hint marks the knote EOF/one-shot with NOTE_LINKINV as its
	 * data, removes it from its list and reports it as active.  Any other
	 * non-zero hint becomes the knote's data, and is added to kn_fflags if
	 * it intersects the flags in kn_sfflags.
	 *
	 * Returns non-zero when the knote should fire.
	 */
	static int
	filt_netdev(struct knote *kn, long hint)
	{
		struct knlist *klist = (struct knlist *)kn->kn_hook;

		/*
		 * Currently NOTE_EXIT is abused to indicate device detach.
		 */
		if (hint == NOTE_EXIT) {
			kn->kn_data = NOTE_LINKINV;
			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
			knlist_remove_inevent(klist, kn);
			return (1);
		}
		if (hint != 0)
			kn->kn_data = hint;		/* current status */
		if (kn->kn_sfflags & hint)
			kn->kn_fflags |= hint;
		return (kn->kn_fflags != 0);
	}

	/*
	* Network interface utility routines.
	*
	* Routines with ifa_ifwith* names take sockaddr *'s as
	* parameters.
	*/

	/*
	 * One-time initialization of the interface layer, registered via SYSINIT.
	 * Resets the index bookkeeping, initializes the ifnet lock, the interface
	 * and group lists and the global knote list, allocates the first index
	 * table, creates the unit-0 "network" device, and initializes cloning.
	 */
	/* ARGSUSED*/
	static void
	if_init(void *dummy __unused)
	{
	INIT_VNET_NET(curvnet);

	V_if_index = 0;
	V_ifindex_table = NULL;
	/* if_grow() doubles this before allocating the first table. */
	V_if_indexlim = 8;

	IFNET_LOCK_INIT();
	TAILQ_INIT(&V_ifnet);
	TAILQ_INIT(&V_ifg_head);
	knlist_init(&V_ifklist, NULL, NULL, NULL, NULL);
	if_grow(); /* create initial table */
	/* Slot 0 holds the special network device, not an interface. */
	ifdev_setbyindex(0, make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL,
	0600, "network"));
	if_clone_init();
	}

	/*
	 * Double the capacity of the interface index table.
	 *
	 * V_if_indexlim is doubled and a zeroed table of that many entries is
	 * allocated.  If an old table exists, its contents (half the new size)
	 * are copied over and the old table is freed.
	 */
	static void
	if_grow(void)
	{
		INIT_VNET_NET(curvnet);
		u_int n;
		struct ifindex_entry *e;

		V_if_indexlim <<= 1;
		n = V_if_indexlim * sizeof(*e);
		e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
		if (V_ifindex_table != NULL) {
			/* The old table was exactly half the new size. */
			memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
			free((caddr_t)V_ifindex_table, M_IFNET);
		}
		V_ifindex_table = e;
	}

	/*
	 * Allocate a struct ifnet and an index for an interface.  A layer 2
	 * common structure will also be allocated if an allocation routine is
	 * registered for the passed type.
	 *
	 * Returns the new, zero-filled ifnet with if_index and if_type set and
	 * registered in the index table, or NULL if the index overflowed or the
	 * layer 2 allocation failed.
	 */
	struct ifnet *
	if_alloc(u_char type)
	{
		INIT_VNET_NET(curvnet);
		struct ifnet *ifp;

		ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO);

		/*
		 * Try to find an empty slot below if_index.  If we fail, take
		 * the next slot.
		 *
		 * XXX: should be locked!
		 */
		for (ifp->if_index = 1; ifp->if_index <= V_if_index; ifp->if_index++) {
			if (ifnet_byindex(ifp->if_index) == NULL)
				break;
		}
		/* Catch if_index overflow. */
		if (ifp->if_index < 1) {
			free(ifp, M_IFNET);
			return (NULL);
		}
		if (ifp->if_index > V_if_index)
			V_if_index = ifp->if_index;
		/* Keep the table large enough to hold every index in use. */
		if (V_if_index >= V_if_indexlim)
			if_grow();

		ifp->if_type = type;

		if (if_com_alloc[type] != NULL) {
			ifp->if_l2com = if_com_alloc[type](type, ifp);
			if (ifp->if_l2com == NULL) {
				free(ifp, M_IFNET);
				return (NULL);
			}
		}
		IFNET_WLOCK();
		ifnet_setbyindex(ifp->if_index, ifp);
		IFNET_WUNLOCK();
		IF_ADDR_LOCK_INIT(ifp);

		return (ifp);
	}

	/*
	 * Free the struct ifnet, the associated index, and the layer 2 common
	 * structure if needed.  All the work is done in if_free_type(), which is
	 * called with the interface's current if_type.
	 *
	 * Do not add code to this function!  Add it to if_free_type().
	 */
	void
	if_free(struct ifnet *ifp)
	{

	if_free_type(ifp, ifp->if_type);
	}

	/*
	 * Do the actual work of freeing a struct ifnet, associated index, and
	 * layer 2 common structure.  This version should only be called by
	 * interfaces that switch their type after calling if_alloc().
	 *
	 * type selects which if_com_free[] routine releases if_l2com.  If ifp is
	 * not the interface registered at its own index, a message is printed
	 * and nothing is freed.
	 */
	void
	if_free_type(struct ifnet *ifp, u_char type)
	{
		INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */

		if (ifp != ifnet_byindex(ifp->if_index)) {
			if_printf(ifp, "%s: value was not if_alloced, skipping\n",
			    __func__);
			return;
		}

		IFNET_WLOCK();
		ifnet_setbyindex(ifp->if_index, NULL);

		/* XXX: should be locked with if_findindex() */
		/* Shrink V_if_index past any trailing empty slots. */
		while (V_if_index > 0 && ifnet_byindex(V_if_index) == NULL)
			V_if_index--;
		IFNET_WUNLOCK();

		if (if_com_free[type] != NULL)
			if_com_free[type](ifp->if_l2com, type);

		IF_ADDR_LOCK_DESTROY(ifp);
		free(ifp, M_IFNET);
	}

	void
	ifq_attach(struct ifaltq ifq, struct ifnet ifp)
	{

	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	if (ifq->ifq_maxlen == 0)
	ifq->ifq_maxlen = ifqmaxlen;

	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	}

	/* Tear down a send queue: destroy the mutex set up by ifq_attach(). */
	void
	ifq_detach(struct ifaltq *ifq)
	{
	mtx_destroy(&ifq->ifq_mtx);
	}

	/*
	 * Perform generic interface initialization tasks and attach the interface
	 * to the list of "active" interfaces.
	 *
	 * ifp must come from if_alloc(); otherwise this panics.
	 *
	 * XXX:
	 *  - The decision to return void and thus require this function to
	 *    succeed is questionable.
	 *  - We do more initialization here than is probably a good idea.
	 *    Some of this should probably move to if_alloc().
	 *  - We should probably do more sanity checking.  For instance we don't
	 *    do anything to ensure if_xname is unique or non-empty.
	 */
	void
	if_attach(struct ifnet *ifp)
	{
		INIT_VNET_NET(curvnet);
		unsigned socksize, ifasize;
		int namelen, masklen;
		struct sockaddr_dl *sdl;
		struct ifaddr *ifa;

		if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
			panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
			    ifp->if_xname);

		TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
		TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
		IF_AFDATA_LOCK_INIT(ifp);
		ifp->if_afdata_initialized = 0;

		TAILQ_INIT(&ifp->if_addrhead);
		TAILQ_INIT(&ifp->if_prefixhead);
		TAILQ_INIT(&ifp->if_multiaddrs);
		TAILQ_INIT(&ifp->if_groups);

		if_addgroup(ifp, IFG_ALL);

		knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
		getmicrotime(&ifp->if_lastchange);
		ifp->if_data.ifi_epoch = time_uptime;
		ifp->if_data.ifi_datalen = sizeof(struct if_data);
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	#ifdef MAC
		mac_ifnet_init(ifp);
		mac_ifnet_create(ifp);
	#endif

		/* Per-interface device node "net/<xname>" plus alias "net<index>". */
		ifdev_setbyindex(ifp->if_index, make_dev(&net_cdevsw,
		    ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s",
		    net_cdevsw.d_name, ifp->if_xname));
		make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
		    net_cdevsw.d_name, ifp->if_index);

		ifq_attach(&ifp->if_snd, ifp);

		/*
		 * create a Link Level name for this device
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we can do
		 * a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/* One allocation: the ifaddr, then the address, then the netmask. */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
		IFA_LOCK_INIT(ifa);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* The netmask sockaddr_dl sits socksize bytes after the address. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* Mask covers exactly the bytes of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		ifa->ifa_refcnt = 1;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */


		IFNET_WLOCK();
		TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
		IFNET_WUNLOCK();

		if (domain_init_status >= 2)
			if_attachdomain1(ifp);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

		/* Announce the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

		if (ifp->if_watchdog != NULL)
			if_printf(ifp,
			    "WARNING: using obsoleted if_watchdog interface\n");
		if (ifp->if_flags & IFF_NEEDSGIANT)
			if_printf(ifp,
			    "WARNING: using obsoleted IFF_NEEDSGIANT flag\n");
	}

	static void
	if_attachdomain(void *dummy)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	int s;

	s = splnet();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
	if_attachdomain1(ifp);
	splx(s);
	}
	SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
	if_attachdomain, NULL);

	/*
	 * Attach per-address-family data to one interface by calling every
	 * domain's dom_ifattach hook and storing the result in if_afdata[].
	 * Does nothing if the afdata lock cannot be taken immediately, and
	 * prints a message if the interface was already initialized for the
	 * current domain_init_status.
	 */
	static void
	if_attachdomain1(struct ifnet *ifp)
	{
	struct domain *dp;
	int s;

	s = splnet();

	/*
	* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	* cannot lock ifp->if_afdata initialization, entirely.
	*/
	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
	splx(s);
	return;
	}
	if (ifp->if_afdata_initialized >= domain_init_status) {
	IF_AFDATA_UNLOCK(ifp);
	splx(s);
	printf("if_attachdomain called more than once on %s\n",
	ifp->if_xname);
	return;
	}
	/* Claim the initialization under the lock, then drop it before the hooks. */
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (dp = domains; dp; dp = dp->dom_next) {
	if (dp->dom_ifattach)
	ifp->if_afdata[dp->dom_family] =
	(*dp->dom_ifattach)(ifp);
	}

	splx(s);
	}

	/*
	 * Remove any unicast or broadcast network addresses from an interface.
	 *
	 * AF_LINK addresses are left in place.  AF_INET addresses are deleted
	 * through in_control(SIOCDIFADDR) and AF_INET6 ones through
	 * in6_purgeaddr().  Anything else, or an AF_INET address in_control()
	 * failed to delete, is unlinked and released here.
	 */
	void
	if_purgeaddrs(struct ifnet *ifp)
	{
		struct ifaddr *ifa, *next;

		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
			if (ifa->ifa_addr->sa_family == AF_LINK)
				continue;
	#ifdef INET
			/* XXX: Ugly!! ad hoc just for INET */
			if (ifa->ifa_addr->sa_family == AF_INET) {
				struct ifaliasreq ifr;

				bzero(&ifr, sizeof(ifr));
				ifr.ifra_addr = *ifa->ifa_addr;
				if (ifa->ifa_dstaddr)
					ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
				if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				    NULL) == 0)
					continue;
			}
	#endif /* INET */
	#ifdef INET6
			if (ifa->ifa_addr->sa_family == AF_INET6) {
				in6_purgeaddr(ifa);
				/* ifp_addrhead is already updated */
				continue;
			}
	#endif /* INET6 */
			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
			IFAFREE(ifa);
		}
	}

	/*
	* Remove any multicast network addresses from an interface.
	*/
	void
	if_purgemaddrs(struct ifnet *ifp)
	{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
	if_delmulti_locked(ifp, ifma, 1);
	IF_ADDR_UNLOCK(ifp);
	}

	/*
	 * Detach an interface, removing it from the
	 * list of "active" interfaces.
	 *
	 * If ifp is not on the V_ifnet list the function returns without doing
	 * anything.  Otherwise it drains the link-state task, marks the
	 * interface down, purges addresses and routes, announces the departure,
	 * runs the per-domain detach hooks and destroys the per-interface
	 * knote list, send queue and afdata lock.  The ifnet itself is not
	 * freed here.
	 *
	 * XXXRW: There are some significant questions about event ordering, and
	 * how to prevent things from starting to use the interface during detach.
	 */
	void
	if_detach(struct ifnet *ifp)
	{
	INIT_VNET_NET(ifp->if_vnet);
	struct ifaddr *ifa;
	struct radix_node_head *rnh;
	int s, i, j;
	struct domain *dp;
	struct ifnet *iter;
	int found = 0;

	/* Unlink from the global list only if the interface is actually on it. */
	IFNET_WLOCK();
	TAILQ_FOREACH(iter, &V_ifnet, if_link)
	if (iter == ifp) {
	TAILQ_REMOVE(&V_ifnet, ifp, if_link);
	found = 1;
	break;
	}
	IFNET_WUNLOCK();
	if (!found)
	return;

	/*
	* Remove/wait for pending events.
	*/
	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

	/*
	* Remove routes and flush queues.
	*/
	s = splnet();
	if_down(ifp);
	#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
	altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
	altq_detach(&ifp->if_snd);
	#endif

	if_purgeaddrs(ifp);

	#ifdef INET
	in_ifdetach(ifp);
	#endif

	#ifdef INET6
	/*
	* Remove all IPv6 kernel structs related to ifp. This should be done
	* before removing routing entries below, since IPv6 interface direct
	* routes are expected to be removed by the IPv6-specific kernel API.
	* Otherwise, the kernel will detect some inconsistency and bark it.
	*/
	in6_ifdetach(ifp);
	#endif
	if_purgemaddrs(ifp);

	/*
	* Remove link ifaddr pointer and maybe decrement if_index.
	* Clean up all addresses.
	*/
	ifp->if_addr = NULL;
	destroy_dev(ifdev_byindex(ifp->if_index));
	ifdev_setbyindex(ifp->if_index, NULL);

	/* We can now free link ifaddr. */
	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
	IFAFREE(ifa);
	}

	/*
	* Delete all remaining routes using this interface
	* Unfortuneatly the only way to do this is to slog through
	* the entire routing table looking for routes which point
	* to this interface...oh well...
	*/
	for (i = 1; i <= AF_MAX; i++) {
	for (j = 0; j < rt_numfibs; j++) {
	if ((rnh = V_rt_tables[j][i]) == NULL)
	continue;
	RADIX_NODE_HEAD_LOCK(rnh);
	(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	}
	}

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/* Give each domain that attached afdata a chance to release it. */
	IF_AFDATA_LOCK(ifp);
	for (dp = domains; dp; dp = dp->dom_next) {
	if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
	(*dp->dom_ifdetach)(ifp,
	ifp->if_afdata[dp->dom_family]);
	}
	IF_AFDATA_UNLOCK(ifp);

	#ifdef MAC
	mac_ifnet_destroy(ifp);
	#endif /* MAC */
	/* NOTE_EXIT tells filt_netdev() that the device is going away. */
	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
	knlist_clear(&ifp->if_klist, 0);
	knlist_destroy(&ifp->if_klist);
	ifq_detach(&ifp->if_snd);
	IF_AFDATA_DESTROY(ifp);
	splx(s);
	}

	/*
	* Add a group to an interface
	*/
	int
	if_addgroup(struct ifnet ifp, const char groupname)
	{
	INIT_VNET_NET(ifp->if_vnet);
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	groupname[strlen(groupname) - 1] <= '9')
	return (EINVAL);

	IFNET_WLOCK();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
	IFNET_WUNLOCK();
	return (EEXIST);
	}

	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
	M_NOWAIT)) == NULL) {
	IFNET_WUNLOCK();
	return (ENOMEM);
	}

	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
	M_TEMP, M_NOWAIT)) == NULL) {
	free(ifgl, M_TEMP);
	IFNET_WUNLOCK();
	return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
	if (!strcmp(ifg->ifg_group, groupname))
	break;

	if (ifg == NULL) {
	if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
	M_TEMP, M_NOWAIT)) == NULL) {
	free(ifgl, M_TEMP);
	free(ifgm, M_TEMP);
	IFNET_WUNLOCK();
	return (ENOMEM);
	}
	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	TAILQ_INIT(&ifg->ifg_members);
	EVENTHANDLER_INVOKE(group_attach_event, ifg);
	TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	IF_ADDR_LOCK(ifp);
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_UNLOCK(ifp);

	IFNET_WUNLOCK();

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
	}

	/*
	* Remove a group from an interface
	*/
	int
	if_delgroup(struct ifnet ifp, const char groupname)
	{
	INIT_VNET_NET(ifp->if_vnet);
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	IFNET_WLOCK();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
	break;
	if (ifgl == NULL) {
	IFNET_WUNLOCK();
	return (ENOENT);
	}

	IF_ADDR_LOCK(ifp);
	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_UNLOCK(ifp);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
	if (ifgm->ifgm_ifp == ifp)
	break;

	if (ifgm != NULL) {
	TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
	free(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
	TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
	EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
	free(ifgl->ifgl_group, M_TEMP);
	}
	IFNET_WUNLOCK();

	free(ifgl, M_TEMP);

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
	}

	/*
	* Stores all groups from an interface in memory pointed
	* to by data
	*/
	static int
	if_getgroup(struct ifgroupreq data, struct ifnet ifp)
	{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = data;

	if (ifgr->ifgr_len == 0) {
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
	ifgr->ifgr_len += sizeof(struct ifg_req);
	IF_ADDR_UNLOCK(ifp);
	return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	/* XXX: wire */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
	if (len < sizeof(ifgrq)) {
	IF_ADDR_UNLOCK(ifp);
	return (EINVAL);
	}
	bzero(&ifgrq, sizeof ifgrq);
	strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
	sizeof(ifgrq.ifgrq_group));
	if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
	IF_ADDR_UNLOCK(ifp);
	return (error);
	}
	len -= sizeof(ifgrq);
	ifgp++;
	}
	IF_ADDR_UNLOCK(ifp);

	return (0);
	}

	/*
	 * Report the members of the group named in data->ifgr_name.
	 *
	 * With ifgr_len == 0 on entry only the needed buffer size is added up
	 * in ifgr_len.  Otherwise one struct ifg_req per member, carrying the
	 * member's if_xname, is copied out to ifgr_groups.
	 *
	 * Returns 0, ENOENT when no such group exists, EINVAL when the buffer
	 * length runs out, or the copyout() error.  The ifnet read lock is held
	 * for the whole operation and released on the single exit path.
	 */
	static int
	if_getgroupmembers(struct ifgroupreq *data)
	{
		INIT_VNET_NET(curvnet);
		struct ifgroupreq *ifgr = data;
		struct ifg_group *ifg;
		struct ifg_member *ifgm;
		struct ifg_req ifgrq, *ifgp;
		int len, error;

		error = 0;

		IFNET_RLOCK();
		TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) {
			if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
				break;
		}
		if (ifg == NULL) {
			error = ENOENT;
			goto out;
		}

		if (ifgr->ifgr_len == 0) {
			/* Size query only. */
			TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
				ifgr->ifgr_len += sizeof(ifgrq);
			}
			goto out;
		}

		len = ifgr->ifgr_len;
		ifgp = ifgr->ifgr_groups;
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
			if (len < sizeof(ifgrq)) {
				error = EINVAL;
				break;
			}
			bzero(&ifgrq, sizeof(ifgrq));
			strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
			    sizeof(ifgrq.ifgrq_member));
			error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req));
			if (error != 0)
				break;
			len -= sizeof(ifgrq);
			ifgp++;
		}
	out:
		IFNET_RUNLOCK();
		return (error);
	}

	/*
	* Delete Routes for a Network Interface
	*
	* Called for each routing entry via the rnh->rnh_walktree() call above
	* to delete all route entries referencing a detaching network interface.
	*
	* Arguments:
	* rn pointer to node in the routing table
	* arg argument passed to rnh->rnh_walktree() - detaching interface
	*
	* Returns:
	* 0 successful
	* errno failed - reason indicated
	*
	*/
	static int
	if_rtdel(struct radix_node rn, void arg)
	{
	struct rtentry rt = (struct rtentry )rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {

	/*
	* Protect (sorta) against walktree recursion problems
	* with cloned routes
	*/
	if ((rt->rt_flags & RTF_UP) == 0)
	return (0);

	err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	rt_mask(rt), rt->rt_flags,
	(struct rtentry **) NULL, rt->rt_fibnum);
	if (err) {
	log(LOG_WARNING, "if_rtdel: error %d\n", err);
	}
	}

	return (0);
	}

	/*
	 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
	 * structs used to represent other address families, it is necessary
	 * to perform a different comparison.
	 */

	/* True when the first a1->sa_len bytes of a1 and a2 are identical. */
	#define sa_equal(a1, a2) \
	(bcmp((a1), (a2), ((a1))->sa_len) == 0)

	/*
	 * True when both sockaddr_dl have the same sdl_len and their link-level
	 * addresses (LLADDR) match over a1's sdl_alen bytes.
	 */
	#define sa_dl_equal(a1, a2) \
	((((struct sockaddr_dl *)(a1))->sdl_len == \
	((struct sockaddr_dl *)(a2))->sdl_len) && \
	(bcmp(LLADDR((struct sockaddr_dl *)(a1)), \
	LLADDR((struct sockaddr_dl *)(a2)), \
	((struct sockaddr_dl *)(a1))->sdl_alen) == 0))

	/*
	* Locate an interface based on a complete address.
	*/
	/ARGSUSED/
	struct ifaddr *
	ifa_ifwithaddr(struct sockaddr *addr)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != addr->sa_family)
	continue;
	if (sa_equal(addr, ifa->ifa_addr))
	goto done;
	/* IP6 doesn't have broadcast */
	if ((ifp->if_flags & IFF_BROADCAST) &&
	ifa->ifa_broadaddr &&
	ifa->ifa_broadaddr->sa_len != 0 &&
	sa_equal(ifa->ifa_broadaddr, addr))
	goto done;
	}
	ifa = NULL;
	done:
	IFNET_RUNLOCK();
	return (ifa);
	}

	/*
	* Locate an interface based on the broadcast address.
	*/
	/* ARGSUSED */
	struct ifaddr *
	ifa_ifwithbroadaddr(struct sockaddr *addr)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != addr->sa_family)
	continue;
	if ((ifp->if_flags & IFF_BROADCAST) &&
	ifa->ifa_broadaddr &&
	ifa->ifa_broadaddr->sa_len != 0 &&
	sa_equal(ifa->ifa_broadaddr, addr))
	goto done;
	}
	ifa = NULL;
	done:
	IFNET_RUNLOCK();
	return (ifa);
	}

	/*
	* Locate the point to point interface with a given destination address.
	*/
	/ARGSUSED/
	struct ifaddr *
	ifa_ifwithdstaddr(struct sockaddr *addr)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
	continue;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != addr->sa_family)
	continue;
	if (ifa->ifa_dstaddr != NULL &&
	sa_equal(addr, ifa->ifa_dstaddr))
	goto done;
	}
	}
	ifa = NULL;
	done:
	IFNET_RUNLOCK();
	return (ifa);
	}

	/*
	* Find an interface on a specific network. If many, choice
	* is most specific found.
	*/
	struct ifaddr *
	ifa_ifwithnet(struct sockaddr *addr)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr ifa_maybe = (struct ifaddr ) 0;
	u_int af = addr->sa_family;
	char addr_data = addr->sa_data, cplim;

	/*
	* AF_LINK addresses can be looked up directly by their index number,
	* so do that if we can.
	*/
	if (af == AF_LINK) {
	struct sockaddr_dl sdl = (struct sockaddr_dl )addr;
	if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
	return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	* Scan though each interface, looking for ones that have
	* addresses in this address family.
	*/
	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	char cp, cp2, *cp3;

	if (ifa->ifa_addr->sa_family != af)
	next: continue;
	if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
	/*
	* This is a bit broken as it doesn't
	* take into account that the remote end may
	* be a single node in the network we are
	* looking for.
	* The trouble is that we don't know the
	* netmask for the remote end.
	*/
	if (ifa->ifa_dstaddr != NULL &&
	sa_equal(addr, ifa->ifa_dstaddr))
	goto done;
	} else {
	/*
	* if we have a special address handler,
	* then use it instead of the generic one.
	*/
	if (ifa->ifa_claim_addr) {
	if ((*ifa->ifa_claim_addr)(ifa, addr))
	goto done;
	continue;
	}

	/*
	* Scan all the bits in the ifa's address.
	* If a bit dissagrees with what we are
	* looking for, mask it with the netmask
	* to see if it really matters.
	* (A byte at a time)
	*/
	if (ifa->ifa_netmask == 0)
	continue;
	cp = addr_data;
	cp2 = ifa->ifa_addr->sa_data;
	cp3 = ifa->ifa_netmask->sa_data;
	cplim = ifa->ifa_netmask->sa_len
	+ (char *)ifa->ifa_netmask;
	while (cp3 < cplim)
	if ((cp++ ^ cp2++) & *cp3++)
	goto next; /* next address! */
	/*
	* If the netmask of what we just found
	* is more specific than what we had before
	* (if we had one) then remember the new one
	* before continuing to search
	* for an even better one.
	*/
	if (ifa_maybe == 0 \|\|
	rn_refines((caddr_t)ifa->ifa_netmask,
	(caddr_t)ifa_maybe->ifa_netmask))
	ifa_maybe = ifa;
	}
	}
	}
	ifa = ifa_maybe;
	done:
	IFNET_RUNLOCK();
	return (ifa);
	}

	/*
	* Find an interface address specific to an interface best matching
	* a given address.
	*/
	struct ifaddr *
	ifaof_ifpforaddr(struct sockaddr addr, struct ifnet ifp)
	{
	struct ifaddr *ifa;
	char cp, cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = 0;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
	return (0);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family != af)
	continue;
	if (ifa_maybe == 0)
	ifa_maybe = ifa;
	if (ifa->ifa_netmask == 0) {
	if (sa_equal(addr, ifa->ifa_addr) \|\|
	(ifa->ifa_dstaddr &&
	sa_equal(addr, ifa->ifa_dstaddr)))
	goto done;
	continue;
	}
	if (ifp->if_flags & IFF_POINTOPOINT) {
	if (sa_equal(addr, ifa->ifa_dstaddr))
	goto done;
	} else {
	cp = addr->sa_data;
	cp2 = ifa->ifa_addr->sa_data;
	cp3 = ifa->ifa_netmask->sa_data;
	cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
	for (; cp3 < cplim; cp3++)
	if ((cp++ ^ cp2++) & *cp3)
	break;
	if (cp3 == cplim)
	goto done;
	}
	}
	ifa = ifa_maybe;
	done:
	return (ifa);
	}
	+
	+#include <net/route.h>
	+#include <net/if_llatbl.h>

	/*
	* Default action when installing a route with a Link Level gateway.
	* Lookup an appropriate real ifa to point to.
	* This should be moved to /sys/net/link.c eventually.
	*/
	static void
	link_rtrequest(int cmd, struct rtentry rt, struct rt_addrinfo info)
	{
	struct ifaddr ifa, oifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	RT_LOCK_ASSERT(rt);

	if (cmd != RTM_ADD \|\| ((ifa = rt->rt_ifa) == 0) \|\|
	((ifp = ifa->ifa_ifp) == 0) \|\| ((dst = rt_key(rt)) == 0))
	return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa) {
	IFAREF(ifa); /* XXX */
	oifa = rt->rt_ifa;
	rt->rt_ifa = ifa;
	IFAFREE(oifa);
	if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
	ifa->ifa_rtrequest(cmd, rt, info);
	}
	}

	/*
	 * Mark an interface down and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * 'flag' must be IFF_UP (asserted) and is cleared from if_flags.
	 * 'fam' selects the address family whose addresses receive
	 * PRC_IFDOWN; PF_UNSPEC selects every address.  The interface's
	 * queue is flushed through its if_qflush method and a routing
	 * message is generated.
	 */
	static void
	if_unroute(struct ifnet *ifp, int flag, int fam)
	{
		struct ifaddr *ifa;

		KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));

		ifp->if_flags &= ~flag;
		getmicrotime(&ifp->if_lastchange);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
				pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
		ifp->if_qflush(ifp);

	#ifdef DEV_CARP
		if (ifp->if_carp)
			carp_carpdev_state(ifp->if_carp);
	#endif
		rt_ifmsg(ifp);
	}

	/*
	 * Mark an interface up and notify protocols of
	 * the transition.
	 * NOTE: must be called at splnet or equivalent.
	 *
	 * 'flag' must be IFF_UP (asserted) and is set in if_flags.
	 * 'fam' selects the address family whose addresses receive
	 * PRC_IFUP; PF_UNSPEC selects every address.  A routing message is
	 * generated and, with INET6, in6_if_up() is called.
	 */
	static void
	if_route(struct ifnet *ifp, int flag, int fam)
	{
		struct ifaddr *ifa;

		KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));

		ifp->if_flags |= flag;
		getmicrotime(&ifp->if_lastchange);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
				pfctlinput(PRC_IFUP, ifa->ifa_addr);
	#ifdef DEV_CARP
		if (ifp->if_carp)
			carp_carpdev_state(ifp->if_carp);
	#endif
		rt_ifmsg(ifp);
	#ifdef INET6
		in6_if_up(ifp);
	#endif
	}

	/*
	 * Hook pointers called through (*ptr)(...) elsewhere in this file;
	 * they must be declared as pointers to functions.
	 */
	void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
	void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */

	/*
	 * Record a new link state for an interface.  The actual notification
	 * work is deferred: to avoid LORs between the driver lock and upper
	 * layer locks, as well as possible recursion, an event is posted to
	 * the software-interrupt taskqueue and everything else happens in the
	 * static do_link_state_change().
	 */
	void
	if_link_state_change(struct ifnet *ifp, int link_state)
	{
		/* Nothing is recorded or queued unless the state really changed. */
		if (ifp->if_link_state != link_state) {
			ifp->if_link_state = link_state;
			taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
		}
	}

	static void
	do_link_state_change(void *arg, int pending)
	{
	struct ifnet ifp = (struct ifnet )arg;
	int link_state = ifp->if_link_state;
	int link;
	CURVNET_SET(ifp->if_vnet);

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);
	if (link_state == LINK_STATE_UP)
	link = NOTE_LINKUP;
	else if (link_state == LINK_STATE_DOWN)
	link = NOTE_LINKDOWN;
	else
	link = NOTE_LINKINV;
	KNOTE_UNLOCKED(&ifp->if_klist, link);
	if (ifp->if_vlantrunk != NULL)
	(*vlan_link_state_p)(ifp, link);

	if ((ifp->if_type == IFT_ETHER \|\| ifp->if_type == IFT_L2VLAN) &&
	IFP2AC(ifp)->ac_netgraph != NULL)
	(*ng_ether_link_state_p)(ifp, link_state);
	#ifdef DEV_CARP
	if (ifp->if_carp)
	carp_carpdev_state(ifp->if_carp);
	#endif
	if (ifp->if_bridge) {
	KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
	(*bstp_linkstate_p)(ifp, link_state);
	}
	if (ifp->if_lagg) {
	KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
	(*lagg_linkstate_p)(ifp, link_state);
	}

	devctl_notify("IFNET", ifp->if_xname,
	(link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
	if (pending > 1)
	if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
	log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
	(link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
	CURVNET_RESTORE();
	}

	/*
	 * Take an interface down: clear IFF_UP and tell the protocols of
	 * every address family about the transition.
	 * NOTE: must be called at splnet or equivalent.
	 */
	void
	if_down(struct ifnet *ifp)
	{
		/* AF_UNSPEC: do not restrict the notification to one family. */
		if_unroute(ifp, IFF_UP, AF_UNSPEC);
	}

	/*
	 * Bring an interface up: set IFF_UP and tell the protocols of
	 * every address family about the transition.
	 * NOTE: must be called at splnet or equivalent.
	 */
	void
	if_up(struct ifnet *ifp)
	{
		/* AF_UNSPEC: do not restrict the notification to one family. */
		if_route(ifp, IFF_UP, AF_UNSPEC);
	}

	/*
	* Flush an interface queue.
	*/
	static void
	if_qflush(struct ifnet *ifp)
	{
	struct mbuf m, n;
	struct ifaltq *ifq;

	ifq = &ifp->if_snd;
	IFQ_LOCK(ifq);
	#ifdef ALTQ
	if (ALTQ_IS_ENABLED(ifq))
	ALTQ_PURGE(ifq);
	#endif
	n = ifq->ifq_head;
	while ((m = n) != 0) {
	n = m->m_act;
	m_freem(m);
	}
	ifq->ifq_head = 0;
	ifq->ifq_tail = 0;
	ifq->ifq_len = 0;
	IFQ_UNLOCK(ifq);
	}

	/*
	 * Handle interface watchdog timer routines.  Called
	 * from softclock, we decrement timers (if set) and
	 * call the appropriate interface routine on expiration.
	 *
	 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
	 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
	 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
	 *
	 * 'arg' is unused.  The routine re-arms itself with timeout() every
	 * hz / IFNET_SLOWHZ ticks.
	 */
	static void
	if_slowtimo(void *arg)
	{
		VNET_ITERATOR_DECL(vnet_iter);
		struct ifnet *ifp;
		int s = splimp();

		IFNET_RLOCK();
		VNET_LIST_RLOCK();
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);
			INIT_VNET_NET(vnet_iter);
			TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
				/*
				 * Skip interfaces whose timer is not armed, or
				 * whose timer has not yet reached zero after
				 * this tick's decrement.
				 */
				if (ifp->if_timer == 0 || --ifp->if_timer)
					continue;
				if (ifp->if_watchdog)
					(*ifp->if_watchdog)(ifp);
			}
			CURVNET_RESTORE();
		}
		VNET_LIST_RUNLOCK();
		IFNET_RUNLOCK();
		splx(s);
		timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
	}

	/*
	* Map interface name to
	* interface structure pointer.
	*/
	struct ifnet *
	ifunit(const char *name)
	{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
	break;
	}
	IFNET_RUNLOCK();
	return (ifp);
	}

	/*
	 * Hardware specific interface ioctls.
	 *
	 * cmd:  ioctl command.
	 * ifp:  interface the request applies to.
	 * data: request payload; interpreted as a struct ifreq (or, for a few
	 *       commands, a struct ifstat / struct ifgroupreq overlay).
	 * td:   calling thread, used for privilege and MAC checks.
	 *
	 * Returns 0 or an errno value.  ENOIOCTL is returned for commands not
	 * handled here so the caller can pass them on to the protocol.
	 * Commands that modify state are gated by priv_check().
	 */
	static int
	ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
	{
		struct ifreq *ifr;
		struct ifstat *ifs;
		int error = 0;
		int new_flags, temp_flags;
		size_t namelen, onamelen;
		char new_name[IFNAMSIZ];
		struct ifaddr *ifa;
		struct sockaddr_dl *sdl;

		ifr = (struct ifreq *)data;
		switch (cmd) {
		case SIOCGIFINDEX:
			ifr->ifr_index = ifp->if_index;
			break;

		case SIOCGIFFLAGS:
			/* Stack and driver flags are reported as one 32-bit value. */
			temp_flags = ifp->if_flags | ifp->if_drv_flags;
			ifr->ifr_flags = temp_flags & 0xffff;
			ifr->ifr_flagshigh = temp_flags >> 16;
			break;

		case SIOCGIFCAP:
			ifr->ifr_reqcap = ifp->if_capabilities;
			ifr->ifr_curcap = ifp->if_capenable;
			break;

	#ifdef MAC
		case SIOCGIFMAC:
			error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
			break;
	#endif

		case SIOCGIFMETRIC:
			ifr->ifr_metric = ifp->if_metric;
			break;

		case SIOCGIFMTU:
			ifr->ifr_mtu = ifp->if_mtu;
			break;

		case SIOCGIFPHYS:
			ifr->ifr_phys = ifp->if_physical;
			break;

		case SIOCSIFFLAGS:
			error = priv_check(td, PRIV_NET_SETIFFLAGS);
			if (error)
				return (error);
			/*
			 * Currently, no driver owned flags pass the IFF_CANTCHANGE
			 * check, so we don't need special handling here yet.
			 */
			new_flags = (ifr->ifr_flags & 0xffff) |
			    (ifr->ifr_flagshigh << 16);
			if (ifp->if_flags & IFF_SMART) {
				/* Smart drivers twiddle their own routes */
			} else if (ifp->if_flags & IFF_UP &&
			    (new_flags & IFF_UP) == 0) {
				int s = splimp();
				if_down(ifp);
				splx(s);
			} else if (new_flags & IFF_UP &&
			    (ifp->if_flags & IFF_UP) == 0) {
				int s = splimp();
				if_up(ifp);
				splx(s);
			}
			/* See if permanently promiscuous mode bit is about to flip */
			if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
				if (new_flags & IFF_PPROMISC)
					ifp->if_flags |= IFF_PROMISC;
				else if (ifp->if_pcount == 0)
					ifp->if_flags &= ~IFF_PROMISC;
				log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
				    ifp->if_xname,
				    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
			}
			/* Keep the unchangeable bits, take the rest from the request. */
			ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			    (new_flags &~ IFF_CANTCHANGE);
			if (ifp->if_ioctl) {
				IFF_LOCKGIANT(ifp);
				(void) (*ifp->if_ioctl)(ifp, cmd, data);
				IFF_UNLOCKGIANT(ifp);
			}
			getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFCAP:
			error = priv_check(td, PRIV_NET_SETIFCAP);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			/* Reject capabilities the interface does not advertise. */
			if (ifr->ifr_reqcap & ~ifp->if_capabilities)
				return (EINVAL);
			IFF_LOCKGIANT(ifp);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

	#ifdef MAC
		case SIOCSIFMAC:
			error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
			break;
	#endif

		case SIOCSIFNAME:
			error = priv_check(td, PRIV_NET_SETIFNAME);
			if (error)
				return (error);
			error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
			if (error != 0)
				return (error);
			if (new_name[0] == '\0')
				return (EINVAL);
			if (ifunit(new_name) != NULL)
				return (EEXIST);

			/* Announce the departure of the interface. */
			rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
			EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

			log(LOG_INFO, "%s: changing name to '%s'\n",
			    ifp->if_xname, new_name);

			strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
			ifa = ifp->if_addr;
			IFA_LOCK(ifa);
			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
			namelen = strlen(new_name);
			onamelen = sdl->sdl_nlen;
			/*
			 * Move the address if needed.  This is safe because we
			 * allocate space for a name of length IFNAMSIZ when we
			 * create this in if_attach().
			 */
			if (namelen != onamelen) {
				bcopy(sdl->sdl_data + onamelen,
				    sdl->sdl_data + namelen, sdl->sdl_alen);
			}
			bcopy(new_name, sdl->sdl_data, namelen);
			sdl->sdl_nlen = namelen;
			/* Rebuild the netmask: 0xff over the new name bytes. */
			sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
			bzero(sdl->sdl_data, onamelen);
			while (namelen != 0)
				sdl->sdl_data[--namelen] = 0xff;
			IFA_UNLOCK(ifa);

			EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
			/* Announce the return of the interface. */
			rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
			break;

		case SIOCSIFMETRIC:
			error = priv_check(td, PRIV_NET_SETIFMETRIC);
			if (error)
				return (error);
			ifp->if_metric = ifr->ifr_metric;
			getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFPHYS:
			error = priv_check(td, PRIV_NET_SETIFPHYS);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			IFF_LOCKGIANT(ifp);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFMTU:
		{
			u_long oldmtu = ifp->if_mtu;

			error = priv_check(td, PRIV_NET_SETIFMTU);
			if (error)
				return (error);
			if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
				return (EINVAL);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			IFF_LOCKGIANT(ifp);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
			if (error == 0) {
				getmicrotime(&ifp->if_lastchange);
				rt_ifmsg(ifp);
			}
			/*
			 * If the link MTU changed, do network layer specific procedure.
			 */
			if (ifp->if_mtu != oldmtu) {
	#ifdef INET6
				nd6_setmtu(ifp);
	#endif
			}
			break;
		}

		case SIOCADDMULTI:
		case SIOCDELMULTI:
			if (cmd == SIOCADDMULTI)
				error = priv_check(td, PRIV_NET_ADDMULTI);
			else
				error = priv_check(td, PRIV_NET_DELMULTI);
			if (error)
				return (error);

			/* Don't allow group membership on non-multicast interfaces. */
			if ((ifp->if_flags & IFF_MULTICAST) == 0)
				return (EOPNOTSUPP);

			/* Don't let users screw up protocols' entries. */
			if (ifr->ifr_addr.sa_family != AF_LINK)
				return (EINVAL);

			if (cmd == SIOCADDMULTI) {
				struct ifmultiaddr *ifma;

				/*
				 * Userland is only permitted to join groups once
				 * via the if_addmulti() KPI, because it cannot hold
				 * struct ifmultiaddr * between calls.  It may also
				 * lose a race while we check if the membership
				 * already exists.
				 */
				IF_ADDR_LOCK(ifp);
				ifma = if_findmulti(ifp, &ifr->ifr_addr);
				IF_ADDR_UNLOCK(ifp);
				if (ifma != NULL)
					error = EADDRINUSE;
				else
					error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
			} else {
				error = if_delmulti(ifp, &ifr->ifr_addr);
			}
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCSIFPHYADDR:
		case SIOCDIFPHYADDR:
	#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
	#endif
		case SIOCSLIFPHYADDR:
		case SIOCSIFMEDIA:
		case SIOCSIFGENERIC:
			error = priv_check(td, PRIV_NET_HWIOCTL);
			if (error)
				return (error);
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			IFF_LOCKGIANT(ifp);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
			if (error == 0)
				getmicrotime(&ifp->if_lastchange);
			break;

		case SIOCGIFSTATUS:
			/* Start from an empty status string, then ask the driver. */
			ifs = (struct ifstat *)data;
			ifs->ascii[0] = '\0';
			/* FALLTHROUGH */

		case SIOCGIFPSRCADDR:
		case SIOCGIFPDSTADDR:
		case SIOCGLIFPHYADDR:
		case SIOCGIFMEDIA:
		case SIOCGIFGENERIC:
			if (ifp->if_ioctl == NULL)
				return (EOPNOTSUPP);
			IFF_LOCKGIANT(ifp);
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
			break;

		case SIOCSIFLLADDR:
			error = priv_check(td, PRIV_NET_SETLLADDR);
			if (error)
				return (error);
			error = if_setlladdr(ifp,
			    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
			break;

		case SIOCAIFGROUP:
		{
			struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

			error = priv_check(td, PRIV_NET_ADDIFGROUP);
			if (error)
				return (error);
			if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
				return (error);
			break;
		}

		case SIOCGIFGROUP:
			if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
				return (error);
			break;

		case SIOCDIFGROUP:
		{
			struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

			error = priv_check(td, PRIV_NET_DELIFGROUP);
			if (error)
				return (error);
			if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
				return (error);
			break;
		}

		default:
			/* Not ours: let the caller try the protocol. */
			error = ENOIOCTL;
			break;
		}
		return (error);
	}

	/*
	 * Interface ioctls.
	 *
	 * so:   socket the ioctl was issued on; its protocol's pru_control
	 *       handles anything not dealt with at the interface layer.
	 * cmd:  ioctl command.
	 * data: request payload (struct ifreq for most commands).
	 * td:   calling thread.
	 *
	 * Order of dispatch: ifconf() for the SIOCGIFCONF family, then the
	 * clone / group-member commands that need no interface, then
	 * ifhwioctl() on the named interface, and finally the socket
	 * protocol when ifhwioctl() returns ENOIOCTL.  Returns 0 or an errno.
	 */
	int
	ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
	{
		struct ifnet *ifp;
		struct ifreq *ifr;
		int error;
		int oif_flags;

		switch (cmd) {
		case SIOCGIFCONF:
		case OSIOCGIFCONF:
	#ifdef __amd64__
		case SIOCGIFCONF32:
	#endif
			return (ifconf(cmd, data));
		}
		ifr = (struct ifreq *)data;

		switch (cmd) {
		case SIOCIFCREATE:
		case SIOCIFCREATE2:
			error = priv_check(td, PRIV_NET_IFCREATE);
			if (error)
				return (error);
			return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
		case SIOCIFDESTROY:
			error = priv_check(td, PRIV_NET_IFDESTROY);
			if (error)
				return (error);
			return if_clone_destroy(ifr->ifr_name);

		case SIOCIFGCLONERS:
			return (if_clone_list((struct if_clonereq *)data));
		case SIOCGIFGMEMB:
			return (if_getgroupmembers((struct ifgroupreq *)data));
		}

		ifp = ifunit(ifr->ifr_name);
		if (ifp == 0)
			return (ENXIO);

		error = ifhwioctl(cmd, ifp, data, td);
		if (error != ENOIOCTL)
			return (error);

		/* Remember the flags so an IFF_UP change can be detected below. */
		oif_flags = ifp->if_flags;
		if (so->so_proto == 0)
			return (EOPNOTSUPP);
	#ifndef COMPAT_43
		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
		    data,
		    ifp, td));
	#else
		{
			int ocmd = cmd;

			switch (cmd) {

			case SIOCSIFDSTADDR:
			case SIOCSIFADDR:
			case SIOCSIFBRDADDR:
			case SIOCSIFNETMASK:
				/*
				 * Old-style sockaddr on input: supply the sa_len
				 * the old layout did not carry.
				 */
	#if BYTE_ORDER != BIG_ENDIAN
				if (ifr->ifr_addr.sa_family == 0 &&
				    ifr->ifr_addr.sa_len < 16) {
					ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
					ifr->ifr_addr.sa_len = 16;
				}
	#else
				if (ifr->ifr_addr.sa_len == 0)
					ifr->ifr_addr.sa_len = 16;
	#endif
				break;

			case OSIOCGIFADDR:
				cmd = SIOCGIFADDR;
				break;

			case OSIOCGIFDSTADDR:
				cmd = SIOCGIFDSTADDR;
				break;

			case OSIOCGIFBRDADDR:
				cmd = SIOCGIFBRDADDR;
				break;

			case OSIOCGIFNETMASK:
				cmd = SIOCGIFNETMASK;
			}
			error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
			    cmd,
			    data,
			    ifp, td));
			switch (ocmd) {

			case OSIOCGIFADDR:
			case OSIOCGIFDSTADDR:
			case OSIOCGIFBRDADDR:
			case OSIOCGIFNETMASK:
				/*
				 * Old-style result: overwrite the leading
				 * u_short of the sockaddr with the family.
				 */
				*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;

			}
		}
	#endif /* COMPAT_43 */

		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
	#ifdef INET6
			DELAY(100);/* XXX: temporary workaround for fxp issue*/
			if (ifp->if_flags & IFF_UP) {
				int s = splimp();
				in6_if_up(ifp);
				splx(s);
			}
	#endif
		}
		return (error);
	}

	/*
	* The code common to handling reference counted flags,
	* e.g., in ifpromisc() and if_allmulti().
	* The "pflag" argument can specify a permanent mode flag to check,
	* such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
	*
	* Only to be used on stack-owned flags, not driver-owned flags.
	*/
	static int
	if_setflag(struct ifnet ifp, int flag, int pflag, int refcount, int onswitch)
	{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	/* Sanity checks to catch programming errors */
	KASSERT((flag & (IFF_DRV_OACTIVE\|IFF_DRV_RUNNING)) == 0,
	("%s: setting driver-owned flag %d", __func__, flag));

	if (onswitch)
	KASSERT(*refcount >= 0,
	("%s: increment negative refcount %d for flag %d",
	__func__, *refcount, flag));
	else
	KASSERT(*refcount > 0,
	("%s: decrement non-positive refcount %d for flag %d",
	__func__, *refcount, flag));

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
	*refcount += onswitch ? 1 : -1;
	return (0);
	}

	/* Save ifnet parameters for if_ioctl() may fail */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	* See if we aren't the only and touching refcount is enough.
	* Actually toggle interface flag if we are the first or last.
	*/
	if (onswitch) {
	if ((*refcount)++)
	return (0);
	ifp->if_flags \|= flag;
	} else {
	if (--(*refcount))
	return (0);
	ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	if (ifp->if_ioctl == NULL) {
	error = EOPNOTSUPP;
	goto recover;
	}
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	IFF_UNLOCKGIANT(ifp);
	if (error)
	goto recover;
	/* Notify userland that interface flags have changed */
	rt_ifmsg(ifp);
	return (0);

	recover:
	/* Recover after driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
	}

	/*
	 * Turn promiscuous mode on (pswitch non-zero) or off (pswitch zero)
	 * for interface ifp.  Requests are reference counted through
	 * if_pcount, so only the first "on" and the last "off" actually
	 * change the interface.  Results are undefined if "on" and "off"
	 * requests are not matched.
	 *
	 * Returns the result of if_setflag().
	 */
	int
	ifpromisc(struct ifnet *ifp, int pswitch)
	{
		const int flags_before = ifp->if_flags;
		int error;

		error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
		    &ifp->if_pcount, pswitch);
		if (error != 0)
			return (error);

		/* Log only when the IFF_PROMISC bit itself was toggled. */
		if (((ifp->if_flags ^ flags_before) & IFF_PROMISC) != 0) {
			log(LOG_INFO, "%s: promiscuous mode %s\n",
			    ifp->if_xname,
			    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
		}
		return (0);
	}

	/*
	* Return interface configuration
	* of system. List may be used
	* in later ioctl's (above) to get
	* other information.
	*/
	/ARGSUSED/
	static int
	ifconf(u_long cmd, caddr_t data)
	{
	INIT_VNET_NET(curvnet);
	struct ifconf ifc = (struct ifconf )data;
	#ifdef __amd64__
	struct ifconf32 ifc32 = (struct ifconf32 )data;
	struct ifconf ifc_swab;
	#endif
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	#ifdef __amd64__
	if (cmd == SIOCGIFCONF32) {
	ifc_swab.ifc_len = ifc32->ifc_len;
	ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
	ifc = &ifc_swab;
	}
	#endif
	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
	return (EINVAL);

	again:
	if (ifc->ifc_len <= max_len) {
	max_len = ifc->ifc_len;
	full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK(); /* could sleep XXX */
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
	int addrs;

	/*
	* Zero the ifr_name buffer to make sure we don't
	* disclose the contents of the stack.
	*/
	memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));

	if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
	>= sizeof(ifr.ifr_name)) {
	sbuf_delete(sb);
	IFNET_RUNLOCK();
	return (ENAMETOOLONG);
	}

	addrs = 0;
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	struct sockaddr *sa = ifa->ifa_addr;

	if (jailed(curthread->td_ucred) &&
	!prison_if(curthread->td_ucred, sa))
	continue;
	addrs++;
	#ifdef COMPAT_43
	if (cmd == OSIOCGIFCONF) {
	struct osockaddr *osa =
	(struct osockaddr *)&ifr.ifr_addr;
	ifr.ifr_addr = *sa;
	osa->sa_family = sa->sa_family;
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);
	} else
	#endif
	if (sa->sa_len <= sizeof(*sa)) {
	ifr.ifr_addr = *sa;
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);
	} else {
	sbuf_bcat(sb, &ifr,
	offsetof(struct ifreq, ifr_addr));
	max_len += offsetof(struct ifreq, ifr_addr);
	sbuf_bcat(sb, sa, sa->sa_len);
	max_len += sa->sa_len;
	}

	if (!sbuf_overflowed(sb))
	valid_len = sbuf_len(sb);
	}
	if (addrs == 0) {
	bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
	sbuf_bcat(sb, &ifr, sizeof(ifr));
	max_len += sizeof(ifr);

	if (!sbuf_overflowed(sb))
	valid_len = sbuf_len(sb);
	}
	}
	IFNET_RUNLOCK();

	/*
	* If we didn't allocate enough space (uncommon), try again. If
	* we have already allocated as much space as we are allowed,
	* return what we've got.
	*/
	if (valid_len != max_len && !full) {
	sbuf_delete(sb);
	goto again;
	}

	ifc->ifc_len = valid_len;
	#ifdef __amd64__
	if (cmd == SIOCGIFCONF32)
	ifc32->ifc_len = valid_len;
	#endif
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
	}

	/*
	* Just like ifpromisc(), but for all-multicast-reception mode.
	*/
	int
	if_allmulti(struct ifnet *ifp, int onswitch)
	{

	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
	}

	struct ifmultiaddr *
	if_findmulti(struct ifnet ifp, struct sockaddr sa)
	{
	struct ifmultiaddr *ifma;

	IF_ADDR_LOCK_ASSERT(ifp);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
	if (sa->sa_family == AF_LINK) {
	if (sa_dl_equal(ifma->ifma_addr, sa))
	break;
	} else {
	if (sa_equal(ifma->ifma_addr, sa))
	break;
	}
	}

	return ifma;
	}

	/*
	 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
	 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
	 * the ifnet multicast address list here, so the caller must do that and
	 * other setup work (such as notifying the device driver).  The reference
	 * count is initialized to 1.
	 *
	 * ifp:    interface recorded in the new entry.
	 * sa:     address to duplicate into ifma_addr.
	 * llsa:   optional link-layer address to duplicate into ifma_lladdr.
	 * mflags: malloc(9) flags used for every allocation.
	 *
	 * Returns the new entry, or NULL if any allocation fails (anything
	 * already allocated is freed).
	 */
	static struct ifmultiaddr *
	if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
	    int mflags)
	{
		struct ifmultiaddr *ifma;
		struct sockaddr *dupsa;

		ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
		    M_ZERO);
		if (ifma == NULL)
			return (NULL);

		dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
		if (dupsa == NULL) {
			free(ifma, M_IFMADDR);
			return (NULL);
		}
		bcopy(sa, dupsa, sa->sa_len);
		ifma->ifma_addr = dupsa;

		ifma->ifma_ifp = ifp;
		ifma->ifma_refcount = 1;
		ifma->ifma_protospec = NULL;

		if (llsa == NULL) {
			ifma->ifma_lladdr = NULL;
			return (ifma);
		}

		dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
		if (dupsa == NULL) {
			free(ifma->ifma_addr, M_IFMADDR);
			free(ifma, M_IFMADDR);
			return (NULL);
		}
		bcopy(llsa, dupsa, llsa->sa_len);
		ifma->ifma_lladdr = dupsa;

		return (ifma);
	}

	/*
	 * if_freemulti: release an ifmultiaddr together with the address
	 * copies hanging off it.  Reference counting, driver notification,
	 * routing messages and release of any dependent link layer state are
	 * all the caller's job; on entry the reference count must already be
	 * zero and ifma_protospec must be NULL (both asserted).
	 */
	static void
	if_freemulti(struct ifmultiaddr *ifma)
	{
		KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
		    ifma->ifma_refcount));
		KASSERT(ifma->ifma_protospec == NULL,
		    ("if_freemulti: protospec not NULL"));

		/* The link-layer copy is optional; the address copy is not. */
		if (ifma->ifma_lladdr != NULL) {
			free(ifma->ifma_lladdr, M_IFMADDR);
		}
		free(ifma->ifma_addr, M_IFMADDR);
		free(ifma, M_IFMADDR);
	}

	/*
	* Register an additional multicast address with a network interface.
	*
	* - If the address is already present, bump the reference count on the
	* address and return.
	* - If the address is not link-layer, look up a link layer address.
	* - Allocate address structures for one or both addresses, and attach to the
	* multicast address list on the interface. If automatically adding a link
	* layer address, the protocol address will own a reference to the link
	* layer address, to be freed when it is freed.
	* - Notify the network device driver of an addition to the multicast address
	* list.
	*
	* 'sa' points to caller-owned memory with the desired multicast address.
	*
	* 'retifma' will be used to return a pointer to the resulting multicast
	* address reference, if desired.
	*/
	int
	if_addmulti(struct ifnet ifp, struct sockaddr sa,
	struct ifmultiaddr **retifma)
	{
	struct ifmultiaddr ifma, ll_ifma;
	struct sockaddr *llsa;
	int error;

	/*
	* If the address is already present, return a new reference to it;
	* otherwise, allocate storage and set up a new address.
	*/
	IF_ADDR_LOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
	ifma->ifma_refcount++;
	if (retifma != NULL)
	*retifma = ifma;
	IF_ADDR_UNLOCK(ifp);
	return (0);
	}

	/*
	* The address isn't already present; resolve the protocol address
	* into a link layer address, and then look that up, bump its
	* refcount or allocate an ifma for that also. If 'llsa' was
	* returned, we will need to free it later.
	*/
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
	error = ifp->if_resolvemulti(ifp, &llsa, sa);
	if (error)
	goto unlock_out;
	}

	/*
	* Allocate the new address. Don't hook it up yet, as we may also
	* need to allocate a link layer multicast address.
	*/
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
	error = ENOMEM;
	goto free_llsa_out;
	}

	/*
	* If a link layer address is found, we'll need to see if it's
	* already present in the address list, or allocate is as well.
	* When this block finishes, the link layer address will be on the
	* list.
	*/
	if (llsa != NULL) {
	ll_ifma = if_findmulti(ifp, llsa);
	if (ll_ifma == NULL) {
	ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
	if (ll_ifma == NULL) {
	--ifma->ifma_refcount;
	if_freemulti(ifma);
	error = ENOMEM;
	goto free_llsa_out;
	}
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
	ifma_link);
	} else
	ll_ifma->ifma_refcount++;
	ifma->ifma_llifma = ll_ifma;
	}

	/*
	* We now have a new multicast address, ifma, and possibly a new or
	* referenced link layer address. Add the primary address to the
	* ifnet address list.
	*/
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
	*retifma = ifma;

	/*
	* Must generate the message while holding the lock so that 'ifma'
	* pointer is still valid.
	*/
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_UNLOCK(ifp);

	/*
	* We are certain we have added something, so call down to the
	* interface to let them know about it.
	*/
	if (ifp->if_ioctl != NULL) {
	IFF_LOCKGIANT(ifp);
	(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
	IFF_UNLOCKGIANT(ifp);
	}

	if (llsa != NULL)
	free(llsa, M_IFMADDR);

	return (0);

	free_llsa_out:
	if (llsa != NULL)
	free(llsa, M_IFMADDR);

	unlock_out:
	IF_ADDR_UNLOCK(ifp);
	return (error);
	}

	/*
	* Delete a multicast group membership by network-layer group address.
	*
	* Returns ENOENT if the entry could not be found. If ifp no longer
	* exists, results are undefined. This entry point should only be used
	* from subsystems which do appropriate locking to hold ifp for the
	* duration of the call.
	* Network-layer protocol domains must use if_delmulti_ifma().
	*/
	int
	if_delmulti(struct ifnet ifp, struct sockaddr sa)
	{
	struct ifmultiaddr *ifma;
	int lastref;
	#ifdef INVARIANTS
	struct ifnet *oifp;
	INIT_VNET_NET(ifp->if_vnet);

	IFNET_RLOCK();
	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
	if (ifp == oifp)
	break;
	if (ifp != oifp)
	ifp = NULL;
	IFNET_RUNLOCK();

	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
	#endif
	if (ifp == NULL)
	return (ENOENT);

	IF_ADDR_LOCK(ifp);
	lastref = 0;
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL)
	lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_UNLOCK(ifp);

	if (ifma == NULL)
	return (ENOENT);

	if (lastref && ifp->if_ioctl != NULL) {
	IFF_LOCKGIANT(ifp);
	(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
	IFF_UNLOCKGIANT(ifp);
	}

	return (0);
	}

	/*
	* Delete a multicast group membership by group membership pointer.
	* Network-layer protocol domains must use this routine.
	*
	* It is safe to call this routine if the ifp disappeared. Callers should
	* hold IFF_LOCKGIANT() to avoid a LOR in case the hardware needs to be
	* reconfigured.
	*/
	void
	if_delmulti_ifma(struct ifmultiaddr *ifma)
	{
	#ifdef DIAGNOSTIC
	INIT_VNET_NET(curvnet);
	#endif
	struct ifnet *ifp;
	int lastref;

	/* The membership carries its own interface pointer; it may be NULL. */
	ifp = ifma->ifma_ifp;
	#ifdef DIAGNOSTIC
	/*
	* Diagnostic builds report a detached membership, and otherwise
	* verify that ifp is still on the ifnet list; if it is not, ifp is
	* treated as gone (set to NULL) for the rest of the function.
	*/
	if (ifp == NULL) {
	printf("%s: ifma_ifp seems to be detached\n", __func__);
	} else {
	struct ifnet *oifp;

	IFNET_RLOCK();
	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
	if (ifp == oifp)
	break;
	if (ifp != oifp) {
	printf("%s: ifnet %p disappeared\n", __func__, ifp);
	ifp = NULL;
	}
	IFNET_RUNLOCK();
	}
	#endif
	/*
	* If and only if the ifnet instance exists: Acquire the address lock.
	*/
	if (ifp != NULL)
	IF_ADDR_LOCK(ifp);

	/* Called with ifp == NULL when the interface is gone. */
	lastref = if_delmulti_locked(ifp, ifma, 0);

	if (ifp != NULL) {
	/*
	* If and only if the ifnet instance exists:
	* Release the address lock.
	* If the group was left: update the hardware hash filter.
	*/
	IF_ADDR_UNLOCK(ifp);
	if (lastref && ifp->if_ioctl != NULL) {
	IFF_LOCKGIANT(ifp);
	(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
	IFF_UNLOCKGIANT(ifp);
	}
	}
	}

	/*
	* Perform deletion of network-layer and/or link-layer multicast address.
	*
	* Return 0 if the reference count was decremented.
	* Return 1 if the final reference was released, indicating that the
	* hardware hash filter should be reprogrammed.
	*/
	static int
	if_delmulti_locked(struct ifnet ifp, struct ifmultiaddr ifma, int detaching)
	{
	struct ifmultiaddr *ll_ifma;

	if (ifp != NULL && ifma->ifma_ifp != NULL) {
	KASSERT(ifma->ifma_ifp == ifp,
	("%s: inconsistent ifp %p", __func__, ifp));
	IF_ADDR_LOCK_ASSERT(ifp);
	}

	ifp = ifma->ifma_ifp;

	/*
	* If the ifnet is detaching, null out references to ifnet,
	* so that upper protocol layers will notice, and not attempt
	* to obtain locks for an ifnet which no longer exists. The
	* routing socket announcement must happen before the ifnet
	* instance is detached from the system.
	*/
	if (detaching) {
	#ifdef DIAGNOSTIC
	printf("%s: detaching ifnet instance %p\n", __func__, ifp);
	#endif
	/*
	* ifp may already be nulled out if we are being reentered
	* to delete the ll_ifma.
	*/
	if (ifp != NULL) {
	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	ifma->ifma_ifp = NULL;
	}
	}

	if (--ifma->ifma_refcount > 0)
	return 0;

	/*
	* If this ifma is a network-layer ifma, a link-layer ifma may
	* have been associated with it. Release it first if so.
	*/
	ll_ifma = ifma->ifma_llifma;
	if (ll_ifma != NULL) {
	KASSERT(ifma->ifma_lladdr != NULL,
	("%s: llifma w/o lladdr", __func__));
	if (detaching)
	ll_ifma->ifma_ifp = NULL; /* XXX */
	if (--ll_ifma->ifma_refcount == 0) {
	if (ifp != NULL) {
	TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
	ifma_link);
	}
	if_freemulti(ll_ifma);
	}
	}

	if (ifp != NULL)
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);

	if_freemulti(ifma);

	/*
	* The last reference to this instance of struct ifmultiaddr
	* was released; the hardware should be notified of this change.
	*/
	return 1;
	}

	/*
	* Set the link layer address on an interface.
	*
	* At this time we only support certain types of interfaces,
	* and we don't allow the length of the address to change.
	*/
	int
	if_setlladdr(struct ifnet ifp, const u_char lladdr, int len)
	{
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifreq ifr;

	ifa = ifp->if_addr;
	if (ifa == NULL)
	return (EINVAL);
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	if (sdl == NULL)
	return (EINVAL);
	if (len != sdl->sdl_alen) /* don't allow length to change */
	return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_XETHER:
	case IFT_ISO88025:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
	case IFT_ARCNET:
	case IFT_IEEE8023ADLAG:
	bcopy(lladdr, LLADDR(sdl), len);
	break;
	default:
	return (ENODEV);
	}
	/*
	* If the interface is already up, we need
	* to re-init it in order to reprogram its
	* address filter.
	*/
	if ((ifp->if_flags & IFF_UP) != 0) {
	if (ifp->if_ioctl) {
	IFF_LOCKGIANT(ifp);
	ifp->if_flags &= ~IFF_UP;
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	ifp->if_flags \|= IFF_UP;
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	IFF_UNLOCKGIANT(ifp);
	}
	#ifdef INET
	/*
	* Also send gratuitous ARPs to notify other nodes about
	* the address change.
	*/
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (ifa->ifa_addr->sa_family == AF_INET)
	arp_ifinit(ifp, ifa);
	}
	#endif
	}
	return (0);
	}

	/*
	* The name argument must be a pointer to storage which will last as
	* long as the interface does. For physical devices, the result of
	* device_get_name(dev) is a good choice and for pseudo-devices a
	* static string works well.
	*/
	void
	if_initname(struct ifnet ifp, const char name, int unit)
	{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	}

	int
	if_printf(struct ifnet ifp, const char fmt, ...)
	{
	va_list ap;
	int retval;

	retval = printf("%s: ", ifp->if_xname);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
	}

	/*
	* When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
	* be called without Giant. However, we often can't acquire the Giant lock
	* at those points; instead, we run it via a task queue that holds Giant via
	* if_start_deferred.
	*
	* XXXRW: We need to make sure that the ifnet isn't fully detached until any
	* outstanding if_start_deferred() tasks have finished; otherwise they would
	* run after the free. This probably means waiting in if_detach().
	*/
	void
	if_start(struct ifnet *ifp)
	{

		/*
		 * A Giant-requiring driver called from a context that does not hold
		 * Giant cannot be entered directly: hand the work to the Giant
		 * software-interrupt task queue instead.
		 */
		if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && !mtx_owned(&Giant)) {
			taskqueue_enqueue(taskqueue_swi_giant, &ifp->if_starttask);
			return;
		}

		/* Either Giant is not needed or it is already held. */
		(*ifp->if_start)(ifp);
	}

	/*
	 * Task handler: call the driver's if_start routine for the ifnet passed
	 * as context.  Giant must be held on entry (asserted).  The pending
	 * count is not used.
	 */
	static void
	if_start_deferred(void *context, int pending)
	{
		struct ifnet *ifp = context;

		GIANT_REQUIRED;

		(*ifp->if_start)(ifp);
	}

	/*
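	* Backwards compatibility implementation of if_transmit for drivers
	* that have not implemented their own: it simply hands the mbuf to
	* the interface send queue via IFQ_HANDOFF().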
	*/
	/*
	 * Queue mbuf m for output on ifp through IFQ_HANDOFF() and return the
	 * error value that macro produced (0 on success).
	 */
	static int
	if_transmit(struct ifnet *ifp, struct mbuf *m)
	{
		int error;

		IFQ_HANDOFF(ifp, m, error);
		return (error);
	}

	int
	if_handoff(struct ifqueue ifq, struct mbuf m, struct ifnet *ifp, int adjust)
	{
	int active = 0;

	IF_LOCK(ifq);
	if (_IF_QFULL(ifq)) {
	_IF_DROP(ifq);
	IF_UNLOCK(ifq);
	m_freem(m);
	return (0);
	}
	if (ifp != NULL) {
	ifp->if_obytes += m->m_pkthdr.len + adjust;
	if (m->m_flags & (M_BCAST\|M_MCAST))
	ifp->if_omcasts++;
	active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
	}
	_IF_ENQUEUE(ifq, m);
	IF_UNLOCK(ifq);
	if (ifp != NULL && !active)
	if_start(ifp);
	return (1);
	}

	/*
	 * Install the allocate/free handler pair for interface type `type`.
	 * Both table slots must currently be empty (asserted).
	 */
	void
	if_register_com_alloc(u_char type, if_com_alloc_t a, if_com_free_t f)
	{

		KASSERT(if_com_alloc[type] == NULL,
		    ("if_register_com_alloc: %d already registered", type));
		KASSERT(if_com_free[type] == NULL,
		    ("if_register_com_alloc: %d free already registered", type));

		if_com_alloc[type] = a;
		if_com_free[type] = f;
	}

	/*
	 * Remove the allocate/free handler pair for interface type `type`.
	 * Both table slots must currently be populated (asserted).
	 */
	void
	if_deregister_com_alloc(u_char type)
	{

		KASSERT(if_com_alloc[type] != NULL,
		    ("if_deregister_com_alloc: %d not registered", type));
		KASSERT(if_com_free[type] != NULL,
		    ("if_deregister_com_alloc: %d free not registered", type));

		if_com_alloc[type] = NULL;
		if_com_free[type] = NULL;
	}
	Index: projects/arpv2_merge_1/sys/net/if_llatbl.c
	===================================================================
	--- projects/arpv2_merge_1/sys/net/if_llatbl.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/net/if_llatbl.c (revision 185839)
	@@ -1,298 +1,306 @@
	/*
	* Copyright (c) 2007 Qing Li, Luigi Rizzo, Alessandro Cerri. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*/
	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/syslog.h>
	#include <sys/sysctl.h>
	#include <sys/socket.h>
	#include <sys/kernel.h>
	#include <sys/mutex.h>
	+#include <sys/rwlock.h>
	#include <sys/vimage.h>

	#include <vm/uma.h>

	#include <netinet/in.h>
	#include <net/if_llatbl.h>
	#include <net/if.h>
	#include <net/if_dl.h>
	#include <net/if_var.h>
	#include <net/route.h>
	#include <netinet/if_ether.h>
	#include <netinet6/in6_var.h>
	#include <netinet6/nd6.h>

	MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");

	static SLIST_HEAD(, lltable) lltables = SLIST_HEAD_INITIALIZER(lltables);

	/* Used by lla_rt_output() to send a gratuitous ARP for published entries. */
	extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
	    u_char *);

	/*
	* Dump arp state for a specific address family.
	*/
	int
	lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
	{
		struct lltable *llt;
		int error = 0;

		/* Walk every registered table under the ifnet read lock. */
		IFNET_RLOCK();
		SLIST_FOREACH(llt, &lltables, llt_link) {
			if (llt->llt_af != af)
				continue;
			/* Stop at the first table whose dump callback fails. */
			error = llt->llt_dump(llt, wr);
			if (error != 0)
				break;
		}
		IFNET_RUNLOCK();

		return (error);
	}

	/*
	* Deletes an address from the address table.
	* This function is called by the timer functions
	* such as arptimer() and nd6_llinfo_timer(), and
	* the caller does the locking.
	*/
	void
	llentry_free(struct llentry *lle)
	{
	- struct lltable *llt = lle->lle_tbl;

	/* New revision: take the entry's write lock before unlinking it. */
	+ LLE_WLOCK(lle);
	LIST_REMOVE(lle, lle_next);

	/* Drop any packet that was held on this entry. */
	if (lle->la_hold != NULL)
	m_freem(lle->la_hold);
	- llt->llt_free(llt, lle);
	+
	/*
	 * LLE_FREE_LOCKED() either calls the table's llt_free callback (last
	 * reference) or decrements lle_refcnt and write-unlocks the entry; it
	 * also clears the local lle pointer.
	 */
	+ LLE_FREE_LOCKED(lle);
	}

	/*
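	* Free all entries from the given table and then free the table itself.
	* Since lltables collects tables from all of the interfaces, this
	* function acquires IFNET_WLOCK() itself while unlinking the table
	* from the global list (callers should not hold it on entry).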
	*/
	void
	lltable_free(struct lltable *llt)
	{
		struct llentry *lle, *next;
		int i;

		KASSERT(llt != NULL, ("%s: llt is NULL", __func__));

		/* Unpublish the table so that no new walker can find it. */
		IFNET_WLOCK();
		SLIST_REMOVE(&lltables, llt, lltable, llt_link);
		IFNET_WUNLOCK();

		/*
		 * Release every entry in every hash bucket.  The _SAFE iterator is
		 * required because llentry_free() unlinks the current entry.
		 */
		for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
			LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
				/* Stop the entry's timer before freeing it. */
				callout_drain(&lle->la_timer);
				llentry_free(lle);
			}
		}

		free(llt, M_LLTABLE);
	}

	/*
	 * Free the held packet (la_hold) of every entry in every table of
	 * address family af.
	 */
	void
	lltable_drain(int af)
	{
		struct lltable *llt;
		struct llentry *lle;
		int i;

		IFNET_RLOCK();
		SLIST_FOREACH(llt, &lltables, llt_link) {
			if (llt->llt_af != af)
				continue;

			for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
				LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
					/*
					 * la_hold is modified here, so hold the
					 * entry's write lock as llentry_free()
					 * does for the same field.
					 */
					LLE_WLOCK(lle);
					if (lle->la_hold) {
						m_freem(lle->la_hold);
						lle->la_hold = NULL;
					}
					LLE_WUNLOCK(lle);
				}
			}
		}
		IFNET_RUNLOCK();
	}

	/*
	* Create a new lltable.
	*/
	struct lltable *
	lltable_init(struct ifnet *ifp, int af)
	{
		struct lltable *llt;
		int bucket;

		llt = malloc(sizeof(*llt), M_LLTABLE, M_WAITOK);
		if (llt == NULL)
			return (NULL);

		/* Start with every hash bucket empty. */
		for (bucket = 0; bucket < LLTBL_HASHTBL_SIZE; bucket++)
			LIST_INIT(&llt->lle_head[bucket]);
		llt->llt_ifp = ifp;
		llt->llt_af = af;

		/* Publish the table on the global list. */
		IFNET_WLOCK();
		SLIST_INSERT_HEAD(&lltables, llt, llt_link);
		IFNET_WUNLOCK();

		return (llt);
	}

	/*
	* Called in route_output when adding/deleting a route to an interface.
	*/
	int
	lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
	{
	struct sockaddr_dl *dl =
	(struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
	struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
	struct ifnet *ifp;
	struct lltable *llt;
	struct llentry *lle;
	- u_int flags = 0;
	+ u_int laflags = 0, flags = 0;
	+ int error = 0;

	if (dl == NULL || dl->sdl_family != AF_LINK) {
	log(LOG_INFO, "%s: invalid dl\n", __func__);
	return EINVAL;
	}
	ifp = ifnet_byindex(dl->sdl_index);
	if (ifp == NULL) {
	log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
	__func__, dl->sdl_index);
	return EINVAL;
	}

	switch (rtm->rtm_type) {
	case RTM_ADD:
	if (rtm->rtm_flags & RTF_ANNOUNCE) {
	flags |= LLE_PUB;
	#ifdef INET
	if (dst->sa_family == AF_INET &&
	((struct sockaddr_inarp *)dst)->sin_other != 0) {
	struct rtentry *rt = rtalloc1(dst, 0, 0);
	if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
	log(LOG_INFO, "%s: RTM_ADD publish "
	"(proxy only) is invalid\n",
	__func__);
	- rtfree(rt);
	+ /* rt is NULL when the lookup itself failed. */
	+ if (rt != NULL)
	+ RTFREE(rt);
	return EINVAL;
	}
	- rtfree(rt);
	+ RTFREE(rt);

	flags |= LLE_PROXY;
	}
	#endif
	}
	flags |= LLE_CREATE;
	break;

	case RTM_DELETE:
	flags |= LLE_DELETE;
	break;

	case RTM_CHANGE:
	break;

	default:
	return EINVAL; /* XXX not implemented yet */
	}

	/*
	* XXXXXXXX:
	* REVISE this approach if possible.
	*/
	- IFNET_WLOCK();
	+ IFNET_RLOCK();
	SLIST_FOREACH(llt, &lltables, llt_link) {
	if (llt->llt_af == dst->sa_family &&
	llt->llt_ifp == ifp)
	break;
	}
	- IFNET_WUNLOCK();
	+ IFNET_RUNLOCK();
	KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));

	+ /*
	+ * Only a create request modifies the entry, so only then ask the
	+ * lookup for an exclusively locked entry.  This must be a bit test:
	+ * a logical AND would request it for any non-zero flags, and the
	+ * non-create path below releases a read lock.
	+ */
	+ if (flags & LLE_CREATE)
	+ flags |= LLE_EXCLUSIVE;
	+
	IF_AFDATA_LOCK(ifp);
	lle = lla_lookup(llt, flags, dst);
	+ IF_AFDATA_UNLOCK(ifp);
	if (lle != NULL) {
	if (flags & LLE_CREATE) {
	/* qing: if we delay the delete, then if a subsequent
	* "arp add" on the same host should look up this entry,
	* reset the LLE_DELETED flag, and reset the expiration timer
	*/
	bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
	lle->la_flags |= LLE_VALID;
	lle->la_flags &= ~LLE_DELETED;
	#ifdef INET6
	/*
	* ND6
	*/
	if (dst->sa_family == AF_INET6)
	lle->ln_state = ND6_LLINFO_REACHABLE;
	#endif
	/*
	* "arp" and "ndp" always sets the (RTF_STATIC | RTF_HOST) flags
	*/
	+
	if (rtm->rtm_rmx.rmx_expire == 0) {
	lle->la_flags |= LLE_STATIC;
	lle->la_expire = 0;
	} else
	lle->la_expire = rtm->rtm_rmx.rmx_expire;
	+ /* Snapshot the flags: the entry is not used after the unlock. */
	+ laflags = lle->la_flags;
	+ LLE_WUNLOCK(lle);
	#ifdef INET
	/* gratuitous ARP */
	- if ((lle->la_flags & LLE_PUB) &&
	+ if ((laflags & LLE_PUB) &&
	dst->sa_family == AF_INET) {
	arprequest(ifp,
	&((struct sockaddr_in *)dst)->sin_addr,
	&((struct sockaddr_in *)dst)->sin_addr,
	- ((lle->la_flags & LLE_PROXY) ?
	+ ((laflags & LLE_PROXY) ?
	(u_char *)IF_LLADDR(ifp) :
	(u_char *)LLADDR(dl)));
	}
	#endif
	- }
	+ } else
	+ LLE_RUNLOCK(lle);
	} else {
	- if (flags & LLE_DELETE) {
	- IF_AFDATA_UNLOCK(ifp);
	- return EINVAL;
	- }
	+ if (flags & LLE_DELETE)
	+ error = EINVAL;
	}

	- IF_AFDATA_UNLOCK(ifp);
	- return 0;
	+ return (error);
	}
	Index: projects/arpv2_merge_1/sys/net/if_llatbl.h
	===================================================================
	--- projects/arpv2_merge_1/sys/net/if_llatbl.h (revision 185838)
	+++ projects/arpv2_merge_1/sys/net/if_llatbl.h (revision 185839)
	@@ -1,131 +1,187 @@
	/*
	* Copyright (c) 2007 Qing Li, Luigi Rizzo, Alessandro Cerri. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*/
	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#ifndef _NET_IF_LLATBL_H_
	#define _NET_IF_LLATBL_H_

	+#include <sys/_rwlock.h>
	#include <netinet/in.h>

	struct ifnet;
	struct sysctl_req;
	struct rt_msghdr;
	struct rt_addrinfo;

	struct llentry;
	LIST_HEAD(llentries, llentry);

	+/*
	+ * Code referencing llentry must at least hold
	+ * a shared lock
	+ */
	struct llentry {
	/* Linkage within one hash bucket of the owning lltable. */
	LIST_ENTRY(llentry) lle_next;
	/* Per-entry reader/writer lock, driven by the LLE_*LOCK macros. */
	+ struct rwlock lle_lock;
	/* Owning table; its llt_free callback releases the entry. */
	struct lltable *lle_tbl;
	struct llentries *lle_head;
	/* Held packet; freed by llentry_free() and lltable_drain(). */
	struct mbuf *la_hold;
	/* Expiry time; lla_rt_output() stores 0 for static entries. */
	time_t la_expire;
	/* LLE_* flag bits (LLE_VALID, LLE_STATIC, LLE_DELETED, ...). */
	uint16_t la_flags;
	uint16_t la_asked;
	uint16_t la_preempt;
	uint16_t ln_byhint;
	int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
	uint16_t ln_router;
	time_t ln_ntick;
	/* Reference count maintained by LLE_ADDREF / LLE_FREE*. */
	+ int lle_refcnt;
	+
	/* Link-layer address; lla_rt_output() copies if_addrlen bytes here. */
	union {
	uint64_t mac_aligned;
	uint16_t mac16[3];
	} ll_addr;

	/* XXX af-private? */
	/* Timer storage; reached through the ln_timer_ch / la_timer aliases. */
	union {
	struct callout ln_timer_ch;
	struct callout la_timer;
	} lle_timer;
	/* NB: struct sockaddr must immediately follow */
	};

	/*
	 * Thin wrappers around the rwlock(9) primitives operating on the
	 * entry's lle_lock.  LLE_LOCK_INIT names the lock "lle" and passes
	 * RW_DUPOK; LLE_WLOCK_ASSERT checks that the write lock is held.
	 */
	+#define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock)
	+#define LLE_RLOCK(lle) rw_rlock(&(lle)->lle_lock)
	+#define LLE_WUNLOCK(lle) rw_wunlock(&(lle)->lle_lock)
	+#define LLE_RUNLOCK(lle) rw_runlock(&(lle)->lle_lock)
	+#define LLE_DOWNGRADE(lle) rw_downgrade(&(lle)->lle_lock)
	+#define LLE_TRY_UPGRADE(lle) rw_try_upgrade(&(lle)->lle_lock)
	+#define LLE_LOCK_INIT(lle) rw_init_flags(&(lle)->lle_lock, "lle", RW_DUPOK)
	+#define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED)
	+
	/*
	 * Take one additional reference on an entry.  The entry's write lock
	 * must be held and the current count must not be negative (both
	 * asserted).
	 */
	+#define LLE_ADDREF(lle) do { \
	+ LLE_WLOCK_ASSERT(lle); \
	+ KASSERT((lle)->lle_refcnt >= 0, \
	+ ("negative refcnt %d", (lle)->lle_refcnt)); \
	+ (lle)->lle_refcnt++; \
	+} while (0)
	+
	+/*
	+ * Drop one reference on an entry without freeing it.  The entry's write
	+ * lock must be held and the count must be positive (both asserted).
	+ * The counter is the int lle_refcnt member of struct llentry.
	+ */
	+#define LLE_REMREF(lle) do { \
	+ LLE_WLOCK_ASSERT(lle); \
	+ KASSERT((lle)->lle_refcnt > 0, \
	+ ("bogus refcnt %d", (lle)->lle_refcnt)); \
	+ (lle)->lle_refcnt--; \
	+} while (0)
	+
	/*
	 * Release one reference on an entry.  As the name says, this variant
	 * takes no lock itself; llentry_free() write-locks the entry before
	 * using it.  If the count is 1 or less the entry is handed to the
	 * owning table's llt_free callback; otherwise the count is decremented
	 * and the write lock released.  In both cases the caller's lle variable
	 * is cleared, so the argument must be a modifiable lvalue.
	 */
	+#define LLE_FREE_LOCKED(lle) do { \
	+ if ((lle)->lle_refcnt <= 1) \
	+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
	+ else { \
	+ (lle)->lle_refcnt--; \
	+ LLE_WUNLOCK(lle); \
	+ } \
	+ /* guard against invalid refs */ \
	+ lle = 0; \
	+} while (0)
	+
	/*
	 * Same as LLE_FREE_LOCKED(), but acquires the entry's write lock
	 * itself first.  The argument must be a modifiable lvalue because it
	 * is cleared at the end.
	 */
	+#define LLE_FREE(lle) do { \
	+ LLE_WLOCK(lle); \
	+ if ((lle)->lle_refcnt <= 1) \
	+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
	+ else { \
	+ (lle)->lle_refcnt--; \
	+ LLE_WUNLOCK(lle); \
	+ } \
	+ /* guard against invalid refs */ \
	+ lle = 0; \
	+} while (0)
	+
	+
	#define ln_timer_ch lle_timer.ln_timer_ch
	#define la_timer lle_timer.la_timer

	/*
	 * XXX bad name
	 *
	 * The layer-3 address is stored immediately after the llentry
	 * structure; these macros return a pointer to it and its sa_len.
	 * The argument is parenthesised so that pointer expressions such as
	 * "p + 1" are handled correctly.
	 */
	#define L3_ADDR(lle) ((struct sockaddr *)(&(lle)[1]))
	#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&(lle)[1]))->sa_len)

	/*
	 * Number of hash buckets per lltable (size of lle_head[]).  May be
	 * overridden by defining it before this header is included.
	 */
	#ifndef LLTBL_HASHTBL_SIZE
	#define LLTBL_HASHTBL_SIZE 32 /* default 32 ? */
	#endif

	/*
	 * Mask derived from the table size.  NOTE(review): this is only a
	 * valid index mask when the size is a power of two.
	 */
	#ifndef LLTBL_HASHMASK
	#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1)
	#endif

	/*
	 * One link-layer address table: the entries for a single
	 * (interface, address family) pair, hashed into lle_head[].
	 */
	struct lltable {
		SLIST_ENTRY(lltable)	llt_link;	/* on the global lltables list */
		struct llentries	lle_head[LLTBL_HASHTBL_SIZE];
		int			llt_af;		/* address family */
		struct ifnet		*llt_ifp;	/* owning interface */

		/* Per-address-family callbacks. */
		struct llentry *	(*llt_new)(const struct sockaddr *, u_int);
		void			(*llt_free)(struct lltable *, struct llentry *);
		struct llentry *	(*llt_lookup)(struct lltable *, u_int flags,
					    const struct sockaddr *l3addr);
		int			(*llt_rtcheck)(struct ifnet *,
					    const struct sockaddr *);
		int			(*llt_dump)(struct lltable *,
					    struct sysctl_req *);
	};
	MALLOC_DECLARE(M_LLTABLE);

	/*
	* flags to be passed to arplookup.
	*/
	/*
	 * The low bits (0x0001-0x0020) are the state that lla_rt_output()
	 * sets, clears or tests in an entry's la_flags.  LLE_CREATE,
	 * LLE_DELETE and LLE_EXCLUSIVE are request bits passed in the flags
	 * argument of lla_lookup().
	 */
	#define LLE_DELETED 0x0001 /* entry must be deleted */
	#define LLE_STATIC 0x0002 /* entry is static */
	#define LLE_IFADDR 0x0004 /* entry is interface addr */
	#define LLE_VALID 0x0008 /* ll_addr is valid */
	#define LLE_PROXY 0x0010 /* proxy entry ??? */
	#define LLE_PUB 0x0020 /* publish entry ??? */
	-#define LLE_CREATE 0x8000 /* create on a lookup miss */
	#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
	+#define LLE_CREATE 0x8000 /* create on a lookup miss */
	+#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */

	/*
	 * Fold key by repeatedly shifting right 8 bits and XOR-ing with the
	 * original key, then reduce with mask.  For a 32-bit key the low byte
	 * of the result is the XOR of its four bytes.  Both arguments are
	 * parenthesised so that expression arguments are evaluated as a unit;
	 * key is still evaluated more than once.
	 */
	#define LLATBL_HASH(key, mask) \
		((((((((key) >> 8) ^ (key)) >> 8) ^ (key)) >> 8) ^ (key)) & (mask))

	/* Table lifecycle and maintenance (implemented in if_llatbl.c). */
	struct lltable *lltable_init(struct ifnet *, int);
	void lltable_free(struct lltable *);
	void lltable_drain(int);
	int lltable_sysctl_dumparp(int, struct sysctl_req *);

	void llentry_free(struct llentry *);

	/*
	* Generic link layer address lookup function.
	*/
	/*
	 * Dispatch to the table's llt_lookup callback and return its result
	 * unchanged.  flags carries LLE_* request bits.
	 */
	static __inline struct llentry *
	lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
	{
		return llt->llt_lookup(llt, flags, l3addr);
	}

	int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
	#endif /* _NET_IF_LLATBL_H_ */
	Index: projects/arpv2_merge_1/sys/net/if_var.h
	===================================================================
	--- projects/arpv2_merge_1/sys/net/if_var.h (revision 185838)
	+++ projects/arpv2_merge_1/sys/net/if_var.h (revision 185839)
	@@ -1,726 +1,728 @@
	/*-
	* Copyright (c) 1982, 1986, 1989, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* From: @(#)if.h 8.1 (Berkeley) 6/10/93
	* $FreeBSD$
	*/

	#ifndef _NET_IF_VAR_H_
	#define _NET_IF_VAR_H_

	/*
	* Structures defining a network interface, providing a packet
	* transport mechanism (ala level 0 of the PUP protocols).
	*
	* Each interface accepts output datagrams of a specified maximum
	* length, and provides higher level routines with input datagrams
	* received from its medium.
	*
	* Output occurs when the routine if_output is called, with three parameters:
	* (*ifp->if_output)(ifp, m, dst, rt)
	* Here m is the mbuf chain to be sent and dst is the destination address.
	* The output routine encapsulates the supplied datagram if necessary,
	* and then transmits it on its medium.
	*
	* On input, each interface unwraps the data received by it, and either
	* places it on the input queue of an internetwork datagram routine
	* and posts the associated software interrupt, or passes the datagram to a raw
	* packet input routine.
	*
	* Routines exist for locating interfaces by their addresses
	* or for locating an interface on a certain network, as well as more general
	* routing and gateway routines maintaining information used to locate
	* interfaces. These routines live in the files if.c and route.c
	*/

	#ifdef __STDC__
	/*
	* Forward structure declarations for function prototypes [sic].
	*/
	struct mbuf;
	struct thread;
	struct rtentry;
	struct rt_addrinfo;
	struct socket;
	struct ether_header;
	struct lltable;
	struct carp_if;
	struct ifvlantrunk;
	#endif

	#include <sys/queue.h> /* get TAILQ macros */

	#ifdef _KERNEL
	#include <sys/mbuf.h>
	#include <sys/eventhandler.h>
	#endif /* _KERNEL */
	#include <sys/lock.h> /* XXX */
	#include <sys/mutex.h> /* XXX */
	#include <sys/event.h> /* XXX */
	#include <sys/_task.h>

	#define IF_DUNIT_NONE -1

	#include <altq/if_altq.h>

	TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
	TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
	TAILQ_HEAD(ifprefixhead, ifprefix);
	TAILQ_HEAD(ifmultihead, ifmultiaddr);
	TAILQ_HEAD(ifgrouphead, ifg_group);

	/*
	* Structure defining a queue for a network interface.
	*/
	struct ifqueue {
	struct mbuf *ifq_head; /* first packet; packets chain via m_nextpkt */
	struct mbuf *ifq_tail; /* last packet, NULL when the queue is empty */
	int ifq_len; /* current number of queued packets */
	int ifq_maxlen; /* _IF_QFULL() is true once ifq_len reaches this */
	int ifq_drops; /* incremented by _IF_DROP() */
	struct mtx ifq_mtx; /* taken by IF_LOCK() / IF_UNLOCK() */
	};

	/*
	* Structure defining a network interface.
	*
	* (Would like to call this struct ``if'', but C isn't PL/1.)
	*/

	struct ifnet {
	void if_softc; / pointer to driver state */
	void if_l2com; / pointer to protocol bits */
	TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
	char if_xname[IFNAMSIZ]; /* external name (name + unit) */
	const char if_dname; / driver name */
	int if_dunit; /* unit or IF_DUNIT_NONE */
	struct ifaddrhead if_addrhead; /* linked list of addresses per if */
	/*
	* if_addrhead is the list of all addresses associated to
	* an interface.
	* Some code in the kernel assumes that first element
	* of the list has type AF_LINK, and contains sockaddr_dl
	* addresses which store the link-level address and the name
	* of the interface.
	* However, access to the AF_LINK address through this
	* field is deprecated. Use if_addr or ifaddr_byindex() instead.
	*/
	struct knlist if_klist; /* events attached to this if */
	int if_pcount; /* number of promiscuous listeners */
	struct carp_if if_carp; / carp interface structure */
	struct bpf_if if_bpf; / packet filter structure */
	u_short if_index; /* numeric abbreviation for this if */
	short if_timer; /* time 'til if_watchdog called */
	struct ifvlantrunk if_vlantrunk; / pointer to 802.1q data */
	int if_flags; /* up/down, broadcast, etc. */
	int if_capabilities; /* interface features & capabilities */
	int if_capenable; /* enabled features & capabilities */
	void if_linkmib; / link-type-specific MIB data */
	size_t if_linkmiblen; /* length of above data */
	struct if_data if_data;
	struct ifmultihead if_multiaddrs; /* multicast addresses configured */
	int if_amcount; /* number of all-multicast requests */
	/* procedure handles */
	int (if_output) / output routine (enqueue) */
	(struct ifnet , struct mbuf , struct sockaddr *,
	struct rtentry *);
	void (if_input) / input routine (from h/w driver) */
	(struct ifnet , struct mbuf );
	void (if_start) / initiate output routine */
	(struct ifnet *);
	int (if_ioctl) / ioctl routine */
	(struct ifnet *, u_long, caddr_t);
	void (if_watchdog) / timer routine */
	(struct ifnet *);
	void (if_init) / Init routine */
	(void *);
	int (if_resolvemulti) / validate/resolve multicast */
	(struct ifnet , struct sockaddr , struct sockaddr );
	struct ifaddr if_addr; / pointer to link-level address */
	void if_llsoftc; / link layer softc */
	int if_drv_flags; /* driver-managed status flags */
	u_int if_spare_flags2; /* spare flags 2 */
	struct ifaltq if_snd; /* output queue (includes altq) */
	const u_int8_t if_broadcastaddr; / linklevel broadcast bytestring */

	void if_bridge; / bridge glue */

	struct label if_label; / interface MAC label */

	/* these are only used by IPv6 */
	struct ifprefixhead if_prefixhead; /* list of prefixes per if */
	void *if_afdata[AF_MAX];
	int if_afdata_initialized;
	struct mtx if_afdata_mtx;
	struct task if_starttask; /* task for IFF_NEEDSGIANT */
	struct task if_linktask; /* task for link change events */
	struct mtx if_addr_mtx; /* mutex to protect address lists */

	LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
	/* protected by if_addr_mtx */
	void *if_pf_kif;
	void if_lagg; / lagg glue */
	void if_pspare[8]; / multiq/TOE 3; vimage 3; general use 4 */
	void (if_qflush) / flush any queues */
	(struct ifnet *);
	int (if_transmit) / initiate output routine */
	(struct ifnet , struct mbuf );
	int if_ispare[2]; /* general use 2 */
	};

	typedef void if_init_f_t(void *);

	/*
	* XXX These aliases are terribly dangerous because they could apply
	* to anything.
	*/
	/*
	 * Each alias below expands a short member name to the corresponding
	 * ifi_* field of the embedded if_data structure, so ifp->if_mtu reads
	 * ifp->if_data.ifi_mtu.
	 */
	#define if_mtu if_data.ifi_mtu
	#define if_type if_data.ifi_type
	#define if_physical if_data.ifi_physical
	#define if_addrlen if_data.ifi_addrlen
	#define if_hdrlen if_data.ifi_hdrlen
	#define if_metric if_data.ifi_metric
	#define if_link_state if_data.ifi_link_state
	#define if_baudrate if_data.ifi_baudrate
	#define if_hwassist if_data.ifi_hwassist
	#define if_ipackets if_data.ifi_ipackets
	#define if_ierrors if_data.ifi_ierrors
	#define if_opackets if_data.ifi_opackets
	#define if_oerrors if_data.ifi_oerrors
	#define if_collisions if_data.ifi_collisions
	#define if_ibytes if_data.ifi_ibytes
	#define if_obytes if_data.ifi_obytes
	#define if_imcasts if_data.ifi_imcasts
	#define if_omcasts if_data.ifi_omcasts
	#define if_iqdrops if_data.ifi_iqdrops
	#define if_noproto if_data.ifi_noproto
	#define if_lastchange if_data.ifi_lastchange
	/* if_output with a NULL rtentry argument. */
	#define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)NULL)

	/* for compatibility with other BSDs */
	#define if_addrlist if_addrhead
	#define if_list if_link
	#define if_name(ifp) ((ifp)->if_xname)

	/*
	* Locks for address lists on the network interface.
	*/
	/*
	 * if_addr_mtx is initialised as a default (MTX_DEF) mutex named
	 * "if_addr_mtx"; the wrappers below lock, unlock, destroy and assert
	 * ownership of it.
	 */
	#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
	"if_addr_mtx", NULL, MTX_DEF)
	#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
	#define IF_ADDR_LOCK(if) mtx_lock(&(if)->if_addr_mtx)
	#define IF_ADDR_UNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
	#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)

	/*
	* Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
	* are queues of messages stored on ifqueue structures
	* (defined above). Entries are added to and deleted from these structures
	* by these macros, which should be called with ipl raised to splimp().
	*/
	/*
	 * Queue mutex wrappers and simple accessors.  The underscore-prefixed
	 * macros take no lock themselves.
	 */
	#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
	#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
	#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
	/* True once the queue holds ifq_maxlen or more packets. */
	#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
	/* Count one dropped packet. */
	#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
	#define _IF_QLEN(ifq) ((ifq)->ifq_len)

	/*
	 * Append packet m to the tail of ifq.  _IF_ENQUEUE takes no lock;
	 * IF_ENQUEUE wraps it in IF_LOCK/IF_UNLOCK.
	 */
	#define _IF_ENQUEUE(ifq, m) do { \
		(m)->m_nextpkt = NULL; \
		if ((ifq)->ifq_tail != NULL) \
			(ifq)->ifq_tail->m_nextpkt = (m); \
		else \
			(ifq)->ifq_head = (m); \
		(ifq)->ifq_tail = (m); \
		(ifq)->ifq_len++; \
	} while (0)

	#define IF_ENQUEUE(ifq, m) do { \
		IF_LOCK(ifq); \
		_IF_ENQUEUE(ifq, m); \
		IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Insert packet m at the head of ifq.  _IF_PREPEND takes no lock;
	 * IF_PREPEND wraps it in IF_LOCK/IF_UNLOCK.
	 */
	#define _IF_PREPEND(ifq, m) do { \
		(m)->m_nextpkt = (ifq)->ifq_head; \
		(ifq)->ifq_head = (m); \
		if ((ifq)->ifq_tail == NULL) \
			(ifq)->ifq_tail = (m); \
		(ifq)->ifq_len++; \
	} while (0)

	#define IF_PREPEND(ifq, m) do { \
		IF_LOCK(ifq); \
		_IF_PREPEND(ifq, m); \
		IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Remove the packet at the head of ifq and store it in m, which must
	 * be an lvalue; m is NULL afterwards if the queue was empty.
	 * _IF_DEQUEUE takes no lock; IF_DEQUEUE wraps it in IF_LOCK/IF_UNLOCK.
	 */
	#define _IF_DEQUEUE(ifq, m) do { \
		(m) = (ifq)->ifq_head; \
		if ((m) != NULL) { \
			(ifq)->ifq_head = (m)->m_nextpkt; \
			if ((ifq)->ifq_head == NULL) \
				(ifq)->ifq_tail = NULL; \
			(m)->m_nextpkt = NULL; \
			(ifq)->ifq_len--; \
		} \
	} while (0)

	#define IF_DEQUEUE(ifq, m) do { \
		IF_LOCK(ifq); \
		_IF_DEQUEUE(ifq, m); \
		IF_UNLOCK(ifq); \
	} while (0)

	/*
	 * Peek at the head packet without removing it: m is set to ifq_head
	 * (NULL when the queue is empty).  IF_POLL is the same operation and,
	 * unlike the other non-underscore macros, takes no lock.
	 */
	#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
	#define IF_POLL(ifq, m) _IF_POLL(ifq, m)

	/*
	 * Dequeue and free every packet on ifq.  _IF_DRAIN takes no lock;
	 * IF_DRAIN wraps it in IF_LOCK/IF_UNLOCK.
	 */
	#define _IF_DRAIN(ifq) do { \
		struct mbuf *m; \
		_IF_DEQUEUE(ifq, m); \
		while (m != NULL) { \
			m_freem(m); \
			_IF_DEQUEUE(ifq, m); \
		} \
	} while (0)

	#define IF_DRAIN(ifq) do { \
		IF_LOCK(ifq); \
		_IF_DRAIN(ifq); \
		IF_UNLOCK(ifq); \
	} while(0)

	#ifdef _KERNEL
	/*
	 * Each handler type below takes an untyped pointer and the affected
	 * ifnet.
	 */
	/* interface address change event */
	typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
	/* new interface arrival event */
	typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
	/* interface departure event */
	typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
	EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);

	/*
	* interface groups
	*/
	/* A named interface group and the list of its members. */
	struct ifg_group {
	char ifg_group[IFNAMSIZ]; /* group name */
	u_int ifg_refcnt;
	void *ifg_pf_kif;
	TAILQ_HEAD(, ifg_member) ifg_members; /* list of struct ifg_member */
	TAILQ_ENTRY(ifg_group) ifg_next;
	};

	/* One element of a group's ifg_members list; points at an interface. */
	struct ifg_member {
	TAILQ_ENTRY(ifg_member) ifgm_next;
	struct ifnet *ifgm_ifp;
	};

	/* One element of an interface's if_groups list; points at a group. */
	struct ifg_list {
	struct ifg_group *ifgl_group;
	TAILQ_ENTRY(ifg_list) ifgl_next;
	};

	/*
	 * The attach/detach handlers receive an untyped pointer and the group;
	 * the change handler receives a string instead of a group.
	 */
	/* group attach event */
	typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
	EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
	/* group detach event */
	typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
	EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
	/* group change event */
	typedef void (*group_change_event_handler_t)(void *, const char *);
	EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);

	/*
	 * Wrappers for the per-interface if_afdata_mtx.  This revision drops
	 * MTX_RECURSE from the initialisation, so the mutex is a plain MTX_DEF
	 * mutex and a thread that already owns it must not lock it again.
	 */
	#define IF_AFDATA_LOCK_INIT(ifp) \
	- mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, \
	- (MTX_DEF \| MTX_RECURSE))
	+ mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, MTX_DEF)
	#define IF_AFDATA_LOCK(ifp) mtx_lock(&(ifp)->if_afdata_mtx)
	#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_mtx)
	#define IF_AFDATA_UNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_mtx)
	#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_mtx)
	+
	/* Assert that the current thread does / does not own the mutex. */
	+#define IF_AFDATA_LOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_OWNED)
	+#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_NOTOWNED)

	/*
	 * Acquire / release Giant, but only for interfaces flagged
	 * IFF_NEEDSGIANT; for all other interfaces both macros do nothing.
	 */
	#define IFF_LOCKGIANT(ifp) do { \
		if (((ifp)->if_flags & IFF_NEEDSGIANT) != 0) \
			mtx_lock(&Giant); \
	} while (0)

	#define IFF_UNLOCKGIANT(ifp) do { \
		if (((ifp)->if_flags & IFF_NEEDSGIANT) != 0) \
			mtx_unlock(&Giant); \
	} while (0)

	/*
	 * if_handoff() appends m to ifq and returns 1, or frees m and returns 0
	 * when the queue is full; see its definition in if.c.  The macros cast
	 * the queue argument to struct ifqueue *; it is parenthesised so the
	 * cast applies to the whole argument expression.
	 */
	int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
	    int adjust);
	#define IF_HANDOFF(ifq, m, ifp) \
		if_handoff((struct ifqueue *)(ifq), (m), (ifp), 0)
	#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
		if_handoff((struct ifqueue *)(ifq), (m), (ifp), (adj))

	void if_start(struct ifnet *);

	/*
	 * Enqueue m on ifq under the queue lock and report the outcome in err.
	 * With ALTQ enabled the packet goes through ALTQ_ENQUEUE(), which sets
	 * err.  Otherwise a full queue frees m and sets err to ENOBUFS, while a
	 * successful enqueue sets err to 0.  Any non-zero err bumps ifq_drops.
	 */
	#define IFQ_ENQUEUE(ifq, m, err) \
	do { \
		IF_LOCK(ifq); \
		if (ALTQ_IS_ENABLED(ifq)) { \
			ALTQ_ENQUEUE(ifq, m, NULL, err); \
		} else if (_IF_QFULL(ifq)) { \
			m_freem(m); \
			(err) = ENOBUFS; \
		} else { \
			_IF_ENQUEUE(ifq, m); \
			(err) = 0; \
		} \
		if (err) \
			(ifq)->ifq_drops++; \
		IF_UNLOCK(ifq); \
	} while (0)

	#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
	do { \
	if (TBR_IS_ENABLED(ifq)) \
	(m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
	else if (ALTQ_IS_ENABLED(ifq)) \
	ALTQ_DEQUEUE(ifq, m); \
	else \
	_IF_DEQUEUE(ifq, m); \
	} while (0)

	#define IFQ_DEQUEUE(ifq, m) \
	do { \
	IF_LOCK(ifq); \
	IFQ_DEQUEUE_NOLOCK(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	#define IFQ_POLL_NOLOCK(ifq, m) \
	do { \
	if (TBR_IS_ENABLED(ifq)) \
	(m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
	else if (ALTQ_IS_ENABLED(ifq)) \
	ALTQ_POLL(ifq, m); \
	else \
	_IF_POLL(ifq, m); \
	} while (0)

	#define IFQ_POLL(ifq, m) \
	do { \
	IF_LOCK(ifq); \
	IFQ_POLL_NOLOCK(ifq, m); \
	IF_UNLOCK(ifq); \
	} while (0)

	#define IFQ_PURGE_NOLOCK(ifq) \
	do { \
	if (ALTQ_IS_ENABLED(ifq)) { \
	ALTQ_PURGE(ifq); \
	} else \
	_IF_DRAIN(ifq); \
	} while (0)

	#define IFQ_PURGE(ifq) \
	do { \
	IF_LOCK(ifq); \
	IFQ_PURGE_NOLOCK(ifq); \
	IF_UNLOCK(ifq); \
	} while (0)

	#define IFQ_SET_READY(ifq) \
	do { ((ifq)->altq_flags \|= ALTQF_READY); } while (0)

	#define IFQ_LOCK(ifq) IF_LOCK(ifq)
	#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
	#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
	#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
	#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
	#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
	#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
	#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))

	/*
	* The IFF_DRV_OACTIVE test should really occur in the device driver, not in
	* the handoff logic, as that flag is locked by the device driver.
	*/
	#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
	do { \
	int len; \
	short mflags; \
	\
	len = (m)->m_pkthdr.len; \
	mflags = (m)->m_flags; \
	IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
	if ((err) == 0) { \
	(ifp)->if_obytes += len + (adj); \
	if (mflags & M_MCAST) \
	(ifp)->if_omcasts++; \
	if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
	if_start(ifp); \
	} \
	} while (0)

	#define IFQ_HANDOFF(ifp, m, err) \
	IFQ_HANDOFF_ADJ(ifp, m, 0, err)

	#define IFQ_DRV_DEQUEUE(ifq, m) \
	do { \
	(m) = (ifq)->ifq_drv_head; \
	if (m) { \
	if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
	(ifq)->ifq_drv_tail = NULL; \
	(m)->m_nextpkt = NULL; \
	(ifq)->ifq_drv_len--; \
	} else { \
	IFQ_LOCK(ifq); \
	IFQ_DEQUEUE_NOLOCK(ifq, m); \
	while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
	struct mbuf *m0; \
	IFQ_DEQUEUE_NOLOCK(ifq, m0); \
	if (m0 == NULL) \
	break; \
	m0->m_nextpkt = NULL; \
	if ((ifq)->ifq_drv_tail == NULL) \
	(ifq)->ifq_drv_head = m0; \
	else \
	(ifq)->ifq_drv_tail->m_nextpkt = m0; \
	(ifq)->ifq_drv_tail = m0; \
	(ifq)->ifq_drv_len++; \
	} \
	IFQ_UNLOCK(ifq); \
	} \
	} while (0)

	#define IFQ_DRV_PREPEND(ifq, m) \
	do { \
	(m)->m_nextpkt = (ifq)->ifq_drv_head; \
	if ((ifq)->ifq_drv_tail == NULL) \
	(ifq)->ifq_drv_tail = (m); \
	(ifq)->ifq_drv_head = (m); \
	(ifq)->ifq_drv_len++; \
	} while (0)

	#define IFQ_DRV_IS_EMPTY(ifq) \
	(((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))

	#define IFQ_DRV_PURGE(ifq) \
	do { \
	struct mbuf m, n = (ifq)->ifq_drv_head; \
	while((m = n) != NULL) { \
	n = m->m_nextpkt; \
	m_freem(m); \
	} \
	(ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
	(ifq)->ifq_drv_len = 0; \
	IFQ_PURGE(ifq); \
	} while (0)

	/*
	* 72 was chosen below because it is the size of a TCP/IP
	* header (40) + the minimum mss (32).
	*/
	#define IF_MINMTU 72
	#define IF_MAXMTU 65535

	#endif /* _KERNEL */

	/*
	* The ifaddr structure contains information about one address
	* of an interface. They are maintained by the different address families,
	* are allocated and attached when an address is set, and are linked
	* together so all addresses for an interface can be located.
	*
	* NOTE: a 'struct ifaddr' is always at the beginning of a larger
	* chunk of malloc'ed memory, where we store the three addresses
	* (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
	*/
	struct ifaddr {
	struct sockaddr ifa_addr; / address of interface */
	struct sockaddr ifa_dstaddr; / other end of p-to-p link */
	#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
	struct sockaddr ifa_netmask; / used to determine subnet */
	struct if_data if_data; /* not all members are meaningful */
	struct ifnet ifa_ifp; / back-pointer to interface */
	TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
	void (ifa_rtrequest) / check or clean routes (+ or -)'d */
	(int, struct rtentry , struct rt_addrinfo );
	u_short ifa_flags; /* mostly rt_flags for cloning */
	u_int ifa_refcnt; /* references to this structure */
	int ifa_metric; /* cost of going out this interface */
	int (ifa_claim_addr) / check if an addr goes to this if */
	(struct ifaddr , struct sockaddr );
	struct mtx ifa_mtx;
	};
	#define IFA_ROUTE RTF_UP /* route installed */

	/* for compatibility with other BSDs */
	#define ifa_list ifa_link

	#define IFA_LOCK_INIT(ifa) \
	mtx_init(&(ifa)->ifa_mtx, "ifaddr", NULL, MTX_DEF)
	#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
	#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
	#define IFA_DESTROY(ifa) mtx_destroy(&(ifa)->ifa_mtx)

	/*
	* The prefix structure contains information about one prefix
	* of an interface. They are maintained by the different address families,
	* are allocated and attached when a prefix or an address is set,
	* and are linked together so all prefixes for an interface can be located.
	*/
	struct ifprefix {
	struct sockaddr ifpr_prefix; / prefix of interface */
	struct ifnet ifpr_ifp; / back-pointer to interface */
	TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
	u_char ifpr_plen; /* prefix length in bits */
	u_char ifpr_type; /* protocol dependent prefix type */
	};

	/*
	* Multicast address structure. This is analogous to the ifaddr
	* structure except that it keeps track of multicast addresses.
	*/
	struct ifmultiaddr {
	TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
	struct sockaddr ifma_addr; / address this membership is for */
	struct sockaddr ifma_lladdr; / link-layer translation, if any */
	struct ifnet ifma_ifp; / back-pointer to interface */
	u_int ifma_refcount; /* reference count */
	void ifma_protospec; / protocol-specific state, if any */
	struct ifmultiaddr ifma_llifma; / pointer to ifma for ifma_lladdr */
	};

	#ifdef _KERNEL
	#define IFAFREE(ifa) \
	do { \
	IFA_LOCK(ifa); \
	KASSERT((ifa)->ifa_refcnt > 0, \
	("ifa %p !(ifa_refcnt > 0)", ifa)); \
	if (--(ifa)->ifa_refcnt == 0) { \
	IFA_DESTROY(ifa); \
	free(ifa, M_IFADDR); \
	} else \
	IFA_UNLOCK(ifa); \
	} while (0)

	#define IFAREF(ifa) \
	do { \
	IFA_LOCK(ifa); \
	++(ifa)->ifa_refcnt; \
	IFA_UNLOCK(ifa); \
	} while (0)

	extern struct mtx ifnet_lock;
	#define IFNET_LOCK_INIT() \
	mtx_init(&ifnet_lock, "ifnet", NULL, MTX_DEF \| MTX_RECURSE)
	#define IFNET_WLOCK() mtx_lock(&ifnet_lock)
	#define IFNET_WUNLOCK() mtx_unlock(&ifnet_lock)
	#define IFNET_WLOCK_ASSERT() mtx_assert(&ifnet_lock, MA_OWNED)
	#define IFNET_RLOCK() IFNET_WLOCK()
	#define IFNET_RUNLOCK() IFNET_WUNLOCK()

	struct ifindex_entry {
	struct ifnet *ife_ifnet;
	struct cdev *ife_dev;
	};

	struct ifnet *ifnet_byindex(u_short idx);

	/*
	* Given the index, ifaddr_byindex() returns the one and only
	* link-level ifaddr for the interface. You are not supposed to use
	* it to traverse the list of addresses associated to the interface.
	*/
	struct ifaddr *ifaddr_byindex(u_short idx);
	struct cdev *ifdev_byindex(u_short idx);

	extern struct ifnethead ifnet;
	extern int ifqmaxlen;
	extern struct ifnet loif; / first loopback interface */
	extern int if_index;

	int if_addgroup(struct ifnet , const char );
	int if_delgroup(struct ifnet , const char );
	int if_addmulti(struct ifnet , struct sockaddr , struct ifmultiaddr **);
	int if_allmulti(struct ifnet *, int);
	struct ifnet* if_alloc(u_char);
	void if_attach(struct ifnet *);
	int if_delmulti(struct ifnet , struct sockaddr );
	void if_delmulti_ifma(struct ifmultiaddr *);
	void if_detach(struct ifnet *);
	void if_purgeaddrs(struct ifnet *);
	void if_purgemaddrs(struct ifnet *);
	void if_down(struct ifnet *);
	struct ifmultiaddr *
	if_findmulti(struct ifnet , struct sockaddr );
	void if_free(struct ifnet *);
	void if_free_type(struct ifnet *, u_char);
	void if_initname(struct ifnet , const char , int);
	void if_link_state_change(struct ifnet *, int);
	int if_printf(struct ifnet , const char , ...) __printflike(2, 3);
	int if_setlladdr(struct ifnet , const u_char , int);
	void if_up(struct ifnet *);
	/void ifinit(void);/ /* declared in systm.h for main() */
	int ifioctl(struct socket , u_long, caddr_t, struct thread );
	int ifpromisc(struct ifnet *, int);
	struct ifnet ifunit(const char );

	void ifq_attach(struct ifaltq , struct ifnet ifp);
	void ifq_detach(struct ifaltq *);

	struct ifaddr ifa_ifwithaddr(struct sockaddr );
	struct ifaddr ifa_ifwithbroadaddr(struct sockaddr );
	struct ifaddr ifa_ifwithdstaddr(struct sockaddr );
	struct ifaddr ifa_ifwithnet(struct sockaddr );
	struct ifaddr ifa_ifwithroute(int, struct sockaddr , struct sockaddr *);
	struct ifaddr ifa_ifwithroute_fib(int, struct sockaddr , struct sockaddr *, u_int);

	struct ifaddr ifaof_ifpforaddr(struct sockaddr , struct ifnet *);

	int if_simloop(struct ifnet ifp, struct mbuf m, int af, int hlen);

	typedef void if_com_alloc_t(u_char type, struct ifnet ifp);
	typedef void if_com_free_t(void *com, u_char type);
	void if_register_com_alloc(u_char type, if_com_alloc_t a, if_com_free_t f);
	void if_deregister_com_alloc(u_char type);

	#define IF_LLADDR(ifp) \
	LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))

	#ifdef DEVICE_POLLING
	enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };

	typedef void poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
	int ether_poll_register(poll_handler_t h, struct ifnet ifp);
	int ether_poll_deregister(struct ifnet *ifp);
	#endif /* DEVICE_POLLING */

	#endif /* _KERNEL */

	#endif /* !_NET_IF_VAR_H_ */
	Index: projects/arpv2_merge_1/sys/netinet/if_ether.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet/if_ether.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet/if_ether.c (revision 185839)
	@@ -1,782 +1,824 @@
	/*-
	* Copyright (c) 1982, 1986, 1988, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)if_ether.c 8.1 (Berkeley) 6/10/93
	*/

	/*
	* Ethernet address resolution protocol.
	* TODO:
	* add "inuse/lock" bit (or ref. count) along with valid bit
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_mac.h"
	#include "opt_carp.h"

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/queue.h>
	#include <sys/sysctl.h>
	#include <sys/systm.h>
	#include <sys/mbuf.h>
	#include <sys/malloc.h>
	#include <sys/proc.h>
	#include <sys/socket.h>
	#include <sys/syslog.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_dl.h>
	#include <net/if_types.h>
	#include <net/route.h>
	#include <net/netisr.h>
	#include <net/if_llc.h>
	#include <net/ethernet.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <net/if_llatbl.h>
	#include <netinet/if_ether.h>
	#include <netinet/vinet.h>

	#include <net/if_arc.h>
	#include <net/iso88025.h>

	#ifdef DEV_CARP
	#include <netinet/ip_carp.h>
	#endif

	#include <security/mac/mac_framework.h>

	#define SIN(s) ((struct sockaddr_in *)s)
	#define SDL(s) ((struct sockaddr_dl *)s)
	#define LLTABLE(ifp) ((struct lltable *)(ifp)->if_afdata[AF_INET])

	SYSCTL_DECL(_net_link_ether);
	SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");

	/* timer values */
	#ifdef VIMAGE_GLOBALS
	static int arpt_keep; /* once resolved, good for 20 more minutes */
	static int arp_maxtries;
	int useloopback; /* use loopback interface for local traffic */
	static int arp_proxyall;
	#endif

	SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age,
	CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds");

	static struct ifqueue arpintrq;

	SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries,
	CTLFLAG_RW, arp_maxtries, 0,
	"ARP resolution attempts before returning error");
	SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, useloopback,
	CTLFLAG_RW, useloopback, 0,
	"Use the loopback interface for local traffic");
	SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, proxyall,
	CTLFLAG_RW, arp_proxyall, 0,
	"Enable proxy ARP for all suitable requests");

	static void arp_init(void);
	void arprequest(struct ifnet *,
	struct in_addr , struct in_addr , u_char *);
	static void arpintr(struct mbuf *);
	static void arptimer(void *);
	#ifdef INET
	static void in_arpinput(struct mbuf *);
	#endif

	#ifdef AF_INET
	void arp_ifscrub(struct ifnet *ifp, uint32_t addr);

	/*
	* called by in_ifscrub to remove entry from the table when
	* the interface goes away
	*/
	void
	arp_ifscrub(struct ifnet *ifp, uint32_t addr)
	{
	struct sockaddr_in addr4;
	- struct llentry *lle;

	bzero((void *)&addr4, sizeof(addr4));
	addr4.sin_len = sizeof(addr4);
	addr4.sin_family = AF_INET;
	addr4.sin_addr.s_addr = addr;
	IF_AFDATA_LOCK(ifp);
	- lle = lla_lookup(LLTABLE(ifp), (LLE_DELETE \| LLE_IFADDR),
	+ lla_lookup(LLTABLE(ifp), (LLE_DELETE \| LLE_IFADDR),
	(struct sockaddr *)&addr4);
	IF_AFDATA_UNLOCK(ifp);
	-#if 0
	- if (lle == NULL)
	- log(LOG_INFO, "arp_ifscrub: interface address is missing from cache\n");
	-#endif
	}
	#endif

	/*
	* Timeout routine. Age arp_tab entries periodically.
	*/
	static void
	arptimer(void *arg)
	{
	struct ifnet *ifp;
	struct llentry lle = (struct llentry )arg;

	if (lle == NULL) {
	panic("%s: NULL entry!\n", __func__);
	return;
	}
	ifp = lle->lle_tbl->llt_ifp;
	- IF_AFDATA_LOCK(ifp);
	if ((lle->la_flags & LLE_DELETED) \|\|
	(time_second >= lle->la_expire)) {
	+ printf("deleting entry\n");
	+
	+ IF_AFDATA_LOCK(ifp);
	if (!callout_pending(&lle->la_timer) &&
	(callout_active(&lle->la_timer))) {
	(void)llentry_free(lle);
	}
	+ IF_AFDATA_UNLOCK(ifp);
	+ } else {
	+ /*
	+ * Still valid, just drop our reference
	+ */
	+ LLE_FREE(lle);
	}
	- IF_AFDATA_UNLOCK(ifp);
	}


	/*
	* Broadcast an ARP request. Caller specifies:
	* - arp header source ip address
	* - arp header target ip address
	* - arp header source ethernet address
	*/
	void
	arprequest(struct ifnet ifp, struct in_addr sip, struct in_addr *tip,
	u_char *enaddr)
	{
	struct mbuf *m;
	struct arphdr *ah;
	struct sockaddr sa;

	if (sip == NULL) {
	/*
	* The caller did not supply a source address, try to find
	* a compatible one among those assigned to this interface.
	*/
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	if (!ifa->ifa_addr \|\|
	ifa->ifa_addr->sa_family != AF_INET)
	continue;
	sip = &SIN(ifa->ifa_addr)->sin_addr;
	if (0 == ((sip->s_addr ^ tip->s_addr) &
	SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
	break; /* found it. */
	}
	if (sip == NULL) {
	printf("%s: cannot find matching address\n", __func__);
	return;
	}
	}

	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
	return;
	m->m_len = sizeof(ah) + 2sizeof(struct in_addr) +
	2*ifp->if_data.ifi_addrlen;
	m->m_pkthdr.len = m->m_len;
	MH_ALIGN(m, m->m_len);
	ah = mtod(m, struct arphdr *);
	bzero((caddr_t)ah, m->m_len);
	#ifdef MAC
	mac_netinet_arp_send(ifp, m);
	#endif
	ah->ar_pro = htons(ETHERTYPE_IP);
	ah->ar_hln = ifp->if_addrlen; /* hardware address length */
	ah->ar_pln = sizeof(struct in_addr); /* protocol address length */
	ah->ar_op = htons(ARPOP_REQUEST);
	bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
	bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
	bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
	sa.sa_family = AF_ARP;
	sa.sa_len = 2;
	m->m_flags \|= M_BCAST;
	(ifp->if_output)(ifp, m, &sa, (struct rtentry )0);
	}

	/*
	* Resolve an IP address into an ethernet address.
	* On input:
	* ifp is the interface we use
	* rt0 is the route to the final destination (possibly useless)
	* m is the mbuf. May be NULL if we don't have a packet.
	* dst is the next hop,
	* desten is where we want the address.
	*
	* On success, desten is filled in and the function returns 0;
	* If the packet must be held pending resolution, we return EWOULDBLOCK
	* On other errors, we return the corresponding error code.
	* Note that m_freem() handles NULL.
	*/
	int
	arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
	struct sockaddr *dst, u_char *desten, struct llentry **lle)
	{
	INIT_VNET_INET(ifp->if_vnet);
	struct llentry *la = 0;
	u_int flags;
	- int error;
	+ int error, renew;

	+ log(LOG_DEBUG, "arpesolve called\n");
	*lle = NULL;
	-
	if (m != NULL) {
	if (m->m_flags & M_BCAST) {
	/* broadcast */
	(void)memcpy(desten,
	ifp->if_broadcastaddr, ifp->if_addrlen);
	return (0);
	}
	if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
	/* multicast */
	ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
	return (0);
	}
	}

	flags = (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) ? 0 : LLE_CREATE;

	/* XXXXX
	* Since this function returns an llentry, the
	* lock is held by the caller.
	*/
	+retry:
	la = lla_lookup(LLTABLE(ifp), flags, dst);
	if (la == NULL) {
	if (flags & LLE_CREATE)
	log(LOG_DEBUG,
	"arpresolve: can't allocate llinfo for %s\n",
	inet_ntoa(SIN(dst)->sin_addr));
	m_freem(m);
	+ log(LOG_DEBUG, "arpesolve: lla_lookup fail\n");
	return (EINVAL);
	}

	- if (la->la_flags & LLE_VALID &&
	- (la->la_flags & LLE_STATIC || la->la_expire > time_uptime)) {
	+ if ((la->la_flags & LLE_VALID) &&
	+ ((la->la_flags & LLE_STATIC) || (la->la_expire > time_uptime))) {
	bcopy(&la->ll_addr, desten, ifp->if_addrlen);
	/*
	* If entry has an expiry time and it is approaching,
	* see if we need to send an ARP request within this
	* arpt_down interval.
	*/
	if (!(la->la_flags & LLE_STATIC) &&
	time_uptime + la->la_preempt > la->la_expire) {
	arprequest(ifp, NULL,
	&SIN(dst)->sin_addr, IF_LLADDR(ifp));

	la->la_preempt--;
	- }
	+ }
	+ log(LOG_DEBUG, "arpresolve: success\n");
	+
	*lle = la;
	- return (0);
	- }
	-
	+ error = 0;
	+ goto done;
	+ } else
	+ log(LOG_DEBUG,
	+ "la=%p valid=%d static=%d expire=%ld uptime=%ld\n", la,
	+ !!(la->la_flags & LLE_VALID), !!(la->la_flags & LLE_STATIC),
	+ la->la_expire, time_uptime);
	+
	if (la->la_flags & LLE_STATIC) { /* should not happen! */
	log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
	inet_ntoa(SIN(dst)->sin_addr));
	m_freem(m);
	- return (EINVAL);
	+ error = EINVAL;
	+ goto done;
	}
	+
	+ renew = (la->la_asked == 0 || la->la_expire != time_uptime);
	+ /*
	+ * Queueing the mbuf and restarting resolution both modify the
	+ * entry, and the renew path below releases it with LLE_WUNLOCK.
	+ * If the lookup was not exclusive, drop the read lock and look
	+ * the entry up again exclusively; this also covers the
	+ * m == NULL case, which previously reached the renew path
	+ * holding only the read lock.
	+ */
	+ if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) {
	+ flags |= LLE_EXCLUSIVE;
	+ LLE_RUNLOCK(la);
	+ goto retry;
	+ }
	/*
	* There is an arptab entry, but no ethernet address
	* response yet. Replace the held mbuf with this
	* latest one.
	*/
	if (m) {
	if (la->la_hold)
	m_freem(la->la_hold);
	la->la_hold = m;
	+ /* No further writes are needed: fall back to the read lock. */
	+ if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
	+ flags &= ~LLE_EXCLUSIVE;
	+ LLE_DOWNGRADE(la);
	+ }
	+
	}
	/*
	* Return EWOULDBLOCK if we have tried less than arp_maxtries. It
	* will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
	* if we have already sent arp_maxtries ARP requests. Retransmit the
	* ARP request, but not faster than one request per second.
	*/
	if (la->la_asked < V_arp_maxtries)
	error = EWOULDBLOCK; /* First request. */
	else
	error =
	(rt0->rt_flags & RTF_GATEWAY) ? EHOSTDOWN : EHOSTUNREACH;

	- if (la->la_asked == 0 || la->la_expire != time_uptime) {
	+ if (renew) {
	+ log(LOG_DEBUG,
	+ "arpresolve: kicking off new resolve expire=%ld\n",
	+ la->la_expire);
	+ LLE_ADDREF(la);
	la->la_expire = time_uptime;
	callout_reset(&la->la_timer, hz, arptimer, la);
	la->la_asked++;
	-
	+ LLE_WUNLOCK(la);
	arprequest(ifp, NULL, &SIN(dst)->sin_addr,
	IF_LLADDR(ifp));
	+ return (error);
	}

	- return (EWOULDBLOCK);
	+done:
	+ /* Release the entry in whichever mode it is currently held. */
	+ if (flags & LLE_EXCLUSIVE)
	+ LLE_WUNLOCK(la);
	+ else
	+ LLE_RUNLOCK(la);
	+ return (error);
	}

	/*
	* Common length and type checks are done here,
	* then the protocol-specific routine is called.
	*/
	static void
	arpintr(struct mbuf *m)
	{
	struct arphdr *ar;

	if (m->m_len < sizeof(struct arphdr) &&
	((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
	log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
	return;
	}
	ar = mtod(m, struct arphdr *);

	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
	ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
	ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
	ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
	log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
	(unsigned char *)&ar->ar_hrd, "");
	m_freem(m);
	return;
	}

	if (m->m_len < arphdr_len(ar)) {
	if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
	log(LOG_ERR, "arp: runt packet\n");
	m_freem(m);
	return;
	}
	ar = mtod(m, struct arphdr *);
	}

	switch (ntohs(ar->ar_pro)) {
	#ifdef INET
	case ETHERTYPE_IP:
	in_arpinput(m);
	return;
	#endif
	}
	m_freem(m);
	}

	#ifdef INET
	/*
	* ARP for Internet protocols on 10 Mb/s Ethernet.
	* Algorithm is that given in RFC 826.
	* In addition, a sanity check is performed on the sender
	* protocol address, to catch impersonators.
	* We no longer handle negotiations for use of trailer protocol:
	* Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
	* along with IP replies if we wanted trailers sent to us,
	* and also sent them in response to IP replies.
	* This allowed either end to announce the desire to receive
	* trailer packets.
	* We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
	* but formerly didn't normally send requests.
	*/
	static int log_arp_wrong_iface = 1;
	static int log_arp_movements = 1;
	static int log_arp_permanent_modify = 1;

	SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
	&log_arp_wrong_iface, 0,
	"log arp packets arriving on the wrong interface");
	SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
	&log_arp_movements, 0,
	"log arp replies from MACs different than the one in the cache");
	SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
	&log_arp_permanent_modify, 0,
	"log arp replies from MACs different than the one in the permanent arp entry");


	static void
	in_arpinput(struct mbuf *m)
	{
	struct arphdr *ah;
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct llentry *la = NULL;
	struct rtentry *rt;
	struct ifaddr *ifa;
	struct in_ifaddr *ia;
	struct sockaddr sa;
	struct in_addr isaddr, itaddr, myaddr;
	u_int8_t *enaddr = NULL;
	- int op, flag, lock_owned = 0;
	+ int op, flags;
	+ struct mbuf *m0;
	/*
	, rif_len;
	*/
	int req_len;
	int bridged = 0, is_bridge = 0;
	#ifdef DEV_CARP
	int carp_match = 0;
	#endif
	struct sockaddr_in sin;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = 0;
	INIT_VNET_INET(ifp->if_vnet);

	if (ifp->if_bridge)
	bridged = 1;
	if (ifp->if_type == IFT_BRIDGE)
	is_bridge = 1;

	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
	if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
	log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
	return;
	}

	ah = mtod(m, struct arphdr *);
	op = ntohs(ah->ar_op);
	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));

	/*
	* For a bridge, we want to check the address irrespective
	* of the receive interface. (This will change slightly
	* when we have clusters of interfaces).
	* If the interface does not match, but the receiving interface
	* is part of carp, we call carp_iamatch to see if this is a
	* request for the virtual host ip.
	* XXX: This is really ugly!
	*/
	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
	if (((bridged && ia->ia_ifp->if_bridge != NULL) \|\|
	(ia->ia_ifp == ifp)) &&
	itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
	goto match;
	#ifdef DEV_CARP
	if (ifp->if_carp != NULL &&
	carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) &&
	itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
	carp_match = 1;
	goto match;
	}
	#endif
	}
	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
	if (((bridged && ia->ia_ifp->if_bridge != NULL) \|\|
	(ia->ia_ifp == ifp)) &&
	isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
	goto match;

	#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
	(ia->ia_ifp->if_bridge == ifp->if_softc && \
	!bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \
	addr == ia->ia_addr.sin_addr.s_addr)
	/*
	* Check the case when bridge shares its MAC address with
	* some of its children, so packets are claimed by bridge
	* itself (bridge_input() does it first), but they are really
	* meant to be destined to the bridge member.
	*/
	if (is_bridge) {
	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
	if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
	ifp = ia->ia_ifp;
	goto match;
	}
	}
	}
	#undef BDG_MEMBER_MATCHES_ARP

	/*
	* No match, use the first inet address on the receive interface
	* as a dummy address for the rest of the function.
	*/
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
	if (ifa->ifa_addr->sa_family == AF_INET) {
	ia = ifatoia(ifa);
	goto match;
	}
	/*
	* If bridging, fall back to using any inet address.
	*/
	if (!bridged \|\| (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL)
	goto drop;
	match:
	+ log(LOG_DEBUG,"in_arpinput: match\n");
	if (!enaddr)
	enaddr = (u_int8_t *)IF_LLADDR(ifp);
	myaddr = ia->ia_addr.sin_addr;
	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
	goto drop; /* it's from me, ignore it. */
	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
	log(LOG_ERR,
	"arp: link address is broadcast for IP address %s!\n",
	inet_ntoa(isaddr));
	goto drop;
	}
	/*
	* Warn if another host is using the same IP address, but only if the
	* IP address isn't 0.0.0.0, which is used for DHCP only, in which
	* case we suppress the warning to avoid false positive complaints of
	* potential misconfiguration.
	*/
	if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
	log(LOG_ERR,
	"arp: %*D is using my IP address %s on %s!\n",
	ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
	inet_ntoa(isaddr), ifp->if_xname);
	itaddr = myaddr;
	goto reply;
	}
	if (ifp->if_flags & IFF_STATICARP)
	goto reply;

	bzero(&sin, sizeof(sin));
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_family = AF_INET;
	sin.sin_addr = isaddr;
	- flag = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
	+ flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
	+ flags \|= LLE_EXCLUSIVE;
	IF_AFDATA_LOCK(ifp);
	- lock_owned = 1;
	- la = lla_lookup(LLTABLE(ifp), flag, (struct sockaddr *)&sin);
	+ la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
	+ IF_AFDATA_UNLOCK(ifp);
	if (la != NULL) {
	+ log(LOG_DEBUG, "in_arpinput: la found\n");
	/* the following is not an error when doing bridging */
	if (!bridged && la->lle_tbl->llt_ifp != ifp
	#ifdef DEV_CARP
	&& (ifp->if_type != IFT_CARP \|\| !carp_match)
	#endif
	- ) {
	+ ) {
	if (log_arp_wrong_iface)
	log(LOG_ERR, "arp: %s is on %s "
	"but got reply from %*D on %s\n",
	inet_ntoa(isaddr),
	la->lle_tbl->llt_ifp->if_xname,
	ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
	ifp->if_xname);
	goto reply;
	}
	-
	- if (la->la_flags & LLE_VALID &&
	+ if ((la->la_flags & LLE_VALID) &&
	bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
	+ log(LOG_DEBUG, "LLE_VALID and match\n");
	if (la->la_flags & LLE_STATIC) {
	log(LOG_ERR,
	"arp: %*D attempts to modify permanent "
	"entry for %s on %s\n",
	ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
	inet_ntoa(isaddr), ifp->if_xname);
	goto reply;
	}
	if (log_arp_movements) {
	log(LOG_INFO, "arp: %s moved from %*D "
	"to %*D on %s\n",
	inet_ntoa(isaddr),
	ifp->if_addrlen,
	(u_char *)&la->ll_addr, ":",
	ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
	ifp->if_xname);
	}
	}
	+
	if (ifp->if_addrlen != ah->ar_hln) {
	log(LOG_WARNING,
	"arp from %*D: addr len: new %d, i/f %d (ignored)",
	ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
	ah->ar_hln, ifp->if_addrlen);
	goto reply;
	}
	(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
	la->la_flags \|= LLE_VALID;

	+ log(LOG_DEBUG, "in_arpinput: la=%p valid set\n", la);
	if (!(la->la_flags & LLE_STATIC)) {
	la->la_expire = time_uptime + arpt_keep;
	callout_reset(&la->la_timer, hz * V_arpt_keep,
	arptimer, la);
	}
	la->la_asked = 0;
	la->la_preempt = V_arp_maxtries;
	if (la->la_hold) {
	- (*ifp->if_output)(ifp, la->la_hold, L3_ADDR(la), NULL);
	+ m0 = la->la_hold;
	la->la_hold = 0;
	+ memcpy(&sa, L3_ADDR(la), sizeof(sa));
	+ LLE_WUNLOCK(la);
	+
	+ (*ifp->if_output)(ifp, m0, &sa, NULL);
	+ return;
	}
	}
	reply:
	if (op != ARPOP_REQUEST)
	goto drop;

	if (itaddr.s_addr == myaddr.s_addr) {
	/* Shortcut.. the receiving interface is the target. */
	(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
	(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
	} else {
	if (la == NULL) {
	if (!V_arp_proxyall)
	goto drop;

	sin.sin_addr = itaddr;
	-
	/* XXX MRT use table 0 for arp reply */
	rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
	if (!rt)
	goto drop;
	/*
	* Don't send proxies for nodes on the same interface
	* as this one came out of, or we'll get into a fight
	* over who claims what Ether address.
	*/
	if (rt->rt_ifp == ifp) {
	RTFREE_LOCKED(rt);
	goto drop;
	}
	(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
	(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
	RTFREE_LOCKED(rt);

	/*
	* Also check that the node which sent the ARP packet
	* is on the interface we expect it to be on. This
	* avoids ARP chaos if an interface is connected to the
	* wrong network.
	*/
	sin.sin_addr = isaddr;

	/* XXX MRT use table 0 for arp checks */
	rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
	if (!rt)
	goto drop;
	if (rt->rt_ifp != ifp) {
	log(LOG_INFO, "arp_proxy: ignoring request"
	" from %s via %s, expecting %s\n",
	inet_ntoa(isaddr), ifp->if_xname,
	rt->rt_ifp->if_xname);
	RTFREE_LOCKED(rt);
	goto drop;
	}
	RTFREE_LOCKED(rt);

	#ifdef DEBUG_PROXY
	printf("arp: proxying for %s\n",
	inet_ntoa(itaddr));
	#endif
	} else {
	/*
	* Return proxied ARP replies only on the interface
	* or bridge cluster where this network resides.
	* Otherwise we may conflict with the host we are
	* proxying for.
	*/
	if (la->lle_tbl->llt_ifp != ifp &&
	(la->lle_tbl->llt_ifp->if_bridge != ifp->if_bridge \|\|
	ifp->if_bridge == NULL)) {
	goto drop;
	}
	(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
	(void)memcpy(ar_sha(ah), &la->ll_addr, ah->ar_hln);
	}
	}

	- if (lock_owned != 0) {
	- IF_AFDATA_UNLOCK(ifp);
	- lock_owned = 0;
	- }
	-
	+ if (la)
	+ LLE_WUNLOCK(la);
	if (itaddr.s_addr == myaddr.s_addr &&
	IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
	/* RFC 3927 link-local IPv4; always reply by broadcast. */
	#ifdef DEBUG_LINKLOCAL
	printf("arp: sending reply for link-local addr %s\n",
	inet_ntoa(itaddr));
	#endif
	m->m_flags \|= M_BCAST;
	m->m_flags &= ~M_MCAST;
	} else {
	/* default behaviour; never reply by broadcast. */
	m->m_flags &= ~(M_BCAST\|M_MCAST);
	}
	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
	ah->ar_op = htons(ARPOP_REPLY);
	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
	m->m_len = sizeof(ah) + (2 ah->ar_pln) + (2 * ah->ar_hln);
	m->m_pkthdr.len = m->m_len;
	sa.sa_family = AF_ARP;
	sa.sa_len = 2;
	(ifp->if_output)(ifp, m, &sa, (struct rtentry )0);
	return;

	drop:
	- if (lock_owned != 0)
	- IF_AFDATA_UNLOCK(ifp);
	+ if (la)
	+ LLE_WUNLOCK(la);
	m_freem(m);
	}
	#endif

	void
	arp_ifinit(struct ifnet ifp, struct ifaddr ifa)
	{
	struct llentry *lle;

	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
	arprequest(ifp, &IA_SIN(ifa)->sin_addr,
	&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
	/*
	* interface address is considered static entry
	* because the output of the arp utility shows
	* that L2 entry as permanent
	*/
	IF_AFDATA_LOCK(ifp);
	lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE \| LLE_IFADDR \| LLE_STATIC),
	(struct sockaddr *)IA_SIN(ifa));
	IF_AFDATA_UNLOCK(ifp);
	if (lle == NULL)
	log(LOG_INFO, "arp_ifinit: cannot create arp "
	"entry for interface address\n");
	+ LLE_RUNLOCK(lle);
	ifa->ifa_rtrequest = NULL;
	}

	/*
	 * Like arp_ifinit(), but the gratuitous ARP request advertises the
	 * caller-supplied link-layer address `enaddr' and no link-layer table
	 * entry is created.
	 */
	void
	arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
	{
		if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
			arprequest(ifp, &IA_SIN(ifa)->sin_addr,
			    &IA_SIN(ifa)->sin_addr, enaddr);
		ifa->ifa_rtrequest = NULL;
	}

	static void
	arp_init(void)
	{
	INIT_VNET_INET(curvnet);

	V_arpt_keep = (2060); / once resolved, good for 20 more minutes */
	V_arp_maxtries = 5;
	V_useloopback = 1; /* use loopback interface for local traffic */
	V_arp_proxyall = 0;

	arpintrq.ifq_maxlen = 50;
	mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
	netisr_register(NETISR_ARP, arpintr, &arpintrq, 0);
	}
	SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
	Index: projects/arpv2_merge_1/sys/netinet/in.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet/in.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet/in.c (revision 185839)
	@@ -1,1234 +1,1255 @@
	/*-
	* Copyright (c) 1982, 1986, 1991, 1993
	* The Regents of the University of California. All rights reserved.
	* Copyright (C) 2001 WIDE Project. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)in.c 8.4 (Berkeley) 1/9/95
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_carp.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/sockio.h>
	#include <sys/malloc.h>
	#include <sys/priv.h>
	#include <sys/socket.h>
	#include <sys/kernel.h>
	#include <sys/sysctl.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_llatbl.h>
	#include <net/if_types.h>
	#include <net/route.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <netinet/in_pcb.h>
	#include <netinet/ip_var.h>
	#include <netinet/vinet.h>

	/* Forward declarations for the file-local helpers defined below. */
	static int in_mask2len(struct in_addr *);
	static void in_len2mask(struct in_addr *, int);
	static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
	    struct ifnet *, struct thread *);

	static int in_addprefix(struct in_ifaddr *, int);
	static int in_scrubprefix(struct in_ifaddr *);
	static void in_socktrim(struct sockaddr_in *);
	static int in_ifinit(struct ifnet *,
	    struct in_ifaddr *, struct sockaddr_in *, int);
	static void in_purgemaddrs(struct ifnet *);

	/*
	 * Backing storage for the two tunables below when VIMAGE_GLOBALS is
	 * defined.  subnetsarelocal is read by in_localaddr() and
	 * sameprefixcarponly by in_addprefix() (both through their V_ names).
	 */
	#ifdef VIMAGE_GLOBALS
	static int subnetsarelocal;
	static int sameprefixcarponly;
	extern struct inpcbinfo ripcbinfo;
	#endif

	/* Read-write integer knobs registered under the _net_inet_ip node. */
	SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, subnets_are_local,
	CTLFLAG_RW, subnetsarelocal, 0,
	"Treat all subnets as directly connected");
	SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, same_prefix_carp_only,
	CTLFLAG_RW, sameprefixcarponly, 0,
	"Refuse to create same prefixes on different interfaces");

	/*
	* Return 1 if an internet address is for a ``local'' host
	* (one to which we have a connection). If subnetsarelocal
	* is true, this includes other subnets of the local net.
	* Otherwise, it includes only the directly-connected (sub)nets.
	*/
	int
	in_localaddr(struct in_addr in)
	{
	INIT_VNET_INET(curvnet);
	register u_long i = ntohl(in.s_addr);
	register struct in_ifaddr *ia;

	if (V_subnetsarelocal) {
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
	if ((i & ia->ia_netmask) == ia->ia_net)
	return (1);
	} else {
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
	if ((i & ia->ia_subnetmask) == ia->ia_subnet)
	return (1);
	}
	return (0);
	}

	/*
	* Return 1 if an internet address is for the local host and configured
	* on one of its interfaces.
	*/
	int
	in_localip(struct in_addr in)
	{
		INIT_VNET_INET(curvnet);
		struct in_ifaddr *cand;
		int found;

		/* Only the hash chain selected by the address is scanned. */
		found = 0;
		LIST_FOREACH(cand, INADDR_HASH(in.s_addr), ia_hash) {
			if (IA_SIN(cand)->sin_addr.s_addr != in.s_addr)
				continue;
			found = 1;
			break;
		}
		return (found);
	}

	/*
	* Determine whether an IP address is in a reserved set of addresses
	* that may not be forwarded, or whether datagrams to that destination
	* may be forwarded.
	*/
	int
	in_canforward(struct in_addr in)
	{
	register u_long i = ntohl(in.s_addr);
	register u_long net;

	if (IN_EXPERIMENTAL(i) \|\| IN_MULTICAST(i) \|\| IN_LINKLOCAL(i))
	return (0);
	if (IN_CLASSA(i)) {
	net = i & IN_CLASSA_NET;
	if (net == 0 \|\| net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
	return (0);
	}
	return (1);
	}

	/*
	* Trim a mask in a sockaddr
	*/
	static void
	in_socktrim(struct sockaddr_in *ap)
	{
		register char *cplim = (char *) &ap->sin_addr;
		register char *cp = (char *) (&ap->sin_addr + 1);

		/*
		 * Scan the address bytes from the last one backwards; sin_len
		 * becomes the offset just past the last non-zero byte, or stays
		 * 0 when every address byte is zero.
		 */
		ap->sin_len = 0;
		while (--cp >= cplim)
			if (*cp) {
				(ap)->sin_len = cp - (char *) (ap) + 1;
				break;
			}
	}

	/*
	 * Convert a netmask into a prefix length: count the leading 0xff
	 * bytes, then the leading one bits of the first byte that is not 0xff.
	 * Bits after the first zero bit are ignored.
	 */
	static int
	in_mask2len(struct in_addr *mask)
	{
		const int nbytes = (int)sizeof(*mask);
		const unsigned char *p = (const unsigned char *)mask;
		int x, y;

		for (x = 0; x < nbytes; x++) {
			if (p[x] != 0xff)
				break;
		}
		y = 0;
		if (x < nbytes) {
			for (y = 0; y < 8; y++) {
				if ((p[x] & (0x80 >> y)) == 0)
					break;
			}
		}
		return (x * 8 + y);
	}

	/*
	 * Build the netmask for a prefix length: `len' leading one bits,
	 * stored in network byte order.  A length outside 0..32 is clamped so
	 * the write never leaves the mask and the shift count stays valid.
	 */
	static void
	in_len2mask(struct in_addr *mask, int len)
	{
		const int maxbits = (int)sizeof(*mask) * 8;
		unsigned char *p = (unsigned char *)mask;
		int i;

		if (len < 0)
			len = 0;
		else if (len > maxbits)
			len = maxbits;

		bzero(mask, sizeof(*mask));
		for (i = 0; i < len / 8; i++)
			p[i] = 0xff;
		/* Partial byte: keep only the top (len % 8) bits. */
		if (len % 8)
			p[i] = (0xff00 >> (len % 8)) & 0xff;
	}

	/*
	* Generic internet control operations (ioctl's).
	* Ifp is 0 if not an interface-specific ioctl.
	*/
	/* ARGSUSED */
	int
	in_control(struct socket so, u_long cmd, caddr_t data, struct ifnet ifp,
	struct thread *td)
	{
	/*
	 * NOTE(review): so, ifp, ifr, ia, iap and ifra are used as pointers
	 * throughout this function; the declarations and several casts below
	 * appear to have lost their '*' in this rendering -- confirm against
	 * the repository copy before relying on the exact text.
	 */
	INIT_VNET_INET(curvnet); /* both so and ifp can be NULL here! */
	register struct ifreq ifr = (struct ifreq )data;
	register struct in_ifaddr ia, iap;
	register struct ifaddr *ifa;
	struct in_addr allhosts_addr;
	struct in_addr dst;
	struct in_ifaddr *oia;
	struct in_aliasreq ifra = (struct in_aliasreq )data;
	struct sockaddr_in oldaddr;
	int error, hostIsNew, iaIsNew, maskIsNew, s;
	int iaIsFirst;

	ia = NULL;
	iaIsFirst = 0;
	iaIsNew = 0;
	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);

	/*
	 * Stage 1: the SIOC[AGD]LIFADDR requests are handed to
	 * in_lifaddr_ioctl() after the privilege and ifp checks.
	 */
	switch (cmd) {
	case SIOCALIFADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_ADDIFADDR);
	if (error)
	return (error);
	}
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCDLIFADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_DELIFADDR);
	if (error)
	return (error);
	}
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCGLIFADDR:
	if (ifp == NULL)
	return (EINVAL);
	return in_lifaddr_ioctl(so, cmd, data, ifp, td);
	}

	/*
	* Find address for this interface, if it exists.
	*
	* If an alias address was specified, find that one instead of
	* the first one on the interface, if possible.
	*/
	if (ifp != NULL) {
	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash)
	if (iap->ia_ifp == ifp &&
	iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
	ia = iap;
	break;
	}
	if (ia == NULL)
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
	iap = ifatoia(ifa);
	if (iap->ia_addr.sin_family == AF_INET) {
	ia = iap;
	break;
	}
	}
	/* No AF_INET address on ifp yet: whatever is added will be the first. */
	if (ia == NULL)
	iaIsFirst = 1;
	}

	/*
	 * Stage 2: per-command validation.  The set/add commands allocate
	 * and link a fresh in_ifaddr when no existing one was selected.
	 */
	switch (cmd) {

	case SIOCAIFADDR:
	case SIOCDIFADDR:
	if (ifp == NULL)
	return (EADDRNOTAVAIL);
	if (ifra->ifra_addr.sin_family == AF_INET) {
	for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
	if (ia->ia_ifp == ifp &&
	ia->ia_addr.sin_addr.s_addr ==
	ifra->ifra_addr.sin_addr.s_addr)
	break;
	}
	if ((ifp->if_flags & IFF_POINTOPOINT)
	&& (cmd == SIOCAIFADDR)
	&& (ifra->ifra_dstaddr.sin_addr.s_addr
	== INADDR_ANY)) {
	return (EDESTADDRREQ);
	}
	}
	if (cmd == SIOCDIFADDR && ia == NULL)
	return (EADDRNOTAVAIL);
	/* FALLTHROUGH */
	case SIOCSIFADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
	if (td != NULL) {
	error = priv_check(td, (cmd == SIOCDIFADDR) ?
	PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
	if (error)
	return (error);
	}

	if (ifp == NULL)
	return (EADDRNOTAVAIL);
	if (ia == NULL) {
	ia = (struct in_ifaddr *)
	malloc(sizeof *ia, M_IFADDR, M_WAITOK \| M_ZERO);
	if (ia == NULL)
	return (ENOBUFS);
	/*
	* Protect from ipintr() traversing address list
	* while we're modifying it.
	*/
	s = splnet();
	ifa = &ia->ia_ifa;
	IFA_LOCK_INIT(ifa);
	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
	ifa->ifa_refcnt = 1;
	TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);

	ia->ia_sockmask.sin_len = 8;
	ia->ia_sockmask.sin_family = AF_INET;
	if (ifp->if_flags & IFF_BROADCAST) {
	ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
	ia->ia_broadaddr.sin_family = AF_INET;
	}
	ia->ia_ifp = ifp;

	TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
	splx(s);
	iaIsNew = 1;
	}
	break;

	case SIOCSIFBRDADDR:
	if (td != NULL) {
	error = priv_check(td, PRIV_NET_ADDIFADDR);
	if (error)
	return (error);
	}
	/* FALLTHROUGH */

	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
	if (ia == NULL)
	return (EADDRNOTAVAIL);
	break;
	}
	/*
	 * Stage 3: carry out the command.  Cases that `break' instead of
	 * returning continue to the unlink-and-free code after the switch.
	 */
	switch (cmd) {

	case SIOCGIFADDR:
	((struct sockaddr_in )&ifr->ifr_addr) = ia->ia_addr;
	return (0);

	case SIOCGIFBRDADDR:
	if ((ifp->if_flags & IFF_BROADCAST) == 0)
	return (EINVAL);
	((struct sockaddr_in )&ifr->ifr_dstaddr) = ia->ia_broadaddr;
	return (0);

	case SIOCGIFDSTADDR:
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
	return (EINVAL);
	((struct sockaddr_in )&ifr->ifr_dstaddr) = ia->ia_dstaddr;
	return (0);

	case SIOCGIFNETMASK:
	((struct sockaddr_in )&ifr->ifr_addr) = ia->ia_sockmask;
	return (0);

	case SIOCSIFDSTADDR:
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
	return (EINVAL);
	oldaddr = ia->ia_dstaddr;
	ia->ia_dstaddr = (struct sockaddr_in )&ifr->ifr_dstaddr;
	if (ifp->if_ioctl != NULL) {
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
	(caddr_t)ia);
	IFF_UNLOCKGIANT(ifp);
	if (error) {
	ia->ia_dstaddr = oldaddr;
	return (error);
	}
	}
	/*
	 * Replace the host route: delete it with ifa_dstaddr temporarily
	 * pointing at the old destination, then add it for the new one.
	 */
	if (ia->ia_flags & IFA_ROUTE) {
	ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
	ia->ia_ifa.ifa_dstaddr =
	(struct sockaddr *)&ia->ia_dstaddr;
	rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST\|RTF_UP);
	}
	return (0);

	case SIOCSIFBRDADDR:
	if ((ifp->if_flags & IFF_BROADCAST) == 0)
	return (EINVAL);
	ia->ia_broadaddr = (struct sockaddr_in )&ifr->ifr_broadaddr;
	return (0);

	case SIOCSIFADDR:
	error = in_ifinit(ifp, ia,
	(struct sockaddr_in *) &ifr->ifr_addr, 1);
	/* A failed init of a just-allocated ia is undone after the switch. */
	if (error != 0 && iaIsNew)
	break;
	if (error == 0) {
	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
	in_addmulti(&allhosts_addr, ifp);
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	}
	/*
	 * NOTE(review): returns 0 even when in_ifinit() failed on an
	 * already existing ia; SIOCAIFADDR below returns `error' instead.
	 * Confirm whether this is intentional.
	 */
	return (0);

	case SIOCSIFNETMASK:
	ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
	ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
	return (0);

	case SIOCAIFADDR:
	maskIsNew = 0;
	hostIsNew = 1;
	error = 0;
	if (ia->ia_addr.sin_family == AF_INET) {
	if (ifra->ifra_addr.sin_len == 0) {
	ifra->ifra_addr = ia->ia_addr;
	hostIsNew = 0;
	} else if (ifra->ifra_addr.sin_addr.s_addr ==
	ia->ia_addr.sin_addr.s_addr)
	hostIsNew = 0;
	}
	if (ifra->ifra_mask.sin_len) {
	in_ifscrub(ifp, ia);
	ia->ia_sockmask = ifra->ifra_mask;
	ia->ia_sockmask.sin_family = AF_INET;
	ia->ia_subnetmask =
	ntohl(ia->ia_sockmask.sin_addr.s_addr);
	maskIsNew = 1;
	}
	if ((ifp->if_flags & IFF_POINTOPOINT) &&
	(ifra->ifra_dstaddr.sin_family == AF_INET)) {
	in_ifscrub(ifp, ia);
	ia->ia_dstaddr = ifra->ifra_dstaddr;
	maskIsNew = 1; /* We lie; but the effect's the same */
	}
	if (ifra->ifra_addr.sin_family == AF_INET &&
	(hostIsNew \|\| maskIsNew))
	error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
	/* A failed init of a just-allocated ia is undone after the switch. */
	if (error != 0 && iaIsNew)
	break;

	if ((ifp->if_flags & IFF_BROADCAST) &&
	(ifra->ifra_broadaddr.sin_family == AF_INET))
	ia->ia_broadaddr = ifra->ifra_broadaddr;
	if (error == 0) {
	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
	in_addmulti(&allhosts_addr, ifp);
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	}
	return (error);

	case SIOCDIFADDR:
	/*
	* in_ifscrub kills the interface route.
	*/
	in_ifscrub(ifp, ia);
	/*
	* in_ifadown gets rid of all the rest of
	* the routes. This is not quite the right
	* thing to do, but at least if we are running
	* a routing process they will come back.
	*/
	in_ifadown(&ia->ia_ifa, 1);
	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
	error = 0;
	break;

	default:
	/* Anything else goes to the driver's own ioctl handler, if any. */
	if (ifp == NULL \|\| ifp->if_ioctl == NULL)
	return (EOPNOTSUPP);
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, cmd, data);
	IFF_UNLOCKGIANT(ifp);
	return (error);
	}

	/*
	 * Only reached via `break' above: SIOCDIFADDR, or SIOCSIFADDR /
	 * SIOCAIFADDR whose in_ifinit() failed on a newly allocated ia.
	 * Unlink ia from every list and drop its reference.
	 */
	/*
	* Protect from ipintr() traversing address list while we're modifying
	* it.
	*/
	s = splnet();
	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
	if (ia->ia_addr.sin_family == AF_INET) {
	LIST_REMOVE(ia, ia_hash);
	/*
	* If this is the last IPv4 address configured on this
	* interface, leave the all-hosts group.
	* XXX: This is quite ugly because of locking and structure.
	*/
	oia = NULL;
	IFP_TO_IA(ifp, oia);
	if (oia == NULL) {
	struct in_multi *inm;

	IFF_LOCKGIANT(ifp);
	IN_MULTI_LOCK();
	IN_LOOKUP_MULTI(allhosts_addr, ifp, inm);
	if (inm != NULL)
	in_delmulti_locked(inm);
	IN_MULTI_UNLOCK();
	IFF_UNLOCKGIANT(ifp);
	}
	}
	IFAFREE(&ia->ia_ifa);
	splx(s);

	return (error);
	}

	/*
	* SIOC[GAD]LIFADDR.
	* SIOCGLIFADDR: get first address. (?!?)
	* SIOCGLIFADDR with IFLR_PREFIX:
	* get first address that matches the specified prefix.
	* SIOCALIFADDR: add the specified address.
	* SIOCALIFADDR with IFLR_PREFIX:
	* EINVAL since we can't deduce hostid part of the address.
	* SIOCDLIFADDR: delete the specified address.
	* SIOCDLIFADDR with IFLR_PREFIX:
	* delete the first address that matches the specified prefix.
	* return values:
	* EINVAL on invalid parameters
	* EADDRNOTAVAIL on prefix match failed/specified address not found
	* other values may be returned from in_ioctl()
	*/
	static int
	in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
	    struct ifnet *ifp, struct thread *td)
	{
		struct if_laddrreq *iflr = (struct if_laddrreq *)data;
		struct ifaddr *ifa;

		/* sanity checks */
		if (data == NULL || ifp == NULL) {
			panic("invalid argument to in_lifaddr_ioctl");
			/*NOTREACHED*/
		}

		switch (cmd) {
		case SIOCGLIFADDR:
			/* address must be specified on GET with IFLR_PREFIX */
			if ((iflr->flags & IFLR_PREFIX) == 0)
				break;
			/*FALLTHROUGH*/
		case SIOCALIFADDR:
		case SIOCDLIFADDR:
			/* address must be specified on ADD and DELETE */
			if (iflr->addr.ss_family != AF_INET)
				return (EINVAL);
			if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
				return (EINVAL);
			/* XXX need improvement */
			if (iflr->dstaddr.ss_family
			    && iflr->dstaddr.ss_family != AF_INET)
				return (EINVAL);
			if (iflr->dstaddr.ss_family
			    && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
				return (EINVAL);
			break;
		default: /*shouldn't happen*/
			return (EOPNOTSUPP);
		}
		if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
			return (EINVAL);

		switch (cmd) {
		case SIOCALIFADDR:
		    {
			struct in_aliasreq ifra;

			if (iflr->flags & IFLR_PREFIX)
				return (EINVAL);

			/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name,
			    sizeof(ifra.ifra_name));

			bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);

			if (iflr->dstaddr.ss_family) {	/*XXX*/
				bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
				    iflr->dstaddr.ss_len);
			}

			ifra.ifra_mask.sin_family = AF_INET;
			ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
			in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);

			return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
		    }
		case SIOCGLIFADDR:
		case SIOCDLIFADDR:
		    {
			struct in_ifaddr *ia;
			struct in_addr mask, candidate, match;
			struct sockaddr_in *sin;

			bzero(&mask, sizeof(mask));
			bzero(&match, sizeof(match));
			if (iflr->flags & IFLR_PREFIX) {
				/* lookup a prefix rather than address. */
				in_len2mask(&mask, iflr->prefixlen);

				sin = (struct sockaddr_in *)&iflr->addr;
				match.s_addr = sin->sin_addr.s_addr;
				match.s_addr &= mask.s_addr;

				/* if you set extra bits, that's wrong */
				if (match.s_addr != sin->sin_addr.s_addr)
					return (EINVAL);

			} else {
				/* on getting an address, take the 1st match */
				/* on deleting an address, do exact match */
				if (cmd != SIOCGLIFADDR) {
					in_len2mask(&mask, 32);
					sin = (struct sockaddr_in *)&iflr->addr;
					match.s_addr = sin->sin_addr.s_addr;
				}
			}

			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				/*
				 * Only IPv4 addresses are candidates; the entry
				 * is treated as a struct in_ifaddr below.
				 */
				if (ifa->ifa_addr->sa_family != AF_INET)
					continue;
				if (match.s_addr == 0)
					break;
				/*
				 * ifa_addr is itself the sockaddr pointer, so
				 * cast it rather than the address of the field.
				 */
				candidate.s_addr = ((struct sockaddr_in *)
				    ifa->ifa_addr)->sin_addr.s_addr;
				candidate.s_addr &= mask.s_addr;
				if (candidate.s_addr == match.s_addr)
					break;
			}
			if (ifa == NULL)
				return (EADDRNOTAVAIL);
			ia = (struct in_ifaddr *)ifa;

			if (cmd == SIOCGLIFADDR) {
				/* fill in the if_laddrreq structure */
				bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);

				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
					    ia->ia_dstaddr.sin_len);
				} else
					bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

				iflr->prefixlen =
				    in_mask2len(&ia->ia_sockmask.sin_addr);

				iflr->flags = 0;	/*XXX*/

				return (0);
			} else {
				struct in_aliasreq ifra;

				/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
				bzero(&ifra, sizeof(ifra));
				bcopy(iflr->iflr_name, ifra.ifra_name,
				    sizeof(ifra.ifra_name));

				bcopy(&ia->ia_addr, &ifra.ifra_addr,
				    ia->ia_addr.sin_len);
				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
					    ia->ia_dstaddr.sin_len);
				}
				/* The netmask goes in ifra_mask, not ifra_dstaddr. */
				bcopy(&ia->ia_sockmask, &ifra.ifra_mask,
				    ia->ia_sockmask.sin_len);

				return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
				    ifp, td));
			}
		    }
		}

		return (EOPNOTSUPP);	/*just for safety*/
	}

	/*
	* Delete any existing route for an interface.
	*/
	void
	in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
	{

		/* ifp is not consulted; all the work happens per address. */
		in_scrubprefix(ia);
	}

	/*
	* Initialize an interface's internet address
	* and routing table entry.
	*/
	static int
	in_ifinit(struct ifnet ifp, struct in_ifaddr ia, struct sockaddr_in *sin,
	int scrub)
	{
	INIT_VNET_INET(ifp->if_vnet);
	register u_long i = ntohl(sin->sin_addr.s_addr);
	struct sockaddr_in oldaddr;
	int s = splimp(), flags = RTF_UP, error = 0;

	oldaddr = ia->ia_addr;
	if (oldaddr.sin_family == AF_INET)
	LIST_REMOVE(ia, ia_hash);
	ia->ia_addr = *sin;
	if (ia->ia_addr.sin_family == AF_INET)
	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
	ia, ia_hash);
	/*
	* Give the interface a chance to initialize
	* if this is its first address,
	* and to validate the address if necessary.
	*/
	if (ifp->if_ioctl != NULL) {
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
	IFF_UNLOCKGIANT(ifp);
	if (error) {
	splx(s);
	/* LIST_REMOVE(ia, ia_hash) is done in in_control */
	ia->ia_addr = oldaddr;
	if (ia->ia_addr.sin_family == AF_INET)
	LIST_INSERT_HEAD(INADDR_HASH(
	ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
	else
	/*
	* If oldaddr family is not AF_INET (e.g.
	* interface has been just created) in_control
	* does not call LIST_REMOVE, and we end up
	* with bogus ia entries in hash
	*/
	LIST_REMOVE(ia, ia_hash);
	return (error);
	}
	}
	splx(s);
	if (scrub) {
	ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
	in_ifscrub(ifp, ia);
	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
	}
	if (IN_CLASSA(i))
	ia->ia_netmask = IN_CLASSA_NET;
	else if (IN_CLASSB(i))
	ia->ia_netmask = IN_CLASSB_NET;
	else
	ia->ia_netmask = IN_CLASSC_NET;
	/*
	* The subnet mask usually includes at least the standard network part,
	* but may may be smaller in the case of supernetting.
	* If it is set, we believe it.
	*/
	if (ia->ia_subnetmask == 0) {
	ia->ia_subnetmask = ia->ia_netmask;
	ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
	} else
	ia->ia_netmask &= ia->ia_subnetmask;
	ia->ia_net = i & ia->ia_netmask;
	ia->ia_subnet = i & ia->ia_subnetmask;
	in_socktrim(&ia->ia_sockmask);
	#ifdef DEV_CARP
	/*
	* XXX: carp(4) does not have interface route
	*/
	if (ifp->if_type == IFT_CARP)
	return (0);
	#endif
	/*
	* Add route for the network.
	*/
	ia->ia_ifa.ifa_metric = ifp->if_metric;
	if (ifp->if_flags & IFF_BROADCAST) {
	ia->ia_broadaddr.sin_addr.s_addr =
	htonl(ia->ia_subnet \| ~ia->ia_subnetmask);
	ia->ia_netbroadcast.s_addr =
	htonl(ia->ia_net \| ~ ia->ia_netmask);
	} else if (ifp->if_flags & IFF_LOOPBACK) {
	ia->ia_dstaddr = ia->ia_addr;
	flags \|= RTF_HOST;
	} else if (ifp->if_flags & IFF_POINTOPOINT) {
	if (ia->ia_dstaddr.sin_family != AF_INET)
	return (0);
	flags \|= RTF_HOST;
	}
	if ((error = in_addprefix(ia, flags)) != 0)
	return (error);

	return (error);
	}

	/* RTF_HOST for addresses on loopback or point-to-point interfaces, else 0. */
	#define rtinitflags(x) \
		((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		    ? RTF_HOST : 0)
	/*
	* Check if we have a route for the given prefix already or add one accordingly.
	*/
	static int
	in_addprefix(struct in_ifaddr *target, int flags)
	{
		INIT_VNET_INET(curvnet);
		struct in_ifaddr *ia;
		struct in_addr prefix, mask, p, m;
		int error;

		/* Host routes key on the destination address with an empty mask. */
		if ((flags & RTF_HOST) != 0) {
			prefix = target->ia_dstaddr.sin_addr;
			mask.s_addr = 0;
		} else {
			prefix = target->ia_addr.sin_addr;
			mask = target->ia_sockmask.sin_addr;
			prefix.s_addr &= mask.s_addr;
		}

		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
			if (rtinitflags(ia)) {
				p = ia->ia_addr.sin_addr;

				if (prefix.s_addr != p.s_addr)
					continue;
			} else {
				p = ia->ia_addr.sin_addr;
				m = ia->ia_sockmask.sin_addr;
				p.s_addr &= m.s_addr;

				if (prefix.s_addr != p.s_addr ||
				    mask.s_addr != m.s_addr)
					continue;
			}

			/*
			 * If we got a matching prefix route inserted by other
			 * interface address, we are done here.
			 */
			if (ia->ia_flags & IFA_ROUTE) {
				if (V_sameprefixcarponly &&
				    target->ia_ifp->if_type != IFT_CARP &&
				    ia->ia_ifp->if_type != IFT_CARP)
					return (EEXIST);
				else
					return (0);
			}
		}

		/*
		 * No-one seem to have this prefix route, so we try to insert it.
		 */
		error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
		if (!error)
			target->ia_flags |= IFA_ROUTE;
		return (error);
	}

	extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);

	/*
	* If there is no other address in the system that can serve a route to the
	* same prefix, remove the route. Hand over the route to the new address
	* otherwise.
	*/
	static int
	in_scrubprefix(struct in_ifaddr *target)
	{
		INIT_VNET_INET(curvnet);
		struct in_ifaddr *ia;
		struct in_addr prefix, mask, p;
		int error;

		/* Nothing to hand over or remove if target owns no route. */
		if ((target->ia_flags & IFA_ROUTE) == 0)
			return (0);

		if (rtinitflags(target))
			prefix = target->ia_dstaddr.sin_addr;
		else {
			prefix = target->ia_addr.sin_addr;
			mask = target->ia_sockmask.sin_addr;
			prefix.s_addr &= mask.s_addr;
			/* remove arp cache */
			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
		}

		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
			if (rtinitflags(ia))
				p = ia->ia_dstaddr.sin_addr;
			else {
				p = ia->ia_addr.sin_addr;
				p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
			}

			if (prefix.s_addr != p.s_addr)
				continue;

			/*
			 * If we got a matching prefix address, move IFA_ROUTE and
			 * the route itself to it. Make sure that routing daemons
			 * get a heads-up.
			 *
			 * XXX: a special case for carp(4) interface
			 */
			if ((ia->ia_flags & IFA_ROUTE) == 0
	#ifdef DEV_CARP
			    && (ia->ia_ifp->if_type != IFT_CARP)
	#endif
			    ) {
				rtinit(&(target->ia_ifa), (int)RTM_DELETE,
				    rtinitflags(target));
				target->ia_flags &= ~IFA_ROUTE;

				error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
				    rtinitflags(ia) | RTF_UP);
				if (error == 0)
					ia->ia_flags |= IFA_ROUTE;
				return (error);
			}
		}

		/*
		 * As no-one seem to have this prefix, we can remove the route.
		 */
		rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
		target->ia_flags &= ~IFA_ROUTE;
		return (0);
	}

	#undef rtinitflags

	/*
	* Return 1 if the address might be a local broadcast address.
	*/
	int
	in_broadcast(struct in_addr in, struct ifnet *ifp)
	{
	register struct ifaddr *ifa;
	u_long t;

	if (in.s_addr == INADDR_BROADCAST \|\|
	in.s_addr == INADDR_ANY)
	return (1);
	if ((ifp->if_flags & IFF_BROADCAST) == 0)
	return (0);
	t = ntohl(in.s_addr);
	/*
	* Look through the list of addresses for a match
	* with a broadcast address.
	*/
	#define ia ((struct in_ifaddr *)ifa)
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
	if (ifa->ifa_addr->sa_family == AF_INET &&
	(in.s_addr == ia->ia_broadaddr.sin_addr.s_addr \|\|
	in.s_addr == ia->ia_netbroadcast.s_addr \|\|
	/*
	* Check for old-style (host 0) broadcast.
	*/
	t == ia->ia_subnet \|\| t == ia->ia_net) &&
	/*
	* Check for an all one subnetmask. These
	* only exist when an interface gets a secondary
	* address.
	*/
	ia->ia_subnetmask != (u_long)0xffffffff)
	return (1);
	return (0);
	#undef ia
	}

	/*
	* Delete all IPv4 multicast address records, and associated link-layer
	* multicast address records, associated with ifp.
	*/
	static void
	in_purgemaddrs(struct ifnet *ifp)
	{
		INIT_VNET_INET(ifp->if_vnet);
		struct in_multi *cur;
		struct in_multi *next;

	#ifdef DIAGNOSTIC
		printf("%s: purging ifp %p\n", __func__, ifp);
	#endif
		IFF_LOCKGIANT(ifp);
		IN_MULTI_LOCK();
		/* The _SAFE iterator is used because records are deleted mid-walk. */
		LIST_FOREACH_SAFE(cur, &V_in_multihead, inm_link, next) {
			if (cur->inm_ifp != ifp)
				continue;
			in_delmulti_locked(cur);
		}
		IN_MULTI_UNLOCK();
		IFF_UNLOCKGIANT(ifp);
	}

	/*
	* On interface removal, clean up IPv4 data structures hung off of the ifnet.
	*/
	void
	in_ifdetach(struct ifnet *ifp)
	{
	INIT_VNET_INET(ifp->if_vnet);

	/* Purge ifp from the raw-IP and UDP pcb tables... */
	in_pcbpurgeif0(&V_ripcbinfo, ifp);
	in_pcbpurgeif0(&V_udbinfo, ifp);
	/* ...then delete the multicast records that refer to it. */
	in_purgemaddrs(ifp);
	}

	#include <sys/syslog.h>
	#include <net/if_dl.h>
	#include <netinet/if_ether.h>

	/*
	 * IPv4 link-layer table entry: the generic llentry plus the IPv4
	 * address it describes.
	 */
	struct in_llentry {
	/*
	 * Generic part.  in_lltable_new() hands out &base and
	 * in_lltable_free() frees the llentry pointer it is given.
	 */
	struct llentry base;
	/* Copy of the address passed to in_lltable_new(). */
	struct sockaddr_in l3_addr4;
	};

	/*
	 * Allocate and initialize an IPv4 link-layer entry for l3addr.
	 * Returns the embedded generic entry, or NULL when the non-sleeping
	 * allocation fails.  `flags' is not used here.
	 */
	static struct llentry *
	in_lltable_new(const struct sockaddr *l3addr, u_int flags)
	{
		struct in_llentry *lle;

		lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
		if (lle == NULL)		/* NB: caller generates msg */
			return NULL;

		callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
		/* qing
		 * For IPv4 this will trigger "arpresolve" to generate
		 * an ARP request
		 */
		lle->base.la_expire = time_second; /* mark expired */
		/* Copy the whole sockaddr_in by value. */
		lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
	-
	+	lle->base.lle_refcnt = 1;
	+	LLE_LOCK_INIT(&lle->base);
		return &lle->base;
	}

	/*
	* Deletes an address from the address table.
	* This function is called by the timer functions
	* such as arptimer() and nd6_llinfo_timer(), and
	* the caller does the locking.
	*/
	static void
	in_lltable_free(struct lltable *llt, struct llentry *lle)
	{
		/* llt is not consulted; the entry's memory is simply released. */
		free(lle, M_LLTABLE);
	}

	static int
	in_lltable_rtcheck(struct ifnet ifp, const struct sockaddr l3addr)
	{
	struct rtentry *rt;

	KASSERT(l3addr->sa_family == AF_INET,
	("sin_family %d", l3addr->sa_family));

	/* XXX rtalloc1 should take a const param */
	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
	if (rt == NULL \|\| (rt->rt_flags & RTF_GATEWAY) \|\| rt->rt_ifp != ifp) {
	log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
	inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
	if (rt != NULL)
	- rtfree(rt);
	- return EINVAL;
	+ RTFREE_LOCKED(rt);
	+ return (EINVAL);
	}
	- rtfree(rt);
	+ RTFREE_LOCKED(rt);
	return 0;
	}

	+/*
	+ * Returns NULL if not found or marked for deletion
	+ * if found returns lle read locked
	+ *
	+ */
	static struct llentry *
	in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
	{
		const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
		struct ifnet *ifp = llt->llt_ifp;
		struct llentry *lle;
		struct llentries *lleh;
		u_int hashkey;

		KASSERT(l3addr->sa_family == AF_INET,
		    ("sin_family %d", l3addr->sa_family));

		/* Walk the hash chain for this address, skipping deleted entries. */
		hashkey = sin->sin_addr.s_addr;
		lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
		LIST_FOREACH(lle, lleh, lle_next) {
			if (lle->la_flags & LLE_DELETED)
				continue;
			if (bcmp(L3_ADDR(lle), l3addr, sizeof(struct sockaddr_in)) == 0)
				break;
		}
	-
		if (lle == NULL) {
	+#ifdef INVARIANTS
	+		if (flags & LLE_DELETE)
	+			log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
	+#endif
			if (!(flags & LLE_CREATE))
				return (NULL);
			/*
			 * A route that covers the given address must have
			 * been installed 1st because we are doing a resolution,
			 * verify this.
			 */
			if (!(flags & LLE_IFADDR) &&
			    in_lltable_rtcheck(ifp, l3addr) != 0)
	-			return NULL;
	+			goto done;

			lle = in_lltable_new(l3addr, flags);
			if (lle == NULL) {
				log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
	-			return NULL;
	+			goto done;
			}
			lle->la_flags = flags & ~LLE_CREATE;
			/* An interface address entry carries the interface's own MAC. */
			if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
				bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
				lle->la_flags |= (LLE_VALID | LLE_STATIC);
			}

			lle->lle_tbl = llt;
			lle->lle_head = lleh;
			LIST_INSERT_HEAD(lleh, lle, lle_next);
	-	} else {
	-		if (flags & LLE_DELETE)
	-			lle->la_flags = LLE_DELETED;
	+	} else if (flags & LLE_DELETE) {
	+		LLE_WLOCK(lle);
	+		lle->la_flags = LLE_DELETED;
	+		LLE_WUNLOCK(lle);
	+#ifdef INVARIANTS
	+		log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
	+#endif
	+		lle = NULL;
		}
	-	return lle;
	+	if (lle) {
	+		if (flags & LLE_EXCLUSIVE)
	+			LLE_WLOCK(lle);
	+		else
	+			LLE_RLOCK(lle);
	+	}
	+done:
	+	return (lle);
	}

	/*
	* Dump the IPv4 link-layer table llt to the sysctl request wr.
	* Every hash bucket is walked and, for each entry that is not marked
	* LLE_DELETED, one fixed-size record (rt_msghdr + sockaddr_inarp +
	* sockaddr_dl) is written with SYSCTL_OUT().
	* Returns whatever is left in "error" after the walk (0 when no
	* SYSCTL_OUT() call reported a failure).
	*/
	static int
	in_lltable_dump(struct lltable llt, struct sysctl_req wr)
	{
	#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle))
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	/* XXX stack use */
	struct {
	struct rt_msghdr rtm;
	struct sockaddr_inarp sin;
	struct sockaddr_dl sdl;
	} arpc;
	int error, i;

	/* XXXXX
	* current IFNET_RLOCK() is mapped to IFNET_WLOCK()
	* so it is okay to use this ASSERT, change it when
	* IFNET lock is finalized
	*/
	IFNET_WLOCK_ASSERT();

	error = 0;
	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
	LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
	/* skip deleted entries */
	if (lle->la_flags & LLE_DELETED)
	continue;
	/*
	* produce a msg made of:
	* struct rt_msghdr;
	* struct sockaddr_inarp; (IPv4)
	* struct sockaddr_dl;
	*/
	bzero(&arpc, sizeof(arpc));
	arpc.rtm.rtm_msglen = sizeof(arpc);

	arpc.sin.sin_family = AF_INET;
	arpc.sin.sin_len = sizeof(arpc.sin);
	arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;

	/* publish */
	if (lle->la_flags & LLE_PUB) {
	arpc.rtm.rtm_flags \|= RTF_ANNOUNCE;
	/* proxy only */
	if (lle->la_flags & LLE_PROXY)
	arpc.sin.sin_other = SIN_PROXY;
	}

	/*
	* The sockaddr_dl part stays all-zero (from the bzero above)
	* unless the entry carries a valid link-layer address.
	*/
	if (lle->la_flags & LLE_VALID) { /* valid MAC */
	struct sockaddr_dl *sdl = &arpc.sdl;

	sdl->sdl_family = AF_LINK;
	sdl->sdl_len = sizeof(*sdl);
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
	}
	/* Static entries report an expiry of 0. */
	arpc.rtm.rtm_rmx.rmx_expire =
	lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
	arpc.rtm.rtm_flags \|= RTF_LLINFO \| RTF_HOST;
	if (lle->la_flags & LLE_STATIC)
	arpc.rtm.rtm_flags \|= RTF_STATIC;
	arpc.rtm.rtm_index = ifp->if_index;
	error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
	/*
	* NOTE(review): this break appears to leave only the
	* inner LIST_FOREACH; the bucket loop above would keep
	* running and a later SYSCTL_OUT() could overwrite
	* "error" -- confirm this is intended.
	*/
	if (error)
	break;
	}
	}
	return error;
	#undef SIN
	}

	/*
	* Attach-time hook for AF_INET on ifp: create the interface's
	* link-layer table with lltable_init() and, when that succeeds, install
	* the IPv4 in_lltable_* callbacks in it.
	* Returns the table pointer, which is NULL when lltable_init() failed.
	*/
	void *
	in_domifattach(struct ifnet *ifp)
	{
	struct lltable *llt = lltable_init(ifp, AF_INET);

	if (llt != NULL) {
	llt->llt_new = in_lltable_new;
	llt->llt_free = in_lltable_free;
	llt->llt_rtcheck = in_lltable_rtcheck;
	llt->llt_lookup = in_lltable_lookup;
	llt->llt_dump = in_lltable_dump;
	}
	return (llt);
	}

	/*
	* Detach-time counterpart of in_domifattach(): aux is treated as the
	* link-layer table and is released with lltable_free(). ifp is unused.
	*/
	void
	in_domifdetach(struct ifnet ifp __unused, void aux)
	{
	struct lltable llt = (struct lltable )aux;

	lltable_free(llt);
	}
	Index: projects/arpv2_merge_1/sys/netinet/ip_output.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet/ip_output.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet/ip_output.c (revision 185839)
	@@ -1,1202 +1,1197 @@
	/*-
	* Copyright (c) 1982, 1986, 1988, 1990, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_ipfw.h"
	#include "opt_ipsec.h"
	#include "opt_mac.h"
	#include "opt_mbuf_stress_test.h"
	#include "opt_mpath.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/kernel.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sysctl.h>
	#include <sys/ucred.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/netisr.h>
	#include <net/pfil.h>
	#include <net/route.h>
	#ifdef RADIX_MPATH
	#include <net/radix_mpath.h>
	#endif
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_systm.h>
	#include <netinet/ip.h>
	#include <netinet/in_pcb.h>
	#include <netinet/in_var.h>
	#include <netinet/ip_var.h>
	#include <netinet/ip_options.h>
	#include <netinet/vinet.h>

	#ifdef IPSEC
	#include <netinet/ip_ipsec.h>
	#include <netipsec/ipsec.h>
	#endif /* IPSEC*/

	#include <machine/in_cksum.h>

	#include <security/mac/mac_framework.h>

	/*
	* printf() the string x, then the IPv4 address a (an object with an
	* s_addr member, converted with ntohl()) in dotted-quad form, then the
	* string y. Note that the expansion already ends in a semicolon.
	*/
	#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
	x, (ntohl(a.s_addr)>>24)&0xFF,\
	(ntohl(a.s_addr)>>16)&0xFF,\
	(ntohl(a.s_addr)>>8)&0xFF,\
	(ntohl(a.s_addr))&0xFF, y);

	/* ip_id is only defined here when VIMAGE_GLOBALS is set. */
	#ifdef VIMAGE_GLOBALS
	u_short ip_id;
	#endif

	/*
	* Stress-test knob, exported read/write as a sysctl under _net_inet_ip:
	* when non-zero, ip_output() passes packets longer than this through
	* m_fragment() before handing them to the interface.
	*/
	#ifdef MBUF_STRESS_TEST
	int mbuf_frag_size = 0;
	SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
	#endif

	/* Forward declaration; the definition appears later in this file. */
	static void ip_mloopback
	(struct ifnet , struct mbuf , struct sockaddr_in *, int);


	extern struct protosw inetsw[];

	/*
	* IP output. The packet in mbuf chain m contains a skeletal IP
	* header (with len, off, ttl, proto, tos, src, dst).
	* The mbuf chain containing the packet will be freed.
	* The mbuf opt, if present, will not be freed.
	* In the IP forwarding case, the packet will arrive with options already
	* inserted, so must have a NULL opt pointer.
	*
	* ro: optional route cache; when NULL a zeroed local struct route is
	* used and any route it acquired is released at "done".
	* flags: IP_* flags tested below (IP_FORWARDING, IP_RAWOUTPUT,
	* IP_SENDONES, IP_ROUTETOIF, IP_ALLOWBROADCAST).
	* imo: optional multicast options (interface, ttl, vif, loop).
	* inp: optional PCB; when set its FIB number is stamped on m and its
	* lock is asserted.
	* Returns 0 or an errno value.
	*/
	int
	ip_output(struct mbuf m, struct mbuf opt, struct route *ro, int flags,
	struct ip_moptions imo, struct inpcb inp)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET(curvnet);
	struct ip *ip;
	struct ifnet ifp = NULL; / keep compiler happy */
	struct mbuf *m0;
	int hlen = sizeof (struct ip);
	int mtu;
	int len, error = 0;
	struct sockaddr_in dst = NULL; / keep compiler happy */
	struct in_ifaddr *ia = NULL;
	int isbroadcast, sw_csum;
	struct route iproute;
	struct in_addr odst;
	#ifdef IPFIREWALL_FORWARD
	struct m_tag *fwd_tag = NULL;
	#endif
	M_ASSERTPKTHDR(m);

	/* No caller-supplied route cache: fall back to a private one. */
	if (ro == NULL) {
	ro = &iproute;
	bzero(ro, sizeof (*ro));
	}

	if (inp != NULL) {
	M_SETFIB(m, inp->inp_inc.inc_fibnum);
	INP_LOCK_ASSERT(inp);
	}

	if (opt) {
	len = 0;
	m = ip_insertoptions(m, opt, &len);
	if (len != 0)
	hlen = len;
	}
	ip = mtod(m, struct ip *);

	/*
	* Fill in IP header. If we are not allowing fragmentation,
	* then the ip_id field is meaningless, but we don't set it
	* to zero. Doing so causes various problems when devices along
	* the path (routers, load balancers, firewalls, etc.) illegally
	* disable DF on our packet. Note that a 16-bit counter
	* will wrap around in less than 10 seconds at 100 Mbit/s on a
	* medium with MTU 1500. See Steven M. Bellovin, "A Technique
	* for Counting NATted Hosts", Proc. IMW'02, available at
	* <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
	*/
	if ((flags & (IP_FORWARDING\|IP_RAWOUTPUT)) == 0) {
	ip->ip_v = IPVERSION;
	ip->ip_hl = hlen >> 2;
	ip->ip_id = ip_newid();
	V_ipstat.ips_localout++;
	} else {
	hlen = ip->ip_hl << 2;
	}

	dst = (struct sockaddr_in *)&ro->ro_dst;
	again:
	/*
	* If there is a cached route,
	* check that it is to the same destination
	* and is still up. If not, free it and try again.
	* The address family should also be checked in case of sharing the
	* cache with IPv6.
	*/
	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 \|\|
	dst->sin_family != AF_INET \|\|
	dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
	RTFREE(ro->ro_rt);
	ro->ro_rt = (struct rtentry *)NULL;
	}
	#ifdef IPFIREWALL_FORWARD
	if (ro->ro_rt == NULL && fwd_tag == NULL) {
	#else
	if (ro->ro_rt == NULL) {
	#endif
	bzero(dst, sizeof(*dst));
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;
	}
	/*
	* If routing to interface only, short circuit routing lookup.
	* The use of an all-ones broadcast address implies this; an
	* interface is specified by the broadcast address of an interface,
	* or the destination address of a ptp interface.
	*/
	if (flags & IP_SENDONES) {
	if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
	(ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
	V_ipstat.ips_noroute++;
	error = ENETUNREACH;
	goto bad;
	}
	ip->ip_dst.s_addr = INADDR_BROADCAST;
	dst->sin_addr = ip->ip_dst;
	ifp = ia->ia_ifp;
	ip->ip_ttl = 1;
	isbroadcast = 1;
	} else if (flags & IP_ROUTETOIF) {
	if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
	(ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
	V_ipstat.ips_noroute++;
	error = ENETUNREACH;
	goto bad;
	}
	ifp = ia->ia_ifp;
	ip->ip_ttl = 1;
	isbroadcast = in_broadcast(dst->sin_addr, ifp);
	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
	imo != NULL && imo->imo_multicast_ifp != NULL) {
	/*
	* Bypass the normal routing lookup for multicast
	* packets if the interface is specified.
	*/
	ifp = imo->imo_multicast_ifp;
	IFP_TO_IA(ifp, ia);
	isbroadcast = 0; /* fool gcc */
	} else {
	/*
	* We want to do any cloning requested by the link layer,
	* as this is probably required in all cases for correct
	* operation (as it is for ARP).
	*/
	if (ro->ro_rt == NULL)
	#ifdef RADIX_MPATH
	rtalloc_mpath_fib(ro,
	ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
	inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
	#else
	in_rtalloc_ign(ro, 0,
	inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
	#endif
	if (ro->ro_rt == NULL) {
	V_ipstat.ips_noroute++;
	error = EHOSTUNREACH;
	goto bad;
	}
	ia = ifatoia(ro->ro_rt->rt_ifa);
	ifp = ro->ro_rt->rt_ifp;
	ro->ro_rt->rt_rmx.rmx_pksent++;
	/* For gateway routes the next hop, not ro_dst, is handed down. */
	if (ro->ro_rt->rt_flags & RTF_GATEWAY)
	dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
	if (ro->ro_rt->rt_flags & RTF_HOST)
	isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
	else
	isbroadcast = in_broadcast(dst->sin_addr, ifp);
	}
	/*
	* Calculate MTU. If we have a route that is up, use that,
	* otherwise use the interface's MTU.
	*/
	if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP\|RTF_HOST))) {
	/*
	* This case can happen if the user changed the MTU
	* of an interface after enabling IP on it. Because
	* most netifs don't keep track of routes pointing to
	* them, there is no way for one to update all its
	* routes when the MTU is changed.
	*/
	if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
	ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
	mtu = ro->ro_rt->rt_rmx.rmx_mtu;
	} else {
	mtu = ifp->if_mtu;
	}
	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
	struct in_multi *inm;

	m->m_flags \|= M_MCAST;
	/*
	* IP destination address is multicast. Make sure "dst"
	* still points to the address in "ro". (It may have been
	* changed to point to a gateway address, above.)
	*/
	dst = (struct sockaddr_in *)&ro->ro_dst;
	/*
	* See if the caller provided any multicast options
	*/
	if (imo != NULL) {
	ip->ip_ttl = imo->imo_multicast_ttl;
	if (imo->imo_multicast_vif != -1)
	ip->ip_src.s_addr =
	ip_mcast_src ?
	ip_mcast_src(imo->imo_multicast_vif) :
	INADDR_ANY;
	} else
	ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
	/*
	* Confirm that the outgoing interface supports multicast.
	*/
	if ((imo == NULL) \|\| (imo->imo_multicast_vif == -1)) {
	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
	V_ipstat.ips_noroute++;
	error = ENETUNREACH;
	goto bad;
	}
	}
	/*
	* If source address not specified yet, use address
	* of outgoing interface.
	*/
	if (ip->ip_src.s_addr == INADDR_ANY) {
	/* Interface may have no addresses. */
	if (ia != NULL)
	ip->ip_src = IA_SIN(ia)->sin_addr;
	}

	IN_MULTI_LOCK();
	IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
	if (inm != NULL &&
	(imo == NULL \|\| imo->imo_multicast_loop)) {
	IN_MULTI_UNLOCK();
	/*
	* If we belong to the destination multicast group
	* on the outgoing interface, and the caller did not
	* forbid loopback, loop back a copy.
	*/
	ip_mloopback(ifp, m, dst, hlen);
	}
	else {
	IN_MULTI_UNLOCK();
	/*
	* If we are acting as a multicast router, perform
	* multicast forwarding as if the packet had just
	* arrived on the interface to which we are about
	* to send. The multicast forwarding function
	* recursively calls this function, using the
	* IP_FORWARDING flag to prevent infinite recursion.
	*
	* Multicasts that are looped back by ip_mloopback(),
	* above, will be forwarded by the ip_input() routine,
	* if necessary.
	*/
	if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
	/*
	* If rsvp daemon is not running, do not
	* set ip_moptions. This ensures that the packet
	* is multicast and not just sent down one link
	* as prescribed by rsvpd.
	*/
	if (!V_rsvp_on)
	imo = NULL;
	if (ip_mforward &&
	ip_mforward(ip, ifp, m, imo) != 0) {
	m_freem(m);
	goto done;
	}
	}
	}

	/*
	* Multicasts with a time-to-live of zero may be looped-
	* back, above, but must not be transmitted on a network.
	* Also, multicasts addressed to the loopback interface
	* are not sent -- the above call to ip_mloopback() will
	* loop back a copy if this host actually belongs to the
	* destination group on the loopback interface.
	*/
	if (ip->ip_ttl == 0 \|\| ifp->if_flags & IFF_LOOPBACK) {
	m_freem(m);
	goto done;
	}

	/* Multicast skips the queue-space and broadcast checks below. */
	goto sendit;
	}

	/*
	* If the source address is not specified yet, use the address
	* of the outgoing interface.
	*/
	if (ip->ip_src.s_addr == INADDR_ANY) {
	/* Interface may have no addresses. */
	if (ia != NULL) {
	ip->ip_src = IA_SIN(ia)->sin_addr;
	}
	}

	/*
	* Verify that we have any chance at all of being able to queue the
	* packet or packet fragments, unless ALTQ is enabled on the given
	* interface in which case packetdrop should be done by queueing.
	*/
	#ifdef ALTQ
	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
	((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
	ifp->if_snd.ifq_maxlen))
	#else
	if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
	ifp->if_snd.ifq_maxlen)
	#endif /* ALTQ */
	{
	error = ENOBUFS;
	V_ipstat.ips_odropped++;
	ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
	goto bad;
	}

	/*
	* Look for broadcast address and
	* verify user is allowed to send
	* such a packet.
	*/
	if (isbroadcast) {
	if ((ifp->if_flags & IFF_BROADCAST) == 0) {
	error = EADDRNOTAVAIL;
	goto bad;
	}
	if ((flags & IP_ALLOWBROADCAST) == 0) {
	error = EACCES;
	goto bad;
	}
	/* don't allow broadcast messages to be fragmented */
	if (ip->ip_len > mtu) {
	error = EMSGSIZE;
	goto bad;
	}
	m->m_flags \|= M_BCAST;
	} else {
	m->m_flags &= ~M_BCAST;
	}

	sendit:
	#ifdef IPSEC
	switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
	case 1:
	goto bad;
	case -1:
	goto done;
	case 0:
	default:
	break; /* Continue with packet processing. */
	}
	/* Update variables that are affected by ipsec4_output(). */
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;
	#endif /* IPSEC */

	/* Jump over all PFIL processing if hooks are not active. */
	if (!PFIL_HOOKED(&inet_pfil_hook))
	goto passout;

	/* Run through list of hooks for output packets. */
	odst.s_addr = ip->ip_dst.s_addr;
	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
	if (error != 0 \|\| m == NULL)
	goto done;

	/* The hooks were given &m, so re-derive the header pointer. */
	ip = mtod(m, struct ip *);

	/* See if destination IP address was changed by packet filter. */
	if (odst.s_addr != ip->ip_dst.s_addr) {
	m->m_flags \|= M_SKIP_FIREWALL;
	/* If destination is now ourself drop to ip_input(). */
	if (in_localip(ip->ip_dst)) {
	m->m_flags \|= M_FASTFWD_OURS;
	if (m->m_pkthdr.rcvif == NULL)
	m->m_pkthdr.rcvif = V_loif;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	m->m_pkthdr.csum_flags \|=
	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xffff;
	}
	m->m_pkthdr.csum_flags \|=
	CSUM_IP_CHECKED \| CSUM_IP_VALID;

	error = netisr_queue(NETISR_IP, m);
	goto done;
	} else
	goto again; /* Redo the routing table lookup. */
	}

	#ifdef IPFIREWALL_FORWARD
	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
	if (m->m_flags & M_FASTFWD_OURS) {
	if (m->m_pkthdr.rcvif == NULL)
	m->m_pkthdr.rcvif = V_loif;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	m->m_pkthdr.csum_flags \|=
	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xffff;
	}
	m->m_pkthdr.csum_flags \|=
	CSUM_IP_CHECKED \| CSUM_IP_VALID;

	error = netisr_queue(NETISR_IP, m);
	goto done;
	}
	/* Or forward to some other address? */
	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
	if (fwd_tag) {
	dst = (struct sockaddr_in *)&ro->ro_dst;
	bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
	m->m_flags \|= M_SKIP_FIREWALL;
	m_tag_delete(m, fwd_tag);
	goto again;
	}
	#endif /* IPFIREWALL_FORWARD */

	passout:
	/* 127/8 must not appear on wire - RFC1122. */
	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET \|\|
	(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
	if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
	V_ipstat.ips_badaddr++;
	error = EADDRNOTAVAIL;
	goto bad;
	}
	}

	/*
	* sw_csum collects the checksum work the interface does not
	* advertise in if_hwassist and that therefore is done here.
	*/
	m->m_pkthdr.csum_flags \|= CSUM_IP;
	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
	in_delayed_cksum(m);
	sw_csum &= ~CSUM_DELAY_DATA;
	}
	m->m_pkthdr.csum_flags &= ifp->if_hwassist;

	/*
	* If small enough for interface, or the interface will take
	* care of the fragmentation for us, we can just send directly.
	*/
	if (ip->ip_len <= mtu \|\|
	(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 \|\|
	((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
	ip->ip_len = htons(ip->ip_len);
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
	ip->ip_sum = in_cksum(m, hlen);

	/*
	* Record statistics for this interface address.
	* With CSUM_TSO the byte/packet count will be slightly
	* incorrect because we count the IP+TCP headers only
	* once instead of for every generated packet.
	*/
	if (!(flags & IP_FORWARDING) && ia) {
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
	ia->ia_ifa.if_opackets +=
	m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
	else
	ia->ia_ifa.if_opackets++;
	ia->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	#ifdef MBUF_STRESS_TEST
	if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
	m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
	#endif
	/*
	* Reset layer specific mbuf flags
	* to avoid confusing lower layers.
	*/
	m->m_flags &= ~(M_PROTOFLAGS);
	-
	- IF_AFDATA_LOCK(ifp);
	error = (*ifp->if_output)(ifp, m,
	(struct sockaddr *)dst, ro->ro_rt);
	- IF_AFDATA_UNLOCK(ifp);
	goto done;
	}

	/* Balk when DF bit is set or the interface didn't support TSO. */
	if ((ip->ip_off & IP_DF) \|\| (m->m_pkthdr.csum_flags & CSUM_TSO)) {
	error = EMSGSIZE;
	V_ipstat.ips_cantfrag++;
	goto bad;
	}

	/*
	* Too large for interface; fragment if possible. If successful,
	* on return, m will point to a list of packets to be sent.
	*/
	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
	if (error)
	goto bad;
	/*
	* Send the fragments in order; once one if_output() call fails the
	* remaining fragments are freed instead of being sent.
	*/
	for (; m; m = m0) {
	m0 = m->m_nextpkt;
	m->m_nextpkt = 0;
	if (error == 0) {
	/* Record statistics for this interface address. */
	if (ia != NULL) {
	ia->ia_ifa.if_opackets++;
	ia->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	/*
	* Reset layer specific mbuf flags
	* to avoid confusing lower layers.
	*/
	m->m_flags &= ~(M_PROTOFLAGS);

	- IF_AFDATA_LOCK(ifp);
	error = (*ifp->if_output)(ifp, m,
	(struct sockaddr *)dst, ro->ro_rt);
	- IF_AFDATA_UNLOCK(ifp);
	} else
	m_freem(m);
	}

	if (error == 0)
	V_ipstat.ips_fragmented++;

	done:
	/* Only a route held in our private iproute is released here. */
	if (ro == &iproute && ro->ro_rt) {
	RTFREE(ro->ro_rt);
	}
	return (error);
	bad:
	/* Error exit: drop the packet, then share the cleanup at done. */
	m_freem(m);
	goto done;
	}

	/*
	* Create a chain of fragments which fit the given mtu. m_frag points to the
	* mbuf to be fragmented; on return it points to the chain with the fragments.
	* Return 0 if no error. If error, m_frag may contain a partially built
	* chain of fragments that should be freed by the caller.
	*
	* if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
	* sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
	*
	* Error returns visible below: EMSGSIZE when IP_DF is set or when fewer
	* than 8 payload bytes fit in a fragment, ENOBUFS when allocating a
	* fragment header mbuf or copying the payload fails.
	*/
	int
	ip_fragment(struct ip ip, struct mbuf *m_frag, int mtu,
	u_long if_hwassist_flags, int sw_csum)
	{
	INIT_VNET_INET(curvnet);
	int error = 0;
	int hlen = ip->ip_hl << 2;
	int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
	int off;
	struct mbuf m0 = m_frag; /* the original packet */
	int firstlen;
	struct mbuf **mnext;
	int nfrags;

	if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
	V_ipstat.ips_cantfrag++;
	return EMSGSIZE;
	}

	/*
	* Must be able to put at least 8 bytes per fragment.
	*/
	if (len < 8)
	return EMSGSIZE;

	/*
	* If the interface will not calculate checksums on
	* fragmented packets, then do it here.
	*/
	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
	(if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
	in_delayed_cksum(m0);
	m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}

	if (len > PAGE_SIZE) {
	/*
	* Fragment large datagrams such that each segment
	* contains a multiple of PAGE_SIZE amount of data,
	* plus headers. This enables a receiver to perform
	* page-flipping zero-copy optimizations.
	*
	* XXX When does this help given that sender and receiver
	* could have different page sizes, and also mtu could
	* be less than the receiver's page size ?
	*/
	int newlen;
	struct mbuf *m;

	/* Sum the leading mbufs whose data still fits within mtu. */
	for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
	off += m->m_len;

	/*
	* firstlen (off - hlen) must be aligned on an
	* 8-byte boundary
	*/
	if (off < hlen)
	goto smart_frag_failure;
	off = ((off - hlen) & ~7) + hlen;
	newlen = (~PAGE_MASK) & mtu;
	if ((newlen + sizeof (struct ip)) > mtu) {
	/* we failed, go back the default */
	smart_frag_failure:
	newlen = len;
	off = hlen + len;
	}
	len = newlen;

	} else {
	off = hlen + len;
	}

	firstlen = off - hlen;
	mnext = &m0->m_nextpkt; /* pointer to next packet */

	/*
	* Loop through length of segment after first fragment,
	* make new header and copy data of each part and link onto chain.
	* Here, m0 is the original packet, m is the fragment being created.
	* The fragments are linked off the m_nextpkt of the original
	* packet, which after processing serves as the first fragment.
	*/
	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
	struct ip mhip; / ip header on the fragment */
	struct mbuf *m;
	int mhlen = sizeof (struct ip);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
	error = ENOBUFS;
	V_ipstat.ips_odropped++;
	goto done;
	}
	m->m_flags \|= (m0->m_flags & M_MCAST) \| M_FRAG;
	/*
	* In the first mbuf, leave room for the link header, then
	* copy the original IP header including options. The payload
	* goes into an additional mbuf chain returned by m_copy().
	*/
	m->m_data += max_linkhdr;
	mhip = mtod(m, struct ip *);
	mhip = ip;
	if (hlen > sizeof (struct ip)) {
	mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
	mhip->ip_v = IPVERSION;
	mhip->ip_hl = mhlen >> 2;
	}
	m->m_len = mhlen;
	/* XXX do we need to add ip->ip_off below ? */
	mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
	if (off + len >= ip->ip_len) { /* last fragment */
	len = ip->ip_len - off;
	m->m_flags \|= M_LASTFRAG;
	} else
	mhip->ip_off \|= IP_MF;
	mhip->ip_len = htons((u_short)(len + mhlen));
	m->m_next = m_copy(m0, off, len);
	if (m->m_next == NULL) { /* copy failed */
	m_free(m);
	error = ENOBUFS; /* ??? */
	V_ipstat.ips_odropped++;
	goto done;
	}
	m->m_pkthdr.len = mhlen + len;
	m->m_pkthdr.rcvif = NULL;
	#ifdef MAC
	mac_netinet_fragment(m0, m);
	#endif
	m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
	mhip->ip_off = htons(mhip->ip_off);
	mhip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
	mhip->ip_sum = in_cksum(m, mhlen);
	/* Append the finished fragment to the m_nextpkt chain. */
	*mnext = m;
	mnext = &m->m_nextpkt;
	}
	V_ipstat.ips_ofragments += nfrags;

	/* set first marker for fragment chain */
	m0->m_flags \|= M_FIRSTFRAG \| M_FRAG;
	m0->m_pkthdr.csum_data = nfrags;

	/*
	* Update first fragment by trimming what's been copied out
	* and updating header.
	*/
	m_adj(m0, hlen + firstlen - ip->ip_len);
	m0->m_pkthdr.len = hlen + firstlen;
	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
	ip->ip_off \|= IP_MF;
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
	ip->ip_sum = in_cksum(m0, hlen);

	done:
	/* Reached on success and on allocation failure alike. */
	*m_frag = m0;
	return error;
	}

	/*
	* Compute a checksum that was deferred (CSUM_DELAY_DATA) and store it
	* in the packet. The sum is taken with in_cksum_skip() over ip_len bytes
	* of m, skipping the IP header; the result is written at IP header
	* length + m_pkthdr.csum_data bytes into the first mbuf. If that
	* location does not lie within the first mbuf a message is printed and
	* nothing is stored.
	*/
	void
	in_delayed_cksum(struct mbuf *m)
	{
	INIT_VNET_INET(curvnet);
	struct ip *ip;
	u_short csum, offset;

	ip = mtod(m, struct ip *);
	offset = ip->ip_hl << 2 ;
	csum = in_cksum_skip(m, ip->ip_len, offset);
	/* For UDP a computed sum of 0 is stored as 0xffff instead. */
	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
	csum = 0xffff;
	offset += m->m_pkthdr.csum_data; /* checksum offset */

	if (offset + sizeof(u_short) > m->m_len) {
	printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
	m->m_len, offset, ip->ip_p);
	/*
	* XXX
	* this shouldn't happen, but if it does, the
	* correct behavior may be to insert the checksum
	* in the appropriate next mbuf in the chain.
	*/
	return;
	}
	(u_short )(m->m_data + offset) = csum;
	}

	/*
	* IP socket option processing.
	*
	* Handles get (SOPT_GET) and set (SOPT_SET) requests at level
	* IPPROTO_IP for the socket so. For any other level only
	* SOL_SOCKET/SO_SETFIB is acted upon (the socket's FIB number is copied
	* into the PCB); everything else yields EINVAL.
	* Returns 0 or an errno value; unknown option names give ENOPROTOOPT.
	*/
	int
	ip_ctloutput(struct socket so, struct sockopt sopt)
	{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	error = optval = 0;
	if (sopt->sopt_level != IPPROTO_IP) {
	if ((sopt->sopt_level == SOL_SOCKET) &&
	(sopt->sopt_name == SO_SETFIB)) {
	inp->inp_inc.inc_fibnum = so->so_fibnum;
	return (0);
	}
	return (EINVAL);
	}

	switch (sopt->sopt_dir) {
	case SOPT_SET:
	switch (sopt->sopt_name) {
	case IP_OPTIONS:
	#ifdef notyet
	case IP_RETOPTS:
	#endif
	{
	struct mbuf *m;
	/* The option bytes must fit in a single mbuf. */
	if (sopt->sopt_valsize > MLEN) {
	error = EMSGSIZE;
	break;
	}
	MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == NULL) {
	error = ENOBUFS;
	break;
	}
	m->m_len = sopt->sopt_valsize;
	error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
	m->m_len);
	if (error) {
	m_free(m);
	break;
	}
	/*
	* NOTE(review): m is not freed here after this call;
	* presumably ip_pcbopts() takes ownership -- confirm.
	*/
	INP_WLOCK(inp);
	error = ip_pcbopts(inp, sopt->sopt_name, m);
	INP_WUNLOCK(inp);
	return (error);
	}

	case IP_TOS:
	case IP_TTL:
	case IP_MINTTL:
	case IP_RECVOPTS:
	case IP_RECVRETOPTS:
	case IP_RECVDSTADDR:
	case IP_RECVTTL:
	case IP_RECVIF:
	case IP_FAITH:
	case IP_ONESBCAST:
	case IP_DONTFRAG:
	/* All of these take a single int argument. */
	error = sooptcopyin(sopt, &optval, sizeof optval,
	sizeof optval);
	if (error)
	break;

	/*
	* NOTE(review): the IP_TOS, IP_TTL and IP_MINTTL stores
	* below are made without INP_WLOCK in this block, unlike
	* the OPTSET() cases -- confirm this is intended.
	*/
	switch (sopt->sopt_name) {
	case IP_TOS:
	inp->inp_ip_tos = optval;
	break;

	case IP_TTL:
	inp->inp_ip_ttl = optval;
	break;

	case IP_MINTTL:
	if (optval > 0 && optval <= MAXTTL)
	inp->inp_ip_minttl = optval;
	else
	error = EINVAL;
	break;

	/* Set or clear "bit" in inp_flags under the PCB write lock. */
	#define OPTSET(bit) do { \
	INP_WLOCK(inp); \
	if (optval) \
	inp->inp_flags \|= bit; \
	else \
	inp->inp_flags &= ~bit; \
	INP_WUNLOCK(inp); \
	} while (0)

	case IP_RECVOPTS:
	OPTSET(INP_RECVOPTS);
	break;

	case IP_RECVRETOPTS:
	OPTSET(INP_RECVRETOPTS);
	break;

	case IP_RECVDSTADDR:
	OPTSET(INP_RECVDSTADDR);
	break;

	case IP_RECVTTL:
	OPTSET(INP_RECVTTL);
	break;

	case IP_RECVIF:
	OPTSET(INP_RECVIF);
	break;

	case IP_FAITH:
	OPTSET(INP_FAITH);
	break;

	case IP_ONESBCAST:
	OPTSET(INP_ONESBCAST);
	break;
	case IP_DONTFRAG:
	OPTSET(INP_DONTFRAG);
	break;
	}
	break;
	#undef OPTSET

	/*
	* Multicast socket options are processed by the in_mcast
	* module.
	*/
	case IP_MULTICAST_IF:
	case IP_MULTICAST_VIF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_LOOP:
	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	error = inp_setmoptions(inp, sopt);
	break;

	case IP_PORTRANGE:
	error = sooptcopyin(sopt, &optval, sizeof optval,
	sizeof optval);
	if (error)
	break;

	/* INP_LOWPORT and INP_HIGHPORT are never left set together. */
	INP_WLOCK(inp);
	switch (optval) {
	case IP_PORTRANGE_DEFAULT:
	inp->inp_flags &= ~(INP_LOWPORT);
	inp->inp_flags &= ~(INP_HIGHPORT);
	break;

	case IP_PORTRANGE_HIGH:
	inp->inp_flags &= ~(INP_LOWPORT);
	inp->inp_flags \|= INP_HIGHPORT;
	break;

	case IP_PORTRANGE_LOW:
	inp->inp_flags &= ~(INP_HIGHPORT);
	inp->inp_flags \|= INP_LOWPORT;
	break;

	default:
	error = EINVAL;
	break;
	}
	INP_WUNLOCK(inp);
	break;

	#ifdef IPSEC
	case IP_IPSEC_POLICY:
	{
	caddr_t req;
	struct mbuf *m;

	if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
	break;
	if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
	break;
	req = mtod(m, caddr_t);
	error = ipsec4_set_policy(inp, sopt->sopt_name, req,
	m->m_len, (sopt->sopt_td != NULL) ?
	sopt->sopt_td->td_ucred : NULL);
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;

	case SOPT_GET:
	switch (sopt->sopt_name) {
	case IP_OPTIONS:
	case IP_RETOPTS:
	/* With no stored options a zero-length value is reported. */
	if (inp->inp_options)
	error = sooptcopyout(sopt,
	mtod(inp->inp_options,
	char *),
	inp->inp_options->m_len);
	else
	sopt->sopt_valsize = 0;
	break;

	case IP_TOS:
	case IP_TTL:
	case IP_MINTTL:
	case IP_RECVOPTS:
	case IP_RECVRETOPTS:
	case IP_RECVDSTADDR:
	case IP_RECVTTL:
	case IP_RECVIF:
	case IP_PORTRANGE:
	case IP_FAITH:
	case IP_ONESBCAST:
	case IP_DONTFRAG:
	switch (sopt->sopt_name) {

	case IP_TOS:
	optval = inp->inp_ip_tos;
	break;

	case IP_TTL:
	optval = inp->inp_ip_ttl;
	break;

	case IP_MINTTL:
	optval = inp->inp_ip_minttl;
	break;

	/* 1 when "bit" is set in inp_flags, 0 otherwise. */
	#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)

	case IP_RECVOPTS:
	optval = OPTBIT(INP_RECVOPTS);
	break;

	case IP_RECVRETOPTS:
	optval = OPTBIT(INP_RECVRETOPTS);
	break;

	case IP_RECVDSTADDR:
	optval = OPTBIT(INP_RECVDSTADDR);
	break;

	case IP_RECVTTL:
	optval = OPTBIT(INP_RECVTTL);
	break;

	case IP_RECVIF:
	optval = OPTBIT(INP_RECVIF);
	break;

	case IP_PORTRANGE:
	if (inp->inp_flags & INP_HIGHPORT)
	optval = IP_PORTRANGE_HIGH;
	else if (inp->inp_flags & INP_LOWPORT)
	optval = IP_PORTRANGE_LOW;
	else
	optval = 0;
	break;

	case IP_FAITH:
	optval = OPTBIT(INP_FAITH);
	break;

	case IP_ONESBCAST:
	optval = OPTBIT(INP_ONESBCAST);
	break;
	case IP_DONTFRAG:
	optval = OPTBIT(INP_DONTFRAG);
	break;
	}
	error = sooptcopyout(sopt, &optval, sizeof optval);
	break;

	/*
	* Multicast socket options are processed by the in_mcast
	* module.
	*/
	case IP_MULTICAST_IF:
	case IP_MULTICAST_VIF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_LOOP:
	case IP_MSFILTER:
	error = inp_getmoptions(inp, sopt);
	break;

	#ifdef IPSEC
	case IP_IPSEC_POLICY:
	{
	struct mbuf *m = NULL;
	caddr_t req = NULL;
	size_t len = 0;

	/*
	* m was just initialized to NULL, so this branch is not
	* taken and req/len reach ipsec4_get_policy() as NULL/0.
	*/
	if (m != 0) {
	req = mtod(m, caddr_t);
	len = m->m_len;
	}
	error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
	if (error == 0)
	error = soopt_mcopyout(sopt, m); /* XXX */
	if (error == 0)
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;
	}
	return (error);
	}

	/*
	* Routine called from ip_output() to loop back a copy of an IP multicast
	* packet to the input queue of a specified interface. Note that this
	* calls the output routine of the loopback "driver", but with an interface
	* pointer that might NOT be a loopback interface -- evil, but easier than
	* replicating that code here.
	*
	* The original mbuf chain m is left untouched; all work is done on a
	* duplicate. If the duplicate cannot be obtained nothing is looped back.
	* hlen is the IP header length used for the header checksum.
	*/
	static void
	ip_mloopback(struct ifnet ifp, struct mbuf m, struct sockaddr_in *dst,
	int hlen)
	{
	register struct ip *ip;
	struct mbuf *copym;

	/*
	* Make a deep copy of the packet because we're going to
	* modify the packet in order to generate checksums.
	*/
	copym = m_dup(m, M_DONTWAIT);
	if (copym != NULL && (copym->m_flags & M_EXT \|\| copym->m_len < hlen))
	copym = m_pullup(copym, hlen);
	if (copym != NULL) {
	/* If needed, compute the checksum and mark it as valid. */
	if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	in_delayed_cksum(copym);
	copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	copym->m_pkthdr.csum_flags \|=
	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
	copym->m_pkthdr.csum_data = 0xffff;
	}
	/*
	* We don't bother to fragment if the IP length is greater
	* than the interface's MTU. Can this possibly matter?
	*/
	ip = mtod(copym, struct ip *);
	ip->ip_len = htons(ip->ip_len);
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	ip->ip_sum = in_cksum(copym, hlen);
	#if 1 /* XXX */
	/* Complain about, then overwrite, a non-AF_INET family in dst. */
	if (dst->sin_family != AF_INET) {
	printf("ip_mloopback: bad address family %d\n",
	dst->sin_family);
	dst->sin_family = AF_INET;
	}
	#endif
	if_simloop(ifp, copym, dst->sin_family, 0);
	}
	}
	Index: projects/arpv2_merge_1/sys/netinet6/icmp6.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/icmp6.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/icmp6.c (revision 185839)
	@@ -1,2831 +1,2828 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
	*/

	/*-
	* Copyright (c) 1982, 1986, 1988, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"
	#include "opt_ipsec.h"

	#include <sys/param.h>
	#include <sys/domain.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/protosw.h>
	#include <sys/signalvar.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sx.h>
	#include <sys/syslog.h>
	#include <sys/systm.h>
	#include <sys/time.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_dl.h>
	+#include <net/if_llatbl.h>
	#include <net/if_types.h>
	#include <net/route.h>
	#include <net/vnet.h>
	-#include <net/if_llatbl.h>

	#include <netinet/in.h>
	#include <netinet/in_pcb.h>
	#include <netinet/in_var.h>
	#include <netinet/ip6.h>
	#include <netinet/icmp6.h>
	#include <netinet/tcp_var.h>
	#include <netinet/vinet.h>

	#include <netinet6/in6_ifattach.h>
	#include <netinet6/in6_pcb.h>
	#include <netinet6/ip6protosw.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/mld6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet6/vinet6.h>

	#ifdef IPSEC
	#include <netipsec/ipsec.h>
	#include <netipsec/key.h>
	#endif

	extern struct domain inet6domain;

	#ifdef VIMAGE_GLOBALS
	extern struct inpcbinfo ripcbinfo;
	extern struct inpcbhead ripcb;
	extern int icmp6errppslim;
	extern int icmp6_nodeinfo;

	struct icmp6stat icmp6stat;
	static int icmp6errpps_count;
	static struct timeval icmp6errppslim_last;
	#endif

	/* Forward declarations of the file-local helpers. */
	static void icmp6_errcount(struct icmp6errstat *, int, int);
	static int icmp6_rip6_input(struct mbuf **, int);
	static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
	static const char *icmp6_redirect_diag __P((struct in6_addr *,
		struct in6_addr *, struct in6_addr *));
	static struct mbuf *ni6_input(struct mbuf *, int);
	static struct mbuf *ni6_nametodns(const char *, int, int);
	static int ni6_dnsmatch(const char *, int, const char *, int);
	static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
		struct ifnet **, struct in6_addr *));
	static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
		struct ifnet *, int));
	static int icmp6_notify_error(struct mbuf **, int, int, int);


	/*
	 * ICMPv6 initialisation hook: resets V_icmp6errpps_count to zero and
	 * then calls mld6_init().
	 * NOTE(review): the counter is presumably the one consumed by the
	 * ICMPv6 error rate limiter -- confirm against icmp6_ratelimit().
	 */
	void
	icmp6_init(void)
	{
	INIT_VNET_INET6(curvnet);

	V_icmp6errpps_count = 0;

	mld6_init();
	}

	/*
	 * Account one ICMPv6 error in the histogram pointed to by "stat".
	 *
	 * Exactly one counter is incremented per call: the one matching the
	 * (type, code) pair, or icp6errs_unknown when the type is not listed
	 * here or the code is not listed for that type.
	 */
	static void
	icmp6_errcount(struct icmp6errstat *stat, int type, int code)
	{
		/* Types that are counted regardless of their code. */
		if (type == ICMP6_PACKET_TOO_BIG) {
			stat->icp6errs_packet_too_big++;
			return;
		}
		if (type == ND_REDIRECT) {
			stat->icp6errs_redirect++;
			return;
		}

		/* Types that are broken down further by code. */
		if (type == ICMP6_DST_UNREACH) {
			if (code == ICMP6_DST_UNREACH_NOROUTE) {
				stat->icp6errs_dst_unreach_noroute++;
				return;
			} else if (code == ICMP6_DST_UNREACH_ADMIN) {
				stat->icp6errs_dst_unreach_admin++;
				return;
			} else if (code == ICMP6_DST_UNREACH_BEYONDSCOPE) {
				stat->icp6errs_dst_unreach_beyondscope++;
				return;
			} else if (code == ICMP6_DST_UNREACH_ADDR) {
				stat->icp6errs_dst_unreach_addr++;
				return;
			} else if (code == ICMP6_DST_UNREACH_NOPORT) {
				stat->icp6errs_dst_unreach_noport++;
				return;
			}
		} else if (type == ICMP6_TIME_EXCEEDED) {
			if (code == ICMP6_TIME_EXCEED_TRANSIT) {
				stat->icp6errs_time_exceed_transit++;
				return;
			} else if (code == ICMP6_TIME_EXCEED_REASSEMBLY) {
				stat->icp6errs_time_exceed_reassembly++;
				return;
			}
		} else if (type == ICMP6_PARAM_PROB) {
			if (code == ICMP6_PARAMPROB_HEADER) {
				stat->icp6errs_paramprob_header++;
				return;
			} else if (code == ICMP6_PARAMPROB_NEXTHEADER) {
				stat->icp6errs_paramprob_nextheader++;
				return;
			} else if (code == ICMP6_PARAMPROB_OPTION) {
				stat->icp6errs_paramprob_option++;
				return;
			}
		}

		/* Nothing above matched. */
		stat->icp6errs_unknown++;
	}

	/*
	 * Front end for icmp6_error() for callers whose offending packet may
	 * lack scope zone information: the source and destination addresses of
	 * the IPv6 header are first scoped against "ifp" with in6_setscope(),
	 * then the packet is passed on to icmp6_error() unchanged otherwise.
	 *
	 * Nothing is sent when ifp is NULL, when the IPv6 header cannot be made
	 * available, or when either in6_setscope() call reports failure.
	 */
	void
	icmp6_error2(struct mbuf *m, int type, int code, int param,
	    struct ifnet *ifp)
	{
		INIT_VNET_INET6(curvnet);
		struct ip6_hdr *hdr;

		if (ifp == NULL)
			return;

	#ifdef PULLDOWN_TEST
		if (m->m_len < sizeof(struct ip6_hdr)) {
			m = m_pullup(m, sizeof(struct ip6_hdr));
			if (m == NULL)
				return;
		}
	#else
		IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
	#endif

		hdr = mtod(m, struct ip6_hdr *);

		/* Source is scoped first; destination only if that succeeded. */
		if (in6_setscope(&hdr->ip6_src, ifp, NULL) != 0 ||
		    in6_setscope(&hdr->ip6_dst, ifp, NULL) != 0)
			return;

		icmp6_error(m, type, code, param);
	}

	/*
	* Generate an error packet of type error in response to bad IP6 packet.
	*/
	void
	icmp6_error(struct mbuf *m, int type, int code, int param)
	{
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr oip6, nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	V_icmp6stat.icp6s_error++;

	/* count per-type-code statistics */
	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);

	#ifdef M_DECRYPTED /not openbsd/
	if (m->m_flags & M_DECRYPTED) {
	V_icmp6stat.icp6s_canterror++;
	goto freeit;
	}
	#endif

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
	#else
	if (m->m_len < sizeof(struct ip6_hdr)) {
	m = m_pullup(m, sizeof(struct ip6_hdr));
	if (m == NULL)
	return;
	}
	#endif
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	* If the destination address of the erroneous packet is a multicast
	* address, or the packet was sent using link-layer multicast,
	* we should basically suppress sending an error (RFC 2463, Section
	* 2.4).
	* We have two exceptions (the item e.2 in that section):
	* - the Pakcet Too Big message can be sent for path MTU discovery.
	* - the Parameter Problem Message that can be allowed an icmp6 error
	* in the option type field. This check has been done in
	* ip6_unknown_opt(), so we can just check the type and code.
	*/
	if ((m->m_flags & (M_BCAST\|M_MCAST) \|\|
	IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	(type != ICMP6_PACKET_TOO_BIG &&
	(type != ICMP6_PARAM_PROB \|\|
	code != ICMP6_PARAMPROB_OPTION)))
	goto freeit;

	/*
	* RFC 2463, 2.4 (e.5): source address check.
	* XXX: the case of anycast source?
	*/
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) \|\|
	IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
	goto freeit;

	/*
	* If we are about to send ICMPv6 against ICMPv6 error/redirect,
	* don't do it.
	*/
	nxt = -1;
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
	struct icmp6_hdr *icp;

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
	icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
	#else
	IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
	sizeof(*icp));
	if (icp == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif
	if (icp->icmp6_type < ICMP6_ECHO_REQUEST \|\|
	icp->icmp6_type == ND_REDIRECT) {
	/*
	* ICMPv6 error
	* Special case: for redirect (which is
	* informational) we must not send icmp6 error.
	*/
	V_icmp6stat.icp6s_canterror++;
	goto freeit;
	} else {
	/* ICMPv6 informational - send the error */
	}
	} else {
	/* non-ICMPv6 - send the error */
	}

	oip6 = mtod(m, struct ip6_hdr ); / adjust pointer */

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
	V_icmp6stat.icp6s_toofreq++;
	goto freeit;
	}

	/*
	* OK, ICMP6 can be generated.
	*/

	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
	m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_DONTWAIT);
	if (m && m->m_len < preplen)
	m = m_pullup(m, preplen);
	if (m == NULL) {
	nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
	return;
	}

	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src = oip6->ip6_src;
	nip6->ip6_dst = oip6->ip6_dst;

	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	/*
	* icmp6_reflect() is designed to be in the input path.
	* icmp6_error() can be called from both input and output path,
	* and if we are in output path rcvif could contain bogus value.
	* clear m->m_pkthdr.rcvif for safety, we should have enough scope
	* information in ip header (nip6).
	*/
	m->m_pkthdr.rcvif = NULL;

	V_icmp6stat.icp6s_outhist[type]++;
	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */

	return;

	freeit:
	/*
	* If we can't tell whether or not we can generate ICMP6, free it.
	*/
	m_freem(m);
	}

	/*
	* Process a received ICMP6 message.
	*/
	int
	icmp6_input(struct mbuf *mp, int offp, int proto)
	{
	INIT_VNET_INET6(curvnet);
	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
	struct mbuf m = mp, *n;
	struct ip6_hdr ip6, nip6;
	struct icmp6_hdr icmp6, nicmp6;
	int off = *offp;
	int icmp6len = m->m_pkthdr.len - *offp;
	int code, sum, noff;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
	/* m might change if M_LOOP. So, call mtod after this */
	#endif

	/*
	* Locate icmp6 structure in mbuf, and check
	* that not corrupted and of at least minimum length
	*/

	ip6 = mtod(m, struct ip6_hdr *);
	if (icmp6len < sizeof(struct icmp6_hdr)) {
	V_icmp6stat.icp6s_tooshort++;
	goto freeit;
	}

	/*
	* calculate the checksum
	*/
	#ifndef PULLDOWN_TEST
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr , m, off, sizeof(icmp6));
	if (icmp6 == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return IPPROTO_DONE;
	}
	#endif
	code = icmp6->icmp6_code;

	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
	nd6log((LOG_ERR,
	"ICMP6 checksum error(%d\|%x) %s\n",
	icmp6->icmp6_type, sum,
	ip6_sprintf(ip6bufs, &ip6->ip6_src)));
	V_icmp6stat.icp6s_checksum++;
	goto freeit;
	}

	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
	/*
	* Deliver very specific ICMP6 type only.
	* This is important to deliver TOOBIG. Otherwise PMTUD
	* will not work.
	*/
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
	case ICMP6_PACKET_TOO_BIG:
	case ICMP6_TIME_EXCEEDED:
	break;
	default:
	goto freeit;
	}
	}

	V_icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
	switch (code) {
	case ICMP6_DST_UNREACH_NOROUTE:
	code = PRC_UNREACH_NET;
	break;
	case ICMP6_DST_UNREACH_ADMIN:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
	code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
	break;
	case ICMP6_DST_UNREACH_ADDR:
	code = PRC_HOSTDEAD;
	break;
	case ICMP6_DST_UNREACH_BEYONDSCOPE:
	/* I mean "source address was incorrect." */
	code = PRC_PARAMPROB;
	break;
	case ICMP6_DST_UNREACH_NOPORT:
	code = PRC_UNREACH_PORT;
	break;
	default:
	goto badcode;
	}
	goto deliver;
	break;

	case ICMP6_PACKET_TOO_BIG:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);

	/* validation is made in icmp6_mtudisc_update */

	code = PRC_MSGSIZE;

	/*
	* Updating the path MTU will be done after examining
	* intermediate extension headers.
	*/
	goto deliver;
	break;

	case ICMP6_TIME_EXCEEDED:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
	switch (code) {
	case ICMP6_TIME_EXCEED_TRANSIT:
	code = PRC_TIMXCEED_INTRANS;
	break;
	case ICMP6_TIME_EXCEED_REASSEMBLY:
	code = PRC_TIMXCEED_REASS;
	break;
	default:
	goto badcode;
	}
	goto deliver;
	break;

	case ICMP6_PARAM_PROB:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
	switch (code) {
	case ICMP6_PARAMPROB_NEXTHEADER:
	code = PRC_UNREACH_PROTOCOL;
	break;
	case ICMP6_PARAMPROB_HEADER:
	case ICMP6_PARAMPROB_OPTION:
	code = PRC_PARAMPROB;
	break;
	default:
	goto badcode;
	}
	goto deliver;
	break;

	case ICMP6_ECHO_REQUEST:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
	if (code != 0)
	goto badcode;
	if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
	/* Give up remote */
	break;
	}
	if ((n->m_flags & M_EXT) != 0
	\|\| n->m_len < off + sizeof(struct icmp6_hdr)) {
	struct mbuf *n0 = n;
	const int maxlen = sizeof(nip6) + sizeof(nicmp6);
	int n0len;

	MGETHDR(n, M_DONTWAIT, n0->m_type);
	n0len = n0->m_pkthdr.len; /* save for use below */
	if (n)
	M_MOVE_PKTHDR(n, n0);
	if (n && maxlen >= MHLEN) {
	MCLGET(n, M_DONTWAIT);
	if ((n->m_flags & M_EXT) == 0) {
	m_free(n);
	n = NULL;
	}
	}
	if (n == NULL) {
	/* Give up remote */
	m_freem(n0);
	break;
	}
	/*
	* Copy IPv6 and ICMPv6 only.
	*/
	nip6 = mtod(n, struct ip6_hdr *);
	bcopy(ip6, nip6, sizeof(struct ip6_hdr));
	nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
	bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
	noff = sizeof(struct ip6_hdr);
	/* new mbuf contains only ipv6+icmpv6 headers */
	n->m_len = noff + sizeof(struct icmp6_hdr);
	/*
	* Adjust mbuf. ip6_plen will be adjusted in
	* ip6_output().
	*/
	m_adj(n0, off + sizeof(struct icmp6_hdr));
	/* recalculate complete packet size */
	n->m_pkthdr.len = n0len + (noff - off);
	n->m_next = n0;
	} else {
	nip6 = mtod(n, struct ip6_hdr *);
	IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
	sizeof(*nicmp6));
	noff = off;
	}
	nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
	nicmp6->icmp6_code = 0;
	if (n) {
	V_icmp6stat.icp6s_reflect++;
	V_icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
	icmp6_reflect(n, noff);
	}
	break;

	case ICMP6_ECHO_REPLY:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
	if (code != 0)
	goto badcode;
	break;

	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
	if (icmp6len < sizeof(struct mld_hdr))
	goto badlen;
	if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
	else
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	mld6_input(m, off);
	m = NULL;
	goto freeit;
	}
	mld6_input(n, off);
	/* m stays. */
	break;

	case MLD_LISTENER_DONE:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
	if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */
	goto badlen;
	break; /* nothing to be done in kernel */

	case MLD_MTRACE_RESP:
	case MLD_MTRACE:
	/* XXX: these two are experimental. not officially defined. */
	/* XXX: per-interface statistics? */
	break; /* just pass it to applications */

	case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */
	{
	enum { WRU, FQDN } mode;

	if (!V_icmp6_nodeinfo)
	break;

	if (icmp6len == sizeof(struct icmp6_hdr) + 4)
	mode = WRU;
	else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
	mode = FQDN;
	else
	goto badlen;

	#define hostnamelen strlen(V_hostname)
	if (mode == FQDN) {
	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
	IPPROTO_DONE);
	#endif
	n = m_copy(m, 0, M_COPYALL);
	if (n)
	n = ni6_input(n, off);
	/* XXX meaningless if n == NULL */
	noff = sizeof(struct ip6_hdr);
	} else {
	u_char *p;
	int maxlen, maxhlen;

	/*
	* XXX: this combination of flags is pointless,
	* but should we keep this for compatibility?
	*/
	if ((V_icmp6_nodeinfo & 5) != 5)
	break;

	if (code != 0)
	goto badcode;
	maxlen = sizeof(nip6) + sizeof(nicmp6) + 4;
	if (maxlen >= MCLBYTES) {
	/* Give up remote */
	break;
	}
	MGETHDR(n, M_DONTWAIT, m->m_type);
	if (n && maxlen > MHLEN) {
	MCLGET(n, M_DONTWAIT);
	if ((n->m_flags & M_EXT) == 0) {
	m_free(n);
	n = NULL;
	}
	}
	if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
	/*
	* Previous code did a blind M_COPY_PKTHDR
	* and said "just for rcvif". If true, then
	* we could tolerate the dup failing (due to
	* the deep copy of the tag chain). For now
	* be conservative and just fail.
	*/
	m_free(n);
	n = NULL;
	}
	if (n == NULL) {
	/* Give up remote */
	break;
	}
	n->m_pkthdr.rcvif = NULL;
	n->m_len = 0;
	maxhlen = M_TRAILINGSPACE(n) - maxlen;
	mtx_lock(&hostname_mtx);
	if (maxhlen > hostnamelen)
	maxhlen = hostnamelen;
	/*
	* Copy IPv6 and ICMPv6 only.
	*/
	nip6 = mtod(n, struct ip6_hdr *);
	bcopy(ip6, nip6, sizeof(struct ip6_hdr));
	nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
	bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
	p = (u_char *)(nicmp6 + 1);
	bzero(p, 4);
	bcopy(V_hostname, p + 4, maxhlen); /* meaningless TTL */
	mtx_unlock(&hostname_mtx);
	noff = sizeof(struct ip6_hdr);
	n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
	sizeof(struct icmp6_hdr) + 4 + maxhlen;
	nicmp6->icmp6_type = ICMP6_WRUREPLY;
	nicmp6->icmp6_code = 0;
	}
	#undef hostnamelen
	if (n) {
	V_icmp6stat.icp6s_reflect++;
	V_icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
	icmp6_reflect(n, noff);
	}
	break;
	}

	case ICMP6_WRUREPLY:
	if (code != 0)
	goto badcode;
	break;

	case ND_ROUTER_SOLICIT:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
	if (code != 0)
	goto badcode;
	if (icmp6len < sizeof(struct nd_router_solicit))
	goto badlen;
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	nd6_rs_input(m, off, icmp6len);
	m = NULL;
	goto freeit;
	}
	nd6_rs_input(n, off, icmp6len);
	/* m stays. */
	break;

	case ND_ROUTER_ADVERT:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
	if (code != 0)
	goto badcode;
	if (icmp6len < sizeof(struct nd_router_advert))
	goto badlen;
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	nd6_ra_input(m, off, icmp6len);
	m = NULL;
	goto freeit;
	}
	nd6_ra_input(n, off, icmp6len);
	/* m stays. */
	break;

	case ND_NEIGHBOR_SOLICIT:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
	if (code != 0)
	goto badcode;
	if (icmp6len < sizeof(struct nd_neighbor_solicit))
	goto badlen;
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	nd6_ns_input(m, off, icmp6len);
	m = NULL;
	goto freeit;
	}
	nd6_ns_input(n, off, icmp6len);
	/* m stays. */
	break;

	case ND_NEIGHBOR_ADVERT:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
	if (code != 0)
	goto badcode;
	if (icmp6len < sizeof(struct nd_neighbor_advert))
	goto badlen;
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	nd6_na_input(m, off, icmp6len);
	m = NULL;
	goto freeit;
	}
	nd6_na_input(n, off, icmp6len);
	/* m stays. */
	break;

	case ND_REDIRECT:
	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
	if (code != 0)
	goto badcode;
	if (icmp6len < sizeof(struct nd_redirect))
	goto badlen;
	if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
	/* give up local */
	icmp6_redirect_input(m, off);
	m = NULL;
	goto freeit;
	}
	icmp6_redirect_input(n, off);
	/* m stays. */
	break;

	case ICMP6_ROUTER_RENUMBERING:
	if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
	code != ICMP6_ROUTER_RENUMBERING_RESULT)
	goto badcode;
	if (icmp6len < sizeof(struct icmp6_router_renum))
	goto badlen;
	break;

	default:
	nd6log((LOG_DEBUG,
	"icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
	icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst),
	m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
	if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
	/* ICMPv6 error: MUST deliver it by spec... */
	code = PRC_NCMDS;
	/* deliver */
	} else {
	/* ICMPv6 informational: MUST not deliver */
	break;
	}
	deliver:
	if (icmp6_notify_error(&m, off, icmp6len, code)) {
	/* In this case, m should've been freed. */
	return (IPPROTO_DONE);
	}
	break;

	badcode:
	V_icmp6stat.icp6s_badcode++;
	break;

	badlen:
	V_icmp6stat.icp6s_badlen++;
	break;
	}

	/* deliver the packet to appropriate sockets */
	icmp6_rip6_input(&m, *offp);

	return IPPROTO_DONE;

	freeit:
	m_freem(m);
	return IPPROTO_DONE;
	}

	/*
	 * Handle a received ICMPv6 error: walk the extension headers of the
	 * packet quoted inside the error to find the upper-layer protocol, then
	 * call that protocol's pr_ctlinput handler (if any) with "code".  For
	 * Packet Too Big, icmp6_mtudisc_update() is called first.
	 *
	 * mp:       address of the packet pointer; *mp is rewritten with the
	 *           (possibly unchanged) packet on success.
	 * off:      offset of the ICMPv6 header in the packet.
	 * icmp6len: length of the ICMPv6 message.
	 * code:     PRC_* code passed on to the control-input handler.
	 *
	 * Returns 0 on success and -1 when the packet was too short or scope
	 * resolution failed; on the freeit path the packet is freed here.
	 */
	static int
	icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
	{
		INIT_VNET_INET6(curvnet);
		struct mbuf *m = *mp;
		struct icmp6_hdr *icmp6;
		struct ip6_hdr *eip6;
		u_int32_t notifymtu;
		struct sockaddr_in6 icmp6src, icmp6dst;

		if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
			V_icmp6stat.icp6s_tooshort++;
			goto freeit;
		}
	#ifndef PULLDOWN_TEST
		IP6_EXTHDR_CHECK(m, off,
		    sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1);
		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
	#else
		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
		if (icmp6 == NULL) {
			V_icmp6stat.icp6s_tooshort++;
			return (-1);
		}
	#endif
		eip6 = (struct ip6_hdr *)(icmp6 + 1);

		/* Detect the upper level protocol */
		{
			void (*ctlfunc)(int, struct sockaddr *, void *);
			u_int8_t nxt = eip6->ip6_nxt;
			int eoff = off + sizeof(struct icmp6_hdr) +
			    sizeof(struct ip6_hdr);
			struct ip6ctlparam ip6cp;
			struct in6_addr *finaldst = NULL;
			int icmp6type = icmp6->icmp6_type;
			struct ip6_frag *fh;
			struct ip6_rthdr *rth;
			struct ip6_rthdr0 *rth0;
			int rthlen;

			while (1) { /* XXX: should avoid infinite loop explicitly? */
				struct ip6_ext *eh;

				switch (nxt) {
				case IPPROTO_HOPOPTS:
				case IPPROTO_DSTOPTS:
				case IPPROTO_AH:
	#ifndef PULLDOWN_TEST
					IP6_EXTHDR_CHECK(m, 0,
					    eoff + sizeof(struct ip6_ext), -1);
					eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff);
	#else
					IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
					    eoff, sizeof(*eh));
					if (eh == NULL) {
						V_icmp6stat.icp6s_tooshort++;
						return (-1);
					}
	#endif

					/* AH length is in 4-byte units, others in 8. */
					if (nxt == IPPROTO_AH)
						eoff += (eh->ip6e_len + 2) << 2;
					else
						eoff += (eh->ip6e_len + 1) << 3;
					nxt = eh->ip6e_nxt;
					break;
				case IPPROTO_ROUTING:
					/*
					 * When the erroneous packet contains a
					 * routing header, we should examine the
					 * header to determine the final destination.
					 * Otherwise, we can't properly update
					 * information that depends on the final
					 * destination (e.g. path MTU).
					 */
	#ifndef PULLDOWN_TEST
					IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1);
					rth = (struct ip6_rthdr *)
					    (mtod(m, caddr_t) + eoff);
	#else
					IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
					    eoff, sizeof(*rth));
					if (rth == NULL) {
						V_icmp6stat.icp6s_tooshort++;
						return (-1);
					}
	#endif
					rthlen = (rth->ip6r_len + 1) << 3;
					/*
					 * XXX: currently there is no
					 * officially defined type other
					 * than type-0.
					 * Note that if the segment left field
					 * is 0, all intermediate hops must
					 * have been passed.
					 */
					if (rth->ip6r_segleft &&
					    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
						int hops;

	#ifndef PULLDOWN_TEST
						IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1);
						rth0 = (struct ip6_rthdr0 *)
						    (mtod(m, caddr_t) + eoff);
	#else
						IP6_EXTHDR_GET(rth0,
						    struct ip6_rthdr0 *, m,
						    eoff, rthlen);
						if (rth0 == NULL) {
							V_icmp6stat.icp6s_tooshort++;
							return (-1);
						}
	#endif
						/* just ignore a bogus header */
						if ((rth0->ip6r0_len % 2) == 0 &&
						    (hops = rth0->ip6r0_len/2))
							finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
					}
					eoff += rthlen;
					nxt = rth->ip6r_nxt;
					break;
				case IPPROTO_FRAGMENT:
	#ifndef PULLDOWN_TEST
					IP6_EXTHDR_CHECK(m, 0, eoff +
					    sizeof(struct ip6_frag), -1);
					fh = (struct ip6_frag *)(mtod(m, caddr_t) +
					    eoff);
	#else
					IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
					    eoff, sizeof(*fh));
					if (fh == NULL) {
						V_icmp6stat.icp6s_tooshort++;
						return (-1);
					}
	#endif
					/*
					 * Data after a fragment header is meaningless
					 * unless it is the first fragment, but
					 * we'll go to the notify label for path MTU
					 * discovery.
					 */
					if (fh->ip6f_offlg & IP6F_OFF_MASK)
						goto notify;

					eoff += sizeof(struct ip6_frag);
					nxt = fh->ip6f_nxt;
					break;
				default:
					/*
					 * This case includes ESP and the No Next
					 * Header.  In such cases going to the notify
					 * label does not have any meaning
					 * (i.e. ctlfunc will be NULL), but we go
					 * anyway since we might have to update
					 * path MTU information.
					 */
					goto notify;
				}
			}
		  notify:
			/* Re-derive icmp6: the checks above may have moved data. */
	#ifndef PULLDOWN_TEST
			icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
	#else
			IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
			    sizeof(*icmp6) + sizeof(struct ip6_hdr));
			if (icmp6 == NULL) {
				V_icmp6stat.icp6s_tooshort++;
				return (-1);
			}
	#endif

			/*
			 * retrieve parameters from the inner IPv6 header, and convert
			 * them into sockaddr structures.
			 * XXX: there is no guarantee that the source or destination
			 * addresses of the inner packet are in the same scope as
			 * the addresses of the icmp packet.  But there is no other
			 * way to determine the zone.
			 */
			eip6 = (struct ip6_hdr *)(icmp6 + 1);

			bzero(&icmp6dst, sizeof(icmp6dst));
			icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
			icmp6dst.sin6_family = AF_INET6;
			if (finaldst == NULL)
				icmp6dst.sin6_addr = eip6->ip6_dst;
			else
				icmp6dst.sin6_addr = *finaldst;
			if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
				goto freeit;
			bzero(&icmp6src, sizeof(icmp6src));
			icmp6src.sin6_len = sizeof(struct sockaddr_in6);
			icmp6src.sin6_family = AF_INET6;
			icmp6src.sin6_addr = eip6->ip6_src;
			if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
				goto freeit;
			icmp6src.sin6_flowinfo =
			    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);

			if (finaldst == NULL)
				finaldst = &eip6->ip6_dst;
			ip6cp.ip6c_m = m;
			ip6cp.ip6c_icmp6 = icmp6;
			ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
			ip6cp.ip6c_off = eoff;
			ip6cp.ip6c_finaldst = finaldst;
			ip6cp.ip6c_src = &icmp6src;
			ip6cp.ip6c_nxt = nxt;

			if (icmp6type == ICMP6_PACKET_TOO_BIG) {
				notifymtu = ntohl(icmp6->icmp6_mtu);
				ip6cp.ip6c_cmdarg = (void *)&notifymtu;
				icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
			}

			ctlfunc = (void (*)(int, struct sockaddr *, void *))
			    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
			if (ctlfunc) {
				(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
				    &ip6cp);
			}
		}
		*mp = m;
		return (0);

	  freeit:
		m_freem(m);
		return (-1);
	}

	void
	icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
	{
	INIT_VNET_INET6(curvnet);
	struct in6_addr *dst = ip6cp->ip6c_finaldst;
	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
	struct mbuf m = ip6cp->ip6c_m; / will be necessary for scope issue */
	u_int mtu = ntohl(icmp6->icmp6_mtu);
	struct in_conninfo inc;

	#if 0
	/*
	* RFC2460 section 5, last paragraph.
	* even though minimum link MTU for IPv6 is IPV6_MMTU,
	* we may see ICMPv6 too big with mtu < IPV6_MMTU
	* due to packet translator in the middle.
	* see ip6_output() and ip6_getpmtu() "alwaysfrag" case for
	* special handling.
	*/
	if (mtu < IPV6_MMTU)
	return;
	#endif

	/*
	* we reject ICMPv6 too big with abnormally small value.
	* XXX what is the good definition of "abnormally small"?
	*/
	if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
	return;

	if (!validated)
	return;

	/*
	* In case the suggested mtu is less than IPV6_MMTU, we
	* only need to remember that it was for above mentioned
	* "alwaysfrag" case.
	* Try to be as close to the spec as possible.
	*/
	if (mtu < IPV6_MMTU)
	mtu = IPV6_MMTU - 8;

	bzero(&inc, sizeof(inc));
	inc.inc_flags = 1; /* IPv6 */
	inc.inc6_faddr = *dst;
	if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
	return;

	if (mtu < tcp_maxmtu6(&inc, NULL)) {
	tcp_hc_updatemtu(&inc, mtu);
	V_icmp6stat.icp6s_pmtuchg++;
	}
	}

	/*
	* Process a Node Information Query packet, based on
	* draft-ietf-ipngwg-icmp-name-lookups-07.
	*
	* Spec incompatibilities:
	* - IPv6 Subject address handling
	* - IPv4 Subject address handling support missing
	* - Proxy reply (answer even if it's not for me)
	* - joins NI group address at in6_ifattach() time only, does not cope
	* with hostname changes by sethostname(3)
	*/
	#define hostnamelen strlen(V_hostname)
	static struct mbuf *
	ni6_input(struct mbuf *m, int off)
	{
	INIT_VNET_INET6(curvnet);
	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
	struct icmp6_nodeinfo ni6, nni6;
	struct mbuf *n = NULL;
	u_int16_t qtype;
	int subjlen;
	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
	struct ni_reply_fqdn *fqdn;
	int addrs; /* for NI_QTYPE_NODEADDR */
	struct ifnet ifp = NULL; / for NI_QTYPE_NODEADDR */
	struct in6_addr in6_subj; /* subject address */
	struct ip6_hdr *ip6;
	int oldfqdn = 0; /* if 1, return pascal string (03 draft) */
	char *subj = NULL;
	struct in6_ifaddr *ia6 = NULL;

	ip6 = mtod(m, struct ip6_hdr *);
	#ifndef PULLDOWN_TEST
	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
	#else
	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo , m, off, sizeof(ni6));
	if (ni6 == NULL) {
	/* m is already reclaimed */
	return (NULL);
	}
	#endif

	/*
	* Validate IPv6 source address.
	* The default configuration MUST be to refuse answering queries from
	* global-scope addresses according to RFC4602.
	* Notes:
	* - it's not very clear what "refuse" means; this implementation
	* simply drops it.
	* - it's not very easy to identify global-scope (unicast) addresses
	* since there are many prefixes for them. It should be safer
	* and in practice sufficient to check "all" but loopback and
	* link-local (note that site-local unicast was deprecated and
	* ULA is defined as global scope-wise)
	*/
	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
	!IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
	!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
	goto bad;

	/*
	* Validate IPv6 destination address.
	*
	* The Responder must discard the Query without further processing
	* unless it is one of the Responder's unicast or anycast addresses, or
	* a link-local scope multicast address which the Responder has joined.
	* [RFC4602, Section 5.]
	*/
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
	goto bad;
	/* else it's a link-local multicast, fine */
	} else { /* unicast or anycast */
	if ((ia6 = ip6_getdstifaddr(m)) == NULL)
	goto bad; /* XXX impossible */

	if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
	!(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
	nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
	"a temporary address in %s:%d",
	__FILE__, __LINE__));
	goto bad;
	}
	}

	/* validate query Subject field. */
	qtype = ntohs(ni6->ni_qtype);
	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
	switch (qtype) {
	case NI_QTYPE_NOOP:
	case NI_QTYPE_SUPTYPES:
	/* 07 draft */
	if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
	break;
	/* FALLTHROUGH */
	case NI_QTYPE_FQDN:
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
	switch (ni6->ni_code) {
	case ICMP6_NI_SUBJ_IPV6:
	#if ICMP6_NI_SUBJ_IPV6 != 0
	case 0:
	#endif
	/*
	* backward compatibility - try to accept 03 draft
	* format, where no Subject is present.
	*/
	if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
	subjlen == 0) {
	oldfqdn++;
	break;
	}
	#if ICMP6_NI_SUBJ_IPV6 != 0
	if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
	goto bad;
	#endif

	if (subjlen != sizeof(struct in6_addr))
	goto bad;

	/*
	* Validate Subject address.
	*
	* Not sure what exactly "address belongs to the node"
	* means in the spec, is it just unicast, or what?
	*
	* At this moment we consider Subject address as
	* "belong to the node" if the Subject address equals
	* to the IPv6 destination address; validation for
	* IPv6 destination address should have done enough
	* check for us.
	*
	* We do not do proxy at this moment.
	*/
	/* m_pulldown instead of copy? */
	m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
	subjlen, (caddr_t)&in6_subj);
	if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
	goto bad;

	subj = (char *)&in6_subj;
	if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
	break;

	/*
	* XXX if we are to allow other cases, we should really
	* be careful about scope here.
	* basically, we should disallow queries toward IPv6
	* destination X with subject Y,
	* if scope(X) > scope(Y).
	* if we allow scope(X) > scope(Y), it will result in
	* information leakage across scope boundary.
	*/
	goto bad;

	case ICMP6_NI_SUBJ_FQDN:
	/*
	* Validate Subject name with gethostname(3).
	*
	* The behavior may need some debate, since:
	* - we are not sure if the node has FQDN as
	* hostname (returned by gethostname(3)).
	* - the code does wildcard match for truncated names.
	* however, we are not sure if we want to perform
	* wildcard match, if gethostname(3) side has
	* truncated hostname.
	*/
	mtx_lock(&hostname_mtx);
	n = ni6_nametodns(V_hostname, hostnamelen, 0);
	mtx_unlock(&hostname_mtx);
	if (!n \|\| n->m_next \|\| n->m_len == 0)
	goto bad;
	IP6_EXTHDR_GET(subj, char *, m,
	off + sizeof(struct icmp6_nodeinfo), subjlen);
	if (subj == NULL)
	goto bad;
	if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
	n->m_len)) {
	goto bad;
	}
	m_freem(n);
	n = NULL;
	break;

	case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */
	default:
	goto bad;
	}
	break;
	}

	/* refuse based on configuration. XXX ICMP6_NI_REFUSED? */
	switch (qtype) {
	case NI_QTYPE_FQDN:
	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
	goto bad;
	break;
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
	goto bad;
	break;
	}

	/* guess reply length */
	switch (qtype) {
	case NI_QTYPE_NOOP:
	break; /* no reply data */
	case NI_QTYPE_SUPTYPES:
	replylen += sizeof(u_int32_t);
	break;
	case NI_QTYPE_FQDN:
	/* XXX will append an mbuf */
	replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
	break;
	case NI_QTYPE_NODEADDR:
	addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
	if ((replylen += addrs * (sizeof(struct in6_addr) +
	sizeof(u_int32_t))) > MCLBYTES)
	replylen = MCLBYTES; /* XXX: will truncate pkt later */
	break;
	case NI_QTYPE_IPV4ADDR:
	/* unsupported - should respond with unknown Qtype? */
	break;
	default:
	/*
	* XXX: We must return a reply with the ICMP6 code
	* `unknown Qtype' in this case. However we regard the case
	* as an FQDN query for backward compatibility.
	* Older versions set a random value to this field,
	* so it rarely varies in the defined qtypes.
	* But the mechanism is not reliable...
	* maybe we should obsolete older versions.
	*/
	qtype = NI_QTYPE_FQDN;
	/* XXX will append an mbuf */
	replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
	oldfqdn++;
	break;
	}

	/* allocate an mbuf to reply. */
	MGETHDR(n, M_DONTWAIT, m->m_type);
	if (n == NULL) {
	m_freem(m);
	return (NULL);
	}
	M_MOVE_PKTHDR(n, m); /* just for recvif */
	if (replylen > MHLEN) {
	if (replylen > MCLBYTES) {
	/*
	* XXX: should we try to allocate more? But MCLBYTES
	* is probably much larger than IPV6_MMTU...
	*/
	goto bad;
	}
	MCLGET(n, M_DONTWAIT);
	if ((n->m_flags & M_EXT) == 0) {
	goto bad;
	}
	}
	n->m_pkthdr.len = n->m_len = replylen;

	/* copy mbuf header and IPv6 + Node Information base headers */
	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
	nni6 = (struct icmp6_nodeinfo )(mtod(n, struct ip6_hdr ) + 1);
	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));

	/* qtype dependent procedure */
	switch (qtype) {
	case NI_QTYPE_NOOP:
	nni6->ni_code = ICMP6_NI_SUCCESS;
	nni6->ni_flags = 0;
	break;
	case NI_QTYPE_SUPTYPES:
	{
	u_int32_t v;
	nni6->ni_code = ICMP6_NI_SUCCESS;
	nni6->ni_flags = htons(0x0000); /* raw bitmap */
	/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
	v = (u_int32_t)htonl(0x0000000f);
	bcopy(&v, nni6 + 1, sizeof(u_int32_t));
	break;
	}
	case NI_QTYPE_FQDN:
	nni6->ni_code = ICMP6_NI_SUCCESS;
	fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
	sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
	nni6->ni_flags = 0; /* XXX: meaningless TTL */
	fqdn->ni_fqdn_ttl = 0; /* ditto. */
	/*
	* XXX do we really have FQDN in variable "hostname"?
	*/
	mtx_lock(&hostname_mtx);
	n->m_next = ni6_nametodns(V_hostname, hostnamelen, oldfqdn);
	mtx_unlock(&hostname_mtx);
	if (n->m_next == NULL)
	goto bad;
	/* XXX we assume that n->m_next is not a chain */
	if (n->m_next->m_next != NULL)
	goto bad;
	n->m_pkthdr.len += n->m_next->m_len;
	break;
	case NI_QTYPE_NODEADDR:
	{
	int lenlim, copied;

	nni6->ni_code = ICMP6_NI_SUCCESS;
	n->m_pkthdr.len = n->m_len =
	sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
	lenlim = M_TRAILINGSPACE(n);
	copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
	/* XXX: reset mbuf length */
	n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
	sizeof(struct icmp6_nodeinfo) + copied;
	break;
	}
	default:
	break; /* XXX impossible! */
	}

	nni6->ni_type = ICMP6_NI_REPLY;
	m_freem(m);
	return (n);

	bad:
	m_freem(m);
	if (n)
	m_freem(n);
	return (NULL);
	}
	#undef hostnamelen

	/*
	 * make a mbuf with DNS-encoded string. no compression support.
	 *
	 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
	 * treated as truncated name (two \0 at the end). this is a wild guess.
	 *
	 * name    - host name bytes (need not be NUL-terminated within namelen)
	 * namelen - number of bytes of name to encode
	 * old     - return pascal string (length byte + raw name) if non-zero
	 *
	 * Returns a single mbuf whose m_len covers the encoded name (m_len is 0
	 * when namelen is 0 and old is zero), or NULL when allocation fails, a
	 * label is empty or 64 bytes or longer, or the result does not fit.
	 */
	static struct mbuf *
	ni6_nametodns(const char *name, int namelen, int old)
	{
		struct mbuf *m;
		char *cp, *ep;
		const char *p, *q;
		int i, len, nterm;

		if (old)
			len = namelen + 1;
		else
			len = MCLBYTES;

		/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
		MGET(m, M_DONTWAIT, MT_DATA);
		if (m && len > MLEN) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0)
				goto fail;
		}
		if (!m)
			goto fail;
		m->m_next = NULL;

		if (old) {
			/* pascal string: one length byte, then the raw name */
			m->m_len = len;
			*mtod(m, char *) = namelen;
			bcopy(name, mtod(m, char *) + 1, namelen);
			return m;
		} else {
			m->m_len = 0;
			cp = mtod(m, char *);
			ep = mtod(m, char *) + M_TRAILINGSPACE(m);

			/* if not certain about my name, return empty buffer */
			if (namelen == 0)
				return m;

			/*
			 * guess if it looks like shortened hostname, or FQDN.
			 * shortened hostname needs two trailing "\0".
			 */
			i = 0;
			for (p = name; p < name + namelen; p++) {
				if (*p && *p == '.')
					i++;
			}
			if (i < 2)
				nterm = 2;
			else
				nterm = 1;

			p = name;
			while (cp < ep && p < name + namelen) {
				/* measure the next label: up to '.', NUL or end */
				i = 0;
				for (q = p; q < name + namelen && *q && *q != '.'; q++)
					i++;
				/* result does not fit into mbuf */
				if (cp + i + 1 >= ep)
					goto fail;
				/*
				 * DNS label length restriction, RFC1035 page 8.
				 * "i == 0" case is included here to avoid returning
				 * 0-length label on "foo..bar".
				 */
				if (i <= 0 || i >= 64)
					goto fail;
				*cp++ = i;
				bcopy(p, cp, i);
				cp += i;
				p = q;
				if (p < name + namelen && *p == '.')
					p++;
			}
			/* termination */
			if (cp + nterm >= ep)
				goto fail;
			while (nterm-- > 0)
				*cp++ = '\0';
			m->m_len = cp - mtod(m, char *);
			return m;
		}

		panic("should not reach here");
		/* NOTREACHED */

	fail:
		if (m)
			m_freem(m);
		return NULL;
	}

	/*
	 * check if two DNS-encoded string matches. takes care of truncated
	 * form (with \0\0 at the end). no compression support.
	 * XXX upper/lowercase match (see RFC2065)
	 *
	 * a, alen - first encoded name and its length in bytes
	 * b, blen - second encoded name and its length in bytes
	 *
	 * Returns 1 when the names match (byte-identical, or label-by-label
	 * equal up to the point where either one ends in the truncated form),
	 * 0 otherwise. Labels are compared with bcmp, i.e. case-sensitively.
	 */
	static int
	ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
	{
		const char *a0, *b0;
		int l;

		/* simplest case - need validation? */
		if (alen == blen && bcmp(a, b, alen) == 0)
			return 1;

		a0 = a;
		b0 = b;

		/* termination is mandatory */
		if (alen < 2 || blen < 2)
			return 0;
		if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
			return 0;
		/* from here on the lengths exclude the final terminator */
		alen--;
		blen--;

		while (a - a0 < alen && b - b0 < blen) {
			if (a - a0 + 1 > alen || b - b0 + 1 > blen)
				return 0;

			if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
				return 0;
			/* we don't support compression yet */
			if (a[0] >= 64 || b[0] >= 64)
				return 0;

			/* truncated case */
			if (a[0] == 0 && a - a0 == alen - 1)
				return 1;
			if (b[0] == 0 && b - b0 == blen - 1)
				return 1;
			if (a[0] == 0 || b[0] == 0)
				return 0;

			if (a[0] != b[0])
				return 0;
			l = a[0];
			/* the label body must lie inside both buffers */
			if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
				return 0;
			if (bcmp(a + 1, b + 1, l) != 0)
				return 0;

			a += 1 + l;
			b += 1 + l;
		}

		if (a - a0 == alen && b - b0 == blen)
			return 1;
		else
			return 0;
	}

	/*
	* calculate the number of addresses to be returned in the node info reply.
	*/
	static int
	ni6_addrs(struct icmp6_nodeinfo ni6, struct mbuf m, struct ifnet **ifpp,
	struct in6_addr *subj)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp;
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	int addrs = 0, addrsofif, iffound = 0;
	int niflags = ni6->ni_flags;

	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
	switch (ni6->ni_code) {
	case ICMP6_NI_SUBJ_IPV6:
	if (subj == NULL) /* must be impossible... */
	return (0);
	break;
	default:
	/*
	* XXX: we only support IPv6 subject address for
	* this Qtype.
	*/
	return (0);
	}
	}

	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
	addrsofif = 0;
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	ifa6 = (struct in6_ifaddr *)ifa;

	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
	IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
	iffound = 1;

	/*
	* IPv4-mapped addresses can only be returned by a
	* Node Information proxy, since they represent
	* addresses of IPv4-only nodes, which perforce do
	* not implement this protocol.
	* [icmp-name-lookups-07, Section 5.4]
	* So we don't support NI_NODEADDR_FLAG_COMPAT in
	* this function at this moment.
	*/

	/* What do we have to do about ::1? */
	switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
	case IPV6_ADDR_SCOPE_LINKLOCAL:
	if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
	continue;
	break;
	case IPV6_ADDR_SCOPE_SITELOCAL:
	if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
	continue;
	break;
	case IPV6_ADDR_SCOPE_GLOBAL:
	if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
	continue;
	break;
	default:
	continue;
	}

	/*
	* check if anycast is okay.
	* XXX: just experimental. not in the spec.
	*/
	if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
	(niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
	continue; /* we need only unicast addresses */
	if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
	(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
	continue;
	}
	addrsofif++; /* count the address */
	}
	if (iffound) {
	*ifpp = ifp;
	IFNET_RUNLOCK();
	return (addrsofif);
	}

	addrs += addrsofif;
	}
	IFNET_RUNLOCK();

	return (addrs);
	}

	static int
	ni6_store_addrs(struct icmp6_nodeinfo ni6, struct icmp6_nodeinfo nni6,
	struct ifnet *ifp0, int resid)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct ifnet *ifp_dep = NULL;
	int copied = 0, allow_deprecated = 0;
	u_char cp = (u_char )(nni6 + 1);
	int niflags = ni6->ni_flags;
	u_int32_t ltime;

	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
	return (0); /* needless to copy */

	IFNET_RLOCK();
	again:

	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
	for (ifa = ifp->if_addrlist.tqh_first; ifa;
	ifa = ifa->ifa_list.tqe_next) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	ifa6 = (struct in6_ifaddr *)ifa;

	if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
	allow_deprecated == 0) {
	/*
	* prefererred address should be put before
	* deprecated addresses.
	*/

	/* record the interface for later search */
	if (ifp_dep == NULL)
	ifp_dep = ifp;

	continue;
	} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
	allow_deprecated != 0)
	continue; /* we now collect deprecated addrs */

	/* What do we have to do about ::1? */
	switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
	case IPV6_ADDR_SCOPE_LINKLOCAL:
	if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
	continue;
	break;
	case IPV6_ADDR_SCOPE_SITELOCAL:
	if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
	continue;
	break;
	case IPV6_ADDR_SCOPE_GLOBAL:
	if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
	continue;
	break;
	default:
	continue;
	}

	/*
	* check if anycast is okay.
	* XXX: just experimental. not in the spec.
	*/
	if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
	(niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
	continue;
	if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
	(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
	continue;
	}

	/* now we can copy the address */
	if (resid < sizeof(struct in6_addr) +
	sizeof(u_int32_t)) {
	/*
	* We give up much more copy.
	* Set the truncate flag and return.
	*/
	nni6->ni_flags \|= NI_NODEADDR_FLAG_TRUNCATE;
	IFNET_RUNLOCK();
	return (copied);
	}

	/*
	* Set the TTL of the address.
	* The TTL value should be one of the following
	* according to the specification:
	*
	* 1. The remaining lifetime of a DHCP lease on the
	* address, or
	* 2. The remaining Valid Lifetime of a prefix from
	* which the address was derived through Stateless
	* Autoconfiguration.
	*
	* Note that we currently do not support stateful
	* address configuration by DHCPv6, so the former
	* case can't happen.
	*/
	if (ifa6->ia6_lifetime.ia6t_expire == 0)
	ltime = ND6_INFINITE_LIFETIME;
	else {
	if (ifa6->ia6_lifetime.ia6t_expire >
	time_second)
	ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
	else
	ltime = 0;
	}

	bcopy(&ltime, cp, sizeof(u_int32_t));
	cp += sizeof(u_int32_t);

	/* copy the address itself */
	bcopy(&ifa6->ia_addr.sin6_addr, cp,
	sizeof(struct in6_addr));
	in6_clearscope((struct in6_addr )cp); / XXX */
	cp += sizeof(struct in6_addr);

	resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
	copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
	}
	if (ifp0) /* we need search only on the specified IF */
	break;
	}

	if (allow_deprecated == 0 && ifp_dep != NULL) {
	ifp = ifp_dep;
	allow_deprecated = 1;

	goto again;
	}

	IFNET_RUNLOCK();

	return (copied);
	}

	/*
	* XXX almost dup'ed code with rip6_input.
	*/
	static int
	icmp6_rip6_input(struct mbuf **mp, int off)
	{
	INIT_VNET_INET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct mbuf m = mp;
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct in6pcb *in6p;
	struct in6pcb *last = NULL;
	struct sockaddr_in6 fromsa;
	struct icmp6_hdr *icmp6;
	struct mbuf *opts = NULL;

	#ifndef PULLDOWN_TEST
	/* this is assumed to be safe. */
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr , m, off, sizeof(icmp6));
	if (icmp6 == NULL) {
	/* m is already reclaimed */
	return (IPPROTO_DONE);
	}
	#endif

	/*
	* XXX: the address may have embedded scope zone ID, which should be
	* hidden from applications.
	*/
	bzero(&fromsa, sizeof(fromsa));
	fromsa.sin6_family = AF_INET6;
	fromsa.sin6_len = sizeof(struct sockaddr_in6);
	fromsa.sin6_addr = ip6->ip6_src;
	if (sa6_recoverscope(&fromsa)) {
	m_freem(m);
	return (IPPROTO_DONE);
	}

	INP_INFO_RLOCK(&V_ripcbinfo);
	LIST_FOREACH(in6p, &V_ripcb, inp_list) {
	if ((in6p->inp_vflag & INP_IPV6) == 0)
	continue;
	if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
	continue;
	if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
	!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
	continue;
	if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
	!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
	continue;
	INP_RLOCK(in6p);
	if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
	in6p->in6p_icmp6filt)) {
	INP_RUNLOCK(in6p);
	continue;
	}
	if (last) {
	struct mbuf *n = NULL;

	/*
	* Recent network drivers tend to allocate a single
	* mbuf cluster, rather than to make a couple of
	* mbufs without clusters. Also, since the IPv6 code
	* path tries to avoid m_pullup(), it is highly
	* probable that we still have an mbuf cluster here
	* even though the necessary length can be stored in an
	* mbuf's internal buffer.
	* Meanwhile, the default size of the receive socket
	* buffer for raw sockets is not so large. This means
	* the possibility of packet loss is relatively higher
	* than before. To avoid this scenario, we copy the
	* received data to a separate mbuf that does not use
	* a cluster, if possible.
	* XXX: it is better to copy the data after stripping
	* intermediate headers.
	*/
	if ((m->m_flags & M_EXT) && m->m_next == NULL &&
	m->m_len <= MHLEN) {
	MGET(n, M_DONTWAIT, m->m_type);
	if (n != NULL) {
	if (m_dup_pkthdr(n, m, M_NOWAIT)) {
	bcopy(m->m_data, n->m_data,
	m->m_len);
	n->m_len = m->m_len;
	} else {
	m_free(n);
	n = NULL;
	}
	}
	}
	if (n != NULL \|\|
	(n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
	if (last->in6p_flags & IN6P_CONTROLOPTS)
	ip6_savecontrol(last, n, &opts);
	/* strip intermediate headers */
	m_adj(n, off);
	SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
	if (sbappendaddr_locked(
	&last->in6p_socket->so_rcv,
	(struct sockaddr *)&fromsa, n, opts)
	== 0) {
	/* should notify about lost packet */
	m_freem(n);
	if (opts) {
	m_freem(opts);
	}
	SOCKBUF_UNLOCK(
	&last->in6p_socket->so_rcv);
	} else
	sorwakeup_locked(last->in6p_socket);
	opts = NULL;
	}
	INP_RUNLOCK(last);
	}
	last = in6p;
	}
	INP_INFO_RUNLOCK(&V_ripcbinfo);
	if (last) {
	if (last->in6p_flags & IN6P_CONTROLOPTS)
	ip6_savecontrol(last, m, &opts);
	/* strip intermediate headers */
	m_adj(m, off);

	/* avoid using mbuf clusters if possible (see above) */
	if ((m->m_flags & M_EXT) && m->m_next == NULL &&
	m->m_len <= MHLEN) {
	struct mbuf *n;

	MGET(n, M_DONTWAIT, m->m_type);
	if (n != NULL) {
	if (m_dup_pkthdr(n, m, M_NOWAIT)) {
	bcopy(m->m_data, n->m_data, m->m_len);
	n->m_len = m->m_len;

	m_freem(m);
	m = n;
	} else {
	m_freem(n);
	n = NULL;
	}
	}
	}
	SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
	if (sbappendaddr_locked(&last->in6p_socket->so_rcv,
	(struct sockaddr *)&fromsa, m, opts) == 0) {
	m_freem(m);
	if (opts)
	m_freem(opts);
	SOCKBUF_UNLOCK(&last->in6p_socket->so_rcv);
	} else
	sorwakeup_locked(last->in6p_socket);
	INP_RUNLOCK(last);
	} else {
	m_freem(m);
	V_ip6stat.ip6s_delivered--;
	}
	return IPPROTO_DONE;
	}

	/*
	 * Reflect the ip6 packet back to the source.
	 * OFF points to the icmp6 header, counted from the top of the mbuf.
	 *
	 * Any headers between the IPv6 and ICMPv6 headers are stripped, the
	 * original source becomes the destination, a source address is chosen,
	 * the ICMPv6 checksum is recomputed and the packet is handed to
	 * ip6_output(). The mbuf is freed here when the packet is too short or
	 * no source address can be determined.
	 */
	void
	icmp6_reflect(struct mbuf *m, size_t off)
	{
		INIT_VNET_INET6(curvnet);
		struct ip6_hdr *ip6;
		struct icmp6_hdr *icmp6;
		struct in6_ifaddr *ia;
		int plen;
		int type, code;
		struct ifnet *outif = NULL;
		struct in6_addr origdst, *src = NULL;

		/* too short to reflect */
		if (off < sizeof(struct ip6_hdr)) {
			nd6log((LOG_DEBUG,
			    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
			    (u_long)off, (u_long)sizeof(struct ip6_hdr),
			    __FILE__, __LINE__));
			goto bad;
		}

		/*
		 * If there are extra headers between IPv6 and ICMPv6, strip
		 * off that header first.
		 */
	#ifdef DIAGNOSTIC
		if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
			panic("assumption failed in icmp6_reflect");
	#endif
		if (off > sizeof(struct ip6_hdr)) {
			size_t l;
			struct ip6_hdr nip6;

			/* save the IPv6 header, trim, then put it back in front */
			l = off - sizeof(struct ip6_hdr);
			m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
			m_adj(m, l);
			l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
			if (m->m_len < l) {
				if ((m = m_pullup(m, l)) == NULL)
					return;
			}
			bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
		} else /* off == sizeof(struct ip6_hdr) */ {
			size_t l;
			l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
			if (m->m_len < l) {
				if ((m = m_pullup(m, l)) == NULL)
					return;
			}
		}
		plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		icmp6 = (struct icmp6_hdr *)(ip6 + 1);
		type = icmp6->icmp6_type; /* keep type for statistics */
		code = icmp6->icmp6_code; /* ditto. */

		origdst = ip6->ip6_dst;
		/*
		 * ip6_input() drops a packet if its src is multicast.
		 * So, the src is never multicast.
		 */
		ip6->ip6_dst = ip6->ip6_src;

		/*
		 * If the incoming packet was addressed directly to us (i.e. unicast),
		 * use dst as the src for the reply.
		 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
		 * (for example) when we encounter an error while forwarding procedure
		 * destined to a duplicated address of ours.
		 * Note that ip6_getdstifaddr() may fail if we are in an error handling
		 * procedure of an outgoing packet of our own, in which case we need
		 * to search in the ifaddr list.
		 */
		if (!IN6_IS_ADDR_MULTICAST(&origdst)) {
			if ((ia = ip6_getdstifaddr(m))) {
				if (!(ia->ia6_flags &
				    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
					src = &ia->ia_addr.sin6_addr;
			} else {
				struct sockaddr_in6 d;

				bzero(&d, sizeof(d));
				d.sin6_family = AF_INET6;
				d.sin6_len = sizeof(d);
				d.sin6_addr = origdst;
				ia = (struct in6_ifaddr *)
				    ifa_ifwithaddr((struct sockaddr *)&d);
				if (ia &&
				    !(ia->ia6_flags &
				    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
					src = &ia->ia_addr.sin6_addr;
				}
			}
		}

		if (src == NULL) {
			int e;
			struct sockaddr_in6 sin6;
			struct route_in6 ro;

			/*
			 * This case matches to multicasts, our anycast, or unicasts
			 * that we do not own. Select a source address based on the
			 * source address of the erroneous packet.
			 */
			bzero(&sin6, sizeof(sin6));
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(sin6);
			sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */

			bzero(&ro, sizeof(ro));
			src = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &e);
			if (ro.ro_rt)
				RTFREE(ro.ro_rt); /* XXX: we could use this */
			if (src == NULL) {
				char ip6buf[INET6_ADDRSTRLEN];
				nd6log((LOG_DEBUG,
				    "icmp6_reflect: source can't be determined: "
				    "dst=%s, error=%d\n",
				    ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
				goto bad;
			}
		}

		ip6->ip6_src = *src;
		ip6->ip6_flow = 0;
		ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		if (outif)
			ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
		else if (m->m_pkthdr.rcvif) {
			/* XXX: This may not be the outgoing interface */
			ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
		} else
			ip6->ip6_hlim = V_ip6_defhlim;

		/* the checksum field must be zero while the sum is computed */
		icmp6->icmp6_cksum = 0;
		icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
		    sizeof(struct ip6_hdr), plen);

		/*
		 * XXX option handling
		 */

		m->m_flags &= ~(M_BCAST|M_MCAST);

		ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
		if (outif)
			icmp6_ifoutstat_inc(outif, type, code);

		return;

	bad:
		m_freem(m);
		return;
	}

	/*
	 * ICMPv6 fast timeout routine; it currently performs no work.
	 */
	void
	icmp6_fasttimo(void)
	{
		/* nothing to do */
	}

	/*
	 * Format the three addresses of a redirect as
	 * "(src=... dst=... tgt=...)" for diagnostic log messages.
	 *
	 * The result lives in a function-static buffer: each call overwrites
	 * the previous result, and the function is not reentrant.
	 */
	static const char *
	icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
	    struct in6_addr *tgt6)
	{
		static char buf[1024];
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufd[INET6_ADDRSTRLEN];
		char ip6buft[INET6_ADDRSTRLEN];
		snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
		    ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
		    ip6_sprintf(ip6buft, tgt6));
		return buf;
	}

	void
	icmp6_redirect_input(struct mbuf *m, int off)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp;
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct nd_redirect *nd_rd;
	int icmp6len = ntohs(ip6->ip6_plen);
	char *lladdr = NULL;
	int lladdrlen = 0;
	u_char *redirhdr = NULL;
	int redirhdrlen = 0;
	struct rtentry *rt = NULL;
	int is_router;
	int is_onlink;
	struct in6_addr src6 = ip6->ip6_src;
	struct in6_addr redtgt6;
	struct in6_addr reddst6;
	union nd_opts ndopts;
	char ip6buf[INET6_ADDRSTRLEN];

	if (!m)
	return;

	ifp = m->m_pkthdr.rcvif;

	if (!ifp)
	return;

	/* XXX if we are router, we don't update route by icmp6 redirect */
	if (V_ip6_forwarding)
	goto freeit;
	if (!V_icmp6_rediraccept)
	goto freeit;

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
	if (nd_rd == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif
	redtgt6 = nd_rd->nd_rd_target;
	reddst6 = nd_rd->nd_rd_dst;

	if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) \|\|
	in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
	goto freeit;
	}

	/* validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
	nd6log((LOG_ERR,
	"ICMP6 redirect sent from %s rejected; "
	"must be from linklocal\n",
	ip6_sprintf(ip6buf, &src6)));
	goto bad;
	}
	if (ip6->ip6_hlim != 255) {
	nd6log((LOG_ERR,
	"ICMP6 redirect sent from %s rejected; "
	"hlim=%d (must be 255)\n",
	ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
	goto bad;
	}
	{
	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
	struct sockaddr_in6 sin6;
	struct in6_addr *gw6;

	bzero(&sin6, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
	if (rt) {
	if (rt->rt_gateway == NULL \|\|
	rt->rt_gateway->sa_family != AF_INET6) {
	nd6log((LOG_ERR,
	"ICMP6 redirect rejected; no route "
	"with inet6 gateway found for redirect dst: %s\n",
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	RTFREE_LOCKED(rt);
	goto bad;
	}

	gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
	if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
	nd6log((LOG_ERR,
	"ICMP6 redirect rejected; "
	"not equal to gw-for-src=%s (must be same): "
	"%s\n",
	ip6_sprintf(ip6buf, gw6),
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	RTFREE_LOCKED(rt);
	goto bad;
	}
	} else {
	nd6log((LOG_ERR,
	"ICMP6 redirect rejected; "
	"no route found for redirect dst: %s\n",
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	goto bad;
	}
	RTFREE_LOCKED(rt);
	rt = NULL;
	}
	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
	nd6log((LOG_ERR,
	"ICMP6 redirect rejected; "
	"redirect dst must be unicast: %s\n",
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	goto bad;
	}

	is_router = is_onlink = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
	is_router = 1; /* router case */
	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
	is_onlink = 1; /* on-link destination case */
	if (!is_router && !is_onlink) {
	nd6log((LOG_ERR,
	"ICMP6 redirect rejected; "
	"neither router case nor onlink case: %s\n",
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	goto bad;
	}
	/* validation passed */

	icmp6len -= sizeof(*nd_rd);
	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
	nd6log((LOG_INFO, "icmp6_redirect_input: "
	"invalid ND option, rejected: %s\n",
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	/* nd6_options have incremented stats */
	goto freeit;
	}

	if (ndopts.nd_opts_tgt_lladdr) {
	lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
	lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	if (ndopts.nd_opts_rh) {
	redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
	redirhdr = (u_char )(ndopts.nd_opts_rh + 1); / xxx */
	}

	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
	nd6log((LOG_INFO,
	"icmp6_redirect_input: lladdrlen mismatch for %s "
	"(if %d, icmp6 packet %d): %s\n",
	ip6_sprintf(ip6buf, &redtgt6),
	ifp->if_addrlen, lladdrlen - 2,
	icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
	goto bad;
	}

	/* RFC 2461 8.3 */
	- IF_AFDATA_LOCK(ifp);
	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
	is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
	- IF_AFDATA_UNLOCK(ifp);

	if (!is_onlink) { /* better router case. perform rtredirect. */
	/* perform rtredirect */
	struct sockaddr_in6 sdst;
	struct sockaddr_in6 sgw;
	struct sockaddr_in6 ssrc;

	bzero(&sdst, sizeof(sdst));
	bzero(&sgw, sizeof(sgw));
	bzero(&ssrc, sizeof(ssrc));
	sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
	sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
	sizeof(struct sockaddr_in6);
	bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
	bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
	rtredirect((struct sockaddr )&sdst, (struct sockaddr )&sgw,
	(struct sockaddr *)NULL, RTF_GATEWAY \| RTF_HOST,
	(struct sockaddr *)&ssrc);
	}
	/* finally update cached route in each socket via pfctlinput */
	{
	struct sockaddr_in6 sdst;

	bzero(&sdst, sizeof(sdst));
	sdst.sin6_family = AF_INET6;
	sdst.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
	#ifdef IPSEC
	key_sa_routechange((struct sockaddr *)&sdst);
	#endif /* IPSEC */
	}

	freeit:
	m_freem(m);
	return;

	bad:
	V_icmp6stat.icp6s_badredirect++;
	m_freem(m);
	}

	void
	icmp6_redirect_output(struct mbuf m0, struct rtentry rt)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet ifp; / my outgoing interface */
	struct in6_addr *ifp_ll6;
	struct in6_addr *router_ll6;
	struct ip6_hdr sip6; / m0 as struct ip6_hdr */
	struct mbuf m = NULL; / newly allocated one */
	struct ip6_hdr ip6; / m as struct ip6_hdr */
	struct nd_redirect *nd_rd;
	size_t maxlen;
	u_char *p;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 src_sa;

	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);

	/* if we are not router, we don't send icmp6 redirect */
	if (!V_ip6_forwarding)
	goto fail;

	/* sanity check */
	if (!m0 \|\| !rt \|\| !(rt->rt_flags & RTF_UP) \|\| !(ifp = rt->rt_ifp))
	goto fail;

	/*
	* Address check:
	* the source address must identify a neighbor, and
	* the destination address must not be a multicast address
	* [RFC 2461, sec 8.2]
	*/
	sip6 = mtod(m0, struct ip6_hdr *);
	bzero(&src_sa, sizeof(src_sa));
	src_sa.sin6_family = AF_INET6;
	src_sa.sin6_len = sizeof(src_sa);
	src_sa.sin6_addr = sip6->ip6_src;
	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
	goto fail;
	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
	goto fail; /* what should we do here? */

	/* rate limit */
	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
	goto fail;

	/*
	* Since we are going to append up to 1280 bytes (= IPV6_MMTU),
	* we almost always ask for an mbuf cluster for simplicity.
	* (MHLEN < IPV6_MMTU is almost always true)
	*/
	#if IPV6_MMTU >= MCLBYTES
	# error assumption failed about IPV6_MMTU and MCLBYTES
	#endif
	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (m && IPV6_MMTU >= MHLEN)
	MCLGET(m, M_DONTWAIT);
	if (!m)
	goto fail;
	m->m_pkthdr.rcvif = NULL;
	m->m_len = 0;
	maxlen = M_TRAILINGSPACE(m);
	maxlen = min(IPV6_MMTU, maxlen);
	/* just for safety */
	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
	((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
	goto fail;
	}

	{
	/* get ip6 linklocal address for ifp(my outgoing interface). */
	struct in6_ifaddr *ia;
	if ((ia = in6ifa_ifpforlinklocal(ifp,
	IN6_IFF_NOTREADY\|
	IN6_IFF_ANYCAST)) == NULL)
	goto fail;
	ifp_ll6 = &ia->ia_addr.sin6_addr;
	}

	/* get ip6 linklocal address for the router. */
	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
	struct sockaddr_in6 *sin6;
	sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
	router_ll6 = &sin6->sin6_addr;
	if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
	router_ll6 = (struct in6_addr *)NULL;
	} else
	router_ll6 = (struct in6_addr *)NULL;

	/* ip6 */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc \|= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));

	/* ND Redirect */
	nd_rd = (struct nd_redirect *)(ip6 + 1);
	nd_rd->nd_rd_type = ND_REDIRECT;
	nd_rd->nd_rd_code = 0;
	nd_rd->nd_rd_reserved = 0;
	if (rt->rt_flags & RTF_GATEWAY) {
	/*
	* nd_rd->nd_rd_target must be a link-local address in
	* better router cases.
	*/
	if (!router_ll6)
	goto fail;
	bcopy(router_ll6, &nd_rd->nd_rd_target,
	sizeof(nd_rd->nd_rd_target));
	bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
	sizeof(nd_rd->nd_rd_dst));
	} else {
	/* make sure redtgt == reddst */
	bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
	sizeof(nd_rd->nd_rd_target));
	bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
	sizeof(nd_rd->nd_rd_dst));
	}

	p = (u_char *)(nd_rd + 1);

	if (!router_ll6)
	goto nolladdropt;

	{
	/* target lladdr option */
	int len;
	struct llentry *ln;
	struct nd_opt_hdr *nd_opt;
	char *lladdr;

	IF_AFDATA_LOCK(ifp);
	ln = nd6_lookup(router_ll6, 0, ifp);
	- if (!ln) {
	- IF_AFDATA_UNLOCK(ifp);
	+ IF_AFDATA_UNLOCK(ifp);
	+ if (!ln)
	goto nolladdropt;
	- }
	+
	len = sizeof(*nd_opt) + ifp->if_addrlen;
	len = (len + 7) & ~7; /* round by 8 */
	/* safety check */
	- if (len + (p - (u_char *)ip6) > maxlen) {
	- IF_AFDATA_UNLOCK(ifp);
	+ if (len + (p - (u_char *)ip6) > maxlen)
	goto nolladdropt;
	- }
	+
	if (ln->la_flags & LLE_VALID) {
	nd_opt = (struct nd_opt_hdr *)p;
	nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
	nd_opt->nd_opt_len = len >> 3;
	lladdr = (char *)(nd_opt + 1);
	bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
	p += len;
	}
	- IF_AFDATA_UNLOCK(ifp);
	+ LLE_RUNLOCK(ln);
	}
	nolladdropt:;

	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* just to be safe */
	#ifdef M_DECRYPTED /not openbsd/
	if (m0->m_flags & M_DECRYPTED)
	goto noredhdropt;
	#endif
	if (p - (u_char *)ip6 > maxlen)
	goto noredhdropt;

	{
	/* redirected header option */
	int len;
	struct nd_opt_rd_hdr *nd_opt_rh;

	/*
	* compute the maximum size for icmp6 redirect header option.
	* XXX room for auth header?
	*/
	len = maxlen - (p - (u_char *)ip6);
	len &= ~7;

	/* This is just for simplicity. */
	if (m0->m_pkthdr.len != m0->m_len) {
	if (m0->m_next) {
	m_freem(m0->m_next);
	m0->m_next = NULL;
	}
	m0->m_pkthdr.len = m0->m_len;
	}

	/*
	* Redirected header option spec (RFC2461 4.6.3) talks nothing
	* about padding/truncate rule for the original IP packet.
	* From the discussion on IPv6imp in Feb 1999,
	* the consensus was:
	* - "attach as much as possible" is the goal
	* - pad if not aligned (original size can be guessed by
	* original ip6 header)
	* Following code adds the padding if it is simple enough,
	* and truncates if not.
	*/
	if (m0->m_next \|\| m0->m_pkthdr.len != m0->m_len)
	panic("assumption failed in %s:%d", __FILE__,
	__LINE__);

	if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
	/* not enough room, truncate */
	m0->m_pkthdr.len = m0->m_len = len -
	sizeof(*nd_opt_rh);
	} else {
	/* enough room, pad or truncate */
	size_t extra;

	extra = m0->m_pkthdr.len % 8;
	if (extra) {
	/* pad if easy enough, truncate if not */
	if (8 - extra <= M_TRAILINGSPACE(m0)) {
	/* pad */
	m0->m_len += (8 - extra);
	m0->m_pkthdr.len += (8 - extra);
	} else {
	/* truncate */
	m0->m_pkthdr.len -= extra;
	m0->m_len -= extra;
	}
	}
	len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
	m0->m_pkthdr.len = m0->m_len = len -
	sizeof(*nd_opt_rh);
	}

	nd_opt_rh = (struct nd_opt_rd_hdr *)p;
	bzero(nd_opt_rh, sizeof(*nd_opt_rh));
	nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
	nd_opt_rh->nd_opt_rh_len = len >> 3;
	p += sizeof(*nd_opt_rh);
	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* connect m0 to m */
	m_tag_delete_chain(m0, NULL);
	m0->m_flags &= ~M_PKTHDR;
	m->m_next = m0;
	m->m_pkthdr.len = m->m_len + m0->m_len;
	m0 = NULL;
	}
	noredhdropt:;
	if (m0) {
	m_freem(m0);
	m0 = NULL;
	}

	/* XXX: clear embedded link IDs in the inner header */
	in6_clearscope(&sip6->ip6_src);
	in6_clearscope(&sip6->ip6_dst);
	in6_clearscope(&nd_rd->nd_rd_target);
	in6_clearscope(&nd_rd->nd_rd_dst);

	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));

	nd_rd->nd_rd_cksum = 0;
	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
	sizeof(*ip6), ntohs(ip6->ip6_plen));

	/* send the packet to outside... */
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
	if (outif) {
	icmp6_ifstat_inc(outif, ifs6_out_msg);
	icmp6_ifstat_inc(outif, ifs6_out_redirect);
	}
	V_icmp6stat.icp6s_outhist[ND_REDIRECT]++;

	return;

	fail:
	if (m)
	m_freem(m);
	if (m0)
	m_freem(m0);
	}

	/*
	* ICMPv6 socket option processing.
	*/
	int
	icmp6_ctloutput(struct socket so, struct sockopt sopt)
	{
	int error = 0;
	int optlen;
	struct inpcb *inp = sotoinpcb(so);
	int level, op, optname;

	if (sopt) {
	level = sopt->sopt_level;
	op = sopt->sopt_dir;
	optname = sopt->sopt_name;
	optlen = sopt->sopt_valsize;
	} else
	level = op = optname = optlen = 0;

	if (level != IPPROTO_ICMPV6) {
	return EINVAL;
	}

	switch (op) {
	case PRCO_SETOPT:
	switch (optname) {
	case ICMP6_FILTER:
	{
	struct icmp6_filter ic6f;

	if (optlen != sizeof(ic6f)) {
	error = EMSGSIZE;
	break;
	}
	error = sooptcopyin(sopt, &ic6f, optlen, optlen);
	if (error == 0) {
	INP_WLOCK(inp);
	*inp->in6p_icmp6filt = ic6f;
	INP_WUNLOCK(inp);
	}
	break;
	}

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;

	case PRCO_GETOPT:
	switch (optname) {
	case ICMP6_FILTER:
	{
	struct icmp6_filter ic6f;

	INP_RLOCK(inp);
	ic6f = *inp->in6p_icmp6filt;
	INP_RUNLOCK(inp);
	error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
	break;
	}

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;
	}

	return (error);
	}

	/*
	 * ICMPv6 error rate limiter.
	 *
	 * Result is 0 when the icmp6 packet may be sent and 1 when the router
	 * SHOULD NOT send it because the packets-per-second budget is exhausted.
	 *
	 * XXX per-destination/type check necessary?
	 *
	 *	dst  - currently ignored
	 *	type - currently ignored
	 *	code - currently ignored
	 */
	static int
	icmp6_ratelimit(const struct in6_addr *dst, const int type,
	    const int code)
	{
		INIT_VNET_INET6(curvnet);

		/* PPS limit: ppsratecheck() yields nonzero while under the cap. */
		if (ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
		    V_icmp6errppslim))
			return 0;	/* okay to send */

		/* The packet is subject to rate limit */
		return 1;
	}
	Index: projects/arpv2_merge_1/sys/netinet6/in6.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/in6.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/in6.c (revision 185839)
	@@ -1,2395 +1,2403 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
	*/

	/*-
	* Copyright (c) 1982, 1986, 1991, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)in.c 8.2 (Berkeley) 11/15/93
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/malloc.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sockio.h>
	#include <sys/systm.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/syslog.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/route.h>
	#include <net/if_dl.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <net/if_llatbl.h>
	#include <netinet/if_ether.h>
	#include <netinet/in_systm.h>
	#include <netinet/ip.h>
	#include <netinet/in_pcb.h>

	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet6/mld6_var.h>
	#include <netinet6/ip6_mroute.h>
	#include <netinet6/in6_ifattach.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/in6_pcb.h>
	#include <netinet6/vinet6.h>

	/* malloc(9) type used for IPv6 multicast address records. */
	MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");

	/*
	 * Definitions of some constant IP6 addresses.
	 */
	const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
	const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
	const struct in6_addr in6addr_nodelocal_allnodes =
	    IN6ADDR_NODELOCAL_ALLNODES_INIT;
	const struct in6_addr in6addr_linklocal_allnodes =
	    IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	const struct in6_addr in6addr_linklocal_allrouters =
	    IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;

	/* Prefix masks of length 0, 32, 64, 96 and 128 bits. */
	const struct in6_addr in6mask0 = IN6MASK0;
	const struct in6_addr in6mask32 = IN6MASK32;
	const struct in6_addr in6mask64 = IN6MASK64;
	const struct in6_addr in6mask96 = IN6MASK96;
	const struct in6_addr in6mask128 = IN6MASK128;

	/* Wildcard sockaddr_in6: AF_INET6 with the unspecified address. */
	const struct sockaddr_in6 sa6_any =
	    { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };

	/* Prototypes for helpers that are private to this file. */
	static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
	    struct ifnet *, struct thread *));
	static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *,
	    struct sockaddr_in6 *, int));
	static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);

	struct in6_multihead in6_multihead;	/* XXX BSS initialization */
	/* Function-pointer hook; no initializer, so it starts out NULL. */
	int (*faithprefix_p)(struct in6_addr *);



	/*
	 * Convert an IPv6 netmask into a prefix length.
	 *
	 *	mask - netmask to examine
	 *	lim0 - optional pointer one past the last mask byte to examine.
	 *	       When NULL, or when it lies more than sizeof(*mask) bytes past
	 *	       the start of the mask, the whole 16-byte address is examined
	 *	       (this is how the scope_id part of a sockaddr is ignored).
	 *
	 * Returns the number of leading one bits, or -1 when any bit is set
	 * after the first zero bit (i.e. the mask is not contiguous).
	 */
	int
	in6_mask2len(struct in6_addr *mask, u_char *lim0)
	{
		int x = 0, y;
		u_char *lim = lim0, *p;

		/* ignore the scope_id part */
		if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
			lim = (u_char *)mask + sizeof(*mask);

		/* x counts the leading bytes that are entirely ones. */
		for (p = (u_char *)mask; p < lim; x++, p++) {
			if (*p != 0xff)
				break;
		}

		/* y counts the leading one bits of the first non-0xff byte. */
		y = 0;
		if (p < lim) {
			for (y = 0; y < 8; y++) {
				if ((*p & (0x80 >> y)) == 0)
					break;
			}
		}

		/*
		 * Stricter check on the remaining bits: everything after the
		 * first zero bit, up to the limit, must be zero.
		 */
		if (p < lim) {
			if (y != 0 && (*p & (0x00ff >> y)) != 0)
				return (-1);
			for (p = p + 1; p < lim; p++)
				if (*p != 0)
					return (-1);
		}

		return x * 8 + y;
	}

	/* View a generic interface-address pointer as a struct in6_ifaddr pointer. */
	#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
	/* Address of the ia_ifa member embedded in an in6_ifaddr. */
	#define ia62ifa(ia6) (&((ia6)->ia_ifa))

	/*
	 * IPv6 interface ioctl handler.
	 *
	 *	so   - socket the request arrived on (only passed on to
	 *	       in6_lifaddr_ioctl())
	 *	cmd  - ioctl command
	 *	data - command argument, viewed as struct in6_ifreq or
	 *	       struct in6_aliasreq depending on cmd
	 *	ifp  - target interface; may be NULL only for the multicast-routing
	 *	       and address-policy commands handled before the NULL check
	 *	td   - calling thread; privilege checks are skipped when NULL
	 *
	 * Requests are dispatched in stages: multicast routing counters,
	 * source-address policy, neighbor discovery, (obsolete) prefix ioctls,
	 * scope6, SIOC[ADG]LIFADDR, and finally the per-address operations.
	 * Anything not recognised is passed to the driver's if_ioctl routine.
	 *
	 * Returns 0 on success or an errno value.
	 */
	int
	in6_control(struct socket *so, u_long cmd, caddr_t data,
	    struct ifnet *ifp, struct thread *td)
	{
		INIT_VNET_INET6(curvnet);
		struct in6_ifreq *ifr = (struct in6_ifreq *)data;
		struct in6_ifaddr *ia = NULL;
		struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
		struct sockaddr_in6 *sa6;
		int error;

		/* Multicast routing counters: served by mrt6_ioctl if hooked. */
		switch (cmd) {
		case SIOCGETSGCNT_IN6:
		case SIOCGETMIFCNT_IN6:
			return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
		}

		/* Source address selection policy table. */
		switch(cmd) {
		case SIOCAADDRCTL_POLICY:
		case SIOCDADDRCTL_POLICY:
			if (td != NULL) {
				error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
				if (error)
					return (error);
			}
			return (in6_src_ioctl(cmd, data));
		}

		/* Everything below operates on a specific interface. */
		if (ifp == NULL)
			return (EOPNOTSUPP);

		/* Neighbor discovery ioctls; the modifying ones are privileged. */
		switch (cmd) {
		case SIOCSNDFLUSH_IN6:
		case SIOCSPFXFLUSH_IN6:
		case SIOCSRTRFLUSH_IN6:
		case SIOCSDEFIFACE_IN6:
		case SIOCSIFINFO_FLAGS:
			if (td != NULL) {
				error = priv_check(td, PRIV_NETINET_ND6);
				if (error)
					return (error);
			}
			/* FALLTHROUGH */
		case OSIOCGIFINFO_IN6:
		case SIOCGIFINFO_IN6:
		case SIOCSIFINFO_IN6:
		case SIOCGDRLST_IN6:
		case SIOCGPRLST_IN6:
		case SIOCGNBRINFO_IN6:
		case SIOCGDEFIFACE_IN6:
			return (nd6_ioctl(cmd, data, ifp));
		}

		switch (cmd) {
		case SIOCSIFPREFIX_IN6:
		case SIOCDIFPREFIX_IN6:
		case SIOCAIFPREFIX_IN6:
		case SIOCCIFPREFIX_IN6:
		case SIOCSGIFPREFIX_IN6:
		case SIOCGIFPREFIX_IN6:
			log(LOG_NOTICE,
			    "prefix ioctls are now invalidated. "
			    "please use ifconfig.\n");
			return (EOPNOTSUPP);
		}

		switch (cmd) {
		case SIOCSSCOPE6:
			if (td != NULL) {
				error = priv_check(td, PRIV_NETINET_SCOPE6);
				if (error)
					return (error);
			}
			return (scope6_set(ifp,
			    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
		case SIOCGSCOPE6:
			return (scope6_get(ifp,
			    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
		case SIOCGSCOPE6DEF:
			return (scope6_get_default((struct scope6_id *)
			    ifr->ifr_ifru.ifru_scope_id));
		}

		switch (cmd) {
		case SIOCALIFADDR:
			if (td != NULL) {
				error = priv_check(td, PRIV_NET_ADDIFADDR);
				if (error)
					return (error);
			}
			return in6_lifaddr_ioctl(so, cmd, data, ifp, td);

		case SIOCDLIFADDR:
			if (td != NULL) {
				error = priv_check(td, PRIV_NET_DELIFADDR);
				if (error)
					return (error);
			}
			/* FALLTHROUGH */
		case SIOCGLIFADDR:
			return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
		}

		/*
		 * Find address for this interface, if it exists.
		 *
		 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
		 * only, and used the first interface address as the target of other
		 * operations (without checking ifra_addr).  This was because netinet
		 * code/API assumed at most 1 interface address per interface.
		 * Since IPv6 allows a node to assign multiple addresses
		 * on a single interface, we almost always look and check the
		 * presence of ifra_addr, and reject invalid ones here.
		 * It also decreases duplicated code among SIOC*_IN6 operations.
		 */
		switch (cmd) {
		case SIOCAIFADDR_IN6:
		case SIOCSIFPHYADDR_IN6:
			sa6 = &ifra->ifra_addr;
			break;
		case SIOCSIFADDR_IN6:
		case SIOCGIFADDR_IN6:
		case SIOCSIFDSTADDR_IN6:
		case SIOCSIFNETMASK_IN6:
		case SIOCGIFDSTADDR_IN6:
		case SIOCGIFNETMASK_IN6:
		case SIOCDIFADDR_IN6:
		case SIOCGIFPSRCADDR_IN6:
		case SIOCGIFPDSTADDR_IN6:
		case SIOCGIFAFLAG_IN6:
		case SIOCSNDFLUSH_IN6:
		case SIOCSPFXFLUSH_IN6:
		case SIOCSRTRFLUSH_IN6:
		case SIOCGIFALIFETIME_IN6:
		case SIOCSIFALIFETIME_IN6:
		case SIOCGIFSTAT_IN6:
		case SIOCGIFSTAT_ICMP6:
			sa6 = &ifr->ifr_addr;
			break;
		default:
			sa6 = NULL;
			break;
		}
		if (sa6 && sa6->sin6_family == AF_INET6) {
			/*
			 * Bring the address into the form taken by
			 * in6ifa_ifpwithaddr() before looking it up.
			 */
			if (sa6->sin6_scope_id != 0)
				error = sa6_embedscope(sa6, 0);
			else
				error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
			if (error != 0)
				return (error);
			ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
		} else
			ia = NULL;

		/* Validation and privilege checks for the per-address commands. */
		switch (cmd) {
		case SIOCSIFADDR_IN6:
		case SIOCSIFDSTADDR_IN6:
		case SIOCSIFNETMASK_IN6:
			/*
			 * Since IPv6 allows a node to assign multiple addresses
			 * on a single interface, SIOCSIFxxx ioctls are deprecated.
			 */
			/* we decided to obsolete this command (20000704) */
			return (EINVAL);

		case SIOCDIFADDR_IN6:
			/*
			 * for IPv4, we look for existing in_ifaddr here to allow
			 * "ifconfig if0 delete" to remove the first IPv4 address on
			 * the interface.  For IPv6, as the spec allows multiple
			 * interface address from the day one, we consider "remove the
			 * first one" semantics to be not preferable.
			 */
			if (ia == NULL)
				return (EADDRNOTAVAIL);
			/* FALLTHROUGH */
		case SIOCAIFADDR_IN6:
			/*
			 * We always require users to specify a valid IPv6 address for
			 * the corresponding operation.
			 */
			if (ifra->ifra_addr.sin6_family != AF_INET6 ||
			    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
				return (EAFNOSUPPORT);

			if (td != NULL) {
				error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
				    PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
				if (error)
					return (error);
			}

			break;

		case SIOCGIFADDR_IN6:
			/* This interface is basically deprecated. use SIOCGIFCONF. */
			/* FALLTHROUGH */
		case SIOCGIFAFLAG_IN6:
		case SIOCGIFNETMASK_IN6:
		case SIOCGIFDSTADDR_IN6:
		case SIOCGIFALIFETIME_IN6:
			/* must think again about its semantics */
			if (ia == NULL)
				return (EADDRNOTAVAIL);
			break;
		case SIOCSIFALIFETIME_IN6:
		    {
			struct in6_addrlifetime *lt;

			if (td != NULL) {
				error = priv_check(td, PRIV_NETINET_ALIFETIME6);
				if (error)
					return (error);
			}
			if (ia == NULL)
				return (EADDRNOTAVAIL);
			/* sanity for overflow - beware unsigned */
			lt = &ifr->ifr_ifru.ifru_lifetime;
			if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
			    lt->ia6t_vltime + time_second < time_second) {
				return EINVAL;
			}
			if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
			    lt->ia6t_pltime + time_second < time_second) {
				return EINVAL;
			}
			break;
		    }
		}

		/* Perform the requested operation. */
		switch (cmd) {

		case SIOCGIFADDR_IN6:
			ifr->ifr_addr = ia->ia_addr;
			if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
				return (error);
			break;

		case SIOCGIFDSTADDR_IN6:
			if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
				return (EINVAL);
			/*
			 * XXX: should we check if ifa_dstaddr is NULL and return
			 * an error?
			 */
			ifr->ifr_dstaddr = ia->ia_dstaddr;
			if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
				return (error);
			break;

		case SIOCGIFNETMASK_IN6:
			ifr->ifr_addr = ia->ia_prefixmask;
			break;

		case SIOCGIFAFLAG_IN6:
			ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
			break;

		case SIOCGIFSTAT_IN6:
			if (ifp == NULL)
				return EINVAL;
			bzero(&ifr->ifr_ifru.ifru_stat,
			    sizeof(ifr->ifr_ifru.ifru_stat));
			ifr->ifr_ifru.ifru_stat =
			    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
			break;

		case SIOCGIFSTAT_ICMP6:
			if (ifp == NULL)
				return EINVAL;
			bzero(&ifr->ifr_ifru.ifru_icmp6stat,
			    sizeof(ifr->ifr_ifru.ifru_icmp6stat));
			ifr->ifr_ifru.ifru_icmp6stat =
			    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
			break;

		case SIOCGIFALIFETIME_IN6:
			ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
			if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
				time_t maxexpire;
				struct in6_addrlifetime *retlt =
				    &ifr->ifr_ifru.ifru_lifetime;

				/*
				 * XXX: adjust expiration time assuming time_t is
				 * signed.
				 */
				maxexpire = (-1) &
				    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
				if (ia->ia6_lifetime.ia6t_vltime <
				    maxexpire - ia->ia6_updatetime) {
					retlt->ia6t_expire = ia->ia6_updatetime +
					    ia->ia6_lifetime.ia6t_vltime;
				} else
					retlt->ia6t_expire = maxexpire;
			}
			if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
				time_t maxexpire;
				struct in6_addrlifetime *retlt =
				    &ifr->ifr_ifru.ifru_lifetime;

				/*
				 * XXX: adjust expiration time assuming time_t is
				 * signed.
				 */
				maxexpire = (-1) &
				    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
				if (ia->ia6_lifetime.ia6t_pltime <
				    maxexpire - ia->ia6_updatetime) {
					retlt->ia6t_preferred = ia->ia6_updatetime +
					    ia->ia6_lifetime.ia6t_pltime;
				} else
					retlt->ia6t_preferred = maxexpire;
			}
			break;

		case SIOCSIFALIFETIME_IN6:
			ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
			/* for sanity */
			if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
				ia->ia6_lifetime.ia6t_expire =
				    time_second + ia->ia6_lifetime.ia6t_vltime;
			} else
				ia->ia6_lifetime.ia6t_expire = 0;
			if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
				ia->ia6_lifetime.ia6t_preferred =
				    time_second + ia->ia6_lifetime.ia6t_pltime;
			} else
				ia->ia6_lifetime.ia6t_preferred = 0;
			break;

		case SIOCAIFADDR_IN6:
		    {
			int i;
			struct nd_prefixctl pr0;
			struct nd_prefix *pr;

			/*
			 * first, make or update the interface address structure,
			 * and link it to the list.
			 */
			if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
				return (error);
			if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
			    == NULL) {
				/*
				 * this can happen when the user specify the 0 valid
				 * lifetime.
				 */
				break;
			}

			/*
			 * then, make the prefix on-link on the interface.
			 * XXX: we'd rather create the prefix before the address, but
			 * we need at least one address to install the corresponding
			 * interface route, so we configure the address first.
			 */

			/*
			 * convert mask to prefix length (prefixmask has already
			 * been validated in in6_update_ifa()).
			 */
			bzero(&pr0, sizeof(pr0));
			pr0.ndpr_ifp = ifp;
			pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
			    NULL);
			if (pr0.ndpr_plen == 128) {
				break;	/* we don't need to install a host route. */
			}
			pr0.ndpr_prefix = ifra->ifra_addr;
			/* apply the mask for safety. */
			for (i = 0; i < 4; i++) {
				pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
				    ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
			}
			/*
			 * XXX: since we don't have an API to set prefix (not address)
			 * lifetimes, we just use the same lifetimes as addresses.
			 * The (temporarily) installed lifetimes can be overridden by
			 * later advertised RAs (when accept_rtadv is non 0), which is
			 * an intended behavior.
			 */
			pr0.ndpr_raf_onlink = 1; /* should be configurable? */
			pr0.ndpr_raf_auto =
			    ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
			pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
			pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;

			/* add the prefix if not yet. */
			if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
				/*
				 * nd6_prelist_add will install the corresponding
				 * interface route.
				 */
				if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
					return (error);
				if (pr == NULL) {
					log(LOG_ERR, "nd6_prelist_add succeeded but "
					    "no prefix\n");
					return (EINVAL); /* XXX panic here? */
				}
			}

			/* relate the address to the prefix */
			if (ia->ia6_ndpr == NULL) {
				ia->ia6_ndpr = pr;
				pr->ndpr_refcnt++;

				/*
				 * If this is the first autoconf address from the
				 * prefix, create a temporary address as well
				 * (when required).
				 */
				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
				    V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) {
					int e;
					if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
						log(LOG_NOTICE, "in6_control: failed "
						    "to create a temporary address, "
						    "errno=%d\n", e);
					}
				}
			}

			/*
			 * this might affect the status of autoconfigured addresses,
			 * that is, this address might make other addresses detached.
			 */
			pfxlist_onlink_check();
			if (error == 0 && ia)
				EVENTHANDLER_INVOKE(ifaddr_event, ifp);
			break;
		    }

		case SIOCDIFADDR_IN6:
		    {
			struct nd_prefix *pr;

			/*
			 * If the address being deleted is the only one that owns
			 * the corresponding prefix, expire the prefix as well.
			 * XXX: theoretically, we don't have to worry about such
			 * relationship, since we separate the address management
			 * and the prefix management.  We do this, however, to provide
			 * as much backward compatibility as possible in terms of
			 * the ioctl operation.
			 * Note that in6_purgeaddr() will decrement ndpr_refcnt.
			 */
			pr = ia->ia6_ndpr;
			in6_purgeaddr(&ia->ia_ifa);
			if (pr && pr->ndpr_refcnt == 0)
				prelist_remove(pr);
			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
			break;
		    }

		default:
			/* Not ours: hand the request to the interface driver. */
			if (ifp == NULL || ifp->if_ioctl == 0)
				return (EOPNOTSUPP);
			return ((*ifp->if_ioctl)(ifp, cmd, data));
		}

		return (0);
	}

	/*
	* Update parameters of an IPv6 interface address.
	* If necessary, a new entry is created and linked into address chains.
	* This function is separated from in6_control().
	* XXX: should this be performed under splnet()?
	*/
	int
	in6_update_ifa(struct ifnet ifp, struct in6_aliasreq ifra,
	struct in6_ifaddr *ia, int flags)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
	int error = 0, hostIsNew = 0, plen = -1;
	struct in6_ifaddr *oia;
	struct sockaddr_in6 dst6;
	struct in6_addrlifetime *lt;
	struct in6_multi_mship *imm;
	struct in6_multi *in6m_sol;
	struct rtentry *rt;
	int delay;
	char ip6buf[INET6_ADDRSTRLEN];

	/* Validate parameters */
	if (ifp == NULL \|\| ifra == NULL) /* this maybe redundant */
	return (EINVAL);

	/*
	* The destination address for a p2p link must have a family
	* of AF_UNSPEC or AF_INET6.
	*/
	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
	ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
	ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
	return (EAFNOSUPPORT);
	/*
	* validate ifra_prefixmask. don't check sin6_family, netmask
	* does not carry fields other than sin6_len.
	*/
	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
	return (EINVAL);
	/*
	* Because the IPv6 address architecture is classless, we require
	* users to specify a (non 0) prefix length (mask) for a new address.
	* We also require the prefix (when specified) mask is valid, and thus
	* reject a non-consecutive mask.
	*/
	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
	return (EINVAL);
	if (ifra->ifra_prefixmask.sin6_len != 0) {
	plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
	(u_char *)&ifra->ifra_prefixmask +
	ifra->ifra_prefixmask.sin6_len);
	if (plen <= 0)
	return (EINVAL);
	} else {
	/*
	* In this case, ia must not be NULL. We just use its prefix
	* length.
	*/
	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
	}
	/*
	* If the destination address on a p2p interface is specified,
	* and the address is a scoped one, validate/set the scope
	* zone identifier.
	*/
	dst6 = ifra->ifra_dstaddr;
	if ((ifp->if_flags & (IFF_POINTOPOINT\|IFF_LOOPBACK)) != 0 &&
	(dst6.sin6_family == AF_INET6)) {
	struct in6_addr in6_tmp;
	u_int32_t zoneid;

	in6_tmp = dst6.sin6_addr;
	if (in6_setscope(&in6_tmp, ifp, &zoneid))
	return (EINVAL); /* XXX: should be impossible */

	if (dst6.sin6_scope_id != 0) {
	if (dst6.sin6_scope_id != zoneid)
	return (EINVAL);
	} else /* user omit to specify the ID. */
	dst6.sin6_scope_id = zoneid;

	/* convert into the internal form */
	if (sa6_embedscope(&dst6, 0))
	return (EINVAL); /* XXX: should be impossible */
	}
	/*
	* The destination address can be specified only for a p2p or a
	* loopback interface. If specified, the corresponding prefix length
	* must be 128.
	*/
	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
	if ((ifp->if_flags & (IFF_POINTOPOINT\|IFF_LOOPBACK)) == 0) {
	/* XXX: noisy message */
	nd6log((LOG_INFO, "in6_update_ifa: a destination can "
	"be specified for a p2p or a loopback IF only\n"));
	return (EINVAL);
	}
	if (plen != 128) {
	nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
	"be 128 when dstaddr is specified\n"));
	return (EINVAL);
	}
	}
	/* lifetime consistency check */
	lt = &ifra->ifra_lifetime;
	if (lt->ia6t_pltime > lt->ia6t_vltime)
	return (EINVAL);
	if (lt->ia6t_vltime == 0) {
	/*
	* the following log might be noisy, but this is a typical
	* configuration mistake or a tool's bug.
	*/
	nd6log((LOG_INFO,
	"in6_update_ifa: valid lifetime is 0 for %s\n",
	ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));

	if (ia == NULL)
	return (0); /* there's nothing to do */
	}

	/*
	* If this is a new address, allocate a new ifaddr and link it
	* into chains.
	*/
	if (ia == NULL) {
	hostIsNew = 1;
	/*
	* When in6_update_ifa() is called in a process of a received
	* RA, it is called under an interrupt context. So, we should
	* call malloc with M_NOWAIT.
	*/
	ia = (struct in6_ifaddr ) malloc(sizeof(ia), M_IFADDR,
	M_NOWAIT);
	if (ia == NULL)
	return (ENOBUFS);
	bzero((caddr_t)ia, sizeof(*ia));
	LIST_INIT(&ia->ia6_memberships);
	/* Initialize the address and masks, and put time stamp */
	IFA_LOCK_INIT(&ia->ia_ifa);
	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
	ia->ia_addr.sin6_family = AF_INET6;
	ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
	ia->ia6_createtime = time_second;
	if ((ifp->if_flags & (IFF_POINTOPOINT \| IFF_LOOPBACK)) != 0) {
	/*
	* XXX: some functions expect that ifa_dstaddr is not
	* NULL for p2p interfaces.
	*/
	ia->ia_ifa.ifa_dstaddr =
	(struct sockaddr *)&ia->ia_dstaddr;
	} else {
	ia->ia_ifa.ifa_dstaddr = NULL;
	}
	ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;

	ia->ia_ifp = ifp;
	if ((oia = V_in6_ifaddr) != NULL) {
	for ( ; oia->ia_next; oia = oia->ia_next)
	continue;
	oia->ia_next = ia;
	} else
	V_in6_ifaddr = ia;

	ia->ia_ifa.ifa_refcnt = 1;
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
	}

	/* update timestamp */
	ia->ia6_updatetime = time_second;

	/* set prefix mask */
	if (ifra->ifra_prefixmask.sin6_len) {
	/*
	* We prohibit changing the prefix length of an existing
	* address, because
	* + such an operation should be rare in IPv6, and
	* + the operation would confuse prefix management.
	*/
	if (ia->ia_prefixmask.sin6_len &&
	in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
	nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
	" existing (%s) address should not be changed\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
	error = EINVAL;
	goto unlink;
	}
	ia->ia_prefixmask = ifra->ifra_prefixmask;
	}

	/*
	* If a new destination address is specified, scrub the old one and
	* install the new destination. Note that the interface must be
	* p2p or loopback (see the check above.)
	*/
	if (dst6.sin6_family == AF_INET6 &&
	!IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
	int e;

	if ((ia->ia_flags & IFA_ROUTE) != 0 &&
	(e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
	nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
	"a route to the old destination: %s\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
	/* proceed anyway... */
	} else
	ia->ia_flags &= ~IFA_ROUTE;
	ia->ia_dstaddr = dst6;
	}

	/*
	* Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred
	* to see if the address is deprecated or invalidated, but initialize
	* these members for applications.
	*/
	ia->ia6_lifetime = ifra->ifra_lifetime;
	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
	ia->ia6_lifetime.ia6t_expire =
	time_second + ia->ia6_lifetime.ia6t_vltime;
	} else
	ia->ia6_lifetime.ia6t_expire = 0;
	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
	ia->ia6_lifetime.ia6t_preferred =
	time_second + ia->ia6_lifetime.ia6t_pltime;
	} else
	ia->ia6_lifetime.ia6t_preferred = 0;

	/* reset the interface and routing table appropriately. */
	if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
	goto unlink;

	/*
	* configure address flags.
	*/
	ia->ia6_flags = ifra->ifra_flags;
	/*
	* backward compatibility - if IN6_IFF_DEPRECATED is set from the
	* userland, make it deprecated.
	*/
	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
	ia->ia6_lifetime.ia6t_pltime = 0;
	ia->ia6_lifetime.ia6t_preferred = time_second;
	}
	/*
	* Make the address tentative before joining multicast addresses,
	* so that corresponding MLD responses would not have a tentative
	* source address.
	*/
	ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */
	if (hostIsNew && in6if_do_dad(ifp))
	ia->ia6_flags \|= IN6_IFF_TENTATIVE;

	/*
	* We are done if we have simply modified an existing address.
	*/
	if (!hostIsNew)
	return (error);

	/*
	* Beyond this point, we should call in6_purgeaddr upon an error,
	* not just go to unlink.
	*/

	/* Join necessary multicast groups */
	in6m_sol = NULL;
	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
	struct sockaddr_in6 mltaddr, mltmask;
	struct in6_addr llsol;

	/* join solicited multicast addr for new host id */
	bzero(&llsol, sizeof(struct in6_addr));
	llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
	llsol.s6_addr32[1] = 0;
	llsol.s6_addr32[2] = htonl(1);
	llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
	llsol.s6_addr8[12] = 0xff;
	if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
	/* XXX: should not happen */
	log(LOG_ERR, "in6_update_ifa: "
	"in6_setscope failed\n");
	goto cleanup;
	}
	delay = 0;
	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
	/*
	* We need a random delay for DAD on the address
	* being configured. It also means delaying
	* transmission of the corresponding MLD report to
	* avoid report collision.
	* [draft-ietf-ipv6-rfc2462bis-02.txt]
	*/
	delay = arc4random() %
	(MAX_RTR_SOLICITATION_DELAY * hz);
	}
	imm = in6_joingroup(ifp, &llsol, &error, delay);
	if (imm == NULL) {
	nd6log((LOG_WARNING,
	"in6_update_ifa: addmulti failed for "
	"%s on %s (errno=%d)\n",
	ip6_sprintf(ip6buf, &llsol), if_name(ifp),
	error));
	in6_purgeaddr((struct ifaddr *)ia);
	return (error);
	}
	LIST_INSERT_HEAD(&ia->ia6_memberships,
	imm, i6mm_chain);
	in6m_sol = imm->i6mm_maddr;

	bzero(&mltmask, sizeof(mltmask));
	mltmask.sin6_len = sizeof(struct sockaddr_in6);
	mltmask.sin6_family = AF_INET6;
	mltmask.sin6_addr = in6mask32;
	#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */

	/*
	* join link-local all-nodes address
	*/
	bzero(&mltaddr, sizeof(mltaddr));
	mltaddr.sin6_len = sizeof(struct sockaddr_in6);
	mltaddr.sin6_family = AF_INET6;
	mltaddr.sin6_addr = in6addr_linklocal_allnodes;
	if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) !=
	0)
	goto cleanup; /* XXX: should not fail */

	/*
	* XXX: do we really need this automatic routes?
	* We should probably reconsider this stuff. Most applications
	* actually do not need the routes, since they usually specify
	* the outgoing interface.
	*/
	rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
	if (rt) {
	/* XXX: only works in !SCOPEDROUTING case. */
	if (memcmp(&mltaddr.sin6_addr,
	&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
	MLTMASK_LEN)) {
	RTFREE_LOCKED(rt);
	rt = NULL;
	}
	}
	if (!rt) {
	/* XXX: we need RTF_CLONING to fake nd6_rtrequest */
	error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
	(struct sockaddr *)&ia->ia_addr,
	(struct sockaddr *)&mltmask, RTF_UP \| RTF_CLONING,
	(struct rtentry **)0);
	if (error)
	goto cleanup;
	} else {
	RTFREE_LOCKED(rt);
	}

	imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
	if (!imm) {
	nd6log((LOG_WARNING,
	"in6_update_ifa: addmulti failed for "
	"%s on %s (errno=%d)\n",
	ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
	if_name(ifp), error));
	goto cleanup;
	}
	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);

	/*
	* join node information group address
	*/
	#define hostnamelen strlen(V_hostname)
	delay = 0;
	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
	/*
	* The spec doesn't say anything about delay for this
	* group, but the same logic should apply.
	*/
	delay = arc4random() %
	(MAX_RTR_SOLICITATION_DELAY * hz);
	}
	mtx_lock(&hostname_mtx);
	if (in6_nigroup(ifp, V_hostname, hostnamelen,
	&mltaddr.sin6_addr) == 0) {
	mtx_unlock(&hostname_mtx);
	imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error,
	delay); /* XXX jinmei */
	if (!imm) {
	nd6log((LOG_WARNING, "in6_update_ifa: "
	"addmulti failed for %s on %s "
	"(errno=%d)\n",
	ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
	if_name(ifp), error));
	/* XXX not very fatal, go on... */
	} else {
	LIST_INSERT_HEAD(&ia->ia6_memberships,
	imm, i6mm_chain);
	}
	} else
	mtx_unlock(&hostname_mtx);
	#undef hostnamelen

	/*
	* join interface-local all-nodes address.
	* (ff01::1%ifN, and ff01::%ifN/32)
	*/
	mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
	if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL))
	!= 0)
	goto cleanup; /* XXX: should not fail */
	/* XXX: again, do we really need the route? */
	rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
	if (rt) {
	if (memcmp(&mltaddr.sin6_addr,
	&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
	MLTMASK_LEN)) {
	RTFREE_LOCKED(rt);
	rt = NULL;
	}
	}
	if (!rt) {
	error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
	(struct sockaddr *)&ia->ia_addr,
	(struct sockaddr *)&mltmask, RTF_UP \| RTF_CLONING,
	(struct rtentry **)0);
	if (error)
	goto cleanup;
	} else
	RTFREE_LOCKED(rt);

	imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
	if (!imm) {
	nd6log((LOG_WARNING, "in6_update_ifa: "
	"addmulti failed for %s on %s "
	"(errno=%d)\n",
	ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
	if_name(ifp), error));
	goto cleanup;
	}
	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
	#undef MLTMASK_LEN
	}

	/*
	* Perform DAD, if needed.
	* XXX It may be of use, if we can administratively
	* disable DAD.
	*/
	if (hostIsNew && in6if_do_dad(ifp) &&
	((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
	(ia->ia6_flags & IN6_IFF_TENTATIVE))
	{
	int mindelay, maxdelay;

	delay = 0;
	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
	/*
	* We need to impose a delay before sending an NS
	* for DAD. Check if we also needed a delay for the
	* corresponding MLD message. If we did, the delay
	* should be larger than the MLD delay (this could be
	* relaxed a bit, but this simple logic is at least
	* safe).
	*/
	mindelay = 0;
	if (in6m_sol != NULL &&
	in6m_sol->in6m_state == MLD_REPORTPENDING) {
	mindelay = in6m_sol->in6m_timer;
	}
	maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
	if (maxdelay - mindelay == 0)
	delay = 0;
	else {
	delay =
	(arc4random() % (maxdelay - mindelay)) +
	mindelay;
	}
	}
	nd6_dad_start((struct ifaddr *)ia, delay);
	}

	return (error);

	unlink:
	/*
	* XXX: if a change of an existing address failed, keep the entry
	* anyway.
	*/
	if (hostIsNew)
	in6_unlink_ifa(ia, ifp);
	return (error);

	cleanup:
	in6_purgeaddr(&ia->ia_ifa);
	return error;
	}

	/*
	* Remove an IPv6 address from its interface: stop DAD for it, issue an
	* lla_lookup() with LLE_DELETE and LLE_IFADDR for the address between
	* IF_AFDATA_LOCK and IF_AFDATA_UNLOCK, leave every multicast group
	* recorded in ia6_memberships, and finally call in6_unlink_ifa().
	*/
	void
	in6_purgeaddr(struct ifaddr *ifa)
	{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct in6_ifaddr ia = (struct in6_ifaddr ) ifa;
	- struct llentry *ln = NULL;
	struct in6_multi_mship *imm;

	/* stop DAD processing */
	nd6_dad_stop(ifa);

	/* ask the link-layer table to delete the entry for this address */
	IF_AFDATA_LOCK(ifp);
	- ln = lla_lookup(LLTABLE6(ifp), (LLE_DELETE \| LLE_IFADDR),
	+ lla_lookup(LLTABLE6(ifp), (LLE_DELETE \| LLE_IFADDR),
	(struct sockaddr *)&ia->ia_addr);
	- if (ln == NULL)
	- log(LOG_INFO, "nd6_purgeaddr: interface address is missing from cache\n");
	- else
	- log(LOG_INFO, "nd6_purgeaddr: ifaddr cache = %p is deleted\n", ln);
	IF_AFDATA_UNLOCK(ifp);
	-
	+
	/*
	* leave from multicast groups we have joined for the interface
	*/
	while ((imm = ia->ia6_memberships.lh_first) != NULL) {
	LIST_REMOVE(imm, i6mm_chain);
	in6_leavegroup(imm);
	}

	/* drop the address from the interface list and the global chain */
	in6_unlink_ifa(ia, ifp);
	}

	static void
	in6_unlink_ifa(struct in6_ifaddr ia, struct ifnet ifp)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	struct in6_ifaddr *oia;
	int s = splnet();

	TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);

	oia = ia;
	if (oia == (ia = V_in6_ifaddr))
	V_in6_ifaddr = ia->ia_next;
	else {
	while (ia->ia_next && (ia->ia_next != oia))
	ia = ia->ia_next;
	if (ia->ia_next)
	ia->ia_next = oia->ia_next;
	else {
	/* search failed */
	printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n");
	}
	}

	/*
	* Release the reference to the base prefix. There should be a
	* positive reference.
	*/
	if (oia->ia6_ndpr == NULL) {
	nd6log((LOG_NOTICE,
	"in6_unlink_ifa: autoconf'ed address "
	"%p has no prefix\n", oia));
	} else {
	oia->ia6_ndpr->ndpr_refcnt--;
	oia->ia6_ndpr = NULL;
	}

	/*
	* Also, if the address being removed is autoconf'ed, call
	* pfxlist_onlink_check() since the release might affect the status of
	* other (detached) addresses.
	*/
	if ((oia->ia6_flags & IN6_IFF_AUTOCONF)) {
	pfxlist_onlink_check();
	}

	/*
	* release another refcnt for the link from in6_ifaddr.
	* Note that we should decrement the refcnt at least once for all *BSD.
	*/
	IFAFREE(&oia->ia_ifa);

	splx(s);
	}

	void
	in6_purgeif(struct ifnet *ifp)
	{
	struct ifaddr ifa, nifa;

	for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) {
	nifa = TAILQ_NEXT(ifa, ifa_list);
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	in6_purgeaddr(ifa);
	}

	in6_ifdetach(ifp);
	}

	/*
	* SIOC[GAD]LIFADDR.
	* SIOCGLIFADDR: get first address. (?)
	* SIOCGLIFADDR with IFLR_PREFIX:
	* get first address that matches the specified prefix.
	* SIOCALIFADDR: add the specified address.
	* SIOCALIFADDR with IFLR_PREFIX:
	* add the specified prefix, filling hostid part from
	* the first link-local address. prefixlen must be <= 64.
	* SIOCDLIFADDR: delete the specified address.
	* SIOCDLIFADDR with IFLR_PREFIX:
	* delete the first address that matches the specified prefix.
	* return values:
	* EINVAL on invalid parameters
	* EADDRNOTAVAIL on prefix match failed/specified address not found
	* other values may be returned from in6_ioctl()
	*
	* NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
	* this is to accommodate address naming schemes other than RFC2374,
	* in the future.
	* RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
	* address encoding scheme. (see figure on page 8)
	*/
	/*
	 * Handle SIOCGLIFADDR, SIOCALIFADDR and SIOCDLIFADDR by validating the
	 * if_laddrreq in `data' and, for add/delete, translating it into an
	 * in6_aliasreq that is passed on to in6_control().
	 *
	 * Returns 0 on success, EINVAL for malformed requests, EADDRNOTAVAIL
	 * when no matching address exists, EOPNOTSUPP for other commands, or
	 * whatever in6_control()/sa6_recoverscope() report.
	 */
	static int
	in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
	    struct ifnet *ifp, struct thread *td)
	{
		struct if_laddrreq *iflr = (struct if_laddrreq *)data;
		struct ifaddr *ifa;
		struct sockaddr *sa;

		/* sanity checks */
		if (!data || !ifp) {
			panic("invalid argument to in6_lifaddr_ioctl");
			/* NOTREACHED */
		}

		switch (cmd) {
		case SIOCGLIFADDR:
			/* address must be specified on GET with IFLR_PREFIX */
			if ((iflr->flags & IFLR_PREFIX) == 0)
				break;
			/* FALLTHROUGH */
		case SIOCALIFADDR:
		case SIOCDLIFADDR:
			/* address must be specified on ADD and DELETE */
			sa = (struct sockaddr *)&iflr->addr;
			if (sa->sa_family != AF_INET6)
				return EINVAL;
			if (sa->sa_len != sizeof(struct sockaddr_in6))
				return EINVAL;
			/* XXX need improvement */
			sa = (struct sockaddr *)&iflr->dstaddr;
			if (sa->sa_family && sa->sa_family != AF_INET6)
				return EINVAL;
			if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
				return EINVAL;
			break;
		default: /* shouldn't happen */
	#if 0
			panic("invalid cmd to in6_lifaddr_ioctl");
			/* NOTREACHED */
	#else
			return EOPNOTSUPP;
	#endif
		}
		if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
			return EINVAL;

		switch (cmd) {
		case SIOCALIFADDR:
		    {
			struct in6_aliasreq ifra;
			struct in6_addr *hostid = NULL;
			int prefixlen;

			if ((iflr->flags & IFLR_PREFIX) != 0) {
				struct sockaddr_in6 *sin6;

				/*
				 * hostid is to fill in the hostid part of the
				 * address.  hostid points to the first link-local
				 * address attached to the interface.
				 */
				ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
				if (!ifa)
					return EADDRNOTAVAIL;
				hostid = IFA_IN6(ifa);

				/* prefixlen must be <= 64. */
				if (64 < iflr->prefixlen)
					return EINVAL;
				prefixlen = iflr->prefixlen;

				/* hostid part must be zero. */
				sin6 = (struct sockaddr_in6 *)&iflr->addr;
				if (sin6->sin6_addr.s6_addr32[2] != 0 ||
				    sin6->sin6_addr.s6_addr32[3] != 0) {
					return EINVAL;
				}
			} else
				prefixlen = iflr->prefixlen;

			/* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));

			bcopy(&iflr->addr, &ifra.ifra_addr,
			    ((struct sockaddr *)&iflr->addr)->sa_len);
			if (hostid) {
				/* fill in hostid part */
				ifra.ifra_addr.sin6_addr.s6_addr32[2] =
				    hostid->s6_addr32[2];
				ifra.ifra_addr.sin6_addr.s6_addr32[3] =
				    hostid->s6_addr32[3];
			}

			if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
				bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
				    ((struct sockaddr *)&iflr->dstaddr)->sa_len);
				if (hostid) {
					ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
					    hostid->s6_addr32[2];
					ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
					    hostid->s6_addr32[3];
				}
			}

			ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
			in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);

			ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
			return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
		    }
		case SIOCGLIFADDR:
		case SIOCDLIFADDR:
		    {
			struct in6_ifaddr *ia;
			struct in6_addr mask, candidate, match;
			struct sockaddr_in6 *sin6;
			int cmp;

			bzero(&mask, sizeof(mask));
			if (iflr->flags & IFLR_PREFIX) {
				/* lookup a prefix rather than address. */
				in6_prefixlen2mask(&mask, iflr->prefixlen);

				sin6 = (struct sockaddr_in6 *)&iflr->addr;
				bcopy(&sin6->sin6_addr, &match, sizeof(match));
				match.s6_addr32[0] &= mask.s6_addr32[0];
				match.s6_addr32[1] &= mask.s6_addr32[1];
				match.s6_addr32[2] &= mask.s6_addr32[2];
				match.s6_addr32[3] &= mask.s6_addr32[3];

				/* if you set extra bits, that's wrong */
				if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
					return EINVAL;

				cmp = 1;
			} else {
				if (cmd == SIOCGLIFADDR) {
					/* on getting an address, take the 1st match */
					cmp = 0;	/* XXX */
				} else {
					/* on deleting an address, do exact match */
					in6_prefixlen2mask(&mask, 128);
					sin6 = (struct sockaddr_in6 *)&iflr->addr;
					bcopy(&sin6->sin6_addr, &match, sizeof(match));

					cmp = 1;
				}
			}

			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family != AF_INET6)
					continue;
				if (!cmp)
					break;

				/*
				 * XXX: this is adhoc, but is necessary to allow
				 * a user to specify fe80::/64 (not /10) for a
				 * link-local address.
				 */
				bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
				in6_clearscope(&candidate);
				candidate.s6_addr32[0] &= mask.s6_addr32[0];
				candidate.s6_addr32[1] &= mask.s6_addr32[1];
				candidate.s6_addr32[2] &= mask.s6_addr32[2];
				candidate.s6_addr32[3] &= mask.s6_addr32[3];
				if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
					break;
			}
			if (!ifa)
				return EADDRNOTAVAIL;
			ia = ifa2ia6(ifa);

			if (cmd == SIOCGLIFADDR) {
				int error;

				/* fill in the if_laddrreq structure */
				bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
				error = sa6_recoverscope(
				    (struct sockaddr_in6 *)&iflr->addr);
				if (error != 0)
					return (error);

				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
					    ia->ia_dstaddr.sin6_len);
					error = sa6_recoverscope(
					    (struct sockaddr_in6 *)&iflr->dstaddr);
					if (error != 0)
						return (error);
				} else
					bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

				iflr->prefixlen =
				    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);

				iflr->flags = ia->ia6_flags;	/* XXX */

				return 0;
			} else {
				struct in6_aliasreq ifra;

				/* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
				bzero(&ifra, sizeof(ifra));
				bcopy(iflr->iflr_name, ifra.ifra_name,
				    sizeof(ifra.ifra_name));

				bcopy(&ia->ia_addr, &ifra.ifra_addr,
				    ia->ia_addr.sin6_len);
				if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
					bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
					    ia->ia_dstaddr.sin6_len);
				} else {
					bzero(&ifra.ifra_dstaddr,
					    sizeof(ifra.ifra_dstaddr));
				}
				/*
				 * The mask belongs in ifra_prefixmask; copying it
				 * into ifra_dstaddr would clobber the destination
				 * set just above and leave the mask zeroed.
				 */
				bcopy(&ia->ia_prefixmask, &ifra.ifra_prefixmask,
				    ia->ia_prefixmask.sin6_len);

				ifra.ifra_flags = ia->ia6_flags;
				return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
				    ifp, td);
			}
		    }
		}

		return EOPNOTSUPP;	/* just for safety */
	}

	/*
	* Initialize an interface's internet6 address
	* and routing table entry.
	*
	* Stores *sin6 as the address of ia, gives the driver a SIOCSIFADDR
	* call when at most one AF_INET6 address is on the interface, installs
	* a host route for a /128 address that has an AF_INET6 destination, and,
	* when newhost is set, creates the link-layer table entry for the
	* address. Returns 0 or the error from the driver ioctl / rtinit().
	*/
	static int
	in6_ifinit(struct ifnet ifp, struct in6_ifaddr ia,
	struct sockaddr_in6 *sin6, int newhost)
	{
	int error = 0, plen, ifacount = 0;
	int s = splimp();
	struct ifaddr *ifa;

	/*
	* Give the interface a chance to initialize
	* if this is its first address,
	* and to validate the address if necessary.
	*/
	/* count the AF_INET6 addresses currently on the interface */
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	ifacount++;
	}

	ia->ia_addr = *sin6;

	if (ifacount <= 1 && ifp->if_ioctl) {
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
	IFF_UNLOCKGIANT(ifp);
	if (error) {
	splx(s);
	return (error);
	}
	}
	splx(s);

	ia->ia_ifa.ifa_metric = ifp->if_metric;

	/* we could do in(6)_socktrim here, but just omit it at this moment. */

	/*
	* Special case:
	* If a new destination address is specified for a point-to-point
	* interface, install a route to the destination as an interface
	* direct route.
	* XXX: the logic below rejects assigning multiple addresses on a p2p
	* interface that share the same destination.
	*/
	/* the #if 0 branch is compiled out; only the #else branch is live */
	#if 0 /* QING - verify */
	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
	ia->ia_dstaddr.sin6_family == AF_INET6) {
	int rtflags = RTF_UP \| RTF_HOST;
	struct rtentry rt = NULL, *rtp = NULL;

	if (nd6_need_cache(ifp) != 0) {
	rtflags \|= RTF_LLINFO;
	rtp = &rt;
	}

	error = rtrequest(RTM_ADD,
	(struct sockaddr *)&ia->ia_dstaddr,
	(struct sockaddr *)&ia->ia_addr,
	(struct sockaddr *)&ia->ia_prefixmask,
	ia->ia_flags \| rtflags, rtp);
	if (error != 0)
	return (error);
	if (rt != NULL) {
	struct llinfo_nd6 *ln;

	RT_LOCK(rt);
	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	if (ln != NULL) {
	/*
	* Set the state to STALE because we don't
	* have to perform address resolution on this
	* link.
	*/
	ln->ln_state = ND6_LLINFO_STALE;
	}
	RT_REMREF(rt);
	RT_UNLOCK(rt);
	}
	ia->ia_flags \|= IFA_ROUTE;
	}
	if (plen < 128) {
	/*
	* The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
	*/
	ia->ia_ifa.ifa_flags \|= RTF_CLONING;
	}
	#else
	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
	ia->ia_dstaddr.sin6_family == AF_INET6) {
	if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD,
	RTF_UP \| RTF_HOST)) != 0)
	return (error);
	ia->ia_flags \|= IFA_ROUTE;
	}
	if (plen < 128) {
	/*
	* The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
	*/
	ia->ia_ifa.ifa_flags \|= RTF_CLONING;
	}
	#endif

	/* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */
	if (newhost) {
	struct llentry *ln;

	IF_AFDATA_LOCK(ifp);
	ia->ia_ifa.ifa_rtrequest = NULL;

	/* Qing
	* we need to report rt_newaddrmsg
	*/
	/*
	* The "+" side requests LLE_EXCLUSIVE, drops the AF data lock right
	* after the lookup, and releases the entry with LLE_WUNLOCK once its
	* fields are set.
	*/
	- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE \| LLE_IFADDR),
	+ ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE \| LLE_IFADDR \| LLE_EXCLUSIVE),
	(struct sockaddr *)&ia->ia_addr);
	+ IF_AFDATA_UNLOCK(ifp);
	if (ln) {
	ln->la_expire = 0; /* for IPv6 this means permanent */
	ln->ln_state = ND6_LLINFO_REACHABLE;
	+ LLE_WUNLOCK(ln);
	}
	- IF_AFDATA_UNLOCK(ifp);
	}

	return (error);
	}

	struct in6_multi_mship *
	in6_joingroup(struct ifnet ifp, struct in6_addr addr,
	int *errorp, int delay)
	{
	struct in6_multi_mship *imm;

	imm = malloc(sizeof(*imm), M_IP6MADDR, M_NOWAIT);
	if (!imm) {
	*errorp = ENOBUFS;
	return NULL;
	}
	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, delay);
	if (!imm->i6mm_maddr) {
	/* errorp is alrady set /
	free(imm, M_IP6MADDR);
	return NULL;
	}
	return imm;
	}

	int
	in6_leavegroup(struct in6_multi_mship *imm)
	{

	if (imm->i6mm_maddr)
	in6_delmulti(imm->i6mm_maddr);
	free(imm, M_IP6MADDR);
	return 0;
	}

	/*
	 * Return the first link-local IPv6 address on ifp whose ia6_flags
	 * share no bit with ignoreflags, or NULL when there is none.
	 */
	struct in6_ifaddr *
	in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
	{
		struct ifaddr *cur;
		struct in6_ifaddr *cand;

		TAILQ_FOREACH(cur, &ifp->if_addrlist, ifa_list) {
			if (cur->ifa_addr->sa_family != AF_INET6)
				continue;
			if (!IN6_IS_ADDR_LINKLOCAL(IFA_IN6(cur)))
				continue;

			cand = (struct in6_ifaddr *)cur;
			if ((cand->ia6_flags & ignoreflags) == 0)
				return (cand);
		}

		/* walked the whole list without a usable link-local address */
		return (NULL);
	}


	/*
	* find the internet address corresponding to a given interface and address.
	*/
	struct in6_ifaddr *
	in6ifa_ifpwithaddr(struct ifnet ifp, struct in6_addr addr)
	{
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
	break;
	}

	return ((struct in6_ifaddr *)ifa);
	}

	/*
	 * Convert IP6 address to printable (loggable) representation.  Caller
	 * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
	 *
	 * The first run of two or more zero groups is compressed to "::" and
	 * leading zeros inside a group are dropped.  Returns ip6buf.
	 */
	static char digits[] = "0123456789abcdef";
	char *
	ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
	{
		int i;
		char *cp;
		const uint16_t *a = (const uint16_t *)addr;
		const uint8_t *d;
		int dcolon = 0, zero = 0;

		cp = ip6buf;

		for (i = 0; i < 8; i++) {
			/* dcolon: 0 = "::" not used yet, 1 = inside it, 2 = done */
			if (dcolon == 1) {
				if (*a == 0) {
					if (i == 7)
						*cp++ = ':';
					a++;
					continue;
				} else
					dcolon = 2;
			}
			if (*a == 0) {
				/*
				 * Only peek at the next group when there is one;
				 * for the last group a[1] lies past the address.
				 */
				if (dcolon == 0 && i < 7 && *(a + 1) == 0) {
					if (i == 0)
						*cp++ = ':';
					*cp++ = ':';
					dcolon = 1;
				} else {
					*cp++ = '0';
					*cp++ = ':';
				}
				a++;
				continue;
			}
			d = (const uint8_t *)a;
			/* Try to eliminate leading zeros in printout like in :0001. */
			zero = 1;
			*cp = digits[*d >> 4];
			if (*cp != '0') {
				zero = 0;
				cp++;
			}
			*cp = digits[*d++ & 0xf];
			if (zero == 0 || (*cp != '0')) {
				zero = 0;
				cp++;
			}
			*cp = digits[*d >> 4];
			if (zero == 0 || (*cp != '0')) {
				zero = 0;
				cp++;
			}
			*cp++ = digits[*d & 0xf];
			*cp++ = ':';
			a++;
		}
		/* overwrite the trailing ':' with the terminator */
		*--cp = '\0';
		return (ip6buf);
	}

	/*
	 * Return 1 when in6 is loopback, link-local, or falls inside the
	 * prefix of any address on the global in6_ifaddr chain; 0 otherwise.
	 */
	int
	in6_localaddr(struct in6_addr *in6)
	{
		INIT_VNET_INET6(curvnet);
		struct in6_ifaddr *ia;

		if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
			return 1;

		for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
			if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
			    &ia->ia_prefixmask.sin6_addr)) {
				return 1;
			}
		}

		return (0);
	}

	int
	in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
	{
	INIT_VNET_INET6(curvnet);
	struct in6_ifaddr *ia;

	for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
	if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
	&sa6->sin6_addr) &&
	(ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
	return (1); /* true */

	/* XXX: do we still have to go thru the rest of the list? */
	}

	return (0); /* false */
	}

	/*
	 * Return the number of leading bits (0..128) that src and dst have in
	 * common.
	 */
	int
	in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
	{
		int match = 0;
		const unsigned char *s = (const unsigned char *)src;
		const unsigned char *d = (const unsigned char *)dst;
		const unsigned char *lim = s + 16;
		unsigned char r;

		while (s < lim)
			if ((r = (*d++ ^ *s++)) != 0) {
				/* count equal high-order bits of the first differing byte */
				while (r < 128) {
					match++;
					r <<= 1;
				}
				break;
			} else
				match += 8;
		return match;
	}

	/* XXX: to be scope conscious */
	int
	in6_are_prefix_equal(struct in6_addr p1, struct in6_addr p2, int len)
	{
	int bytelen, bitlen;

	/* sanity check */
	if (0 > len \|\| len > 128) {
	log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
	len);
	return (0);
	}

	bytelen = len / 8;
	bitlen = len % 8;

	if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen))
	return (0);
	if (bitlen != 0 &&
	p1->s6_addr[bytelen] >> (8 - bitlen) !=
	p2->s6_addr[bytelen] >> (8 - bitlen))
	return (0);

	return (1);
	}

	/*
	 * Fill *maskp with a netmask whose first `len' bits are set.  When len
	 * is outside 0..128 an error is logged and *maskp is left untouched.
	 */
	void
	in6_prefixlen2mask(struct in6_addr *maskp, int len)
	{
		/* maskarray[n - 1] has the top n bits of a byte set */
		u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
		int bytelen, bitlen, i;

		/* sanity check */
		if (0 > len || len > 128) {
			log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
			    len);
			return;
		}

		bzero(maskp, sizeof(*maskp));
		bytelen = len / 8;
		bitlen = len % 8;
		for (i = 0; i < bytelen; i++)
			maskp->s6_addr[i] = 0xff;
		if (bitlen)
			maskp->s6_addr[bytelen] = maskarray[bitlen - 1];
	}

	/*
	* return the best address out of the same scope. if no address was
	* found, return the first valid address from designated IF.
	*/
	struct in6_ifaddr *
	in6_ifawithifp(struct ifnet ifp, struct in6_addr dst)
	{
	INIT_VNET_INET6(curvnet);
	int dst_scope = in6_addrscope(dst), blen = -1, tlen;
	struct ifaddr *ifa;
	struct in6_ifaddr *besta = 0;
	struct in6_ifaddr dep[2]; / last-resort: deprecated */

	dep[0] = dep[1] = NULL;

	/*
	* We first look for addresses in the same scope.
	* If there is one, return it.
	* If two or more, return one which matches the dst longest.
	* If none, return one of global addresses assigned other ifs.
	*/
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
	continue; /* XXX: is there any case to allow anycast? */
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
	continue; /* don't use this interface */
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
	continue;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
	if (V_ip6_use_deprecated)
	dep[0] = (struct in6_ifaddr *)ifa;
	continue;
	}

	if (dst_scope == in6_addrscope(IFA_IN6(ifa))) {
	/*
	* call in6_matchlen() as few as possible
	*/
	if (besta) {
	if (blen == -1)
	blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
	tlen = in6_matchlen(IFA_IN6(ifa), dst);
	if (tlen > blen) {
	blen = tlen;
	besta = (struct in6_ifaddr *)ifa;
	}
	} else
	besta = (struct in6_ifaddr *)ifa;
	}
	}
	if (besta)
	return (besta);

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
	continue; /* XXX: is there any case to allow anycast? */
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
	continue; /* don't use this interface */
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
	continue;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
	if (V_ip6_use_deprecated)
	dep[1] = (struct in6_ifaddr *)ifa;
	continue;
	}

	return (struct in6_ifaddr *)ifa;
	}

	/* use the last-resort values, that are, deprecated addresses */
	if (dep[0])
	return dep[0];
	if (dep[1])
	return dep[1];

	return NULL;
	}

	/*
	 * Called when an interface becomes IFF_UP: start DAD, after a random
	 * delay, on every IPv6 address still marked tentative, then run
	 * in6_ifattach() on the interface.
	 */
	void
	in6_if_up(struct ifnet *ifp)
	{
		struct ifaddr *cur;

		TAILQ_FOREACH(cur, &ifp->if_addrlist, ifa_list) {
			if (cur->ifa_addr->sa_family != AF_INET6)
				continue;
			if ((((struct in6_ifaddr *)cur)->ia6_flags &
			    IN6_IFF_TENTATIVE) == 0)
				continue;

			/*
			 * The TENTATIVE flag was likely set by hand beforehand,
			 * implicitly indicating the need for DAD.  We may be able
			 * to skip the random delay in this case, but we impose
			 * delays just in case.
			 */
			nd6_dad_start(cur,
			    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
		}

		/*
		 * special cases, like 6to4, are handled in in6_ifattach
		 */
		in6_ifattach(ifp, NULL);
	}

	/*
	 * Decide whether DAD should be performed on ifp.  Returns 0 for
	 * loopback interfaces, for IFT_FAITH (and IFT_DUMMY when defined), and
	 * for interfaces that are not both up and running; 1 otherwise.
	 */
	int
	in6if_do_dad(struct ifnet *ifp)
	{
		if ((ifp->if_flags & IFF_LOOPBACK) != 0)
			return (0);

		switch (ifp->if_type) {
	#ifdef IFT_DUMMY
		case IFT_DUMMY:
	#endif
		case IFT_FAITH:
			/*
			 * These interfaces do not have the IFF_LOOPBACK flag,
			 * but loop packets back.  We do not have to do DAD on such
			 * interfaces.  We should even omit it, because loop-backed
			 * NS would confuse the DAD procedure.
			 */
			return (0);
		default:
			break;
		}

		/*
		 * Our DAD routine requires the interface up and running.
		 * However, some interfaces can be up before the RUNNING
		 * status.  Additionally, users may try to assign addresses
		 * before the interface becomes up (or running).
		 * We simply skip DAD in such a case as a work around.
		 * XXX: we should rather mark "tentative" on such addresses,
		 * and do DAD after the interface becomes ready.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (0);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			return (0);

		return (1);
	}

	/*
	* Calculate max IPv6 MTU through all the interfaces and store it
	* to in6_maxmtu.
	*/
	void
	in6_setmaxmtu(void)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	unsigned long maxmtu = 0;
	struct ifnet *ifp;

	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
	ifp = TAILQ_NEXT(ifp, if_list)) {
	/* this function can be called during ifnet initialization */
	if (!ifp->if_afdata[AF_INET6])
	continue;
	if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
	IN6_LINKMTU(ifp) > maxmtu)
	maxmtu = IN6_LINKMTU(ifp);
	}
	IFNET_RUNLOCK();
	if (maxmtu) /* update only when maxmtu is positive */
	V_in6_maxmtu = maxmtu;
	}

	/*
	 * Provide the length of interface identifiers to be used for the link
	 * attached to the given interface.  The length should be defined in
	 * the "IPv6 over xxx-link" document.  Note that address architecture
	 * might also define the length for a particular set of address
	 * prefixes, regardless of the link type.  As clarified in rfc2462bis,
	 * those two definitions should be consistent, and those really are as
	 * of August 2004.
	 *
	 * Every link type yields 64; unknown types additionally print a notice.
	 */
	int
	in6_if2idlen(struct ifnet *ifp)
	{
		switch (ifp->if_type) {
		case IFT_ETHER:		/* RFC2464 */
	#ifdef IFT_PROPVIRTUAL
		case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
	#endif
	#ifdef IFT_L2VLAN
		case IFT_L2VLAN:	/* ditto */
	#endif
	#ifdef IFT_IEEE80211
		case IFT_IEEE80211:	/* ditto */
	#endif
	#ifdef IFT_MIP
		case IFT_MIP:		/* ditto */
	#endif
		case IFT_FDDI:		/* RFC2467 */
		case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
		case IFT_PPP:		/* RFC2472 */
		case IFT_ARCNET:	/* RFC2497 */
		case IFT_FRELAY:	/* RFC2590 */
		case IFT_IEEE1394:	/* RFC3146 */
		case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
		case IFT_LOOP:		/* XXX: is this really correct? */
			break;
		default:
			/*
			 * Unknown link type:
			 * It might be controversial to use the today's common
			 * constant of 64 for these cases unconditionally.  For
			 * full compliance, we should return an error in this
			 * case.  On the other hand, if we simply miss the standard
			 * for the link type or a new standard is defined for a new
			 * link type, the IFID length is very likely to be the
			 * common constant.  As a compromise, we always use the
			 * constant, but make an explicit notice indicating the
			 * "unknown" case.
			 */
			printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
			break;
		}

		return (64);
	}

	#include <sys/sysctl.h>

	/*
	* IPv6 link-layer table entry: the generic llentry followed by the
	* IPv6 address it describes. in6_lltable_new() hands out &base and
	* in6_lltable_free() frees that same pointer, so base has to remain
	* the first member.
	*/
	struct in6_llentry {
	struct llentry base;
	struct sockaddr_in6 l3_addr6;
	};

	/*
	* Allocate a zeroed in6_llentry for l3addr, initialize its timer
	* callout and record the address. On the "+" side the entry also starts
	* with a reference count of 1 and an initialized lock. The flags
	* argument is not used here. Returns the embedded llentry, or NULL when
	* the allocation fails.
	*/
	static struct llentry *
	in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
	{
	struct in6_llentry *lle;

	lle = malloc(sizeof(struct in6_llentry), M_LLTABLE,
	M_DONTWAIT \| M_ZERO);
	if (lle == NULL) /* NB: caller generates msg */
	return NULL;

	callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
	lle->l3_addr6 = (const struct sockaddr_in6 )l3addr;
	-
	+ lle->base.lle_refcnt = 1;
	+ LLE_LOCK_INIT(&lle->base);
	return &lle->base;
	}

	/*
	 * Deletes an address from the address table.
	 * This function is called by the timer functions
	 * such as arptimer() and nd6_llinfo_timer(), and
	 * the caller does the locking.
	 *
	 * The llt argument is not used; the entry's storage is simply freed.
	 */
	static void
	in6_lltable_free(struct lltable *llt, struct llentry *lle)
	{
		free(lle, M_LLTABLE);
	}

	static int
	in6_lltable_rtcheck(struct ifnet ifp, const struct sockaddr l3addr)
	{
	struct rtentry *rt;
	char ip6buf[INET6_ADDRSTRLEN];

	KASSERT(l3addr->sa_family == AF_INET6,
	("sin_family %d", l3addr->sa_family));

	/* XXX rtalloc1 should take a const param */
	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
	if (rt == NULL \|\| (rt->rt_flags & RTF_GATEWAY) \|\| rt->rt_ifp != ifp) {
	struct ifaddr *ifa;
	/*
	* Create an ND6 cache for an IPv6 neighbor
	* that is not covered by our own prefix.
	*/
	/* XXX ifaof_ifpforaddr should take a const param */
	ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
	if (ifa != NULL) {
	if (rt != NULL)
	rtfree(rt);
	return 0;
	}
	log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
	ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr));
	if (rt != NULL)
	rtfree(rt);
	return EINVAL;
	}
	rtfree(rt);
	return 0;
	}

	/*
	* Look up l3addr in the table's hash bucket selected by the low 32 bits
	* of the address, skipping entries flagged LLE_DELETED.
	*
	* With LLE_CREATE a missing entry is allocated and inserted; unless
	* LLE_IFADDR is also given, in6_lltable_rtcheck() must accept the
	* address first. With LLE_DELETE a found entry is flagged LLE_DELETED.
	*
	* On the "+" side a deleted entry yields NULL, and any entry that is
	* returned is locked: write-locked when LLE_EXCLUSIVE is set,
	* read-locked otherwise.
	*/
	static struct llentry *
	in6_lltable_lookup(struct lltable *llt, u_int flags,
	const struct sockaddr *l3addr)
	{
	const struct sockaddr_in6 sin6 = (const struct sockaddr_in6 )l3addr;
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	struct llentries *lleh;
	u_int hashkey;

	KASSERT(l3addr->sa_family == AF_INET6,
	("sin_family %d", l3addr->sa_family));

	/* bucket selection uses only the last 32-bit word of the address */
	hashkey = sin6->sin6_addr.s6_addr32[3];
	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
	LIST_FOREACH(lle, lleh, lle_next) {
	if (lle->la_flags & LLE_DELETED)
	continue;
	if (bcmp(L3_ADDR(lle), l3addr, l3addr->sa_len) == 0)
	break;
	}

	if (lle == NULL) {
	if (!(flags & LLE_CREATE))
	return (NULL);
	/*
	* A route that covers the given address must have
	* been installed 1st because we are doing a resolution,
	* verify this.
	*/
	if (!(flags & LLE_IFADDR) &&
	in6_lltable_rtcheck(ifp, l3addr) != 0)
	return NULL;

	lle = in6_lltable_new(l3addr, flags);
	if (lle == NULL) {
	log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
	return NULL;
	}
	lle->la_flags = flags & ~LLE_CREATE;
	/* an interface's own address gets its link-layer address right away */
	if ((flags & (LLE_CREATE \| LLE_IFADDR)) == (LLE_CREATE \| LLE_IFADDR)) {
	bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
	lle->la_flags \|= (LLE_VALID \| LLE_STATIC);
	}

	lle->lle_tbl = llt;
	lle->lle_head = lleh;
	LIST_INSERT_HEAD(lleh, lle, lle_next);
	- } else {
	- if (flags & LLE_DELETE)
	- lle->la_flags = LLE_DELETED;
	+ } else if (flags & LLE_DELETE) {
	+ LLE_WLOCK(lle);
	+ lle->la_flags = LLE_DELETED;
	+ LLE_WUNLOCK(lle);
	+#ifdef INVARIANTS
	+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
	+#endif
	+ lle = NULL;
	}
	- return lle;
	+ if (lle) {
	+ if (flags & LLE_EXCLUSIVE)
	+ LLE_WLOCK(lle);
	+ else
	+ LLE_RLOCK(lle);
	+ }
	+ return (lle);
	}

	static int
	in6_lltable_dump(struct lltable llt, struct sysctl_req wr)
	{
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	/* XXX stack use */
	struct {
	struct rt_msghdr rtm;
	struct sockaddr_in6 sin6;
	struct sockaddr_dl sdl;
	} ndpc;
	int i, error;

	/* XXXXX
	* current IFNET_RLOCK() is mapped to IFNET_WLOCK()
	* so it is okay to use this ASSERT, change it when
	* IFNET lock is finalized
	*/
	IFNET_WLOCK_ASSERT();

	error = 0;
	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
	LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
	/* skip deleted entries */
	if (lle->la_flags & LLE_DELETED)
	continue;
	/*
	* produce a msg made of:
	* struct rt_msghdr;
	* struct sockaddr_in6 (IPv6)
	* struct sockaddr_dl;
	*/
	bzero(&ndpc, sizeof(ndpc));
	ndpc.rtm.rtm_msglen = sizeof(ndpc);

	ndpc.sin6.sin6_family = AF_INET6;
	ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
	bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));

	/* publish */
	if (lle->la_flags & LLE_PUB)
	ndpc.rtm.rtm_flags \|= RTF_ANNOUNCE;

	if (lle->la_flags & LLE_VALID) { /* valid MAC */
	struct sockaddr_dl *sdl = &ndpc.sdl;

	sdl->sdl_family = AF_LINK;
	sdl->sdl_len = sizeof(*sdl);
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
	}
	ndpc.rtm.rtm_rmx.rmx_expire =
	lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
	ndpc.rtm.rtm_flags \|= RTF_LLINFO \| RTF_HOST;
	if (lle->la_flags & LLE_STATIC)
	ndpc.rtm.rtm_flags \|= RTF_STATIC;
	ndpc.rtm.rtm_index = ifp->if_index;
	error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
	if (error)
	break;
	}
	}
	return error;
	}

	/*
	 * Allocate and populate the per-interface IPv6 state for ifp.
	 *
	 * The returned in6_ifextra owns two zeroed statistics blocks, the ND
	 * and scope state returned by nd6_ifattach()/scope6_ifattach(), and an
	 * AF_INET6 link-layer table whose method pointers are set to the
	 * in6_lltable_* routines (when lltable_init() succeeds).  Allocations
	 * use M_WAITOK and are not checked for NULL.  in6_domifdetach()
	 * releases everything set up here.
	 */
	void *
	in6_domifattach(struct ifnet *ifp)
	{
		struct in6_ifextra *ext;

		/* Size the allocation by the structure, not by the pointer. */
		ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
		bzero(ext, sizeof(*ext));

		ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
		    M_IFADDR, M_WAITOK);
		bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));

		ext->icmp6_ifstat =
		    (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
		    M_IFADDR, M_WAITOK);
		bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));

		ext->nd_ifinfo = nd6_ifattach(ifp);
		ext->scope6_id = scope6_ifattach(ifp);
		ext->lltable = lltable_init(ifp, AF_INET6);
		if (ext->lltable != NULL) {
			ext->lltable->llt_new = in6_lltable_new;
			ext->lltable->llt_free = in6_lltable_free;
			ext->lltable->llt_rtcheck = in6_lltable_rtcheck;
			ext->lltable->llt_lookup = in6_lltable_lookup;
			ext->lltable->llt_dump = in6_lltable_dump;
		}
		return ext;
	}

	void
	in6_domifdetach(struct ifnet ifp, void aux)
	{
	struct in6_ifextra ext = (struct in6_ifextra )aux;

	scope6_ifdetach(ext->scope6_id);
	nd6_ifdetach(ext->nd_ifinfo);
	lltable_free(ext->lltable);
	free(ext->in6_ifstat, M_IFADDR);
	free(ext->icmp6_ifstat, M_IFADDR);
	free(ext, M_IFADDR);
	}

	/*
	* Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be
	* v4 mapped addr or v4 compat addr
	*/
	void
	in6_sin6_2_sin(struct sockaddr_in sin, struct sockaddr_in6 sin6)
	{

	bzero(sin, sizeof(*sin));
	sin->sin_len = sizeof(struct sockaddr_in);
	sin->sin_family = AF_INET;
	sin->sin_port = sin6->sin6_port;
	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
	}

	/* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
	void
	in6_sin_2_v4mapsin6(struct sockaddr_in sin, struct sockaddr_in6 sin6)
	{
	bzero(sin6, sizeof(*sin6));
	sin6->sin6_len = sizeof(struct sockaddr_in6);
	sin6->sin6_family = AF_INET6;
	sin6->sin6_port = sin->sin_port;
	sin6->sin6_addr.s6_addr32[0] = 0;
	sin6->sin6_addr.s6_addr32[1] = 0;
	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
	}

	/* Convert sockaddr_in6 into sockaddr_in. */
	void
	in6_sin6_2_sin_in_sock(struct sockaddr *nam)
	{
	struct sockaddr_in *sin_p;
	struct sockaddr_in6 sin6;

	/*
	* Save original sockaddr_in6 addr and convert it
	* to sockaddr_in.
	*/
	sin6 = (struct sockaddr_in6 )nam;
	sin_p = (struct sockaddr_in *)nam;
	in6_sin6_2_sin(sin_p, &sin6);
	}

	/*
	 * Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format.
	 *
	 * *nam points at an M_SONAME-allocated sockaddr_in on entry.  A new
	 * sockaddr_in6 is allocated (M_WAITOK), filled from it, the old
	 * buffer is freed and *nam is updated to the new one.
	 */
	void
	in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
	{
		struct sockaddr_in *sin_p;
		struct sockaddr_in6 *sin6_p;

		sin6_p = malloc(sizeof *sin6_p, M_SONAME,
		    M_WAITOK);
		sin_p = (struct sockaddr_in *)*nam;
		in6_sin_2_v4mapsin6(sin_p, sin6_p);
		free(*nam, M_SONAME);
		*nam = (struct sockaddr *)sin6_p;
	}
	Index: projects/arpv2_merge_1/sys/netinet6/in6_rmx.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/in6_rmx.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/in6_rmx.c (revision 185839)
	@@ -1,504 +1,504 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $
	*/

	/*-
	* Copyright 1994, 1995 Massachusetts Institute of Technology
	*
	* Permission to use, copy, modify, and distribute this software and
	* its documentation for any purpose and without fee is hereby
	* granted, provided that both the above copyright notice and this
	* permission notice appear in all copies, that both the above
	* copyright notice and this permission notice appear in all
	* supporting documentation, and that the name of M.I.T. not be used
	* in advertising or publicity pertaining to distribution of the
	* software without specific, written prior permission. M.I.T. makes
	* no representations about the suitability of this software for any
	* purpose. It is provided "as is" without express or implied
	* warranty.
	*
	* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
	* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
	* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	*/

	/*
	* This code does two things necessary for the enhanced TCP metrics to
	* function in a useful manner:
	* 1) It marks all non-host routes as `cloning', thus ensuring that
	* every actual reference to such a route actually gets turned
	* into a reference to a host route to the specific destination
	* requested.
	* 2) When such routes lose all their references, it arranges for them
	* to be deleted in some random collection of circumstances, so that
	* a large quantity of stale routing data is not kept in kernel memory
	* indefinitely. See in6_rtqtimo() below for the exact mechanism.
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/sysctl.h>
	#include <sys/queue.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/mbuf.h>
	#include <sys/rwlock.h>
	#include <sys/syslog.h>
	#include <sys/callout.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/ip_var.h>
	#include <netinet/in_var.h>

	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>

	#include <netinet/icmp6.h>
	#include <netinet6/nd6.h>
	#include <netinet6/vinet6.h>

	#include <netinet/tcp.h>
	#include <netinet/tcp_seq.h>
	#include <netinet/tcp_timer.h>
	#include <netinet/tcp_var.h>

	/* Routing-tree initialisation for AF_INET6; defined later in this file. */
	extern int in6_inithead(void **head, int off);

	/*
	 * Flag used by in6_clsroute(), in6_matroute() and in6_rtqkill() to tag
	 * host routes whose expiry this file controls.
	 */
	#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */

	/*
	* Do what we need to do when inserting a route.
	*/
	static struct radix_node *
	in6_addroute(void v_arg, void n_arg, struct radix_node_head *head,
	struct radix_node *treenodes)
	{
	struct rtentry rt = (struct rtentry )treenodes;
	struct sockaddr_in6 sin6 = (struct sockaddr_in6 )rt_key(rt);
	struct radix_node *ret;

	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
	rt->rt_flags \|= RTF_MULTICAST;

	/*
	* A little bit of help for both IPv6 output and input:
	* For local addresses, we make sure that RTF_LOCAL is set,
	* with the thought that this might one day be used to speed up
	* ip_input().
	*
	* We also mark routes to multicast addresses as such, because
	* it's easy to do and might be useful (but this is much more
	* dubious since it's so easy to inspect the address). (This
	* is done above.)
	*
	* XXX
	* should elaborate the code.
	*/
	if (rt->rt_flags & RTF_HOST) {
	if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
	->sin6_addr,
	&sin6->sin6_addr)) {
	rt->rt_flags \|= RTF_LOCAL;
	}
	}

	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
	rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && rt->rt_flags & RTF_HOST) {
	struct rtentry *rt2;
	/*
	* We are trying to add a host route, but can't.
	* Find out if it is because of an
	* ARP entry and delete it if so.
	*/
	rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
	if (rt2) {
	if (rt2->rt_flags & RTF_LLINFO &&
	rt2->rt_flags & RTF_HOST &&
	rt2->rt_gateway &&
	rt2->rt_gateway->sa_family == AF_LINK) {
	rtexpunge(rt2);
	RTFREE_LOCKED(rt2);
	ret = rn_addroute(v_arg, n_arg, head,
	treenodes);
	} else
	RTFREE_LOCKED(rt2);
	}
	} else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
	struct rtentry *rt2;
	/*
	* We are trying to add a net route, but can't.
	* The following case should be allowed, so we'll make a
	* special check for this:
	* Two IPv6 addresses with the same prefix is assigned
	* to a single interrface.
	* # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
	* # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
	* In this case, (1) and (2) want to add the same
	* net route entry, 3ffe:0501:: -> if0.
	* This case should not raise an error.
	*/
	rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
	if (rt2) {
	if ((rt2->rt_flags & (RTF_CLONING\|RTF_HOST\|RTF_GATEWAY))
	== RTF_CLONING
	&& rt2->rt_gateway
	&& rt2->rt_gateway->sa_family == AF_LINK
	&& rt2->rt_ifp == rt->rt_ifp) {
	ret = rt2->rt_nodes;
	}
	RTFREE_LOCKED(rt2);
	}
	}
	return ret;
	}

	/*
	* This code is the inverse of in6_clsroute: on first reference, if we
	* were managing the route, stop doing so and set the expiration timer
	* back off again.
	*/
	static struct radix_node *
	in6_matroute(void v_arg, struct radix_node_head head)
	{
	struct radix_node *rn = rn_match(v_arg, head);
	struct rtentry rt = (struct rtentry )rn;

	if (rt && rt->rt_refcnt == 0) { /* this is first reference */
	if (rt->rt_flags & RTPRF_OURS) {
	rt->rt_flags &= ~RTPRF_OURS;
	rt->rt_rmx.rmx_expire = 0;
	}
	}
	return rn;
	}

	SYSCTL_DECL(_net_inet6_ip6);

	/*
	 * Tunables for the cached-route expiry logic below; each is exported
	 * read-write (CTLFLAG_RW) under _net_inet6_ip6 by the SYSCTL_V_INT
	 * declarations that follow.  Initial values are set in in6_inithead().
	 */
	#ifdef VIMAGE_GLOBALS
	static int rtq_reallyold6;
	static int rtq_minreallyold6;
	static int rtq_toomany6;
	#endif

	/* Lifetime given to an unreferenced managed route (see in6_clsroute()). */
	SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTEXPIRE,
	rtexpire, CTLFLAG_RW, rtq_reallyold6 , 0, "");

	/* Floor below which in6_rtqtimo() will not lower rtq_reallyold6. */
	SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTMINEXPIRE,
	rtminexpire, CTLFLAG_RW, rtq_minreallyold6 , 0, "");

	/* Surviving-route count above which in6_rtqtimo() shortens the lifetime. */
	SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTMAXCACHE,
	rtmaxcache, CTLFLAG_RW, rtq_toomany6 , 0, "");


	/*
	* On last reference drop, mark the route as belong to us so that it can be
	* timed out.
	*/
	static void
	in6_clsroute(struct radix_node rn, struct radix_node_head head)
	{
	INIT_VNET_INET6(curvnet);
	struct rtentry rt = (struct rtentry )rn;

	RT_LOCK_ASSERT(rt);

	if (!(rt->rt_flags & RTF_UP))
	return; /* prophylactic measures */

	if ((rt->rt_flags & (RTF_LLINFO \| RTF_HOST)) != RTF_HOST)
	return;

	if ((rt->rt_flags & (RTF_WASCLONED \| RTPRF_OURS)) != RTF_WASCLONED)
	return;

	/*
	* As requested by David Greenman:
	* If rtq_reallyold6 is 0, just delete the route without
	* waiting for a timeout cycle to kill it.
	*/
	if (V_rtq_reallyold6 != 0) {
	rt->rt_flags \|= RTPRF_OURS;
	rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold6;
	} else {
	rtexpunge(rt);
	}
	}

	/*
	 * Walker state shared between in6_rtqkill() and its callers
	 * (in6_rtqtimo(), in6_rtqdrain()).
	 */
	struct rtqk_arg {
	struct radix_node_head *rnh; /* tree being walked (set by the callers) */
	int mode; /* not referenced by the code in this file section */
	int updating; /* clamp expiry times to the current rtq_reallyold6 */
	int draining; /* delete managed routes even if not yet expired */
	int killed; /* managed routes successfully deleted */
	int found; /* managed (RTPRF_OURS) routes seen */
	time_t nextstop; /* earliest remaining expiry seen during the walk */
	};

	/*
	* Get rid of old routes. When draining, this deletes everything, even when
	* the timeout is not expired yet. When updating, this makes sure that
	* nothing has a timeout longer than the current value of rtq_reallyold6.
	*/
	static int
	in6_rtqkill(struct radix_node rn, void rock)
	{
	INIT_VNET_INET6(curvnet);
	struct rtqk_arg *ap = rock;
	struct rtentry rt = (struct rtentry )rn;
	int err;

	if (rt->rt_flags & RTPRF_OURS) {
	ap->found++;

	if (ap->draining \|\| rt->rt_rmx.rmx_expire <= time_uptime) {
	if (rt->rt_refcnt > 0)
	panic("rtqkill route really not free");

	err = rtrequest(RTM_DELETE,
	(struct sockaddr *)rt_key(rt),
	rt->rt_gateway, rt_mask(rt),
	- rt->rt_flags, 0);
	+ rt->rt_flags\|RTF_RNH_LOCKED, 0);
	if (err) {
	log(LOG_WARNING, "in6_rtqkill: error %d", err);
	} else {
	ap->killed++;
	}
	} else {
	if (ap->updating
	&& (rt->rt_rmx.rmx_expire - time_uptime
	> V_rtq_reallyold6)) {
	rt->rt_rmx.rmx_expire = time_uptime
	+ V_rtq_reallyold6;
	}
	ap->nextstop = lmin(ap->nextstop,
	rt->rt_rmx.rmx_expire);
	}
	}

	return 0;
	}

	/* Initial value of rtq_timeout6, in seconds (assigned in in6_inithead()). */
	#define RTQ_TIMEOUT (60*10) /* run no less than once every ten minutes */
	#ifdef VIMAGE_GLOBALS
	static int rtq_timeout6;
	static struct callout rtq_timer6;
	#endif

	/*
	 * Periodic callout: walk the routing tree with in6_rtqkill() to expire
	 * managed routes, optionally shorten rtq_reallyold6 when too many remain,
	 * and re-arm the callout for the earliest remaining expiry.
	 *
	 * NOTE(review): rock is cast to struct vnet * for the VNET macros and is
	 * also used as the radix head; in6_inithead() passes the radix head.
	 * Confirm the macros ignore their argument in the configurations built.
	 */
	static void
	in6_rtqtimo(void *rock)
	{
	CURVNET_SET_QUIET((struct vnet *) rock);
	INIT_VNET_NET((struct vnet *) rock);
	INIT_VNET_INET6((struct vnet *) rock);
	struct radix_node_head *rnh = rock;
	struct rtqk_arg arg;
	struct timeval atv;
	/* Time of the last rtq_reallyold6 reduction; persists across calls. */
	static time_t last_adjusted_timeout = 0;

	/* First pass: expire routes, collect counts and the next deadline. */
	arg.found = arg.killed = 0;
	arg.rnh = rnh;
	arg.nextstop = time_uptime + V_rtq_timeout6;
	arg.draining = arg.updating = 0;
	RADIX_NODE_HEAD_LOCK(rnh);
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
	RADIX_NODE_HEAD_UNLOCK(rnh);

	/*
	* Attempt to be somewhat dynamic about this:
	* If there are ``too many'' routes sitting around taking up space,
	* then crank down the timeout, and see if we can't make some more
	* go away. However, we make sure that we will never adjust more
	* than once in rtq_timeout6 seconds, to keep from cranking down too
	* hard.
	*/
	if ((arg.found - arg.killed > V_rtq_toomany6)
	&& (time_uptime - last_adjusted_timeout >= V_rtq_timeout6)
	&& V_rtq_reallyold6 > V_rtq_minreallyold6) {
	/* Reduce the lifetime to two thirds, but not below the minimum. */
	V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3;
	if (V_rtq_reallyold6 < V_rtq_minreallyold6) {
	V_rtq_reallyold6 = V_rtq_minreallyold6;
	}

	last_adjusted_timeout = time_uptime;
	#ifdef DIAGNOSTIC
	log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d",
	V_rtq_reallyold6);
	#endif
	/* Second pass: clamp existing expiry times to the new lifetime. */
	arg.found = arg.killed = 0;
	arg.updating = 1;
	RADIX_NODE_HEAD_LOCK(rnh);
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	}

	/*
	* NOTE(review): unlike in6_mtutimo(), a negative interval is not
	* checked for here - confirm nextstop cannot lie in the past.
	*/
	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - time_uptime;
	callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock);
	CURVNET_RESTORE();
	}

	/*
	* Age old PMTUs.
	*
	* Walker state passed from in6_mtutimo() to in6_mtuexpire().
	*/
	struct mtuex_arg {
	struct radix_node_head *rnh; /* tree being walked (set by in6_mtutimo()) */
	time_t nextstop; /* earliest pending expiry seen during the walk */
	};
	#ifdef VIMAGE_GLOBALS
	/* Callout that re-runs in6_mtutimo(). */
	static struct callout rtq_mtutimer;
	#endif

	static int
	in6_mtuexpire(struct radix_node rn, void rock)
	{
	struct rtentry rt = (struct rtentry )rn;
	struct mtuex_arg *ap = rock;

	/* sanity */
	if (!rt)
	panic("rt == NULL in in6_mtuexpire");

	if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
	if (rt->rt_rmx.rmx_expire <= time_uptime) {
	rt->rt_flags \|= RTF_PROBEMTU;
	} else {
	ap->nextstop = lmin(ap->nextstop,
	rt->rt_rmx.rmx_expire);
	}
	}

	return 0;
	}

	#define MTUTIMO_DEFAULT (60*1)

	static void
	in6_mtutimo(void *rock)
	{
	CURVNET_SET_QUIET((struct vnet *) rock);
	INIT_VNET_NET((struct vnet *) rock);
	INIT_VNET_INET6((struct vnet *) rock);
	struct radix_node_head *rnh = rock;
	struct mtuex_arg arg;
	struct timeval atv;

	arg.rnh = rnh;
	arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
	RADIX_NODE_HEAD_LOCK(rnh);
	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
	RADIX_NODE_HEAD_UNLOCK(rnh);

	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - time_uptime;
	if (atv.tv_sec < 0) {
	printf("invalid mtu expiration time on routing table\n");
	arg.nextstop = time_uptime + 30; /* last resort */
	atv.tv_sec = 30;
	}
	callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
	CURVNET_RESTORE();
	}

	#if 0
	/*
	 * Compiled out.  Walks the AF_INET6 routing table with in6_rtqkill()
	 * in draining mode, which deletes every managed route regardless of
	 * its remaining lifetime.
	 */
	void
	in6_rtqdrain(void)
	{
		INIT_VNET_NET(curvnet);
		struct radix_node_head *tree = V_rt_tables[AF_INET6];
		struct rtqk_arg walk;

		walk.rnh = tree;
		walk.draining = 1;
		walk.updating = 0;
		walk.nextstop = 0;
		walk.found = walk.killed = 0;

		RADIX_NODE_HEAD_LOCK(tree);
		tree->rnh_walktree(tree, in6_rtqkill, &walk);
		RADIX_NODE_HEAD_UNLOCK(tree);
	}
	#endif

	/*
	* Initialize our routing tree.
	* XXX MRT When off == 0, we are being called from vfs_export.c
	* so just set up their table and leave. (we know what the correct
	* value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
	* see also comments in in_inithead() vfs_export.c and domain.h
	*/
	int
	in6_inithead(void **head, int off)
	{
	INIT_VNET_INET6(curvnet);
	struct radix_node_head *rnh;

	if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
	return 0; /* See above */

	if (off == 0) /* See above */
	return 1; /* only do the rest for the real thing */

	V_rtq_reallyold6 = 6060; / one hour is ``really old'' */
	V_rtq_minreallyold6 = 10; /* never automatically crank down to less */
	V_rtq_toomany6 = 128; /* 128 cached routes is ``too many'' */
	V_rtq_timeout6 = RTQ_TIMEOUT;

	rnh = *head;
	rnh->rnh_addaddr = in6_addroute;
	rnh->rnh_matchaddr = in6_matroute;
	rnh->rnh_close = in6_clsroute;
	callout_init(&V_rtq_timer6, CALLOUT_MPSAFE);
	in6_rtqtimo(rnh); /* kick off timeout first time */
	callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
	in6_mtutimo(rnh); /* kick off timeout first time */
	return 1;
	}
	Index: projects/arpv2_merge_1/sys/netinet6/ip6_input.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/ip6_input.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/ip6_input.c (revision 185839)
	@@ -1,1695 +1,1696 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
	*/

	/*-
	* Copyright (c) 1982, 1986, 1988, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"
	#include "opt_ipsec.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/proc.h>
	#include <sys/domain.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/errno.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/syslog.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/if_dl.h>
	#include <net/route.h>
	#include <net/netisr.h>
	#include <net/pfil.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_systm.h>
	#include <net/if_llatbl.h>
	#ifdef INET
	#include <netinet/ip.h>
	#include <netinet/ip_icmp.h>
	#include <netinet/vinet.h>
	#endif /* INET */
	#include <netinet/ip6.h>
	#include <netinet6/in6_var.h>
	#include <netinet6/ip6_var.h>
	#include <netinet/in_pcb.h>
	#include <netinet/icmp6.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/in6_ifattach.h>
	#include <netinet6/nd6.h>
	#include <netinet6/vinet6.h>

	#ifdef IPSEC
	#include <netipsec/ipsec.h>
	#include <netinet6/ip6_ipsec.h>
	#include <netipsec/ipsec6.h>
	#endif /* IPSEC */

	#include <netinet6/ip6protosw.h>

	extern struct domain inet6domain;

	u_char ip6_protox[IPPROTO_MAX];
	static struct ifqueue ip6intrq;

	#ifdef VIMAGE_GLOBALS
	static int ip6qmaxlen;
	struct in6_ifaddr *in6_ifaddr;
	struct ip6stat ip6stat;
	#endif

	extern struct callout in6_tmpaddrtimer_ch;

	extern int dad_init;
	extern int pmtu_expire;
	extern int pmtu_probe;
	extern u_long rip6_sendspace;
	extern u_long rip6_recvspace;
	extern int icmp6errppslim;
	extern int icmp6_nodeinfo;
	extern int udp6_sendspace;
	extern int udp6_recvspace;

	#ifdef VIMAGE_GLOBALS
	int ip6_forward_srcrt; /* XXX */
	int ip6_sourcecheck; /* XXX */
	int ip6_sourcecheck_interval; /* XXX */
	int ip6_ours_check_algorithm;
	#endif

	struct pfil_head inet6_pfil_hook;

	static void ip6_init2(void *);
	static struct ip6aux ip6_setdstifaddr(struct mbuf , struct in6_ifaddr *);
	static int ip6_hopopts_input(u_int32_t , u_int32_t , struct mbuf *, int );
	#ifdef PULLDOWN_TEST
	static struct mbuf ip6_pullexthdr(struct mbuf , size_t, int);
	#endif

	/*
	 * IP6 initialization: fill in IP6 protocol switch table.
	 * All protocols not implemented in kernel go to raw IP6 protocol handler.
	 *
	 * Also assigns the default values of the IPv6, ICMPv6, raw-IPv6 and
	 * UDP-over-IPv6 tunables, registers the pfil hook head and the netisr
	 * input queue, and calls the scope6, address-selection, ND6 and
	 * fragment-reassembly initialisers.  Panics if no raw IPv6 protocol
	 * entry can be found.
	 */
	void
	ip6_init(void)
	{
		INIT_VNET_INET6(curvnet);
		struct ip6protosw *pr;
		int i;

		V_ip6qmaxlen = IFQ_MAXLEN;
		V_in6_maxmtu = 0;
	#ifdef IP6_AUTO_LINKLOCAL
		V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
	#else
		V_ip6_auto_linklocal = 1;	/* enable by default */
	#endif

	#ifndef IPV6FORWARDING
	#ifdef GATEWAY6
	#define IPV6FORWARDING 1 /* forward IP6 packets not for us */
	#else
	#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */
	#endif /* GATEWAY6 */
	#endif /* !IPV6FORWARDING */

	#ifndef IPV6_SENDREDIRECTS
	#define IPV6_SENDREDIRECTS 1
	#endif

		V_ip6_forwarding = IPV6FORWARDING; /* act as router? */
		V_ip6_sendredirects = IPV6_SENDREDIRECTS;
		V_ip6_defhlim = IPV6_DEFHLIM;
		V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
		V_ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 0 : 1" is dangerous */
		V_ip6_log_interval = 5;
		V_ip6_hdrnestlimit = 15; /* How many header options will we process? */
		V_ip6_dad_count = 1; /* DupAddrDetectionTransmits */
		V_ip6_auto_flowlabel = 1;
		V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */
		V_ip6_rr_prune = 5; /* router renumbering prefix
		* walk list every 5 sec. */
		V_ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */
		V_ip6_v6only = 1;
		V_ip6_keepfaith = 0;
		V_ip6_log_time = (time_t)0L;
	#ifdef IPSTEALTH
		V_ip6stealth = 0;
	#endif
		V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */

		V_pmtu_expire = 60*10;
		V_pmtu_probe = 60*2;

		/* raw IP6 parameters */
		/*
		 * Nominal space allocated to a raw ip socket.
		 */
	#define RIPV6SNDQ 8192
	#define RIPV6RCVQ 8192
		V_rip6_sendspace = RIPV6SNDQ;
		V_rip6_recvspace = RIPV6RCVQ;

		/* ICMPV6 parameters */
		V_icmp6_rediraccept = 1; /* accept and process redirects */
		V_icmp6_redirtimeout = 10 * 60; /* 10 minutes */
		V_icmp6errppslim = 100; /* 100pps */
		/* control how to respond to NI queries */
		V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);

		/* UDP on IP6 parameters */
		V_udp6_sendspace = 9216; /* really max datagram size */
		V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
		/* 40 1K datagrams */
		V_dad_init = 0;

	#ifdef DIAGNOSTIC
		if (sizeof(struct protosw) != sizeof(struct ip6protosw))
			panic("sizeof(protosw) != sizeof(ip6protosw)");
	#endif
		pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
		if (pr == NULL)
			panic("ip6_init");

		/* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */
		for (i = 0; i < IPPROTO_MAX; i++)
			ip6_protox[i] = pr - inet6sw;
		/*
		 * Cycle through IP protocols and put them into the appropriate place
		 * in ip6_protox[].
		 */
		for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
		    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
			if (pr->pr_domain->dom_family == PF_INET6 &&
			    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
				/* Be careful to only index valid IP protocols. */
				if (pr->pr_protocol < IPPROTO_MAX)
					ip6_protox[pr->pr_protocol] = pr - inet6sw;
			}

		/* Initialize packet filter hooks. */
		inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
		inet6_pfil_hook.ph_af = AF_INET6;
		if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
			printf("%s: WARNING: unable to register pfil hook, "
			    "error %d\n", __func__, i);

		ip6intrq.ifq_maxlen = V_ip6qmaxlen;
		mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
		netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
		scope6_init();
		addrsel_policy_init();
		nd6_init();
		frag6_init();
		V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
	}

	/*
	 * Second-stage IPv6 initialisation, run through the SYSINIT below:
	 * initialises and arms the ND6 timer (first run after hz ticks) and the
	 * temporary-address regeneration timer.  'dummy' is unused.
	 */
	static void
	ip6_init2(void *dummy)
	{
	INIT_VNET_INET6(curvnet);

	/* nd6_timer_init */
	callout_init(&V_nd6_timer_ch, 0);
	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);

	/* timer for regeneration of the randomized IDs of temporary addresses */
	callout_init(&V_in6_tmpaddrtimer_ch, 0);
	callout_reset(&V_in6_tmpaddrtimer_ch,
	(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
	V_ip6_temp_regen_advance) * hz,
	in6_tmpaddrtimer, NULL);
	}

	/* cheat */
	/* This must be after route_init(), which is now SI_ORDER_THIRD */
	SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);

	/* Defined elsewhere; declared here for use later in this file. */
	extern struct route_in6 ip6_forward_rt;

	void
	ip6_input(struct mbuf *m)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr *ip6;
	int off = sizeof(struct ip6_hdr), nest;
	u_int32_t plen;
	u_int32_t rtalert = ~0;
	int nxt, ours = 0;
	struct ifnet deliverifp = NULL, ifp = NULL;
	struct in6_addr odst;
	int srcrt = 0;
	struct llentry *lle = NULL;
	struct sockaddr_in6 dst6;

	#ifdef IPSEC
	/*
	* should the inner packet be considered authentic?
	* see comment in ah4_input().
	* NB: m cannot be NULL when passed to the input routine
	*/

	m->m_flags &= ~M_AUTHIPHDR;
	m->m_flags &= ~M_AUTHIPDGM;

	#endif /* IPSEC */

	/*
	* make sure we don't have onion peering information into m_tag.
	*/
	ip6_delaux(m);

	/*
	* mbuf statistics
	*/
	if (m->m_flags & M_EXT) {
	if (m->m_next)
	V_ip6stat.ip6s_mext2m++;
	else
	V_ip6stat.ip6s_mext1++;
	} else {
	#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
	if (m->m_next) {
	if (m->m_flags & M_LOOP) {
	V_ip6stat.ip6s_m2m[V_loif[0].if_index]++; /* XXX */
	} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
	V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
	else
	V_ip6stat.ip6s_m2m[0]++;
	} else
	V_ip6stat.ip6s_m1++;
	#undef M2MMAX
	}

	/* drop the packet if IPv6 operation is disabled on the IF */
	if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
	m_freem(m);
	return;
	}

	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
	V_ip6stat.ip6s_total++;

	#ifndef PULLDOWN_TEST
	/*
	* L2 bridge code and some other code can return mbuf chain
	* that does not conform to KAME requirement. too bad.
	* XXX: fails to join if interface MTU > MCLBYTES. jumbogram?
	*/
	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
	struct mbuf *n;

	MGETHDR(n, M_DONTWAIT, MT_HEADER);
	if (n)
	M_MOVE_PKTHDR(n, m);
	if (n && n->m_pkthdr.len > MHLEN) {
	MCLGET(n, M_DONTWAIT);
	if ((n->m_flags & M_EXT) == 0) {
	m_freem(n);
	n = NULL;
	}
	}
	if (n == NULL) {
	m_freem(m);
	return; /* ENOBUFS */
	}

	m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
	n->m_len = n->m_pkthdr.len;
	m_freem(m);
	m = n;
	}
	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
	#endif

	if (m->m_len < sizeof(struct ip6_hdr)) {
	struct ifnet *inifp;
	inifp = m->m_pkthdr.rcvif;
	if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
	V_ip6stat.ip6s_toosmall++;
	in6_ifstat_inc(inifp, ifs6_in_hdrerr);
	return;
	}
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
	V_ip6stat.ip6s_badvers++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
	goto bad;
	}

	V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;

	/*
	* Check against address spoofing/corruption.
	*/
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) \|\|
	IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
	/*
	* XXX: "badscope" is not very suitable for a multicast source.
	*/
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
	goto bad;
	}
	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
	!(m->m_flags & M_LOOP)) {
	/*
	* In this case, the packet should come from the loopback
	* interface. However, we cannot just check the if_flags,
	* because ip6_mloopback() passes the "actual" interface
	* as the outgoing/incoming interface.
	*/
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
	goto bad;
	}

	#ifdef ALTQ
	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
	/* packet is dropped by traffic conditioner */
	return;
	}
	#endif
	/*
	* The following check is not documented in specs. A malicious
	* party may be able to use IPv4 mapped addr to confuse tcp/udp stack
	* and bypass security checks (act as if it was from 127.0.0.1 by using
	* IPv6 src ::ffff:127.0.0.1). Be cautious.
	*
	* This check chokes if we are in an SIIT cloud. As none of BSDs
	* support IPv4-less kernel compilation, we cannot support SIIT
	* environment at all. So, it makes more sense for us to reject any
	* malicious packets for non-SIIT environment, than try to do a
	* partial support for SIIT environment.
	*/
	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) \|\|
	IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
	goto bad;
	}
	#if 0
	/*
	* Reject packets with IPv4 compatible addresses (auto tunnel).
	*
	* The code forbids auto tunnel relay case in RFC1933 (the check is
	* stronger than RFC1933). We may want to re-enable it if mech-xx
	* is revised to forbid relaying case.
	*/
	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) \|\|
	IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
	goto bad;
	}
	#endif

	/*
	* Run through list of hooks for input packets.
	*
	* NB: Beware of the destination address changing
	* (e.g. by NAT rewriting). When this happens,
	* tell ip6_forward to do the right thing.
	*/
	odst = ip6->ip6_dst;

	/* Jump over all PFIL processing if hooks are not active. */
	if (!PFIL_HOOKED(&inet6_pfil_hook))
	goto passin;

	if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
	return;
	if (m == NULL) /* consumed by filter */
	return;
	ip6 = mtod(m, struct ip6_hdr *);
	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);

	passin:
	/*
	* Disambiguate address scope zones (if there is ambiguity).
	* We first make sure that the original source or destination address
	* is not in our internal form for scoped addresses. Such addresses
	* are not necessarily invalid spec-wise, but we cannot accept them due
	* to the usage conflict.
	* in6_setscope() then also checks and rejects the cases where src or
	* dst are the loopback address and the receiving interface
	* is not loopback.
	*/
	if (in6_clearscope(&ip6->ip6_src) \|\| in6_clearscope(&ip6->ip6_dst)) {
	V_ip6stat.ip6s_badscope++; /* XXX */
	goto bad;
	}
	if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) \|\|
	in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
	V_ip6stat.ip6s_badscope++;
	goto bad;
	}

	/*
	* Multicast check
	*/
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	struct in6_multi *in6m = 0;

	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
	/*
	* See if we belong to the destination multicast group on the
	* arrival interface.
	*/
	IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
	if (in6m)
	ours = 1;
	else if (!ip6_mrouter) {
	V_ip6stat.ip6s_notmember++;
	V_ip6stat.ip6s_cantforward++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
	goto bad;
	}
	deliverifp = m->m_pkthdr.rcvif;
	goto hbhcheck;
	}

	/*
	* Unicast check
	*/

	bzero(&dst6, sizeof(dst6));
	dst6.sin6_family = AF_INET6;
	dst6.sin6_len = sizeof(struct sockaddr_in6);
	dst6.sin6_addr = ip6->ip6_dst;
	ifp = m->m_pkthdr.rcvif;
	IF_AFDATA_LOCK(ifp);
	lle = lla_lookup(LLTABLE6(ifp), 0,
	(struct sockaddr *)&dst6);
	+ IF_AFDATA_UNLOCK(ifp);
	if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
	ours = 1;
	deliverifp = ifp;
	- IF_AFDATA_UNLOCK(ifp);
	+ LLE_RUNLOCK(lle);
	goto hbhcheck;
	}
	- IF_AFDATA_UNLOCK(ifp);
	+ LLE_RUNLOCK(lle);

	if (ip6_forward_rt.ro_rt != NULL &&
	(ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
	IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
	&((struct sockaddr_in6 *)(&V_ip6_forward_rt.ro_dst))->sin6_addr))
	V_ip6stat.ip6s_forward_cachehit++;
	else {
	struct sockaddr_in6 *dst6;

	if (V_ip6_forward_rt.ro_rt) {
	/* route is down or destination is different */
	V_ip6stat.ip6s_forward_cachemiss++;
	RTFREE(V_ip6_forward_rt.ro_rt);
	V_ip6_forward_rt.ro_rt = 0;
	}

	bzero(&V_ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
	dst6 = (struct sockaddr_in6 *)&V_ip6_forward_rt.ro_dst;
	dst6->sin6_len = sizeof(struct sockaddr_in6);
	dst6->sin6_family = AF_INET6;
	dst6->sin6_addr = ip6->ip6_dst;

	rtalloc((struct route *)&V_ip6_forward_rt);
	}

	#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))

	/*
	* Accept the packet if the forwarding interface to the destination
	* according to the routing table is the loopback interface,
	* unless the associated route has a gateway.
	* Note that this approach causes to accept a packet if there is a
	* route to the loopback interface for the destination of the packet.
	* But we think it's even useful in some situations, e.g. when using
	* a special daemon which wants to intercept the packet.
	*
	* XXX: some OSes automatically make a cloned route for the destination
	* of an outgoing packet. If the outgoing interface of the packet
	* is a loopback one, the kernel would consider the packet to be
	* accepted, even if we have no such address assinged on the interface.
	* We check the cloned flag of the route entry to reject such cases,
	* assuming that route entries for our own addresses are not made by
	* cloning (it should be true because in6_addloop explicitly installs
	* the host route). However, we might have to do an explicit check
	* while it would be less efficient. Or, should we rather install a
	* reject route for such a case?
	*/
	if (V_ip6_forward_rt.ro_rt &&
	(V_ip6_forward_rt.ro_rt->rt_flags &
	(RTF_HOST\|RTF_GATEWAY)) == RTF_HOST &&
	#ifdef RTF_WASCLONED
	!(V_ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
	#endif
	#ifdef RTF_CLONED
	!(V_ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
	#endif
	#if 0
	/*
	* The check below is redundant since the comparison of
	* the destination and the key of the rtentry has
	* already done through looking up the routing table.
	*/
	IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
	&rt6_key(V_ip6_forward_rt.ro_rt)->sin6_addr)
	#endif
	V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
	struct in6_ifaddr *ia6 =
	(struct in6_ifaddr *)V_ip6_forward_rt.ro_rt->rt_ifa;

	/*
	* record address information into m_tag.
	*/
	(void)ip6_setdstifaddr(m, ia6);

	/*
	* packets to a tentative, duplicated, or somehow invalid
	* address must not be accepted.
	*/
	if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
	/* this address is ready */
	ours = 1;
	deliverifp = ia6->ia_ifp; /* correct? */
	/* Count the packet in the ip address stats */
	ia6->ia_ifa.if_ipackets++;
	ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
	goto hbhcheck;
	} else {
	char ip6bufs[INET6_ADDRSTRLEN];
	char ip6bufd[INET6_ADDRSTRLEN];
	/* address is not ready, so discard the packet. */
	nd6log((LOG_INFO,
	"ip6_input: packet to an unready address %s->%s\n",
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst)));

	goto bad;
	}
	}

	/*
	* FAITH (Firewall Aided Internet Translator)
	*/
	if (V_ip6_keepfaith) {
	if (V_ip6_forward_rt.ro_rt && V_ip6_forward_rt.ro_rt->rt_ifp
	&& V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
	/* XXX do we need more sanity checks? */
	ours = 1;
	deliverifp = V_ip6_forward_rt.ro_rt->rt_ifp; /* faith */
	goto hbhcheck;
	}
	}

	/*
	* Now there is no reason to process the packet if it's not our own
	* and we're not a router.
	*/
	if (!V_ip6_forwarding) {
	V_ip6stat.ip6s_cantforward++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
	goto bad;
	}

	hbhcheck:
	/*
	* record address information into m_tag, if we don't have one yet.
	* note that we are unable to record it, if the address is not listed
	* as our interface address (e.g. multicast addresses, addresses
	* within FAITH prefixes and such).
	*/
	if (deliverifp && !ip6_getdstifaddr(m)) {
	struct in6_ifaddr *ia6;

	ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
	if (ia6) {
	if (!ip6_setdstifaddr(m, ia6)) {
	/*
	* XXX maybe we should drop the packet here,
	* as we could not provide enough information
	* to the upper layers.
	*/
	}
	}
	}

	/*
	* Process Hop-by-Hop options header if it's contained.
	* m may be modified in ip6_hopopts_input().
	* If a JumboPayload option is included, plen will also be modified.
	*/
	plen = (u_int32_t)ntohs(ip6->ip6_plen);
	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
	struct ip6_hbh *hbh;

	if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
	#if 0 /touches NULL pointer/
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
	#endif
	return; /* m have already been freed */
	}

	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	/*
	* if the payload length field is 0 and the next header field
	* indicates Hop-by-Hop Options header, then a Jumbo Payload
	* option MUST be included.
	*/
	if (ip6->ip6_plen == 0 && plen == 0) {
	/*
	* Note that if a valid jumbo payload option is
	* contained, ip6_hopopts_input() must set a valid
	* (non-zero) payload length to the variable plen.
	*/
	V_ip6stat.ip6s_badoptions++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
	icmp6_error(m, ICMP6_PARAM_PROB,
	ICMP6_PARAMPROB_HEADER,
	(caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
	return;
	}
	#ifndef PULLDOWN_TEST
	/* ip6_hopopts_input() ensures that mbuf is contiguous */
	hbh = (struct ip6_hbh *)(ip6 + 1);
	#else
	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
	sizeof(struct ip6_hbh));
	if (hbh == NULL) {
	V_ip6stat.ip6s_tooshort++;
	return;
	}
	#endif
	nxt = hbh->ip6h_nxt;

	/*
	* If we are acting as a router and the packet contains a
	* router alert option, see if we know the option value.
	* Currently, we only support the option value for MLD, in which
	* case we should pass the packet to the multicast routing
	* daemon.
	*/
	if (rtalert != ~0 && V_ip6_forwarding) {
	switch (rtalert) {
	case IP6OPT_RTALERT_MLD:
	ours = 1;
	break;
	default:
	/*
	* RFC2711 requires unrecognized values must be
	* silently ignored.
	*/
	break;
	}
	}
	} else
	nxt = ip6->ip6_nxt;

	/*
	* Check that the amount of data in the buffers
	* is as at least much as the IPv6 header would have us expect.
	* Trim mbufs if longer than we expect.
	* Drop packet if shorter than we expect.
	*/
	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
	V_ip6stat.ip6s_tooshort++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
	goto bad;
	}
	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
	if (m->m_len == m->m_pkthdr.len) {
	m->m_len = sizeof(struct ip6_hdr) + plen;
	m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
	} else
	m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
	}

	/*
	* Forward if desirable.
	*/
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	/*
	* If we are acting as a multicast router, all
	* incoming multicast packets are passed to the
	* kernel-level multicast forwarding function.
	* The packet is returned (relatively) intact; if
	* ip6_mforward() returns a non-zero value, the packet
	* must be discarded, else it may be accepted below.
	*/
	if (ip6_mrouter && ip6_mforward &&
	ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
	V_ip6stat.ip6s_cantforward++;
	m_freem(m);
	return;
	}
	if (!ours) {
	m_freem(m);
	return;
	}
	} else if (!ours) {
	ip6_forward(m, srcrt);
	return;
	}

	ip6 = mtod(m, struct ip6_hdr *);

	/*
	* Malicious party may be able to use IPv4 mapped addr to confuse
	* tcp/udp stack and bypass security checks (act as if it was from
	* 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
	*
	* For SIIT end node behavior, you may want to disable the check.
	* However, you will become vulnerable to attacks using IPv4 mapped
	* source.
	*/
	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) \|\|
	IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
	goto bad;
	}

	/*
	* Tell launch routine the next header
	*/
	V_ip6stat.ip6s_delivered++;
	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
	nest = 0;

	while (nxt != IPPROTO_DONE) {
	if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
	V_ip6stat.ip6s_toomanyhdr++;
	goto bad;
	}

	/*
	* protection against faulty packet - there should be
	* more sanity checks in header chain processing.
	*/
	if (m->m_pkthdr.len < off) {
	V_ip6stat.ip6s_tooshort++;
	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
	goto bad;
	}

	#ifdef IPSEC
	/*
	* enforce IPsec policy checking if we are seeing last header.
	* note that we do not visit this with protocols with pcb layer
	* code - like udp/tcp/raw ip.
	*/
	if (ip6_ipsec_input(m, nxt))
	goto bad;
	#endif /* IPSEC */
	nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
	}
	return;
	bad:
	m_freem(m);
	}

	/*
	 * set/grab in6_ifaddr correspond to IPv6 destination address.
	 * XXX backward compatibility wrapper
	 *
	 * ip6_setdstifaddr() stores ia6 in the ip6a_dstia6 field of the packet's
	 * ip6aux area, which ip6_addaux() finds or creates.
	 * Returns the ip6aux pointer, or NULL if no aux area could be obtained
	 * (in which case nothing is recorded).
	 */
	static struct ip6aux *
	ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
	{
		struct ip6aux *ip6a;

		ip6a = ip6_addaux(m);
		if (ip6a)
			ip6a->ip6a_dstia6 = ia6;
		return ip6a;	/* NULL if failed to set */
	}

	/*
	 * Return the destination interface address stored in the packet's ip6aux
	 * area, or NULL when ip6_findaux() finds no such area on m.
	 */
	struct in6_ifaddr *
	ip6_getdstifaddr(struct mbuf *m)
	{
		struct ip6aux *aux = ip6_findaux(m);

		if (aux == NULL)
			return NULL;
		return aux->ip6a_dstia6;
	}

	/*
	 * Hop-by-Hop options header processing. If a valid jumbo payload option is
	 * included, the real payload length will be stored in plenp.
	 *
	 * rtalertp - XXX: should be stored more smart way
	 *
	 * plenp, rtalertp: outputs filled in by ip6_process_hopopts().
	 * mp: in/out packet pointer; written back on success.
	 * offp: in/out offset; on success advanced past the Hop-by-Hop header.
	 *
	 * Returns 0 on success and -1 on failure.  On the option-processing
	 * failure path the packet has already been disposed of by
	 * ip6_process_hopopts(); the caller in this file treats every non-zero
	 * return as "m already freed".
	 */
	static int
	ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
	    struct mbuf **mp, int *offp)
	{
		INIT_VNET_INET6(curvnet);
		struct mbuf *m = *mp;
		int off = *offp, hbhlen;
		struct ip6_hbh *hbh;

		/* validation of the length of the header */
	#ifndef PULLDOWN_TEST
		/* First make the fixed part reachable, then the whole header. */
		IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
		hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
		hbhlen = (hbh->ip6h_len + 1) << 3;

		IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
		hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
	#else
		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
		    sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
		if (hbh == NULL) {
			V_ip6stat.ip6s_tooshort++;
			return -1;
		}
		hbhlen = (hbh->ip6h_len + 1) << 3;
		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
		    hbhlen);
		if (hbh == NULL) {
			V_ip6stat.ip6s_tooshort++;
			return -1;
		}
	#endif
		off += hbhlen;
		/* From here on hbhlen counts only the option bytes. */
		hbhlen -= sizeof(struct ip6_hbh);

		if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
		    hbhlen, rtalertp, plenp) < 0)
			return (-1);

		*offp = off;
		*mp = m;
		return (0);
	}

	/*
	 * Search header for all Hop-by-hop options and process each option.
	 * This function is separate from ip6_hopopts_input() in order to
	 * handle a case where the sending node itself process its hop-by-hop
	 * options header. In such a case, the function is called from ip6_output().
	 *
	 * The function assumes that hbh header is located right after the IPv6 header
	 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
	 * opthead + hbhlen is located in continuous memory region.
	 *
	 * m: the packet; opthead: first option byte; hbhlen: number of option
	 * bytes.  *rtalertp receives a router alert value if that option is
	 * present, *plenp the jumbo payload length if that option is present.
	 *
	 * Returns 0 when all options were accepted.  Returns -1 on error, in
	 * which case m has been freed or handed to icmp6_error().
	 */
	int
	ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
	    u_int32_t *rtalertp, u_int32_t *plenp)
	{
		INIT_VNET_INET6(curvnet);
		struct ip6_hdr *ip6;
		int optlen = 0;
		u_int8_t *opt = opthead;
		u_int16_t rtalert_val;
		u_int32_t jumboplen;
		/* Offset of the first option from the start of the IPv6 header. */
		const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);

		for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
			switch (*opt) {
			case IP6OPT_PAD1:
				optlen = 1;
				break;
			case IP6OPT_PADN:
				if (hbhlen < IP6OPT_MINLEN) {
					V_ip6stat.ip6s_toosmall++;
					goto bad;
				}
				optlen = *(opt + 1) + 2;
				break;
			case IP6OPT_ROUTER_ALERT:
				/* XXX may need check for alignment */
				if (hbhlen < IP6OPT_RTALERT_LEN) {
					V_ip6stat.ip6s_toosmall++;
					goto bad;
				}
				if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
					/* XXX stat */
					icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
					return (-1);
				}
				optlen = IP6OPT_RTALERT_LEN;
				bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
				*rtalertp = ntohs(rtalert_val);
				break;
			case IP6OPT_JUMBO:
				/* XXX may need check for alignment */
				if (hbhlen < IP6OPT_JUMBO_LEN) {
					V_ip6stat.ip6s_toosmall++;
					goto bad;
				}
				if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
					/* XXX stat */
					icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
					return (-1);
				}
				optlen = IP6OPT_JUMBO_LEN;

				/*
				 * IPv6 packets that have non 0 payload length
				 * must not contain a jumbo payload option.
				 */
				ip6 = mtod(m, struct ip6_hdr *);
				if (ip6->ip6_plen) {
					V_ip6stat.ip6s_badoptions++;
					icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt - opthead);
					return (-1);
				}

				/*
				 * We may see jumbolen in unaligned location, so
				 * we'd need to perform bcopy().
				 * The value arrives in network byte order.
				 */
				bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
				jumboplen = (u_int32_t)ntohl(jumboplen);

	#if 1
				/*
				 * if there are multiple jumbo payload options,
				 * *plenp will be non-zero and the packet will be
				 * rejected.
				 * the behavior may need some debate in ipngwg -
				 * multiple options does not make sense, however,
				 * there's no explicit mention in specification.
				 */
				if (*plenp != 0) {
					V_ip6stat.ip6s_badoptions++;
					icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
					return (-1);
				}
	#endif

				/*
				 * jumbo payload length must be larger than 65535.
				 */
				if (jumboplen <= IPV6_MAXPACKET) {
					V_ip6stat.ip6s_badoptions++;
					icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
					return (-1);
				}
				*plenp = jumboplen;

				break;
			default:		/* unknown option */
				if (hbhlen < IP6OPT_MINLEN) {
					V_ip6stat.ip6s_toosmall++;
					goto bad;
				}
				/* ip6_unknown_opt() disposes of m when it returns -1. */
				optlen = ip6_unknown_opt(opt, m,
				    erroff + opt - opthead);
				if (optlen == -1)
					return (-1);
				optlen += 2;
				break;
			}
		}

		return (0);

	bad:
		m_freem(m);
		return (-1);
	}

	/*
	 * Unknown option processing.
	 * The third argument `off' is the offset from the IPv6 header to the option,
	 * which is necessary to return an ICMPv6 error when the IPv6 header and
	 * the option header are not contiguous.
	 *
	 * The action is selected by IP6OPT_TYPE(*optp).  For a "skip" option the
	 * return value is the option's data length byte (*(optp + 1)) and m is
	 * left untouched.  In every other case the function returns -1 and m
	 * has been freed or handed to icmp6_error().
	 */
	int
	ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
	{
		INIT_VNET_INET6(curvnet);
		struct ip6_hdr *ip6;

		switch (IP6OPT_TYPE(*optp)) {
		case IP6OPT_TYPE_SKIP:		/* ignore the option */
			return ((int)*(optp + 1));
		case IP6OPT_TYPE_DISCARD:	/* silently discard */
			m_freem(m);
			return (-1);
		case IP6OPT_TYPE_FORCEICMP:	/* send ICMP even if multicasted */
			V_ip6stat.ip6s_badoptions++;
			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
			return (-1);
		case IP6OPT_TYPE_ICMP:		/* send ICMP if not multicasted */
			V_ip6stat.ip6s_badoptions++;
			ip6 = mtod(m, struct ip6_hdr *);
			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
			    (m->m_flags & (M_BCAST|M_MCAST)))
				m_freem(m);
			else
				icmp6_error(m, ICMP6_PARAM_PROB,
				    ICMP6_PARAMPROB_OPTION, off);
			return (-1);
		}

		m_freem(m);		/* XXX: NOTREACHED */
		return (-1);
	}

	/*
	* Create the "control" list for this pcb.
	* These functions will not modify mbuf chain at all.
	*
	* With KAME mbuf chain restriction:
	* The routine will be called from upper layer handlers like tcp6_input().
	* Thus the routine assumes that the caller (tcp6_input) have already
	* called IP6_EXTHDR_CHECK() and all the extension headers are located in the
	* very first mbuf on the mbuf chain.
	*
	* ip6_savecontrol_v4 will handle those options that are possible to be
	* set on a v4-mapped socket.
	* ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
	* options and handle the v6-only ones itself.
	*/
	struct mbuf **
	ip6_savecontrol_v4(struct inpcb inp, struct mbuf m, struct mbuf **mp,
	int *v4only)
	{
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );

	#ifdef SO_TIMESTAMP
	if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
	struct timeval tv;

	microtime(&tv);
	*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
	SCM_TIMESTAMP, SOL_SOCKET);
	if (*mp)
	mp = &(*mp)->m_next;
	}
	#endif

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
	if (v4only != NULL)
	*v4only = 1;
	return (mp);
	}

	#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
	/* RFC 2292 sec. 5 */
	if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
	struct in6_pktinfo pi6;

	bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
	in6_clearscope(&pi6.ipi6_addr); /* XXX */
	pi6.ipi6_ifindex =
	(m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;

	*mp = sbcreatecontrol((caddr_t) &pi6,
	sizeof(struct in6_pktinfo),
	IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	}

	if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
	int hlim = ip6->ip6_hlim & 0xff;

	*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
	IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
	IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	}

	if (v4only != NULL)
	*v4only = 0;
	return (mp);
	}

	void
	ip6_savecontrol(struct inpcb in6p, struct mbuf m, struct mbuf **mp)
	{
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	int v4only = 0;

	mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
	if (v4only)
	return;

	if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
	u_int32_t flowinfo;
	int tclass;

	flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
	flowinfo >>= 20;

	tclass = flowinfo & 0xff;
	*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
	IPV6_TCLASS, IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	}

	/*
	* IPV6_HOPOPTS socket option. Recall that we required super-user
	* privilege for the option (see ip6_ctloutput), but it might be too
	* strict, since there might be some hop-by-hop options which can be
	* returned to normal user.
	* See also RFC 2292 section 6 (or RFC 3542 section 8).
	*/
	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
	/*
	* Check if a hop-by-hop options header is contatined in the
	* received packet, and if so, store the options as ancillary
	* data. Note that a hop-by-hop options header must be
	* just after the IPv6 header, which is assured through the
	* IPv6 input processing.
	*/
	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
	struct ip6_hbh *hbh;
	int hbhlen = 0;
	#ifdef PULLDOWN_TEST
	struct mbuf *ext;
	#endif

	#ifndef PULLDOWN_TEST
	hbh = (struct ip6_hbh *)(ip6 + 1);
	hbhlen = (hbh->ip6h_len + 1) << 3;
	#else
	ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
	ip6->ip6_nxt);
	if (ext == NULL) {
	V_ip6stat.ip6s_tooshort++;
	return;
	}
	hbh = mtod(ext, struct ip6_hbh *);
	hbhlen = (hbh->ip6h_len + 1) << 3;
	if (hbhlen != ext->m_len) {
	m_freem(ext);
	V_ip6stat.ip6s_tooshort++;
	return;
	}
	#endif

	/*
	* XXX: We copy the whole header even if a
	* jumbo payload option is included, the option which
	* is to be removed before returning according to
	* RFC2292.
	* Note: this constraint is removed in RFC3542
	*/
	*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
	IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
	IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	#ifdef PULLDOWN_TEST
	m_freem(ext);
	#endif
	}
	}

	if ((in6p->in6p_flags & (IN6P_RTHDR \| IN6P_DSTOPTS)) != 0) {
	int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);

	/*
	* Search for destination options headers or routing
	* header(s) through the header chain, and stores each
	* header as ancillary data.
	* Note that the order of the headers remains in
	* the chain of ancillary data.
	*/
	while (1) { /* is explicit loop prevention necessary? */
	struct ip6_ext *ip6e = NULL;
	int elen;
	#ifdef PULLDOWN_TEST
	struct mbuf *ext = NULL;
	#endif

	/*
	* if it is not an extension header, don't try to
	* pull it from the chain.
	*/
	switch (nxt) {
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_HOPOPTS:
	case IPPROTO_AH: /* is it possible? */
	break;
	default:
	goto loopend;
	}

	#ifndef PULLDOWN_TEST
	if (off + sizeof(*ip6e) > m->m_len)
	goto loopend;
	ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
	if (nxt == IPPROTO_AH)
	elen = (ip6e->ip6e_len + 2) << 2;
	else
	elen = (ip6e->ip6e_len + 1) << 3;
	if (off + elen > m->m_len)
	goto loopend;
	#else
	ext = ip6_pullexthdr(m, off, nxt);
	if (ext == NULL) {
	V_ip6stat.ip6s_tooshort++;
	return;
	}
	ip6e = mtod(ext, struct ip6_ext *);
	if (nxt == IPPROTO_AH)
	elen = (ip6e->ip6e_len + 2) << 2;
	else
	elen = (ip6e->ip6e_len + 1) << 3;
	if (elen != ext->m_len) {
	m_freem(ext);
	V_ip6stat.ip6s_tooshort++;
	return;
	}
	#endif

	switch (nxt) {
	case IPPROTO_DSTOPTS:
	if (!(in6p->in6p_flags & IN6P_DSTOPTS))
	break;

	*mp = sbcreatecontrol((caddr_t)ip6e, elen,
	IS2292(in6p,
	IPV6_2292DSTOPTS, IPV6_DSTOPTS),
	IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	break;
	case IPPROTO_ROUTING:
	if (!in6p->in6p_flags & IN6P_RTHDR)
	break;

	*mp = sbcreatecontrol((caddr_t)ip6e, elen,
	IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
	IPPROTO_IPV6);
	if (*mp)
	mp = &(*mp)->m_next;
	break;
	case IPPROTO_HOPOPTS:
	case IPPROTO_AH: /* is it possible? */
	break;

	default:
	/*
	* other cases have been filtered in the above.
	* none will visit this case. here we supply
	* the code just in case (nxt overwritten or
	* other cases).
	*/
	#ifdef PULLDOWN_TEST
	m_freem(ext);
	#endif
	goto loopend;

	}

	/* proceed with the next header. */
	off += elen;
	nxt = ip6e->ip6e_nxt;
	ip6e = NULL;
	#ifdef PULLDOWN_TEST
	m_freem(ext);
	ext = NULL;
	#endif
	}
	loopend:
	;
	}
	}
	#undef IS2292

	/*
	 * Queue an IPV6_PATHMTU control message carrying *mtu and the address
	 * *dst on the receive buffer of in6p's socket, then wake up readers.
	 *
	 * Does nothing if mtu is NULL, if sa6_recoverscope() fails on the copied
	 * address, or if the control mbuf cannot be allocated.  If the message
	 * cannot be appended to the socket buffer it is freed.
	 */
	void
	ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
	{
		struct socket *so;
		struct mbuf *m_mtu;
		struct ip6_mtuinfo mtuctl;

		so = in6p->inp_socket;

		if (mtu == NULL)
			return;

	#ifdef DIAGNOSTIC
		if (so == NULL)		/* I believe this is impossible */
			panic("ip6_notify_pmtu: socket is NULL");
	#endif

		bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
		mtuctl.ip6m_mtu = *mtu;
		mtuctl.ip6m_addr = *dst;
		if (sa6_recoverscope(&mtuctl.ip6m_addr))
			return;

		if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
		    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
			return;

		if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
		    == 0) {
			m_freem(m_mtu);
			/* XXX: should count statistics */
		} else
			sorwakeup(so);
	}

	#ifdef PULLDOWN_TEST
	/*
	 * Copy a single extension header out of the chain m into a newly
	 * allocated mbuf.  off is the header's offset within m; nxt is its
	 * protocol number, which selects how the length field is decoded.
	 * Returns the new mbuf, or NULL if allocation fails or the header does
	 * not fit in it.
	 */
	static struct mbuf *
	ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
	{
		struct ip6_ext exthdr;
		size_t extlen;
		struct mbuf *copy;

	#ifdef DIAGNOSTIC
		/* Only warn about an unexpected header type; processing goes on. */
		if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
		    nxt != IPPROTO_HOPOPTS && nxt != IPPROTO_AH)
			printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
	#endif

		/* Read the generic header first to learn the full length. */
		m_copydata(m, off, sizeof(exthdr), (caddr_t)&exthdr);
		if (nxt != IPPROTO_AH)
			extlen = (exthdr.ip6e_len + 1) << 3;
		else
			extlen = (exthdr.ip6e_len + 2) << 2;

		MGET(copy, M_DONTWAIT, MT_DATA);
		if (copy == NULL)
			return NULL;
		if (extlen >= MLEN) {
			/* Too big for a plain mbuf: attach a cluster. */
			MCLGET(copy, M_DONTWAIT);
			if ((copy->m_flags & M_EXT) == 0) {
				m_free(copy);
				return NULL;
			}
		}

		copy->m_len = 0;
		if (extlen >= M_TRAILINGSPACE(copy)) {
			m_free(copy);
			return NULL;
		}

		m_copydata(m, off, extlen, mtod(copy, caddr_t));
		copy->m_len = extlen;
		return copy;
	}
	#endif

	/*
	* Get pointer to the previous header followed by the header
	* currently processed.
	* XXX: This function supposes that
	* M includes all headers,
	* the next header field and the header length field of each header
	* are valid, and
	* the sum of each header length equals to OFF.
	* Because of these assumptions, this function must be called very
	* carefully. Moreover, it will not be used in the near future when
	* we develop `neater' mechanism to process extension headers.
	*/
	char *
	ip6_get_prevhdr(struct mbuf *m, int off)
	{
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );

	if (off == sizeof(struct ip6_hdr))
	return (&ip6->ip6_nxt);
	else {
	int len, nxt;
	struct ip6_ext *ip6e = NULL;

	nxt = ip6->ip6_nxt;
	len = sizeof(struct ip6_hdr);
	while (len < off) {
	ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);

	switch (nxt) {
	case IPPROTO_FRAGMENT:
	len += sizeof(struct ip6_frag);
	break;
	case IPPROTO_AH:
	len += (ip6e->ip6e_len + 2) << 2;
	break;
	default:
	len += (ip6e->ip6e_len + 1) << 3;
	break;
	}
	nxt = ip6e->ip6e_nxt;
	}
	if (ip6e)
	return (&ip6e->ip6e_nxt);
	else
	return NULL;
	}
	}

	/*
	* get next header offset. m will be retained.
	*/
	int
	ip6_nexthdr(struct mbuf m, int off, int proto, int nxtp)
	{
	struct ip6_hdr ip6;
	struct ip6_ext ip6e;
	struct ip6_frag fh;

	/* just in case */
	if (m == NULL)
	panic("ip6_nexthdr: m == NULL");
	if ((m->m_flags & M_PKTHDR) == 0 \|\| m->m_pkthdr.len < off)
	return -1;

	switch (proto) {
	case IPPROTO_IPV6:
	if (m->m_pkthdr.len < off + sizeof(ip6))
	return -1;
	m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
	if (nxtp)
	*nxtp = ip6.ip6_nxt;
	off += sizeof(ip6);
	return off;

	case IPPROTO_FRAGMENT:
	/*
	* terminate parsing if it is not the first fragment,
	* it does not make sense to parse through it.
	*/
	if (m->m_pkthdr.len < off + sizeof(fh))
	return -1;
	m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
	/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
	if (fh.ip6f_offlg & IP6F_OFF_MASK)
	return -1;
	if (nxtp)
	*nxtp = fh.ip6f_nxt;
	off += sizeof(struct ip6_frag);
	return off;

	case IPPROTO_AH:
	if (m->m_pkthdr.len < off + sizeof(ip6e))
	return -1;
	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
	if (nxtp)
	*nxtp = ip6e.ip6e_nxt;
	off += (ip6e.ip6e_len + 2) << 2;
	return off;

	case IPPROTO_HOPOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
	if (m->m_pkthdr.len < off + sizeof(ip6e))
	return -1;
	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
	if (nxtp)
	*nxtp = ip6e.ip6e_nxt;
	off += (ip6e.ip6e_len + 1) << 3;
	return off;

	case IPPROTO_NONE:
	case IPPROTO_ESP:
	case IPPROTO_IPCOMP:
	/* give up */
	return -1;

	default:
	return -1;
	}

	return -1;
	}

	/*
	 * get offset for the last header in the chain.  m will be kept untainted.
	 *
	 * Starting with the header of type proto at offset off, steps through
	 * the chain with ip6_nexthdr() until that function gives up.  Returns
	 * the offset of the last header reached; if nxtp is non-NULL, *nxtp
	 * holds that header's protocol number.  Returns -1 if ip6_nexthdr()
	 * ever reports an offset that moves backwards.
	 */
	int
	ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
	{
		int newoff;
		int nxt;

		/* Use a local slot when the caller does not want the protocol. */
		if (!nxtp) {
			nxt = -1;
			nxtp = &nxt;
		}
		while (1) {
			newoff = ip6_nexthdr(m, off, proto, nxtp);
			if (newoff < 0)
				return off;
			else if (newoff < off)
				return -1;	/* invalid */
			else if (newoff == off)
				return newoff;

			off = newoff;
			proto = *nxtp;
		}
	}

	/*
	 * Return the IPv6 input auxiliary data attached to m, attaching a
	 * zero-filled one first when the packet has none.  Returns NULL if the
	 * tag cannot be allocated (M_NOWAIT).
	 */
	struct ip6aux *
	ip6_addaux(struct mbuf *m)
	{
		struct m_tag *tag;

		tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
		if (tag != NULL)
			return ((struct ip6aux *)(tag + 1));

		tag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
		    M_NOWAIT);
		if (tag == NULL)
			return (NULL);

		/* the payload lives directly behind the tag header */
		m_tag_prepend(m, tag);
		bzero(tag + 1, sizeof(struct ip6aux));
		return ((struct ip6aux *)(tag + 1));
	}

	/*
	 * Look up the IPv6 input auxiliary data attached to m.
	 * Returns NULL when the packet carries no such tag.
	 */
	struct ip6aux *
	ip6_findaux(struct mbuf *m)
	{
		struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

		if (tag == NULL)
			return (NULL);
		/* the payload lives directly behind the tag header */
		return ((struct ip6aux *)(tag + 1));
	}

	/*
	 * Remove the IPv6 input auxiliary tag from m, if one is attached.
	 */
	void
	ip6_delaux(struct mbuf *m)
	{
		struct m_tag *tag;

		if ((tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL)) != NULL)
			m_tag_delete(m, tag);
	}

	/*
	* System control for IP6
	*/

	/*
	* Table of errno values with PRC_NCMDS slots; the initializer lists 21
	* entries, four per row.
	* NOTE(review): presumably indexed by PRC_* control command, with 0
	* meaning "no error to report" -- confirm against the callers.
	*/
	u_char inet6ctlerrmap[PRC_NCMDS] = {
	0, 0, 0, 0,
	0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
	EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
	EMSGSIZE, EHOSTUNREACH, 0, 0,
	0, 0, 0, 0,
	ENOPROTOOPT
	};
	Index: projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c (revision 185839)
	@@ -1,1954 +1,1952 @@
	/*-
	* Copyright (C) 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $
	*/

	/*-
	* Copyright (c) 1989 Stephen Deering
	* Copyright (c) 1992, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* This code is derived from software contributed to Berkeley by
	* Stephen Deering of Stanford University.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
	* BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp
	*/

	/*
	* IP multicast forwarding procedures
	*
	* Written by David Waitzman, BBN Labs, August 1988.
	* Modified by Steve Deering, Stanford, February 1989.
	* Modified by Mark J. Steiglitz, Stanford, May, 1991
	* Modified by Van Jacobson, LBL, January 1993
	* Modified by Ajit Thyagarajan, PARC, August 1993
	* Modified by Bill Fenner, PARC, April 1994
	*
	* MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/callout.h>
	#include <sys/errno.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/protosw.h>
	#include <sys/signalvar.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/sockio.h>
	#include <sys/sx.h>
	#include <sys/sysctl.h>
	#include <sys/syslog.h>
	#include <sys/systm.h>
	#include <sys/time.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/raw_cb.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <netinet/icmp6.h>
	#include <netinet/vinet.h>

	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet6/ip6_mroute.h>
	#include <netinet6/ip6protosw.h>
	#include <netinet6/pim6.h>
	#include <netinet6/pim6_var.h>
	#include <netinet6/vinet6.h>

	/* malloc(9) type for forwarding cache entries and stalled-packet records */
	static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");

	/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
	#define M_HASCL(m) ((m)->m_flags & M_EXT)

	/* Forward declarations; every object argument is passed by pointer. */
	static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
	static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);

	static void pim6_init(void);
	static int set_pim6(int *);
	static int socket_send __P((struct socket *, struct mbuf *,
	    struct sockaddr_in6 *));
	static int register_send __P((struct ip6_hdr *, struct mif6 *,
	    struct mbuf *));

	extern struct domain inet6domain;

	/*
	 * Protocol switch entry for PIM over IPv6: a raw-socket protocol whose
	 * input goes to pim6_input() and whose output and socket handling reuse
	 * the raw IPv6 (rip6) routines.
	 */
	/* XXX: referenced from ip_mroute.c for dynamically loading this code. */
	struct ip6protosw in6_pim_protosw = {
		.pr_type = SOCK_RAW,
		.pr_domain = &inet6domain,
		.pr_protocol = IPPROTO_PIM,
		.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
		.pr_input = pim6_input,
		.pr_output = rip6_output,
		.pr_ctloutput = rip6_ctloutput,
		.pr_init = pim6_init,
		.pr_usrreqs = &rip6_usrreqs
	};

	#ifdef VIMAGE_GLOBALS
	/*
	* Socket option that enabled multicast routing: stored by
	* ip6_mrouter_init() and reset to 0 by X_ip6_mrouter_done().
	* NOTE(review): the code accesses it as V_ip6_mrouter_ver, presumably
	* through a vimage macro -- confirm against the vimage headers.
	*/
	static int ip6_mrouter_ver;
	#endif

	SYSCTL_DECL(_net_inet6);
	SYSCTL_DECL(_net_inet6_ip6);
	SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");

	/* Multicast routing counters, bumped throughout this file. */
	static struct mrt6stat mrt6stat;
	SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
	&mrt6stat, mrt6stat,
	"Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");

	/* Not referenced in the part of the file visible here. */
	#define NO_RTE_FOUND 0x1
	#define RTE_FOUND 0x2

	/*
	* Forwarding cache: MF6CTBLSIZ hash buckets, each a singly linked list
	* chained through mf6c_next and indexed with MF6CHASH().
	*/
	static struct mf6c *mf6ctable[MF6CTBLSIZ];
	SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
	&mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
	"Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
	"netinet6/ip6_mroute.h)");

	/*
	* Per-bucket count of cache entries that still have an expiry timer
	* running; expire_upcalls() skips buckets whose count is zero.
	*/
	static u_char n6expire[MF6CTBLSIZ];

	/* Multicast interface table, indexed by mif number (see nummifs). */
	static struct mif6 mif6table[MAXMIFS];
	SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
	&mif6table, sizeof(mif6table), "S,vif[MAXMIFS]",
	"Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)");

	#ifdef MRT6DEBUG
	#ifdef VIMAGE_GLOBALS
	static u_int mrt6debug = 0; /* debug level */
	#endif
	/*
	* Bits tested against the debug level to select which log(LOG_DEBUG, ...)
	* messages are emitted.
	*/
	#define DEBUG_MFC 0x02
	#define DEBUG_FORWARD 0x04
	#define DEBUG_EXPIRE 0x08
	#define DEBUG_XMIT 0x10
	#define DEBUG_REG 0x20
	#define DEBUG_PIM 0x40
	#endif

	/*
	* expire_upcalls() re-arms itself every EXPIRE_TIMEOUT ticks; an entry
	* waiting for the routing daemon starts with mf6c_expire = UPCALL_EXPIRE
	* and is dropped once that count reaches zero.
	*/
	static void expire_upcalls(void *);
	#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
	#define UPCALL_EXPIRE 6 /* number of timeouts */

	#ifdef INET
	#ifdef MROUTING
	/* IPv4 multicast router socket; consulted by X_ip6_mrouter_done(). */
	extern struct socket *ip_mrouter;
	#endif
	#endif

	/*
	* 'Interfaces' associated with decapsulator (so we can tell
	* packets that went through it from ones that get reflected
	* by a broken gateway). Different from IPv4 register_if,
	* these interfaces are linked into the system ifnet list,
	* because per-interface IPv6 statistics are maintained in
	* ifp->if_afdata. But no routes point to them, i.e. packets
	* can't be sent this way. They only exist as a placeholder
	* for multicast source verification.
	*/
	static struct ifnet *multicast_register_if6;

	#define ENCAP_HOPS 64

	/*
	* Private variables.
	*/
	/* One more than the highest mif index in use; 0 when the table is empty. */
	static mifi_t nummifs = 0;
	/* mif index of the register interface, (mifi_t)-1 while none exists. */
	static mifi_t reg_mif_num = (mifi_t)-1;

	static struct pim6stat pim6stat;
	SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD,
	&pim6stat, pim6stat,
	"PIM Statistics (struct pim6stat, netinet6/pim_var.h)");

	#ifdef VIMAGE_GLOBALS
	/*
	* PIM on/off switch (0 or 1), set through the MRT6_PIM socket option.
	* NOTE(review): accessed as V_pim6, presumably via a vimage macro.
	*/
	static int pim6;
	#endif

	/*
	* Hash function for a source, group entry
	*
	* XORs the four 32-bit words of the source address (a) with the four
	* 32-bit words of the group address (g) and hands the result to
	* MF6CHASHMOD (defined outside this file) to obtain the bucket index.
	* Both arguments are struct in6_addr values, not pointers.
	*/
	#define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \
	(a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \
	(g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \
	(g).s6_addr32[2] ^ (g).s6_addr32[3])

	/*
	 * Find a route for a given origin IPv6 address and multicast group address.
	 *
	 * o, g - struct in6_addr lvalues (origin and group).
	 * rt   - lvalue that receives the matching struct mf6c pointer, or NULL.
	 *
	 * Only complete entries match: entries that still have packets queued
	 * on mf6c_stall (waiting for the routing daemon) are skipped.  Updates
	 * the lookup and miss counters in mrt6stat.
	 */
	#define MF6CFIND(o, g, rt) do { \
		struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
		rt = NULL; \
		mrt6stat.mrt6s_mfc_lookups++; \
		while (_rt) { \
			if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
			    IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
			    (_rt->mf6c_stall == NULL)) { \
				rt = _rt; \
				break; \
			} \
			_rt = _rt->mf6c_next; \
		} \
		if (rt == NULL) { \
			mrt6stat.mrt6s_mfc_misses++; \
		} \
	} while (/*CONSTCOND*/ 0)

	/*
	 * Macros to compute elapsed time efficiently
	 * Borrowed from Van Jacobson's scheduling code
	 * XXX: replace with timersub() ?
	 *
	 * TV_DELTA(a, b, delta) stores (a - b) in microseconds into the lvalue
	 * "delta"; a and b are struct timeval values.  Differences of one or two
	 * whole seconds take a multiplication-free path.
	 */
	#define TV_DELTA(a, b, delta) do { \
		int xxs; \
		\
		delta = (a).tv_usec - (b).tv_usec; \
		if ((xxs = (a).tv_sec - (b).tv_sec)) { \
			switch (xxs) { \
			case 2: \
				delta += 1000000; \
				/* FALLTHROUGH */ \
			case 1: \
				delta += 1000000; \
				break; \
			default: \
				delta += (1000000 * xxs); \
			} \
		} \
	} while (/*CONSTCOND*/ 0)

	/*
	 * TV_LT(a, b): nonzero when struct timeval a is earlier than b.
	 * XXX: replace with timercmp(a, b, <) ?
	 */
	#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
	    (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)

	#ifdef UPCALL_TIMING
	/* Histogram of upcall service delays; the last slot collects overflows. */
	#define UPCALL_MAX 50
	static u_long upcall_data[UPCALL_MAX + 1];
	static void collate(struct timeval *);
	#endif /* UPCALL_TIMING */

	static int get_sg_cnt(struct sioc_sg_req6 *);
	static int get_mif6_cnt(struct sioc_mif_req6 *);
	static int ip6_mrouter_init(struct socket *, int, int);
	static int add_m6if(struct mif6ctl *);
	static int del_m6if(mifi_t *);
	static int add_m6fc(struct mf6cctl *);
	static int del_m6fc(struct mf6cctl *);

	/* Periodic timer that runs expire_upcalls(). */
	static struct callout expire_upcalls_ch;

	/* Entry points of this file with external linkage. */
	int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m);
	int X_ip6_mrouter_done(void);
	int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt);
	int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt);
	int X_mrt6_ioctl(int cmd, caddr_t data);

	/*
	 * Protocol init hook (pr_init of in6_pim_protosw): start with multicast
	 * routing disabled and, when built with MRT6DEBUG, debugging off.
	 */
	static void
	pim6_init(void)
	{
		INIT_VNET_INET6(curvnet);

	#ifdef MRT6DEBUG
		V_mrt6debug = 0;	/* debug level */
	#endif
		V_ip6_mrouter_ver = 0;
	}

	/*
	* Handle MRT setsockopt commands to modify the multicast routing tables.
	*/
	int
	X_ip6_mrouter_set(struct socket so, struct sockopt sopt)
	{
	int error = 0;
	int optval;
	struct mif6ctl mifc;
	struct mf6cctl mfcc;
	mifi_t mifi;

	if (so != ip6_mrouter && sopt->sopt_name != MRT6_INIT)
	return (EACCES);

	switch (sopt->sopt_name) {
	case MRT6_INIT:
	#ifdef MRT6_OINIT
	case MRT6_OINIT:
	#endif
	error = sooptcopyin(sopt, &optval, sizeof(optval),
	sizeof(optval));
	if (error)
	break;
	error = ip6_mrouter_init(so, optval, sopt->sopt_name);
	break;
	case MRT6_DONE:
	error = X_ip6_mrouter_done();
	break;
	case MRT6_ADD_MIF:
	error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
	if (error)
	break;
	error = add_m6if(&mifc);
	break;
	case MRT6_ADD_MFC:
	error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
	if (error)
	break;
	error = add_m6fc(&mfcc);
	break;
	case MRT6_DEL_MFC:
	error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
	if (error)
	break;
	error = del_m6fc(&mfcc);
	break;
	case MRT6_DEL_MIF:
	error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi));
	if (error)
	break;
	error = del_m6if(&mifi);
	break;
	case MRT6_PIM:
	error = sooptcopyin(sopt, &optval, sizeof(optval),
	sizeof(optval));
	if (error)
	break;
	error = set_pim6(&optval);
	break;
	default:
	error = EOPNOTSUPP;
	break;
	}

	return (error);
	}

	/*
	* Handle MRT getsockopt commands
	*/
	int
	X_ip6_mrouter_get(struct socket so, struct sockopt sopt)
	{
	INIT_VNET_INET6(curvnet);
	int error = 0;

	if (so != ip6_mrouter)
	return (EACCES);

	switch (sopt->sopt_name) {
	case MRT6_PIM:
	error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6));
	break;
	}
	return (error);
	}

	/*
	 * Dispatch the ioctl commands that read counters out of the cache.
	 * "data" points at the request structure matching "cmd"; commands other
	 * than the two handled here return EINVAL.
	 */
	int
	X_mrt6_ioctl(int cmd, caddr_t data)
	{
		int error;

		switch (cmd) {
		case SIOCGETSGCNT_IN6:
			error = get_sg_cnt((struct sioc_sg_req6 *)data);
			break;
		case SIOCGETMIFCNT_IN6:
			error = get_mif6_cnt((struct sioc_mif_req6 *)data);
			break;
		default:
			error = EINVAL;
			break;
		}
		return (error);
	}

	/*
	 * Fill req with the packet, byte and wrong-interface counters of the
	 * cache entry for the (source, group) pair named in req.
	 * Returns ESRCH when no complete entry exists for that pair.
	 */
	static int
	get_sg_cnt(struct sioc_sg_req6 *req)
	{
		struct mf6c *entry;
		int spl;

		spl = splnet();
		MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, entry);
		splx(spl);

		if (entry == NULL)
			return (ESRCH);

		req->pktcnt = entry->mf6c_pkt_cnt;
		req->bytecnt = entry->mf6c_byte_cnt;
		req->wrong_if = entry->mf6c_wrong_if;
		return (0);
	}

	/*
	* returns the input and output packet and byte counts on the mif provided
	*/
	static int
	get_mif6_cnt(struct sioc_mif_req6 *req)
	{
	mifi_t mifi = req->mifi;

	if (mifi >= nummifs)
	return (EINVAL);

	req->icount = mif6table[mifi].m6_pkt_in;
	req->ocount = mif6table[mifi].m6_pkt_out;
	req->ibytes = mif6table[mifi].m6_bytes_in;
	req->obytes = mif6table[mifi].m6_bytes_out;

	return (0);
	}

	/*
	 * Switch PIM processing on (1) or off (0).
	 *
	 * i - points at the requested value; anything other than 0 or 1 is
	 *     rejected with EINVAL and the current setting is left unchanged.
	 */
	static int
	set_pim6(int *i)
	{
		INIT_VNET_INET6(curvnet);

		/* validate the value pointed to, not the pointer itself */
		if ((*i != 1) && (*i != 0))
			return (EINVAL);

		V_pim6 = *i;

		return (0);
	}

	/*
	* Enable multicast routing
	*/
	static int
	ip6_mrouter_init(struct socket *so, int v, int cmd)
	{
	INIT_VNET_INET6(curvnet);

	#ifdef MRT6DEBUG
	if (V_mrt6debug)
	log(LOG_DEBUG,
	"ip6_mrouter_init: so_type = %d, pr_protocol = %d\n",
	so->so_type, so->so_proto->pr_protocol);
	#endif

	if (so->so_type != SOCK_RAW \|\|
	so->so_proto->pr_protocol != IPPROTO_ICMPV6)
	return (EOPNOTSUPP);

	if (v != 1)
	return (ENOPROTOOPT);

	if (ip6_mrouter != NULL)
	return (EADDRINUSE);

	ip6_mrouter = so;
	V_ip6_mrouter_ver = cmd;

	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
	bzero((caddr_t)n6expire, sizeof(n6expire));

	V_pim6 = 0;/* used for stubbing out/in pim stuff */

	callout_init(&expire_upcalls_ch, 0);
	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
	expire_upcalls, NULL);

	#ifdef MRT6DEBUG
	if (V_mrt6debug)
	log(LOG_DEBUG, "ip6_mrouter_init\n");
	#endif

	return (0);
	}

	/*
	 * Disable multicast routing
	 *
	 * Turns off all-multicast reception on the physical mifs, clears the mif
	 * table, stops the expiry timer, frees every forwarding cache entry
	 * together with the packets still queued on it, tears down the register
	 * interface and forgets the router socket.  Always returns 0.
	 */
	int
	X_ip6_mrouter_done(void)
	{
		INIT_VNET_INET6(curvnet);
		mifi_t mifi;
		int i;
		struct mf6c *rt;
		struct rtdetq *rte;
		int s;

		s = splnet();

		/*
		 * For each phyint in use, disable promiscuous reception of all IPv6
		 * multicasts.
		 */
	#ifdef INET
	#ifdef MROUTING
		/*
		 * If an IPv4 multicast routing daemon is still running, leave the
		 * interfaces receiving all multicast packets.
		 * XXX: there may be an interface in which the IPv4 multicast
		 * daemon is not interested...
		 */
		if (!V_ip_mrouter)
	#endif
	#endif
		{
			for (mifi = 0; mifi < nummifs; mifi++) {
				if (mif6table[mifi].m6_ifp &&
				    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
					if_allmulti(mif6table[mifi].m6_ifp, 0);
				}
			}
		}
		bzero((caddr_t)mif6table, sizeof(mif6table));
		nummifs = 0;

		V_pim6 = 0;	/* used to stub out/in pim specific code */

		callout_stop(&expire_upcalls_ch);

		/*
		 * Free all multicast forwarding cache entries.
		 */
		for (i = 0; i < MF6CTBLSIZ; i++) {
			rt = mf6ctable[i];
			while (rt) {
				struct mf6c *frt;

				for (rte = rt->mf6c_stall; rte != NULL; ) {
					struct rtdetq *n = rte->next;

					/*
					 * A stalled packet is a whole mbuf chain:
					 * m_free() would release only its first
					 * mbuf and leak the remainder.
					 */
					m_freem(rte->m);
					free(rte, M_MRTABLE6);
					rte = n;
				}
				frt = rt;
				rt = rt->mf6c_next;
				free(frt, M_MRTABLE6);
			}
		}

		bzero((caddr_t)mf6ctable, sizeof(mf6ctable));

		/*
		 * Reset register interface
		 */
		if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) {
			if_detach(multicast_register_if6);
			if_free(multicast_register_if6);
			reg_mif_num = (mifi_t)-1;
			multicast_register_if6 = NULL;
		}

		ip6_mrouter = NULL;
		V_ip6_mrouter_ver = 0;

		splx(s);

	#ifdef MRT6DEBUG
		if (V_mrt6debug)
			log(LOG_DEBUG, "ip6_mrouter_done\n");
	#endif

		return (0);
	}

	/*
	* Scratch source address handed to socket_send() for upcalls;
	* X_ip6_mforward() overwrites sin6_addr before each use.
	*/
	static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };

	/*
	* Add a mif to the mif table
	*/
	static int
	add_m6if(struct mif6ctl *mifcp)
	{
	INIT_VNET_NET(curvnet);
	struct mif6 *mifp;
	struct ifnet *ifp;
	int error, s;

	if (mifcp->mif6c_mifi >= MAXMIFS)
	return (EINVAL);
	mifp = mif6table + mifcp->mif6c_mifi;
	if (mifp->m6_ifp)
	return (EADDRINUSE); /* XXX: is it appropriate? */
	if (mifcp->mif6c_pifi == 0 \|\| mifcp->mif6c_pifi > V_if_index)
	return (ENXIO);
	ifp = ifnet_byindex(mifcp->mif6c_pifi);

	if (mifcp->mif6c_flags & MIFF_REGISTER) {
	if (reg_mif_num == (mifi_t)-1) {
	ifp = if_alloc(IFT_OTHER);

	if_initname(ifp, "register_mif", 0);
	ifp->if_flags \|= IFF_LOOPBACK;
	if_attach(ifp);
	multicast_register_if6 = ifp;
	reg_mif_num = mifcp->mif6c_mifi;
	/*
	* it is impossible to guess the ifindex of the
	* register interface. So mif6c_pifi is automatically
	* calculated.
	*/
	mifcp->mif6c_pifi = ifp->if_index;
	} else {
	ifp = multicast_register_if6;
	}

	} /* if REGISTER */
	else {
	/* Make sure the interface supports multicast */
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
	return (EOPNOTSUPP);

	s = splnet();
	error = if_allmulti(ifp, 1);
	splx(s);
	if (error)
	return (error);
	}

	s = splnet();
	mifp->m6_flags = mifcp->mif6c_flags;
	mifp->m6_ifp = ifp;

	/* initialize per mif pkt counters */
	mifp->m6_pkt_in = 0;
	mifp->m6_pkt_out = 0;
	mifp->m6_bytes_in = 0;
	mifp->m6_bytes_out = 0;
	splx(s);

	/* Adjust nummifs up if the mifi is higher than nummifs */
	if (nummifs <= mifcp->mif6c_mifi)
	nummifs = mifcp->mif6c_mifi + 1;

	#ifdef MRT6DEBUG
	if (V_mrt6debug)
	log(LOG_DEBUG,
	"add_mif #%d, phyint %s\n",
	mifcp->mif6c_mifi,
	ifp->if_xname);
	#endif

	return (0);
	}

	/*
	 * Delete a mif from the mif table
	 *
	 * mifip - points at the index of the mif to remove.
	 *
	 * Returns EINVAL if the index is at or beyond nummifs or the slot is
	 * empty.  A physical mif has all-multicast reception switched off; a
	 * register mif has its pseudo interface detached and freed.  nummifs is
	 * then lowered to one past the highest slot still in use.
	 */
	static int
	del_m6if(mifi_t *mifip)
	{
		struct mif6 *mifp;
		mifi_t mifi;
		struct ifnet *ifp;
		int s;

		if (*mifip >= nummifs)
			return (EINVAL);
		/* only form the table pointer once the index is known to be valid */
		mifp = mif6table + *mifip;
		if (mifp->m6_ifp == NULL)
			return (EINVAL);

		s = splnet();

		if (!(mifp->m6_flags & MIFF_REGISTER)) {
			/*
			 * XXX: what if there is yet IPv4 multicast daemon
			 * using the interface?
			 */
			ifp = mifp->m6_ifp;

			if_allmulti(ifp, 0);
		} else {
			if (reg_mif_num != (mifi_t)-1 &&
			    multicast_register_if6 != NULL) {
				if_detach(multicast_register_if6);
				if_free(multicast_register_if6);
				reg_mif_num = (mifi_t)-1;
				multicast_register_if6 = NULL;
			}
		}

		bzero((caddr_t)mifp, sizeof(*mifp));

		/* Adjust nummifs down */
		for (mifi = nummifs; mifi > 0; mifi--)
			if (mif6table[mifi - 1].m6_ifp)
				break;
		nummifs = mifi;

		splx(s);

	#ifdef MRT6DEBUG
		if (V_mrt6debug)
			log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs);
	#endif

		return (0);
	}

	/*
	 * Add an mfc entry
	 *
	 * mfccp - request carrying origin, multicast group, parent mif and the
	 *         set of outgoing interfaces.
	 *
	 * Three cases are handled:
	 *  - a complete entry already exists: only parent and ifset are updated;
	 *  - one or more entries are waiting on an upcall for this pair: they
	 *    are completed and every packet queued on them is forwarded with
	 *    ip6_mdq() and then freed;
	 *  - otherwise a new entry is allocated and linked at the head of its
	 *    hash chain.
	 *
	 * Returns 0, or ENOBUFS if a new entry cannot be allocated.
	 */
	static int
	add_m6fc(struct mf6cctl *mfccp)
	{
		struct mf6c *rt;
		u_long hash;
		struct rtdetq *rte;
		u_short nstl;
		int s;
		char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];

		MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
		    mfccp->mf6cc_mcastgrp.sin6_addr, rt);

		/* If an entry already exists, just update the fields */
		if (rt) {
	#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_MFC) {
				/* "hash" is not computed yet on this path */
				log(LOG_DEBUG,
				    "add_m6fc no upcall h %d o %s g %s p %x\n",
				    (int)MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
				    mfccp->mf6cc_mcastgrp.sin6_addr),
				    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
				    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
				    mfccp->mf6cc_parent);
			}
	#endif

			s = splnet();
			rt->mf6c_parent = mfccp->mf6cc_parent;
			rt->mf6c_ifset = mfccp->mf6cc_ifset;
			splx(s);
			return (0);
		}

		/*
		 * Find the entry for which the upcall was made and update
		 */
		s = splnet();
		hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
		    mfccp->mf6cc_mcastgrp.sin6_addr);
		for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) {
			if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
			    &mfccp->mf6cc_origin.sin6_addr) &&
			    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
			    &mfccp->mf6cc_mcastgrp.sin6_addr) &&
			    (rt->mf6c_stall != NULL)) {

				if (nstl++)
					log(LOG_ERR,
					    "add_m6fc: %s o %s g %s p %x dbx %p\n",
					    "multiple kernel entries",
					    ip6_sprintf(ip6bufo,
					    &mfccp->mf6cc_origin.sin6_addr),
					    ip6_sprintf(ip6bufg,
					    &mfccp->mf6cc_mcastgrp.sin6_addr),
					    mfccp->mf6cc_parent, rt->mf6c_stall);

	#ifdef MRT6DEBUG
				if (V_mrt6debug & DEBUG_MFC)
					log(LOG_DEBUG,
					    "add_m6fc o %s g %s p %x dbg %p\n",
					    ip6_sprintf(ip6bufo,
					    &mfccp->mf6cc_origin.sin6_addr),
					    ip6_sprintf(ip6bufg,
					    &mfccp->mf6cc_mcastgrp.sin6_addr),
					    mfccp->mf6cc_parent, rt->mf6c_stall);
	#endif

				rt->mf6c_origin = mfccp->mf6cc_origin;
				rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
				rt->mf6c_parent = mfccp->mf6cc_parent;
				rt->mf6c_ifset = mfccp->mf6cc_ifset;
				/* initialize pkt counters per src-grp */
				rt->mf6c_pkt_cnt = 0;
				rt->mf6c_byte_cnt = 0;
				rt->mf6c_wrong_if = 0;

				rt->mf6c_expire = 0;	/* Don't clean this guy up */
				n6expire[hash]--;

				/* free packets Qed at the end of this entry */
				for (rte = rt->mf6c_stall; rte != NULL; ) {
					struct rtdetq *n = rte->next;
					ip6_mdq(rte->m, rte->ifp, rt);
					m_freem(rte->m);
	#ifdef UPCALL_TIMING
					collate(&(rte->t));
	#endif /* UPCALL_TIMING */
					free(rte, M_MRTABLE6);
					rte = n;
				}
				rt->mf6c_stall = NULL;
			}
		}

		/*
		 * It is possible that an entry is being inserted without an upcall
		 */
		if (nstl == 0) {
	#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_MFC)
				log(LOG_DEBUG,
				    "add_mfc no upcall h %lu o %s g %s p %x\n",
				    hash,
				    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
				    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
				    mfccp->mf6cc_parent);
	#endif

			for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {

				if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
				    &mfccp->mf6cc_origin.sin6_addr)&&
				    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
				    &mfccp->mf6cc_mcastgrp.sin6_addr)) {

					rt->mf6c_origin = mfccp->mf6cc_origin;
					rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
					rt->mf6c_parent = mfccp->mf6cc_parent;
					rt->mf6c_ifset = mfccp->mf6cc_ifset;
					/* initialize pkt counters per src-grp */
					rt->mf6c_pkt_cnt = 0;
					rt->mf6c_byte_cnt = 0;
					rt->mf6c_wrong_if = 0;

					if (rt->mf6c_expire)
						n6expire[hash]--;
					rt->mf6c_expire = 0;
				}
			}
			if (rt == NULL) {
				/* no upcall, so make a new entry */
				rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
				    M_NOWAIT);
				if (rt == NULL) {
					splx(s);
					return (ENOBUFS);
				}

				/* insert new entry at head of hash chain */
				rt->mf6c_origin = mfccp->mf6cc_origin;
				rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
				rt->mf6c_parent = mfccp->mf6cc_parent;
				rt->mf6c_ifset = mfccp->mf6cc_ifset;
				/* initialize pkt counters per src-grp */
				rt->mf6c_pkt_cnt = 0;
				rt->mf6c_byte_cnt = 0;
				rt->mf6c_wrong_if = 0;
				rt->mf6c_expire = 0;
				rt->mf6c_stall = NULL;

				/* link into table */
				rt->mf6c_next = mf6ctable[hash];
				mf6ctable[hash] = rt;
			}
		}
		splx(s);
		return (0);
	}

	#ifdef UPCALL_TIMING
	/*
	 * Record how long an upcall took to be serviced: the time elapsed since
	 * *t is bucketed into upcall_data[], with everything beyond UPCALL_MAX
	 * landing in the last slot.  Timestamps that are not in the past are
	 * ignored.
	 */
	static void
	collate(struct timeval *t)
	{
		struct timeval now;
		u_long elapsed;
		u_long slot;

		GET_TIME(now);

		if (!TV_LT(*t, now))
			return;

		TV_DELTA(now, *t, elapsed);

		slot = elapsed >> 10;
		if (slot > UPCALL_MAX)
			slot = UPCALL_MAX;

		++upcall_data[slot];
	}
	#endif /* UPCALL_TIMING */

	/*
	* Delete an mfc entry
	*/
	static int
	del_m6fc(struct mf6cctl *mfccp)
	{
	struct sockaddr_in6 origin;
	struct sockaddr_in6 mcastgrp;
	struct mf6c *rt;
	struct mf6c **nptr;
	u_long hash;
	int s;

	origin = mfccp->mf6cc_origin;
	mcastgrp = mfccp->mf6cc_mcastgrp;
	hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);

	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_MFC) {
	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
	log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
	ip6_sprintf(ip6bufo, &origin.sin6_addr),
	ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
	}
	#endif

	s = splnet();

	nptr = &mf6ctable[hash];
	while ((rt = *nptr) != NULL) {
	if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr,
	&rt->mf6c_origin.sin6_addr) &&
	IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr,
	&rt->mf6c_mcastgrp.sin6_addr) &&
	rt->mf6c_stall == NULL)
	break;

	nptr = &rt->mf6c_next;
	}
	if (rt == NULL) {
	splx(s);
	return (EADDRNOTAVAIL);
	}

	*nptr = rt->mf6c_next;
	free(rt, M_MRTABLE6);

	splx(s);

	return (0);
	}

	static int
	socket_send(struct socket s, struct mbuf mm, struct sockaddr_in6 *src)
	{

	if (s) {
	if (sbappendaddr(&s->so_rcv,
	(struct sockaddr *)src,
	mm, (struct mbuf *)0) != 0) {
	sorwakeup(s);
	return (0);
	}
	}
	m_freem(mm);
	return (-1);
	}

	/*
	* IPv6 multicast forwarding function. This function assumes that the packet
	* pointed to by "ip6" has arrived on (or is about to be sent to) the interface
	* pointed to by "ifp", and the packet is to be relayed to other networks
	* that have members of the packet's destination IPv6 multicast group.
	*
	* The packet is returned unscathed to the caller, unless it is
	* erroneous, in which case a non-zero return value tells the caller to
	* discard it.
	*
	* NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff
	* this function is called in the originating context (i.e., not when
	* forwarding a packet from other node). ip6_output(), which is currently the
	* only function that calls this function is called in the originating context,
	* explicitly ensures this condition. It is caller's responsibility to ensure
	* that if this function is called from somewhere else in the originating
	* context in the future.
	*/
	int
	X_ip6_mforward(struct ip6_hdr ip6, struct ifnet ifp, struct mbuf *m)
	{
	INIT_VNET_INET6(curvnet);
	struct mf6c *rt;
	struct mif6 *mifp;
	struct mbuf *mm;
	int s;
	mifi_t mifi;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_FORWARD)
	log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst),
	ifp->if_index);
	#endif

	/*
	* Don't forward a packet with Hop limit of zero or one,
	* or a packet destined to a local-only group.
	*/
	if (ip6->ip6_hlim <= 1 \|\| IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) \|\|
	IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
	return (0);
	ip6->ip6_hlim--;

	/*
	* Source address check: do not forward packets with unspecified
	* source. It was discussed in July 2000, on ipngwg mailing list.
	* This is rather more serious than unicast cases, because some
	* MLD packets can be sent with the unspecified source address
	* (although such packets must normally set 1 to the hop limit field).
	*/
	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
	V_ip6stat.ip6s_cantforward++;
	if (V_ip6_log_time + V_ip6_log_interval < time_second) {
	V_ip6_log_time = time_second;
	log(LOG_DEBUG,
	"cannot forward "
	"from %s to %s nxt %d received on %s\n",
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst),
	ip6->ip6_nxt,
	if_name(m->m_pkthdr.rcvif));
	}
	return (0);
	}

	/*
	* Determine forwarding mifs from the forwarding cache table
	*/
	s = splnet();
	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt) {
	splx(s);
	return (ip6_mdq(m, ifp, rt));
	} else {
	/*
	* If we don't have a route for packet's origin,
	* Make a copy of the packet &
	* send message to routing daemon
	*/

	struct mbuf *mb0;
	struct rtdetq *rte;
	u_long hash;
	/* int i, npkts;*/
	#ifdef UPCALL_TIMING
	struct timeval tp;

	GET_TIME(tp);
	#endif /* UPCALL_TIMING */

	mrt6stat.mrt6s_no_route++;
	#ifdef MRT6DEBUG
	if (V_mrt6debug & (DEBUG_FORWARD \| DEBUG_MFC))
	log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst));
	#endif

	/*
	* Allocate mbufs early so that we don't do extra work if we
	* are just going to fail anyway.
	*/
	rte = (struct rtdetq )malloc(sizeof(rte), M_MRTABLE6,
	M_NOWAIT);
	if (rte == NULL) {
	splx(s);
	return (ENOBUFS);
	}
	mb0 = m_copy(m, 0, M_COPYALL);
	/*
	* Pullup packet header if needed before storing it,
	* as other references may modify it in the meantime.
	*/
	if (mb0 &&
	(M_HASCL(mb0) \|\| mb0->m_len < sizeof(struct ip6_hdr)))
	mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
	if (mb0 == NULL) {
	free(rte, M_MRTABLE6);
	splx(s);
	return (ENOBUFS);
	}

	/* is there an upcall waiting for this packet? */
	hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
	for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
	if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
	&rt->mf6c_origin.sin6_addr) &&
	IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
	&rt->mf6c_mcastgrp.sin6_addr) &&
	(rt->mf6c_stall != NULL))
	break;
	}

	if (rt == NULL) {
	struct mrt6msg *im;
	#ifdef MRT6_OINIT
	struct omrt6msg *oim;
	#endif

	/* no upcall, so make a new entry */
	rt = (struct mf6c )malloc(sizeof(rt), M_MRTABLE6,
	M_NOWAIT);
	if (rt == NULL) {
	free(rte, M_MRTABLE6);
	m_freem(mb0);
	splx(s);
	return (ENOBUFS);
	}
	/*
	* Make a copy of the header to send to the user
	* level process
	*/
	mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));

	if (mm == NULL) {
	free(rte, M_MRTABLE6);
	m_freem(mb0);
	free(rt, M_MRTABLE6);
	splx(s);
	return (ENOBUFS);
	}

	/*
	* Send message to routing daemon
	*/
	sin6.sin6_addr = ip6->ip6_src;

	im = NULL;
	#ifdef MRT6_OINIT
	oim = NULL;
	#endif
	switch (V_ip6_mrouter_ver) {
	#ifdef MRT6_OINIT
	case MRT6_OINIT:
	oim = mtod(mm, struct omrt6msg *);
	oim->im6_msgtype = MRT6MSG_NOCACHE;
	oim->im6_mbz = 0;
	break;
	#endif
	case MRT6_INIT:
	im = mtod(mm, struct mrt6msg *);
	im->im6_msgtype = MRT6MSG_NOCACHE;
	im->im6_mbz = 0;
	break;
	default:
	free(rte, M_MRTABLE6);
	m_freem(mb0);
	free(rt, M_MRTABLE6);
	splx(s);
	return (EINVAL);
	}

	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_FORWARD)
	log(LOG_DEBUG,
	"getting the iif info in the kernel\n");
	#endif

	for (mifp = mif6table, mifi = 0;
	mifi < nummifs && mifp->m6_ifp != ifp;
	mifp++, mifi++)
	;

	switch (V_ip6_mrouter_ver) {
	#ifdef MRT6_OINIT
	case MRT6_OINIT:
	oim->im6_mif = mifi;
	break;
	#endif
	case MRT6_INIT:
	im->im6_mif = mifi;
	break;
	}

	if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
	log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
	"socket queue full\n");
	mrt6stat.mrt6s_upq_sockfull++;
	free(rte, M_MRTABLE6);
	m_freem(mb0);
	free(rt, M_MRTABLE6);
	splx(s);
	return (ENOBUFS);
	}

	mrt6stat.mrt6s_upcalls++;

	/* insert new entry at head of hash chain */
	bzero(rt, sizeof(*rt));
	rt->mf6c_origin.sin6_family = AF_INET6;
	rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
	rt->mf6c_origin.sin6_addr = ip6->ip6_src;
	rt->mf6c_mcastgrp.sin6_family = AF_INET6;
	rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
	rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
	rt->mf6c_expire = UPCALL_EXPIRE;
	n6expire[hash]++;
	rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;

	/* link into table */
	rt->mf6c_next = mf6ctable[hash];
	mf6ctable[hash] = rt;
	/* Add this entry to the end of the queue */
	rt->mf6c_stall = rte;
	} else {
	/* determine if q has overflowed */
	struct rtdetq **p;
	int npkts = 0;

	for (p = &rt->mf6c_stall; p != NULL; p = &(p)->next)
	if (++npkts > MAX_UPQ6) {
	mrt6stat.mrt6s_upq_ovflw++;
	free(rte, M_MRTABLE6);
	m_freem(mb0);
	splx(s);
	return (0);
	}

	/* Add this entry to the end of the queue */
	*p = rte;
	}

	rte->next = NULL;
	rte->m = mb0;
	rte->ifp = ifp;
	#ifdef UPCALL_TIMING
	rte->t = tp;
	#endif /* UPCALL_TIMING */

	splx(s);

	return (0);
	}
	}

	/*
	 * Clean up cache entries if upcalls are not serviced.
	 * Call from the Slow Timeout mechanism, every half second.
	 *
	 * Walks every hash bucket of mf6ctable that has pending (unresolved)
	 * entries, decrements the expiry counter of each entry that still has
	 * stalled packets queued, and when the counter reaches zero frees the
	 * queued packets and unlinks/frees the cache entry itself.  The routine
	 * re-arms its own callout before returning.
	 *
	 * The argument is unused.
	 */
	static void
	expire_upcalls(void *unused)
	{
		struct rtdetq *rte;
		struct mf6c *mfc, **nptr;
		int i;
		int s;

		s = splnet();
		for (i = 0; i < MF6CTBLSIZ; i++) {
			/* nothing waiting on an upcall in this bucket */
			if (n6expire[i] == 0)
				continue;
			/*
			 * nptr always points at the link that references the
			 * current entry, so an entry can be unlinked in place.
			 */
			nptr = &mf6ctable[i];
			while ((mfc = *nptr) != NULL) {
				rte = mfc->mf6c_stall;
				/*
				 * Skip real cache entries
				 * Make sure it wasn't marked to not expire (shouldn't happen)
				 * If it expires now
				 */
				if (rte != NULL &&
				    mfc->mf6c_expire != 0 &&
				    --mfc->mf6c_expire == 0) {
	#ifdef MRT6DEBUG
					if (V_mrt6debug & DEBUG_EXPIRE) {
						char ip6bufo[INET6_ADDRSTRLEN];
						char ip6bufg[INET6_ADDRSTRLEN];
						log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
						    ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
						    ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
					}
	#endif
					/*
					 * drop all the packets
					 * free the mbuf with the pkt, if, timing info
					 */
					do {
						struct rtdetq *n = rte->next;
						m_freem(rte->m);
						free(rte, M_MRTABLE6);
						rte = n;
					} while (rte != NULL);
					mrt6stat.mrt6s_cache_cleanups++;
					n6expire[i]--;

					/* unlink; nptr now references the successor */
					*nptr = mfc->mf6c_next;
					free(mfc, M_MRTABLE6);
				} else {
					nptr = &mfc->mf6c_next;
				}
			}
		}
		splx(s);
		callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
		    expire_upcalls, NULL);
	}

	/*
	* Packet forwarding routine once entry in the cache is made
	*/
	static int
	ip6_mdq(struct mbuf m, struct ifnet ifp, struct mf6c *rt)
	{
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	mifi_t mifi, iif;
	struct mif6 *mifp;
	int plen = m->m_pkthdr.len;
	struct in6_addr src0, dst0; /* copies for local work */
	u_int32_t iszone, idzone, oszone, odzone;
	int error = 0;

	/*
	* Macro to send packet on mif. Since RSVP packets don't get counted on
	* input, they shouldn't get counted on output, so statistics keeping is
	* separate.
	*/

	#define MC6_SEND(ip6, mifp, m) do { \
	if ((mifp)->m6_flags & MIFF_REGISTER) \
	register_send((ip6), (mifp), (m)); \
	else \
	phyint_send((ip6), (mifp), (m)); \
	} while (/CONSTCOND/ 0)

	/*
	* Don't forward if it didn't arrive from the parent mif
	* for its origin.
	*/
	mifi = rt->mf6c_parent;
	if ((mifi >= nummifs) \|\| (mif6table[mifi].m6_ifp != ifp)) {
	/* came in the wrong interface */
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_FORWARD)
	log(LOG_DEBUG,
	"wrong if: ifid %d mifi %d mififid %x\n",
	ifp->if_index, mifi,
	mif6table[mifi].m6_ifp->if_index);
	#endif
	mrt6stat.mrt6s_wrong_if++;
	rt->mf6c_wrong_if++;
	/*
	* If we are doing PIM processing, and we are forwarding
	* packets on this interface, send a message to the
	* routing daemon.
	*/
	/* have to make sure this is a valid mif */
	if (mifi < nummifs && mif6table[mifi].m6_ifp)
	if (V_pim6 && (m->m_flags & M_LOOP) == 0) {
	/*
	* Check the M_LOOP flag to avoid an
	* unnecessary PIM assert.
	* XXX: M_LOOP is an ad-hoc hack...
	*/
	static struct sockaddr_in6 sin6 =
	{ sizeof(sin6), AF_INET6 };

	struct mbuf *mm;
	struct mrt6msg *im;
	#ifdef MRT6_OINIT
	struct omrt6msg *oim;
	#endif

	mm = m_copy(m, 0, sizeof(struct ip6_hdr));
	if (mm &&
	(M_HASCL(mm) \|\|
	mm->m_len < sizeof(struct ip6_hdr)))
	mm = m_pullup(mm, sizeof(struct ip6_hdr));
	if (mm == NULL)
	return (ENOBUFS);

	#ifdef MRT6_OINIT
	oim = NULL;
	#endif
	im = NULL;
	switch (V_ip6_mrouter_ver) {
	#ifdef MRT6_OINIT
	case MRT6_OINIT:
	oim = mtod(mm, struct omrt6msg *);
	oim->im6_msgtype = MRT6MSG_WRONGMIF;
	oim->im6_mbz = 0;
	break;
	#endif
	case MRT6_INIT:
	im = mtod(mm, struct mrt6msg *);
	im->im6_msgtype = MRT6MSG_WRONGMIF;
	im->im6_mbz = 0;
	break;
	default:
	m_freem(mm);
	return (EINVAL);
	}

	for (mifp = mif6table, iif = 0;
	iif < nummifs && mifp &&
	mifp->m6_ifp != ifp;
	mifp++, iif++)
	;

	switch (V_ip6_mrouter_ver) {
	#ifdef MRT6_OINIT
	case MRT6_OINIT:
	oim->im6_mif = iif;
	sin6.sin6_addr = oim->im6_src;
	break;
	#endif
	case MRT6_INIT:
	im->im6_mif = iif;
	sin6.sin6_addr = im->im6_src;
	break;
	}

	mrt6stat.mrt6s_upcalls++;

	if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
	#ifdef MRT6DEBUG
	if (V_mrt6debug)
	log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n");
	#endif
	++mrt6stat.mrt6s_upq_sockfull;
	return (ENOBUFS);
	} /* if socket Q full */
	} /* if PIM */
	return (0);
	} /* if wrong iif */

	/* If I sourced this packet, it counts as output, else it was input. */
	if (m->m_pkthdr.rcvif == NULL) {
	/* XXX: is rcvif really NULL when output?? */
	mif6table[mifi].m6_pkt_out++;
	mif6table[mifi].m6_bytes_out += plen;
	} else {
	mif6table[mifi].m6_pkt_in++;
	mif6table[mifi].m6_bytes_in += plen;
	}
	rt->mf6c_pkt_cnt++;
	rt->mf6c_byte_cnt += plen;

	/*
	* For each mif, forward a copy of the packet if there are group
	* members downstream on the interface.
	*/
	src0 = ip6->ip6_src;
	dst0 = ip6->ip6_dst;
	if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 \|\|
	(error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
	V_ip6stat.ip6s_badscope++;
	return (error);
	}
	for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
	if (IF_ISSET(mifi, &rt->mf6c_ifset)) {
	/*
	* check if the outgoing packet is going to break
	* a scope boundary.
	* XXX For packets through PIM register tunnel
	* interface, we believe a routing daemon.
	*/
	if (!(mif6table[rt->mf6c_parent].m6_flags &
	MIFF_REGISTER) &&
	!(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
	if (in6_setscope(&src0, mif6table[mifi].m6_ifp,
	&oszone) \|\|
	in6_setscope(&dst0, mif6table[mifi].m6_ifp,
	&odzone) \|\|
	iszone != oszone \|\|
	idzone != odzone) {
	V_ip6stat.ip6s_badscope++;
	continue;
	}
	}

	mifp->m6_pkt_out++;
	mifp->m6_bytes_out += plen;
	MC6_SEND(ip6, mifp, m);
	}
	}
	return (0);
	}

	static void
	phyint_send(struct ip6_hdr ip6, struct mif6 mifp, struct mbuf *m)
	{
	INIT_VNET_INET6(curvnet);
	struct mbuf *mb_copy;
	struct ifnet *ifp = mifp->m6_ifp;
	int error = 0;
	int s = splnet(); /* needs to protect static "ro" below. */
	static struct route_in6 ro;
	struct in6_multi *in6m;
	struct sockaddr_in6 *dst6;
	u_long linkmtu;

	/*
	* Make a new reference to the packet; make sure that
	* the IPv6 header is actually copied, not just referenced,
	* so that ip6_output() only scribbles on the copy.
	*/
	mb_copy = m_copy(m, 0, M_COPYALL);
	if (mb_copy &&
	(M_HASCL(mb_copy) \|\| mb_copy->m_len < sizeof(struct ip6_hdr)))
	mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
	if (mb_copy == NULL) {
	splx(s);
	return;
	}
	/* set MCAST flag to the outgoing packet */
	mb_copy->m_flags \|= M_MCAST;

	/*
	* If we sourced the packet, call ip6_output since we may devide
	* the packet into fragments when the packet is too big for the
	* outgoing interface.
	* Otherwise, we can simply send the packet to the interface
	* sending queue.
	*/
	if (m->m_pkthdr.rcvif == NULL) {
	struct ip6_moptions im6o;

	im6o.im6o_multicast_ifp = ifp;
	/* XXX: ip6_output will override ip6->ip6_hlim */
	im6o.im6o_multicast_hlim = ip6->ip6_hlim;
	im6o.im6o_multicast_loop = 1;
	error = ip6_output(mb_copy, NULL, &ro,
	IPV6_FORWARDING, &im6o, NULL, NULL);

	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_XMIT)
	log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
	mifp - mif6table, error);
	#endif
	splx(s);
	return;
	}

	/*
	* If we belong to the destination multicast group
	* on the outgoing interface, loop back a copy.
	*/
	dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
	IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
	if (in6m != NULL) {
	dst6->sin6_len = sizeof(struct sockaddr_in6);
	dst6->sin6_family = AF_INET6;
	dst6->sin6_addr = ip6->ip6_dst;
	ip6_mloopback(ifp, m, (struct sockaddr_in6 *)&ro.ro_dst);
	}
	/*
	* Put the packet into the sending queue of the outgoing interface
	* if it would fit in the MTU of the interface.
	*/
	linkmtu = IN6_LINKMTU(ifp);
	if (mb_copy->m_pkthdr.len <= linkmtu \|\| linkmtu < IPV6_MMTU) {
	dst6->sin6_len = sizeof(struct sockaddr_in6);
	dst6->sin6_family = AF_INET6;
	dst6->sin6_addr = ip6->ip6_dst;
	/*
	* We just call if_output instead of nd6_output here, since
	* we need no ND for a multicast forwarded packet...right?
	*/
	- IF_AFDATA_LOCK(ifp);
	error = (*ifp->if_output)(ifp, mb_copy,
	(struct sockaddr *)&ro.ro_dst, NULL);
	- IF_AFDATA_UNLOCK(ifp);
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_XMIT)
	log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
	mifp - mif6table, error);
	#endif
	} else {
	/*
	* pMTU discovery is intentionally disabled by default, since
	* various router may notify pMTU in multicast, which can be
	* a DDoS to a router
	*/
	if (V_ip6_mcast_pmtu)
	icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
	else {
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_XMIT) {
	char ip6bufs[INET6_ADDRSTRLEN];
	char ip6bufd[INET6_ADDRSTRLEN];
	log(LOG_DEBUG,
	"phyint_send: packet too big on %s o %s "
	"g %s size %d(discarded)\n",
	if_name(ifp),
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst),
	mb_copy->m_pkthdr.len);
	}
	#endif /* MRT6DEBUG */
	m_freem(mb_copy); /* simply discard the packet */
	}
	}

	splx(s);
	}

	static int
	register_send(struct ip6_hdr ip6, struct mif6 mif, struct mbuf *m)
	{
	struct mbuf *mm;
	int i, len = m->m_pkthdr.len;
	static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
	struct mrt6msg *im6;

	#ifdef MRT6DEBUG
	if (V_mrt6debug) {
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
	log(LOG_DEBUG, " IPv6 register_send \n src %s dst %s\n",
	ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst));
	}
	#endif
	++pim6stat.pim6s_snd_registers;

	/* Make a copy of the packet to send to the user level process */
	MGETHDR(mm, M_DONTWAIT, MT_HEADER);
	if (mm == NULL)
	return (ENOBUFS);
	mm->m_pkthdr.rcvif = NULL;
	mm->m_data += max_linkhdr;
	mm->m_len = sizeof(struct ip6_hdr);

	if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
	m_freem(mm);
	return (ENOBUFS);
	}
	i = MHLEN - M_LEADINGSPACE(mm);
	if (i > len)
	i = len;
	mm = m_pullup(mm, i);
	if (mm == NULL)
	return (ENOBUFS);
	/* TODO: check it! */
	mm->m_pkthdr.len = len + sizeof(struct ip6_hdr);

	/*
	* Send message to routing daemon
	*/
	sin6.sin6_addr = ip6->ip6_src;

	im6 = mtod(mm, struct mrt6msg *);
	im6->im6_msgtype = MRT6MSG_WHOLEPKT;
	im6->im6_mbz = 0;

	im6->im6_mif = mif - mif6table;

	/* iif info is not given for reg. encap.n */
	mrt6stat.mrt6s_upcalls++;

	if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
	#ifdef MRT6DEBUG
	if (V_mrt6debug)
	log(LOG_WARNING,
	"register_send: ip6_mrouter socket queue full\n");
	#endif
	++mrt6stat.mrt6s_upq_sockfull;
	return (ENOBUFS);
	}
	return (0);
	}

	/*
	* PIM sparse mode hook
	* Receives the pim control messages, and passes them up to the listening
	* socket, using rip6_input.
	* The only message processed is the REGISTER pim message; the pim header
	* is stripped off, and the inner packet is passed to register_mforward.
	*/
	int
	pim6_input(struct mbuf *mp, int offp, int proto)
	{
	INIT_VNET_INET6(curvnet);
	struct pim pim; / pointer to a pim struct */
	struct ip6_hdr *ip6;
	int pimlen;
	struct mbuf m = mp;
	int minlen;
	int off = *offp;

	++pim6stat.pim6s_rcv_total;

	ip6 = mtod(m, struct ip6_hdr *);
	pimlen = m->m_pkthdr.len - *offp;

	/*
	* Validate lengths
	*/
	if (pimlen < PIM_MINLEN) {
	++pim6stat.pim6s_rcv_tooshort;
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM)
	log(LOG_DEBUG,"pim6_input: PIM packet too short\n");
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	/*
	* if the packet is at least as big as a REGISTER, go ahead
	* and grab the PIM REGISTER header size, to avoid another
	* possible m_pullup() later.
	*
	* PIM_MINLEN == pimhdr + u_int32 == 8
	* PIM6_REG_MINLEN == pimhdr + reghdr + eip6hdr == 4 + 4 + 40
	*/
	minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN;

	/*
	* Make sure that the IP6 and PIM headers in contiguous memory, and
	* possibly the PIM REGISTER header
	*/
	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE);
	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	/* adjust mbuf to point to the PIM header */
	pim = (struct pim *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
	if (pim == NULL) {
	pim6stat.pim6s_rcv_tooshort++;
	return (IPPROTO_DONE);
	}
	#endif

	#define PIM6_CHECKSUM
	#ifdef PIM6_CHECKSUM
	{
	int cksumlen;

	/*
	* Validate checksum.
	* If PIM REGISTER, exclude the data packet
	*/
	if (pim->pim_type == PIM_REGISTER)
	cksumlen = PIM_MINLEN;
	else
	cksumlen = pimlen;

	if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
	++pim6stat.pim6s_rcv_badsum;
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM)
	log(LOG_DEBUG,
	"pim6_input: invalid checksum\n");
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}
	}
	#endif /* PIM_CHECKSUM */

	/* PIM version check */
	if (pim->pim_ver != PIM_VERSION) {
	++pim6stat.pim6s_rcv_badversion;
	#ifdef MRT6DEBUG
	log(LOG_ERR,
	"pim6_input: incorrect version %d, expecting %d\n",
	pim->pim_ver, PIM_VERSION);
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	if (pim->pim_type == PIM_REGISTER) {
	/*
	* since this is a REGISTER, we'll make a copy of the register
	* headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the
	* routing daemon.
	*/
	static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 };

	struct mbuf *mcp;
	struct ip6_hdr *eip6;
	u_int32_t *reghdr;
	int rc;
	#ifdef MRT6DEBUG
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
	#endif

	++pim6stat.pim6s_rcv_registers;

	if ((reg_mif_num >= nummifs) \|\| (reg_mif_num == (mifi_t) -1)) {
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM)
	log(LOG_DEBUG,
	"pim6_input: register mif not set: %d\n",
	reg_mif_num);
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	reghdr = (u_int32_t *)(pim + 1);

	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
	goto pim6_input_to_daemon;

	/*
	* Validate length
	*/
	if (pimlen < PIM6_REG_MINLEN) {
	++pim6stat.pim6s_rcv_tooshort;
	++pim6stat.pim6s_rcv_badregisters;
	#ifdef MRT6DEBUG
	log(LOG_ERR,
	"pim6_input: register packet size too "
	"small %d from %s\n",
	pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	eip6 = (struct ip6_hdr *) (reghdr + 1);
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM)
	log(LOG_DEBUG,
	"pim6_input[register], eip6: %s -> %s, "
	"eip6 plen %d\n",
	ip6_sprintf(ip6bufs, &eip6->ip6_src),
	ip6_sprintf(ip6bufd, &eip6->ip6_dst),
	ntohs(eip6->ip6_plen));
	#endif

	/* verify the version number of the inner packet */
	if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
	++pim6stat.pim6s_rcv_badregisters;
	#ifdef MRT6DEBUG
	log(LOG_DEBUG, "pim6_input: invalid IP version (%d) "
	"of the inner packet\n",
	(eip6->ip6_vfc & IPV6_VERSION));
	#endif
	m_freem(m);
	return (IPPROTO_NONE);
	}

	/* verify the inner packet is destined to a mcast group */
	if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
	++pim6stat.pim6s_rcv_badregisters;
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM)
	log(LOG_DEBUG,
	"pim6_input: inner packet of register "
	"is not multicast %s\n",
	ip6_sprintf(ip6bufd, &eip6->ip6_dst));
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	/*
	* make a copy of the whole header to pass to the daemon later.
	*/
	mcp = m_copy(m, 0, off + PIM6_REG_MINLEN);
	if (mcp == NULL) {
	#ifdef MRT6DEBUG
	log(LOG_ERR,
	"pim6_input: pim register: "
	"could not copy register head\n");
	#endif
	m_freem(m);
	return (IPPROTO_DONE);
	}

	/*
	* forward the inner ip6 packet; point m_data at the inner ip6.
	*/
	m_adj(m, off + PIM_MINLEN);
	#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_PIM) {
	log(LOG_DEBUG,
	"pim6_input: forwarding decapsulated register: "
	"src %s, dst %s, mif %d\n",
	ip6_sprintf(ip6bufs, &eip6->ip6_src),
	ip6_sprintf(ip6bufd, &eip6->ip6_dst),
	reg_mif_num);
	}
	#endif

	rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m,
	dst.sin6_family, 0);

	/* prepare the register head to send to the mrouting daemon */
	m = mcp;
	}

	/*
	* Pass the PIM message up to the daemon; if it is a register message
	* pass the 'head' only up to the daemon. This includes the
	* encapsulator ip6 header, pim header, register header and the
	* encapsulated ip6 header.
	*/
	pim6_input_to_daemon:
	rip6_input(&m, offp, proto);
	return (IPPROTO_DONE);
	}
	Index: projects/arpv2_merge_1/sys/netinet6/ip6_output.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/ip6_output.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/ip6_output.c (revision 185839)
	@@ -1,3352 +1,3348 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
	*/

	/*-
	* Copyright (c) 1982, 1986, 1988, 1990, 1993
	* The Regents of the University of California. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 4. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"
	#include "opt_ipsec.h"

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/errno.h>
	#include <sys/priv.h>
	#include <sys/proc.h>
	#include <sys/protosw.h>
	#include <sys/socket.h>
	#include <sys/socketvar.h>
	#include <sys/ucred.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/netisr.h>
	#include <net/route.h>
	#include <net/pfil.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <netinet6/in6_var.h>
	#include <netinet/ip6.h>
	#include <netinet/icmp6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet/in_pcb.h>
	#include <netinet/tcp_var.h>
	#include <netinet6/nd6.h>
	#include <netinet/vinet.h>

	#ifdef IPSEC
	#include <netipsec/ipsec.h>
	#include <netipsec/ipsec6.h>
	#include <netipsec/key.h>
	#include <netinet6/ip6_ipsec.h>
	#endif /* IPSEC */

	#include <netinet6/ip6protosw.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/vinet6.h>

	/* Kernel malloc type tag, named "ip6_moptions", private to this file. */
	static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");

	/*
	 * Working set of mbufs used by ip6_output() while it assembles the
	 * header chain of an outgoing packet.  Each extension-header member is
	 * NULL unless the corresponding option was supplied by the caller.
	 */
	struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* IPv6 header, once split from the payload */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part) */
	};

	/*
	 * Forward declarations of the file-local helpers.  Pointer declarators
	 * are spelled out in full so the prototypes match their call sites
	 * (e.g. ip6_copyexthdr() receives the address of an mbuf pointer and
	 * ip6_splithdr() receives an mbuf pointer plus a struct ip6_exthdrs *).
	 */
	static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
		struct ucred *, int));
	static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
		struct socket *, struct sockopt *));
	static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
	static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
		struct ucred *, int, int, int));

	static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
	static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
	static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
	static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
		struct ip6_frag **));
	static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
	static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
	static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
		struct ifnet *, struct in6_addr *, u_long *, int *));
	static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);


	/*
	 * Make an extension header from option data.  hp is the source, and
	 * mp is the destination.
	 *
	 * Does nothing when hp is NULL.  Otherwise the header at hp is copied
	 * into a new mbuf stored through mp; the copy length is taken from the
	 * header's own length field, (ip6e_len + 1) << 3 bytes.
	 *
	 * Must be expanded inside a function that has an "error" variable and a
	 * "freehdrs" label: on failure error is set and control jumps there.
	 */
	#define MAKE_EXTHDR(hp, mp)						\
	    do {								\
		if (hp) {							\
			struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
			error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
			    ((eh)->ip6e_len + 1) << 3);				\
			if (error)						\
				goto freehdrs;					\
		}								\
	    } while (/*CONSTCOND*/ 0)

	/*
	 * Form a chain of extension headers.
	 * m is the extension header mbuf
	 * mp is the previous mbuf in the chain
	 * p is the next header
	 * i is the type of option.
	 *
	 * Does nothing when m is NULL.  Otherwise:
	 *  - the first byte of m receives the value currently at *p;
	 *  - *p is overwritten with i, and p is advanced to point at the
	 *    first byte of m;
	 *  - m is linked in directly after mp, and mp is advanced to m.
	 *
	 * mp and p are modified, so both must be lvalues.  Must be expanded
	 * where a "hdrsplit" variable is in scope; panics if it is zero.
	 */
	#define MAKE_CHAIN(m, mp, p, i)\
	    do {\
		if (m) {\
			if (!hdrsplit) \
				panic("assumption failed: hdr not split"); \
			*mtod((m), u_char *) = *(p);\
			*(p) = (i);\
			p = mtod((m), u_char *);\
			(m)->m_next = (mp)->m_next;\
			(mp)->m_next = (m);\
			(mp) = (m);\
		}\
	    } while (/*CONSTCOND*/ 0)

	/*
	* IP6 output. The packet in mbuf chain m contains a skeletal IP6
	* header (with pri, len, nxt, hlim, src, dst).
	* This function may modify ver and hlim only.
	* The mbuf chain containing the packet will be freed.
	* The mbuf opt, if present, will not be freed.
	*
	* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
	* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
	* which is rt_rmx.rmx_mtu.
	*
	* ifpp - XXX: just for statistics
	*/
	int
	ip6_output(struct mbuf m0, struct ip6_pktopts opt,
	struct route_in6 ro, int flags, struct ip6_moptions im6o,
	struct ifnet *ifpp, struct inpcb inp)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr ip6, mhip6;
	struct ifnet ifp, origifp;
	struct mbuf *m = m0;
	struct mbuf *mprev = NULL;
	int hlen, tlen, len, off;
	struct route_in6 ip6route;
	struct rtentry *rt = NULL;
	struct sockaddr_in6 *dst, src_sa, dst_sa;
	struct in6_addr odst;
	int error = 0;
	struct in6_ifaddr *ia = NULL;
	u_long mtu;
	int alwaysfrag, dontfrag;
	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
	struct ip6_exthdrs exthdrs;
	struct in6_addr finaldst, src0, dst0;
	u_int32_t zone;
	struct route_in6 *ro_pmtu = NULL;
	int hdrsplit = 0;
	int needipsec = 0;
	#ifdef IPSEC
	struct ipsec_output_state state;
	struct ip6_rthdr *rh = NULL;
	int needipsectun = 0;
	int segleft_org = 0;
	struct secpolicy *sp = NULL;
	#endif /* IPSEC */

	ip6 = mtod(m, struct ip6_hdr *);
	if (ip6 == NULL) {
	printf ("ip6 is NULL");
	goto bad;
	}

	finaldst = ip6->ip6_dst;

	bzero(&exthdrs, sizeof(exthdrs));

	if (opt) {
	/* Hop-by-Hop options header */
	MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
	/* Destination options header(1st part) */
	if (opt->ip6po_rthdr) {
	/*
	* Destination options header(1st part)
	* This only makes sense with a routing header.
	* See Section 9.2 of RFC 3542.
	* Disabling this part just for MIP6 convenience is
	* a bad idea. We need to think carefully about a
	* way to make the advanced API coexist with MIP6
	* options, which might automatically be inserted in
	* the kernel.
	*/
	MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
	}
	/* Routing header */
	MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
	/* Destination options header(2nd part) */
	MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
	}

	/*
	* IPSec checking which handles several cases.
	* FAST IPSEC: We re-injected the packet.
	*/
	#ifdef IPSEC
	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
	{
	case 1: /* Bad packet */
	goto freehdrs;
	case -1: /* Do IPSec */
	needipsec = 1;
	case 0: /* No IPSec */
	default:
	break;
	}
	#endif /* IPSEC */

	/*
	* Calculate the total length of the extension header chain.
	* Keep the length of the unfragmentable part for fragmentation.
	*/
	optlen = 0;
	if (exthdrs.ip6e_hbh)
	optlen += exthdrs.ip6e_hbh->m_len;
	if (exthdrs.ip6e_dest1)
	optlen += exthdrs.ip6e_dest1->m_len;
	if (exthdrs.ip6e_rthdr)
	optlen += exthdrs.ip6e_rthdr->m_len;
	unfragpartlen = optlen + sizeof(struct ip6_hdr);

	/* NOTE: we don't add AH/ESP length here. do that later. */
	if (exthdrs.ip6e_dest2)
	optlen += exthdrs.ip6e_dest2->m_len;

	/*
	* If we need IPsec, or there is at least one extension header,
	* separate IP6 header from the payload.
	*/
	if ((needipsec \|\| optlen) && !hdrsplit) {
	if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
	m = NULL;
	goto freehdrs;
	}
	m = exthdrs.ip6e_ip6;
	hdrsplit++;
	}

	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	/* adjust mbuf packet header length */
	m->m_pkthdr.len += optlen;
	plen = m->m_pkthdr.len - sizeof(*ip6);

	/* If this is a jumbo payload, insert a jumbo payload option. */
	if (plen > IPV6_MAXPACKET) {
	if (!hdrsplit) {
	if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
	m = NULL;
	goto freehdrs;
	}
	m = exthdrs.ip6e_ip6;
	hdrsplit++;
	}
	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);
	if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
	goto freehdrs;
	ip6->ip6_plen = 0;
	} else
	ip6->ip6_plen = htons(plen);

	/*
	* Concatenate headers and fill in next header fields.
	* Here we have, on "m"
	* IPv6 payload
	* and we insert headers accordingly. Finally, we should be getting:
	* IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
	*
	* during the header composing process, "m" points to IPv6 header.
	* "mprev" points to an extension header prior to esp.
	*/
	u_char *nexthdrp = &ip6->ip6_nxt;
	mprev = m;

	/*
	* we treat dest2 specially. this makes IPsec processing
	* much easier. the goal here is to make mprev point the
	* mbuf prior to dest2.
	*
	* result: IPv6 dest2 payload
	* m and mprev will point to IPv6 header.
	*/
	if (exthdrs.ip6e_dest2) {
	if (!hdrsplit)
	panic("assumption failed: hdr not split");
	exthdrs.ip6e_dest2->m_next = m->m_next;
	m->m_next = exthdrs.ip6e_dest2;
	mtod(exthdrs.ip6e_dest2, u_char ) = ip6->ip6_nxt;
	ip6->ip6_nxt = IPPROTO_DSTOPTS;
	}

	/*
	* result: IPv6 hbh dest1 rthdr dest2 payload
	* m will point to IPv6 header. mprev will point to the
	* extension header prior to dest2 (rthdr in the above case).
	*/
	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
	IPPROTO_DSTOPTS);
	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
	IPPROTO_ROUTING);

	#ifdef IPSEC
	if (!needipsec)
	goto skip_ipsec2;

	/*
	* pointers after IPsec headers are not valid any more.
	* other pointers need a great care too.
	* (IPsec routines should not mangle mbufs prior to AH/ESP)
	*/
	exthdrs.ip6e_dest2 = NULL;

	if (exthdrs.ip6e_rthdr) {
	rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
	segleft_org = rh->ip6r_segleft;
	rh->ip6r_segleft = 0;
	}

	bzero(&state, sizeof(state));
	state.m = m;
	error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
	&needipsectun);
	m = state.m;
	if (error == EJUSTRETURN) {
	/*
	* We had a SP with a level of 'use' and no SA. We
	* will just continue to process the packet without
	* IPsec processing.
	*/
	;
	} else if (error) {
	/* mbuf is already reclaimed in ipsec6_output_trans. */
	m = NULL;
	switch (error) {
	case EHOSTUNREACH:
	case ENETUNREACH:
	case EMSGSIZE:
	case ENOBUFS:
	case ENOMEM:
	break;
	default:
	printf("[%s:%d] (ipsec): error code %d\n",
	__func__, __LINE__, error);
	/* FALLTHROUGH */
	case ENOENT:
	/* don't show these error codes to the user */
	error = 0;
	break;
	}
	goto bad;
	} else if (!needipsectun) {
	/*
	* In the FAST IPSec case we have already
	* re-injected the packet and it has been freed
	* by the ipsec_done() function. So, just clean
	* up after ourselves.
	*/
	m = NULL;
	goto done;
	}
	if (exthdrs.ip6e_rthdr) {
	/* ah6_output doesn't modify mbuf chain */
	rh->ip6r_segleft = segleft_org;
	}
	skip_ipsec2:;
	#endif /* IPSEC */

	/*
	* If there is a routing header, replace the destination address field
	* with the first hop of the routing header.
	*/
	if (exthdrs.ip6e_rthdr) {
	struct ip6_rthdr *rh =
	(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
	struct ip6_rthdr *));
	struct ip6_rthdr0 *rh0;
	struct in6_addr *addr;
	struct sockaddr_in6 sa;

	switch (rh->ip6r_type) {
	case IPV6_RTHDR_TYPE_0:
	rh0 = (struct ip6_rthdr0 *)rh;
	addr = (struct in6_addr *)(rh0 + 1);

	/*
	* construct a sockaddr_in6 form of
	* the first hop.
	*
	* XXX: we may not have enough
	* information about its scope zone;
	* there is no standard API to pass
	* the information from the
	* application.
	*/
	bzero(&sa, sizeof(sa));
	sa.sin6_family = AF_INET6;
	sa.sin6_len = sizeof(sa);
	sa.sin6_addr = addr[0];
	if ((error = sa6_embedscope(&sa,
	V_ip6_use_defzone)) != 0) {
	goto bad;
	}
	ip6->ip6_dst = sa.sin6_addr;
	bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
	* (rh0->ip6r0_segleft - 1));
	addr[rh0->ip6r0_segleft - 1] = finaldst;
	/* XXX */
	in6_clearscope(addr + rh0->ip6r0_segleft - 1);
	break;
	default: /* is it possible? */
	error = EINVAL;
	goto bad;
	}
	}

	/* Source address validation */
	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
	(flags & IPV6_UNSPECSRC) == 0) {
	error = EOPNOTSUPP;
	V_ip6stat.ip6s_badscope++;
	goto bad;
	}
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
	error = EOPNOTSUPP;
	V_ip6stat.ip6s_badscope++;
	goto bad;
	}

	V_ip6stat.ip6s_localout++;

	/*
	* Route packet.
	*/
	if (ro == 0) {
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	}
	ro_pmtu = ro;
	if (opt && opt->ip6po_rthdr)
	ro = &opt->ip6po_route;
	dst = (struct sockaddr_in6 *)&ro->ro_dst;

	again:
	/*
	* if specified, try to fill in the traffic class field.
	* do not override if a non-zero value is already set.
	* we check the diffserv field and the ecn field separately.
	*/
	if (opt && opt->ip6po_tclass >= 0) {
	int mask = 0;

	if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
	mask \|= 0xfc;
	if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
	mask \|= 0x03;
	if (mask != 0)
	ip6->ip6_flow \|= htonl((opt->ip6po_tclass & mask) << 20);
	}

	/* fill in or override the hop limit field, if necessary. */
	if (opt && opt->ip6po_hlim != -1)
	ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	if (im6o != NULL)
	ip6->ip6_hlim = im6o->im6o_multicast_hlim;
	else
	ip6->ip6_hlim = V_ip6_defmcasthlim;
	}

	#ifdef IPSEC
	/*
	* We may re-inject packets into the stack here.
	*/
	if (needipsec && needipsectun) {
	struct ipsec_output_state state;

	/*
	* All the extension headers will become inaccessible
	* (since they can be encrypted).
	* Don't panic, we need no more updates to extension headers
	* on inner IPv6 packet (since they are now encapsulated).
	*
	* IPv6 [ESP\|AH] IPv6 [extension headers] payload
	*/
	bzero(&exthdrs, sizeof(exthdrs));
	exthdrs.ip6e_ip6 = m;

	bzero(&state, sizeof(state));
	state.m = m;
	state.ro = (struct route *)ro;
	state.dst = (struct sockaddr *)dst;

	error = ipsec6_output_tunnel(&state, sp, flags);

	m = state.m;
	ro = (struct route_in6 *)state.ro;
	dst = (struct sockaddr_in6 *)state.dst;
	if (error == EJUSTRETURN) {
	/*
	* We had a SP with a level of 'use' and no SA. We
	* will just continue to process the packet without
	* IPsec processing.
	*/
	;
	} else if (error) {
	/* mbuf is already reclaimed in ipsec6_output_tunnel. */
	m0 = m = NULL;
	m = NULL;
	switch (error) {
	case EHOSTUNREACH:
	case ENETUNREACH:
	case EMSGSIZE:
	case ENOBUFS:
	case ENOMEM:
	break;
	default:
	printf("[%s:%d] (ipsec): error code %d\n",
	__func__, __LINE__, error);
	/* FALLTHROUGH */
	case ENOENT:
	/* don't show these error codes to the user */
	error = 0;
	break;
	}
	goto bad;
	} else {
	/*
	* In the FAST IPSec case we have already
	* re-injected the packet and it has been freed
	* by the ipsec_done() function. So, just clean
	* up after ourselves.
	*/
	m = NULL;
	goto done;
	}

	exthdrs.ip6e_ip6 = m;
	}
	#endif /* IPSEC */

	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	bzero(&dst_sa, sizeof(dst_sa));
	dst_sa.sin6_family = AF_INET6;
	dst_sa.sin6_len = sizeof(dst_sa);
	dst_sa.sin6_addr = ip6->ip6_dst;
	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
	&ifp, &rt, 0)) != 0) {
	switch (error) {
	case EHOSTUNREACH:
	V_ip6stat.ip6s_noroute++;
	break;
	case EADDRNOTAVAIL:
	default:
	break; /* XXX statistics? */
	}
	if (ifp != NULL)
	in6_ifstat_inc(ifp, ifs6_out_discard);
	goto bad;
	}
	if (rt == NULL) {
	/*
	* If in6_selectroute() does not return a route entry,
	* dst may not have been updated.
	*/
	dst = dst_sa; / XXX */
	}

	/*
	* then rt (for unicast) and ifp must be non-NULL valid values.
	*/
	if ((flags & IPV6_FORWARDING) == 0) {
	/* XXX: the FORWARDING flag can be set for mrouting. */
	in6_ifstat_inc(ifp, ifs6_out_request);
	}
	if (rt != NULL) {
	ia = (struct in6_ifaddr *)(rt->rt_ifa);
	rt->rt_use++;
	}

	/*
	* The outgoing interface must be in the zone of source and
	* destination addresses. We should use ia_ifp to support the
	* case of sending packets to an address of our own.
	*/
	if (ia != NULL && ia->ia_ifp)
	origifp = ia->ia_ifp;
	else
	origifp = ifp;

	src0 = ip6->ip6_src;
	if (in6_setscope(&src0, origifp, &zone))
	goto badscope;
	bzero(&src_sa, sizeof(src_sa));
	src_sa.sin6_family = AF_INET6;
	src_sa.sin6_len = sizeof(src_sa);
	src_sa.sin6_addr = ip6->ip6_src;
	if (sa6_recoverscope(&src_sa) \|\| zone != src_sa.sin6_scope_id)
	goto badscope;

	dst0 = ip6->ip6_dst;
	if (in6_setscope(&dst0, origifp, &zone))
	goto badscope;
	/* re-initialize to be sure */
	bzero(&dst_sa, sizeof(dst_sa));
	dst_sa.sin6_family = AF_INET6;
	dst_sa.sin6_len = sizeof(dst_sa);
	dst_sa.sin6_addr = ip6->ip6_dst;
	if (sa6_recoverscope(&dst_sa) \|\| zone != dst_sa.sin6_scope_id) {
	goto badscope;
	}

	/* scope check is done. */
	goto routefound;

	badscope:
	V_ip6stat.ip6s_badscope++;
	in6_ifstat_inc(origifp, ifs6_out_discard);
	if (error == 0)
	error = EHOSTUNREACH; /* XXX */
	goto bad;

	routefound:
	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	if (opt && opt->ip6po_nextroute.ro_rt) {
	/*
	* The nexthop is explicitly specified by the
	* application. We assume the next hop is an IPv6
	* address.
	*/
	dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
	}
	else if ((rt->rt_flags & RTF_GATEWAY))
	dst = (struct sockaddr_in6 *)rt->rt_gateway;
	}

	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
	m->m_flags &= ~(M_BCAST \| M_MCAST); /* just in case */
	} else {
	struct in6_multi *in6m;

	m->m_flags = (m->m_flags & ~M_BCAST) \| M_MCAST;

	in6_ifstat_inc(ifp, ifs6_out_mcast);

	/*
	* Confirm that the outgoing interface supports multicast.
	*/
	if (!(ifp->if_flags & IFF_MULTICAST)) {
	V_ip6stat.ip6s_noroute++;
	in6_ifstat_inc(ifp, ifs6_out_discard);
	error = ENETUNREACH;
	goto bad;
	}
	IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
	if (in6m != NULL &&
	(im6o == NULL \|\| im6o->im6o_multicast_loop)) {
	/*
	* If we belong to the destination multicast group
	* on the outgoing interface, and the caller did not
	* forbid loopback, loop back a copy.
	*/
	ip6_mloopback(ifp, m, dst);
	} else {
	/*
	* If we are acting as a multicast router, perform
	* multicast forwarding as if the packet had just
	* arrived on the interface to which we are about
	* to send. The multicast forwarding function
	* recursively calls this function, using the
	* IPV6_FORWARDING flag to prevent infinite recursion.
	*
	* Multicasts that are looped back by ip6_mloopback(),
	* above, will be forwarded by the ip6_input() routine,
	* if necessary.
	*/
	if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
	/*
	* XXX: ip6_mforward expects that rcvif is NULL
	* when it is called from the originating path.
	* However, it is not always the case, since
	* some versions of MGETHDR() does not
	* initialize the field.
	*/
	m->m_pkthdr.rcvif = NULL;
	if (ip6_mforward(ip6, ifp, m) != 0) {
	m_freem(m);
	goto done;
	}
	}
	}
	/*
	* Multicasts with a hoplimit of zero may be looped back,
	* above, but must not be transmitted on a network.
	* Also, multicasts addressed to the loopback interface
	* are not sent -- the above call to ip6_mloopback() will
	* loop back a copy if this host actually belongs to the
	* destination group on the loopback interface.
	*/
	if (ip6->ip6_hlim == 0 \|\| (ifp->if_flags & IFF_LOOPBACK) \|\|
	IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
	m_freem(m);
	goto done;
	}
	}

	/*
	* Fill the outgoing inteface to tell the upper layer
	* to increment per-interface statistics.
	*/
	if (ifpp)
	*ifpp = ifp;

	/* Determine path MTU. */
	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
	&alwaysfrag)) != 0)
	goto bad;

	/*
	* The caller of this function may specify to use the minimum MTU
	* in some cases.
	* An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
	* setting. The logic is a bit complicated; by default, unicast
	* packets will follow path MTU while multicast packets will be sent at
	* the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
	* including unicast ones will be sent at the minimum MTU. Multicast
	* packets will always be sent at the minimum MTU unless
	* IP6PO_MINMTU_DISABLE is explicitly specified.
	* See RFC 3542 for more details.
	*/
	if (mtu > IPV6_MMTU) {
	if ((flags & IPV6_MINMTU))
	mtu = IPV6_MMTU;
	else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
	mtu = IPV6_MMTU;
	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
	(opt == NULL \|\|
	opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
	mtu = IPV6_MMTU;
	}
	}

	/*
	* clear embedded scope identifiers if necessary.
	* in6_clearscope will touch the addresses only when necessary.
	*/
	in6_clearscope(&ip6->ip6_src);
	in6_clearscope(&ip6->ip6_dst);

	/*
	* If the outgoing packet contains a hop-by-hop options header,
	* it must be examined and processed even by the source node.
	* (RFC 2460, section 4.)
	*/
	if (exthdrs.ip6e_hbh) {
	struct ip6_hbh hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh );
	u_int32_t dummy; /* XXX unused */
	u_int32_t plen = 0; /* XXX: ip6_process will check the value */

	#ifdef DIAGNOSTIC
	if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
	panic("ip6e_hbh is not continuous");
	#endif
	/*
	* XXX: if we have to send an ICMPv6 error to the sender,
	* we need the M_LOOP flag since icmp6_error() expects
	* the IPv6 and the hop-by-hop options header are
	* continuous unless the flag is set.
	*/
	m->m_flags \|= M_LOOP;
	m->m_pkthdr.rcvif = ifp;
	if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
	((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
	&dummy, &plen) < 0) {
	/* m was already freed at this point */
	error = EINVAL;/* better error? */
	goto done;
	}
	m->m_flags &= ~M_LOOP; /* XXX */
	m->m_pkthdr.rcvif = NULL;
	}

	/* Jump over all PFIL processing if hooks are not active. */
	if (!PFIL_HOOKED(&inet6_pfil_hook))
	goto passout;

	odst = ip6->ip6_dst;
	/* Run through list of hooks for output packets. */
	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
	if (error != 0 \|\| m == NULL)
	goto done;
	ip6 = mtod(m, struct ip6_hdr *);

	/* See if destination IP address was changed by packet filter. */
	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
	m->m_flags \|= M_SKIP_FIREWALL;
	/* If destination is now ourself drop to ip6_input(). */
	if (in6_localaddr(&ip6->ip6_dst)) {
	if (m->m_pkthdr.rcvif == NULL)
	m->m_pkthdr.rcvif = V_loif;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
	m->m_pkthdr.csum_flags \|=
	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
	m->m_pkthdr.csum_data = 0xffff;
	}
	m->m_pkthdr.csum_flags \|=
	CSUM_IP_CHECKED \| CSUM_IP_VALID;
	error = netisr_queue(NETISR_IPV6, m);
	goto done;
	} else
	goto again; /* Redo the routing table lookup. */
	}

	/* XXX: IPFIREWALL_FORWARD */

	passout:
	/*
	* Send the packet to the outgoing interface.
	* If necessary, do IPv6 fragmentation before sending.
	*
	* the logic here is rather complex:
	* 1: normal case (dontfrag == 0, alwaysfrag == 0)
	* 1-a: send as is if tlen <= path mtu
	* 1-b: fragment if tlen > path mtu
	*
	* 2: if user asks us not to fragment (dontfrag == 1)
	* 2-a: send as is if tlen <= interface mtu
	* 2-b: error if tlen > interface mtu
	*
	* 3: if we always need to attach fragment header (alwaysfrag == 1)
	* always fragment
	*
	* 4: if dontfrag == 1 && alwaysfrag == 1
	* error, as we cannot handle this conflicting request
	*/
	tlen = m->m_pkthdr.len;

	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
	dontfrag = 1;
	else
	dontfrag = 0;
	if (dontfrag && alwaysfrag) { /* case 4 */
	/* conflicting request - can't transmit */
	error = EMSGSIZE;
	goto bad;
	}
	if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
	/*
	* Even if the DONTFRAG option is specified, we cannot send the
	* packet when the data length is larger than the MTU of the
	* outgoing interface.
	* Notify the error by sending IPV6_PATHMTU ancillary data as
	* well as returning an error code (the latter is not described
	* in the API spec.)
	*/
	u_int32_t mtu32;
	struct ip6ctlparam ip6cp;

	mtu32 = (u_int32_t)mtu;
	bzero(&ip6cp, sizeof(ip6cp));
	ip6cp.ip6c_cmdarg = (void *)&mtu32;
	pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
	(void *)&ip6cp);

	error = EMSGSIZE;
	goto bad;
	}

	/*
	* transmit packet without fragmentation
	*/
	if (dontfrag \|\| (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
	struct in6_ifaddr *ia6;

	ip6 = mtod(m, struct ip6_hdr *);
	ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
	if (ia6) {
	/* Record statistics for this interface address. */
	ia6->ia_ifa.if_opackets++;
	ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	- IF_AFDATA_LOCK(ifp);
	error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
	- IF_AFDATA_UNLOCK(ifp);
	goto done;
	}

	/*
	* try to fragment the packet. case 1-b and 3
	*/
	if (mtu < IPV6_MMTU) {
	/* path MTU cannot be less than IPV6_MMTU */
	error = EMSGSIZE;
	in6_ifstat_inc(ifp, ifs6_out_fragfail);
	goto bad;
	} else if (ip6->ip6_plen == 0) {
	/* jumbo payload cannot be fragmented */
	error = EMSGSIZE;
	in6_ifstat_inc(ifp, ifs6_out_fragfail);
	goto bad;
	} else {
	struct mbuf *mnext, m_frgpart;
	struct ip6_frag *ip6f;
	u_int32_t id = htonl(ip6_randomid());
	u_char nextproto;

	int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;

	/*
	* Too large for the destination or interface;
	* fragment if possible.
	* Must be able to put at least 8 bytes per fragment.
	*/
	hlen = unfragpartlen;
	if (mtu > IPV6_MAXPACKET)
	mtu = IPV6_MAXPACKET;

	len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
	if (len < 8) {
	error = EMSGSIZE;
	in6_ifstat_inc(ifp, ifs6_out_fragfail);
	goto bad;
	}

	/*
	* Verify that we have any chance at all of being able to queue
	* the packet or packet fragments
	*/
	if (qslots <= 0 \|\| ((u_int)qslots * (mtu - hlen)
	< tlen /* - hlen */)) {
	error = ENOBUFS;
	V_ip6stat.ip6s_odropped++;
	goto bad;
	}

	mnext = &m->m_nextpkt;

	/*
	* Change the next header field of the last header in the
	* unfragmentable part.
	*/
	if (exthdrs.ip6e_rthdr) {
	nextproto = mtod(exthdrs.ip6e_rthdr, u_char );
	mtod(exthdrs.ip6e_rthdr, u_char ) = IPPROTO_FRAGMENT;
	} else if (exthdrs.ip6e_dest1) {
	nextproto = mtod(exthdrs.ip6e_dest1, u_char );
	mtod(exthdrs.ip6e_dest1, u_char ) = IPPROTO_FRAGMENT;
	} else if (exthdrs.ip6e_hbh) {
	nextproto = mtod(exthdrs.ip6e_hbh, u_char );
	mtod(exthdrs.ip6e_hbh, u_char ) = IPPROTO_FRAGMENT;
	} else {
	nextproto = ip6->ip6_nxt;
	ip6->ip6_nxt = IPPROTO_FRAGMENT;
	}

	/*
	* Loop through length of segment after first fragment,
	* make new header and copy data of each part and link onto
	* chain.
	*/
	m0 = m;
	for (off = hlen; off < tlen; off += len) {
	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (!m) {
	error = ENOBUFS;
	V_ip6stat.ip6s_odropped++;
	goto sendorfree;
	}
	m->m_pkthdr.rcvif = NULL;
	m->m_flags = m0->m_flags & M_COPYFLAGS;
	*mnext = m;
	mnext = &m->m_nextpkt;
	m->m_data += max_linkhdr;
	mhip6 = mtod(m, struct ip6_hdr *);
	mhip6 = ip6;
	m->m_len = sizeof(*mhip6);
	error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
	if (error) {
	V_ip6stat.ip6s_odropped++;
	goto sendorfree;
	}
	ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
	if (off + len >= tlen)
	len = tlen - off;
	else
	ip6f->ip6f_offlg \|= IP6F_MORE_FRAG;
	mhip6->ip6_plen = htons((u_short)(len + hlen +
	sizeof(*ip6f) - sizeof(struct ip6_hdr)));
	if ((m_frgpart = m_copy(m0, off, len)) == 0) {
	error = ENOBUFS;
	V_ip6stat.ip6s_odropped++;
	goto sendorfree;
	}
	m_cat(m, m_frgpart);
	m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
	m->m_pkthdr.rcvif = NULL;
	ip6f->ip6f_reserved = 0;
	ip6f->ip6f_ident = id;
	ip6f->ip6f_nxt = nextproto;
	V_ip6stat.ip6s_ofragments++;
	in6_ifstat_inc(ifp, ifs6_out_fragcreat);
	}

	in6_ifstat_inc(ifp, ifs6_out_fragok);
	}

	/*
	* Remove leading garbages.
	*/
	sendorfree:
	m = m0->m_nextpkt;
	m0->m_nextpkt = 0;
	m_freem(m0);
	for (m0 = m; m; m = m0) {
	m0 = m->m_nextpkt;
	m->m_nextpkt = 0;
	if (error == 0) {
	/* Record statistics for this interface address. */
	if (ia) {
	ia->ia_ifa.if_opackets++;
	ia->ia_ifa.if_obytes += m->m_pkthdr.len;
	}
	- IF_AFDATA_LOCK(ifp);
	error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
	- IF_AFDATA_UNLOCK(ifp);
	} else
	m_freem(m);
	}

	if (error == 0)
	V_ip6stat.ip6s_fragmented++;

	done:
	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
	RTFREE(ro->ro_rt);
	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
	RTFREE(ro_pmtu->ro_rt);
	}
	#ifdef IPSEC
	if (sp != NULL)
	KEY_FREESP(&sp);
	#endif

	return (error);

	freehdrs:
	m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
	m_freem(exthdrs.ip6e_dest1);
	m_freem(exthdrs.ip6e_rthdr);
	m_freem(exthdrs.ip6e_dest2);
	/* FALLTHROUGH */
	bad:
	if (m)
	m_freem(m);
	goto done;
	}

	/*
	 * Copy an extension header into a freshly allocated mbuf.
	 *
	 * mp	receives the new mbuf on success; it is not written on failure.
	 * hdr	source bytes to copy, or NULL to leave the mbuf data unfilled.
	 * hlen	number of bytes the new mbuf will hold.
	 *
	 * Returns 0 on success, or ENOBUFS when hlen exceeds MCLBYTES or an
	 * mbuf/cluster could not be obtained.
	 */
	static int
	ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
	{
		struct mbuf *hm;

		/* Anything larger than MCLBYTES is refused up front. */
		if (hlen > MCLBYTES)
			return (ENOBUFS); /* XXX */

		MGET(hm, M_DONTWAIT, MT_DATA);
		if (hm == NULL)
			return (ENOBUFS);

		/*
		 * Headers longer than MLEN need a cluster; a missing M_EXT flag
		 * after MCLGET is treated as allocation failure.
		 */
		if (hlen > MLEN) {
			MCLGET(hm, M_DONTWAIT);
			if (!(hm->m_flags & M_EXT)) {
				m_free(hm);
				return (ENOBUFS);
			}
		}

		hm->m_len = hlen;
		if (hdr != NULL)
			bcopy(hdr, mtod(hm, caddr_t), hlen);

		/* Hand the filled mbuf back to the caller. */
		*mp = hm;
		return (0);
	}

	/*
	 * Insert jumbo payload option.
	 *
	 * Places a Jumbo Payload option carrying (plen + JUMBOOPTLEN) into the
	 * hop-by-hop options header referenced by exthdrs->ip6e_hbh, creating
	 * that header if it does not exist, and grows the packet header length
	 * of exthdrs->ip6e_ip6 by JUMBOOPTLEN.
	 *
	 * Returns 0 on success, or ENOBUFS when an mbuf or cluster cannot be
	 * obtained or the enlarged header would exceed MCLBYTES.
	 */
	static int
	ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
	{
		struct mbuf *mopt;
		u_char *optbuf;
		u_int32_t v;

	#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */

		/*
		 * If there is no hop-by-hop options header, allocate new one.
		 * If there is one but it doesn't have enough space to store the
		 * jumbo payload option, allocate a cluster to store the whole
		 * options.  Otherwise, use it to store the options.
		 */
		if (exthdrs->ip6e_hbh == 0) {
			MGET(mopt, M_DONTWAIT, MT_DATA);
			if (mopt == 0)
				return (ENOBUFS);
			mopt->m_len = JUMBOOPTLEN;
			optbuf = mtod(mopt, u_char *);
			/*
			 * Only the header length byte is set here; optbuf[0]
			 * (next header) is not written by this function --
			 * presumably the caller fills it in; confirm.
			 */
			optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
			exthdrs->ip6e_hbh = mopt;
		} else {
			struct ip6_hbh *hbh;

			mopt = exthdrs->ip6e_hbh;
			if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
				/*
				 * XXX assumption:
				 * - exthdrs->ip6e_hbh is not referenced from
				 *   places other than exthdrs.
				 * - exthdrs->ip6e_hbh is not an mbuf chain.
				 */
				int oldoptlen = mopt->m_len;
				struct mbuf *n;

				/*
				 * XXX: give up if the whole (new) hbh header
				 * does not fit even in an mbuf cluster.
				 */
				if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
					return (ENOBUFS);

				/*
				 * As a consequence, we must always prepare a
				 * cluster at this point.
				 */
				MGET(n, M_DONTWAIT, MT_DATA);
				if (n) {
					MCLGET(n, M_DONTWAIT);
					if ((n->m_flags & M_EXT) == 0) {
						m_freem(n);
						n = NULL;
					}
				}
				if (!n)
					return (ENOBUFS);
				n->m_len = oldoptlen + JUMBOOPTLEN;
				bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
				    oldoptlen);
				/* New option goes right after the copied header. */
				optbuf = mtod(n, u_char *) + oldoptlen;
				m_freem(mopt);
				mopt = exthdrs->ip6e_hbh = n;
			} else {
				optbuf = mtod(mopt, u_char *) + mopt->m_len;
				mopt->m_len += JUMBOOPTLEN;
			}
			/*
			 * Two octets of padding precede the jumbo option so
			 * that it lands at offset 2 of the added 8 octets.
			 * PadN encodes N octets of padding with an Opt Data
			 * Len of N - 2, hence 0 here; a length of 1 would make
			 * a parser consume the IP6OPT_JUMBO type byte below as
			 * pad data.
			 */
			optbuf[0] = IP6OPT_PADN;
			optbuf[1] = 0;

			/*
			 * Adjust the header length according to the pad and
			 * the jumbo payload option.
			 */
			hbh = mtod(mopt, struct ip6_hbh *);
			hbh->ip6h_len += (JUMBOOPTLEN >> 3);
		}

		/* fill in the option. */
		optbuf[2] = IP6OPT_JUMBO;
		optbuf[3] = 4;
		v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
		/* optbuf + 4 is copied bytewise rather than stored through a cast. */
		bcopy(&v, &optbuf[4], sizeof(u_int32_t));

		/* finally, adjust the packet header length */
		exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;

		return (0);
	#undef JUMBOOPTLEN
	}

	/*
	 * Insert fragment header and copy unfragmentable header portions.
	 *
	 * m0		original packet; the bytes between the IPv6 header and
	 *		hlen are copied from it.
	 * m		mbuf heading the new fragment; the copied headers and the
	 *		fragment header are linked behind it.
	 * hlen		length of the unfragmentable part, IPv6 header included.
	 * frghdrp	receives a pointer to the (uninitialized) fragment header.
	 *
	 * Returns 0 on success or ENOBUFS if an mbuf cannot be obtained.
	 */
	static int
	ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
	    struct ip6_frag **frghdrp)
	{
		struct mbuf *n, *mlast;

		if (hlen > sizeof(struct ip6_hdr)) {
			/* Duplicate the extension headers that follow the IPv6 header. */
			n = m_copym(m0, sizeof(struct ip6_hdr),
			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
			if (n == 0)
				return (ENOBUFS);
			m->m_next = n;
		} else
			n = m;

		/* Search for the last mbuf of unfragmentable part. */
		for (mlast = n; mlast->m_next; mlast = mlast->m_next)
			;

		if ((mlast->m_flags & M_EXT) == 0 &&
		    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
			/* use the trailing space of the last mbuf for the fragment hdr */
			*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
			    mlast->m_len);
			mlast->m_len += sizeof(struct ip6_frag);
			m->m_pkthdr.len += sizeof(struct ip6_frag);
		} else {
			/* allocate a new mbuf for the fragment header */
			struct mbuf *mfrg;

			MGET(mfrg, M_DONTWAIT, MT_DATA);
			if (mfrg == 0)
				return (ENOBUFS);
			mfrg->m_len = sizeof(struct ip6_frag);
			*frghdrp = mtod(mfrg, struct ip6_frag *);
			/*
			 * NOTE(review): unlike the branch above, m->m_pkthdr.len
			 * is not adjusted here; confirm callers set it themselves.
			 */
			mlast->m_next = mfrg;
		}

		return (0);
	}

	/*
	 * Determine the MTU to use toward dst.
	 *
	 * ro_pmtu	route consulted for the MTU.  When it differs from ro, its
	 *		cached entry is dropped if it is down or was looked up for
	 *		another address, and a new lookup for dst is performed.
	 * ro		route used for transmission; only compared with ro_pmtu.
	 * ifp		outgoing interface, or NULL to take it from the route.
	 * dst		final destination address.
	 * mtup		receives the resulting MTU (0 if none could be determined).
	 * alwaysfragp	if non-NULL, receives 1 when the recorded MTU is below
	 *		IPV6_MMTU (and the returned MTU is raised to IPV6_MMTU),
	 *		0 otherwise.
	 *
	 * Returns 0, or EHOSTUNREACH when neither a route entry nor an interface
	 * is available.
	 */
	static int
	ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
	    struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
	    int *alwaysfragp)
	{
		u_int32_t mtu = 0;
		int alwaysfrag = 0;
		int error = 0;

		if (ro_pmtu != ro) {
			/* The first hop and the final destination may differ. */
			struct sockaddr_in6 *sa6_dst =
			    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
			if (ro_pmtu->ro_rt &&
			    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
			    !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
				/* Cached entry is stale or for another address. */
				RTFREE(ro_pmtu->ro_rt);
				ro_pmtu->ro_rt = (struct rtentry *)NULL;
			}
			if (ro_pmtu->ro_rt == NULL) {
				bzero(sa6_dst, sizeof(*sa6_dst));
				sa6_dst->sin6_family = AF_INET6;
				sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
				sa6_dst->sin6_addr = *dst;

				rtalloc((struct route *)ro_pmtu);
			}
		}
		if (ro_pmtu->ro_rt) {
			u_int32_t ifmtu;
			struct in_conninfo inc;

			bzero(&inc, sizeof(inc));
			inc.inc_flags = 1; /* IPv6 */
			inc.inc6_faddr = *dst;

			if (ifp == NULL)
				ifp = ro_pmtu->ro_rt->rt_ifp;
			ifmtu = IN6_LINKMTU(ifp);
			/*
			 * Prefer the smaller of the value reported by
			 * tcp_hc_getmtu() and the route metric; a zero from
			 * tcp_hc_getmtu() means the route metric is used alone.
			 */
			mtu = tcp_hc_getmtu(&inc);
			if (mtu)
				mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
			else
				mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
			if (mtu == 0)
				mtu = ifmtu;
			else if (mtu < IPV6_MMTU) {
				/*
				 * RFC2460 section 5, last paragraph:
				 * if we record ICMPv6 too big message with
				 * mtu < IPV6_MMTU, transmit packets sized
				 * IPV6_MMTU or smaller, with fragment header
				 * attached.  (fragment header is needed
				 * regardless from the packet size, for
				 * translators to identify packets)
				 */
				alwaysfrag = 1;
				mtu = IPV6_MMTU;
			} else if (mtu > ifmtu) {
				/*
				 * The MTU on the route is larger than the MTU on
				 * the interface!  This shouldn't happen, unless
				 * the MTU of the interface has been changed after
				 * the interface was brought up.  Change the MTU in
				 * the route to match the interface MTU (as long as
				 * the field isn't locked).
				 *
				 * NOTE(review): no lock check is visible here; the
				 * route metric is overwritten unconditionally.
				 */
				mtu = ifmtu;
				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
			}
		} else if (ifp) {
			/* No route entry: fall back to the interface link MTU. */
			mtu = IN6_LINKMTU(ifp);
		} else
			error = EHOSTUNREACH; /* XXX */

		*mtup = mtu;
		if (alwaysfragp)
			*alwaysfragp = alwaysfrag;
		return (error);
	}

	/*
	* IP6 socket option processing.
	*/
	int
	ip6_ctloutput(struct socket so, struct sockopt sopt)
	{
	int optdatalen, uproto;
	void *optdata;
	struct inpcb *in6p = sotoinpcb(so);
	int error, optval;
	int level, op, optname;
	int optlen;
	struct thread *td;

	level = sopt->sopt_level;
	op = sopt->sopt_dir;
	optname = sopt->sopt_name;
	optlen = sopt->sopt_valsize;
	td = sopt->sopt_td;
	error = 0;
	optval = 0;
	uproto = (int)so->so_proto->pr_protocol;

	if (level == IPPROTO_IPV6) {
	switch (op) {

	case SOPT_SET:
	switch (optname) {
	case IPV6_2292PKTOPTIONS:
	#ifdef IPV6_PKTOPTIONS
	case IPV6_PKTOPTIONS:
	#endif
	{
	struct mbuf *m;

	error = soopt_getm(sopt, &m); /* XXX */
	if (error != 0)
	break;
	error = soopt_mcopyin(sopt, m); /* XXX */
	if (error != 0)
	break;
	error = ip6_pcbopts(&in6p->in6p_outputopts,
	m, so, sopt);
	m_freem(m); /* XXX */
	break;
	}

	/*
	* Use of some Hop-by-Hop options or some
	* Destination options, might require special
	* privilege. That is, normal applications
	* (without special privilege) might be forbidden
	* from setting certain options in outgoing packets,
	* and might never see certain options in received
	* packets. [RFC 2292 Section 6]
	* KAME specific note:
	* KAME prevents non-privileged users from sending or
	* receiving ANY hbh/dst options in order to avoid
	* overhead of parsing options in the kernel.
	*/
	case IPV6_RECVHOPOPTS:
	case IPV6_RECVDSTOPTS:
	case IPV6_RECVRTHDRDSTOPTS:
	if (td != NULL) {
	error = priv_check(td,
	PRIV_NETINET_SETHDROPTS);
	if (error)
	break;
	}
	/* FALLTHROUGH */
	case IPV6_UNICAST_HOPS:
	case IPV6_HOPLIMIT:
	case IPV6_FAITH:

	case IPV6_RECVPKTINFO:
	case IPV6_RECVHOPLIMIT:
	case IPV6_RECVRTHDR:
	case IPV6_RECVPATHMTU:
	case IPV6_RECVTCLASS:
	case IPV6_V6ONLY:
	case IPV6_AUTOFLOWLABEL:
	if (optlen != sizeof(int)) {
	error = EINVAL;
	break;
	}
	error = sooptcopyin(sopt, &optval,
	sizeof optval, sizeof optval);
	if (error)
	break;
	switch (optname) {

	case IPV6_UNICAST_HOPS:
	if (optval < -1 \|\| optval >= 256)
	error = EINVAL;
	else {
	/* -1 = kernel default */
	in6p->in6p_hops = optval;
	if ((in6p->in6p_vflag &
	INP_IPV4) != 0)
	in6p->inp_ip_ttl = optval;
	}
	break;
	#define OPTSET(bit) \
	do { \
	if (optval) \
	in6p->in6p_flags \|= (bit); \
	else \
	in6p->in6p_flags &= ~(bit); \
	} while (/CONSTCOND/ 0)
	#define OPTSET2292(bit) \
	do { \
	in6p->in6p_flags \|= IN6P_RFC2292; \
	if (optval) \
	in6p->in6p_flags \|= (bit); \
	else \
	in6p->in6p_flags &= ~(bit); \
	} while (/CONSTCOND/ 0)
	#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)

	case IPV6_RECVPKTINFO:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_PKTINFO);
	break;

	case IPV6_HOPLIMIT:
	{
	struct ip6_pktopts **optp;

	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	optp = &in6p->in6p_outputopts;
	error = ip6_pcbopt(IPV6_HOPLIMIT,
	(u_char *)&optval, sizeof(optval),
	optp, (td != NULL) ? td->td_ucred :
	NULL, uproto);
	break;
	}

	case IPV6_RECVHOPLIMIT:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_HOPLIMIT);
	break;

	case IPV6_RECVHOPOPTS:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_HOPOPTS);
	break;

	case IPV6_RECVDSTOPTS:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_DSTOPTS);
	break;

	case IPV6_RECVRTHDRDSTOPTS:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_RTHDRDSTOPTS);
	break;

	case IPV6_RECVRTHDR:
	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_RTHDR);
	break;

	case IPV6_FAITH:
	OPTSET(IN6P_FAITH);
	break;

	case IPV6_RECVPATHMTU:
	/*
	* We ignore this option for TCP
	* sockets.
	* (RFC3542 leaves this case
	* unspecified.)
	*/
	if (uproto != IPPROTO_TCP)
	OPTSET(IN6P_MTU);
	break;

	case IPV6_V6ONLY:
	/*
	* make setsockopt(IPV6_V6ONLY)
	* available only prior to bind(2).
	* see ipng mailing list, Jun 22 2001.
	*/
	if (in6p->in6p_lport \|\|
	!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_IPV6_V6ONLY);
	if (optval)
	in6p->in6p_vflag &= ~INP_IPV4;
	else
	in6p->in6p_vflag \|= INP_IPV4;
	break;
	case IPV6_RECVTCLASS:
	/* cannot mix with RFC2292 XXX */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}
	OPTSET(IN6P_TCLASS);
	break;
	case IPV6_AUTOFLOWLABEL:
	OPTSET(IN6P_AUTOFLOWLABEL);
	break;

	}
	break;

	case IPV6_TCLASS:
	case IPV6_DONTFRAG:
	case IPV6_USE_MIN_MTU:
	case IPV6_PREFER_TEMPADDR:
	if (optlen != sizeof(optval)) {
	error = EINVAL;
	break;
	}
	error = sooptcopyin(sopt, &optval,
	sizeof optval, sizeof optval);
	if (error)
	break;
	{
	struct ip6_pktopts **optp;
	optp = &in6p->in6p_outputopts;
	error = ip6_pcbopt(optname,
	(u_char *)&optval, sizeof(optval),
	optp, (td != NULL) ? td->td_ucred :
	NULL, uproto);
	break;
	}

	case IPV6_2292PKTINFO:
	case IPV6_2292HOPLIMIT:
	case IPV6_2292HOPOPTS:
	case IPV6_2292DSTOPTS:
	case IPV6_2292RTHDR:
	/* RFC 2292 */
	if (optlen != sizeof(int)) {
	error = EINVAL;
	break;
	}
	error = sooptcopyin(sopt, &optval,
	sizeof optval, sizeof optval);
	if (error)
	break;
	switch (optname) {
	case IPV6_2292PKTINFO:
	OPTSET2292(IN6P_PKTINFO);
	break;
	case IPV6_2292HOPLIMIT:
	OPTSET2292(IN6P_HOPLIMIT);
	break;
	case IPV6_2292HOPOPTS:
	/*
	* Check super-user privilege.
	* See comments for IPV6_RECVHOPOPTS.
	*/
	if (td != NULL) {
	error = priv_check(td,
	PRIV_NETINET_SETHDROPTS);
	if (error)
	return (error);
	}
	OPTSET2292(IN6P_HOPOPTS);
	break;
	case IPV6_2292DSTOPTS:
	if (td != NULL) {
	error = priv_check(td,
	PRIV_NETINET_SETHDROPTS);
	if (error)
	return (error);
	}
	OPTSET2292(IN6P_DSTOPTS\|IN6P_RTHDRDSTOPTS); /* XXX */
	break;
	case IPV6_2292RTHDR:
	OPTSET2292(IN6P_RTHDR);
	break;
	}
	break;
	case IPV6_PKTINFO:
	case IPV6_HOPOPTS:
	case IPV6_RTHDR:
	case IPV6_DSTOPTS:
	case IPV6_RTHDRDSTOPTS:
	case IPV6_NEXTHOP:
	{
	/* new advanced API (RFC3542) */
	u_char *optbuf;
	u_char optbuf_storage[MCLBYTES];
	int optlen;
	struct ip6_pktopts **optp;

	/* cannot mix with RFC2292 */
	if (OPTBIT(IN6P_RFC2292)) {
	error = EINVAL;
	break;
	}

	/*
	* We only ensure valsize is not too large
	* here. Further validation will be done
	* later.
	*/
	error = sooptcopyin(sopt, optbuf_storage,
	sizeof(optbuf_storage), 0);
	if (error)
	break;
	optlen = sopt->sopt_valsize;
	optbuf = optbuf_storage;
	optp = &in6p->in6p_outputopts;
	error = ip6_pcbopt(optname, optbuf, optlen,
	optp, (td != NULL) ? td->td_ucred : NULL,
	uproto);
	break;
	}
	#undef OPTSET

	case IPV6_MULTICAST_IF:
	case IPV6_MULTICAST_HOPS:
	case IPV6_MULTICAST_LOOP:
	case IPV6_JOIN_GROUP:
	case IPV6_LEAVE_GROUP:
	{
	if (sopt->sopt_valsize > MLEN) {
	error = EMSGSIZE;
	break;
	}
	/* XXX */
	}
	/* FALLTHROUGH */
	{
	struct mbuf *m;

	if (sopt->sopt_valsize > MCLBYTES) {
	error = EMSGSIZE;
	break;
	}
	/* XXX */
	MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0) {
	error = ENOBUFS;
	break;
	}
	if (sopt->sopt_valsize > MLEN) {
	MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
	m_free(m);
	error = ENOBUFS;
	break;
	}
	}
	m->m_len = sopt->sopt_valsize;
	error = sooptcopyin(sopt, mtod(m, char *),
	m->m_len, m->m_len);
	if (error) {
	(void)m_free(m);
	break;
	}
	error = ip6_setmoptions(sopt->sopt_name,
	&in6p->in6p_moptions,
	m);
	(void)m_free(m);
	}
	break;

	case IPV6_PORTRANGE:
	error = sooptcopyin(sopt, &optval,
	sizeof optval, sizeof optval);
	if (error)
	break;

	switch (optval) {
	case IPV6_PORTRANGE_DEFAULT:
	in6p->in6p_flags &= ~(IN6P_LOWPORT);
	in6p->in6p_flags &= ~(IN6P_HIGHPORT);
	break;

	case IPV6_PORTRANGE_HIGH:
	in6p->in6p_flags &= ~(IN6P_LOWPORT);
	in6p->in6p_flags \|= IN6P_HIGHPORT;
	break;

	case IPV6_PORTRANGE_LOW:
	in6p->in6p_flags &= ~(IN6P_HIGHPORT);
	in6p->in6p_flags \|= IN6P_LOWPORT;
	break;

	default:
	error = EINVAL;
	break;
	}
	break;

	#ifdef IPSEC
	case IPV6_IPSEC_POLICY:
	{
	caddr_t req;
	struct mbuf *m;

	if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
	break;
	if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
	break;
	req = mtod(m, caddr_t);
	error = ipsec6_set_policy(in6p, optname, req,
	m->m_len, (sopt->sopt_td != NULL) ?
	sopt->sopt_td->td_ucred : NULL);
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;

	case SOPT_GET:
	switch (optname) {

	case IPV6_2292PKTOPTIONS:
	#ifdef IPV6_PKTOPTIONS
	case IPV6_PKTOPTIONS:
	#endif
	/*
	* RFC3542 (effectively) deprecated the
	* semantics of the 2292-style pktoptions.
	* Since it was not reliable in nature (i.e.,
	* applications had to expect the lack of some
	* information after all), it would make sense
	* to simplify this part by always returning
	* empty data.
	*/
	sopt->sopt_valsize = 0;
	break;

	case IPV6_RECVHOPOPTS:
	case IPV6_RECVDSTOPTS:
	case IPV6_RECVRTHDRDSTOPTS:
	case IPV6_UNICAST_HOPS:
	case IPV6_RECVPKTINFO:
	case IPV6_RECVHOPLIMIT:
	case IPV6_RECVRTHDR:
	case IPV6_RECVPATHMTU:

	case IPV6_FAITH:
	case IPV6_V6ONLY:
	case IPV6_PORTRANGE:
	case IPV6_RECVTCLASS:
	case IPV6_AUTOFLOWLABEL:
	switch (optname) {

	case IPV6_RECVHOPOPTS:
	optval = OPTBIT(IN6P_HOPOPTS);
	break;

	case IPV6_RECVDSTOPTS:
	optval = OPTBIT(IN6P_DSTOPTS);
	break;

	case IPV6_RECVRTHDRDSTOPTS:
	optval = OPTBIT(IN6P_RTHDRDSTOPTS);
	break;

	case IPV6_UNICAST_HOPS:
	optval = in6p->in6p_hops;
	break;

	case IPV6_RECVPKTINFO:
	optval = OPTBIT(IN6P_PKTINFO);
	break;

	case IPV6_RECVHOPLIMIT:
	optval = OPTBIT(IN6P_HOPLIMIT);
	break;

	case IPV6_RECVRTHDR:
	optval = OPTBIT(IN6P_RTHDR);
	break;

	case IPV6_RECVPATHMTU:
	optval = OPTBIT(IN6P_MTU);
	break;

	case IPV6_FAITH:
	optval = OPTBIT(IN6P_FAITH);
	break;

	case IPV6_V6ONLY:
	optval = OPTBIT(IN6P_IPV6_V6ONLY);
	break;

	case IPV6_PORTRANGE:
	{
	int flags;
	flags = in6p->in6p_flags;
	if (flags & IN6P_HIGHPORT)
	optval = IPV6_PORTRANGE_HIGH;
	else if (flags & IN6P_LOWPORT)
	optval = IPV6_PORTRANGE_LOW;
	else
	optval = 0;
	break;
	}
	case IPV6_RECVTCLASS:
	optval = OPTBIT(IN6P_TCLASS);
	break;

	case IPV6_AUTOFLOWLABEL:
	optval = OPTBIT(IN6P_AUTOFLOWLABEL);
	break;
	}
	if (error)
	break;
	error = sooptcopyout(sopt, &optval,
	sizeof optval);
	break;

	case IPV6_PATHMTU:
	{
	u_long pmtu = 0;
	struct ip6_mtuinfo mtuinfo;
	struct route_in6 sro;

	bzero(&sro, sizeof(sro));

	if (!(so->so_state & SS_ISCONNECTED))
	return (ENOTCONN);
	/*
	* XXX: we dot not consider the case of source
	* routing, or optional information to specify
	* the outgoing interface.
	*/
	error = ip6_getpmtu(&sro, NULL, NULL,
	&in6p->in6p_faddr, &pmtu, NULL);
	if (sro.ro_rt)
	RTFREE(sro.ro_rt);
	if (error)
	break;
	if (pmtu > IPV6_MAXPACKET)
	pmtu = IPV6_MAXPACKET;

	bzero(&mtuinfo, sizeof(mtuinfo));
	mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
	optdata = (void *)&mtuinfo;
	optdatalen = sizeof(mtuinfo);
	error = sooptcopyout(sopt, optdata,
	optdatalen);
	break;
	}

	case IPV6_2292PKTINFO:
	case IPV6_2292HOPLIMIT:
	case IPV6_2292HOPOPTS:
	case IPV6_2292RTHDR:
	case IPV6_2292DSTOPTS:
	switch (optname) {
	case IPV6_2292PKTINFO:
	optval = OPTBIT(IN6P_PKTINFO);
	break;
	case IPV6_2292HOPLIMIT:
	optval = OPTBIT(IN6P_HOPLIMIT);
	break;
	case IPV6_2292HOPOPTS:
	optval = OPTBIT(IN6P_HOPOPTS);
	break;
	case IPV6_2292RTHDR:
	optval = OPTBIT(IN6P_RTHDR);
	break;
	case IPV6_2292DSTOPTS:
	optval = OPTBIT(IN6P_DSTOPTS\|IN6P_RTHDRDSTOPTS);
	break;
	}
	error = sooptcopyout(sopt, &optval,
	sizeof optval);
	break;
	case IPV6_PKTINFO:
	case IPV6_HOPOPTS:
	case IPV6_RTHDR:
	case IPV6_DSTOPTS:
	case IPV6_RTHDRDSTOPTS:
	case IPV6_NEXTHOP:
	case IPV6_TCLASS:
	case IPV6_DONTFRAG:
	case IPV6_USE_MIN_MTU:
	case IPV6_PREFER_TEMPADDR:
	error = ip6_getpcbopt(in6p->in6p_outputopts,
	optname, sopt);
	break;

	case IPV6_MULTICAST_IF:
	case IPV6_MULTICAST_HOPS:
	case IPV6_MULTICAST_LOOP:
	case IPV6_JOIN_GROUP:
	case IPV6_LEAVE_GROUP:
	{
	struct mbuf *m;
	error = ip6_getmoptions(sopt->sopt_name,
	in6p->in6p_moptions, &m);
	if (error == 0)
	error = sooptcopyout(sopt,
	mtod(m, char *), m->m_len);
	m_freem(m);
	}
	break;

	#ifdef IPSEC
	case IPV6_IPSEC_POLICY:
	{
	caddr_t req = NULL;
	size_t len = 0;
	struct mbuf *m = NULL;
	struct mbuf **mp = &m;
	size_t ovalsize = sopt->sopt_valsize;
	caddr_t oval = (caddr_t)sopt->sopt_val;

	error = soopt_getm(sopt, &m); /* XXX */
	if (error != 0)
	break;
	error = soopt_mcopyin(sopt, m); /* XXX */
	if (error != 0)
	break;
	sopt->sopt_valsize = ovalsize;
	sopt->sopt_val = oval;
	if (m) {
	req = mtod(m, caddr_t);
	len = m->m_len;
	}
	error = ipsec6_get_policy(in6p, req, len, mp);
	if (error == 0)
	error = soopt_mcopyout(sopt, m); /* XXX */
	if (error == 0 && m)
	m_freem(m);
	break;
	}
	#endif /* IPSEC */

	default:
	error = ENOPROTOOPT;
	break;
	}
	break;
	}
	} else { /* level != IPPROTO_IPV6 */
	error = EINVAL;
	}
	return (error);
	}

	int
	ip6_raw_ctloutput(struct socket so, struct sockopt sopt)
	{
	int error = 0, optval, optlen;
	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
	struct in6pcb *in6p = sotoin6pcb(so);
	int level, op, optname;

	level = sopt->sopt_level;
	op = sopt->sopt_dir;
	optname = sopt->sopt_name;
	optlen = sopt->sopt_valsize;

	if (level != IPPROTO_IPV6) {
	return (EINVAL);
	}

	switch (optname) {
	case IPV6_CHECKSUM:
	/*
	* For ICMPv6 sockets, no modification allowed for checksum
	* offset, permit "no change" values to help existing apps.
	*
	* RFC3542 says: "An attempt to set IPV6_CHECKSUM
	* for an ICMPv6 socket will fail."
	* The current behavior does not meet RFC3542.
	*/
	switch (op) {
	case SOPT_SET:
	if (optlen != sizeof(int)) {
	error = EINVAL;
	break;
	}
	error = sooptcopyin(sopt, &optval, sizeof(optval),
	sizeof(optval));
	if (error)
	break;
	if ((optval % 2) != 0) {
	/* the API assumes even offset values */
	error = EINVAL;
	} else if (so->so_proto->pr_protocol ==
	IPPROTO_ICMPV6) {
	if (optval != icmp6off)
	error = EINVAL;
	} else
	in6p->in6p_cksum = optval;
	break;

	case SOPT_GET:
	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
	optval = icmp6off;
	else
	optval = in6p->in6p_cksum;

	error = sooptcopyout(sopt, &optval, sizeof(optval));
	break;

	default:
	error = EINVAL;
	break;
	}
	break;

	default:
	error = ENOPROTOOPT;
	break;
	}

	return (error);
	}

	/*
	* Set up IP6 options in pcb for insertion in output packets or
	* specifying behavior of outgoing packets.
	*/
	static int
	ip6_pcbopts(struct ip6_pktopts *pktopt, struct mbuf m,
	struct socket so, struct sockopt sopt)
	{
	struct ip6_pktopts opt = pktopt;
	int error = 0;
	struct thread *td = sopt->sopt_td;

	/* turn off any old options. */
	if (opt) {
	#ifdef DIAGNOSTIC
	if (opt->ip6po_pktinfo \|\| opt->ip6po_nexthop \|\|
	opt->ip6po_hbh \|\| opt->ip6po_dest1 \|\| opt->ip6po_dest2 \|\|
	opt->ip6po_rhinfo.ip6po_rhi_rthdr)
	printf("ip6_pcbopts: all specified options are cleared.\n");
	#endif
	ip6_clearpktopts(opt, -1);
	} else
	opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
	*pktopt = NULL;

	if (!m \|\| m->m_len == 0) {
	/*
	* Only turning off any previous options, regardless of
	* whether the opt is just created or given.
	*/
	free(opt, M_IP6OPT);
	return (0);
	}

	/* set options specified by user. */
	if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
	td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
	ip6_clearpktopts(opt, -1); /* XXX: discard all options */
	free(opt, M_IP6OPT);
	return (error);
	}
	*pktopt = opt;
	return (0);
	}

	/*
	 * Put an ip6_pktopts structure into its pristine state.  The whole
	 * structure is zeroed first; the fields below then receive their
	 * explicit defaults, so callers must not rely on a plain bzero().
	 */
	void
	ip6_initpktopts(struct ip6_pktopts *opt)
	{

		bzero(opt, sizeof(*opt));

		/* policy fields fall back to their default settings */
		opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
		opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;

		/* -1 selects the default traffic class / hop limit */
		opt->ip6po_tclass = -1;
		opt->ip6po_hlim = -1;
	}

	/*
	 * Set a single sticky option on a pcb's option structure.
	 *
	 * If *pktopt is NULL a fresh structure is allocated (M_WAITOK) and
	 * initialized with ip6_initpktopts() before the option is applied.
	 * The request is then passed to ip6_setpktopt() with sticky=1, cmsg=0.
	 * Returns whatever ip6_setpktopt() returns.
	 */
	static int
	ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
	    struct ucred *cred, int uproto)
	{
		struct ip6_pktopts *opt;

		if (*pktopt == NULL) {
			*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
			    M_WAITOK);
			ip6_initpktopts(*pktopt);
		}
		opt = *pktopt;

		return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
	}

	static int
	ip6_getpcbopt(struct ip6_pktopts pktopt, int optname, struct sockopt sopt)
	{
	void *optdata = NULL;
	int optdatalen = 0;
	struct ip6_ext *ip6e;
	int error = 0;
	struct in6_pktinfo null_pktinfo;
	int deftclass = 0, on;
	int defminmtu = IP6PO_MINMTU_MCASTONLY;
	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;

	switch (optname) {
	case IPV6_PKTINFO:
	if (pktopt && pktopt->ip6po_pktinfo)
	optdata = (void *)pktopt->ip6po_pktinfo;
	else {
	/* XXX: we don't have to do this every time... */
	bzero(&null_pktinfo, sizeof(null_pktinfo));
	optdata = (void *)&null_pktinfo;
	}
	optdatalen = sizeof(struct in6_pktinfo);
	break;
	case IPV6_TCLASS:
	if (pktopt && pktopt->ip6po_tclass >= 0)
	optdata = (void *)&pktopt->ip6po_tclass;
	else
	optdata = (void *)&deftclass;
	optdatalen = sizeof(int);
	break;
	case IPV6_HOPOPTS:
	if (pktopt && pktopt->ip6po_hbh) {
	optdata = (void *)pktopt->ip6po_hbh;
	ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
	optdatalen = (ip6e->ip6e_len + 1) << 3;
	}
	break;
	case IPV6_RTHDR:
	if (pktopt && pktopt->ip6po_rthdr) {
	optdata = (void *)pktopt->ip6po_rthdr;
	ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
	optdatalen = (ip6e->ip6e_len + 1) << 3;
	}
	break;
	case IPV6_RTHDRDSTOPTS:
	if (pktopt && pktopt->ip6po_dest1) {
	optdata = (void *)pktopt->ip6po_dest1;
	ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
	optdatalen = (ip6e->ip6e_len + 1) << 3;
	}
	break;
	case IPV6_DSTOPTS:
	if (pktopt && pktopt->ip6po_dest2) {
	optdata = (void *)pktopt->ip6po_dest2;
	ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
	optdatalen = (ip6e->ip6e_len + 1) << 3;
	}
	break;
	case IPV6_NEXTHOP:
	if (pktopt && pktopt->ip6po_nexthop) {
	optdata = (void *)pktopt->ip6po_nexthop;
	optdatalen = pktopt->ip6po_nexthop->sa_len;
	}
	break;
	case IPV6_USE_MIN_MTU:
	if (pktopt)
	optdata = (void *)&pktopt->ip6po_minmtu;
	else
	optdata = (void *)&defminmtu;
	optdatalen = sizeof(int);
	break;
	case IPV6_DONTFRAG:
	if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
	on = 1;
	else
	on = 0;
	optdata = (void *)&on;
	optdatalen = sizeof(on);
	break;
	case IPV6_PREFER_TEMPADDR:
	if (pktopt)
	optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
	else
	optdata = (void *)&defpreftemp;
	optdatalen = sizeof(int);
	break;
	default: /* should not happen */
	#ifdef DIAGNOSTIC
	panic("ip6_getpcbopt: unexpected option\n");
	#endif
	return (ENOPROTOOPT);
	}

	error = sooptcopyout(sopt, optdata, optdatalen);

	return (error);
	}

	void
	ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
	{
	if (pktopt == NULL)
	return;

	if (optname == -1 \|\| optname == IPV6_PKTINFO) {
	if (pktopt->ip6po_pktinfo)
	free(pktopt->ip6po_pktinfo, M_IP6OPT);
	pktopt->ip6po_pktinfo = NULL;
	}
	if (optname == -1 \|\| optname == IPV6_HOPLIMIT)
	pktopt->ip6po_hlim = -1;
	if (optname == -1 \|\| optname == IPV6_TCLASS)
	pktopt->ip6po_tclass = -1;
	if (optname == -1 \|\| optname == IPV6_NEXTHOP) {
	if (pktopt->ip6po_nextroute.ro_rt) {
	RTFREE(pktopt->ip6po_nextroute.ro_rt);
	pktopt->ip6po_nextroute.ro_rt = NULL;
	}
	if (pktopt->ip6po_nexthop)
	free(pktopt->ip6po_nexthop, M_IP6OPT);
	pktopt->ip6po_nexthop = NULL;
	}
	if (optname == -1 \|\| optname == IPV6_HOPOPTS) {
	if (pktopt->ip6po_hbh)
	free(pktopt->ip6po_hbh, M_IP6OPT);
	pktopt->ip6po_hbh = NULL;
	}
	if (optname == -1 \|\| optname == IPV6_RTHDRDSTOPTS) {
	if (pktopt->ip6po_dest1)
	free(pktopt->ip6po_dest1, M_IP6OPT);
	pktopt->ip6po_dest1 = NULL;
	}
	if (optname == -1 \|\| optname == IPV6_RTHDR) {
	if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
	free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
	pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
	if (pktopt->ip6po_route.ro_rt) {
	RTFREE(pktopt->ip6po_route.ro_rt);
	pktopt->ip6po_route.ro_rt = NULL;
	}
	}
	if (optname == -1 \|\| optname == IPV6_DSTOPTS) {
	if (pktopt->ip6po_dest2)
	free(pktopt->ip6po_dest2, M_IP6OPT);
	pktopt->ip6po_dest2 = NULL;
	}
	}

	#define PKTOPT_EXTHDRCPY(type) \
	do {\
	if (src->type) {\
	int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
	dst->type = malloc(hlen, M_IP6OPT, canwait);\
	if (dst->type == NULL && canwait == M_NOWAIT)\
	goto bad;\
	bcopy(src->type, dst->type, hlen);\
	}\
	} while (/CONSTCOND/ 0)

	static int
	copypktopts(struct ip6_pktopts dst, struct ip6_pktopts src, int canwait)
	{
	if (dst == NULL \|\| src == NULL) {
	printf("ip6_clearpktopts: invalid argument\n");
	return (EINVAL);
	}

	dst->ip6po_hlim = src->ip6po_hlim;
	dst->ip6po_tclass = src->ip6po_tclass;
	dst->ip6po_flags = src->ip6po_flags;
	if (src->ip6po_pktinfo) {
	dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
	M_IP6OPT, canwait);
	if (dst->ip6po_pktinfo == NULL)
	goto bad;
	dst->ip6po_pktinfo = src->ip6po_pktinfo;
	}
	if (src->ip6po_nexthop) {
	dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
	M_IP6OPT, canwait);
	if (dst->ip6po_nexthop == NULL)
	goto bad;
	bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
	src->ip6po_nexthop->sa_len);
	}
	PKTOPT_EXTHDRCPY(ip6po_hbh);
	PKTOPT_EXTHDRCPY(ip6po_dest1);
	PKTOPT_EXTHDRCPY(ip6po_dest2);
	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
	return (0);

	bad:
	ip6_clearpktopts(dst, -1);
	return (ENOBUFS);
	}
	#undef PKTOPT_EXTHDRCPY

	/*
	 * Allocate a new option structure and fill it with a copy of src.
	 * "canwait" is handed to malloc() for every allocation involved.
	 * Returns the new structure, or NULL if the allocation or the copy
	 * fails (nothing is leaked in that case).
	 */
	struct ip6_pktopts *
	ip6_copypktopts(struct ip6_pktopts *src, int canwait)
	{
		struct ip6_pktopts *copy;

		copy = malloc(sizeof(*copy), M_IP6OPT, canwait);
		if (copy != NULL) {
			ip6_initpktopts(copy);
			if (copypktopts(copy, src, canwait) != 0) {
				/* copypktopts() already released the members */
				free(copy, M_IP6OPT);
				copy = NULL;
			}
		}

		return (copy);
	}

	/*
	 * Dispose of an option structure: clear every option it holds, then
	 * free the structure itself.  A NULL argument is a no-op.
	 */
	void
	ip6_freepcbopts(struct ip6_pktopts *pktopt)
	{
		if (pktopt != NULL) {
			ip6_clearpktopts(pktopt, -1);
			free(pktopt, M_IP6OPT);
		}
	}

	/*
	* Set the IP6 multicast options in response to user setsockopt().
	*/
	static int
	ip6_setmoptions(int optname, struct ip6_moptions *im6op, struct mbuf m)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	int error = 0;
	u_int loop, ifindex;
	struct ipv6_mreq *mreq;
	struct ifnet *ifp;
	struct ip6_moptions im6o = im6op;
	struct route_in6 ro;
	struct in6_multi_mship *imm;

	if (im6o == NULL) {
	/*
	* No multicast option buffer attached to the pcb;
	* allocate one and initialize to default values.
	*/
	im6o = (struct ip6_moptions *)
	malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);

	if (im6o == NULL)
	return (ENOBUFS);
	*im6op = im6o;
	im6o->im6o_multicast_ifp = NULL;
	im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
	im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
	LIST_INIT(&im6o->im6o_memberships);
	}

	switch (optname) {

	case IPV6_MULTICAST_IF:
	/*
	* Select the interface for outgoing multicast packets.
	*/
	if (m == NULL \|\| m->m_len != sizeof(u_int)) {
	error = EINVAL;
	break;
	}
	bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
	if (ifindex < 0 \|\| V_if_index < ifindex) {
	error = ENXIO; /* XXX EINVAL? */
	break;
	}
	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL \|\| (ifp->if_flags & IFF_MULTICAST) == 0) {
	error = EADDRNOTAVAIL;
	break;
	}
	im6o->im6o_multicast_ifp = ifp;
	break;

	case IPV6_MULTICAST_HOPS:
	{
	/*
	* Set the IP6 hoplimit for outgoing multicast packets.
	*/
	int optval;
	if (m == NULL \|\| m->m_len != sizeof(int)) {
	error = EINVAL;
	break;
	}
	bcopy(mtod(m, u_int *), &optval, sizeof(optval));
	if (optval < -1 \|\| optval >= 256)
	error = EINVAL;
	else if (optval == -1)
	im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
	else
	im6o->im6o_multicast_hlim = optval;
	break;
	}

	case IPV6_MULTICAST_LOOP:
	/*
	* Set the loopback flag for outgoing multicast packets.
	* Must be zero or one.
	*/
	if (m == NULL \|\| m->m_len != sizeof(u_int)) {
	error = EINVAL;
	break;
	}
	bcopy(mtod(m, u_int *), &loop, sizeof(loop));
	if (loop > 1) {
	error = EINVAL;
	break;
	}
	im6o->im6o_multicast_loop = loop;
	break;

	case IPV6_JOIN_GROUP:
	/*
	* Add a multicast group membership.
	* Group must be a valid IP6 multicast address.
	*/
	if (m == NULL \|\| m->m_len != sizeof(struct ipv6_mreq)) {
	error = EINVAL;
	break;
	}
	mreq = mtod(m, struct ipv6_mreq *);

	if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
	/*
	* We use the unspecified address to specify to accept
	* all multicast addresses. Only super user is allowed
	* to do this.
	*/
	/* XXX-BZ might need a better PRIV_NETINET_x for this */
	error = priv_check(curthread, PRIV_NETINET_MROUTE);
	if (error)
	break;
	} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
	error = EINVAL;
	break;
	}

	/*
	* If no interface was explicitly specified, choose an
	* appropriate one according to the given multicast address.
	*/
	if (mreq->ipv6mr_interface == 0) {
	struct sockaddr_in6 *dst;

	/*
	* Look up the routing table for the
	* address, and choose the outgoing interface.
	* XXX: is it a good approach?
	*/
	ro.ro_rt = NULL;
	dst = (struct sockaddr_in6 *)&ro.ro_dst;
	bzero(dst, sizeof(*dst));
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = mreq->ipv6mr_multiaddr;
	rtalloc((struct route *)&ro);
	if (ro.ro_rt == NULL) {
	error = EADDRNOTAVAIL;
	break;
	}
	ifp = ro.ro_rt->rt_ifp;
	RTFREE(ro.ro_rt);
	} else {
	/*
	* If the interface is specified, validate it.
	*/
	if (mreq->ipv6mr_interface < 0 \|\|
	V_if_index < mreq->ipv6mr_interface) {
	error = ENXIO; /* XXX EINVAL? */
	break;
	}
	ifp = ifnet_byindex(mreq->ipv6mr_interface);
	if (!ifp) {
	error = ENXIO; /* XXX EINVAL? */
	break;
	}
	}

	/*
	* See if we found an interface, and confirm that it
	* supports multicast
	*/
	if (ifp == NULL \|\| (ifp->if_flags & IFF_MULTICAST) == 0) {
	error = EADDRNOTAVAIL;
	break;
	}

	if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
	error = EADDRNOTAVAIL; /* XXX: should not happen */
	break;
	}

	/*
	* See if the membership already exists.
	*/
	for (imm = im6o->im6o_memberships.lh_first;
	imm != NULL; imm = imm->i6mm_chain.le_next)
	if (imm->i6mm_maddr->in6m_ifp == ifp &&
	IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
	&mreq->ipv6mr_multiaddr))
	break;
	if (imm != NULL) {
	error = EADDRINUSE;
	break;
	}
	/*
	* Everything looks good; add a new record to the multicast
	* address list for the given interface.
	*/
	imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error, 0);
	if (imm == NULL)
	break;
	LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
	break;

	case IPV6_LEAVE_GROUP:
	/*
	* Drop a multicast group membership.
	* Group must be a valid IP6 multicast address.
	*/
	if (m == NULL \|\| m->m_len != sizeof(struct ipv6_mreq)) {
	error = EINVAL;
	break;
	}
	mreq = mtod(m, struct ipv6_mreq *);

	/*
	* If an interface address was specified, get a pointer
	* to its ifnet structure.
	*/
	if (mreq->ipv6mr_interface < 0 \|\|
	V_if_index < mreq->ipv6mr_interface) {
	error = ENXIO; /* XXX EINVAL? */
	break;
	}
	if (mreq->ipv6mr_interface == 0)
	ifp = NULL;
	else
	ifp = ifnet_byindex(mreq->ipv6mr_interface);

	/* Fill in the scope zone ID */
	if (ifp) {
	if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
	/* XXX: should not happen */
	error = EADDRNOTAVAIL;
	break;
	}
	} else if (mreq->ipv6mr_interface != 0) {
	/*
	* This case happens when the (positive) index is in
	* the valid range, but the corresponding interface has
	* been detached dynamically (XXX).
	*/
	error = EADDRNOTAVAIL;
	break;
	} else { /* ipv6mr_interface == 0 */
	struct sockaddr_in6 sa6_mc;

	/*
	* The API spec says as follows:
	* If the interface index is specified as 0, the
	* system may choose a multicast group membership to
	* drop by matching the multicast address only.
	* On the other hand, we cannot disambiguate the scope
	* zone unless an interface is provided. Thus, we
	* check if there's ambiguity with the default scope
	* zone as the last resort.
	*/
	bzero(&sa6_mc, sizeof(sa6_mc));
	sa6_mc.sin6_family = AF_INET6;
	sa6_mc.sin6_len = sizeof(sa6_mc);
	sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
	error = sa6_embedscope(&sa6_mc, V_ip6_use_defzone);
	if (error != 0)
	break;
	mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
	}

	/*
	* Find the membership in the membership list.
	*/
	for (imm = im6o->im6o_memberships.lh_first;
	imm != NULL; imm = imm->i6mm_chain.le_next) {
	if ((ifp == NULL \|\| imm->i6mm_maddr->in6m_ifp == ifp) &&
	IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
	&mreq->ipv6mr_multiaddr))
	break;
	}
	if (imm == NULL) {
	/* Unable to resolve interface */
	error = EADDRNOTAVAIL;
	break;
	}
	/*
	* Give up the multicast address record to which the
	* membership points.
	*/
	LIST_REMOVE(imm, i6mm_chain);
	in6_delmulti(imm->i6mm_maddr);
	free(imm, M_IP6MADDR);
	break;

	default:
	error = EOPNOTSUPP;
	break;
	}

	/*
	* If all options have default values, no need to keep the mbuf.
	*/
	if (im6o->im6o_multicast_ifp == NULL &&
	im6o->im6o_multicast_hlim == V_ip6_defmcasthlim &&
	im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
	im6o->im6o_memberships.lh_first == NULL) {
	free(*im6op, M_IP6MOPTS);
	*im6op = NULL;
	}

	return (error);
	}

	/*
	* Return the IP6 multicast options in response to user getsockopt().
	*/
	static int
	ip6_getmoptions(int optname, struct ip6_moptions im6o, struct mbuf *mp)
	{
	INIT_VNET_INET6(curvnet);
	u_int hlim, loop, *ifindex;

	mp = m_get(M_WAIT, MT_HEADER); / XXX */

	switch (optname) {

	case IPV6_MULTICAST_IF:
	ifindex = mtod(mp, u_int );
	(*mp)->m_len = sizeof(u_int);
	if (im6o == NULL \|\| im6o->im6o_multicast_ifp == NULL)
	*ifindex = 0;
	else
	*ifindex = im6o->im6o_multicast_ifp->if_index;
	return (0);

	case IPV6_MULTICAST_HOPS:
	hlim = mtod(mp, u_int );
	(*mp)->m_len = sizeof(u_int);
	if (im6o == NULL)
	*hlim = V_ip6_defmcasthlim;
	else
	*hlim = im6o->im6o_multicast_hlim;
	return (0);

	case IPV6_MULTICAST_LOOP:
	loop = mtod(mp, u_int );
	(*mp)->m_len = sizeof(u_int);
	if (im6o == NULL)
	*loop = V_ip6_defmcasthlim;
	else
	*loop = im6o->im6o_multicast_loop;
	return (0);

	default:
	return (EOPNOTSUPP);
	}
	}

	/*
	 * Tear down a multicast option buffer: every membership is unlinked,
	 * its multicast address record (if any) is given up, and the entry is
	 * freed; finally the buffer itself is freed.  NULL is accepted.
	 */
	void
	ip6_freemoptions(struct ip6_moptions *im6o)
	{
		struct in6_multi_mship *mship;

		if (im6o == NULL)
			return;

		/* always take the current head until the list is drained */
		for (;;) {
			mship = im6o->im6o_memberships.lh_first;
			if (mship == NULL)
				break;
			LIST_REMOVE(mship, i6mm_chain);
			if (mship->i6mm_maddr)
				in6_delmulti(mship->i6mm_maddr);
			free(mship, M_IP6MADDR);
		}
		free(im6o, M_IP6MOPTS);
	}

	/*
	* Set IPv6 outgoing packet options based on advanced API.
	*/
	int
	ip6_setpktopts(struct mbuf control, struct ip6_pktopts opt,
	struct ip6_pktopts stickyopt, struct ucred cred, int uproto)
	{
	struct cmsghdr *cm = 0;

	if (control == NULL \|\| opt == NULL)
	return (EINVAL);

	ip6_initpktopts(opt);
	if (stickyopt) {
	int error;

	/*
	* If stickyopt is provided, make a local copy of the options
	* for this particular packet, then override them by ancillary
	* objects.
	* XXX: copypktopts() does not copy the cached route to a next
	* hop (if any). This is not very good in terms of efficiency,
	* but we can allow this since this option should be rarely
	* used.
	*/
	if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
	return (error);
	}

	/*
	* XXX: Currently, we assume all the optional information is stored
	* in a single mbuf.
	*/
	if (control->m_next)
	return (EINVAL);

	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
	control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
	int error;

	if (control->m_len < CMSG_LEN(0))
	return (EINVAL);

	cm = mtod(control, struct cmsghdr *);
	if (cm->cmsg_len == 0 \|\| cm->cmsg_len > control->m_len)
	return (EINVAL);
	if (cm->cmsg_level != IPPROTO_IPV6)
	continue;

	error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
	cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
	if (error)
	return (error);
	}

	return (0);
	}

	/*
	 * Set a particular packet option, as a sticky option or an ancillary data
	 * item.  "len" can be 0 only when it's a sticky option.
	 * We have 4 cases of combination of "sticky" and "cmsg":
	 * "sticky=0, cmsg=0": impossible
	 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
	 * "sticky=1, cmsg=0": RFC3542 socket option
	 * "sticky=1, cmsg=1": RFC2292 socket option
	 *
	 * optname: IPV6_* option or ancillary data type.
	 * buf/len: option value and its length in bytes.
	 * opt:     option structure to update.
	 * cred:    credentials for the privilege checks on next hop, hop-by-hop
	 *          and destination options; the checks are skipped when NULL.
	 * uproto:  upper-layer protocol of the socket.
	 *
	 * Returns 0 on success or an errno value (EINVAL, ENOPROTOOPT, ENXIO,
	 * ENOBUFS, EAFNOSUPPORT, or a privilege/scope error).
	 */
	static int
	ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
	    struct ucred *cred, int sticky, int cmsg, int uproto)
	{
		INIT_VNET_NET(curvnet);
		INIT_VNET_INET6(curvnet);
		int minmtupolicy, preftemp;
		int error;

		if (!sticky && !cmsg) {
	#ifdef DIAGNOSTIC
			printf("ip6_setpktopt: impossible case\n");
	#endif
			return (EINVAL);
		}

		/*
		 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
		 * not be specified in the context of RFC3542.  Conversely,
		 * RFC3542 types should not be specified in the context of RFC2292.
		 */
		if (!cmsg) {
			switch (optname) {
			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292NEXTHOP:
			case IPV6_2292HOPOPTS:
			case IPV6_2292DSTOPTS:
			case IPV6_2292RTHDR:
			case IPV6_2292PKTOPTIONS:
				return (ENOPROTOOPT);
			}
		}
		if (sticky && cmsg) {
			switch (optname) {
			case IPV6_PKTINFO:
			case IPV6_HOPLIMIT:
			case IPV6_NEXTHOP:
			case IPV6_HOPOPTS:
			case IPV6_DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_RTHDR:
			case IPV6_USE_MIN_MTU:
			case IPV6_DONTFRAG:
			case IPV6_TCLASS:
			case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
				return (ENOPROTOOPT);
			}
		}

		switch (optname) {
		case IPV6_2292PKTINFO:
		case IPV6_PKTINFO:
		{
			struct ifnet *ifp = NULL;
			struct in6_pktinfo *pktinfo;

			if (len != sizeof(struct in6_pktinfo))
				return (EINVAL);

			pktinfo = (struct in6_pktinfo *)buf;

			/*
			 * An application can clear any sticky IPV6_PKTINFO option by
			 * doing a "regular" setsockopt with ipi6_addr being
			 * in6addr_any and ipi6_ifindex being zero.
			 * [RFC 3542, Section 6]
			 */
			if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
			    pktinfo->ipi6_ifindex == 0 &&
			    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
				ip6_clearpktopts(opt, optname);
				break;
			}

			if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
			    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
				return (EINVAL);
			}

			/* validate the interface index if specified. */
			if (pktinfo->ipi6_ifindex > V_if_index ||
			    pktinfo->ipi6_ifindex < 0) {
				 return (ENXIO);
			}
			if (pktinfo->ipi6_ifindex) {
				ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
				if (ifp == NULL)
					return (ENXIO);
			}

			/*
			 * We store the address anyway, and let in6_selectsrc()
			 * validate the specified address.  This is because ipi6_addr
			 * may not have enough information about its scope zone, and
			 * we may need additional information (such as outgoing
			 * interface or the scope zone of a destination address) to
			 * disambiguate the scope.
			 * XXX: the delay of the validation may confuse the
			 * application when it is used as a sticky option.
			 */
			if (opt->ip6po_pktinfo == NULL) {
				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
				    M_IP6OPT, M_NOWAIT);
				if (opt->ip6po_pktinfo == NULL)
					return (ENOBUFS);
			}
			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
			break;
		}

		case IPV6_2292HOPLIMIT:
		case IPV6_HOPLIMIT:
		{
			int *hlimp;

			/*
			 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
			 * to simplify the ordering among hoplimit options.
			 */
			if (optname == IPV6_HOPLIMIT && sticky)
				return (ENOPROTOOPT);

			if (len != sizeof(int))
				return (EINVAL);
			hlimp = (int *)buf;
			/* range-check the value, not the pointer */
			if (*hlimp < -1 || *hlimp > 255)
				return (EINVAL);

			opt->ip6po_hlim = *hlimp;
			break;
		}

		case IPV6_TCLASS:
		{
			int tclass;

			if (len != sizeof(int))
				return (EINVAL);
			tclass = *(int *)buf;
			if (tclass < -1 || tclass > 255)
				return (EINVAL);

			opt->ip6po_tclass = tclass;
			break;
		}

		case IPV6_2292NEXTHOP:
		case IPV6_NEXTHOP:
			if (cred != NULL) {
				error = priv_check_cred(cred,
				    PRIV_NETINET_SETHDROPTS, 0);
				if (error)
					return (error);
			}

			if (len == 0) {	/* just remove the option */
				ip6_clearpktopts(opt, IPV6_NEXTHOP);
				break;
			}

			/* check if cmsg_len is large enough for sa_len */
			if (len < sizeof(struct sockaddr) || len < *buf)
				return (EINVAL);

			switch (((struct sockaddr *)buf)->sa_family) {
			case AF_INET6:
			{
				struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;

				if (sa6->sin6_len != sizeof(struct sockaddr_in6))
					return (EINVAL);

				if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
				    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
					return (EINVAL);
				}
				if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
				    != 0) {
					return (error);
				}
				break;
			}
			case AF_LINK:	/* should eventually be supported */
			default:
				return (EAFNOSUPPORT);
			}

			/* turn off the previous option, then set the new option. */
			ip6_clearpktopts(opt, IPV6_NEXTHOP);
			/* the first byte of the sockaddr (*buf) is used as its length */
			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
			if (opt->ip6po_nexthop == NULL)
				return (ENOBUFS);
			bcopy(buf, opt->ip6po_nexthop, *buf);
			break;

		case IPV6_2292HOPOPTS:
		case IPV6_HOPOPTS:
		{
			struct ip6_hbh *hbh;
			int hbhlen;

			/*
			 * XXX: We don't allow a non-privileged user to set ANY HbH
			 * options, since per-option restriction has too much
			 * overhead.
			 */
			if (cred != NULL) {
				error = priv_check_cred(cred,
				    PRIV_NETINET_SETHDROPTS, 0);
				if (error)
					return (error);
			}

			if (len == 0) {
				ip6_clearpktopts(opt, IPV6_HOPOPTS);
				break;	/* just remove the option */
			}

			/* message length validation */
			if (len < sizeof(struct ip6_hbh))
				return (EINVAL);
			hbh = (struct ip6_hbh *)buf;
			hbhlen = (hbh->ip6h_len + 1) << 3;
			if (len != hbhlen)
				return (EINVAL);

			/* turn off the previous option, then set the new option. */
			ip6_clearpktopts(opt, IPV6_HOPOPTS);
			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
			if (opt->ip6po_hbh == NULL)
				return (ENOBUFS);
			bcopy(hbh, opt->ip6po_hbh, hbhlen);

			break;
		}

		case IPV6_2292DSTOPTS:
		case IPV6_DSTOPTS:
		case IPV6_RTHDRDSTOPTS:
		{
			struct ip6_dest *dest, **newdest = NULL;
			int destlen;

			if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
				error = priv_check_cred(cred,
				    PRIV_NETINET_SETHDROPTS, 0);
				if (error)
					return (error);
			}

			if (len == 0) {
				ip6_clearpktopts(opt, optname);
				break;	/* just remove the option */
			}

			/* message length validation */
			if (len < sizeof(struct ip6_dest))
				return (EINVAL);
			dest = (struct ip6_dest *)buf;
			destlen = (dest->ip6d_len + 1) << 3;
			if (len != destlen)
				return (EINVAL);

			/*
			 * Determine the position that the destination options header
			 * should be inserted; before or after the routing header.
			 */
			switch (optname) {
			case IPV6_2292DSTOPTS:
				/*
				 * The old advanced API is ambiguous on this point.
				 * Our approach is to determine the position based
				 * according to the existence of a routing header.
				 * Note, however, that this depends on the order of the
				 * extension headers in the ancillary data; the 1st
				 * part of the destination options header must appear
				 * before the routing header in the ancillary data,
				 * too.
				 * RFC3542 solved the ambiguity by introducing
				 * separate ancillary data or option types.
				 */
				if (opt->ip6po_rthdr == NULL)
					newdest = &opt->ip6po_dest1;
				else
					newdest = &opt->ip6po_dest2;
				break;
			case IPV6_RTHDRDSTOPTS:
				newdest = &opt->ip6po_dest1;
				break;
			case IPV6_DSTOPTS:
				newdest = &opt->ip6po_dest2;
				break;
			}

			/*
			 * turn off the previous option, then set the new option.
			 * NOTE(review): for IPV6_2292DSTOPTS it is not evident from
			 * here that ip6_clearpktopts() releases a header already
			 * stored in *newdest before it is overwritten -- confirm.
			 */
			ip6_clearpktopts(opt, optname);
			*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
			if (*newdest == NULL)
				return (ENOBUFS);
			bcopy(dest, *newdest, destlen);

			break;
		}

		case IPV6_2292RTHDR:
		case IPV6_RTHDR:
		{
			struct ip6_rthdr *rth;
			int rthlen;

			if (len == 0) {
				ip6_clearpktopts(opt, IPV6_RTHDR);
				break;	/* just remove the option */
			}

			/* message length validation */
			if (len < sizeof(struct ip6_rthdr))
				return (EINVAL);
			rth = (struct ip6_rthdr *)buf;
			rthlen = (rth->ip6r_len + 1) << 3;
			if (len != rthlen)
				return (EINVAL);

			switch (rth->ip6r_type) {
			case IPV6_RTHDR_TYPE_0:
				if (rth->ip6r_len == 0)	/* must contain one addr */
					return (EINVAL);
				if (rth->ip6r_len % 2) /* length must be even */
					return (EINVAL);
				if (rth->ip6r_len / 2 != rth->ip6r_segleft)
					return (EINVAL);
				break;
			default:
				return (EINVAL);	/* not supported */
			}

			/* turn off the previous option */
			ip6_clearpktopts(opt, IPV6_RTHDR);
			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
			if (opt->ip6po_rthdr == NULL)
				return (ENOBUFS);
			bcopy(rth, opt->ip6po_rthdr, rthlen);

			break;
		}

		case IPV6_USE_MIN_MTU:
			if (len != sizeof(int))
				return (EINVAL);
			minmtupolicy = *(int *)buf;
			if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
			    minmtupolicy != IP6PO_MINMTU_DISABLE &&
			    minmtupolicy != IP6PO_MINMTU_ALL) {
				return (EINVAL);
			}
			opt->ip6po_minmtu = minmtupolicy;
			break;

		case IPV6_DONTFRAG:
			if (len != sizeof(int))
				return (EINVAL);

			if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
				/*
				 * we ignore this option for TCP sockets.
				 * (RFC3542 leaves this case unspecified.)
				 */
				opt->ip6po_flags &= ~IP6PO_DONTFRAG;
			} else
				opt->ip6po_flags |= IP6PO_DONTFRAG;
			break;

		case IPV6_PREFER_TEMPADDR:
			if (len != sizeof(int))
				return (EINVAL);
			preftemp = *(int *)buf;
			if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
			    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
			    preftemp != IP6PO_TEMPADDR_PREFER) {
				return (EINVAL);
			}
			opt->ip6po_prefer_tempaddr = preftemp;
			break;

		default:
			return (ENOPROTOOPT);
		} /* end of switch */

		return (0);
	}

	/*
	 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
	 * packet to the input queue of a specified interface.  Note that this
	 * calls the output routine of the loopback "driver", but with an interface
	 * pointer that might NOT be &loif -- easier than replicating that code here.
	 *
	 * ifp: interface the copy is looped back on.
	 * m:   the outgoing packet; it is copied, not consumed.
	 * dst: destination address, used for its address family.
	 *
	 * Failures to copy or pull up the packet are silent: the function just
	 * returns without looping anything back.
	 */
	void
	ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
	{
		struct mbuf *copym;
		struct ip6_hdr *ip6;

		copym = m_copy(m, 0, M_COPYALL);
		if (copym == NULL)
			return;

		/*
		 * Make sure to deep-copy IPv6 header portion in case the data
		 * is in an mbuf cluster, so that we can safely override the IPv6
		 * header portion later.
		 */
		if ((copym->m_flags & M_EXT) != 0 ||
		    copym->m_len < sizeof(struct ip6_hdr)) {
			copym = m_pullup(copym, sizeof(struct ip6_hdr));
			if (copym == NULL)
				return;
		}

	#ifdef DIAGNOSTIC
		if (copym->m_len < sizeof(*ip6)) {
			m_freem(copym);
			return;
		}
	#endif

		ip6 = mtod(copym, struct ip6_hdr *);
		/*
		 * clear embedded scope identifiers if necessary.
		 * in6_clearscope will touch the addresses only when necessary.
		 */
		in6_clearscope(&ip6->ip6_src);
		in6_clearscope(&ip6->ip6_dst);

		(void)if_simloop(ifp, copym, dst->sin6_family, 0);
	}

	/*
	* Chop IPv6 header off from the payload.
	*/
	static int
	ip6_splithdr(struct mbuf m, struct ip6_exthdrs exthdrs)
	{
	struct mbuf *mh;
	struct ip6_hdr *ip6;

	ip6 = mtod(m, struct ip6_hdr *);
	if (m->m_len > sizeof(*ip6)) {
	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
	if (mh == 0) {
	m_freem(m);
	return ENOBUFS;
	}
	M_MOVE_PKTHDR(mh, m);
	MH_ALIGN(mh, sizeof(*ip6));
	m->m_len -= sizeof(*ip6);
	m->m_data += sizeof(*ip6);
	mh->m_next = m;
	m = mh;
	m->m_len = sizeof(*ip6);
	bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
	}
	exthdrs->ip6e_ip6 = m;
	return 0;
	}

	/*
	 * Compute IPv6 extension header length.
	 *
	 * Sums the byte lengths of the hop-by-hop, routing and destination
	 * option headers stored in the pcb's output options.  Returns 0 when
	 * the pcb has no output options.
	 */
	int
	ip6_optlen(struct in6pcb *in6p)
	{
		int len;

		if (!in6p->in6p_outputopts)
			return 0;

		len = 0;
	/* byte length of extension header x, or 0 if x is NULL */
	#define elen(x) \
	    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)

		len += elen(in6p->in6p_outputopts->ip6po_hbh);
		if (in6p->in6p_outputopts->ip6po_rthdr)
			/* dest1 is valid with rthdr only */
			len += elen(in6p->in6p_outputopts->ip6po_dest1);
		len += elen(in6p->in6p_outputopts->ip6po_rthdr);
		len += elen(in6p->in6p_outputopts->ip6po_dest2);
		return len;
	#undef elen
	}
	Index: projects/arpv2_merge_1/sys/netinet6/nd6.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/nd6.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/nd6.c (revision 185839)
	@@ -1,2015 +1,2102 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"
	#include "opt_mac.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/callout.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/socket.h>
	#include <sys/sockio.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/protosw.h>
	#include <sys/errno.h>
	#include <sys/syslog.h>
	+#include <sys/lock.h>
	+#include <sys/rwlock.h>
	#include <sys/queue.h>
	#include <sys/sysctl.h>

	#include <net/if.h>
	#include <net/if_arc.h>
	#include <net/if_dl.h>
	#include <net/if_types.h>
	#include <net/iso88025.h>
	#include <net/fddi.h>
	#include <net/route.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <net/if_llatbl.h>
	#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
	#include <netinet/if_ether.h>
	#include <netinet6/in6_var.h>
	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet/icmp6.h>
	#include <netinet6/vinet6.h>

	#include <sys/limits.h>
	#include <sys/vimage.h>

	#include <security/mac/mac_framework.h>

	/* Timer periods, in seconds. */
	#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
	#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

	/*
	 * Pointer-cast shorthands.  The argument is parenthesized so that an
	 * expression argument (e.g. "p + n") is cast as a whole.
	 */
	#define SIN6(s) ((struct sockaddr_in6 *)(s))
	#define SDL(s) ((struct sockaddr_dl *)(s))

	/*
	 * Global Neighbor Discovery state, compiled only when VIMAGE_GLOBALS is
	 * defined.  nd6_init() assigns the defaults through V_-prefixed names;
	 * presumably those resolve to these variables -- confirm against the
	 * vimage headers.
	 */
	#ifdef VIMAGE_GLOBALS
	int nd6_prune;
	int nd6_delay;
	int nd6_umaxtries;
	int nd6_mmaxtries;
	int nd6_useloopback;
	int nd6_gctimer;

	/* preventing too many loops in ND option parsing */
	int nd6_maxndopt;

	int nd6_maxnudhint;
	int nd6_maxqueuelen;

	int nd6_debug;

	+/* for debugging? */
	+#if 0
	+static int nd6_inuse, nd6_allocated;
	+#endif
	+
	/* Default router list and prefix list heads (initialised in nd6_init()). */
	struct nd_drhead nd_defrouter;
	struct nd_prhead nd_prefix;

	int nd6_recalc_reachtm_interval;
	#endif /* VIMAGE_GLOBALS */

	/* All-ones IPv6 socket address; filled in by nd6_init(). */
	static struct sockaddr_in6 all1_sa;

	/* Forward declarations of file-local helpers. */
	static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
	    struct ifnet *);
	static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
	static void nd6_slowtimo(void *);
	static int regen_tmpaddr(struct in6_ifaddr *);
	static struct llentry *nd6_free(struct llentry *, int);
	static void nd6_llinfo_timer(void *);
	static void clear_llinfo_pqueue(struct llentry *);

	/*
	 * Callout handles and DAD tunables, compiled only when VIMAGE_GLOBALS is
	 * defined.  The last three are defined in another file (extern).
	 */
	#ifdef VIMAGE_GLOBALS
	struct callout nd6_slowtimo_ch;
	struct callout nd6_timer_ch;
	extern struct callout in6_tmpaddrtimer_ch;
	extern int dad_ignore_ns;
	extern int dad_maxtry;
	#endif

	/*
	 * One-time initialisation of the Neighbor Discovery module: assigns the
	 * default tunables, builds the all-ones address, initialises the prefix
	 * and default router lists and arms the slow timer.  A second call is
	 * logged and ignored (guarded by a function-local static flag).
	 */
	void
	nd6_init(void)
	{
	INIT_VNET_INET6(curvnet);
	static int nd6_init_done = 0;
	int i;

	if (nd6_init_done) {
	log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
	return;
	}

	V_nd6_prune = 1; /* walk list every 1 seconds */
	V_nd6_delay = 5; /* delay first probe time 5 second */
	V_nd6_umaxtries = 3; /* maximum unicast query */
	V_nd6_mmaxtries = 3; /* maximum multicast query */
	V_nd6_useloopback = 1; /* use loopback interface for local traffic */
	V_nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */

	/* preventing too many loops in ND option parsing */
	V_nd6_maxndopt = 10; /* max # of ND options allowed */

	V_nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */
	V_nd6_maxqueuelen = 1; /* max pkts cached in unresolved ND entries */

	#ifdef ND6_DEBUG
	V_nd6_debug = 1;
	#else
	V_nd6_debug = 0;
	#endif

	V_nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;

	V_dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/
	V_dad_maxtry = 15; /* max # of tries to transmit DAD packet */

	+ /*
	+ * XXX just to get this to compile KMM
	+ */
	+#ifdef notyet
	+ V_llinfo_nd6.ln_next = &V_llinfo_nd6;
	+ V_llinfo_nd6.ln_prev = &V_llinfo_nd6;
	+#endif
	LIST_INIT(&V_nd_prefix);

	/*
	 * NOTE(review): these four are written without the V_ prefix, while
	 * nd6_timer() reads V_ip6_use_tempaddr -- confirm both spellings name
	 * the same variable.
	 */
	ip6_use_tempaddr = 0;
	ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
	ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
	ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;

	/* Build the all-ones address: every byte of sin6_addr set to 0xff. */
	all1_sa.sin6_family = AF_INET6;
	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
	all1_sa.sin6_addr.s6_addr[i] = 0xff;

	/* initialization of the default router list */
	TAILQ_INIT(&V_nd_defrouter);
	/* start timer */
	callout_init(&V_nd6_slowtimo_ch, 0);
	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
	nd6_slowtimo, NULL);

	nd6_init_done = 1;

	}

	/*
	 * Allocate and initialise the per-interface ND information block.
	 *
	 * ifp - interface being attached; used to derive the initial link MTU
	 *
	 * Returns the new, zero-filled and default-populated nd_ifinfo.  The
	 * allocation uses M_WAITOK and its result is not checked for NULL.
	 * nd6_ifdetach() releases the block.
	 */
	struct nd_ifinfo *
	nd6_ifattach(struct ifnet *ifp)
	{
		struct nd_ifinfo *nd;

		/* Size the allocation from the object, not from the pointer. */
		nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
		bzero(nd, sizeof(*nd));

		nd->initialized = 1;

		nd->chlim = IPV6_DEFHLIM;
		nd->basereachable = REACHABLE_TIME;
		nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
		nd->retrans = RETRANS_TIMER;
		/*
		 * Note that the default value of ip6_accept_rtadv is 0, which means
		 * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
		 * here.
		 */
		nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);

		/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
		nd6_setmtu0(ifp, nd);

		return nd;
	}

	/*
	 * Release a per-interface ND information block.
	 *
	 * nd - block to free; handed straight to free() with the M_IP6NDP type
	 */
	void
	nd6_ifdetach(struct nd_ifinfo *nd)
	{
		free(nd, M_IP6NDP);
	}

	/*
	 * Re-derive the ND level link MTU of an interface.  Meant to be called
	 * when the physical MTU changes, since the ND level MTU may then need
	 * adjusting as well.  The work is done by nd6_setmtu0() on the
	 * interface's own ND information block.
	 */
	void
	nd6_setmtu(struct ifnet *ifp)
	{
		struct nd_ifinfo *ndinfo = ND_IFINFO(ifp);

		nd6_setmtu0(ifp, ndinfo);
	}

	/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
	void
	nd6_setmtu0(struct ifnet ifp, struct nd_ifinfo ndi)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	u_int32_t omaxmtu;

	omaxmtu = ndi->maxmtu;

	switch (ifp->if_type) {
	case IFT_ARCNET:
	ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
	break;
	case IFT_FDDI:
	ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
	break;
	case IFT_ISO88025:
	ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
	break;
	default:
	ndi->maxmtu = ifp->if_mtu;
	break;
	}

	/*
	* Decreasing the interface MTU under IPV6 minimum MTU may cause
	* undesirable situation. We thus notify the operator of the change
	* explicitly. The check for omaxmtu is necessary to restrict the
	* log to the case of changing the MTU, not initializing it.
	*/
	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
	log(LOG_NOTICE, "nd6_setmtu0: "
	"new link MTU on %s (%lu) is too small for IPv6\n",
	if_name(ifp), (unsigned long)ndi->maxmtu);
	}

	if (ndi->maxmtu > V_in6_maxmtu)
	in6_setmaxmtu(); /* check all interfaces just in case */

	#undef MIN
	}

	void
	nd6_option_init(void opt, int icmp6len, union nd_opts ndopts)
	{

	bzero(ndopts, sizeof(*ndopts));
	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
	ndopts->nd_opts_last
	= (struct nd_opt_hdr )(((u_char )opt) + icmp6len);

	if (icmp6len == 0) {
	ndopts->nd_opts_done = 1;
	ndopts->nd_opts_search = NULL;
	}
	}

	/*
	* Take one ND option.
	*/
	struct nd_opt_hdr *
	nd6_option(union nd_opts *ndopts)
	{
	struct nd_opt_hdr *nd_opt;
	int olen;

	if (ndopts == NULL)
	panic("ndopts == NULL in nd6_option");
	if (ndopts->nd_opts_last == NULL)
	panic("uninitialized ndopts in nd6_option");
	if (ndopts->nd_opts_search == NULL)
	return NULL;
	if (ndopts->nd_opts_done)
	return NULL;

	nd_opt = ndopts->nd_opts_search;

	/* make sure nd_opt_len is inside the buffer */
	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
	bzero(ndopts, sizeof(*ndopts));
	return NULL;
	}

	olen = nd_opt->nd_opt_len << 3;
	if (olen == 0) {
	/*
	* Message validation requires that all included
	* options have a length that is greater than zero.
	*/
	bzero(ndopts, sizeof(*ndopts));
	return NULL;
	}

	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
	/* option overruns the end of buffer, invalid */
	bzero(ndopts, sizeof(*ndopts));
	return NULL;
	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
	/* reached the end of options chain */
	ndopts->nd_opts_done = 1;
	ndopts->nd_opts_search = NULL;
	}
	return nd_opt;
	}

	/*
	 * Parse multiple ND options.
	 * This function is much easier to use, for ND routines that do not need
	 * multiple options of the same type.
	 *
	 * Walks the options with nd6_option() and records a pointer to each
	 * recognised one in ndopts->nd_opt_array, indexed by option type.  For
	 * source/target link-layer address, MTU and redirected-header options
	 * the first occurrence wins and a duplicate is only logged.  For prefix
	 * information the first occurrence is recorded and nd_opts_pi_end tracks
	 * the latest one.  Unknown types are logged and skipped.
	 *
	 * Returns 0 normally (including when the walk stops because more than
	 * V_nd6_maxndopt options were seen) and -1, with *ndopts zeroed, when
	 * nd6_option() rejected a malformed option.
	 */
	int
	nd6_options(union nd_opts *ndopts)
	{
		INIT_VNET_INET6(curvnet);
		struct nd_opt_hdr *opt;
		struct nd_opt_hdr **slot;
		int seen;

		if (ndopts == NULL)
			panic("ndopts == NULL in nd6_options");
		if (ndopts->nd_opts_last == NULL)
			panic("uninitialized ndopts in nd6_options");
		if (ndopts->nd_opts_search == NULL)
			return 0;

		for (seen = 1; ; seen++) {
			opt = nd6_option(ndopts);
			if (opt == NULL && ndopts->nd_opts_last == NULL) {
				/*
				 * Message validation requires that all included
				 * options have a length that is greater than zero.
				 */
				V_icmp6stat.icp6s_nd_badopt++;
				bzero(ndopts, sizeof(*ndopts));
				return -1;
			}

			if (opt != NULL) {
				switch (opt->nd_opt_type) {
				case ND_OPT_SOURCE_LINKADDR:
				case ND_OPT_TARGET_LINKADDR:
				case ND_OPT_MTU:
				case ND_OPT_REDIRECTED_HEADER:
					slot = &ndopts->nd_opt_array[opt->nd_opt_type];
					if (*slot) {
						nd6log((LOG_INFO,
						    "duplicated ND6 option found (type=%d)\n",
						    opt->nd_opt_type));
						/* XXX bark? */
					} else {
						*slot = opt;
					}
					break;
				case ND_OPT_PREFIX_INFORMATION:
					slot = &ndopts->nd_opt_array[opt->nd_opt_type];
					if (*slot == 0)
						*slot = opt;
					ndopts->nd_opts_pi_end =
					    (struct nd_opt_prefix_info *)opt;
					break;
				default:
					/*
					 * Unknown options must be silently ignored,
					 * to accommodate future extension to the
					 * protocol.
					 */
					nd6log((LOG_DEBUG,
					    "nd6_options: unsupported option %d - "
					    "option ignored\n", opt->nd_opt_type));
				}
			}

			/* Bound the walk, counting this iteration as well. */
			if (seen > V_nd6_maxndopt) {
				V_icmp6stat.icp6s_nd_toomanyopt++;
				nd6log((LOG_INFO, "too many loop in nd opt\n"));
				break;
			}

			if (ndopts->nd_opts_done)
				break;
		}

		return 0;
	}

	/*
	* ND6 timer routine to handle ND6 entries
	*
	* Arms or cancels the per-entry callout.  A negative tick clears the
	* expiry and stops the callout.  Otherwise the expiry is recorded and
	* the callout is scheduled to run nd6_llinfo_timer(); a tick count
	* larger than INT_MAX is scheduled as INT_MAX with the remainder parked
	* in ln_ntick.  In the new revision the entry is write-locked for the
	* duration and a reference is added whenever the callout is armed.
	*/
	void
	nd6_llinfo_settimer(struct llentry *ln, long tick)
	{
	+ LLE_WLOCK(ln);
	if (tick < 0) {
	ln->la_expire = 0;
	ln->ln_ntick = 0;
	callout_stop(&ln->ln_timer_ch);
	} else {
	ln->la_expire = time_second + tick / hz;
	/* presumably dropped again by LLE_FREE_LOCKED() in nd6_llinfo_timer() -- confirm */
	+ LLE_ADDREF(ln);
	if (tick > INT_MAX) {
	ln->ln_ntick = tick - INT_MAX;
	callout_reset(&ln->ln_timer_ch, INT_MAX,
	nd6_llinfo_timer, ln);
	} else {
	ln->ln_ntick = 0;
	callout_reset(&ln->ln_timer_ch, tick,
	nd6_llinfo_timer, ln);
	}
	}
	+ LLE_WUNLOCK(ln);
	}

	/*
	 * Callout handler for a neighbor cache entry (armed by
	 * nd6_llinfo_settimer()).  arg is the struct llentry.
	 *
	 * If part of a long timeout is still pending (ln_ntick > 0) the timer is
	 * re-armed.  Static or not-yet-expired entries are left alone and
	 * entries flagged LLE_DELETED are freed.  Otherwise the entry's ND state
	 * decides: INCOMPLETE and PROBE retransmit a neighbor solicitation
	 * until the retry limit and then free the entry, REACHABLE degrades to
	 * STALE, STALE is garbage collected, DELAY moves to PROBE (or to STALE
	 * when NUD is not performed on the interface).
	 */
	static void
	nd6_llinfo_timer(void *arg)
	{
	struct llentry *ln;
	struct in6_addr *dst;
	struct ifnet *ifp;
	struct nd_ifinfo *ndi = NULL;

	ln = (struct llentry *)arg;
	if (ln == NULL) {
	panic("%s: NULL entry!\n", __func__);
	return;
	}

	if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL)
	panic("ln ifp == NULL");

	CURVNET_SET(ifp->if_vnet);
	INIT_VNET_INET6(curvnet);

	+ /*
	+ * llentry is refcounted - we shouldn't need to protect it
	+ * with IF_AFDATA
	+ */
	IF_AFDATA_LOCK(ifp);

	/* A timeout longer than INT_MAX ticks is served in several rounds. */
	if (ln->ln_ntick > 0) {
	if (ln->ln_ntick > INT_MAX) {
	ln->ln_ntick -= INT_MAX;
	nd6_llinfo_settimer(ln, INT_MAX);
	} else {
	/*
	 * NOTE(review): ln_ntick is zeroed before it is passed on, so the
	 * timer is re-armed with 0 ticks instead of the remaining count --
	 * looks unintended; confirm.
	 */
	ln->ln_ntick = 0;
	nd6_llinfo_settimer(ln, ln->ln_ntick);
	}
	IF_AFDATA_UNLOCK(ifp);
	/*
	 * NOTE(review): the "done" label sits after CURVNET_RESTORE(), so this
	 * and the other early exits below skip it -- confirm that is harmless.
	 */
	- return;
	+ goto done;
	}

	ndi = ND_IFINFO(ifp);
	dst = &L3_ADDR_SIN6(ln)->sin6_addr;

	if ((ln->la_flags & LLE_STATIC) \|\| (ln->la_expire > time_second)) {
	IF_AFDATA_UNLOCK(ifp);
	- return;
	+ goto done;
	}

	if (ln->la_flags & LLE_DELETED) {
	(void)nd6_free(ln, 0);
	IF_AFDATA_UNLOCK(ifp);
	- return;
	+ goto done;
	}

	switch (ln->ln_state) {
	case ND6_LLINFO_INCOMPLETE:
	if (ln->la_asked < V_nd6_mmaxtries) {
	ln->la_asked++;
	nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
	nd6_ns_output(ifp, NULL, dst, ln, 0);
	} else {
	/* Resolution failed: report unreachability for the first held packet. */
	struct mbuf *m = ln->la_hold;
	if (m) {
	struct mbuf *m0;

	/*
	* assuming every packet in la_hold has the
	* same IP header
	*/
	m0 = m->m_nextpkt;
	m->m_nextpkt = NULL;
	icmp6_error2(m, ICMP6_DST_UNREACH,
	ICMP6_DST_UNREACH_ADDR, 0, ifp);

	ln->la_hold = m0;
	clear_llinfo_pqueue(ln);
	}
	(void)nd6_free(ln, 0);
	ln = NULL;
	}
	break;
	case ND6_LLINFO_REACHABLE:
	if (!ND6_LLINFO_PERMANENT(ln)) {
	ln->ln_state = ND6_LLINFO_STALE;
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
	}
	break;

	case ND6_LLINFO_STALE:
	/* Garbage Collection(RFC 2461 5.3) */
	if (!ND6_LLINFO_PERMANENT(ln)) {
	(void)nd6_free(ln, 1);
	ln = NULL;
	}
	break;

	case ND6_LLINFO_DELAY:
	if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
	/* We need NUD */
	ln->la_asked = 1;
	ln->ln_state = ND6_LLINFO_PROBE;
	nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
	nd6_ns_output(ifp, dst, dst, ln, 0);
	} else {
	ln->ln_state = ND6_LLINFO_STALE; /* XXX */
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
	}
	break;
	case ND6_LLINFO_PROBE:
	if (ln->la_asked < V_nd6_umaxtries) {
	ln->la_asked++;
	nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
	nd6_ns_output(ifp, dst, dst, ln, 0);
	} else {
	(void)nd6_free(ln, 0);
	ln = NULL;
	}
	break;
	}
	IF_AFDATA_UNLOCK(ifp);
	CURVNET_RESTORE();
	/*
	 * NOTE(review): several arms above set ln = NULL after nd6_free(), so
	 * LLE_FREE_LOCKED() can be reached with a NULL entry; the LLE_DELETED
	 * path instead arrives here with the entry it just passed to
	 * nd6_free().  Confirm both are safe.
	 */
	+done:
	+ LLE_FREE_LOCKED(ln);
	}


	/*
	* ND6 timer routine to expire default route list and prefix list
	*/
	void
	nd6_timer(void *arg)
	{
	CURVNET_SET_QUIET((struct vnet *) arg);
	INIT_VNET_INET6((struct vnet *) arg);
	int s;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	struct in6_ifaddr ia6, nia6;
	struct in6_addrlifetime *lt6;

	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
	nd6_timer, NULL);

	/* expire default router list */
	s = splnet();
	dr = TAILQ_FIRST(&V_nd_defrouter);
	while (dr) {
	if (dr->expire && dr->expire < time_second) {
	struct nd_defrouter *t;
	t = TAILQ_NEXT(dr, dr_entry);
	defrtrlist_del(dr);
	dr = t;
	} else {
	dr = TAILQ_NEXT(dr, dr_entry);
	}
	}

	/*
	* expire interface addresses.
	* in the past the loop was inside prefix expiry processing.
	* However, from a stricter speci-confrmance standpoint, we should
	* rather separate address lifetimes and prefix lifetimes.
	*/
	addrloop:
	for (ia6 = V_in6_ifaddr; ia6; ia6 = nia6) {
	nia6 = ia6->ia_next;
	/* check address lifetime */
	lt6 = &ia6->ia6_lifetime;
	if (IFA6_IS_INVALID(ia6)) {
	int regen = 0;

	/*
	* If the expiring address is temporary, try
	* regenerating a new one. This would be useful when
	* we suspended a laptop PC, then turned it on after a
	* period that could invalidate all temporary
	* addresses. Although we may have to restart the
	* loop (see below), it must be after purging the
	* address. Otherwise, we'd see an infinite loop of
	* regeneration.
	*/
	if (V_ip6_use_tempaddr &&
	(ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
	if (regen_tmpaddr(ia6) == 0)
	regen = 1;
	}

	in6_purgeaddr(&ia6->ia_ifa);

	if (regen)
	goto addrloop; /* XXX: see below */
	} else if (IFA6_IS_DEPRECATED(ia6)) {
	int oldflags = ia6->ia6_flags;

	ia6->ia6_flags \|= IN6_IFF_DEPRECATED;

	/*
	* If a temporary address has just become deprecated,
	* regenerate a new one if possible.
	*/
	if (V_ip6_use_tempaddr &&
	(ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
	(oldflags & IN6_IFF_DEPRECATED) == 0) {

	if (regen_tmpaddr(ia6) == 0) {
	/*
	* A new temporary address is
	* generated.
	* XXX: this means the address chain
	* has changed while we are still in
	* the loop. Although the change
	* would not cause disaster (because
	* it's not a deletion, but an
	* addition,) we'd rather restart the
	* loop just for safety. Or does this
	* significantly reduce performance??
	*/
	goto addrloop;
	}
	}
	} else {
	/*
	* A new RA might have made a deprecated address
	* preferred.
	*/
	ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
	}
	}

	/* expire prefix list */
	pr = V_nd_prefix.lh_first;
	while (pr) {
	/*
	* check prefix lifetime.
	* since pltime is just for autoconf, pltime processing for
	* prefix is not necessary.
	*/
	if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
	time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
	struct nd_prefix *t;
	t = pr->ndpr_next;

	/*
	* address expiration and prefix expiration are
	* separate. NEVER perform in6_purgeaddr here.
	*/

	prelist_remove(pr);
	pr = t;
	} else
	pr = pr->ndpr_next;
	}
	splx(s);
	CURVNET_RESTORE();
	}

	/*
	 * Try to create a replacement for a temporary address that has been
	 * deprecated or invalidated.
	 *
	 * ia6 - deprecated/invalidated temporary address
	 *
	 * Scans the addresses of ia6's interface for autoconfigured IPv6
	 * addresses that share ia6's prefix.  If a still-preferred temporary
	 * address exists, nothing is created.  Otherwise the last still-preferred
	 * public address found is used as the base for in6_tmpifadd().
	 *
	 * Returns 0 when a new temporary address was created, -1 otherwise
	 * (no suitable public address, a preferred temporary address already
	 * exists, or in6_tmpifadd() failed, which is also logged).
	 */
	static int
	regen_tmpaddr(struct in6_ifaddr *ia6)
	{
		struct ifaddr *ifa;
		struct ifnet *ifp;
		struct in6_ifaddr *public_ifa6 = NULL;

		ifp = ia6->ia_ifa.ifa_ifp;
		for (ifa = ifp->if_addrlist.tqh_first; ifa;
		    ifa = ifa->ifa_list.tqe_next) {
			struct in6_ifaddr *it6;

			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;

			it6 = (struct in6_ifaddr *)ifa;

			/* ignore no autoconf addresses. */
			if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			/* ignore autoconf addresses with different prefixes. */
			if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
				continue;

			/*
			 * Now we are looking at an autoconf address with the same
			 * prefix as ours. If the address is temporary and is still
			 * preferred, do not create another one. It would be rare, but
			 * could happen, for example, when we resume a laptop PC after
			 * a long period.
			 */
			if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    !IFA6_IS_DEPRECATED(it6)) {
				public_ifa6 = NULL;
				break;
			}

			/*
			 * This is a public autoconf address that has the same prefix
			 * as ours. If it is preferred, keep it. We can't break the
			 * loop here, because there may be a still-preferred temporary
			 * address with the prefix.
			 */
			if (!IFA6_IS_DEPRECATED(it6))
				public_ifa6 = it6;
		}

		if (public_ifa6 != NULL) {
			int e;

			if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
				log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
				    " tmp addr,errno=%d\n", e);
				return (-1);
			}
			return (0);
		}

		return (-1);
	}

	/*
	* Nuke neighbor cache/prefix/default router management table, right before
	* ifp goes away.
	*
	* Removes the default routers and prefixes that refer to ifp, cancels
	* the default outgoing interface if it was ifp, and re-runs default
	* router selection when the node is a host accepting RAs.  Neighbor
	* cache entries are no longer touched here (see the XXXXX note below).
	*/
	void
	nd6_purge(struct ifnet *ifp)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	/*
	 * NOTE(review): dr/ndr and pr/npr are dereferenced with "->" below, so
	 * these four should be pointers; the "*" declarators appear to have been
	 * lost from this rendering of the diff.
	 */
	struct nd_defrouter dr, ndr;
	struct nd_prefix pr, npr;

	/*
	* Nuke default router list entries toward ifp.
	* We defer removal of default router list entries that is installed
	* in the routing table, in order to keep additional side effects as
	* small as possible.
	*/
	/* First pass: routers on ifp that are not installed. */
	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
	ndr = TAILQ_NEXT(dr, dr_entry);
	if (dr->installed)
	continue;

	if (dr->ifp == ifp)
	defrtrlist_del(dr);
	}

	/* Second pass: the installed routers on ifp. */
	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
	ndr = TAILQ_NEXT(dr, dr_entry);
	if (!dr->installed)
	continue;

	if (dr->ifp == ifp)
	defrtrlist_del(dr);
	}

	/* Nuke prefix list entries toward ifp */
	for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
	npr = pr->ndpr_next;
	if (pr->ndpr_ifp == ifp) {
	/*
	* Because if_detach() does not release prefixes
	* while purging addresses the reference count will
	* still be above zero. We therefore reset it to
	* make sure that the prefix really gets purged.
	*/
	pr->ndpr_refcnt = 0;

	/*
	* Previously, pr->ndpr_addr is removed as well,
	* but I strongly believe we don't have to do it.
	* nd6_purge() is only called from in6_ifdetach(),
	* which removes all the associated interface addresses
	* by itself.
	* (jinmei@kame.net 20010129)
	*/
	prelist_remove(pr);
	}
	}

	/* cancel default outgoing interface setting */
	if (V_nd6_defifindex == ifp->if_index)
	nd6_setdefaultiface(0);

	if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
	- /* refresh default router list */
	+ /* refresh default router list
	+ *
	+ *
	+ */
	defrouter_select();
	+
	}

	/* XXXXX
	* We do not nuke the neighbor cache entries here any more
	* because the neighbor cache is kept in if_afdata[AF_INET6].
	* nd6_purge() is invoked by in6_ifdetach() which is called
	* from if_detach() where everything gets purged. So let
	* in6_domifdetach() do the actual L2 table purging work.
	*/
	-#if 0
	- /*
	- * Nuke neighbor cache entries for the ifp.
	- * Note that rt->rt_ifp may not be the same as ifp,
	- * due to KAME goto ours hack. See RTM_RESOLVE case in
	- * nd6_rtrequest(), and ip6_input().
	- */
	- IF_AFDATA_LOCK(ifp);
	- lltable_free(LLTABLE6(ifp));
	- IF_AFDATA_UNLOCK(ifp);
	-#endif
	}

	-
	-
	/* Qing
	* the caller acquires and releases the lock on the lltbls
	+ * Returns the llentry locked
	*/
	/*
	 * Looks up (and, when creation is requested, creates) the neighbor cache
	 * entry for addr6 in ifp's IPv6 link-layer table.  The new revision
	 * replaces the boolean "create" argument with a flag word: ND6_CREATE
	 * and ND6_EXCLUSIVE are translated to LLE_CREATE and LLE_EXCLUSIVE for
	 * lla_lookup(), and the function asserts that the caller holds the
	 * IF_AFDATA lock.  Returns whatever lla_lookup() returned.
	 *
	 * NOTE(review): addr6 and ifp are used as pointers ("*addr6", passed to
	 * LLTABLE6()); the "*" in the signature lines appears to have been lost
	 * from this rendering of the diff.
	 */
	struct llentry *
	-nd6_lookup(struct in6_addr addr6, int create, struct ifnet ifp)
	+nd6_lookup(struct in6_addr addr6, int flags, struct ifnet ifp)
	{
	INIT_VNET_INET6(curvnet);
	struct sockaddr_in6 sin6;
	struct llentry *ln;
	- int flags = 0;
	-
	+ int llflags = 0;
	+
	bzero(&sin6, sizeof(sin6));
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = *addr6;

	- if (create)
	- flags \|= LLE_CREATE;
	- ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
	+ IF_AFDATA_LOCK_ASSERT(ifp);
	+
	+ if (flags & ND6_CREATE)
	+ llflags \|= LLE_CREATE;
	+ if (flags & ND6_EXCLUSIVE)
	+ llflags \|= LLE_EXCLUSIVE;
	+
	+ ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
	/*
	 * NOTE(review): in the new revision "flags" carries ND6_* bits while
	 * the translated LLE_* bits live in llflags, yet this test still checks
	 * flags against LLE_CREATE.  That is only right if ND6_CREATE and
	 * LLE_CREATE share a value -- confirm, or test llflags instead.
	 */
	if ((ln != NULL) && (flags & LLE_CREATE)) {
	ln->ln_state = ND6_LLINFO_NOSTATE;
	callout_init(&ln->ln_timer_ch, 0);
	}
	+
	return (ln);
	}

	/*
	* Test whether a given IPv6 address is a neighbor or not, ignoring
	* the actual neighbor cache. The neighbor cache is ignored in order
	* to not reenter the routing code from within itself.
	*/
	static int
	nd6_is_new_addr_neighbor(struct sockaddr_in6 addr, struct ifnet ifp)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	struct nd_prefix *pr;
	struct ifaddr *dstaddr;

	/*
	* A link-local address is always a neighbor.
	* XXX: a link does not necessarily specify a single interface.
	*/
	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
	struct sockaddr_in6 sin6_copy;
	u_int32_t zone;

	/*
	* We need sin6_copy since sa6_recoverscope() may modify the
	* content (XXX).
	*/
	sin6_copy = *addr;
	if (sa6_recoverscope(&sin6_copy))
	return (0); /* XXX: should be impossible */
	if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
	return (0);
	if (sin6_copy.sin6_scope_id == zone)
	return (1);
	else
	return (0);
	}

	/*
	* If the address matches one of our addresses,
	* it should be a neighbor.
	* If the address matches one of our on-link prefixes, it should be a
	* neighbor.
	*/
	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
	if (pr->ndpr_ifp != ifp)
	continue;

	if (!(pr->ndpr_stateflags & NDPRF_ONLINK))
	continue;

	if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
	&addr->sin6_addr, &pr->ndpr_mask))
	return (1);
	}

	/*
	* If the address is assigned on the node of the other side of
	* a p2p interface, the address should be a neighbor.
	*/
	dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
	if ((dstaddr != NULL) && (dstaddr->ifa_ifp == ifp))
	return (1);

	/*
	* If the default router list is empty, all addresses are regarded
	* as on-link, and thus, as a neighbor.
	* XXX: we restrict the condition to hosts, because routers usually do
	* not have the "default router list".
	*/
	if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
	V_nd6_defifindex == ifp->if_index) {
	return (1);
	}

	return (0);
	}


	/*
	* Detect if a given IPv6 address identifies a neighbor on a given link.
	* XXX: should take care of the destination of a p2p link?
	*
	* Returns 1 when nd6_is_new_addr_neighbor() accepts the address or when
	* it has an entry in ifp's neighbor cache, 0 otherwise.  The new
	* revision asserts that the IF_AFDATA lock is NOT held on entry, takes
	* it around the cache lookup, and read-unlocks the entry that
	* nd6_lookup() returned.
	*
	* NOTE(review): addr and ifp are used as pointers below; the "*" in the
	* signature appears to have been lost from this rendering of the diff.
	*/
	int
	nd6_is_addr_neighbor(struct sockaddr_in6 addr, struct ifnet ifp)
	{
	+ struct llentry *lle;
	+ int rc = 0;

	+ IF_AFDATA_UNLOCK_ASSERT(ifp);
	if (nd6_is_new_addr_neighbor(addr, ifp))
	return (1);

	/*
	* Even if the address matches none of our addresses, it might be
	* in the neighbor cache.
	*/
	IF_AFDATA_LOCK(ifp);
	- if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) {
	- IF_AFDATA_UNLOCK(ifp);
	- return (1);
	+ if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
	+ LLE_RUNLOCK(lle);
	+ rc = 1;
	}
	IF_AFDATA_UNLOCK(ifp);
	- return (0);
	+ return (rc);
	}

	/*
	 * Free an nd6 llinfo entry.
	 * Since the function would cause significant changes in the kernel, DO NOT
	 * make it global, unless you have a strong reason for the change, and are sure
	 * that the change is safe.
	 *
	 * ln - entry to release; its timer is cancelled first
	 * gc - non-zero when the caller is merely garbage collecting
	 *
	 * On a host (forwarding off) the entry is NOT freed when it is a stale,
	 * garbage-collected entry of a default router with a lifetime; its timer
	 * is re-armed instead.  Otherwise routes through a router neighbor are
	 * flushed, and for a default router the on-link prefix check and default
	 * router selection are redone, before the entry is handed to
	 * llentry_free() under the IF_AFDATA lock.
	 *
	 * Returns the entry that followed ln in its list.
	 */
	static struct llentry *
	nd6_free(struct llentry *ln, int gc)
	{
		INIT_VNET_INET6(curvnet);
		struct llentry *next;
		struct nd_defrouter *dr;
		struct ifnet *ifp = NULL;

		/*
		 * we used to have pfctlinput(PRC_HOSTDEAD) here.
		 * even though it is not harmful, it was not really necessary.
		 */

		/* cancel timer */
		nd6_llinfo_settimer(ln, -1);

		if (!V_ip6_forwarding) {
			int s;

			s = splnet();
			dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr,
			    ln->lle_tbl->llt_ifp);

			if (dr != NULL && dr->expire &&
			    ln->ln_state == ND6_LLINFO_STALE && gc) {
				/*
				 * If the reason for the deletion is just garbage
				 * collection, and the neighbor is an active default
				 * router, do not delete it. Instead, reset the GC
				 * timer using the router's lifetime.
				 * Simply deleting the entry would affect default
				 * router selection, which is not necessarily a good
				 * thing, especially when we're using router preference
				 * values.
				 * XXX: the check for ln_state would be redundant,
				 * but we intentionally keep it just in case.
				 */
				if (dr->expire > time_second)
					nd6_llinfo_settimer(ln,
					    (dr->expire - time_second) * hz);
				else
					nd6_llinfo_settimer(ln,
					    (long)V_nd6_gctimer * hz);
				splx(s);
				return (LIST_NEXT(ln, lle_next));
			}

			if (ln->ln_router || dr) {
				/*
				 * rt6_flush must be called whether or not the neighbor
				 * is in the Default Router List.
				 * See a corresponding comment in nd6_na_input().
				 */
				rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr,
				    ln->lle_tbl->llt_ifp);
			}

			if (dr) {
				/*
				 * Unreachability of a router might affect the default
				 * router selection and on-link detection of advertised
				 * prefixes.
				 */

				/*
				 * Temporarily fake the state to choose a new default
				 * router and to perform on-link determination of
				 * prefixes correctly.
				 * Below the state will be set correctly,
				 * or the entry itself will be deleted.
				 */
				ln->ln_state = ND6_LLINFO_INCOMPLETE;

				/*
				 * Since defrouter_select() does not affect the
				 * on-link determination and MIP6 needs the check
				 * before the default router selection, we perform
				 * the check now.
				 */
				pfxlist_onlink_check();

				/*
				 * refresh default router list
				 */
				defrouter_select();
			}
			splx(s);
		}

		/*
		 * Before deleting the entry, remember the next entry as the
		 * return value. We need this because pfxlist_onlink_check() above
		 * might have freed other entries (particularly the old next entry) as
		 * a side effect (XXX).
		 */
		next = LIST_NEXT(ln, lle_next);

		ifp = ln->lle_tbl->llt_ifp;
		IF_AFDATA_LOCK(ifp);
		llentry_free(ln);
		IF_AFDATA_UNLOCK(ifp);

		return (next);
	}

	/*
	* Upper-layer reachability hint for Neighbor Unreachability Detection.
	*
	* XXX cost-effective methods?
	*
	* Marks the neighbor entry for dst6 REACHABLE and restarts its timer
	* (unless the entry is permanent).  Nothing happens when rt or dst6 is
	* NULL, when no entry is found, or when the entry's state is below
	* REACHABLE.  Unless "force" is set, each call counts as one hint and
	* the update is skipped once ln_byhint exceeds V_nd6_maxnudhint.
	*
	* The new revision holds the IF_AFDATA lock only around the lookup,
	* requests the entry with ND6_EXCLUSIVE and write-unlocks it at "done".
	*
	* NOTE(review): rt and dst6 are used as pointers below; the "*" in the
	* signature appears to have been lost from this rendering of the diff.
	*/
	void
	nd6_nud_hint(struct rtentry rt, struct in6_addr dst6, int force)
	{
	INIT_VNET_INET6(curvnet);
	struct llentry *ln;
	- struct ifnet *ifp = NULL;
	+ struct ifnet *ifp;

	- if (dst6 == NULL)
	+ if ((dst6 == NULL) \|\| (rt == NULL))
	return;
	- if (rt == NULL)
	- return;

	ifp = rt->rt_ifp;
	IF_AFDATA_LOCK(ifp);
	/*
	 * NOTE(review): both revisions pass NULL as the interface, but
	 * nd6_lookup() hands its ifp argument to IF_AFDATA_LOCK_ASSERT() and
	 * LLTABLE6() -- this most likely should be "ifp"; confirm.
	 */
	- if ((ln = nd6_lookup(dst6, 0, NULL)) == NULL) {
	- IF_AFDATA_UNLOCK(ifp);
	+ ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
	+ IF_AFDATA_UNLOCK(ifp);
	+ if (ln == NULL)
	return;
	- }

	- if (ln->ln_state < ND6_LLINFO_REACHABLE) {
	- IF_AFDATA_UNLOCK(ifp);
	- return;
	- }
	+ if (ln->ln_state < ND6_LLINFO_REACHABLE)
	+ goto done;

	/*
	* if we get upper-layer reachability confirmation many times,
	* it is possible we have false information.
	*/
	if (!force) {
	ln->ln_byhint++;
	if (ln->ln_byhint > V_nd6_maxnudhint) {
	- IF_AFDATA_UNLOCK(ifp);
	- return;
	+ goto done;
	}
	}

	- ln->ln_state = ND6_LLINFO_REACHABLE;
	+ ln->ln_state = ND6_LLINFO_REACHABLE;
	if (!ND6_LLINFO_PERMANENT(ln)) {
	nd6_llinfo_settimer(ln,
	(long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
	}
	- IF_AFDATA_UNLOCK(ifp);
	+done:
	+ LLE_WUNLOCK(ln);
	}


	int
	nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	struct in6_drlist drl = (struct in6_drlist )data;
	struct in6_oprlist oprl = (struct in6_oprlist )data;
	struct in6_ndireq ndi = (struct in6_ndireq )data;
	struct in6_nbrinfo nbi = (struct in6_nbrinfo )data;
	struct in6_ndifreq ndif = (struct in6_ndifreq )data;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	int i = 0, error = 0;
	int s;

	switch (cmd) {
	case SIOCGDRLST_IN6:
	/*
	* obsolete API, use sysctl under net.inet6.icmp6
	*/
	bzero(drl, sizeof(*drl));
	s = splnet();
	dr = TAILQ_FIRST(&V_nd_defrouter);
	while (dr && i < DRLSTSIZ) {
	drl->defrouter[i].rtaddr = dr->rtaddr;
	in6_clearscope(&drl->defrouter[i].rtaddr);

	drl->defrouter[i].flags = dr->flags;
	drl->defrouter[i].rtlifetime = dr->rtlifetime;
	drl->defrouter[i].expire = dr->expire;
	drl->defrouter[i].if_index = dr->ifp->if_index;
	i++;
	dr = TAILQ_NEXT(dr, dr_entry);
	}
	splx(s);
	break;
	case SIOCGPRLST_IN6:
	/*
	* obsolete API, use sysctl under net.inet6.icmp6
	*
	* XXX the structure in6_prlist was changed in backward-
	* incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6,
	* in6_prlist is used for nd6_sysctl() - fill_prlist().
	*/
	/*
	* XXX meaning of fields, especialy "raflags", is very
	* differnet between RA prefix list and RR/static prefix list.
	* how about separating ioctls into two?
	*/
	bzero(oprl, sizeof(*oprl));
	s = splnet();
	pr = V_nd_prefix.lh_first;
	while (pr && i < PRLSTSIZ) {
	struct nd_pfxrouter *pfr;
	int j;

	oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
	oprl->prefix[i].raflags = pr->ndpr_raf;
	oprl->prefix[i].prefixlen = pr->ndpr_plen;
	oprl->prefix[i].vltime = pr->ndpr_vltime;
	oprl->prefix[i].pltime = pr->ndpr_pltime;
	oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
	if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
	oprl->prefix[i].expire = 0;
	else {
	time_t maxexpire;

	/* XXX: we assume time_t is signed. */
	maxexpire = (-1) &
	~((time_t)1 <<
	((sizeof(maxexpire) * 8) - 1));
	if (pr->ndpr_vltime <
	maxexpire - pr->ndpr_lastupdate) {
	oprl->prefix[i].expire =
	pr->ndpr_lastupdate +
	pr->ndpr_vltime;
	} else
	oprl->prefix[i].expire = maxexpire;
	}

	pfr = pr->ndpr_advrtrs.lh_first;
	j = 0;
	while (pfr) {
	if (j < DRLSTSIZ) {
	#define RTRADDR oprl->prefix[i].advrtr[j]
	RTRADDR = pfr->router->rtaddr;
	in6_clearscope(&RTRADDR);
	#undef RTRADDR
	}
	j++;
	pfr = pfr->pfr_next;
	}
	oprl->prefix[i].advrtrs = j;
	oprl->prefix[i].origin = PR_ORIG_RA;

	i++;
	pr = pr->ndpr_next;
	}
	splx(s);

	break;
	case OSIOCGIFINFO_IN6:
	#define ND ndi->ndi
	/* XXX: old ndp(8) assumes a positive value for linkmtu. */
	bzero(&ND, sizeof(ND));
	ND.linkmtu = IN6_LINKMTU(ifp);
	ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
	ND.basereachable = ND_IFINFO(ifp)->basereachable;
	ND.reachable = ND_IFINFO(ifp)->reachable;
	ND.retrans = ND_IFINFO(ifp)->retrans;
	ND.flags = ND_IFINFO(ifp)->flags;
	ND.recalctm = ND_IFINFO(ifp)->recalctm;
	ND.chlim = ND_IFINFO(ifp)->chlim;
	break;
	case SIOCGIFINFO_IN6:
	ND = *ND_IFINFO(ifp);
	break;
	case SIOCSIFINFO_IN6:
	/*
	* Used to change host variables from userland.
	* Intended for use on a router, to reflect RA configurations.
	*/
	/* 0 means 'unspecified' */
	if (ND.linkmtu != 0) {
	if (ND.linkmtu < IPV6_MMTU \|\|
	ND.linkmtu > IN6_LINKMTU(ifp)) {
	error = EINVAL;
	break;
	}
	ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
	}

	if (ND.basereachable != 0) {
	int obasereachable = ND_IFINFO(ifp)->basereachable;

	ND_IFINFO(ifp)->basereachable = ND.basereachable;
	if (ND.basereachable != obasereachable)
	ND_IFINFO(ifp)->reachable =
	ND_COMPUTE_RTIME(ND.basereachable);
	}
	if (ND.retrans != 0)
	ND_IFINFO(ifp)->retrans = ND.retrans;
	if (ND.chlim != 0)
	ND_IFINFO(ifp)->chlim = ND.chlim;
	/* FALLTHROUGH */
	case SIOCSIFINFO_FLAGS:
	ND_IFINFO(ifp)->flags = ND.flags;
	break;
	#undef ND
	case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */
	/* sync kernel routing table with the default router list */
	defrouter_reset();
	defrouter_select();
	break;
	case SIOCSPFXFLUSH_IN6:
	{
	/* flush all the prefix advertised by routers */
	struct nd_prefix pr, next;

	s = splnet();
	for (pr = V_nd_prefix.lh_first; pr; pr = next) {
	struct in6_ifaddr ia, ia_next;

	next = pr->ndpr_next;

	if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
	continue; /* XXX */

	/* do we really have to remove addresses as well? */
	for (ia = V_in6_ifaddr; ia; ia = ia_next) {
	/* ia might be removed. keep the next ptr. */
	ia_next = ia->ia_next;

	if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
	continue;

	if (ia->ia6_ndpr == pr)
	in6_purgeaddr(&ia->ia_ifa);
	}
	prelist_remove(pr);
	}
	splx(s);
	break;
	}
	case SIOCSRTRFLUSH_IN6:
	{
	/* flush all the default routers */
	struct nd_defrouter dr, next;

	s = splnet();
	defrouter_reset();
	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
	next = TAILQ_NEXT(dr, dr_entry);
	defrtrlist_del(dr);
	}
	defrouter_select();
	splx(s);
	break;
	}
	case SIOCGNBRINFO_IN6:
	{
	struct llentry *ln;
	struct in6_addr nb_addr = nbi->addr; /* make local for safety */

	if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
	return (error);

	IF_AFDATA_LOCK(ifp);
	- if ((ln = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
	+ ln = nd6_lookup(&nb_addr, 0, ifp);
	+ IF_AFDATA_UNLOCK(ifp);
	+
	+ if (ln == NULL) {
	error = EINVAL;
	- IF_AFDATA_UNLOCK(ifp);
	break;
	}
	nbi->state = ln->ln_state;
	nbi->asked = ln->la_asked;
	nbi->isrouter = ln->ln_router;
	nbi->expire = ln->la_expire;
	- IF_AFDATA_UNLOCK(ifp);
	-
	+ LLE_RUNLOCK(ln);
	break;
	}
	case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
	ndif->ifindex = V_nd6_defifindex;
	break;
	case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
	return (nd6_setdefaultiface(ndif->ifindex));
	}
	return (error);
	}

	/*
	* Create neighbor cache entry and cache link-layer address,
	* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
	*
	* type - ICMP6 type
	* code - type dependent information
	*
	* XXXXX
	* The caller of this function already acquired the ndp
	* cache table lock because the cache entry is returned.
	*/
	struct llentry *
	nd6_cache_lladdr(struct ifnet ifp, struct in6_addr from, char *lladdr,
	int lladdrlen, int type, int code)
	{
	INIT_VNET_INET6(curvnet);
	struct llentry *ln = NULL;
	int is_newentry;
	int do_update;
	int olladdr;
	int llchange;
	+ int flags = 0;
	int newstate = 0;

	+ IF_AFDATA_UNLOCK_ASSERT(ifp);
	+
	if (ifp == NULL)
	panic("ifp == NULL in nd6_cache_lladdr");
	if (from == NULL)
	panic("from == NULL in nd6_cache_lladdr");

	/* nothing must be updated for unspecified address */
	if (IN6_IS_ADDR_UNSPECIFIED(from))
	return NULL;

	/*
	* Validation about ifp->if_addrlen and lladdrlen must be done in
	* the caller.
	*
	* XXX If the link does not have link-layer adderss, what should
	* we do? (ifp->if_addrlen == 0)
	* Spec says nothing in sections for RA, RS and NA. There's small
	* description on it in NS section (RFC 2461 7.2.3).
	*/
	- ln = nd6_lookup(from, 0, ifp);
	+ flags \|= lladdr ? ND6_EXCLUSIVE : 0;
	+ IF_AFDATA_LOCK(ifp);
	+ ln = nd6_lookup(from, flags, ifp);
	+ if (ln)
	+ IF_AFDATA_UNLOCK(ifp);
	if (ln == NULL) {
	- ln = nd6_lookup(from, 1, ifp);
	+ ln = nd6_lookup(from, flags \|ND6_CREATE, ifp);
	+ IF_AFDATA_UNLOCK(ifp);
	is_newentry = 1;
	} else {
	/* do nothing if static ndp is set */
	if (ln->la_flags & LLE_STATIC)
	- return NULL;
	+ goto done;
	is_newentry = 0;
	}

	- if (ln == NULL) {
	- return NULL;
	- }
	+ if (ln == NULL)
	+ return (NULL);

	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
	if (olladdr && lladdr) {
	- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
	- llchange = 1;
	- else
	- llchange = 0;
	+ llchange = bcmp(lladdr, &ln->ll_addr,
	+ ifp->if_addrlen);
	} else
	llchange = 0;

	/*
	* newentry olladdr lladdr llchange (*=record)
	* 0 n n -- (1)
	* 0 y n -- (2)
	* 0 n y -- (3) * STALE
	* 0 y y n (4) *
	* 0 y y y (5) * STALE
	* 1 -- n -- (6) NOSTATE(= PASSIVE)
	* 1 -- y -- (7) * STALE
	*/

	if (lladdr) { /* (3-5) and (7) */
	/*
	* Record source link-layer address
	* XXX is it dependent to ifp->if_type?
	*/
	bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
	ln->la_flags \|= LLE_VALID;
	}

	if (!is_newentry) {
	if ((!olladdr && lladdr != NULL) \|\| /* (3) */
	(olladdr && lladdr != NULL && llchange)) { /* (5) */
	do_update = 1;
	newstate = ND6_LLINFO_STALE;
	} else /* (1-2,4) */
	do_update = 0;
	} else {
	do_update = 1;
	if (lladdr == NULL) /* (6) */
	newstate = ND6_LLINFO_NOSTATE;
	else /* (7) */
	newstate = ND6_LLINFO_STALE;
	}

	if (do_update) {
	/*
	* Update the state of the neighbor cache.
	*/
	ln->ln_state = newstate;

	if (ln->ln_state == ND6_LLINFO_STALE) {
	/*
	* XXX: since nd6_output() below will cause
	* state tansition to DELAY and reset the timer,
	* we must set the timer now, although it is actually
	* meaningless.
	*/
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);

	if (ln->la_hold) {
	struct mbuf m_hold, m_hold_next;

	/*
	* reset the la_hold in advance, to explicitly
	* prevent a la_hold lookup in nd6_output()
	* (wouldn't happen, though...)
	*/
	for (m_hold = ln->la_hold, ln->la_hold = NULL;
	m_hold; m_hold = m_hold_next) {
	m_hold_next = m_hold->m_nextpkt;
	m_hold->m_nextpkt = NULL;

	/*
	* we assume ifp is not a p2p here, so
	* just set the 2nd argument as the
	* 1st one.
	*/
	nd6_output(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL);
	}
	}
	} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
	/* probe right away */
	nd6_llinfo_settimer((void *)ln, 0);
	}
	}

	/*
	* ICMP6 type dependent behavior.
	*
	* NS: clear IsRouter if new entry
	* RS: clear IsRouter
	* RA: set IsRouter if there's lladdr
	* redir: clear IsRouter if new entry
	*
	* RA case, (1):
	* The spec says that we must set IsRouter in the following cases:
	* - If lladdr exist, set IsRouter. This means (1-5).
	* - If it is old entry (!newentry), set IsRouter. This means (7).
	* So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
	* A quetion arises for (1) case. (1) case has no lladdr in the
	* neighbor cache, this is similar to (6).
	* This case is rare but we figured that we MUST NOT set IsRouter.
	*
	* newentry olladdr lladdr llchange NS RS RA redir
	* D R
	* 0 n n -- (1) c ? s
	* 0 y n -- (2) c s s
	* 0 n y -- (3) c s s
	* 0 y y n (4) c s s
	* 0 y y y (5) c s s
	* 1 -- n -- (6) c c c s
	* 1 -- y -- (7) c c s c s
	*
	* (c=clear s=set)
	*/
	switch (type & 0xff) {
	case ND_NEIGHBOR_SOLICIT:
	/*
	* New entry must have is_router flag cleared.
	*/
	if (is_newentry) /* (6-7) */
	ln->ln_router = 0;
	break;
	case ND_REDIRECT:
	/*
	* If the icmp is a redirect to a better router, always set the
	* is_router flag. Otherwise, if the entry is newly created,
	* clear the flag. [RFC 2461, sec 8.3]
	*/
	if (code == ND_REDIRECT_ROUTER)
	ln->ln_router = 1;
	else if (is_newentry) /* (6-7) */
	ln->ln_router = 0;
	break;
	case ND_ROUTER_SOLICIT:
	/*
	* is_router flag must always be cleared.
	*/
	ln->ln_router = 0;
	break;
	case ND_ROUTER_ADVERT:
	/*
	* Mark an entry with lladdr as a router.
	*/
	if ((!is_newentry && (olladdr \|\| lladdr)) \|\| /* (2-5) */
	(is_newentry && lladdr)) { /* (7) */
	ln->ln_router = 1;
	}
	break;
	}

	/*
	* When the link-layer address of a router changes, select the
	* best router again. In particular, when the neighbor entry is newly
	* created, it might affect the selection policy.
	* Question: can we restrict the first condition to the "is_newentry"
	* case?
	* XXX: when we hear an RA from a new router with the link-layer
	* address option, defrouter_select() is called twice, since
	* defrtrlist_update called the function as well. However, I believe
	* we can compromise the overhead, since it only happens the first
	* time.
	* XXX: although defrouter_select() should not have a bad effect
	* for those are not autoconfigured hosts, we explicitly avoid such
	* cases for safety.
	*/
	- if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv)
	+ if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
	+#ifdef notyet
	+ /*
	+ * XXX implement the boiler plate
	+ */
	+ taskqueue_enqueue(ipv6_taskq, defrouter_select_task);
	+#endif
	+ /*
	+ * guaranteed recursion
	+ */
	defrouter_select();
	-
	- return ln;
	+ }
	+
	+done:
	+ if (ln) {
	+ if (flags & ND6_EXCLUSIVE)
	+ LLE_WUNLOCK(ln);
	+ else
	+ LLE_RUNLOCK(ln);
	+ if (ln->la_flags & LLE_STATIC)
	+ ln = NULL;
	+ }
	+ return (ln);
	}

	static void
	nd6_slowtimo(void *arg)
	{
	CURVNET_SET((struct vnet *) arg);
	INIT_VNET_NET((struct vnet *) arg);
	INIT_VNET_INET6((struct vnet *) arg);
	struct nd_ifinfo *nd6if;
	struct ifnet *ifp;

	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
	nd6_slowtimo, NULL);
	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
	ifp = TAILQ_NEXT(ifp, if_list)) {
	nd6if = ND_IFINFO(ifp);
	if (nd6if->basereachable && /* already initialized */
	(nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
	/*
	* Since reachable time rarely changes by router
	* advertisements, we SHOULD insure that a new random
	* value gets recomputed at least once every few hours.
	* (RFC 2461, 6.3.4)
	*/
	nd6if->recalctm = V_nd6_recalc_reachtm_interval;
	nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
	}
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();
	}

	+/*
	+ * Note that I'm not enforcing any global serialization
	+ * lle state or asked changes here as the logic is too
	+ * complicated to avoid having to always acquire an exclusive
	+ * lock
	+ * KMM
	+ *
	+ */
	#define senderr(e) { error = (e); goto bad;}
	int
	nd6_output(struct ifnet ifp, struct ifnet origifp, struct mbuf *m0,
	struct sockaddr_in6 dst, struct rtentry rt0)
	{
	INIT_VNET_INET6(curvnet);
	struct mbuf *m = m0;
	struct rtentry *rt = rt0;
	struct llentry *ln = NULL;
	int error = 0;
	+ int flags = 0;

	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
	goto sendpkt;

	if (nd6_need_cache(ifp) == 0)
	goto sendpkt;

	/*
	* next hop determination. This routine is derived from ether_output.
	*/

	/*
	* Address resolution or Neighbor Unreachability Detection
	* for the next hop.
	* At this point, the destination of the packet must be a unicast
	* or an anycast address(i.e. not a multicast).
	*/
	- ln = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)dst);
	+ flags = m ? LLE_EXCLUSIVE : 0;
	+ IF_AFDATA_LOCK(rt->rt_ifp);
	+ ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
	+ IF_AFDATA_UNLOCK(rt->rt_ifp);
	if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
	/*
	* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
	* the condition below is not very efficient. But we believe
	* it is tolerable, because this should be a rare case.
	*/
	- ln = nd6_lookup(&dst->sin6_addr, 1, ifp);
	+ flags = ND6_CREATE \| (m ? ND6_EXCLUSIVE : 0);
	+ IF_AFDATA_LOCK(rt->rt_ifp);
	+ ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
	+ IF_AFDATA_UNLOCK(rt->rt_ifp);
	}
	if (ln == NULL) {
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
	!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
	char ip6buf[INET6_ADDRSTRLEN];
	log(LOG_DEBUG,
	"nd6_output: can't allocate llinfo for %s "
	"(ln=%p, rt=%p)\n",
	ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt);
	senderr(EIO); /* XXX: good error? */
	}
	-
	goto sendpkt; /* send anyway */
	}

	/* We don't have to do link-layer address resolution on a p2p link. */
	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
	ln->ln_state < ND6_LLINFO_REACHABLE) {
	ln->ln_state = ND6_LLINFO_STALE;
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
	}

	/*
	* The first time we send a packet to a neighbor whose entry is
	* STALE, we have to change the state to DELAY and a sets a timer to
	* expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
	* neighbor unreachability detection on expiration.
	* (RFC 2461 7.3.3)
	*/
	if (ln->ln_state == ND6_LLINFO_STALE) {
	ln->la_asked = 0;
	ln->ln_state = ND6_LLINFO_DELAY;
	nd6_llinfo_settimer(ln, (long)V_nd6_delay * hz);
	}

	/*
	* If the neighbor cache entry has a state other than INCOMPLETE
	* (i.e. its link-layer address is already resolved), just
	* send the packet.
	*/
	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
	goto sendpkt;

	/*
	* There is a neighbor cache entry, but no ethernet address
	* response yet. Append this latest packet to the end of the
	* packet queue in the mbuf, unless the number of the packet
	* does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen,
	* the oldest packet in the queue will be removed.
	*/
	if (ln->ln_state == ND6_LLINFO_NOSTATE)
	ln->ln_state = ND6_LLINFO_INCOMPLETE;
	if (ln->la_hold) {
	struct mbuf *m_hold;
	int i;

	i = 0;
	for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
	i++;
	if (m_hold->m_nextpkt == NULL) {
	m_hold->m_nextpkt = m;
	break;
	}
	}
	while (i >= V_nd6_maxqueuelen) {
	m_hold = ln->la_hold;
	ln->la_hold = ln->la_hold->m_nextpkt;
	m_freem(m_hold);
	i--;
	}
	} else {
	ln->la_hold = m;
	}

	+ if (flags & LLE_EXCLUSIVE)
	+ LLE_WUNLOCK(ln);
	+ else
	+ LLE_RUNLOCK(ln);
	+
	/*
	* If there has been no NS for the neighbor after entering the
	* INCOMPLETE state, send the first solicitation.
	*/
	if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
	ln->la_asked++;
	+
	nd6_llinfo_settimer(ln,
	(long)ND_IFINFO(ifp)->retrans * hz / 1000);
	nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
	}
	return (0);

	sendpkt:
	/* discard the packet if IPv6 operation is disabled on the interface */
	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
	error = ENETDOWN; /* better error? */
	goto bad;
	}
	+ if (ln) {
	+ if (flags & LLE_EXCLUSIVE)
	+ LLE_WUNLOCK(ln);
	+ else
	+ LLE_RUNLOCK(ln);
	+ }

	#ifdef MAC
	mac_netinet6_nd6_send(ifp, m);
	#endif
	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
	return ((ifp->if_output)(origifp, m, (struct sockaddr )dst,
	rt));
	}
	error = (ifp->if_output)(ifp, m, (struct sockaddr )dst, rt);
	return (error);

	bad:
	+ if (ln) {
	+ if (flags & LLE_EXCLUSIVE)
	+ LLE_WUNLOCK(ln);
	+ else
	+ LLE_RUNLOCK(ln);
	+ }
	if (m)
	m_freem(m);
	return (error);
	}
	#undef senderr

	/*
	 * Tell whether a neighbor cache is kept for the given interface.
	 *
	 * Returns 1 for the link types listed in the switch below and 0 for
	 * every other if_type.
	 *
	 * RFC2893 says:
	 * - unidirectional tunnels needs no ND
	 */
	int
	nd6_need_cache(struct ifnet *ifp)
	{
		int wants_cache = 0;

		switch (ifp->if_type) {
		case IFT_ARCNET:
		case IFT_ETHER:
		case IFT_FDDI:
		case IFT_IEEE1394:
	#ifdef IFT_L2VLAN
		case IFT_L2VLAN:
	#endif
	#ifdef IFT_IEEE80211
		case IFT_IEEE80211:
	#endif
	#ifdef IFT_CARP
		case IFT_CARP:
	#endif
		case IFT_GIF:		/* XXX need more cases? */
		case IFT_PPP:
		case IFT_TUNNEL:
		case IFT_BRIDGE:
		case IFT_PROPVIRTUAL:
			wants_cache = 1;
			break;
		default:
			break;
		}

		return (wants_cache);
	}

	/*
	- * the caller of this function needs to lock the interface table
	+ * the callers of this function need to be re-worked to drop
	+ * the lle lock, drop here for now
	*/
	/*
	 * Fill desten with the link-layer destination for packet m sent to dst
	 * on ifp.
	 *
	 * ifp    - outgoing interface; its AF data lock must NOT be held
	 * rt0    - not used by this function
	 * m      - the packet; only m_flags is examined, freed on failure
	 * dst    - destination address
	 * desten - output buffer for the link-layer address
	 * lle    - set to NULL on entry, and to the cache entry on unicast success
	 *
	 * Multicast packets get an address derived from the interface type.
	 * Unicast packets are resolved through the interface's neighbor table.
	 *
	 * Returns 0 on success, EAFNOSUPPORT for multicast on an unsupported
	 * interface type, or 1 when no valid cache entry exists.  The packet is
	 * freed in both failure cases.
	 */
	int
	nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
	    struct sockaddr *dst, u_char *desten, struct llentry **lle)
	{
		struct llentry *ln;

		*lle = NULL;
	+	IF_AFDATA_UNLOCK_ASSERT(ifp);
		if (m->m_flags & M_MCAST) {
			int i;

			switch (ifp->if_type) {
			case IFT_ETHER:
			case IFT_FDDI:
	#ifdef IFT_L2VLAN
			case IFT_L2VLAN:
	#endif
	#ifdef IFT_IEEE80211
			case IFT_IEEE80211:
	#endif
			case IFT_BRIDGE:
			case IFT_ISO88025:
				ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
				    desten);
				return (0);
			case IFT_IEEE1394:
				/*
				 * netbsd can use if_broadcastaddr, but we don't do so
				 * to reduce # of ifdef.
				 */
				for (i = 0; i < ifp->if_addrlen; i++)
					desten[i] = ~0;
				return (0);
			case IFT_ARCNET:
				*desten = 0;
				return (0);
			default:
				m_freem(m);
				return (EAFNOSUPPORT);
			}
		}


		/*
		 * the entry should have been created in nd6_store_lladdr
		 */
	+	IF_AFDATA_LOCK(ifp);
		ln = lla_lookup(LLTABLE6(ifp), 0, dst);
	+	IF_AFDATA_UNLOCK(ifp);
		if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
	+		if (ln)
	+			LLE_RUNLOCK(ln);
			/* this could happen, if we could not allocate memory */
			m_freem(m);
			return (1);
		}

		bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
		*lle = ln;
	+	LLE_RUNLOCK(ln);
	+	/*
	+	 * A small use after free race exists here
	+	 */
		return (0);
	}

	static void
	clear_llinfo_pqueue(struct llentry *ln)
	{
	struct mbuf m_hold, m_hold_next;

	for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
	m_hold_next = m_hold->m_nextpkt;
	m_hold->m_nextpkt = NULL;
	m_freem(m_hold);
	}

	ln->la_hold = NULL;
	return;
	}

	/* Handlers for the two read-only list nodes registered below. */
	static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
	static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
	#ifdef SYSCTL_DECL
	SYSCTL_DECL(_net_inet6_icmp6);
	#endif
	/* Default router list, served by nd6_sysctl_drlist(). */
	SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
	CTLFLAG_RD, nd6_sysctl_drlist, "");
	/* Prefix list, served by nd6_sysctl_prlist(). */
	SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
	CTLFLAG_RD, nd6_sysctl_prlist, "");
	/* Read/write knob for nd6_maxqueuelen, the hold-queue limit used by nd6_output(). */
	SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN,
	nd6_maxqueuelen, CTLFLAG_RW, nd6_maxqueuelen, 1, "");

	static int
	nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
	{
	INIT_VNET_INET6(curvnet);
	int error;
	char buf[1024] __aligned(4);
	struct in6_defrouter d, de;
	struct nd_defrouter *dr;

	if (req->newptr)
	return EPERM;
	error = 0;

	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
	dr = TAILQ_NEXT(dr, dr_entry)) {
	d = (struct in6_defrouter *)buf;
	de = (struct in6_defrouter *)(buf + sizeof(buf));

	if (d + 1 <= de) {
	bzero(d, sizeof(*d));
	d->rtaddr.sin6_family = AF_INET6;
	d->rtaddr.sin6_len = sizeof(d->rtaddr);
	d->rtaddr.sin6_addr = dr->rtaddr;
	error = sa6_recoverscope(&d->rtaddr);
	if (error != 0)
	return (error);
	d->flags = dr->flags;
	d->rtlifetime = dr->rtlifetime;
	d->expire = dr->expire;
	d->if_index = dr->ifp->if_index;
	} else
	panic("buffer too short");

	error = SYSCTL_OUT(req, buf, sizeof(*d));
	if (error)
	break;
	}

	return (error);
	}

	static int
	nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
	{
	INIT_VNET_INET6(curvnet);
	int error;
	char buf[1024] __aligned(4);
	struct in6_prefix p, pe;
	struct nd_prefix *pr;
	char ip6buf[INET6_ADDRSTRLEN];

	if (req->newptr)
	return EPERM;
	error = 0;

	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
	u_short advrtrs;
	size_t advance;
	struct sockaddr_in6 sin6, s6;
	struct nd_pfxrouter *pfr;

	p = (struct in6_prefix *)buf;
	pe = (struct in6_prefix *)(buf + sizeof(buf));

	if (p + 1 <= pe) {
	bzero(p, sizeof(*p));
	sin6 = (struct sockaddr_in6 *)(p + 1);

	p->prefix = pr->ndpr_prefix;
	if (sa6_recoverscope(&p->prefix)) {
	log(LOG_ERR,
	"scope error in prefix list (%s)\n",
	ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
	/* XXX: press on... */
	}
	p->raflags = pr->ndpr_raf;
	p->prefixlen = pr->ndpr_plen;
	p->vltime = pr->ndpr_vltime;
	p->pltime = pr->ndpr_pltime;
	p->if_index = pr->ndpr_ifp->if_index;
	if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
	p->expire = 0;
	else {
	time_t maxexpire;

	/* XXX: we assume time_t is signed. */
	maxexpire = (-1) &
	~((time_t)1 <<
	((sizeof(maxexpire) * 8) - 1));
	if (pr->ndpr_vltime <
	maxexpire - pr->ndpr_lastupdate) {
	p->expire = pr->ndpr_lastupdate +
	pr->ndpr_vltime;
	} else
	p->expire = maxexpire;
	}
	p->refcnt = pr->ndpr_refcnt;
	p->flags = pr->ndpr_stateflags;
	p->origin = PR_ORIG_RA;
	advrtrs = 0;
	for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
	pfr = pfr->pfr_next) {
	if ((void )&sin6[advrtrs + 1] > (void )pe) {
	advrtrs++;
	continue;
	}
	s6 = &sin6[advrtrs];
	bzero(s6, sizeof(*s6));
	s6->sin6_family = AF_INET6;
	s6->sin6_len = sizeof(*sin6);
	s6->sin6_addr = pfr->router->rtaddr;
	if (sa6_recoverscope(s6)) {
	log(LOG_ERR,
	"scope error in "
	"prefix list (%s)\n",
	ip6_sprintf(ip6buf,
	&pfr->router->rtaddr));
	}
	advrtrs++;
	}
	p->advrtrs = advrtrs;
	} else
	panic("buffer too short");

	advance = sizeof(p) + sizeof(sin6) * advrtrs;
	error = SYSCTL_OUT(req, buf, advance);
	if (error)
	break;
	}

	return (error);
	}
	Index: projects/arpv2_merge_1/sys/netinet6/nd6.h
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/nd6.h (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/nd6.h (revision 185839)
	@@ -1,426 +1,429 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $
	* $FreeBSD$
	*/

	#ifndef _NETINET6_ND6_H_
	#define _NETINET6_ND6_H_

	/* see net/route.h, or net/if_inarp.h */
	#ifndef RTF_ANNOUNCE
	#define RTF_ANNOUNCE RTF_PROTO2
	#endif

	#include <sys/queue.h>
	#include <sys/callout.h>

	struct llentry;

	#define ND6_LLINFO_NOSTATE -2
	/*
	* We don't need the WAITDELETE state any more, but we keep the definition
	* in a comment line instead of removing it. This is necessary to avoid
	* unintentionally reusing the value for another purpose, which might
	* affect backward compatibility with old applications.
	* (20000711 jinmei@kame.net)
	*/
	/* #define ND6_LLINFO_WAITDELETE -1 */
	#define ND6_LLINFO_INCOMPLETE 0
	#define ND6_LLINFO_REACHABLE 1
	#define ND6_LLINFO_STALE 2
	#define ND6_LLINFO_DELAY 3
	#define ND6_LLINFO_PROBE 4

	#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
	#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))

	/*
	 * Per-interface Neighbor Discovery state, reached in the kernel through
	 * the ND_IFINFO() macro.  retrans is used in milliseconds by nd6_output()
	 * (retrans * hz / 1000); recalctm is counted down by nd6_slowtimo().
	 */
	struct nd_ifinfo {
	u_int32_t linkmtu; /* LinkMTU */
	u_int32_t maxmtu; /* Upper bound of LinkMTU */
	u_int32_t basereachable; /* BaseReachableTime */
	u_int32_t reachable; /* Reachable Time */
	u_int32_t retrans; /* Retrans Timer */
	u_int32_t flags; /* Flags */
	int recalctm; /* BaseReachable re-calculation timer */
	u_int8_t chlim; /* CurHopLimit */
	u_int8_t initialized; /* Flag to see the entry is initialized */
	/* the following 3 members are for privacy extension for addrconf */
	u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
	u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
	u_int8_t randomid[8]; /* current random ID */
	};

	#define ND6_IFF_PERFORMNUD 0x1
	#define ND6_IFF_ACCEPT_RTADV 0x2
	#define ND6_IFF_PREFER_SOURCE 0x4 /* XXX: not related to ND. */
	#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to
	* DAD failure. (XXX: not ND-specific)
	*/
	#define ND6_IFF_DONT_SET_IFROUTE 0x10

	+#define ND6_CREATE LLE_CREATE
	+#define ND6_EXCLUSIVE LLE_EXCLUSIVE
	+
	#ifdef _KERNEL
	/*
	 * ND_IFINFO(ifp): the nd_ifinfo member of the struct in6_ifextra stored
	 * in ifp->if_afdata[AF_INET6].  That slot is dereferenced unchecked.
	 */
	#define ND_IFINFO(ifp) \
	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo)
	/*
	 * IN6_LINKMTU(ifp): linkmtu when it is non-zero and below if_mtu;
	 * otherwise maxmtu when it is non-zero and below if_mtu; otherwise if_mtu.
	 */
	#define IN6_LINKMTU(ifp) \
	((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \
	? ND_IFINFO(ifp)->linkmtu \
	: ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \
	? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))
	#endif

	/*
	 * Argument of the SIOCGNBRINFO_IN6 ioctl: the caller supplies addr, and
	 * nd6_ioctl() fills state, asked, isrouter and expire from the matching
	 * neighbor cache entry.
	 */
	struct in6_nbrinfo {
	char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
	struct in6_addr addr; /* IPv6 address of the neighbor */
	long asked; /* number of queries already sent for this addr */
	int isrouter; /* if it acts as a router */
	int state; /* reachability state */
	int expire; /* lifetime for NDP state transition */
	};

	#define DRLSTSIZ 10
	#define PRLSTSIZ 10
	/*
	 * Fixed-size table of up to DRLSTSIZ default routers.
	 * NOTE(review): presumably the reply layout of the obsolete
	 * SIOCGDRLST_IN6 ioctl, which fills defrouter[] -- confirm against the
	 * declaration in nd6_ioctl().
	 */
	struct in6_drlist {
	char ifname[IFNAMSIZ];
	struct {
	struct in6_addr rtaddr;
	u_char flags;
	u_short rtlifetime;
	u_long expire;
	u_short if_index;
	} defrouter[DRLSTSIZ];
	};

	/*
	 * One default router record as written by nd6_sysctl_drlist(); the
	 * address is a full sockaddr_in6.
	 */
	struct in6_defrouter {
	struct sockaddr_in6 rtaddr;
	u_char flags;
	u_short rtlifetime;
	u_long expire;
	u_short if_index;
	};

	/*
	 * Old prefix-list layout, kept for the obsolete SIOCGPRLST_IN6 ioctl.
	 * It holds up to PRLSTSIZ prefixes with up to DRLSTSIZ advertising
	 * routers each; the lifetime fields are u_long here.
	 */
	#ifdef _KERNEL
	struct in6_oprlist {
	char ifname[IFNAMSIZ];
	struct {
	struct in6_addr prefix;
	struct prf_ra raflags;
	u_char prefixlen;
	u_char origin;
	u_long vltime;
	u_long pltime;
	u_long expire;
	u_short if_index;
	u_short advrtrs; /* number of advertisement routers */
	struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
	} prefix[PRLSTSIZ];
	};
	#endif

	/*
	 * Prefix-list layout with the same fields as struct in6_oprlist, except
	 * that vltime/pltime are u_int32_t and expire is time_t.
	 */
	struct in6_prlist {
	char ifname[IFNAMSIZ];
	struct {
	struct in6_addr prefix;
	struct prf_ra raflags;
	u_char prefixlen;
	u_char origin;
	u_int32_t vltime;
	u_int32_t pltime;
	time_t expire;
	u_short if_index;
	u_short advrtrs; /* number of advertisement routers */
	struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
	} prefix[PRLSTSIZ];
	};

	/*
	 * Header of one prefix record as written by nd6_sysctl_prlist().  It is
	 * followed in the output by `advrtrs' struct sockaddr_in6, one per
	 * advertising router.
	 */
	struct in6_prefix {
	struct sockaddr_in6 prefix;
	struct prf_ra raflags;
	u_char prefixlen;
	u_char origin;
	u_int32_t vltime;
	u_int32_t pltime;
	time_t expire;
	u_int32_t flags;
	int refcnt;
	u_short if_index;
	u_short advrtrs; /* number of advertisement routers */
	/* struct sockaddr_in6 advrtr[] */
	};

	/*
	 * Old interface-info request layout.
	 * NOTE(review): presumably the argument of OSIOCGIFINFO_IN6, which fills
	 * every ndi field below except receivedra -- confirm in nd6_ioctl().
	 */
	#ifdef _KERNEL
	struct in6_ondireq {
	char ifname[IFNAMSIZ];
	struct {
	u_int32_t linkmtu; /* LinkMTU */
	u_int32_t maxmtu; /* Upper bound of LinkMTU */
	u_int32_t basereachable; /* BaseReachableTime */
	u_int32_t reachable; /* Reachable Time */
	u_int32_t retrans; /* Retrans Timer */
	u_int32_t flags; /* Flags */
	int recalctm; /* BaseReachable re-calculation timer */
	u_int8_t chlim; /* CurHopLimit */
	u_int8_t receivedra;
	} ndi;
	};
	#endif

	/*
	 * Interface-info request: an interface name plus a complete nd_ifinfo.
	 * NOTE(review): presumably the argument of the SIOC[GS]IFINFO_IN6
	 * ioctls -- confirm in nd6_ioctl().
	 */
	struct in6_ndireq {
	char ifname[IFNAMSIZ];
	struct nd_ifinfo ndi;
	};

	/*
	 * Argument of SIOCGDEFIFACE_IN6 / SIOCSDEFIFACE_IN6: ifindex receives
	 * V_nd6_defifindex on get and is passed to nd6_setdefaultiface() on set.
	 */
	struct in6_ndifreq {
	char ifname[IFNAMSIZ];
	u_long ifindex;
	};

	/* Prefix status */
	#define NDPRF_ONLINK 0x1
	#define NDPRF_DETACHED 0x2

	/* protocol constants */
	#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */
	#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */
	#define MAX_RTR_SOLICITATIONS 3

	#define ND6_INFINITE_LIFETIME 0xffffffff

	#ifdef _KERNEL
	/* node constants */
	#define MAX_REACHABLE_TIME 3600000 /* msec */
	#define REACHABLE_TIME 30000 /* msec */
	#define RETRANS_TIMER 1000 /* msec */
	#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */
	#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */
	#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */
	#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */
	#define TEMPADDR_REGEN_ADVANCE 5 /* sec */
	#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */
	/*
	 * Derive a randomized reachable time from base value x:
	 * MIN_RANDOM_FACTOR * (x >> 10) plus random bits masked with
	 * (MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10), all divided by
	 * 1000.  The argument is parenthesized so that any expression may be
	 * passed; it is evaluated twice.
	 */
	#define ND_COMPUTE_RTIME(x) \
		(((MIN_RANDOM_FACTOR * ((x) >> 10)) + (arc4random() & \
		((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * ((x) >> 10)))) /1000)

	TAILQ_HEAD(nd_drhead, nd_defrouter);
	/*
	 * One entry of the kernel's default router list (a TAILQ headed by
	 * struct nd_drhead and linked through dr_entry).
	 */
	struct nd_defrouter {
	TAILQ_ENTRY(nd_defrouter) dr_entry;
	struct in6_addr rtaddr;
	u_char flags; /* flags on RA message */
	u_short rtlifetime;
	u_long expire;
	struct ifnet *ifp;
	int installed; /* is installed into kernel routing table */
	};

	/*
	 * Parameters describing a prefix: interface, prefix and length,
	 * advertised lifetimes and RA flags.
	 * NOTE(review): its consumers are outside this part of the file.
	 */
	struct nd_prefixctl {
	struct ifnet *ndpr_ifp;

	/* prefix */
	struct sockaddr_in6 ndpr_prefix;
	u_char ndpr_plen;

	u_int32_t ndpr_vltime; /* advertised valid lifetime */
	u_int32_t ndpr_pltime; /* advertised preferred lifetime */

	struct prf_ra ndpr_flags;
	};


	/*
	 * One entry of the kernel's prefix list (a LIST linked through
	 * ndpr_entry; see the ndpr_next shorthand below).  ndpr_advrtrs heads
	 * the list of struct nd_pfxrouter for the routers advertising it.
	 */
	struct nd_prefix {
	struct ifnet *ndpr_ifp;
	LIST_ENTRY(nd_prefix) ndpr_entry;
	struct sockaddr_in6 ndpr_prefix; /* prefix */
	struct in6_addr ndpr_mask; /* netmask derived from the prefix */

	u_int32_t ndpr_vltime; /* advertised valid lifetime */
	u_int32_t ndpr_pltime; /* advertised preferred lifetime */

	time_t ndpr_expire; /* expiration time of the prefix */
	time_t ndpr_preferred; /* preferred time of the prefix */
	time_t ndpr_lastupdate; /* reception time of last advertisement */

	struct prf_ra ndpr_flags;
	u_int32_t ndpr_stateflags; /* actual state flags */
	/* list of routers that advertise the prefix: */
	LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
	u_char ndpr_plen;
	int ndpr_refcnt; /* reference counter from addresses */
	};

	#define ndpr_next ndpr_entry.le_next

	#define ndpr_raf ndpr_flags
	#define ndpr_raf_onlink ndpr_flags.onlink
	#define ndpr_raf_auto ndpr_flags.autonomous
	#define ndpr_raf_router ndpr_flags.router

	/*
	* Message format for use in obtaining information about prefixes
	* from inet6 sysctl function
	*/
	/*
	 * Header of one prefix message.  Note the mixed field prefixes
	 * (inpm_ and prm_) and the spelling prm_vltim; they are part of the
	 * existing layout and are left as they are.
	 */
	struct inet6_ndpr_msghdr {
	u_short inpm_msglen; /* to skip over non-understood messages */
	u_char inpm_version; /* future binary compatibility */
	u_char inpm_type; /* message type */
	struct in6_addr inpm_prefix;
	u_long prm_vltim;
	u_long prm_pltime;
	u_long prm_expire;
	u_long prm_preferred;
	struct in6_prflags prm_flags;
	u_short prm_index; /* index for associated ifp */
	u_char prm_plen; /* length of prefix in bits */
	};

	#define prm_raf_onlink prm_flags.prf_ra.onlink
	#define prm_raf_auto prm_flags.prf_ra.autonomous

	#define prm_statef_onlink prm_flags.prf_state.onlink

	#define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid
	#define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd

	/*
	 * Link between a prefix and one router advertising it; elements of the
	 * ndpr_advrtrs list of struct nd_prefix, chained through pfr_entry.
	 */
	struct nd_pfxrouter {
	LIST_ENTRY(nd_pfxrouter) pfr_entry;
	#define pfr_next pfr_entry.le_next
	struct nd_defrouter *router;
	};

	LIST_HEAD(nd_prhead, nd_prefix);

	/* nd6.c */
	extern int nd6_prune;
	extern int nd6_delay;
	extern int nd6_umaxtries;
	extern int nd6_mmaxtries;
	extern int nd6_useloopback;
	extern int nd6_maxnudhint;
	extern int nd6_gctimer;
	extern struct nd_drhead nd_defrouter;
	extern struct nd_prhead nd_prefix;
	extern int nd6_debug;
	extern int nd6_onlink_ns_rfc4861;

	/*
	 * Log only when V_nd6_debug is set.  x is a complete, parenthesized
	 * argument list for log(), e.g. nd6log((LOG_DEBUG, "fmt", arg)).
	 */
	#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)

	extern struct callout nd6_timer_ch;

	/* nd6_rtr.c */
	extern int nd6_defifindex;
	extern int ip6_desync_factor; /* seconds */
	extern u_int32_t ip6_temp_preferred_lifetime; /* seconds */
	extern u_int32_t ip6_temp_valid_lifetime; /* seconds */
	extern int ip6_temp_regen_advance; /* seconds */

	/*
	 * Pointers to the options of a Neighbor Discovery message.  The same
	 * storage can be viewed as a flat array of option pointers or through
	 * the named members of nd_opt_each (see the nd_opts_* shorthands below).
	 */
	union nd_opts {
		struct nd_opt_hdr *nd_opt_array[8];	/* max = target address list */
		struct {
			struct nd_opt_hdr *zero;
			struct nd_opt_hdr *src_lladdr;
			struct nd_opt_hdr *tgt_lladdr;
			struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
			struct nd_opt_rd_hdr *rh;
			struct nd_opt_mtu *mtu;
			struct nd_opt_hdr *search;	/* multiple opts */
			struct nd_opt_hdr *last;	/* multiple opts */
			int done;
			struct nd_opt_prefix_info *pi_end;/* multiple opts, end */
		} nd_opt_each;
	};
	#define nd_opts_src_lladdr nd_opt_each.src_lladdr
	#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr
	#define nd_opts_pi nd_opt_each.pi_beg
	#define nd_opts_pi_end nd_opt_each.pi_end
	#define nd_opts_rh nd_opt_each.rh
	#define nd_opts_mtu nd_opt_each.mtu
	#define nd_opts_search nd_opt_each.search
	#define nd_opts_last nd_opt_each.last
	#define nd_opts_done nd_opt_each.done

	/* XXX: need nd6_var.h?? */
	/*
	 * nd6.c
	 *
	 * Structures are passed and returned by pointer throughout; the
	 * pointer declarators are spelled out in every prototype.
	 */
	void nd6_init __P((void));
	struct nd_ifinfo *nd6_ifattach __P((struct ifnet *));
	void nd6_ifdetach __P((struct nd_ifinfo *));
	int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *));
	void nd6_option_init __P((void *, int, union nd_opts *));
	struct nd_opt_hdr *nd6_option __P((union nd_opts *));
	int nd6_options __P((union nd_opts *));
	struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *));
	void nd6_setmtu __P((struct ifnet *));
	void nd6_llinfo_settimer __P((struct llentry *, long));
	void nd6_timer __P((void *));
	void nd6_purge __P((struct ifnet *));
	void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int));
	int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *,
	    struct sockaddr *, u_char *));
	int nd6_ioctl __P((u_long, caddr_t, struct ifnet *));
	struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *,
	    char *, int, int, int));
	int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *,
	    struct sockaddr_in6 *, struct rtentry *));
	int nd6_need_cache __P((struct ifnet *));
	int nd6_storelladdr __P((struct ifnet *, struct rtentry *, struct mbuf *,
	    struct sockaddr *, u_char *, struct llentry **));

	/*
	 * nd6_nbr.c
	 *
	 * Neighbor solicitation/advertisement input and output, plus the
	 * duplicate address detection entry points.  Interfaces, addresses and
	 * cache entries are all passed by pointer.
	 */
	void nd6_na_input __P((struct mbuf *, int, int));
	void nd6_na_output __P((struct ifnet *, const struct in6_addr *,
	    const struct in6_addr *, u_long, int, struct sockaddr *));
	void nd6_ns_input __P((struct mbuf *, int, int));
	void nd6_ns_output __P((struct ifnet *, const struct in6_addr *,
	    const struct in6_addr *, struct llentry *, int));
	caddr_t nd6_ifptomac __P((struct ifnet *));
	void nd6_dad_start __P((struct ifaddr *, int));
	void nd6_dad_stop __P((struct ifaddr *));
	void nd6_dad_duplicated __P((struct ifaddr *));

	/*
	 * nd6_rtr.c
	 *
	 * Router solicitation/advertisement input and the default-router and
	 * prefix list helpers.  Structures are passed and returned by pointer.
	 */
	void nd6_rs_input __P((struct mbuf *, int, int));
	void nd6_ra_input __P((struct mbuf *, int, int));
	void prelist_del __P((struct nd_prefix *));
	void defrouter_addreq __P((struct nd_defrouter *));
	void defrouter_reset __P((void));
	void defrouter_select __P((void));
	void defrtrlist_del __P((struct nd_defrouter *));
	void prelist_remove __P((struct nd_prefix *));
	int nd6_prelist_add __P((struct nd_prefixctl *, struct nd_defrouter *,
	    struct nd_prefix **));
	int nd6_prefix_onlink __P((struct nd_prefix *));
	int nd6_prefix_offlink __P((struct nd_prefix *));
	void pfxlist_onlink_check __P((void));
	struct nd_defrouter *defrouter_lookup __P((struct in6_addr *, struct ifnet *));
	struct nd_prefix *nd6_prefix_lookup __P((struct nd_prefixctl *));
	void rt6_flush __P((struct in6_addr *, struct ifnet *));
	int nd6_setdefaultiface __P((int));
	int in6_tmpifadd __P((const struct in6_ifaddr *, int, int));

	#endif /* _KERNEL */

	#endif /* _NETINET6_ND6_H_ */
	Index: projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c (revision 185839)
	@@ -1,1516 +1,1511 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"
	#include "opt_ipsec.h"
	#include "opt_carp.h"
	#include "opt_mpath.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/socket.h>
	#include <sys/sockio.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/errno.h>
	#include <sys/syslog.h>
	#include <sys/queue.h>
	#include <sys/callout.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/if_dl.h>
	#include <net/if_var.h>
	#include <net/route.h>
	#ifdef RADIX_MPATH
	#include <net/radix_mpath.h>
	#endif

	#include <netinet/in.h>
	#include <netinet/in_var.h>
	#include <net/if_llatbl.h>
	#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
	#include <netinet6/in6_var.h>
	#include <netinet6/in6_ifattach.h>
	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet/icmp6.h>
	#include <netinet6/vinet6.h>

	#ifdef DEV_CARP
	#include <netinet/ip_carp.h>
	#endif

	/*
	 * View a socket address as a link-level socket address.  The argument
	 * is parenthesized so that expressions such as SDL(p + 1) cast the
	 * whole expression rather than only its first operand.
	 */
	#define SDL(s) ((struct sockaddr_dl *)(s))

	/*
	 * Forward declarations for the file-local duplicate address detection
	 * helpers.  struct dadq is only declared here, so it can be used by
	 * pointer alone until its definition further down.
	 */
	struct dadq;
	static struct dadq *nd6_dad_find(struct ifaddr *);
	static void nd6_dad_starttimer(struct dadq *, int);
	static void nd6_dad_stoptimer(struct dadq *);
	static void nd6_dad_timer(struct ifaddr *);
	static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
	static void nd6_dad_ns_input(struct ifaddr *);
	static void nd6_dad_na_input(struct ifaddr *);

	/*
	* Defined as plain globals only when VIMAGE_GLOBALS is set.
	* NOTE(review): presumably provided per network stack instance
	* otherwise - confirm.
	*/
	#ifdef VIMAGE_GLOBALS
	int dad_ignore_ns;
	int dad_maxtry;
	#endif

	/*
	* Input a Neighbor Solicitation Message.
	*
	* Based on RFC 2461
	* Based on RFC 2462 (duplicate address detection)
	*
	* m - received packet; released with m_freem() at the freeit and bad labels
	* off - byte offset from the IPv6 header to the solicitation header
	* icmp6len - length of the ICMPv6 message; what remains after the fixed
	* solicitation header is parsed as ND options
	*
	* The bad label additionally logs src/dst/target and counts the packet
	* in icp6s_badns; freeit drops the packet silently.
	*/
	void
	nd6_ns_input(struct mbuf *m, int off, int icmp6len)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct nd_neighbor_solicit *nd_ns;
	struct in6_addr saddr6 = ip6->ip6_src;
	struct in6_addr daddr6 = ip6->ip6_dst;
	struct in6_addr taddr6;
	struct in6_addr myaddr6;
	char *lladdr = NULL;
	struct ifaddr *ifa = NULL;
	int lladdrlen = 0;
	int anycast = 0, proxy = 0, tentative = 0;
	- int tlladdr;
	+ int tlladdr, error;
	union nd_opts ndopts;
	struct sockaddr_dl *proxydl = NULL;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
	if (nd_ns == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif
	ip6 = mtod(m, struct ip6_hdr ); / adjust pointer for safety */
	taddr6 = nd_ns->nd_ns_target;
	if (in6_setscope(&taddr6, ifp, NULL) != 0)
	goto bad;

	/* Only packets that arrive with hop limit 255 are accepted. */
	if (ip6->ip6_hlim != 255) {
	nd6log((LOG_ERR,
	"nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
	ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
	goto bad;
	}

	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
	/* dst has to be a solicited node multicast address. */
	if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
	/* don't check ifindex portion */
	daddr6.s6_addr32[1] == 0 &&
	daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
	daddr6.s6_addr8[12] == 0xff) {
	; /* good */
	} else {
	nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
	"(wrong ip6 dst)\n"));
	goto bad;
	}
	} else if (!V_nd6_onlink_ns_rfc4861) {
	struct sockaddr_in6 src_sa6;

	/*
	* According to recent IETF discussions, it is not a good idea
	* to accept a NS from an address which would not be deemed
	* to be a neighbor otherwise. This point is expected to be
	* clarified in future revisions of the specification.
	*/
	bzero(&src_sa6, sizeof(src_sa6));
	src_sa6.sin6_family = AF_INET6;
	src_sa6.sin6_len = sizeof(src_sa6);
	src_sa6.sin6_addr = saddr6;
	/*
	* NOTE(review): the removed line dropped the packet when
	* nd6_is_addr_neighbor() returned zero; the added lines drop it
	* when the return value is non-zero. Confirm that the return
	* convention changed along with this hunk, otherwise the test
	* is inverted.
	*/
	- if (!nd6_is_addr_neighbor(&src_sa6, ifp)) {
	+ error = nd6_is_addr_neighbor(&src_sa6, ifp);
	+ if (error) {
	nd6log((LOG_INFO, "nd6_ns_input: "
	"NS packet from non-neighbor\n"));
	goto bad;
	}
	}

	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
	nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
	goto bad;
	}

	/* Everything after the fixed solicitation header is option data. */
	icmp6len -= sizeof(*nd_ns);
	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
	nd6log((LOG_INFO,
	"nd6_ns_input: invalid ND option, ignored\n"));
	/* nd6_options have incremented stats */
	goto freeit;
	}

	/* The option length field counts 8-byte units, hence the shift. */
	if (ndopts.nd_opts_src_lladdr) {
	lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
	lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
	}

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
	nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
	"(link-layer address option)\n"));
	goto bad;
	}

	/*
	* Attaching target link-layer address to the NA?
	* (RFC 2461 7.2.4)
	*
	* NS IP dst is unicast/anycast MUST NOT add
	* NS IP dst is solicited-node multicast MUST add
	*
	* In implementation, we add target link-layer address by default.
	* We do not add one in MUST NOT cases.
	*/
	if (!IN6_IS_ADDR_MULTICAST(&daddr6))
	tlladdr = 0;
	else
	tlladdr = 1;

	/*
	* Target address (taddr6) must be either:
	* (1) Valid unicast/anycast address for my receiving interface,
	* (2) Unicast address for which I'm offering proxy service, or
	* (3) "tentative" address on which DAD is being performed.
	*/
	/* (1) and (3) check. */
	#ifdef DEV_CARP
	if (ifp->if_carp)
	ifa = carp_iamatch6(ifp->if_carp, &taddr6);
	if (ifa == NULL)
	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
	#else
	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
	#endif

	/* (2) check. */
	if (ifa == NULL) {
	struct rtentry *rt;
	struct sockaddr_in6 tsin6;
	int need_proxy;
	#ifdef RADIX_MPATH
	struct route_in6 ro;
	#endif

	bzero(&tsin6, sizeof tsin6);
	tsin6.sin6_len = sizeof(struct sockaddr_in6);
	tsin6.sin6_family = AF_INET6;
	tsin6.sin6_addr = taddr6;

	#ifdef RADIX_MPATH
	bzero(&ro, sizeof(ro));
	ro.ro_dst = tsin6;
	rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE);
	rt = ro.ro_rt;
	#else
	rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0);
	#endif
	/* Proxy only for a route flagged RTF_ANNOUNCE with an AF_LINK gateway. */
	need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 &&
	rt->rt_gateway->sa_family == AF_LINK);
	if (rt)
	rtfree(rt);
	if (need_proxy) {
	/*
	* proxy NDP for single entry
	*/
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
	IN6_IFF_NOTREADY\|IN6_IFF_ANYCAST);
	if (ifa) {
	proxy = 1;
	/*
	* NOTE(review): rt was already handed to rtfree() above,
	* so this reads rt->rt_gateway after this function's
	* reference was dropped, and proxydl keeps pointing into
	* the route - confirm the route is guaranteed to outlive
	* both uses.
	*/
	proxydl = SDL(rt->rt_gateway);
	}
	}
	}
	if (ifa == NULL) {
	/*
	* We've got an NS packet, and we don't have that address
	* assigned for us. We MUST silently ignore it.
	* See RFC2461 7.2.3.
	*/
	goto freeit;
	}
	myaddr6 = *IFA_IN6(ifa);
	anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
	goto freeit;

	/*
	* The option length must equal the interface address length plus
	* the 2-byte option header, rounded up to a multiple of 8.
	*/
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
	nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
	"(if %d, NS packet %d)\n",
	ip6_sprintf(ip6bufs, &taddr6),
	ifp->if_addrlen, lladdrlen - 2));
	goto bad;
	}

	if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
	nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
	ip6_sprintf(ip6bufs, &saddr6)));
	goto freeit;
	}

	/*
	* We have neighbor solicitation packet, with target address equals to
	* one of my tentative address.
	*
	* src addr how to process?
	* --- ---
	* multicast of course, invalid (rejected in ip6_input)
	* unicast somebody is doing address resolution -> ignore
	* unspec dup address detection
	*
	* The processing is defined in RFC 2462.
	*/
	if (tentative) {
	/*
	* If source address is unspecified address, it is for
	* duplicate address detection.
	*
	* If not, the packet is for address resolution;
	* silently ignore it.
	*/
	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
	nd6_dad_ns_input(ifa);

	goto freeit;
	}

	/*
	* If the source address is unspecified address, entries must not
	* be created or updated.
	* It looks that sender is performing DAD. Output NA toward
	* all-node multicast address, to tell the sender that I'm using
	* the address.
	* S bit ("solicited") must be zero.
	*/
	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
	struct in6_addr in6_all;

	in6_all = in6addr_linklocal_allnodes;
	if (in6_setscope(&in6_all, ifp, NULL) != 0)
	goto bad;
	nd6_na_output(ifp, &in6_all, &taddr6,
	((anycast \|\| proxy \|\| !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) \|
	(V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0),
	tlladdr, (struct sockaddr *)proxydl);
	goto freeit;
	}

	/*
	* Record the sender's link-layer address, then answer with a
	* solicited advertisement. This hunk removes the AFDATA lock/unlock
	* pair that used to bracket the cache update.
	*/
	- IF_AFDATA_LOCK(ifp);
	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
	ND_NEIGHBOR_SOLICIT, 0);
	- IF_AFDATA_UNLOCK(ifp);

	nd6_na_output(ifp, &saddr6, &taddr6,
	((anycast \|\| proxy \|\| !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) \|
	(V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) \| ND_NA_FLAG_SOLICITED,
	tlladdr, (struct sockaddr *)proxydl);
	freeit:
	m_freem(m);
	return;

	bad:
	nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
	ip6_sprintf(ip6bufs, &saddr6)));
	nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
	ip6_sprintf(ip6bufs, &daddr6)));
	nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
	ip6_sprintf(ip6bufs, &taddr6)));
	V_icmp6stat.icp6s_badns++;
	m_freem(m);
	}

	/*
	* Output a Neighbor Solicitation Message. Caller specifies:
	* - ICMP6 header source IP6 address
	* - ND6 header target IP6 address
	* - ND6 header source datalink address
	*
	* Based on RFC 2461
	* Based on RFC 2462 (duplicate address detection)
	*
	* ln - for source address determination
	* dad - duplicate address detection
	*/
	void
	nd6_ns_output(struct ifnet ifp, const struct in6_addr daddr6,
	const struct in6_addr taddr6, struct llentry ln, int dad)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct nd_neighbor_solicit *nd_ns;
	struct in6_addr *src, src_in;
	struct ip6_moptions im6o;
	int icmp6len;
	int maxlen;
	caddr_t mac;
	struct route_in6 ro;

	bzero(&ro, sizeof(ro));

	if (IN6_IS_ADDR_MULTICAST(taddr6))
	return;

	/* estimate the size of message */
	maxlen = sizeof(ip6) + sizeof(nd_ns);
	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
	if (max_linkhdr + maxlen >= MCLBYTES) {
	#ifdef DIAGNOSTIC
	printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
	"(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
	#endif
	return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m && max_linkhdr + maxlen >= MHLEN) {
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
	m_free(m);
	m = NULL;
	}
	}
	if (m == NULL)
	return;
	m->m_pkthdr.rcvif = NULL;

	if (daddr6 == NULL \|\| IN6_IS_ADDR_MULTICAST(daddr6)) {
	m->m_flags \|= M_MCAST;
	im6o.im6o_multicast_ifp = ifp;
	im6o.im6o_multicast_hlim = 255;
	im6o.im6o_multicast_loop = 0;
	}

	icmp6len = sizeof(*nd_ns);
	m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
	m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */

	/* fill neighbor solicitation packet */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc \|= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	if (daddr6)
	ip6->ip6_dst = *daddr6;
	else {
	ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
	ip6->ip6_dst.s6_addr16[1] = 0;
	ip6->ip6_dst.s6_addr32[1] = 0;
	ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
	ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
	ip6->ip6_dst.s6_addr8[12] = 0xff;
	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
	goto bad;
	}
	if (!dad) {
	/*
	* RFC2461 7.2.2:
	* "If the source address of the packet prompting the
	* solicitation is the same as one of the addresses assigned
	* to the outgoing interface, that address SHOULD be placed
	* in the IP Source Address of the outgoing solicitation.
	* Otherwise, any one of the addresses assigned to the
	* interface should be used."
	*
	* We use the source address for the prompting packet
	* (saddr6), if:
	* - saddr6 is given from the caller (by giving "ln"), and
	* - saddr6 belongs to the outgoing interface.
	* Otherwise, we perform the source address selection as usual.
	*/
	struct ip6_hdr hip6; / hold ip6 */
	struct in6_addr *hsrc = NULL;

	if (ln && ln->la_hold) {
	/*
	* assuming every packet in la_hold has the same IP
	* header
	*/
	hip6 = mtod(ln->la_hold, struct ip6_hdr *);
	/* XXX pullup? */
	if (sizeof(*hip6) < ln->la_hold->m_len)
	hsrc = &hip6->ip6_src;
	else
	hsrc = NULL;
	}
	if (hsrc && in6ifa_ifpwithaddr(ifp, hsrc))
	src = hsrc;
	else {
	int error;
	struct sockaddr_in6 dst_sa;

	bzero(&dst_sa, sizeof(dst_sa));
	dst_sa.sin6_family = AF_INET6;
	dst_sa.sin6_len = sizeof(dst_sa);
	dst_sa.sin6_addr = ip6->ip6_dst;

	src = in6_selectsrc(&dst_sa, NULL,
	NULL, &ro, NULL, NULL, &error);
	if (src == NULL) {
	char ip6buf[INET6_ADDRSTRLEN];
	nd6log((LOG_DEBUG,
	"nd6_ns_output: source can't be "
	"determined: dst=%s, error=%d\n",
	ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
	error));
	goto bad;
	}
	}
	} else {
	/*
	* Source address for DAD packet must always be IPv6
	* unspecified address. (0::0)
	* We actually don't have to 0-clear the address (we did it
	* above), but we do so here explicitly to make the intention
	* clearer.
	*/
	bzero(&src_in, sizeof(src_in));
	src = &src_in;
	}
	ip6->ip6_src = *src;
	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
	nd_ns->nd_ns_code = 0;
	nd_ns->nd_ns_reserved = 0;
	nd_ns->nd_ns_target = *taddr6;
	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */

	/*
	* Add source link-layer address option.
	*
	* spec implementation
	* --- ---
	* DAD packet MUST NOT do not add the option
	* there's no link layer address:
	* impossible do not add the option
	* there's link layer address:
	* Multicast NS MUST add one add the option
	* Unicast NS SHOULD add one add the option
	*/
	if (!dad && (mac = nd6_ifptomac(ifp))) {
	int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
	struct nd_opt_hdr nd_opt = (struct nd_opt_hdr )(nd_ns + 1);
	/* 8 byte alignments... */
	optlen = (optlen + 7) & ~7;

	m->m_pkthdr.len += optlen;
	m->m_len += optlen;
	icmp6len += optlen;
	bzero((caddr_t)nd_opt, optlen);
	nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
	nd_opt->nd_opt_len = optlen >> 3;
	bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
	}

	ip6->ip6_plen = htons((u_short)icmp6len);
	nd_ns->nd_ns_cksum = 0;
	nd_ns->nd_ns_cksum =
	in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);

	ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
	V_icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]++;

	if (ro.ro_rt) { /* we don't cache this route. */
	RTFREE(ro.ro_rt);
	}
	return;

	bad:
	if (ro.ro_rt) {
	RTFREE(ro.ro_rt);
	}
	m_freem(m);
	return;
	}

	/*
	* Neighbor advertisement input handling.
	*
	* Based on RFC 2461
	* Based on RFC 2462 (duplicate address detection)
	*
	* the following items are not implemented yet:
	* - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
	* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
	*
	* m - received packet; released with m_freem() at the freeit and bad labels
	* off - byte offset from the IPv6 header to the advertisement header
	* icmp6len - length of the ICMPv6 message; what remains after the fixed
	* advertisement header is parsed as ND options
	*
	* The bad label additionally counts the packet in icp6s_badna.
	*/
	void
	nd6_na_input(struct mbuf *m, int off, int icmp6len)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct nd_neighbor_advert *nd_na;
	struct in6_addr daddr6 = ip6->ip6_dst;
	struct in6_addr taddr6;
	int flags;
	int is_router;
	int is_solicited;
	int is_override;
	char *lladdr = NULL;
	int lladdrlen = 0;
	struct ifaddr *ifa;
	struct llentry *ln;
	union nd_opts ndopts;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	/* Only packets that arrive with hop limit 255 are accepted. */
	if (ip6->ip6_hlim != 255) {
	nd6log((LOG_ERR,
	"nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
	ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
	goto bad;
	}

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len);
	if (nd_na == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif

	/* Split the flags word into the three booleans used below. */
	flags = nd_na->nd_na_flags_reserved;
	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);

	taddr6 = nd_na->nd_na_target;
	if (in6_setscope(&taddr6, ifp, NULL))
	goto bad; /* XXX: impossible */

	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
	nd6log((LOG_ERR,
	"nd6_na_input: invalid target address %s\n",
	ip6_sprintf(ip6bufs, &taddr6)));
	goto bad;
	}
	/* A solicited advertisement sent to a multicast destination is rejected. */
	if (IN6_IS_ADDR_MULTICAST(&daddr6))
	if (is_solicited) {
	nd6log((LOG_ERR,
	"nd6_na_input: a solicited adv is multicasted\n"));
	goto bad;
	}

	/* Everything after the fixed advertisement header is option data. */
	icmp6len -= sizeof(*nd_na);
	nd6_option_init(nd_na + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
	nd6log((LOG_INFO,
	"nd6_na_input: invalid ND option, ignored\n"));
	/* nd6_options have incremented stats */
	goto freeit;
	}

	/* The option length field counts 8-byte units, hence the shift. */
	if (ndopts.nd_opts_tgt_lladdr) {
	lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
	lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);

	/*
	* Target address matches one of my interface address.
	*
	* If my address is tentative, this means that there's somebody
	* already using the same address as mine. This indicates DAD failure.
	* This is defined in RFC 2462.
	*
	* Otherwise, process as defined in RFC 2461.
	*/
	if (ifa
	&& (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
	nd6_dad_na_input(ifa);
	goto freeit;
	}

	/* Just for safety, maybe unnecessary. */
	if (ifa) {
	log(LOG_ERR,
	"nd6_na_input: duplicate IP6 address %s\n",
	ip6_sprintf(ip6bufs, &taddr6));
	goto freeit;
	}

	/*
	* The option length must equal the interface address length plus
	* the 2-byte option header, rounded up to a multiple of 8.
	*/
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
	nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
	"(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
	ifp->if_addrlen, lladdrlen - 2));
	goto bad;
	}

	/*
	* If no neighbor cache entry is found, NA SHOULD silently be
	* discarded.
	*/
	/*
	* NOTE(review): with this hunk the AFDATA lock is dropped right
	* after the lookup and the later unlock calls are removed, so every
	* access to ln below happens without it - confirm that the entry is
	* protected some other way.
	*/
	IF_AFDATA_LOCK(ifp);
	ln = nd6_lookup(&taddr6, 0, ifp);
	+ IF_AFDATA_UNLOCK(ifp);
	if (ln == NULL) {
	- IF_AFDATA_UNLOCK(ifp);
	goto freeit;
	}

	if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
	/*
	* If the link-layer has address, and no lladdr option came,
	* discard the packet.
	*/
	if (ifp->if_addrlen && lladdr == NULL) {
	- IF_AFDATA_UNLOCK(ifp);
	goto freeit;
	}

	/*
	* Record link-layer address, and update the state.
	*/
	bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
	ln->la_flags \|= LLE_VALID;
	if (is_solicited) {
	ln->ln_state = ND6_LLINFO_REACHABLE;
	ln->ln_byhint = 0;
	if (!ND6_LLINFO_PERMANENT(ln)) {
	nd6_llinfo_settimer(ln,
	(long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz);
	}
	} else {
	ln->ln_state = ND6_LLINFO_STALE;
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
	}
	if ((ln->ln_router = is_router) != 0) {
	/*
	* This means a router's state has changed from
	* non-reachable to probably reachable, and might
	* affect the status of associated prefixes..
	*/
	pfxlist_onlink_check();
	}
	} else {
	int llchange;

	/*
	* Check if the link-layer address has changed or not.
	*/
	if (lladdr == NULL)
	llchange = 0;
	else {
	if (ln->la_flags & LLE_VALID) {
	if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
	llchange = 1;
	else
	llchange = 0;
	} else
	llchange = 1;
	}

	/*
	* This is VERY complex. Look at it with care.
	*
	* override solicit lladdr llchange action
	* (L: record lladdr)
	*
	* 0 0 n -- (2c)
	* 0 0 y n (2b) L
	* 0 0 y y (1) REACHABLE->STALE
	* 0 1 n -- (2c) *->REACHABLE
	* 0 1 y n (2b) L *->REACHABLE
	* 0 1 y y (1) REACHABLE->STALE
	* 1 0 n -- (2a)
	* 1 0 y n (2a) L
	* 1 0 y y (2a) L *->STALE
	* 1 1 n -- (2a) *->REACHABLE
	* 1 1 y n (2a) L *->REACHABLE
	* 1 1 y y (2a) L *->REACHABLE
	*/
	if (!is_override && (lladdr != NULL && llchange)) { /* (1) */
	/*
	* If state is REACHABLE, make it STALE.
	* no other updates should be done.
	*/
	if (ln->ln_state == ND6_LLINFO_REACHABLE) {
	ln->ln_state = ND6_LLINFO_STALE;
	nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
	}
	- IF_AFDATA_UNLOCK(ifp);
	goto freeit;
	} else if (is_override /* (2a) */
	\|\| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
	\|\| lladdr == NULL) { /* (2c) */
	/*
	* Update link-layer address, if any.
	*/
	if (lladdr != NULL) {
	bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
	ln->la_flags \|= LLE_VALID;
	}

	/*
	* If solicited, make the state REACHABLE.
	* If not solicited and the link-layer address was
	* changed, make it STALE.
	*/
	if (is_solicited) {
	ln->ln_state = ND6_LLINFO_REACHABLE;
	ln->ln_byhint = 0;
	if (!ND6_LLINFO_PERMANENT(ln)) {
	nd6_llinfo_settimer(ln,
	(long)ND_IFINFO(ifp)->reachable * hz);
	}
	} else {
	if (lladdr != NULL && llchange) {
	ln->ln_state = ND6_LLINFO_STALE;
	nd6_llinfo_settimer(ln,
	(long)V_nd6_gctimer * hz);
	}
	}
	}

	if (ln->ln_router && !is_router) {
	/*
	* The peer dropped the router flag.
	* Remove the sender from the Default Router List and
	* update the Destination Cache entries.
	*/
	struct nd_defrouter *dr;
	struct in6_addr *in6;
	/* int s;*/

	in6 = &L3_ADDR_SIN6(ln)->sin6_addr;

	/*
	* Lock to protect the default router list.
	* XXX: this might be unnecessary, since this function
	* is only called under the network software interrupt
	* context. However, we keep it just for safety.
	*/
	/* Qing - removing
	s = splnet();
	*/
	dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
	if (dr)
	defrtrlist_del(dr);
	else if (!V_ip6_forwarding) {
	/*
	* Even if the neighbor is not in the default
	* router list, the neighbor may be used
	* as a next hop for some destinations
	* (e.g. redirect case). So we must
	* call rt6_flush explicitly.
	*/
	rt6_flush(&ip6->ip6_src, ifp);
	}
	/* Qing - removing
	splx(s);
	*/
	}
	ln->ln_router = is_router;
	}
	/* Qing - do we care ?
	rt->rt_flags &= ~RTF_REJECT;
	*/
	ln->la_asked = 0;
	/* Send every packet that was queued on the entry. */
	if (ln->la_hold) {
	struct mbuf m_hold, m_hold_next;

	/*
	* reset the la_hold in advance, to explicitly
	* prevent a la_hold lookup in nd6_output()
	* (wouldn't happen, though...)
	*/
	for (m_hold = ln->la_hold, ln->la_hold = NULL;
	m_hold; m_hold = m_hold_next) {
	m_hold_next = m_hold->m_nextpkt;
	m_hold->m_nextpkt = NULL;
	/*
	* we assume ifp is not a loopback here, so just set
	* the 2nd argument as the 1st one.
	*/
	nd6_output(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL);
	}
	}
	- IF_AFDATA_UNLOCK(ifp);
	-
	freeit:
	m_freem(m);
	return;

	bad:
	V_icmp6stat.icp6s_badna++;
	m_freem(m);
	}

	/*
	 * Neighbor advertisement output handling.
	 *
	 * Based on RFC 2461
	 *
	 * the following items are not implemented yet:
	 * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
	 * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
	 *
	 * ifp - interface the advertisement is sent on
	 * daddr6_0 - IPv6 destination; copied locally, and replaced by the
	 *            link-local all-nodes address when it is unspecified
	 * taddr6 - target address being advertised
	 * flags - ND_NA_FLAG_* bits; SOLICITED is cleared for a DAD reply and
	 *         OVERRIDE is cleared when no link-layer option is attached
	 * tlladdr - 1 if include target link-layer address
	 * sdl0 - sockaddr_dl (= proxy NA) or NULL
	 */
	void
	nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
	    const struct in6_addr *taddr6, u_long flags, int tlladdr,
	    struct sockaddr *sdl0)
	{
		INIT_VNET_INET6(ifp->if_vnet);
		struct mbuf *m;
		struct ip6_hdr *ip6;
		struct nd_neighbor_advert *nd_na;
		struct ip6_moptions im6o;
		struct in6_addr *src, daddr6;
		struct sockaddr_in6 dst_sa;
		int icmp6len, maxlen, error;
		caddr_t mac = NULL;
		struct route_in6 ro;

		bzero(&ro, sizeof(ro));

		daddr6 = *daddr6_0; /* make a local copy for modification */

		/*
		 * estimate the size of message: both headers (the structures,
		 * not the pointers to them) plus one link-layer address option
		 * rounded up to 8 bytes
		 */
		maxlen = sizeof(*ip6) + sizeof(*nd_na);
		maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
		if (max_linkhdr + maxlen >= MCLBYTES) {
	#ifdef DIAGNOSTIC
			printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
			    "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
	#endif
			return;
		}

		/* Attach a cluster when the message does not fit a header mbuf. */
		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (m && max_linkhdr + maxlen >= MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m = NULL;
			}
		}
		if (m == NULL)
			return;
		m->m_pkthdr.rcvif = NULL;

		/* im6o is only filled in for a multicast destination. */
		if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
			m->m_flags |= M_MCAST;
			im6o.im6o_multicast_ifp = ifp;
			im6o.im6o_multicast_hlim = 255;
			im6o.im6o_multicast_loop = 0;
		}

		icmp6len = sizeof(*nd_na);
		m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
		m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */

		/* fill neighbor advertisement packet */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = 0;
		ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_hlim = 255;
		if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
			/* reply to DAD */
			daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
			daddr6.s6_addr16[1] = 0;
			daddr6.s6_addr32[1] = 0;
			daddr6.s6_addr32[2] = 0;
			daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
			if (in6_setscope(&daddr6, ifp, NULL))
				goto bad;

			flags &= ~ND_NA_FLAG_SOLICITED;
		}
		ip6->ip6_dst = daddr6;
		bzero(&dst_sa, sizeof(struct sockaddr_in6));
		dst_sa.sin6_family = AF_INET6;
		dst_sa.sin6_len = sizeof(struct sockaddr_in6);
		dst_sa.sin6_addr = daddr6;

		/*
		 * Select a source whose scope is the same as that of the dest.
		 */
		bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
		src = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &error);
		if (src == NULL) {
			char ip6buf[INET6_ADDRSTRLEN];
			nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
			    "determined: dst=%s, error=%d\n",
			    ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
			goto bad;
		}
		ip6->ip6_src = *src;
		nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
		nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
		nd_na->nd_na_code = 0;
		nd_na->nd_na_target = *taddr6;
		in6_clearscope(&nd_na->nd_na_target); /* XXX */

		/*
		 * "tlladdr" indicates NS's condition for adding tlladdr or not.
		 * see nd6_ns_input() for details.
		 * Basically, if NS packet is sent to unicast/anycast addr,
		 * target lladdr option SHOULD NOT be included.
		 */
		if (tlladdr) {
			/*
			 * sdl0 != NULL indicates proxy NA. If we do proxy, use
			 * lladdr in sdl0. If we are not proxying (sending NA for
			 * my address) use lladdr configured for the interface.
			 */
			if (sdl0 == NULL) {
	#ifdef DEV_CARP
				if (ifp->if_carp)
					mac = carp_macmatch6(ifp->if_carp, m, taddr6);
				if (mac == NULL)
					mac = nd6_ifptomac(ifp);
	#else
				mac = nd6_ifptomac(ifp);
	#endif
			} else if (sdl0->sa_family == AF_LINK) {
				struct sockaddr_dl *sdl;
				sdl = (struct sockaddr_dl *)sdl0;
				/* only usable when its length matches the interface */
				if (sdl->sdl_alen == ifp->if_addrlen)
					mac = LLADDR(sdl);
			}
		}
		if (tlladdr && mac) {
			int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
			struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);

			/* roundup to 8 bytes alignment! */
			optlen = (optlen + 7) & ~7;

			m->m_pkthdr.len += optlen;
			m->m_len += optlen;
			icmp6len += optlen;
			bzero((caddr_t)nd_opt, optlen);
			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
			/* the option length field counts 8-byte units */
			nd_opt->nd_opt_len = optlen >> 3;
			bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
		} else
			flags &= ~ND_NA_FLAG_OVERRIDE;

		/* The checksum field must be zero while the checksum is computed. */
		ip6->ip6_plen = htons((u_short)icmp6len);
		nd_na->nd_na_flags_reserved = flags;
		nd_na->nd_na_cksum = 0;
		nd_na->nd_na_cksum =
		    in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);

		ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL);
		icmp6_ifstat_inc(ifp, ifs6_out_msg);
		icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
		V_icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]++;

		if (ro.ro_rt) { /* we don't cache this route. */
			RTFREE(ro.ro_rt);
		}
		return;

	bad:
		if (ro.ro_rt) {
			RTFREE(ro.ro_rt);
		}
		m_freem(m);
		return;
	}

	/*
	 * Return the link-layer address of ifp when its interface type is one of
	 * the types listed below; return NULL for every other interface type.
	 */
	caddr_t
	nd6_ifptomac(struct ifnet *ifp)
	{
		caddr_t lladdr = NULL;

		switch (ifp->if_type) {
		case IFT_ARCNET:
		case IFT_ETHER:
		case IFT_FDDI:
		case IFT_IEEE1394:
	#ifdef IFT_L2VLAN
		case IFT_L2VLAN:
	#endif
	#ifdef IFT_IEEE80211
		case IFT_IEEE80211:
	#endif
	#ifdef IFT_CARP
		case IFT_CARP:
	#endif
		case IFT_BRIDGE:
		case IFT_ISO88025:
			lladdr = IF_LLADDR(ifp);
			break;
		default:
			break;
		}

		return (lladdr);
	}

	/* Head type for the queue of in-progress DAD entries. */
	TAILQ_HEAD(dadq_head, dadq);
	/*
	* Per-address DAD state. nd6_dad_start() queues at most one entry per
	* interface address and the entry is freed when DAD finishes or is stopped.
	*/
	struct dadq {
	TAILQ_ENTRY(dadq) dad_list;
	struct ifaddr *dad_ifa; /* address under test; key used by nd6_dad_find() */
	int dad_count; /* max NS to send */
	int dad_ns_tcount; /* # of trials to send NS */
	int dad_ns_ocount; /* NS sent so far */
	int dad_ns_icount; /* incremented by nd6_dad_ns_input() */
	int dad_na_icount; /* incremented by nd6_dad_na_input() */
	struct callout dad_timer_ch; /* armed by nd6_dad_starttimer() */
	};

	/*
	* NOTE(review): the code below refers to these as V_dadq / V_dad_init;
	* presumably those names resolve to the globals here when VIMAGE_GLOBALS
	* is defined -- confirm against the vimage headers.
	*/
	#ifdef VIMAGE_GLOBALS
	static struct dadq_head dadq;
	/* set non-zero by nd6_dad_start() once the queue head has been initialized */
	int dad_init;
	#endif

	/*
	 * Look up the DAD queue entry that tracks ifa.
	 * Returns the matching entry, or NULL when ifa is not on the queue.
	 */
	static struct dadq *
	nd6_dad_find(struct ifaddr *ifa)
	{
		INIT_VNET_INET6(curvnet);
		struct dadq *entry;

		entry = V_dadq.tqh_first;
		while (entry != NULL && entry->dad_ifa != ifa)
			entry = entry->dad_list.tqe_next;

		return (entry);
	}

	/*
	 * (Re)arm the DAD timer of dp so that nd6_dad_timer() is invoked with the
	 * address under test (dp->dad_ifa) after the given number of ticks.
	 */
	static void
	nd6_dad_starttimer(struct dadq *dp, int ticks)
	{

		/* nd6_dad_timer() takes a struct ifaddr *, hence the callback cast */
		callout_reset(&dp->dad_timer_ch, ticks,
		    (void (*)(void *))nd6_dad_timer, (void *)dp->dad_ifa);
	}

	/* Cancel the pending DAD timer callout of dp, if any. */
	static void
	nd6_dad_stoptimer(struct dadq *dp)
	{
		struct callout *timer = &dp->dad_timer_ch;

		callout_stop(timer);
	}

	/*
	* Start Duplicate Address Detection (DAD) for specified interface address.
	*/
	void
	nd6_dad_start(struct ifaddr *ifa, int delay)
	{
	INIT_VNET_INET6(curvnet);
	struct in6_ifaddr ia = (struct in6_ifaddr )ifa;
	struct dadq *dp;
	char ip6buf[INET6_ADDRSTRLEN];

	if (!V_dad_init) {
	TAILQ_INIT(&V_dadq);
	V_dad_init++;
	}

	/*
	* If we don't need DAD, don't do it.
	* There are several cases:
	* - DAD is disabled (ip6_dad_count == 0)
	* - the interface address is anycast
	*/
	if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) {
	log(LOG_DEBUG,
	"nd6_dad_start: called with non-tentative address "
	"%s(%s)\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
	ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
	return;
	}
	if (ia->ia6_flags & IN6_IFF_ANYCAST) {
	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
	return;
	}
	if (!V_ip6_dad_count) {
	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
	return;
	}
	if (ifa->ifa_ifp == NULL)
	panic("nd6_dad_start: ifa->ifa_ifp == NULL");
	if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
	return;
	}
	if (nd6_dad_find(ifa) != NULL) {
	/* DAD already in progress */
	return;
	}

	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT);
	if (dp == NULL) {
	log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
	"%s(%s)\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
	ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
	return;
	}
	bzero(dp, sizeof(*dp));
	callout_init(&dp->dad_timer_ch, 0);
	TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);

	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));

	/*
	* Send NS packet for DAD, ip6_dad_count times.
	* Note that we must delay the first transmission, if this is the
	* first packet to be sent from the interface after interface
	* (re)initialization.
	*/
	dp->dad_ifa = ifa;
	IFAREF(ifa); /* just for safety */
	dp->dad_count = V_ip6_dad_count;
	dp->dad_ns_icount = dp->dad_na_icount = 0;
	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
	if (delay == 0) {
	nd6_dad_ns_output(dp, ifa);
	nd6_dad_starttimer(dp,
	(long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
	} else {
	nd6_dad_starttimer(dp, delay);
	}
	}

	/*
	 * Abort DAD for ifa unconditionally (used when an address is removed):
	 * cancel the timer, dequeue and free the DAD entry, and drop the address
	 * reference taken by nd6_dad_start().  Does nothing when no DAD is queued
	 * for ifa.
	 */
	void
	nd6_dad_stop(struct ifaddr *ifa)
	{
		INIT_VNET_INET6(curvnet);
		struct dadq *entry;

		/* nothing can be queued before the DAD queue has been initialized */
		if (V_dad_init) {
			entry = nd6_dad_find(ifa);
			if (entry != NULL) {
				nd6_dad_stoptimer(entry);

				TAILQ_REMOVE(&V_dadq, (struct dadq *)entry, dad_list);
				free(entry, M_IP6NDP);
				entry = NULL;
				IFAFREE(ifa);
			}
			/* else: DAD wasn't started yet */
		}
	}

	static void
	nd6_dad_timer(struct ifaddr *ifa)
	{
	CURVNET_SET(dp->dad_vnet);
	INIT_VNET_INET6(curvnet);
	int s;
	struct in6_ifaddr ia = (struct in6_ifaddr )ifa;
	struct dadq *dp;
	char ip6buf[INET6_ADDRSTRLEN];

	s = splnet(); /* XXX */

	/* Sanity check */
	if (ia == NULL) {
	log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
	goto done;
	}
	dp = nd6_dad_find(ifa);
	if (dp == NULL) {
	log(LOG_ERR, "nd6_dad_timer: DAD structure not found\n");
	goto done;
	}
	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
	log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
	"%s(%s)\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
	ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
	goto done;
	}
	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
	log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
	"%s(%s)\n",
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
	ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
	goto done;
	}

	/* timeouted with IFF_{RUNNING,UP} check */
	if (dp->dad_ns_tcount > V_dad_maxtry) {
	nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
	if_name(ifa->ifa_ifp)));

	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
	free(dp, M_IP6NDP);
	dp = NULL;
	IFAFREE(ifa);
	goto done;
	}

	/* Need more checks? */
	if (dp->dad_ns_ocount < dp->dad_count) {
	/*
	* We have more NS to go. Send NS packet for DAD.
	*/
	nd6_dad_ns_output(dp, ifa);
	nd6_dad_starttimer(dp,
	(long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
	} else {
	/*
	* We have transmitted sufficient number of DAD packets.
	* See what we've got.
	*/
	int duplicate;

	duplicate = 0;

	if (dp->dad_na_icount) {
	/*
	* the check is in nd6_dad_na_input(),
	* but just in case
	*/
	duplicate++;
	}

	if (dp->dad_ns_icount) {
	/* We've seen NS, means DAD has failed. */
	duplicate++;
	}

	if (duplicate) {
	/* (dp) will be freed in nd6_dad_duplicated() /
	dp = NULL;
	nd6_dad_duplicated(ifa);
	} else {
	/*
	* We are done with DAD. No NA came, no NS came.
	* No duplicate address found.
	*/
	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;

	nd6log((LOG_DEBUG,
	"%s: DAD complete for %s - no duplicates found\n",
	if_name(ifa->ifa_ifp),
	ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));

	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
	free(dp, M_IP6NDP);
	dp = NULL;
	IFAFREE(ifa);
	}
	}

	done:
	splx(s);
	CURVNET_RESTORE();
	}

	void
	nd6_dad_duplicated(struct ifaddr *ifa)
	{
	INIT_VNET_INET6(curvnet);
	struct in6_ifaddr ia = (struct in6_ifaddr )ifa;
	struct ifnet *ifp;
	struct dadq *dp;
	char ip6buf[INET6_ADDRSTRLEN];

	dp = nd6_dad_find(ifa);
	if (dp == NULL) {
	log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
	return;
	}

	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
	"NS in/out=%d/%d, NA in=%d\n",
	if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
	dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);

	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
	ia->ia6_flags \|= IN6_IFF_DUPLICATED;

	/* We are done with DAD, with duplicate address found. (failure) */
	nd6_dad_stoptimer(dp);

	ifp = ifa->ifa_ifp;
	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
	if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
	log(LOG_ERR, "%s: manual intervention required\n",
	if_name(ifp));

	/*
	* If the address is a link-local address formed from an interface
	* identifier based on the hardware address which is supposed to be
	* uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
	* operation on the interface SHOULD be disabled.
	* [rfc2462bis-03 Section 5.4.5]
	*/
	if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
	struct in6_addr in6;

	/*
	* To avoid over-reaction, we only apply this logic when we are
	* very sure that hardware addresses are supposed to be unique.
	*/
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_ATM:
	case IFT_IEEE1394:
	#ifdef IFT_IEEE80211
	case IFT_IEEE80211:
	#endif
	in6 = ia->ia_addr.sin6_addr;
	if (in6_get_hw_ifid(ifp, &in6) == 0 &&
	IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
	ND_IFINFO(ifp)->flags \|= ND6_IFF_IFDISABLED;
	log(LOG_ERR, "%s: possible hardware address "
	"duplication detected, disable IPv6\n",
	if_name(ifp));
	}
	break;
	}
	}

	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
	free(dp, M_IP6NDP);
	dp = NULL;
	IFAFREE(ifa);
	}

	static void
	nd6_dad_ns_output(struct dadq dp, struct ifaddr ifa)
	{
	struct in6_ifaddr ia = (struct in6_ifaddr )ifa;
	struct ifnet *ifp = ifa->ifa_ifp;

	dp->dad_ns_tcount++;
	if ((ifp->if_flags & IFF_UP) == 0) {
	return;
	}
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
	return;
	}

	dp->dad_ns_ocount++;
	nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1);
	}

	/*
	 * Process a received DAD Neighbor Solicitation that targets ifa.
	 *
	 * When no DAD entry exists for ifa, or we have not sent any NS of our own
	 * yet, the address is declared a duplicate right away; otherwise the NS is
	 * only counted and nd6_dad_timer() decides later.  Panics on a NULL ifa.
	 */
	static void
	nd6_dad_ns_input(struct ifaddr *ifa)
	{
		INIT_VNET_INET6(curvnet);
		struct in6_ifaddr *ia;
		struct ifnet *ifp;
		const struct in6_addr *taddr6;
		struct dadq *dp;
		int duplicate;

		if (ifa == NULL)
			panic("ifa == NULL in nd6_dad_ns_input");

		ia = (struct in6_ifaddr *)ifa;
		ifp = ifa->ifa_ifp;
		taddr6 = &ia->ia_addr.sin6_addr;
		duplicate = 0;
		dp = nd6_dad_find(ifa);

		/* Quickhack - completely ignore DAD NS packets */
		if (V_dad_ignore_ns) {
			char ip6buf[INET6_ADDRSTRLEN];
			nd6log((LOG_INFO,
			    "nd6_dad_ns_input: ignoring DAD NS packet for "
			    "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
			    if_name(ifa->ifa_ifp)));
			return;
		}

		/*
		 * if I'm yet to start DAD, someone else started using this address
		 * first.  I have a duplicate and you win.
		 */
		if (dp == NULL || dp->dad_ns_ocount == 0)
			duplicate++;

		/* XXX more checks for loopback situation - see nd6_dad_timer too */

		if (duplicate) {
			dp = NULL;	/* will be freed in nd6_dad_duplicated() */
			nd6_dad_duplicated(ifa);
		} else {
			/*
			 * not sure if I got a duplicate.
			 * increment ns count and see what happens.
			 */
			if (dp)
				dp->dad_ns_icount++;
		}
	}

	/*
	 * Process a received Neighbor Advertisement for the address ifa: count it
	 * on the DAD entry when one exists, then treat the address as a duplicate.
	 * Panics on a NULL ifa.
	 */
	static void
	nd6_dad_na_input(struct ifaddr *ifa)
	{
		struct dadq *entry;

		if (ifa == NULL)
			panic("ifa == NULL in nd6_dad_na_input");

		entry = nd6_dad_find(ifa);
		if (entry != NULL)
			entry->dad_na_icount += 1;

		/* the address is a duplicate; hand it to the common handler */
		nd6_dad_duplicated(ifa);
	}
	Index: projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c
	===================================================================
	--- projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c (revision 185838)
	+++ projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c (revision 185839)
	@@ -1,2136 +1,2134 @@
	/*-
	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the project nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*
	* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
	*/

	#include <sys/cdefs.h>
	__FBSDID("$FreeBSD$");

	#include "opt_inet.h"
	#include "opt_inet6.h"

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/socket.h>
	#include <sys/sockio.h>
	#include <sys/time.h>
	#include <sys/kernel.h>
	#include <sys/lock.h>
	#include <sys/errno.h>
	#include <sys/rwlock.h>
	#include <sys/syslog.h>
	#include <sys/queue.h>
	#include <sys/vimage.h>

	#include <net/if.h>
	#include <net/if_types.h>
	#include <net/if_dl.h>
	#include <net/route.h>
	#include <net/radix.h>
	#include <net/vnet.h>

	#include <netinet/in.h>
	#include <net/if_llatbl.h>
	#include <netinet6/in6_var.h>
	#include <netinet6/in6_ifattach.h>
	#include <netinet/ip6.h>
	#include <netinet6/ip6_var.h>
	#include <netinet6/nd6.h>
	#include <netinet/icmp6.h>
	#include <netinet6/scope6_var.h>
	#include <netinet6/vinet6.h>

	/* View a generic sockaddr pointer as a link-level sockaddr pointer. */
	#define SDL(s) ((struct sockaddr_dl *)(s))

	/* Prototypes of the file-local helpers defined later in this file. */
	static int rtpref(struct nd_defrouter *);
	static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
	static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *,
	    struct mbuf *, int));
	static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
	static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
	    struct nd_defrouter *));
	static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
	static void pfxrtr_del(struct nd_pfxrouter *);
	static struct nd_pfxrouter *find_pfxlist_reachable_router
	    (struct nd_prefix *);
	static void defrouter_delreq(struct nd_defrouter *);
	static void nd6_rtmsg(int, struct rtentry *);

	static int in6_init_prefix_ltimes(struct nd_prefix *);
	static void in6_init_address_ltimes __P((struct nd_prefix *,
	    struct in6_addrlifetime *));

	static int rt6_deleteroute(struct radix_node *, void *);

	/*
	* File-level state, only declared here when VIMAGE_GLOBALS is defined.
	* NOTE(review): the code in this file uses V_-prefixed names (for example
	* V_nd6_recalc_reachtm_interval); presumably those resolve to these
	* globals in that configuration -- confirm against the vimage headers.
	*/
	#ifdef VIMAGE_GLOBALS
	extern int nd6_recalc_reachtm_interval;

	static struct ifnet *nd6_defifp;
	int nd6_defifindex;

	int ip6_use_tempaddr;
	int ip6_desync_factor;
	u_int32_t ip6_temp_preferred_lifetime;
	u_int32_t ip6_temp_valid_lifetime;
	int ip6_temp_regen_advance;
	#endif

	/*
	* Internal router-preference ranks as returned by rtpref(); a larger value
	* is inserted earlier in the default router list by defrtrlist_update().
	*/
	/* RTPREF_MEDIUM has to be 0! */
	#define RTPREF_HIGH 1
	#define RTPREF_MEDIUM 0
	#define RTPREF_LOW (-1)
	#define RTPREF_RESERVED (-2)
	#define RTPREF_INVALID (-3) /* internal */

	/*
	* Receive Router Solicitation Message - just for routers.
	* Router solicitation/advertisement is mostly managed by userland program
	* (rtadvd) so here we have no function like nd6_ra_output().
	*
	* Based on RFC 2461
	*/
	void
	nd6_rs_input(struct mbuf *m, int off, int icmp6len)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct nd_router_solicit *nd_rs;
	struct in6_addr saddr6 = ip6->ip6_src;
	char *lladdr = NULL;
	int lladdrlen = 0;
	union nd_opts ndopts;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	/* If I'm not a router, ignore it. */
	if (V_ip6_accept_rtadv != 0 \|\| V_ip6_forwarding != 1)
	goto freeit;

	/* Sanity checks */
	if (ip6->ip6_hlim != 255) {
	nd6log((LOG_ERR,
	"nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
	ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
	goto bad;
	}

	/*
	* Don't update the neighbor cache, if src = ::.
	* This indicates that the src has no IP address assigned yet.
	*/
	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
	goto freeit;

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
	if (nd_rs == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif

	icmp6len -= sizeof(*nd_rs);
	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
	nd6log((LOG_INFO,
	"nd6_rs_input: invalid ND option, ignored\n"));
	/* nd6_options have incremented stats */
	goto freeit;
	}

	if (ndopts.nd_opts_src_lladdr) {
	lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
	lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
	}

	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
	nd6log((LOG_INFO,
	"nd6_rs_input: lladdrlen mismatch for %s "
	"(if %d, RS packet %d)\n",
	ip6_sprintf(ip6bufs, &saddr6),
	ifp->if_addrlen, lladdrlen - 2));
	goto bad;
	}

	- IF_AFDATA_LOCK(ifp);
	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
	- IF_AFDATA_UNLOCK(ifp);

	freeit:
	m_freem(m);
	return;

	bad:
	V_icmp6stat.icp6s_badrs++;
	m_freem(m);
	}

	/*
	* Receive Router Advertisement Message.
	*
	* Based on RFC 2461
	* TODO: on-link bit on prefix information
	* TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
	*/
	void
	nd6_ra_input(struct mbuf *m, int off, int icmp6len)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
	struct ip6_hdr ip6 = mtod(m, struct ip6_hdr );
	struct nd_router_advert *nd_ra;
	struct in6_addr saddr6 = ip6->ip6_src;
	int mcast = 0;
	union nd_opts ndopts;
	struct nd_defrouter *dr;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	/*
	* We only accept RAs only when
	* the system-wide variable allows the acceptance, and
	* per-interface variable allows RAs on the receiving interface.
	*/
	if (V_ip6_accept_rtadv == 0)
	goto freeit;
	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
	goto freeit;

	if (ip6->ip6_hlim != 255) {
	nd6log((LOG_ERR,
	"nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
	ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
	ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
	goto bad;
	}

	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
	nd6log((LOG_ERR,
	"nd6_ra_input: src %s is not link-local\n",
	ip6_sprintf(ip6bufs, &saddr6)));
	goto bad;
	}

	#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
	#else
	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
	if (nd_ra == NULL) {
	V_icmp6stat.icp6s_tooshort++;
	return;
	}
	#endif

	icmp6len -= sizeof(*nd_ra);
	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
	nd6log((LOG_INFO,
	"nd6_ra_input: invalid ND option, ignored\n"));
	/* nd6_options have incremented stats */
	goto freeit;
	}

	{
	struct nd_defrouter dr0;
	u_int32_t advreachable = nd_ra->nd_ra_reachable;

	/* remember if this is a multicasted advertisement */
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
	mcast = 1;

	bzero(&dr0, sizeof(dr0));
	dr0.rtaddr = saddr6;
	dr0.flags = nd_ra->nd_ra_flags_reserved;
	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
	dr0.expire = time_second + dr0.rtlifetime;
	dr0.ifp = ifp;
	/* unspecified or not? (RFC 2461 6.3.4) */
	if (advreachable) {
	advreachable = ntohl(advreachable);
	if (advreachable <= MAX_REACHABLE_TIME &&
	ndi->basereachable != advreachable) {
	ndi->basereachable = advreachable;
	ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
	ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
	}
	}
	if (nd_ra->nd_ra_retransmit)
	ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
	if (nd_ra->nd_ra_curhoplimit)
	ndi->chlim = nd_ra->nd_ra_curhoplimit;
	dr = defrtrlist_update(&dr0);
	}

	/*
	* prefix
	*/
	if (ndopts.nd_opts_pi) {
	struct nd_opt_hdr *pt;
	struct nd_opt_prefix_info *pi = NULL;
	struct nd_prefixctl pr;

	for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
	pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
	pt = (struct nd_opt_hdr *)((caddr_t)pt +
	(pt->nd_opt_len << 3))) {
	if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
	continue;
	pi = (struct nd_opt_prefix_info *)pt;

	if (pi->nd_opt_pi_len != 4) {
	nd6log((LOG_INFO,
	"nd6_ra_input: invalid option "
	"len %d for prefix information option, "
	"ignored\n", pi->nd_opt_pi_len));
	continue;
	}

	if (128 < pi->nd_opt_pi_prefix_len) {
	nd6log((LOG_INFO,
	"nd6_ra_input: invalid prefix "
	"len %d for prefix information option, "
	"ignored\n", pi->nd_opt_pi_prefix_len));
	continue;
	}

	if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
	\|\| IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
	nd6log((LOG_INFO,
	"nd6_ra_input: invalid prefix "
	"%s, ignored\n",
	ip6_sprintf(ip6bufs,
	&pi->nd_opt_pi_prefix)));
	continue;
	}

	bzero(&pr, sizeof(pr));
	pr.ndpr_prefix.sin6_family = AF_INET6;
	pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
	pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
	pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;

	pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
	ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
	pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
	ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
	pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
	pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
	pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
	(void)prelist_update(&pr, dr, m, mcast);
	}
	}

	/*
	* MTU
	*/
	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
	u_long mtu;
	u_long maxmtu;

	mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);

	/* lower bound */
	if (mtu < IPV6_MMTU) {
	nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
	"mtu=%lu sent from %s, ignoring\n",
	mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
	goto skip;
	}

	/* upper bound */
	maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
	? ndi->maxmtu : ifp->if_mtu;
	if (mtu <= maxmtu) {
	int change = (ndi->linkmtu != mtu);

	ndi->linkmtu = mtu;
	if (change) /* in6_maxmtu may change */
	in6_setmaxmtu();
	} else {
	nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
	"mtu=%lu sent from %s; "
	"exceeds maxmtu %lu, ignoring\n",
	mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
	}
	}

	skip:

	/*
	* Source link layer address
	*/
	{
	char *lladdr = NULL;
	int lladdrlen = 0;

	if (ndopts.nd_opts_src_lladdr) {
	lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
	lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
	}

	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
	nd6log((LOG_INFO,
	"nd6_ra_input: lladdrlen mismatch for %s "
	"(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
	ifp->if_addrlen, lladdrlen - 2));
	goto bad;
	}

	- IF_AFDATA_LOCK(ifp);
	nd6_cache_lladdr(ifp, &saddr6, lladdr,
	lladdrlen, ND_ROUTER_ADVERT, 0);
	- IF_AFDATA_UNLOCK(ifp);

	/*
	* Installing a link-layer address might change the state of the
	* router's neighbor cache, which might also affect our on-link
	* detection of adveritsed prefixes.
	*/
	pfxlist_onlink_check();
	}

	freeit:
	m_freem(m);
	return;

	bad:
	V_icmp6stat.icp6s_badra++;
	m_freem(m);
	}

	/*
	* default router list processing subroutines
	*/

	/* tell the change to user processes watching the routing socket. */
	static void
	nd6_rtmsg(int cmd, struct rtentry *rt)
	{
	struct rt_addrinfo info;

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	if (rt->rt_ifp) {
	info.rti_info[RTAX_IFP] =
	TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	rt_missmsg(cmd, &info, rt->rt_flags, 0);
	}

	/*
	 * Install an IPv6 default route (all-zero destination and netmask) through
	 * the router new, announce it on the routing socket, and mark the router
	 * as installed when the route request succeeded.
	 */
	void
	defrouter_addreq(struct nd_defrouter *new)
	{
		struct sockaddr_in6 def, mask, gate;
		struct rtentry *newrt = NULL;
		int s;
		int error;

		bzero(&def, sizeof(def));
		bzero(&mask, sizeof(mask));
		bzero(&gate, sizeof(gate));

		def.sin6_len = mask.sin6_len = gate.sin6_len =
		    sizeof(struct sockaddr_in6);
		def.sin6_family = gate.sin6_family = AF_INET6;
		gate.sin6_addr = new->rtaddr;

		s = splnet();
		error = rtrequest(RTM_ADD, (struct sockaddr *)&def,
		    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
		    RTF_GATEWAY, &newrt);
		if (newrt) {
	- RT_LOCK(newrt);
			nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
	- RT_REMREF(newrt);
	- RT_UNLOCK(newrt);
	+ RTFREE(newrt);
		}
		if (error == 0)
			new->installed = 1;
		splx(s);
		return;
	}

	struct nd_defrouter *
	defrouter_lookup(struct in6_addr addr, struct ifnet ifp)
	{
	INIT_VNET_INET6(ifp->if_vnet);
	struct nd_defrouter *dr;

	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
	dr = TAILQ_NEXT(dr, dr_entry)) {
	if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
	return (dr);
	}

	return (NULL); /* search failed */
	}

	/*
	 * Delete the default route that goes through router dr, report the
	 * deletion on the routing socket, and clear the router's installed flag.
	 * Helper for defrouter_select(); it should not be called from anywhere
	 * else.
	 */
	static void
	defrouter_delreq(struct nd_defrouter *dr)
	{
		struct sockaddr_in6 def, mask, gate;
		struct rtentry *delrt = NULL;

		bzero(&gate, sizeof(gate));
		bzero(&mask, sizeof(mask));
		bzero(&def, sizeof(def));

		/* all-zero destination and mask: the default route */
		def.sin6_family = gate.sin6_family = AF_INET6;
		def.sin6_len = mask.sin6_len = gate.sin6_len =
		    sizeof(struct sockaddr_in6);
		gate.sin6_addr = dr->rtaddr;

		rtrequest(RTM_DELETE, (struct sockaddr *)&def,
		    (struct sockaddr *)&gate,
		    (struct sockaddr *)&mask, RTF_GATEWAY, &delrt);
		if (delrt != NULL) {
			nd6_rtmsg(RTM_DELETE, delrt);
			RTFREE(delrt);
		}

		dr->installed = 0;
	}

	/*
	* remove all default routes from default router list
	*/
	void
	defrouter_reset(void)
	{
	INIT_VNET_INET6(curvnet);
	struct nd_defrouter *dr;

	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
	dr = TAILQ_NEXT(dr, dr_entry))
	defrouter_delreq(dr);

	/*
	* XXX should we also nuke any default routers in the kernel, by
	* going through them by rtalloc1()?
	*/
	}

	/*
	 * Remove router dr from the default router list and free it.  Also drops
	 * its default route when installed, removes the references to it from
	 * every prefix, and selects a new default router when dr was the
	 * installed one.
	 */
	void
	defrtrlist_del(struct nd_defrouter *dr)
	{
		INIT_VNET_INET6(curvnet);
		struct nd_prefix *pfx;
		struct nd_pfxrouter *ref;
		int was_installed = 0;

		/*
		 * Flush all the routing table entries that use the router
		 * as a next hop.
		 */
		if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */
			rt6_flush(&dr->rtaddr, dr->ifp);

		/* remember the state: defrouter_delreq() clears dr->installed */
		if (dr->installed) {
			was_installed = 1;
			defrouter_delreq(dr);
		}
		TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);

		/*
		 * Also delete all the pointers to the router in each prefix lists.
		 */
		pfx = V_nd_prefix.lh_first;
		while (pfx != NULL) {
			ref = pfxrtr_lookup(pfx, dr);
			if (ref != NULL)
				pfxrtr_del(ref);
			pfx = pfx->ndpr_next;
		}
		pfxlist_onlink_check();

		/*
		 * If the router is the primary one, choose a new one.
		 * Note that defrouter_select() will remove the current gateway
		 * from the routing table.
		 */
		if (was_installed)
			defrouter_select();

		free(dr, M_IP6NDP);
	}

	/*
	 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
	 * draft-ietf-ipngwg-router-selection:
	 * 1) Routers that are reachable or probably reachable should be preferred.
	 *    If we have more than one (probably) reachable router, prefer ones
	 *    with the highest router preference.
	 * 2) When no routers on the list are known to be reachable or
	 *    probably reachable, routers SHOULD be selected in a round-robin
	 *    fashion, regardless of router preference values.
	 * 3) If the Default Router List is empty, assume that all
	 *    destinations are on-link.
	 *
	 * We assume nd_defrouter is sorted by router preference value.
	 * Since the code below covers both with and without router preference cases,
	 * we do not need to classify the cases by ifdef.
	 *
	 * At this moment, we do not try to install more than one default router,
	 * even when the multipath routing is available, because we're not sure about
	 * the benefits for stub hosts comparing to the risk of making the code
	 * complicated and the possibility of introducing bugs.
	 */
	void
	defrouter_select(void)
	{
		INIT_VNET_INET6(curvnet);
		int s = splnet();
		struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
		struct llentry *ln = NULL;

		/*
		 * This function should be called only when acting as an autoconfigured
		 * host.  Although the remaining part of this function is not effective
		 * if the node is not an autoconfigured host, we explicitly exclude
		 * such cases here for safety.
		 */
		if (V_ip6_forwarding || !V_ip6_accept_rtadv) {
			nd6log((LOG_WARNING,
			    "defrouter_select: called unexpectedly (forwarding=%d, "
			    "accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv));
			splx(s);
			return;
		}

		/*
		 * Let's handle easy case (3) first:
		 * If default router list is empty, there's nothing to be done.
		 */
		if (!TAILQ_FIRST(&V_nd_defrouter)) {
			splx(s);
			return;
		}

		/*
		 * Search for a (probably) reachable router from the list.
		 * We just pick up the first reachable one (if any), assuming that
		 * the ordering rule of the list described in defrtrlist_update().
		 */
		for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
		     dr = TAILQ_NEXT(dr, dr_entry)) {
			IF_AFDATA_LOCK(dr->ifp);
			if (selected_dr == NULL &&
			    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
			    ND6_IS_LLINFO_PROBREACH(ln)) {
				selected_dr = dr;
			}
			IF_AFDATA_UNLOCK(dr->ifp);

			if (dr->installed && installed_dr == NULL)
				installed_dr = dr;
			else if (dr->installed && installed_dr) {
				/* this should not happen.  warn for diagnosis. */
				log(LOG_ERR, "defrouter_select: more than one router"
				    " is installed\n");
			}
		}
		/*
		 * If none of the default routers was found to be reachable,
		 * round-robin the list regardless of preference.
		 * Otherwise, if we have an installed router, check if the selected
		 * (reachable) router should really be preferred to the installed one.
		 * We only prefer the new router when the old one is not reachable
		 * or when the new one has a really higher preference value.
		 */
		if (selected_dr == NULL) {
			if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
				selected_dr = TAILQ_FIRST(&V_nd_defrouter);
			else
				selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
		} else if (installed_dr) {
			IF_AFDATA_LOCK(installed_dr->ifp);
			if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
			    ND6_IS_LLINFO_PROBREACH(ln) &&
			    rtpref(selected_dr) <= rtpref(installed_dr)) {
				selected_dr = installed_dr;
			}
			IF_AFDATA_UNLOCK(installed_dr->ifp);
		}

		/*
		 * If the selected router is different than the installed one,
		 * remove the installed router and install the selected one.
		 * Note that the selected router is never NULL here.
		 */
		if (installed_dr != selected_dr) {
			if (installed_dr)
				defrouter_delreq(installed_dr);
			defrouter_addreq(selected_dr);
		}

		splx(s);
		return;
	}

	/*
	 * Map the router-preference bits of dr->flags (a 2-bit signed field) to
	 * one of the internal RTPREF_* ranks used for default router selection.
	 * The reserved encoding is treated as medium preference.
	 */
	static int
	rtpref(struct nd_defrouter *dr)
	{
		int rank;

		switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
		case ND_RA_FLAG_RTPREF_HIGH:
			rank = RTPREF_HIGH;
			break;
		case ND_RA_FLAG_RTPREF_MEDIUM:
		case ND_RA_FLAG_RTPREF_RSV:
			rank = RTPREF_MEDIUM;
			break;
		case ND_RA_FLAG_RTPREF_LOW:
			rank = RTPREF_LOW;
			break;
		default:
			/*
			 * This case should never happen.  If it did, it would mean a
			 * serious bug of kernel internal.  We thus always bark here.
			 * Or, can we even panic?
			 */
			log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
			rank = RTPREF_INVALID;
			break;
		}

		return (rank);
	}

	static struct nd_defrouter *
	defrtrlist_update(struct nd_defrouter *new)
	{
	INIT_VNET_INET6(curvnet);
	struct nd_defrouter dr, n;
	int s = splnet();

	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
	/* entry exists */
	if (new->rtlifetime == 0) {
	defrtrlist_del(dr);
	dr = NULL;
	} else {
	int oldpref = rtpref(dr);

	/* override */
	dr->flags = new->flags; /* xxx flag check */
	dr->rtlifetime = new->rtlifetime;
	dr->expire = new->expire;

	/*
	* If the preference does not change, there's no need
	* to sort the entries.
	*/
	if (rtpref(new) == oldpref) {
	splx(s);
	return (dr);
	}

	/*
	* preferred router may be changed, so relocate
	* this router.
	* XXX: calling TAILQ_REMOVE directly is a bad manner.
	* However, since defrtrlist_del() has many side
	* effects, we intentionally do so here.
	* defrouter_select() below will handle routing
	* changes later.
	*/
	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
	n = dr;
	goto insert;
	}
	splx(s);
	return (dr);
	}

	/* entry does not exist */
	if (new->rtlifetime == 0) {
	splx(s);
	return (NULL);
	}

	n = (struct nd_defrouter )malloc(sizeof(n), M_IP6NDP, M_NOWAIT);
	if (n == NULL) {
	splx(s);
	return (NULL);
	}
	bzero(n, sizeof(*n));
	n = new;

	insert:
	/*
	* Insert the new router in the Default Router List;
	* The Default Router List should be in the descending order
	* of router-preferece. Routers with the same preference are
	* sorted in the arriving time order.
	*/

	/* insert at the end of the group */
	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
	dr = TAILQ_NEXT(dr, dr_entry)) {
	if (rtpref(n) > rtpref(dr))
	break;
	}
	if (dr)
	TAILQ_INSERT_BEFORE(dr, n, dr_entry);
	else
	TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);

	defrouter_select();

	splx(s);

	return (n);
	}

	static struct nd_pfxrouter *
	pfxrtr_lookup(struct nd_prefix pr, struct nd_defrouter dr)
	{
	struct nd_pfxrouter *search;

	for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
	if (search->router == dr)
	break;
	}

	return (search);
	}

	static void
	pfxrtr_add(struct nd_prefix pr, struct nd_defrouter dr)
	{
	struct nd_pfxrouter *new;

	new = (struct nd_pfxrouter )malloc(sizeof(new), M_IP6NDP, M_NOWAIT);
	if (new == NULL)
	return;
	bzero(new, sizeof(*new));
	new->router = dr;

	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);

	pfxlist_onlink_check();
	}

	/*
	 * Detach a prefix-router entry from the list it is linked on and release
	 * its storage.
	 */
	static void
	pfxrtr_del(struct nd_pfxrouter *pfr)
	{

		/* unlink first, then hand the memory back to M_IP6NDP */
		LIST_REMOVE(pfr, pfr_entry);
		free(pfr, M_IP6NDP);
	}

	struct nd_prefix *
	nd6_prefix_lookup(struct nd_prefixctl *key)
	{
	INIT_VNET_INET6(curvnet);
	struct nd_prefix *search;

	for (search = V_nd_prefix.lh_first;
	search; search = search->ndpr_next) {
	if (key->ndpr_ifp == search->ndpr_ifp &&
	key->ndpr_plen == search->ndpr_plen &&
	in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
	&search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
	break;
	}
	}

	return (search);
	}

	int
	nd6_prelist_add(struct nd_prefixctl pr, struct nd_defrouter dr,
	struct nd_prefix **newp)
	{
	INIT_VNET_INET6(curvnet);
	struct nd_prefix *new = NULL;
	int error = 0;
	int i, s;
	char ip6buf[INET6_ADDRSTRLEN];

	new = (struct nd_prefix )malloc(sizeof(new), M_IP6NDP, M_NOWAIT);
	if (new == NULL)
	return(ENOMEM);
	bzero(new, sizeof(*new));
	new->ndpr_ifp = pr->ndpr_ifp;
	new->ndpr_prefix = pr->ndpr_prefix;
	new->ndpr_plen = pr->ndpr_plen;
	new->ndpr_vltime = pr->ndpr_vltime;
	new->ndpr_pltime = pr->ndpr_pltime;
	new->ndpr_flags = pr->ndpr_flags;
	if ((error = in6_init_prefix_ltimes(new)) != 0) {
	free(new, M_IP6NDP);
	return(error);
	}
	new->ndpr_lastupdate = time_second;
	if (newp != NULL)
	*newp = new;

	/* initialization */
	LIST_INIT(&new->ndpr_advrtrs);
	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
	/* make prefix in the canonical form */
	for (i = 0; i < 4; i++)
	new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
	new->ndpr_mask.s6_addr32[i];

	s = splnet();
	/* link ndpr_entry to nd_prefix list */
	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
	splx(s);

	/* ND_OPT_PI_FLAG_ONLINK processing */
	if (new->ndpr_raf_onlink) {
	int e;

	if ((e = nd6_prefix_onlink(new)) != 0) {
	nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
	"the prefix %s/%d on-link on %s (errno=%d)\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
	/* proceed anyway. XXX: is it correct? */
	}
	}

	if (dr)
	pfxrtr_add(new, dr);

	return 0;
	}

	void
	prelist_remove(struct nd_prefix *pr)
	{
	INIT_VNET_INET6(curvnet);
	struct nd_pfxrouter pfr, next;
	int e, s;
	char ip6buf[INET6_ADDRSTRLEN];

	/* make sure to invalidate the prefix until it is really freed. */
	pr->ndpr_vltime = 0;
	pr->ndpr_pltime = 0;

	/*
	* Though these flags are now meaningless, we'd rather keep the value
	* of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
	* when executing "ndp -p".
	*/

	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
	(e = nd6_prefix_offlink(pr)) != 0) {
	nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
	"on %s, errno=%d\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
	/* what should we do? */
	}

	if (pr->ndpr_refcnt > 0)
	return; /* notice here? */

	s = splnet();

	/* unlink ndpr_entry from nd_prefix list */
	LIST_REMOVE(pr, ndpr_entry);

	/* free list of routers that adversed the prefix */
	for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
	next = pfr->pfr_next;

	free(pfr, M_IP6NDP);
	}
	splx(s);

	free(pr, M_IP6NDP);

	pfxlist_onlink_check();
	}

	/*
	* dr - may be NULL
	*/

	static int
	prelist_update(struct nd_prefixctl new, struct nd_defrouter dr,
	struct mbuf *m, int mcast)
	{
	INIT_VNET_INET6(curvnet);
	struct in6_ifaddr ia6 = NULL, ia6_match = NULL;
	struct ifaddr *ifa;
	struct ifnet *ifp = new->ndpr_ifp;
	struct nd_prefix *pr;
	int s = splnet();
	int error = 0;
	int newprefix = 0;
	int auth;
	struct in6_addrlifetime lt6_tmp;
	char ip6buf[INET6_ADDRSTRLEN];

	auth = 0;
	if (m) {
	/*
	* Authenticity for NA consists authentication for
	* both IP header and IP datagrams, doesn't it ?
	*/
	#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
	auth = ((m->m_flags & M_AUTHIPHDR) &&
	(m->m_flags & M_AUTHIPDGM));
	#endif
	}

	if ((pr = nd6_prefix_lookup(new)) != NULL) {
	/*
	* nd6_prefix_lookup() ensures that pr and new have the same
	* prefix on a same interface.
	*/

	/*
	* Update prefix information. Note that the on-link (L) bit
	* and the autonomous (A) bit should NOT be changed from 1
	* to 0.
	*/
	if (new->ndpr_raf_onlink == 1)
	pr->ndpr_raf_onlink = 1;
	if (new->ndpr_raf_auto == 1)
	pr->ndpr_raf_auto = 1;
	if (new->ndpr_raf_onlink) {
	pr->ndpr_vltime = new->ndpr_vltime;
	pr->ndpr_pltime = new->ndpr_pltime;
	(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
	pr->ndpr_lastupdate = time_second;
	}

	if (new->ndpr_raf_onlink &&
	(pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
	int e;

	if ((e = nd6_prefix_onlink(pr)) != 0) {
	nd6log((LOG_ERR,
	"prelist_update: failed to make "
	"the prefix %s/%d on-link on %s "
	"(errno=%d)\n",
	ip6_sprintf(ip6buf,
	&pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
	/* proceed anyway. XXX: is it correct? */
	}
	}

	if (dr && pfxrtr_lookup(pr, dr) == NULL)
	pfxrtr_add(pr, dr);
	} else {
	struct nd_prefix *newpr = NULL;

	newprefix = 1;

	if (new->ndpr_vltime == 0)
	goto end;
	if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
	goto end;

	error = nd6_prelist_add(new, dr, &newpr);
	if (error != 0 \|\| newpr == NULL) {
	nd6log((LOG_NOTICE, "prelist_update: "
	"nd6_prelist_add failed for %s/%d on %s "
	"errno=%d, returnpr=%p\n",
	ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
	new->ndpr_plen, if_name(new->ndpr_ifp),
	error, newpr));
	goto end; /* we should just give up in this case. */
	}

	/*
	* XXX: from the ND point of view, we can ignore a prefix
	* with the on-link bit being zero. However, we need a
	* prefix structure for references from autoconfigured
	* addresses. Thus, we explicitly make sure that the prefix
	* itself expires now.
	*/
	if (newpr->ndpr_raf_onlink == 0) {
	newpr->ndpr_vltime = 0;
	newpr->ndpr_pltime = 0;
	in6_init_prefix_ltimes(newpr);
	}

	pr = newpr;
	}

	/*
	* Address autoconfiguration based on Section 5.5.3 of RFC 2462.
	* Note that pr must be non NULL at this point.
	*/

	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
	if (!new->ndpr_raf_auto)
	goto end;

	/*
	* 5.5.3 (b). the link-local prefix should have been ignored in
	* nd6_ra_input.
	*/

	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
	if (new->ndpr_pltime > new->ndpr_vltime) {
	error = EINVAL; /* XXX: won't be used */
	goto end;
	}

	/*
	* 5.5.3 (d). If the prefix advertised is not equal to the prefix of
	* an address configured by stateless autoconfiguration already in the
	* list of addresses associated with the interface, and the Valid
	* Lifetime is not 0, form an address. We first check if we have
	* a matching prefix.
	* Note: we apply a clarification in rfc2462bis-02 here. We only
	* consider autoconfigured addresses while RFC2462 simply said
	* "address".
	*/
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	struct in6_ifaddr *ifa6;
	u_int32_t remaininglifetime;

	if (ifa->ifa_addr->sa_family != AF_INET6)
	continue;

	ifa6 = (struct in6_ifaddr *)ifa;

	/*
	* We only consider autoconfigured addresses as per rfc2462bis.
	*/
	if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
	continue;

	/*
	* Spec is not clear here, but I believe we should concentrate
	* on unicast (i.e. not anycast) addresses.
	* XXX: other ia6_flags? detached or duplicated?
	*/
	if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
	continue;

	/*
	* Ignore the address if it is not associated with a prefix
	* or is associated with a prefix that is different from this
	* one. (pr is never NULL here)
	*/
	if (ifa6->ia6_ndpr != pr)
	continue;

	if (ia6_match == NULL) /* remember the first one */
	ia6_match = ifa6;

	/*
	* An already autoconfigured address matched. Now that we
	* are sure there is at least one matched address, we can
	* proceed to 5.5.3. (e): update the lifetimes according to the
	* "two hours" rule and the privacy extension.
	* We apply some clarifications in rfc2462bis:
	* - use remaininglifetime instead of storedlifetime as a
	* variable name
	* - remove the dead code in the "two-hour" rule
	*/
	#define TWOHOUR (120*60)
	lt6_tmp = ifa6->ia6_lifetime;

	if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
	remaininglifetime = ND6_INFINITE_LIFETIME;
	else if (time_second - ifa6->ia6_updatetime >
	lt6_tmp.ia6t_vltime) {
	/*
	* The case of "invalid" address. We should usually
	* not see this case.
	*/
	remaininglifetime = 0;
	} else
	remaininglifetime = lt6_tmp.ia6t_vltime -
	(time_second - ifa6->ia6_updatetime);

	/* when not updating, keep the current stored lifetime. */
	lt6_tmp.ia6t_vltime = remaininglifetime;

	if (TWOHOUR < new->ndpr_vltime \|\|
	remaininglifetime < new->ndpr_vltime) {
	lt6_tmp.ia6t_vltime = new->ndpr_vltime;
	} else if (remaininglifetime <= TWOHOUR) {
	if (auth) {
	lt6_tmp.ia6t_vltime = new->ndpr_vltime;
	}
	} else {
	/*
	* new->ndpr_vltime <= TWOHOUR &&
	* TWOHOUR < remaininglifetime
	*/
	lt6_tmp.ia6t_vltime = TWOHOUR;
	}

	/* The 2 hour rule is not imposed for preferred lifetime. */
	lt6_tmp.ia6t_pltime = new->ndpr_pltime;

	in6_init_address_ltimes(pr, &lt6_tmp);

	/*
	* We need to treat lifetimes for temporary addresses
	* differently, according to
	* draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
	* we only update the lifetimes when they are in the maximum
	* intervals.
	*/
	if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
	u_int32_t maxvltime, maxpltime;

	if (V_ip6_temp_valid_lifetime >
	(u_int32_t)((time_second - ifa6->ia6_createtime) +
	V_ip6_desync_factor)) {
	maxvltime = V_ip6_temp_valid_lifetime -
	(time_second - ifa6->ia6_createtime) -
	V_ip6_desync_factor;
	} else
	maxvltime = 0;
	if (V_ip6_temp_preferred_lifetime >
	(u_int32_t)((time_second - ifa6->ia6_createtime) +
	V_ip6_desync_factor)) {
	maxpltime = V_ip6_temp_preferred_lifetime -
	(time_second - ifa6->ia6_createtime) -
	V_ip6_desync_factor;
	} else
	maxpltime = 0;

	if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME \|\|
	lt6_tmp.ia6t_vltime > maxvltime) {
	lt6_tmp.ia6t_vltime = maxvltime;
	}
	if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME \|\|
	lt6_tmp.ia6t_pltime > maxpltime) {
	lt6_tmp.ia6t_pltime = maxpltime;
	}
	}
	ifa6->ia6_lifetime = lt6_tmp;
	ifa6->ia6_updatetime = time_second;
	}
	if (ia6_match == NULL && new->ndpr_vltime) {
	int ifidlen;

	/*
	* 5.5.3 (d) (continued)
	* No address matched and the valid lifetime is non-zero.
	* Create a new address.
	*/

	/*
	* Prefix Length check:
	* If the sum of the prefix length and interface identifier
	* length does not equal 128 bits, the Prefix Information
	* option MUST be ignored. The length of the interface
	* identifier is defined in a separate link-type specific
	* document.
	*/
	ifidlen = in6_if2idlen(ifp);
	if (ifidlen < 0) {
	/* this should not happen, so we always log it. */
	log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
	if_name(ifp));
	goto end;
	}
	if (ifidlen + pr->ndpr_plen != 128) {
	nd6log((LOG_INFO,
	"prelist_update: invalid prefixlen "
	"%d for %s, ignored\n",
	pr->ndpr_plen, if_name(ifp)));
	goto end;
	}

	if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
	/*
	* note that we should use pr (not new) for reference.
	*/
	pr->ndpr_refcnt++;
	ia6->ia6_ndpr = pr;

	/*
	* RFC 3041 3.3 (2).
	* When a new public address is created as described
	* in RFC2462, also create a new temporary address.
	*
	* RFC 3041 3.5.
	* When an interface connects to a new link, a new
	* randomized interface identifier should be generated
	* immediately together with a new set of temporary
	* addresses. Thus, we specifiy 1 as the 2nd arg of
	* in6_tmpifadd().
	*/
	if (V_ip6_use_tempaddr) {
	int e;
	if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
	nd6log((LOG_NOTICE, "prelist_update: "
	"failed to create a temporary "
	"address, errno=%d\n",
	e));
	}
	}

	/*
	* A newly added address might affect the status
	* of other addresses, so we check and update it.
	* XXX: what if address duplication happens?
	*/
	pfxlist_onlink_check();
	} else {
	/* just set an error. do not bark here. */
	error = EADDRNOTAVAIL; /* XXX: might be unused. */
	}
	}

	end:
	splx(s);
	return error;
	}

	/*
	* A supplement function used in the on-link detection below;
	* detect if a given prefix has a (probably) reachable advertising router.
	* XXX: lengthy function name...
	*/
	static struct nd_pfxrouter *
	find_pfxlist_reachable_router(struct nd_prefix *pr)
	{
	struct nd_pfxrouter *pfxrtr;
	struct llentry *ln;

	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr;
	pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
	IF_AFDATA_LOCK(pfxrtr->router->ifp);
	if ((ln = nd6_lookup(&pfxrtr->router->rtaddr, 0,
	pfxrtr->router->ifp)) &&
	ND6_IS_LLINFO_PROBREACH(ln)) {
	IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
	break; /* found */
	}
	IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
	}
	return (pfxrtr);
	}

	/*
	 * Check if each prefix in the prefix list has at least one available router
	 * that advertised the prefix (a router is "available" if its neighbor cache
	 * entry is reachable or probably reachable).
	 * If the check fails, the prefix may be off-link, because, for example,
	 * we have moved from the network but the lifetime of the prefix has not
	 * expired yet.  So we should not use the prefix if there is another prefix
	 * that has an available router.
	 * But, if there is no prefix that has an available router, we still regard
	 * all the prefixes as on-link.  This is because we can't tell if all the
	 * routers are simply dead or if we really moved from the network and there
	 * is no router around us.
	 *
	 * The function works in three passes: (1) set or clear NDPRF_DETACHED on
	 * each on-link-flagged, non-link-local prefix; (2) remove or install the
	 * interface route so that it agrees with the detached state; (3) set or
	 * clear IN6_IFF_DETACHED on autoconfigured addresses, restarting DAD for
	 * addresses that become attached again.
	 */
	void
	pfxlist_onlink_check(void)
	{
		INIT_VNET_INET6(curvnet);
		struct nd_prefix *pr;
		struct in6_ifaddr *ifa;
		struct nd_defrouter *dr;
		struct nd_pfxrouter *pfxrtr = NULL;

		/*
		 * Check if there is a prefix that has a reachable advertising
		 * router.
		 */
		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
			if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
				break;
		}

		/*
		 * If we have no such prefix, check whether we still have a router
		 * that does not advertise any prefixes.
		 */
		if (pr == NULL) {
			for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
			    dr = TAILQ_NEXT(dr, dr_entry)) {
				struct nd_prefix *pr0;

				for (pr0 = V_nd_prefix.lh_first; pr0;
				    pr0 = pr0->ndpr_next) {
					if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
						break;
				}
				if (pfxrtr != NULL)
					break;
			}
		}
		if (pr != NULL || (TAILQ_FIRST(&V_nd_defrouter) && pfxrtr == NULL)) {
			/*
			 * There is at least one prefix that has a reachable router,
			 * or at least a router which probably does not advertise
			 * any prefixes.  The latter would be the case when we move
			 * to a new link where we have a router that does not provide
			 * prefixes and we configure an address by hand.
			 * Detach prefixes which have no reachable advertising
			 * router, and attach other prefixes.
			 */
			for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
				/* XXX: a link-local prefix should never be detached */
				if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
					continue;

				/*
				 * we aren't interested in prefixes without the L bit
				 * set.
				 */
				if (pr->ndpr_raf_onlink == 0)
					continue;

				if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
				    find_pfxlist_reachable_router(pr) == NULL)
					pr->ndpr_stateflags |= NDPRF_DETACHED;
				if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
				    find_pfxlist_reachable_router(pr) != NULL)
					pr->ndpr_stateflags &= ~NDPRF_DETACHED;
			}
		} else {
			/* there is no prefix that has a reachable router */
			for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
				if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
					continue;

				if (pr->ndpr_raf_onlink == 0)
					continue;

				if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
					pr->ndpr_stateflags &= ~NDPRF_DETACHED;
			}
		}

		/*
		 * Remove each interface route associated with a (just) detached
		 * prefix, and reinstall the interface route for a (just) attached
		 * prefix.  Note that not every reinstallation attempt necessarily
		 * succeeds, when a same prefix is shared among multiple
		 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
		 * so we don't have to care about them.
		 */
		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
			int e;
			char ip6buf[INET6_ADDRSTRLEN];

			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			if (pr->ndpr_raf_onlink == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
			    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
				if ((e = nd6_prefix_offlink(pr)) != 0) {
					nd6log((LOG_ERR,
					    "pfxlist_onlink_check: failed to "
					    "make %s/%d offlink, errno=%d\n",
					    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
				}
			}
			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
			    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
			    pr->ndpr_raf_onlink) {
				if ((e = nd6_prefix_onlink(pr)) != 0) {
					nd6log((LOG_ERR,
					    "pfxlist_onlink_check: failed to "
					    "make %s/%d onlink, errno=%d\n",
					    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
				}
			}
		}

		/*
		 * Changes on the prefix status might affect address status as well.
		 * Make sure that all addresses derived from an attached prefix are
		 * attached, and that all addresses derived from a detached prefix are
		 * detached.  Note, however, that a manually configured address should
		 * always be attached.
		 * The precise detection logic is same as the one for prefixes.
		 */
		for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
			if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
				continue;

			if (ifa->ia6_ndpr == NULL) {
				/*
				 * This can happen when we first configure the address
				 * (i.e. the address exists, but the prefix does not).
				 * XXX: complicated relationships...
				 */
				continue;
			}

			if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
				break;
		}
		if (ifa) {
			/* at least one autoconfigured address has a reachable router */
			for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
				if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
					continue;

				if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
					continue;

				if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
					if (ifa->ia6_flags & IN6_IFF_DETACHED) {
						ifa->ia6_flags &= ~IN6_IFF_DETACHED;
						ifa->ia6_flags |= IN6_IFF_TENTATIVE;
						nd6_dad_start((struct ifaddr *)ifa, 0);
					}
				} else {
					ifa->ia6_flags |= IN6_IFF_DETACHED;
				}
			}
		} else {
			/* no reachable router at all: attach every address again */
			for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
				if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
					continue;

				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
					/* Do we need a delay in this case? */
					nd6_dad_start((struct ifaddr *)ifa, 0);
				}
			}
		}
	}

	int
	nd6_prefix_onlink(struct nd_prefix *pr)
	{
	INIT_VNET_INET6(curvnet);
	struct ifaddr *ifa;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct sockaddr_in6 mask6;
	struct nd_prefix *opr;
	u_long rtflags;
	int error = 0;
	+ struct radix_node_head *rnh;
	struct rtentry *rt = NULL;
	char ip6buf[INET6_ADDRSTRLEN];
	struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
	nd6log((LOG_ERR,
	"nd6_prefix_onlink: %s/%d is already on-link\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen));
	return (EEXIST);
	}

	/*
	* Add the interface route associated with the prefix. Before
	* installing the route, check if there's the same prefix on another
	* interface, and the prefix has already installed the interface route.
	* Although such a configuration is expected to be rare, we explicitly
	* allow it.
	*/
	for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
	if (opr == pr)
	continue;

	if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
	continue;

	if (opr->ndpr_plen == pr->ndpr_plen &&
	in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
	&opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
	return (0);
	}

	/*
	* We prefer link-local addresses as the associated interface address.
	*/
	/* search for a link-local addr */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
	IN6_IFF_NOTREADY \| IN6_IFF_ANYCAST);
	if (ifa == NULL) {
	/* XXX: freebsd does not have ifa_ifwithaf */
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
	if (ifa->ifa_addr->sa_family == AF_INET6)
	break;
	}
	/* should we care about ia6_flags? */
	}
	if (ifa == NULL) {
	/*
	* This can still happen, when, for example, we receive an RA
	* containing a prefix with the L bit set and the A bit clear,
	* after removing all IPv6 addresses on the receiving
	* interface. This should, of course, be rare though.
	*/
	nd6log((LOG_NOTICE,
	"nd6_prefix_onlink: failed to find any ifaddr"
	" to add route for a prefix(%s/%d) on %s\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen, if_name(ifp)));
	return (0);
	}

	/*
	* in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
	* ifa->ifa_rtrequest = nd6_rtrequest;
	*/
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_len = sizeof(mask6);
	mask6.sin6_addr = pr->ndpr_mask;
	rtflags = ifa->ifa_flags \| RTF_CLONING \| RTF_UP;
	if (nd6_need_cache(ifp)) {
	/* explicitly set in case ifa_flags does not set the flag. */
	rtflags \|= RTF_CLONING;
	} else {
	/*
	* explicitly clear the cloning bit in case ifa_flags sets it.
	*/
	rtflags &= ~RTF_CLONING;
	}
	error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
	ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt);
	if (error == 0) {
	if (rt != NULL) /* this should be non NULL, though */ {
	+ rnh = V_rt_tables[rt->rt_fibnum][AF_INET6];
	+ RADIX_NODE_HEAD_LOCK(rnh);
	RT_LOCK(rt);
	if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) {
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
	rt->rt_ifp->if_type;
	((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
	rt->rt_ifp->if_index;
	}
	+ RADIX_NODE_HEAD_UNLOCK(rnh);
	nd6_rtmsg(RTM_ADD, rt);
	RT_UNLOCK(rt);
	}
	pr->ndpr_stateflags \|= NDPRF_ONLINK;
	} else {
	char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
	nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
	" prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
	"errno = %d\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen, if_name(ifp),
	ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
	ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
	}

	if (rt != NULL) {
	RT_LOCK(rt);
	RT_REMREF(rt);
	RT_UNLOCK(rt);
	}

	return (error);
	}

	int
	nd6_prefix_offlink(struct nd_prefix *pr)
	{
	INIT_VNET_INET6(curvnet);
	int error = 0;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	struct sockaddr_in6 sa6, mask6;
	struct rtentry *rt = NULL;
	char ip6buf[INET6_ADDRSTRLEN];

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
	nd6log((LOG_ERR,
	"nd6_prefix_offlink: %s/%d is already off-link\n",
	ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
	pr->ndpr_plen));
	return (EEXIST);
	}

	bzero(&sa6, sizeof(sa6));
	sa6.sin6_family = AF_INET6;
	sa6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
	sizeof(struct in6_addr));
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_family = AF_INET6;
	mask6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
	error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
	(struct sockaddr *)&mask6, 0, &rt);
	if (error == 0) {
	pr->ndpr_stateflags &= ~NDPRF_ONLINK;

	/* report the route deletion to the routing socket. */
	if (rt != NULL)
	nd6_rtmsg(RTM_DELETE, rt);

	/*
	* There might be the same prefix on another interface,
	* the prefix which could not be on-link just because we have
	* the interface route (see comments in nd6_prefix_onlink).
	* If there's one, try to make the prefix on-link on the
	* interface.
	*/
	for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
	if (opr == pr)
	continue;

	if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
	continue;

	/*
	* KAME specific: detached prefixes should not be
	* on-link.
	*/
	if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
	continue;

	if (opr->ndpr_plen == pr->ndpr_plen &&
	in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
	&opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
	int e;

	if ((e = nd6_prefix_onlink(opr)) != 0) {
	nd6log((LOG_ERR,
	"nd6_prefix_offlink: failed to "
	"recover a prefix %s/%d from %s "
	"to %s (errno = %d)\n",
	ip6_sprintf(ip6buf,
	&opr->ndpr_prefix.sin6_addr),
	opr->ndpr_plen, if_name(ifp),
	if_name(opr->ndpr_ifp), e));
	}
	}
	}
	} else {
	/* XXX: can we still set the NDPRF_ONLINK flag? */
	nd6log((LOG_ERR,
	"nd6_prefix_offlink: failed to delete route: "
	"%s/%d on %s (errno = %d)\n",
	ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
	if_name(ifp), error));
	}

	if (rt != NULL) {
	RTFREE(rt);
	}

	return (error);
	}

	static struct in6_ifaddr *
	in6_ifadd(struct nd_prefixctl *pr, int mcast)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = pr->ndpr_ifp;
	struct ifaddr *ifa;
	struct in6_aliasreq ifra;
	struct in6_ifaddr ia, ib;
	int error, plen0;
	struct in6_addr mask;
	int prefixlen = pr->ndpr_plen;
	int updateflags;
	char ip6buf[INET6_ADDRSTRLEN];

	in6_prefixlen2mask(&mask, prefixlen);

	/*
	* find a link-local address (will be interface ID).
	* Is it really mandatory? Theoretically, a global or a site-local
	* address can be configured without a link-local address, if we
	* have a unique interface identifier...
	*
	* it is not mandatory to have a link-local address, we can generate
	* interface identifier on the fly. we do this because:
	* (1) it should be the easiest way to find interface identifier.
	* (2) RFC2462 5.4 suggesting the use of the same interface identifier
	* for multiple addresses on a single interface, and possible shortcut
	* of DAD. we omitted DAD for this reason in the past.
	* (3) a user can prevent autoconfiguration of global address
	* by removing link-local address by hand (this is partly because we
	* don't have other way to control the use of IPv6 on an interface.
	* this has been our design choice - cf. NRL's "ifconfig auto").
	* (4) it is easier to manage when an interface has addresses
	* with the same interface identifier, than to have multiple addresses
	* with different interface identifiers.
	*/
	ifa = (struct ifaddr )in6ifa_ifpforlinklocal(ifp, 0); / 0 is OK? */
	if (ifa)
	ib = (struct in6_ifaddr *)ifa;
	else
	return NULL;

	/* prefixlen + ifidlen must be equal to 128 */
	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
	if (prefixlen != plen0) {
	nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
	"(prefix=%d ifid=%d)\n",
	if_name(ifp), prefixlen, 128 - plen0));
	return NULL;
	}

	/* make ifaddr */

	bzero(&ifra, sizeof(ifra));
	/*
	* in6_update_ifa() does not use ifra_name, but we accurately set it
	* for safety.
	*/
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr.sin6_family = AF_INET6;
	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
	/* prefix */
	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];

	/* interface ID */
	ifra.ifra_addr.sin6_addr.s6_addr32[0] \|=
	(ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
	ifra.ifra_addr.sin6_addr.s6_addr32[1] \|=
	(ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
	ifra.ifra_addr.sin6_addr.s6_addr32[2] \|=
	(ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
	ifra.ifra_addr.sin6_addr.s6_addr32[3] \|=
	(ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);

	/* new prefix mask. */
	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
	ifra.ifra_prefixmask.sin6_family = AF_INET6;
	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
	sizeof(ifra.ifra_prefixmask.sin6_addr));

	/* lifetimes. */
	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;

	/* XXX: scope zone ID? */

	ifra.ifra_flags \|= IN6_IFF_AUTOCONF; /* obey autoconf */

	/*
	* Make sure that we do not have this address already. This should
	* usually not happen, but we can still see this case, e.g., if we
	* have manually configured the exact address to be configured.
	*/
	if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
	/* this should be rare enough to make an explicit log */
	log(LOG_INFO, "in6_ifadd: %s is already configured\n",
	ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
	return (NULL);
	}

	/*
	* Allocate ifaddr structure, link into chain, etc.
	* If we are going to create a new address upon receiving a multicasted
	* RA, we need to impose a random delay before starting DAD.
	* [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
	*/
	updateflags = 0;
	if (mcast)
	updateflags \|= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
	nd6log((LOG_ERR,
	"in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
	ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
	if_name(ifp), error));
	return (NULL); /* ifaddr must not have been allocated. */
	}

	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);

	return (ia); /* this is always non-NULL */
	}

	/*
	* ia0 - corresponding public address
	*/
	int
	in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
	{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
	struct in6_ifaddr newia, ia;
	struct in6_aliasreq ifra;
	int i, error;
	int trylimit = 3; /* XXX: adhoc value */
	int updateflags;
	u_int32_t randid[2];
	time_t vltime0, pltime0;

	bzero(&ifra, sizeof(ifra));
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr = ia0->ia_addr;
	/* copy prefix mask */
	ifra.ifra_prefixmask = ia0->ia_prefixmask;
	/* clear the old IFID */
	for (i = 0; i < 4; i++) {
	ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
	ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
	}

	again:
	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
	(const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
	nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
	"random IFID\n"));
	return (EINVAL);
	}
	ifra.ifra_addr.sin6_addr.s6_addr32[2] \|=
	(randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
	ifra.ifra_addr.sin6_addr.s6_addr32[3] \|=
	(randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));

	/*
	* in6_get_tmpifid() quite likely provided a unique interface ID.
	* However, we may still have a chance to see collision, because
	* there may be a time lag between generation of the ID and generation
	* of the address. So, we'll do one more sanity check.
	*/
	for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
	if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
	&ifra.ifra_addr.sin6_addr)) {
	if (trylimit-- == 0) {
	/*
	* Give up. Something strange should have
	* happened.
	*/
	nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
	"find a unique random IFID\n"));
	return (EEXIST);
	}
	forcegen = 1;
	goto again;
	}
	}

	/*
	* The Valid Lifetime is the lower of the Valid Lifetime of the
	* public address or TEMP_VALID_LIFETIME.
	* The Preferred Lifetime is the lower of the Preferred Lifetime
	* of the public address or TEMP_PREFERRED_LIFETIME -
	* DESYNC_FACTOR.
	*/
	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
	vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
	(ia0->ia6_lifetime.ia6t_vltime -
	(time_second - ia0->ia6_updatetime));
	if (vltime0 > V_ip6_temp_valid_lifetime)
	vltime0 = V_ip6_temp_valid_lifetime;
	} else
	vltime0 = V_ip6_temp_valid_lifetime;
	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
	pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
	(ia0->ia6_lifetime.ia6t_pltime -
	(time_second - ia0->ia6_updatetime));
	if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
	pltime0 = V_ip6_temp_preferred_lifetime -
	V_ip6_desync_factor;
	}
	} else
	pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
	ifra.ifra_lifetime.ia6t_vltime = vltime0;
	ifra.ifra_lifetime.ia6t_pltime = pltime0;

	/*
	* A temporary address is created only if this calculated Preferred
	* Lifetime is greater than REGEN_ADVANCE time units.
	*/
	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
	return (0);

	/* XXX: scope zone ID? */

	ifra.ifra_flags \|= (IN6_IFF_AUTOCONF\|IN6_IFF_TEMPORARY);

	/* allocate ifaddr structure, link into chain, etc. */
	updateflags = 0;
	if (delay)
	updateflags \|= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
	return (error);

	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	if (newia == NULL) { /* XXX: can it happen? */
	nd6log((LOG_ERR,
	"in6_tmpifadd: ifa update succeeded, but we got "
	"no ifaddr\n"));
	return (EINVAL); /* XXX */
	}
	newia->ia6_ndpr = ia0->ia6_ndpr;
	newia->ia6_ndpr->ndpr_refcnt++;

	/*
	* A newly added address might affect the status of other addresses.
	* XXX: when the temporary address is generated with a new public
	* address, the onlink check is redundant. However, it would be safe
	* to do the check explicitly everywhere a new address is generated,
	* and, in fact, we surely need the check when we create a new
	* temporary address due to deprecation of an old temporary address.
	*/
	pfxlist_onlink_check();

	return (0);
	}

	/*
	 * Derive the absolute preferred and expiration timestamps of a prefix
	 * from its relative lifetimes, measured from the current time_second.
	 * A lifetime of ND6_INFINITE_LIFETIME is recorded as a timestamp of 0.
	 * Always returns 0.
	 */
	static int
	in6_init_prefix_ltimes(struct nd_prefix *ndpr)
	{
		/* preferred lifetime -> ndpr_preferred */
		if (ndpr->ndpr_pltime != ND6_INFINITE_LIFETIME) {
			ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
		} else {
			ndpr->ndpr_preferred = 0;
		}

		/* valid lifetime -> ndpr_expire */
		if (ndpr->ndpr_vltime != ND6_INFINITE_LIFETIME) {
			ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
		} else {
			ndpr->ndpr_expire = 0;
		}

		return (0);
	}

	static void
	in6_init_address_ltimes(struct nd_prefix new, struct in6_addrlifetime lt6)
	{
	/* init ia6t_expire */
	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
	lt6->ia6t_expire = 0;
	else {
	lt6->ia6t_expire = time_second;
	lt6->ia6t_expire += lt6->ia6t_vltime;
	}

	/* init ia6t_preferred */
	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
	lt6->ia6t_preferred = 0;
	else {
	lt6->ia6t_preferred = time_second;
	lt6->ia6t_preferred += lt6->ia6t_pltime;
	}
	}

	/*
	 * Delete all the routing table entries that use the specified gateway.
	 * Only link-local gateway addresses are processed; for any other
	 * address the function returns without touching the routing table.
	 * XXX: this function searches through all entries of the routing
	 * table, so it shouldn't be called when acting as a router.
	 *
	 * gateway: address of the gateway whose routes are to be removed.
	 * ifp:     not referenced by this function.
	 */
	void
	rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
	{
		INIT_VNET_NET(curvnet);
		struct radix_node_head *rnh = V_rt_tables[0][AF_INET6];
		int s = splnet();

		/* We care only about link-local addresses. */
		if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
			splx(s);
			return;
		}

		/*
		 * Hand rt6_deleteroute() and the gateway address to the tree
		 * walker while the radix head is locked.
		 */
		RADIX_NODE_HEAD_LOCK(rnh);
		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
		RADIX_NODE_HEAD_UNLOCK(rnh);
		splx(s);
	}

	static int
	rt6_deleteroute(struct radix_node rn, void arg)
	{
	#define SIN6(s) ((struct sockaddr_in6 *)s)
	struct rtentry rt = (struct rtentry )rn;
	struct in6_addr gate = (struct in6_addr )arg;

	if (rt->rt_gateway == NULL \|\| rt->rt_gateway->sa_family != AF_INET6)
	return (0);

	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
	return (0);
	}

	/*
	* Do not delete a static route.
	* XXX: this seems to be a bit ad-hoc. Should we consider the
	* 'cloned' bit instead?
	*/
	if ((rt->rt_flags & RTF_STATIC) != 0)
	return (0);

	/*
	* We delete only host route. This means, in particular, we don't
	* delete default route.
	*/
	if ((rt->rt_flags & RTF_HOST) == 0)
	return (0);

	return (rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	rt_mask(rt), rt->rt_flags, 0));
	#undef SIN6
	}

	int
	nd6_setdefaultiface(int ifindex)
	{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	int error = 0;

	if (ifindex < 0 \|\| V_if_index < ifindex)
	return (EINVAL);
	if (ifindex != 0 && !ifnet_byindex(ifindex))
	return (EINVAL);

	if (V_nd6_defifindex != ifindex) {
	V_nd6_defifindex = ifindex;
	if (V_nd6_defifindex > 0)
	V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
	else
	V_nd6_defifp = NULL;

	/*
	* Our current implementation assumes one-to-one maping between
	* interfaces and links, so it would be natural to use the
	* default interface as the default link.
	*/
	scope6_setdefault(V_nd6_defifp);
	}

	return (error);
	}

File Metadata

Mime Type: text/x-diff
Expires: Fri, Feb 21, 9:02 PM (8 h, 22 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 16744960
Default Alt Text: (712 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions