Index: head/sys/net/if.c
===================================================================
--- head/sys/net/if.c	(revision 120726)
+++ head/sys/net/if.c	(revision 120727)
@@ -1,2008 +1,2010 @@
 /*
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #include "opt_compat.h"
 #include "opt_inet6.h"
 #include "opt_inet.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/jail.h>
 #include <machine/stdarg.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 
 #if defined(INET) || defined(INET6)
 /*XXX*/
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #ifdef INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #endif
 #endif
 #ifdef INET
 #include <netinet/if_ether.h>
 #endif
 
 static int	ifconf(u_long, caddr_t);
 static void	if_grow(void);
 static void	if_init(void *);
 static void	if_check(void *);
 static int	if_findindex(struct ifnet *);
 static void	if_qflush(struct ifqueue *);
 static void	if_slowtimo(void *);
 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	if_rtdel(struct radix_node *, void *);
 static struct	if_clone *if_clone_lookup(const char *, int *);
 static int	if_clone_list(struct if_clonereq *);
 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
 #ifdef INET6
 /*
  * XXX: declare here to avoid to include many inet6 related files..
  * should be more generalized?
  */
 extern void	nd6_setmtu(struct ifnet *);
 #endif
 
 /* Highest interface index in use: raised in if_attach(), lowered in if_detach(). */
 int	if_index = 0;
 /* Index-addressed table of per-interface entries; allocated and enlarged by if_grow(). */
 struct	ifindex_entry *ifindex_table = NULL;
 /* Send queue length that if_check() applies to drivers that left ifq_maxlen at 0. */
 int	ifqmaxlen = IFQ_MAXLEN;
 struct	ifnethead ifnet;	/* depend on static init XXX */
 /* NOTE(review): presumably the mutex behind the IFNET_*LOCK() macros -- confirm in if_var.h. */
 struct	mtx ifnet_lock;
 /* Number of registered cloners; kept by if_clone_attach()/if_clone_detach(). */
 static int	if_cloners_count;
 /* List of registered interface cloners, searched by if_clone_lookup(). */
 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
 
 /* Capacity of ifindex_table in entries; if_grow() doubles it before allocating. */
 static int	if_indexlim = 8;
 /* knotes attached through the minor 0 "network" device (see netkqfilter()). */
 static struct	klist ifklist;
 
 static void	filt_netdetach(struct knote *kn);
 static int	filt_netdev(struct knote *kn, long hint);
 
 static struct filterops netdev_filtops =
     { 1, NULL, filt_netdetach, filt_netdev };
 
 /*
  * System initialization
  */
 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
 
 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
 MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");
 
 static d_open_t		netopen;
 static d_close_t	netclose;
 static d_ioctl_t	netioctl;
 static d_kqfilter_t	netkqfilter;
 
 /*
  * Character device switch shared by the devices that if_init() and
  * if_attach() create with make_dev().  Its d_name ("net") is also used
  * as the resource name in if_findindex() and as the name prefix in
  * ifunit().
  */
 static struct cdevsw net_cdevsw = {
 	.d_open =	netopen,
 	.d_close =	netclose,
 	.d_ioctl =	netioctl,
 	.d_name =	"net",
 	.d_kqfilter =	netkqfilter,
 };
 
 /*
  * Open routine for the network devices: nothing to set up, always
  * succeeds.
  */
 static int
 netopen(dev_t dev, int flag, int mode, struct thread *td)
 {
 	return (0);
 }
 
 /*
  * Close routine for the network devices: nothing to tear down, always
  * succeeds.
  */
 static int
 netclose(dev_t dev, int flags, int fmt, struct thread *td)
 {
 	return (0);
 }
 
 /*
  * ioctl routine for the network devices.
  *
  * Only ioctls in group 'i' are accepted.  Minor 0 is the special
  * "network" device and answers SIOCGIFCONF only; any other minor is
  * taken as an interface index and the request is passed to ifhwioctl().
  *
  * Returns EOPNOTSUPP for unsupported requests (including ENOIOCTL from
  * ifhwioctl()), ENXIO when no interface has the given index, otherwise
  * the handler's result.
  */
 static int
 netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
 {
 	struct ifnet *ifp;
 	int unit, rc;
 
 	/* only support interface specific ioctls */
 	if (IOCGROUP(cmd) != 'i')
 		return (EOPNOTSUPP);
 
 	unit = minor(dev);
 	if (unit == 0) {
 		/* special network device, not interface. */
 		if (cmd != SIOCGIFCONF)
 			return (EOPNOTSUPP);
 		return (ifconf(cmd, data));	/* XXX remove cmd */
 	}
 
 	if ((ifp = ifnet_byindex(unit)) == NULL)
 		return (ENXIO);
 
 	rc = ifhwioctl(cmd, ifp, data, td);
 	return (rc == ENOIOCTL ? EOPNOTSUPP : rc);
 }
 
 /*
  * kqueue attach routine for the network devices.
  *
  * Minor 0 attaches the knote to the global ifklist; any other minor is
  * taken as an interface index and the knote goes on that interface's
  * if_klist.  Only EVFILT_NETDEV is accepted.
  *
  * Returns 0 on success, 1 if the interface does not exist or the filter
  * is not supported.
  */
 static int
 netkqfilter(dev_t dev, struct knote *kn)
 {
 	struct klist *list;
 	struct ifnet *ifp;
 	int unit;
 
 	unit = minor(dev);
 	if (unit != 0) {
 		ifp = ifnet_byindex(unit);
 		if (ifp == NULL)
 			return (1);
 		list = &ifp->if_klist;
 	} else
 		list = &ifklist;
 
 	if (kn->kn_filter != EVFILT_NETDEV)
 		return (1);
 	kn->kn_fop = &netdev_filtops;
 
 	/* Remember the list so that filt_netdetach() can unlink the knote. */
 	kn->kn_hook = (caddr_t)list;
 
 	/* XXX locking? */
 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
 
 	return (0);
 }
 
 /*
  * Detach a knote from the klist recorded in kn_hook by netkqfilter().
  * Knotes already marked KN_DETACHED (see filt_netdev()) are left alone.
  */
 static void
 filt_netdetach(struct knote *kn)
 {
 
 	if ((kn->kn_status & KN_DETACHED) == 0)
 		SLIST_REMOVE((struct klist *)kn->kn_hook, kn, knote,
 		    kn_selnext);
 }
 
 /*
  * EVFILT_NETDEV event routine.
  *
  * For an ordinary hint, record it as the current status, latch it into
  * kn_fflags if the caller subscribed to it (kn_sfflags), and report
  * whether any subscribed event is pending.  NOTE_EXIT marks the knote
  * detached and always fires.
  */
 static int
 filt_netdev(struct knote *kn, long hint)
 {
 
 	if (hint != NOTE_EXIT) {
 		kn->kn_data = hint;		/* current status */
 		if (kn->kn_sfflags & hint)
 			kn->kn_fflags |= hint;
 		return (kn->kn_fflags != 0);
 	}
 
 	/* Currently NOTE_EXIT is abused to indicate device detach. */
 	kn->kn_data = NOTE_LINKINV;
 	kn->kn_status |= KN_DETACHED;
 	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 	return (1);
 }
 
 /*
  * Network interface utility routines.
  *
  * Routines with ifa_ifwith* names take sockaddr *'s as
  * parameters.
  */
 /*
  * SYSINIT hook: set up the interface lock, the global interface list and
  * klist, allocate the initial index table, and create the minor 0
  * "network" device.  The argument is unused.
  */
 /* ARGSUSED */
 static void
 if_init(void *dummy)
 {
 
 	IFNET_LOCK_INIT();
 	TAILQ_INIT(&ifnet);
 	SLIST_INIT(&ifklist);
 
 	/* create initial table */
 	if_grow();
 
 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL,
 	    0600, "network");
 }
 
 static void
 if_grow(void)
 {
 	u_int n;
 	struct ifindex_entry *e;
 
 	if_indexlim <<= 1;
 	n = if_indexlim * sizeof(*e);
 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
 	if (ifindex_table != NULL) {
 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
 		free((caddr_t)ifindex_table, M_IFADDR);
 	}
 	ifindex_table = e;
 }
 
 /*
  * SYSINIT hook: walk the attached interfaces and repair send queues that
  * the driver left incomplete (zero ifq_maxlen, uninitialized queue
  * mutex), complaining about each, then start the watchdog timer by
  * calling if_slowtimo().  The argument is unused.
  */
 /* ARGSUSED */
 static void
 if_check(void *dummy)
 {
 	struct ifnet *ifp;
 	struct ifqueue *sndq;
 	int ipl;
 
 	ipl = splimp();
 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		sndq = &ifp->if_snd;
 
 		if (sndq->ifq_maxlen == 0) {
 			printf("%s%d XXX: driver didn't set ifq_maxlen\n",
 			    ifp->if_name, ifp->if_unit);
 			sndq->ifq_maxlen = ifqmaxlen;
 		}
 
 		if (!mtx_initialized(&sndq->ifq_mtx)) {
 			printf("%s%d XXX: driver didn't initialize queue mtx\n",
 			    ifp->if_name, ifp->if_unit);
 			mtx_init(&sndq->ifq_mtx, "unknown",
 			    MTX_NETWORK_LOCK, MTX_DEF);
 		}
 	}
 	IFNET_RUNLOCK();
 	splx(ipl);
 
 	if_slowtimo(0);
 }
 
 /*
  * Choose the interface index for an interface that is being attached.
  *
  * First look through the resource entries for the "net" device name for
  * one whose "ether" string equals the interface's link-level address
  * (only for the types that use struct arpcom) or whose "dev" string
  * equals "<name><unit>".  If such an entry exists and its unit is not
  * in use, that unit is the index.  Otherwise return the lowest index,
  * starting at 1, that is neither in use nor reserved by an "ether" or
  * "dev" resource entry.
  */
 static int
 if_findindex(struct ifnet *ifp)
 {
 	int i, unit;
 	char eaddr[18], devname[32];
 	const char *name, *p;
 
 	switch (ifp->if_type) {
 	case IFT_ETHER:			/* these types use struct arpcom */
 	case IFT_FDDI:
 	case IFT_XETHER:
 	case IFT_ISO88025:
 	case IFT_L2VLAN:
 		snprintf(eaddr, 18, "%6D",
 		    ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":");
 		break;
 	default:
 		eaddr[0] = '\0';
 		break;
 	}
 	snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit);
 	name = net_cdevsw.d_name;
 	i = 0;
 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
 		if (resource_string_value(name, unit, "ether", &p) == 0)
 			if (strcmp(p, eaddr) == 0)
 				goto found;
 		if (resource_string_value(name, unit, "dev", &p) == 0)
 			if (strcmp(p, devname) == 0)
 				goto found;
 	}
 	unit = 0;
 found:
 	if (unit != 0) {
 		/*
 		 * Indices above if_index are unused, and may lie beyond the
 		 * end of ifindex_table, so only consult the table for
 		 * indices it is known to cover (same guard as the loop
 		 * below).
 		 */
 		if (unit > if_index || ifaddr_byindex(unit) == NULL)
 			return (unit);
 		printf("%s%d in use, cannot hardwire it to %s.\n",
 		    name, unit, devname);
 	}
 	for (unit = 1; ; unit++) {
 		/* Skip indices that already have a link-level address. */
 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
 			continue;
 		/* Skip indices reserved for some other interface. */
 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
 		    resource_string_value(name, unit, "dev", &p) == 0)
 			continue;
 		break;
 	}
 	return (unit);
 }
 
 /*
  * Attach an interface to the list of "active" interfaces.
  *
  * Links ifp onto the global ifnet list, initializes its address,
  * prefix, multicast and knote lists, assigns it an interface index
  * (growing the index table as needed), creates its device node and
  * alias, initializes the send queue mutex, builds the AF_LINK address
  * and netmask that name the interface, and announces the arrival on
  * the routing socket.
  */
 void
 if_attach(ifp)
 	struct ifnet *ifp;
 {
 	unsigned socksize, ifasize;
 	int namelen, masklen;
 	char workbuf[64];
 	register struct sockaddr_dl *sdl;
 	register struct ifaddr *ifa;
 
 	IFNET_WLOCK();
 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
 	IFNET_WUNLOCK();
 	/*
 	 * XXX -
 	 * The old code would work if the interface passed a pre-existing
 	 * chain of ifaddrs to this code.  We don't trust our callers to
 	 * properly initialize the tailq, however, so we no longer allow
 	 * this unlikely case.
 	 */
 	TAILQ_INIT(&ifp->if_addrhead);
 	TAILQ_INIT(&ifp->if_prefixhead);
 	TAILQ_INIT(&ifp->if_multiaddrs);
 	SLIST_INIT(&ifp->if_klist);
 	getmicrotime(&ifp->if_lastchange);
 
 #ifdef MAC
 	mac_init_ifnet(ifp);
 	mac_create_ifnet(ifp);
 #endif
 
 	ifp->if_index = if_findindex(ifp);
 	if (ifp->if_index > if_index)
 		if_index = ifp->if_index;
 	/*
 	 * A hardwired index may lie well beyond the current table, so keep
 	 * doubling until the table actually covers if_index.
 	 */
 	while (if_index >= if_indexlim)
 		if_grow();
 
 	ifnet_byindex(ifp->if_index) = ifp;
 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index,
 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s%d",
 	    net_cdevsw.d_name, ifp->if_name, ifp->if_unit);
 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
 	    net_cdevsw.d_name, ifp->if_index);
 
 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF);
 
 	/*
 	 * create a Link Level name for this device
 	 */
 	namelen = snprintf(workbuf, sizeof(workbuf),
 	    "%s%d", ifp->if_name, ifp->if_unit);
 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
 	/* The mask covers the sockaddr_dl header plus the name bytes. */
 	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
 	socksize = masklen + ifp->if_addrlen;
 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
 	if (socksize < sizeof(*sdl))
 		socksize = sizeof(*sdl);
 	socksize = ROUNDUP(socksize);
 	/* One allocation holds the ifaddr, the address and the netmask. */
 	ifasize = sizeof(*ifa) + 2 * socksize;
 	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
 	if (ifa) {
 		IFA_LOCK_INIT(ifa);
 		/* The address immediately follows the ifaddr structure. */
 		sdl = (struct sockaddr_dl *)(ifa + 1);
 		sdl->sdl_len = socksize;
 		sdl->sdl_family = AF_LINK;
 		bcopy(workbuf, sdl->sdl_data, namelen);
 		sdl->sdl_nlen = namelen;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = ifp->if_type;
 		ifaddr_byindex(ifp->if_index) = ifa;
 		ifa->ifa_ifp = ifp;
 		ifa->ifa_rtrequest = link_rtrequest;
 		ifa->ifa_addr = (struct sockaddr *)sdl;
 		/* The netmask follows the address; all name bits are set. */
 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
 		ifa->ifa_netmask = (struct sockaddr *)sdl;
 		sdl->sdl_len = masklen;
 		while (namelen != 0)
 			sdl->sdl_data[--namelen] = 0xff;
 		ifa->ifa_refcnt = 1;
 		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
 	}
 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
 
 	/* Announce the interface. */
 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 }
 
 /*
  * Detach an interface, removing it from the
  * list of "active" interfaces.
  *
  * In order: marks the interface down, clears its slots in the index
  * table and destroys its device node, trims if_index, purges every
  * address on the interface, deletes all routes that still point at it,
  * announces the departure, notifies knote listeners, unlinks it from
  * the ifnet list and destroys the send queue mutex.
  */
 void
 if_detach(ifp)
 	struct ifnet *ifp;
 {
 	struct ifaddr *ifa;
 	struct radix_node_head	*rnh;
 	int s;
 	int i;
 
 	/*
 	 * Remove routes and flush queues.
 	 */
 	s = splnet();
 	if_down(ifp);
 
 	/*
 	 * Remove address from ifindex_table[] and maybe decrement if_index.
 	 * Clean up all addresses.
 	 */
 	ifaddr_byindex(ifp->if_index) = NULL;
 	destroy_dev(ifdev_byindex(ifp->if_index));
 	ifdev_byindex(ifp->if_index) = NULL;
 
 	/* Lower if_index to the highest index that still has an address. */
 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
 		if_index--;
 
 	/*
 	 * Always take the first remaining address: each pass is expected to
 	 * remove it from the list, either through the protocol-specific
 	 * code or through the generic removal at the bottom of the loop.
 	 */
 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
 	     ifa = TAILQ_FIRST(&ifp->if_addrhead)) {
 #ifdef INET
 		/* XXX: Ugly!! ad hoc just for INET */
 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
 			struct ifaliasreq ifr;
 
 			bzero(&ifr, sizeof(ifr));
 			ifr.ifra_addr = *ifa->ifa_addr;
 			if (ifa->ifa_dstaddr)
 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 			/* On failure fall through to the generic removal. */
 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
 			    NULL) == 0)
 				continue;
 		}
 #endif /* INET */
 #ifdef INET6
 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
 			in6_purgeaddr(ifa);
 			/* ifp_addrhead is already updated */
 			continue;
 		}
 #endif /* INET6 */
 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
 		IFAFREE(ifa);
 	}
 
 #ifdef INET6
 	/*
 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
 	 * before removing routing entries below, since IPv6 interface direct
 	 * routes are expected to be removed by the IPv6-specific kernel API.
 	 * Otherwise, the kernel will detect some inconsistency and bark it.
 	 */
 	in6_ifdetach(ifp);
 #endif
 
 	/*
 	 * Delete all remaining routes using this interface.
 	 * Unfortunately the only way to do this is to slog through
 	 * the entire routing table looking for routes which point
 	 * to this interface...oh well...
 	 */
 	for (i = 1; i <= AF_MAX; i++) {
 		if ((rnh = rt_tables[i]) == NULL)
 			continue;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	/* Announce that the interface is gone. */
 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 
 #ifdef MAC
 	mac_destroy_ifnet(ifp);
 #endif /* MAC */
 	/* NOTE_EXIT tells filt_netdev() that the device is going away. */
 	KNOTE(&ifp->if_klist, NOTE_EXIT);
 	IFNET_WLOCK();
 	TAILQ_REMOVE(&ifnet, ifp, if_link);
 	IFNET_WUNLOCK();
 	mtx_destroy(&ifp->if_snd.ifq_mtx);
 	splx(s);
 }
 
 /*
  * Delete Routes for a Network Interface
  * 
  * Called for each routing entry via the rnh->rnh_walktree() call above
  * to delete all route entries referencing a detaching network interface.
  *
  * Arguments:
  *	rn	pointer to node in the routing table
  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
  *
  * Returns:
  *	0	successful
  *	errno	failed - reason indicated
  *
  */
 static int
 if_rtdel(rn, arg)
 	struct radix_node	*rn;
 	void			*arg;
 {
 	struct rtentry	*rt = (struct rtentry *)rn;
 	struct ifnet	*ifp = arg;
 	int		err;
 
 	if (rt->rt_ifp == ifp) {
 
 		/*
 		 * Protect (sorta) against walktree recursion problems
 		 * with cloned routes
 		 */
 		if ((rt->rt_flags & RTF_UP) == 0)
 			return (0);
 
 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
 				rt_mask(rt), rt->rt_flags,
 				(struct rtentry **) NULL);
 		if (err) {
 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Create a clone network interface.
  */
 int
 if_clone_create(name, len)
 	char *name;
 	int len;
 {
 	struct if_clone *ifc;
 	char *dp;
 	int wildcard, bytoff, bitoff;
 	int unit;
 	int err;
 
 	ifc = if_clone_lookup(name, &unit);
 	if (ifc == NULL)
 		return (EINVAL);
 
 	if (ifunit(name) != NULL)
 		return (EEXIST);
 
 	bytoff = bitoff = 0;
 	wildcard = (unit < 0);
 	/*
 	 * Find a free unit if none was given.
 	 */ 
 	if (wildcard) {
 		while ((bytoff < ifc->ifc_bmlen)
 		    && (ifc->ifc_units[bytoff] == 0xff))
 			bytoff++;
 		if (bytoff >= ifc->ifc_bmlen)
 			return (ENOSPC);
 		while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
 			bitoff++;
 		unit = (bytoff << 3) + bitoff;
 	}
 
 	if (unit > ifc->ifc_maxunit)
 		return (ENXIO);
 
 	err = (*ifc->ifc_create)(ifc, unit);
 	if (err != 0)
 		return (err);
 
 	if (!wildcard) {
 		bytoff = unit >> 3;
 		bitoff = unit - (bytoff << 3);
 	}
 
 	/*
 	 * Allocate the unit in the bitmap.
 	 */
 	KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
 	    ("%s: bit is already set", __func__));
 	ifc->ifc_units[bytoff] |= (1 << bitoff);
 
 	/* In the wildcard case, we need to update the name. */
 	if (wildcard) {
 		for (dp = name; *dp != '\0'; dp++);
 		if (snprintf(dp, len - (dp-name), "%d", unit) >
 		    len - (dp-name) - 1) {
 			/*
 			 * This can only be a programmer error and
 			 * there's no straightforward way to recover if
 			 * it happens.
 			 */
 			panic("if_clone_create(): interface name too long");
 		}
 			
 	}
 
 	return (0);
 }
 
 /*
  * Destroy a clone network interface.
  *
  * Returns 0 on success; EINVAL if no cloner matches the name or the
  * unit is below the cloner's ifc_minifs, ENXIO if the interface does
  * not exist, EOPNOTSUPP if the cloner has no destroy routine.
  */
 int
 if_clone_destroy(const char *name)
 {
 	struct if_clone *ifc;
 	struct ifnet *ifp;
 	int unit, byte, bit;
 
 	if ((ifc = if_clone_lookup(name, &unit)) == NULL)
 		return (EINVAL);
 
 	/* Units below ifc_minifs may not be destroyed. */
 	if (unit < ifc->ifc_minifs)
 		return (EINVAL);
 
 	if ((ifp = ifunit(name)) == NULL)
 		return (ENXIO);
 
 	if (ifc->ifc_destroy == NULL)
 		return (EOPNOTSUPP);
 
 	(*ifc->ifc_destroy)(ifp);
 
 	/* Compute offset in the bitmap and deallocate the unit. */
 	byte = unit >> 3;
 	bit = unit - (byte << 3);
 	KASSERT((ifc->ifc_units[byte] & (1 << bit)) != 0,
 	    ("%s: bit is already cleared", __func__));
 	ifc->ifc_units[byte] &= ~(1 << bit);
 
 	return (0);
 }
 
 /*
  * Look up a network interface cloner.
  *
  * Returns the first registered cloner whose name is a prefix of name,
  * provided the rest of name is empty or consists of decimal digits
  * only.  The parsed unit number (-1 when no unit was given) is stored
  * through unitp when unitp is not NULL.  Returns NULL if no cloner
  * matches or the unit part is malformed; *unitp is untouched then.
  */
 static struct if_clone *
 if_clone_lookup(const char *name, int *unitp)
 {
 	struct if_clone *ifc;
 	const char *cp;
 	int i, unit;
 
 	cp = name;
 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;
 	    ifc = LIST_NEXT(ifc, ifc_list)) {
 		/* Compare the first ifc_namelen characters. */
 		cp = name;
 		for (i = 0; i < ifc->ifc_namelen; i++, cp++)
 			if (ifc->ifc_name[i] != *cp)
 				break;
 		if (i >= ifc->ifc_namelen)
 			break;		/* whole cloner name matched */
 	}
 
 	/* No match. */
 	if (ifc == NULL)
 		return ((struct if_clone *)NULL);
 
 	/* cp now points just past the cloner name. */
 	if (*cp == '\0')
 		unit = -1;
 	else {
 		unit = 0;
 		do {
 			if (*cp < '0' || *cp > '9') {
 				/* Bogus unit number. */
 				return (NULL);
 			}
 			unit = (unit * 10) + (*cp - '0');
 		} while (*++cp != '\0');
 	}
 
 	if (unitp != NULL)
 		*unitp = unit;
 	return (ifc);
 }
 
 /*
  * Register a network interface cloner.
  */
 void
 if_clone_attach(ifc)
 	struct if_clone *ifc;
 {
 	int bytoff, bitoff;
 	int err;
 	int len, maxclone;
 	int unit;
 
 	KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit,
 	    ("%s: %s requested more units then allowed (%d > %d)",
 	    __func__, ifc->ifc_name, ifc->ifc_minifs,
 	    ifc->ifc_maxunit + 1));
 	/*
 	 * Compute bitmap size and allocate it.
 	 */
 	maxclone = ifc->ifc_maxunit + 1;
 	len = maxclone >> 3;
 	if ((len << 3) < maxclone)
 		len++;
 	ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
 	ifc->ifc_bmlen = len;
 
 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
 	if_cloners_count++;
 
 	for (unit = 0; unit < ifc->ifc_minifs; unit++) {
 		err = (*ifc->ifc_create)(ifc, unit);
 		KASSERT(err == 0,
 		    ("%s: failed to create required interface %s%d",
 		    __func__, ifc->ifc_name, unit));
 
 		/* Allocate the unit in the bitmap. */
 		bytoff = unit >> 3;
 		bitoff = unit - (bytoff << 3);
 		ifc->ifc_units[bytoff] |= (1 << bitoff);
 	}
 }
 
 /*
  * Unregister a network interface cloner: unlink it from if_cloners,
  * release its unit bitmap and drop the cloner count.
  */
 void
 if_clone_detach(struct if_clone *ifc)
 {
 
 	LIST_REMOVE(ifc, ifc_list);
 	free(ifc->ifc_units, M_CLONE);
 	if_cloners_count--;
 }
 
 /*
  * Provide list of interface cloners to userspace.
  */
 static int
 if_clone_list(ifcr)
 	struct if_clonereq *ifcr;
 {
 	char outbuf[IFNAMSIZ], *dst;
 	struct if_clone *ifc;
 	int count, error = 0;
 
 	ifcr->ifcr_total = if_cloners_count;
 	if ((dst = ifcr->ifcr_buffer) == NULL) {
 		/* Just asking how many there are. */
 		return (0);
 	}
 
 	if (ifcr->ifcr_count < 0)
 		return (EINVAL);
 
 	count = (if_cloners_count < ifcr->ifcr_count) ?
 	    if_cloners_count : ifcr->ifcr_count;
 
 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
 		strncpy(outbuf, ifc->ifc_name, IFNAMSIZ);
 		outbuf[IFNAMSIZ - 1] = '\0';	/* sanity */
 		error = copyout(outbuf, dst, IFNAMSIZ);
 		if (error)
 			break;
 	}
 
 	return (error);
 }
 
 /* True when the first a1->sa_len bytes of sockaddrs a1 and a2 are identical. */
-#define	equal(a1, a2) \
-  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
+#define	equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
 
 /*
  * Locate an interface based on a complete address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithaddr(addr)
 	struct sockaddr *addr;
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link)
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (equal(addr, ifa->ifa_addr))
 				goto done;
 			/* IP6 doesn't have broadcast */
 			if ((ifp->if_flags & IFF_BROADCAST) &&
 			    ifa->ifa_broadaddr &&
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    equal(ifa->ifa_broadaddr, addr))
 				goto done;
 		}
 	ifa = NULL;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Locate the point to point interface with a given destination address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithdstaddr(addr)
 	struct sockaddr *addr;
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			continue;
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
 				goto done;
 		}
 	}
 	ifa = NULL;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Find an interface on a specific network.  If many, choice
  * is most specific found.
  *
  * AF_LINK addresses carrying a valid index are answered straight from
  * the index table.  Otherwise every address of every interface in the
  * same family is examined: on AF_INET point-to-point interfaces the
  * destination address must equal addr; elsewhere the address's
  * ifa_claim_addr handler decides if it has one, and failing that addr
  * is compared with the interface address under its netmask.  An exact
  * (point-to-point or claimed) match is returned at once; among netmask
  * matches the one rn_refines() prefers wins.  Returns NULL if nothing
  * matches.
  */
 struct ifaddr *
 ifa_ifwithnet(addr)
 	struct sockaddr *addr;
 {
 	register struct ifnet *ifp;
 	register struct ifaddr *ifa;
 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
 	u_int af = addr->sa_family;
 	char *addr_data = addr->sa_data, *cplim;
 
 	/*
 	 * AF_LINK addresses can be looked up directly by their index number,
 	 * so do that if we can.
 	 */
 	if (af == AF_LINK) {
-	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
 		return (ifaddr_byindex(sdl->sdl_index));
 	}
 
 	/*
 	 * Scan through each interface, looking for ones that have
 	 * addresses in this address family.
 	 */
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			register char *cp, *cp2, *cp3;
 
 			/*
 			 * The "next" label lets the netmask comparison below
 			 * jump here to move on to the next address.
 			 */
 			if (ifa->ifa_addr->sa_family != af)
 next:				continue;
 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
 				/*
 				 * This is a bit broken as it doesn't
 				 * take into account that the remote end may
 				 * be a single node in the network we are
 				 * looking for.
 				 * The trouble is that we don't know the
 				 * netmask for the remote end.
 				 */
 				if (ifa->ifa_dstaddr != 0
 				    && equal(addr, ifa->ifa_dstaddr))
 					goto done;
 			} else {
 				/*
 				 * if we have a special address handler,
 				 * then use it instead of the generic one.
 				 */
 	          		if (ifa->ifa_claim_addr) {
 					if ((*ifa->ifa_claim_addr)(ifa, addr))
 						goto done;
 					continue;
 				}
 
 				/*
 				 * Scan all the bits in the ifa's address.
 				 * If a bit disagrees with what we are
 				 * looking for, mask it with the netmask
 				 * to see if it really matters.
 				 * (A byte at a time)
 				 */
 				if (ifa->ifa_netmask == 0)
 					continue;
 				cp = addr_data;
 				cp2 = ifa->ifa_addr->sa_data;
 				cp3 = ifa->ifa_netmask->sa_data;
 				/* The netmask's own sa_len bounds the scan. */
 				cplim = ifa->ifa_netmask->sa_len
 					+ (char *)ifa->ifa_netmask;
 				while (cp3 < cplim)
 					if ((*cp++ ^ *cp2++) & *cp3++)
 						goto next; /* next address! */
 				/*
 				 * If the netmask of what we just found
 				 * is more specific than what we had before
 				 * (if we had one) then remember the new one
 				 * before continuing to search
 				 * for an even better one.
 				 */
 				if (ifa_maybe == 0 ||
 				    rn_refines((caddr_t)ifa->ifa_netmask,
 				    (caddr_t)ifa_maybe->ifa_netmask))
 					ifa_maybe = ifa;
 			}
 		}
 	}
 	ifa = ifa_maybe;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Find an interface address specific to an interface best matching
  * a given address.
  *
  * Only addresses of ifp in addr's family are considered.  An address
  * without a netmask matches when it, or its destination address, equals
  * addr.  On a point-to-point interface the destination address must
  * equal addr; otherwise addr must equal the interface address under
  * its netmask.  If nothing matches, the first address of the right
  * family is returned; NULL if there is none or the family is out of
  * range.
  */
 struct ifaddr *
 ifaof_ifpforaddr(addr, ifp)
 	struct sockaddr *addr;
 	register struct ifnet *ifp;
 {
 	register struct ifaddr *ifa;
 	register char *cp, *cp2, *cp3;
 	register char *cplim;
 	struct ifaddr *ifa_maybe = 0;
 	u_int af = addr->sa_family;
 
 	if (af >= AF_MAX)
 		return (0);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != af)
 			continue;
 		/* Remember the first candidate as the fallback answer. */
 		if (ifa_maybe == 0)
 			ifa_maybe = ifa;
 		if (ifa->ifa_netmask == 0) {
 			if (equal(addr, ifa->ifa_addr) ||
 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
 				goto done;
 			continue;
 		}
 		if (ifp->if_flags & IFF_POINTOPOINT) {
 			/*
 			 * An address may have no destination set; treat
 			 * that as "no match" instead of comparing against
 			 * a null pointer.
 			 */
 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
 				goto done;
 		} else {
 			/* Compare under the netmask, a byte at a time. */
 			cp = addr->sa_data;
 			cp2 = ifa->ifa_addr->sa_data;
 			cp3 = ifa->ifa_netmask->sa_data;
 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 			for (; cp3 < cplim; cp3++)
 				if ((*cp++ ^ *cp2++) & *cp3)
 					break;
 			/* Reaching the limit means every masked byte agreed. */
 			if (cp3 == cplim)
 				goto done;
 		}
 	}
 	ifa = ifa_maybe;
 done:
 	return (ifa);
 }
 
 #include <net/route.h>
 
 /*
  * Default action when installing a route with a Link Level gateway.
  * Lookup an appropriate real ifa to point to.
  * This should be moved to /sys/net/link.c eventually.
  *
  * Acts on RTM_ADD only, and only when the route already has an ifa
  * with an interface and a destination key.  The route's ifa is replaced
  * by the address ifaof_ifpforaddr() selects on that interface for the
  * destination, and the request is forwarded to that address's own
  * ifa_rtrequest handler if it has one other than this function.
  */
 static void
 link_rtrequest(cmd, rt, info)
 	int cmd;
 	register struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
-	register struct ifaddr *ifa;
+	register struct ifaddr *ifa, *oifa;
 	struct sockaddr *dst;
 	struct ifnet *ifp;
 
+	RT_LOCK_ASSERT(rt);
+
 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
 		return;
 	ifa = ifaof_ifpforaddr(dst, ifp);
 	/* Swap the route's ifa for the better match, if one was found. */
 	if (ifa) {
-		IFAFREE(rt->rt_ifa);
 		IFAREF(ifa);		/* XXX */
+		oifa = rt->rt_ifa;
 		rt->rt_ifa = ifa;
+		IFAFREE(oifa);
 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
 			ifa->ifa_rtrequest(cmd, rt, info);
 	}
 }
 
 /*
  * Mark an interface down and notify protocols of
  * the transition.
  * NOTE: must be called at splnet or eqivalent.
  */
 void
 if_unroute(ifp, flag, fam)
 	register struct ifnet *ifp;
 	int flag, fam;
 {
 	register struct ifaddr *ifa;
 
 	ifp->if_flags &= ~flag;
 	getmicrotime(&ifp->if_lastchange);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
 	if_qflush(&ifp->if_snd);
 	rt_ifmsg(ifp);
 }
 
 /*
  * Mark an interface up and notify protocols of
  * the transition.
  * NOTE: must be called at splnet or eqivalent.
  */
 void
 if_route(ifp, flag, fam)
 	register struct ifnet *ifp;
 	int flag, fam;
 {
 	register struct ifaddr *ifa;
 
 	ifp->if_flags |= flag;
 	getmicrotime(&ifp->if_lastchange);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
 	rt_ifmsg(ifp);
 #ifdef INET6
 	in6_if_up(ifp);
 #endif
 }
 
 /*
  * Mark an interface down (clear IFF_UP) and notify protocols of
  * the transition, for all address families.
  * NOTE: must be called at splnet or equivalent.
  */
 void
 if_down(struct ifnet *ifp)
 {
 
 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
 }
 
 /*
  * Mark an interface up (set IFF_UP) and notify protocols of
  * the transition, for all address families.
  * NOTE: must be called at splnet or equivalent.
  */
 void
 if_up(struct ifnet *ifp)
 {
 
 	if_route(ifp, IFF_UP, AF_UNSPEC);
 }
 
 /*
  * Flush an interface queue: free every packet chained through m_act
  * from the head, then reset the queue to empty.
  */
 static void
 if_qflush(struct ifqueue *ifq)
 {
 	struct mbuf *m, *next;
 
 	for (m = ifq->ifq_head; m != 0; m = next) {
 		/* Fetch the successor before the packet is freed. */
 		next = m->m_act;
 		m_freem(m);
 	}
 
 	ifq->ifq_head = 0;
 	ifq->ifq_tail = 0;
 	ifq->ifq_len = 0;
 }
 
 /*
  * Handle interface watchdog timer routines.  Each call decrements the
  * timer of every interface that has one running and, when a timer
  * reaches zero, calls the interface's watchdog routine if it has one.
  * The routine then reschedules itself with timeout().
  */
 static void
 if_slowtimo(void *arg)
 {
 	struct ifnet *ifp;
 	int ipl;
 
 	ipl = splimp();
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		/* Timer not armed. */
 		if (ifp->if_timer == 0)
 			continue;
 		/* Still counting down. */
 		if (--ifp->if_timer != 0)
 			continue;
 		if (ifp->if_watchdog)
 			(*ifp->if_watchdog)(ifp);
 	}
 	IFNET_RUNLOCK();
 	splx(ipl);
 
 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
 }
 
 /*
  * Map interface name to
  * interface structure pointer.
  *
  * An interface matches when the name of its device node equals
  * "net/<name>" or when dev_named() accepts name for that device.
  * Returns NULL if no interface matches.
  */
 struct ifnet *
 ifunit(const char *name)
 {
 	char devpath[IFNAMSIZ + 1];
 	struct ifnet *ifp;
 	dev_t dev;
 
 	/*
 	 * XXX
 	 * Devices should really be known as /dev/fooN, not /dev/net/fooN.
 	 */
 	snprintf(devpath, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name);
 
 	/* Now search all the interfaces for this name/number. */
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		dev = ifdev_byindex(ifp->if_index);
 		if (strcmp(devtoname(dev), devpath) == 0 ||
 		    dev_named(dev, name))
 			break;
 	}
 	IFNET_RUNLOCK();
 
 	/* ifp is NULL here if the loop ran off the end of the list. */
 	return (ifp);
 }
 
 /*
  * Map interface name in a sockaddr_dl to
  * interface structure pointer.
  *
  * Returns NULL unless sa is an AF_LINK address carrying a name of
  * 1 to IFNAMSIZ bytes; otherwise returns what ifunit() finds for
  * that name.
  */
 struct ifnet *
 if_withname(struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 	char namebuf[IFNAMSIZ+1];
 
 	sdl = (struct sockaddr_dl *)sa;
 	if (sa->sa_family != AF_LINK)
 		return NULL;
 	if (sdl->sdl_nlen == 0 || sdl->sdl_nlen > IFNAMSIZ)
 		return NULL;
 
 	/*
 	 * ifunit wants a NUL-terminated string.  It may not be NUL-terminated
 	 * in the sockaddr, and we don't want to change the caller's sockaddr
 	 * (there might not be room to add the trailing NUL anyway), so we make
 	 * a local copy that we know we can NUL-terminate safely.
 	 */
 	bcopy(sdl->sdl_data, namebuf, sdl->sdl_nlen);
 	namebuf[sdl->sdl_nlen] = '\0';
 
 	return ifunit(namebuf);
 }
 
 /*
  * Hardware specific interface ioctls.
  *
  * Handles the ioctl commands that operate on the interface itself rather
  * than on a protocol address.
  *
  *	cmd	ioctl command code
  *	ifp	interface the request applies to
  *	data	ioctl argument; read as a struct ifreq (struct ifstat for
  *		SIOCGIFSTATUS)
  *	td	calling thread, used for suser() and MAC credential checks
  *
  * Returns 0 on success or an errno value.  ENOIOCTL is returned for any
  * command that is not recognised here.
  */
 static int
 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 {
 	struct ifreq *ifr;
 	struct ifstat *ifs;
 	int error = 0;
 	int new_flags;
 
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
 	case SIOCGIFINDEX:
 		ifr->ifr_index = ifp->if_index;
 		break;
 
 	case SIOCGIFFLAGS:
 		/* The flag word is exported as two 16-bit halves. */
 		ifr->ifr_flags = ifp->if_flags & 0xffff;
 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
 		break;
 
 	case SIOCGIFCAP:
 		ifr->ifr_reqcap = ifp->if_capabilities;
 		ifr->ifr_curcap = ifp->if_capenable;
 		break;
 
 #ifdef MAC
 	case SIOCGIFMAC:
 		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCGIFMETRIC:
 		ifr->ifr_metric = ifp->if_metric;
 		break;
 
 	case SIOCGIFMTU:
 		ifr->ifr_mtu = ifp->if_mtu;
 		break;
 
 	case SIOCGIFPHYS:
 		ifr->ifr_phys = ifp->if_physical;
 		break;
 
 	case SIOCSIFFLAGS:
 		error = suser(td);
 		if (error)
 			return (error);
 		/* Reassemble the 32-bit flag word from its two halves. */
 		new_flags = (ifr->ifr_flags & 0xffff) |
 		    (ifr->ifr_flagshigh << 16);
 		if (ifp->if_flags & IFF_SMART) {
 			/* Smart drivers twiddle their own routes */
 		} else if (ifp->if_flags & IFF_UP &&
 		    (new_flags & IFF_UP) == 0) {
 			int s = splimp();
 			if_down(ifp);
 			splx(s);
 		} else if (new_flags & IFF_UP &&
 		    (ifp->if_flags & IFF_UP) == 0) {
 			int s = splimp();
 			if_up(ifp);
 			splx(s);
 		}
 		/* Bits in IFF_CANTCHANGE keep their current value. */
 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 			(new_flags &~ IFF_CANTCHANGE);
 		if (new_flags & IFF_PPROMISC) {
 			/* Permanently promiscuous mode requested */
 			ifp->if_flags |= IFF_PROMISC;
 		} else if (ifp->if_pcount == 0) {
 			ifp->if_flags &= ~IFF_PROMISC;
 		}
 		if (ifp->if_ioctl)
 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFCAP:
 		error = suser(td);
 		if (error)
 			return (error);
 		/* Only capabilities the interface advertises may be requested. */
 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
 			return (EINVAL);
 		/*
 		 * An interface without an ioctl handler cannot change its
 		 * capabilities; refuse instead of calling through NULL.
 		 */
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		(void) (*ifp->if_ioctl)(ifp, cmd, data);
 		break;
 
 #ifdef MAC
 	case SIOCSIFMAC:
 		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCSIFMETRIC:
 		error = suser(td);
 		if (error)
 			return (error);
 		ifp->if_metric = ifr->ifr_metric;
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYS:
 		error = suser(td);
 		if (error)
 			return error;
 		if (!ifp->if_ioctl)
 			return EOPNOTSUPP;
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		return(error);
 
 	case SIOCSIFMTU:
 	{
 		u_long oldmtu = ifp->if_mtu;
 
 		error = suser(td);
 		if (error)
 			return (error);
 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
 			return (EINVAL);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0) {
 			getmicrotime(&ifp->if_lastchange);
 			rt_ifmsg(ifp);
 		}
 		/*
 		 * If the link MTU changed, do network layer specific procedure.
 		 */
 		if (ifp->if_mtu != oldmtu) {
 #ifdef INET6
 			nd6_setmtu(ifp);
 #endif
 		}
 		break;
 	}
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		error = suser(td);
 		if (error)
 			return (error);
 
 		/* Don't allow group membership on non-multicast interfaces. */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
 			return (EOPNOTSUPP);
 
 		/* Don't let users screw up protocols' entries. */
 		if (ifr->ifr_addr.sa_family != AF_LINK)
 			return (EINVAL);
 
 		if (cmd == SIOCADDMULTI) {
 			struct ifmultiaddr *ifma;
 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 		} else {
 			error = if_delmulti(ifp, &ifr->ifr_addr);
 		}
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYADDR:
 	case SIOCDIFPHYADDR:
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif
 	case SIOCSLIFPHYADDR:
 	case SIOCSIFMEDIA:
 	case SIOCSIFGENERIC:
 		/* Privileged requests that are implemented by the driver. */
 		error = suser(td);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCGIFSTATUS:
 		/* Start from an empty status string, then ask the driver. */
 		ifs = (struct ifstat *)data;
 		ifs->ascii[0] = '\0';
 		/* FALLTHROUGH */
 
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 	case SIOCGLIFPHYADDR:
 	case SIOCGIFMEDIA:
 	case SIOCGIFGENERIC:
 		/* Unprivileged queries that are answered by the driver. */
 		if (ifp->if_ioctl == 0)
 			return (EOPNOTSUPP);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		break;
 
 	case SIOCSIFLLADDR:
 		error = suser(td);
 		if (error)
 			return (error);
 		error = if_setlladdr(ifp,
 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
 		break;
 
 	default:
 		error = ENOIOCTL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Interface ioctls.
  *
  * Entry point for ioctls issued on a socket that refer to a network
  * interface.  Processing order:
  *   1. SIOCGIFCONF/OSIOCGIFCONF are answered by ifconf().
  *   2. Clone create/destroy/list requests are handled without looking
  *      up an interface.
  *   3. The interface named in the ifreq is looked up (ENXIO if absent)
  *      and the request is offered to ifhwioctl().
  *   4. Anything ifhwioctl() reports as ENOIOCTL is handed to the socket
  *      protocol's pru_control handler (EOPNOTSUPP if the socket has no
  *      protocol).
  * Returns 0 or an errno value.
  */
 int
 ifioctl(so, cmd, data, td)
 	struct socket *so;
 	u_long cmd;
 	caddr_t data;
 	struct thread *td;
 {
 	struct ifnet *ifp;
 	struct ifreq *ifr;
 	int error;
 	int oif_flags;
 
 	switch (cmd) {
 	case SIOCGIFCONF:
 	case OSIOCGIFCONF:
 		return (ifconf(cmd, data));
 	}
 	ifr = (struct ifreq *)data;
 
 	switch (cmd) {
 	case SIOCIFCREATE:
 	case SIOCIFDESTROY:
 		if ((error = suser(td)) != 0)
 			return (error);
 		return ((cmd == SIOCIFCREATE) ?
 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
 			if_clone_destroy(ifr->ifr_name));
 	
 	case SIOCIFGCLONERS:
 		return (if_clone_list((struct if_clonereq *)data));
 	}
 
 	ifp = ifunit(ifr->ifr_name);
 	if (ifp == 0)
 		return (ENXIO);
 
 	/* ENOIOCTL means "not an interface-level command"; keep going. */
 	error = ifhwioctl(cmd, ifp, data, td);
 	if (error != ENOIOCTL)
 		return (error);
 
 	/* Remember the flags so an IFF_UP transition can be detected below. */
 	oif_flags = ifp->if_flags;
 	if (so->so_proto == 0)
 		return (EOPNOTSUPP);
 #ifndef COMPAT_43
 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
 								 data,
 								 ifp, td));
 #else
 	{
 		/* Original command, kept to post-process old-style replies. */
 		int ocmd = cmd;
 
 		switch (cmd) {
 
 		case SIOCSIFDSTADDR:
 		case SIOCSIFADDR:
 		case SIOCSIFBRDADDR:
 		case SIOCSIFNETMASK:
 			/*
 			 * Accept an old-format sockaddr (no sa_len byte) by
 			 * rewriting it in place into the current layout.
 			 */
 #if BYTE_ORDER != BIG_ENDIAN
 			if (ifr->ifr_addr.sa_family == 0 &&
 			    ifr->ifr_addr.sa_len < 16) {
 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
 				ifr->ifr_addr.sa_len = 16;
 			}
 #else
 			if (ifr->ifr_addr.sa_len == 0)
 				ifr->ifr_addr.sa_len = 16;
 #endif
 			break;
 
 		/* Old "get" commands are mapped onto their current equivalents. */
 		case OSIOCGIFADDR:
 			cmd = SIOCGIFADDR;
 			break;
 
 		case OSIOCGIFDSTADDR:
 			cmd = SIOCGIFDSTADDR;
 			break;
 
 		case OSIOCGIFBRDADDR:
 			cmd = SIOCGIFBRDADDR;
 			break;
 
 		case OSIOCGIFNETMASK:
 			cmd = SIOCGIFNETMASK;
 		}
 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
 								   cmd,
 								   data,
 								   ifp, td));
 		switch (ocmd) {
 
 		case OSIOCGIFADDR:
 		case OSIOCGIFDSTADDR:
 		case OSIOCGIFBRDADDR:
 		case OSIOCGIFNETMASK:
 			/*
 			 * Old callers expect the family as a 16-bit value at
 			 * the start of the sockaddr.
 			 */
 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
 
 		}
 	}
 #endif /* COMPAT_43 */
 
 	/* If the protocol handler toggled IFF_UP, let IPv6 react to "up". */
 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 #ifdef INET6
 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
 		if (ifp->if_flags & IFF_UP) {
 			int s = splimp();
 			in6_if_up(ifp);
 			splx(s);
 		}
 #endif
 	}
 	return (error);
 }
 
 /*
  * Set/clear promiscuous mode on interface ifp based on the truth value
  * of pswitch.  The calls are reference counted so that only the first
  * "on" request actually has an effect, as does the final "off" request.
  * Results are undefined if the "off" and "on" requests are not matched.
  */
 int
 ifpromisc(ifp, pswitch)
 	struct ifnet *ifp;
 	int pswitch;
 {
 	struct ifreq ifr;
 	int error;
 	int oldflags, oldpcount;
 
 	oldpcount = ifp->if_pcount;
 	oldflags = ifp->if_flags;
 	if (ifp->if_flags & IFF_PPROMISC) {
 		/* Do nothing if device is in permanently promiscuous mode */
 		ifp->if_pcount += pswitch ? 1 : -1;
 		return (0);
 	}
 	if (pswitch) {
 		/*
 		 * If the device is not configured up, we cannot put it in
 		 * promiscuous mode.
 		 */
 		if ((ifp->if_flags & IFF_UP) == 0)
 			return (ENETDOWN);
 		if (ifp->if_pcount++ != 0)
 			return (0);
 		ifp->if_flags |= IFF_PROMISC;
 	} else {
 		if (--ifp->if_pcount > 0)
 			return (0);
 		ifp->if_flags &= ~IFF_PROMISC;
 	}
 	ifr.ifr_flags = ifp->if_flags & 0xffff;
 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 	if (error == 0) {
 		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
 		    ifp->if_name, ifp->if_unit,
 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
 		rt_ifmsg(ifp);
 	} else {
 		ifp->if_pcount = oldpcount;
 		ifp->if_flags = oldflags;
 	}
 	return error;
 }
 
 /*
  * Return interface configuration
  * of system.  List may be used
  * in later ioctl's (above) to get
  * other information.
  *
  *	cmd	SIOCGIFCONF, or OSIOCGIFCONF for the old sockaddr layout
  *		(only honoured when built with COMPAT_43)
  *	data	struct ifconf; ifc_req is the user buffer and ifc_len its
  *		size on entry
  *
  * One ifreq is copied out per address of every interface; an interface
  * without any reported address gets a single entry with a zeroed address.
  * On return ifc_len has been reduced by the space left unused.  Returns 0,
  * ENAMETOOLONG when an interface name does not fit in ifr_name, or the
  * error from copyout().
  */
 /*ARGSUSED*/
 static int
 ifconf(cmd, data)
 	u_long cmd;
 	caddr_t data;
 {
 	struct ifconf *ifc = (struct ifconf *)data;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifreq ifr, *ifrp;
 	int space = ifc->ifc_len, error = 0;
 
 	ifrp = ifc->ifc_req;
 	IFNET_RLOCK();		/* could sleep XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		char workbuf[64];
 		int ifnlen, addrs;
 
 		if (space < sizeof(ifr))
 			break;
 		ifnlen = snprintf(workbuf, sizeof(workbuf),
 		    "%s%d", ifp->if_name, ifp->if_unit);
 		if(ifnlen + 1 > sizeof ifr.ifr_name) {
 			error = ENAMETOOLONG;
 			break;
 		} else {
 			/*
 			 * Clear the whole request first: the name usually
 			 * fills only part of ifr_name, and the complete
 			 * structure is copied out below.  Without this the
 			 * remaining bytes would expose stale kernel stack
 			 * contents to the caller.
 			 */
 			bzero((caddr_t)&ifr, sizeof(ifr));
 			strcpy(ifr.ifr_name, workbuf);
 		}
 
 		addrs = 0;
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa = ifa->ifa_addr;
 
 			if (space < sizeof(ifr))
 				break;
 			/* Skip addresses that prison_if() rejects for a jail. */
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, sa))
 				continue;
 			addrs++;
 #ifdef COMPAT_43
 			if (cmd == OSIOCGIFCONF) {
 				/* Old layout: family stored via struct osockaddr. */
 				struct osockaddr *osa =
 					 (struct osockaddr *)&ifr.ifr_addr;
 				ifr.ifr_addr = *sa;
 				osa->sa_family = sa->sa_family;
 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
 						sizeof (ifr));
 				ifrp++;
 			} else
 #endif
 			if (sa->sa_len <= sizeof(*sa)) {
 				/* Address fits inside a plain ifreq. */
 				ifr.ifr_addr = *sa;
 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
 						sizeof (ifr));
 				ifrp++;
 			} else {
 				/*
 				 * Oversized address: emit the name, then the
 				 * full sockaddr, and advance past its real end.
 				 */
 				if (space < sizeof (ifr) + sa->sa_len -
 					    sizeof(*sa))
 					break;
 				space -= sa->sa_len - sizeof(*sa);
 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
 						sizeof (ifr.ifr_name));
 				if (error == 0)
 				    error = copyout((caddr_t)sa,
 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
 				ifrp = (struct ifreq *)
 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
 			}
 			if (error)
 				break;
 			space -= sizeof (ifr);
 		}
 		if (error)
 			break;
 		if (!addrs) {
 			/* No address reported: emit the name with a null address. */
 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
 			    sizeof (ifr));
 			if (error)
 				break;
 			space -= sizeof (ifr);
 			ifrp++;
 		}
 	}
 	IFNET_RUNLOCK();
 	ifc->ifc_len -= space;
 	return (error);
 }
 
 /*
  * Just like if_promisc(), but for all-multicast-reception mode.
  */
 int
 if_allmulti(ifp, onswitch)
 	struct ifnet *ifp;
 	int onswitch;
 {
 	int error = 0;
 	int s = splimp();
 	struct ifreq ifr;
 
 	if (onswitch) {
 		if (ifp->if_amcount++ == 0) {
 			ifp->if_flags |= IFF_ALLMULTI;
 			ifr.ifr_flags = ifp->if_flags & 0xffff;
 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 		}
 	} else {
 		if (ifp->if_amcount > 1) {
 			ifp->if_amcount--;
 		} else {
 			ifp->if_amcount = 0;
 			ifp->if_flags &= ~IFF_ALLMULTI;
 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 		}
 	}
 	splx(s);
 
 	if (error == 0)
 		rt_ifmsg(ifp);
 	return error;
 }
 
 /*
  * Add a multicast listenership to the interface in question.
  * The link layer may provide a routine (if_resolvemulti) which converts
  * the requested address into the corresponding link-layer address; when
  * it does, a second record is kept for that link-layer address as well.
  *
  * If retifma is non-NULL it receives the record for sa.  Returns 0 on
  * success or the error reported by if_resolvemulti.
  */
 int
 if_addmulti(ifp, sa, retifma)
 	struct ifnet *ifp;	/* interface to manipulate */
 	struct sockaddr *sa;	/* address to add */
 	struct ifmultiaddr **retifma;
 {
 	struct sockaddr *llsa, *dupsa;
 	int error, s;
 	struct ifmultiaddr *ifma;
 
 	/*
 	 * If the matching multicast address already exists
 	 * then don't add a new one, just add a reference
 	 */
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (equal(sa, ifma->ifma_addr)) {
 			ifma->ifma_refcount++;
 			if (retifma)
 				*retifma = ifma;
 			return 0;
 		}
 	}
 
 	/*
 	 * Give the link layer a chance to accept/reject it, and also
 	 * find out which AF_LINK address this maps to, if it isn't one
 	 * already.
 	 */
 	if (ifp->if_resolvemulti) {
 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
 		if (error) return error;
 	} else {
 		llsa = 0;
 	}
 
 	/* Build the record for sa itself, holding a private copy of it. */
 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
 	bcopy(sa, dupsa, sa->sa_len);
 
 	ifma->ifma_addr = dupsa;
 	ifma->ifma_lladdr = llsa;
 	ifma->ifma_ifp = ifp;
 	ifma->ifma_refcount = 1;
 	ifma->ifma_protospec = 0;
 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 
 	/*
 	 * Some network interfaces can scan the address list at
 	 * interrupt time; lock them out.
 	 */
 	s = splimp();
 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 	splx(s);
 	if (retifma != NULL)
 		*retifma = ifma;
 
 	/*
 	 * When a link-layer address was resolved, reference its existing
 	 * record or create one holding a copy of llsa.
 	 */
 	if (llsa != 0) {
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (equal(ifma->ifma_addr, llsa))
 				break;
 		}
 		if (ifma) {
 			ifma->ifma_refcount++;
 		} else {
 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
 			       M_IFMADDR, M_WAITOK);
 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
 			       M_IFMADDR, M_WAITOK);
 			bcopy(llsa, dupsa, llsa->sa_len);
 			ifma->ifma_addr = dupsa;
 			ifma->ifma_lladdr = NULL;
 			ifma->ifma_ifp = ifp;
 			ifma->ifma_refcount = 1;
 			ifma->ifma_protospec = 0;
 			s = splimp();
 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 			splx(s);
 		}
 	}
 	/*
 	 * We are certain we have added something, so call down to the
 	 * interface to let them know about it.
 	 */
 	s = splimp();
 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
 	splx(s);
 
 	return 0;
 }
 
 /*
  * Remove a reference to a multicast address on this interface.  Yell
  * if the request does not match an existing membership.
  *
  * Returns ENOENT when sa has no record on ifp, 0 otherwise.  When the
  * last reference to sa goes away its record is freed and, if it carried
  * a resolved link-layer address, one reference on the matching
  * link-layer record is dropped too.
  */
 int
 if_delmulti(ifp, sa)
 	struct ifnet *ifp;
 	struct sockaddr *sa;
 {
 	struct ifmultiaddr *ifma;
 	int s;
 
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 		if (equal(sa, ifma->ifma_addr))
 			break;
 	if (ifma == 0)
 		return ENOENT;
 
 	if (ifma->ifma_refcount > 1) {
 		ifma->ifma_refcount--;
 		return 0;
 	}
 
 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
 	/* From here on sa is the link-layer address saved in the record. */
 	sa = ifma->ifma_lladdr;
 	s = splimp();
 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 	/*
 	 * Make sure the interface driver is notified
 	 * in the case of a link layer mcast group being left.
 	 */
 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
 	splx(s);
 	free(ifma->ifma_addr, M_IFMADDR);
 	free(ifma, M_IFMADDR);
 	if (sa == 0)
 		return 0;
 
 	/*
 	 * Now look for the link-layer address which corresponds to
 	 * this network address.  It had been squirreled away in
 	 * ifma->ifma_lladdr for this purpose (so we don't have
 	 * to call ifp->if_resolvemulti() again), and we saved that
 	 * value in sa above.  If some nasty deleted the
 	 * link-layer address out from underneath us, we can deal because
 	 * the address we stored is not the same as the one which was
 	 * in the record for the link-layer address.  (So we don't complain
 	 * in that case.)
 	 *
 	 * The record that owned sa has just been freed, so sa must be
 	 * released on every exit below, not only when the link-layer
 	 * record is torn down; otherwise it is leaked.
 	 * NOTE(review): assumes ifma_lladdr is an M_IFMADDR allocation
 	 * private to its record, as the unconditional free on the final
 	 * path already implies -- confirm against if_resolvemulti
 	 * implementations.
 	 */
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 		if (equal(sa, ifma->ifma_addr))
 			break;
 	if (ifma == 0) {
 		free(sa, M_IFMADDR);
 		return 0;
 	}
 
 	if (ifma->ifma_refcount > 1) {
 		ifma->ifma_refcount--;
 		free(sa, M_IFMADDR);
 		return 0;
 	}
 
 	s = splimp();
 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
 	splx(s);
 	free(ifma->ifma_addr, M_IFMADDR);
 	free(sa, M_IFMADDR);
 	free(ifma, M_IFMADDR);
 
 	return 0;
 }
 
 /*
  * Set the link layer address on an interface.
  *
  * At this time we only support certain types of interfaces,
  * and we don't allow the length of the address to change.
  *
  *	ifp	interface to modify
  *	lladdr	new link-layer address bytes
  *	len	number of bytes in lladdr; must equal the current sdl_alen
  *
  * Returns 0 on success, EINVAL when the interface has no link-level
  * address record or len differs from the current length, and ENODEV
  * for interface types not listed below.
  */
 int
 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 
 	ifa = ifaddr_byindex(ifp->if_index);
 	if (ifa == NULL)
 		return (EINVAL);
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	if (sdl == NULL)
 		return (EINVAL);
 	if (len != sdl->sdl_alen)	/* don't allow length to change */
 		return (EINVAL);
 	switch (ifp->if_type) {
 	case IFT_ETHER:			/* these types use struct arpcom */
 	case IFT_FDDI:
 	case IFT_XETHER:
 	case IFT_ISO88025:
 	case IFT_L2VLAN:
 		/* Update the copy held in the softc's arpcom as well. */
 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
 		/* FALLTHROUGH */
 	case IFT_ARCNET:
 		bcopy(lladdr, LLADDR(sdl), len);
 		break;
 	default:
 		return (ENODEV);
 	}
 	/*
 	 * If the interface is already up, we need
 	 * to re-init it in order to reprogram its
 	 * address filter.
 	 */
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		/* Bounce the interface: report "down", then "up" again. */
 		ifp->if_flags &= ~IFF_UP;
 		ifr.ifr_flags = ifp->if_flags & 0xffff;
 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 		ifp->if_flags |= IFF_UP;
 		ifr.ifr_flags = ifp->if_flags & 0xffff;
 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 #ifdef INET
 		/*
 		 * Also send gratuitous ARPs to notify other nodes about
 		 * the address change.
 		 */
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr != NULL &&
 			    ifa->ifa_addr->sa_family == AF_INET)
 				arp_ifinit(ifp, ifa);
 		}
 #endif
 	}
 	return (0);
 }
 
 struct ifmultiaddr *
 ifmaof_ifpforaddr(sa, ifp)
 	struct sockaddr *sa;
 	struct ifnet *ifp;
 {
 	struct ifmultiaddr *ifma;
 	
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 		if (equal(ifma->ifma_addr, sa))
 			break;
 
 	return ifma;
 }
 
 /*
  * printf() variant that prefixes the message with "<name><unit>: " for
  * the given interface.  Returns the sum of the values returned by the
  * prefix printf() and the vprintf() of the caller's message.
  */
 int
 if_printf(struct ifnet *ifp, const char * fmt, ...)
 {
 	va_list args;
 	int prefix_len, body_len;
 
 	prefix_len = printf("%s%d: ", ifp->if_name, ifp->if_unit);
 
 	va_start(args, fmt);
 	body_len = vprintf(fmt, args);
 	va_end(args);
 
 	return (prefix_len + body_len);
 }
 
 /* sysctl tree nodes: "link" under _net, and "generic" under _net_link. */
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
Index: head/sys/net/if_disc.c
===================================================================
--- head/sys/net/if_disc.c	(revision 120726)
+++ head/sys/net/if_disc.c	(revision 120727)
@@ -1,254 +1,256 @@
 /*
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 /*
  * Discard interface driver for protocol testing and timing.
  * (Based on the loopback.)
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/bpf.h>
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #ifdef TINY_DSMTU
 #define	DSMTU	(1024+512)
 #else
 #define DSMTU	65532
 #endif
 
 #define DISCNAME	"disc"
 
 /*
  * Per-instance state of a discard interface: the ifnet itself plus the
  * linkage that keeps every instance on disc_softc_list.
  */
 struct disc_softc {
 	struct ifnet sc_if;	/* must be first */
 	LIST_ENTRY(disc_softc) sc_list;
 };
 
 static int	discoutput(struct ifnet *, struct mbuf *,
 		    struct sockaddr *, struct rtentry *);
 static void	discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	discioctl(struct ifnet *, u_long, caddr_t);
 static int	disc_clone_create(struct if_clone *, int);
 static void	disc_clone_destroy(struct ifnet *);
 
 static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
 static LIST_HEAD(, disc_softc) disc_softc_list;
 static struct if_clone disc_cloner = IF_CLONE_INITIALIZER(DISCNAME,
     disc_clone_create, disc_clone_destroy, 0, IF_MAXUNIT);
 
 /*
  * Clone handler: allocate a zeroed softc, fill in the embedded ifnet for
  * unit "unit", attach it to the network stack and to bpf, and record it
  * on disc_softc_list.  Always returns 0.
  */
 static int
 disc_clone_create(struct if_clone *ifc, int unit)
 {
 	struct disc_softc *softc;
 
 	softc = malloc(sizeof(*softc), M_DISC, M_WAITOK);
 	bzero(softc, sizeof(*softc));
 
 	softc->sc_if.if_softc = softc;
 	softc->sc_if.if_name = DISCNAME;
 	softc->sc_if.if_unit = unit;
 	softc->sc_if.if_mtu = DSMTU;
 	softc->sc_if.if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	softc->sc_if.if_ioctl = discioctl;
 	softc->sc_if.if_output = discoutput;
 	softc->sc_if.if_type = IFT_LOOP;
 	softc->sc_if.if_hdrlen = 0;
 	softc->sc_if.if_addrlen = 0;
 	softc->sc_if.if_snd.ifq_maxlen = 20;
 
 	if_attach(&softc->sc_if);
 	bpfattach(&softc->sc_if, DLT_NULL, sizeof(u_int));
 	LIST_INSERT_HEAD(&disc_softc_list, softc, sc_list);
 
 	return (0);
 }
 
 /*
  * Clone handler: unlink the instance behind ifp from disc_softc_list,
  * detach it from bpf and the network stack, and free its softc.
  */
 static void
 disc_clone_destroy(struct ifnet *ifp)
 {
 	struct disc_softc *softc = ifp->if_softc;
 
 	LIST_REMOVE(softc, sc_list);
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 
 	free(softc, M_DISC);
 }
 
 /*
  * Module event handler.  On load the instance list is initialised and the
  * cloner registered; on unload the cloner is removed and every remaining
  * instance destroyed.  Other events are ignored.  Always returns 0.
  */
 static int
 disc_modevent(module_t mod, int type, void *data)
 {
 	if (type == MOD_LOAD) {
 		LIST_INIT(&disc_softc_list);
 		if_clone_attach(&disc_cloner);
 	} else if (type == MOD_UNLOAD) {
 		struct disc_softc *softc;
 
 		if_clone_detach(&disc_cloner);
 
 		/* disc_clone_destroy() unlinks the head on each pass. */
 		while ((softc = LIST_FIRST(&disc_softc_list)) != NULL)
 			disc_clone_destroy(&softc->sc_if);
 	}
 	return 0;
 }
 
 /* Module description: name, event handler, and no extra argument. */
 static moduledata_t disc_mod = { 
 	"if_disc", 
 	disc_modevent, 
 	NULL
 }; 
 
 DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 /*
  * Output routine of the discard interface: optionally shows the packet
  * to bpf, updates the output counters, and frees the mbuf chain.
  * Always returns 0.
  */
 static int
 discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct rtentry *rt)
 {
 	M_ASSERTPKTHDR(m);
 	/* BPF write needs to be handled specially */
 	if (dst->sa_family == AF_UNSPEC) {
 		/*
 		 * The real address family is the leading int of the data;
 		 * take it and strip that int from the packet.
 		 */
 		dst->sa_family = *(mtod(m, int *));
 		m->m_len -= sizeof(int);
 		m->m_pkthdr.len -= sizeof(int);
 		m->m_data += sizeof(int);
 	}
 
 	if (ifp->if_bpf) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		struct mbuf m0;
 		u_int af = dst->sa_family;
 
 		m0.m_next = m;
 		m0.m_len = 4;
 		m0.m_data = (char *)&af;
 
 		BPF_MTAP(ifp, &m0);
 	}
 	m->m_pkthdr.rcvif = ifp;
 
 	ifp->if_opackets++;
 	ifp->if_obytes += m->m_pkthdr.len;
 
 	/* Discard: nothing is ever transmitted. */
 	m_freem(m);
 	return 0;
 }
 
 /*
  * Route request hook installed on interface addresses by discioctl()
  * (SIOCSIFADDR): sets the route's MTU metric to DSMTU.
  *
  * NOTE(review): RT_LOCK_ASSERT(rt) runs before the NULL test on rt --
  * confirm that rt can never be NULL here or that the assertion
  * tolerates a NULL route.
  */
 /* ARGSUSED */
 static void
 discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 {
+	RT_LOCK_ASSERT(rt);
+
 	if (rt)
 		rt->rt_rmx.rmx_mtu = DSMTU;
 }
 
 /*
  * Process an ioctl request.
  *
  * Handles SIOCSIFADDR, SIOCADDMULTI/SIOCDELMULTI and SIOCSIFMTU; every
  * other command yields EINVAL.  Returns 0 or an errno value.
  */
 static int
 discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0;
 
 	switch (cmd) {
 
 	case SIOCSIFADDR:
 		/* Mark the interface up and hook route requests for this address. */
 		ifp->if_flags |= IFF_UP;
 		ifa = (struct ifaddr *)data;
 		if (ifa != 0)
 			ifa->ifa_rtrequest = discrtrequest;
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		/* Only the address families compiled in are accepted. */
 		switch (ifr->ifr_addr.sa_family) {
 
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/* The requested MTU is stored as given. */
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
Index: head/sys/net/if_faith.c
===================================================================
--- head/sys/net/if_faith.c	(revision 120726)
+++ head/sys/net/if_faith.c	(revision 120727)
@@ -1,377 +1,379 @@
 /*	$KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 /*
  * derived from
  *	@(#)if_loop.c	8.1 (Berkeley) 6/10/93
  * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp
  */
 
 /*
  * Loopback interface driver for protocol testing and timing.
  */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/queue.h>
 #include <sys/types.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 
 #ifdef	INET
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #endif
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <net/net_osdep.h>
 
 #define FAITHNAME	"faith"
 
 /*
  * Per-instance state of a faith interface.  The ifnet must stay the first
  * member: faith_clone_destroy() converts an ifnet pointer straight back
  * into a softc pointer.
  */
 struct faith_softc {
 	struct ifnet sc_if;	/* must be first */
 	LIST_ENTRY(faith_softc) sc_list;
 };
 
 static int faithioctl(struct ifnet *, u_long, caddr_t);
 int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
 	struct rtentry *);
 static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
 #ifdef INET6
 static int faithprefix(struct in6_addr *);
 #endif
 
 static int faithmodevent(module_t, int, void *);
 
 static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
 static LIST_HEAD(, faith_softc) faith_softc_list;
 
 int	faith_clone_create(struct if_clone *, int);
 void	faith_clone_destroy(struct ifnet *);
 
 struct if_clone faith_cloner = IF_CLONE_INITIALIZER(FAITHNAME,
     faith_clone_create, faith_clone_destroy, 0, IF_MAXUNIT);
 
 #define	FAITHMTU	1500
 
 static int
 faithmodevent(mod, type, data)
 	module_t mod;
 	int type;
 	void *data;
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		LIST_INIT(&faith_softc_list);
 		if_clone_attach(&faith_cloner);
 
 #ifdef INET6
 		faithprefix_p = faithprefix;
 #endif
 
 		break;
 	case MOD_UNLOAD:
 #ifdef INET6
 		faithprefix_p = NULL;
 #endif
 
 		if_clone_detach(&faith_cloner);
 
 		while (!LIST_EMPTY(&faith_softc_list))
 			faith_clone_destroy(
 			    &LIST_FIRST(&faith_softc_list)->sc_if);
 
 		break;
 	}
 	return 0;
 }
 
 /* Module description: name, event handler, and no extra argument. */
 static moduledata_t faith_mod = {
 	"if_faith",
 	faithmodevent,
 	0
 };
 
 DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_faith, 1);
 
 int
 faith_clone_create(ifc, unit)
 	struct if_clone *ifc;
 	int unit;
 {
 	struct faith_softc *sc;
 
 	sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK);
 	bzero(sc, sizeof(struct faith_softc));
 
 	sc->sc_if.if_softc = sc;
 	sc->sc_if.if_name = FAITHNAME;
 	sc->sc_if.if_unit = unit;
 
 	sc->sc_if.if_mtu = FAITHMTU;
 	/* Change to BROADCAST experimentaly to announce its prefix. */
 	sc->sc_if.if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST;
 	sc->sc_if.if_ioctl = faithioctl;
 	sc->sc_if.if_output = faithoutput;
 	sc->sc_if.if_type = IFT_FAITH;
 	sc->sc_if.if_hdrlen = 0;
 	sc->sc_if.if_addrlen = 0;
 	sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen;
 	if_attach(&sc->sc_if);
 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int));
 	LIST_INSERT_HEAD(&faith_softc_list, sc, sc_list);
 	return (0);
 }
 
 /*
  * Clone handler: unlink the instance behind ifp from faith_softc_list,
  * detach it from bpf and the network stack, and free its softc.
  */
 void
 faith_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	struct faith_softc *softc;
 
 	/* The ifnet is the first member of the softc, so the cast is exact. */
 	softc = (struct faith_softc *)ifp;
 
 	LIST_REMOVE(softc, sc_list);
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 
 	free(softc, M_FAITH);
 }
 
 /*
  * Output routine of the faith interface: after an optional bpf tap the
  * packet is handed to the IPv4 or IPv6 netisr as if it had been received
  * on this interface.
  *
  * Returns 0 on success (and for RTF_BLACKHOLE routes), EHOSTUNREACH or
  * ENETUNREACH for RTF_REJECT routes, and EAFNOSUPPORT for address
  * families other than those compiled in.  The mbuf is consumed in all
  * cases.
  */
 int
 faithoutput(ifp, m, dst, rt)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	struct rtentry *rt;
 {
 	int isr;
 
 	M_ASSERTPKTHDR(m);
 
 	/* BPF write needs to be handled specially */
 	if (dst->sa_family == AF_UNSPEC) {
 		/*
 		 * The real address family is the leading int of the data;
 		 * take it and strip that int from the packet.
 		 */
 		dst->sa_family = *(mtod(m, int *));
 		m->m_len -= sizeof(int);
 		m->m_pkthdr.len -= sizeof(int);
 		m->m_data += sizeof(int);
 	}
 
 	if (ifp->if_bpf) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a faith header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		struct mbuf m0;
 		u_int32_t af = dst->sa_family;
 
 		m0.m_next = m;
 		m0.m_len = 4;
 		m0.m_data = (char *)&af;
 
 		BPF_MTAP(ifp, &m0);
 	}
 
 	/* Reject/blackhole routes drop the packet here. */
 	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		m_freem(m);
 		return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
 		        rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 	}
 	ifp->if_opackets++;
 	ifp->if_obytes += m->m_pkthdr.len;
 	/* Pick the input queue that matches the address family. */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		m_freem(m);
 		return EAFNOSUPPORT;
 	}
 
 	/* XXX do we need more sanity checks? */
 
 	/* Account the packet as input on this interface and dispatch it. */
 	m->m_pkthdr.rcvif = ifp;
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
 	netisr_dispatch(isr, m);
 	return (0);
 }
 
 /*
  * Route request hook installed on interface addresses by faithioctl()
  * (SIOCSIFADDR): copies the interface MTU into the route metrics and
  * sets the send/receive pipe sizes to three times FAITHMTU.
  *
  * NOTE(review): RT_LOCK_ASSERT(rt) runs before the NULL test on rt --
  * confirm that rt can never be NULL here or that the assertion
  * tolerates a NULL route.
  */
 /* ARGSUSED */
 static void
 faithrtrequest(cmd, rt, info)
 	int cmd;
 	struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
+	RT_LOCK_ASSERT(rt);
+
 	if (rt) {
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
 		/*
 		 * For optimal performance, the send and receive buffers
 		 * should be at least twice the MTU plus a little more for
 		 * overhead.
 		 */
 		rt->rt_rmx.rmx_recvpipe =
 			rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU;
 	}
 }
 
 /*
  * Process an ioctl request.
  *
  * Handles SIOCSIFADDR, SIOCADDMULTI/SIOCDELMULTI, SIOCSIFMTU (when
  * defined) and SIOCSIFFLAGS; every other command yields EINVAL.
  * Returns 0 or an errno value.
  */
 /* ARGSUSED */
 static int
 faithioctl(ifp, cmd, data)
 	struct ifnet *ifp;
 	u_long cmd;
 	caddr_t data;
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0;
 
 	switch (cmd) {
 
 	case SIOCSIFADDR:
 		/* Mark the interface up and running; hook route requests. */
 		ifp->if_flags |= IFF_UP | IFF_RUNNING;
 		ifa = (struct ifaddr *)data;
 		ifa->ifa_rtrequest = faithrtrequest;
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		/* Only the address families compiled in are accepted. */
 		switch (ifr->ifr_addr.sa_family) {
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 #ifdef SIOCSIFMTU
 	case SIOCSIFMTU:
 		/* The requested MTU is stored as given. */
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 #endif
 
 	case SIOCSIFFLAGS:
 		/* Accepted; nothing to do at this level. */
 		break;
 
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
 
 #ifdef INET6
 /*
  * Report whether the route for IPv6 address *in6 leads to a faith
  * interface that is up.  Returns 1 if so, 0 otherwise; always 0 while
  * ip6_keepfaith is zero.  Any route obtained for the lookup is released
  * before returning.
  *
  * XXX could be slow
  * XXX could be layer violation to call sys/net from sys/netinet6
  */
 static int
 faithprefix(in6)
 	struct in6_addr *in6;
 {
 	struct rtentry *rt;
 	struct sockaddr_in6 sin6;
 	int ret;
 
 	if (ip6_keepfaith == 0)
 		return 0;
 
 	/* Build a sockaddr_in6 for the address and look up its route. */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *in6;
 	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
 	if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
 	    (rt->rt_ifp->if_flags & IFF_UP) != 0)
 		ret = 1;
 	else
 		ret = 0;
 	if (rt)
-		RTFREE(rt);
+		RTFREE_LOCKED(rt);
 	return ret;
 }
 #endif
Index: head/sys/net/if_loop.c
===================================================================
--- head/sys/net/if_loop.c	(revision 120726)
+++ head/sys/net/if_loop.c	(revision 120727)
@@ -1,429 +1,431 @@
 /*
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_loop.c	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 /*
  * Loopback interface driver for protocol testing and timing.
  */
 
 #include "opt_atalk.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/bpfdesc.h>
 
 #ifdef	INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #endif
 
 #ifdef IPX
 #include <netipx/ipx.h>
 #include <netipx/ipx_if.h>
 #endif
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #endif
 
 #ifdef NETATALK
 #include <netatalk/at.h>
 #include <netatalk/at_var.h>
 #endif
 
 /*
  * Loopback MTU.  Defining TINY_LOMTU or LARGE_LOMTU at build time
  * selects the smaller or larger value instead of the default.
  */
 #ifdef TINY_LOMTU
 #define	LOMTU	(1024+512)
 #elif defined(LARGE_LOMTU)
 #define LOMTU	131072
 #else
 #define LOMTU	16384
 #endif
 
 /* Driver name: used for if_name, the malloc type and the cloner. */
 #define LONAME	"lo"
 
 /* Per-interface state; one is allocated by lo_clone_create(). */
 struct lo_softc {
 	struct	ifnet sc_if;		/* network-visible interface */
 	LIST_ENTRY(lo_softc) sc_next;	/* linkage on lo_list */
 };
 
 int		loioctl(struct ifnet *, u_long, caddr_t);
 static void	lortrequest(int, struct rtentry *, struct rt_addrinfo *);
 int		looutput(struct ifnet *ifp, struct mbuf *m,
 		    struct sockaddr *dst, struct rtentry *rt);
 int		lo_clone_create(struct if_clone *, int);
 void		lo_clone_destroy(struct ifnet *);
 
 /* Set by lo_clone_create() to the first loopback interface it creates. */
 struct ifnet *loif = NULL;			/* Used externally */
 
 static MALLOC_DEFINE(M_LO, LONAME, "Loopback Interface");
 
 /* All loopback softcs; initialised in loop_modevent(MOD_LOAD). */
 static LIST_HEAD(lo_list, lo_softc) lo_list;
 
 struct if_clone lo_cloner = IF_CLONE_INITIALIZER(LONAME,
     lo_clone_create, lo_clone_destroy, 1, IF_MAXUNIT);
 
 /*
  * Destroy a cloned loopback interface: call bpfdetach() and if_detach()
  * on it, unlink its softc from lo_list and free the softc.
  *
  * Must not be called on the interface recorded in loif (lo0); that is
  * asserted below.
  */
 void
 lo_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	struct lo_softc *sc;
 	
 	sc = ifp->if_softc;
 
 	/* XXX: destroying lo0 will lead to panics. */
 	KASSERT(loif != ifp, ("%s: destroying lo0", __func__));
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 	LIST_REMOVE(sc, sc_next);
 	/* sc embeds the ifnet, so it is freed only after the detach calls. */
 	free(sc, M_LO);
 }
 
 /*
  * Create loopback interface number `unit'.
  *
  * Allocates a zeroed softc, fills in the embedded ifnet, attaches the
  * interface and a DLT_NULL bpf tap, and links the softc onto lo_list.
  * The first interface created is also recorded in loif.
  *
  * ifc is not used.  Always returns 0.
  */
 int
 lo_clone_create(ifc, unit)
 	struct if_clone *ifc;
 	int unit;
 {
 	struct lo_softc *sc;
 
 	MALLOC(sc, struct lo_softc *, sizeof(*sc), M_LO, M_WAITOK | M_ZERO);
 
 	sc->sc_if.if_name = LONAME;
 	sc->sc_if.if_unit = unit;
 	sc->sc_if.if_mtu = LOMTU;
 	sc->sc_if.if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	sc->sc_if.if_ioctl = loioctl;
 	sc->sc_if.if_output = looutput;
 	sc->sc_if.if_type = IFT_LOOP;
 	sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen;
 	/* Back pointer read by lo_clone_destroy() via ifp->if_softc. */
 	sc->sc_if.if_softc = sc;
 	if_attach(&sc->sc_if);
 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int));
 	LIST_INSERT_HEAD(&lo_list, sc, sc_next);
 	/* Remember the first loopback interface for external users. */
 	if (loif == NULL)
 		loif = &sc->sc_if;
 
 	return (0);
 }
 
 /*
  * Module event handler for the loopback driver.
  *
  * On load, initialise lo_list and register the cloner.  Unloading is
  * refused with EINVAL.  Every other event is accepted and ignored.
  */
 static int
 loop_modevent(module_t mod, int type, void *data)
 {
 	if (type == MOD_UNLOAD) {
 		printf("loop module unload - not possible for this module type\n");
 		return (EINVAL);
 	}
 	if (type == MOD_LOAD) {
 		LIST_INIT(&lo_list);
 		if_clone_attach(&lo_cloner);
 	}
 	return (0);
 }
 
 /* Module description: name, event handler and (unused) private data. */
 static moduledata_t loop_mod = { 
 	"loop", 
 	loop_modevent, 
 	0
 }; 
 
 DECLARE_MODULE(loop, loop_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 /*
  * Output routine for the loopback interface.
  *
  * ifp - loopback interface the packet is sent on
  * m   - packet; must carry a packet header
  * dst - destination; only its sa_family is examined here
  * rt  - route used, or NULL
  *
  * A packet whose route is flagged RTF_REJECT or RTF_BLACKHOLE is freed;
  * blackhole returns 0, reject returns EHOSTUNREACH for a host route and
  * ENETUNREACH otherwise.  Otherwise the output counters are bumped and,
  * for the families accepted below, the packet is passed to if_simloop()
  * with no media header.  Other families are freed with EAFNOSUPPORT.
  */
 int
 looutput(ifp, m, dst, rt)
 	struct ifnet *ifp;
 	register struct mbuf *m;
 	struct sockaddr *dst;
 	register struct rtentry *rt;
 {
 #ifdef INET6
 	struct mbuf *n;
 #endif
 
 	M_ASSERTPKTHDR(m); /* check if we have the packet header */
 
 	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		m_freem(m);
 		return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
 		        rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 	}
 #ifdef INET6
 	/*
 	 * KAME requires the packet to be contiguous in a single mbuf,
 	 * so make sure of that here.
 	 * This kind of code should be avoided.
 	 *
 	 * XXX: KAME may no longer need contiguous packets.  Once
 	 * that has been verified, the following code _should_ be
 	 * removed.
 	 */
 
 	if (m && m->m_next != NULL) {
 
 		n = m_defrag(m, M_DONTWAIT);
 
 		if (n == NULL) {
 			/* Defragmentation failed: drop the original chain. */
 			m_freem(m);
 			return (ENOBUFS);
 		} else {
 			m = n;
 		}
 	}
 #endif
 
 	ifp->if_opackets++;
 	ifp->if_obytes += m->m_pkthdr.len;
 #if 1	/* XXX */
 	/* Only these families are looped back; anything else is dropped. */
 	switch (dst->sa_family) {
 	case AF_INET:
 	case AF_INET6:
 	case AF_IPX:
 	case AF_APPLETALK:
 		break;
 	default:
 		printf("looutput: af=%d unexpected\n", dst->sa_family);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 #endif
 	return(if_simloop(ifp, m, dst->sa_family, 0));
 }
 
 /*
  * if_simloop()
  *
  * This function is to support software emulation of hardware loopback,
  * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't
  * hear their own broadcasts, we create a copy of the packet that we
  * would normally receive via a hardware loopback.
  *
  * This function expects the packet to include the media header of length hlen.
  *
  * ifp  - interface recorded as the packet's receive interface
  * m    - packet; must carry a packet header
  * af   - address family of the payload; AF_UNSPEC means the family is
  *        stored in the first sizeof(int) bytes of the packet
  * hlen - number of media-header bytes to strip before delivery
  *
  * Returns 0 after handing the packet to netisr_dispatch(), or
  * EAFNOSUPPORT (with m freed) when the family has no case below.
  */
 
 int
 if_simloop(ifp, m, af, hlen)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int af;
 	int hlen;
 {
 	int isr;
 
 	M_ASSERTPKTHDR(m);
 	m->m_pkthdr.rcvif = ifp;
 
 	/* BPF write needs to be handled specially */
 	if (af == AF_UNSPEC) {
 		KASSERT(m->m_len >= sizeof(int), ("if_simloop: m_len"));
 		/* Read the family from the leading int, then skip past it. */
 		af = *(mtod(m, int *));
 		m->m_len -= sizeof(int);
 		m->m_pkthdr.len -= sizeof(int);
 		m->m_data += sizeof(int);
 	}
 
 	/* Let BPF see incoming packet */
 	if (ifp->if_bpf) {
 		struct mbuf m0, *n = m;
 
 		if (ifp->if_bpf->bif_dlt == DLT_NULL) {
 			/*
 			 * We need to prepend the address family as
 			 * a four byte field.  Cons up a dummy header
 			 * to pacify bpf.  This is safe because bpf
 			 * will only read from the mbuf (i.e., it won't
 			 * try to free it or keep a pointer to it).
 			 */
 			m0.m_next = m;
 			m0.m_len = 4;
 			m0.m_data = (char *)&af;
 			n = &m0;
 		}
 		BPF_MTAP(ifp, n);
 	}
 
 	/* Strip away media header */
 	if (hlen > 0) {
 		m_adj(m, hlen);
 #if defined(__alpha__) || defined(__ia64__) || defined(__sparc64__)
 		/* The alpha doesn't like unaligned data.
 		 * We move data down in the first mbuf */
 		if (mtod(m, vm_offset_t) & 3) {
 			KASSERT(hlen >= 3, ("if_simloop: hlen too small"));
 			bcopy(m->m_data, 
 			    (char *)(mtod(m, vm_offset_t) 
 				- (mtod(m, vm_offset_t) & 3)),
 			    m->m_len);
 			mtod(m,vm_offset_t) -= (mtod(m, vm_offset_t) & 3);
 		}
 #endif
 	}
 
 	/* Deliver to upper layer protocol */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		m->m_flags |= M_LOOP;
 		isr = NETISR_IPV6;
 		break;
 #endif
 #ifdef IPX
 	case AF_IPX:
 		isr = NETISR_IPX;
 		break;
 #endif
 #ifdef NETATALK
 	case AF_APPLETALK:
 		isr = NETISR_ATALK2;
 		break;
 #endif
 	default:
 		printf("if_simloop: can't handle af=%d\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 	/* Input counters are updated only for packets actually delivered. */
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
 	netisr_dispatch(isr, m);
 	return (0);
 }
 
 /* ARGSUSED */
 static void
 lortrequest(cmd, rt, info)
 	int cmd;
 	struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
+	RT_LOCK_ASSERT(rt);
+
 	if (rt) {
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
 		/*
 		 * For optimal performance, the send and receive buffers
 		 * should be at least twice the MTU plus a little more for
 		 * overhead.
 		 */
 		rt->rt_rmx.rmx_recvpipe =
 			rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
 	}
 }
 
 /*
  * Process an ioctl request.
  */
 /* ARGSUSED */
 int
 loioctl(ifp, cmd, data)
 	register struct ifnet *ifp;
 	u_long cmd;
 	caddr_t data;
 {
 	register struct ifaddr *ifa;
 	register struct ifreq *ifr = (struct ifreq *)data;
 	register int error = 0;
 
 	switch (cmd) {
 
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP | IFF_RUNNING;
 		ifa = (struct ifaddr *)data;
 		ifa->ifa_rtrequest = lortrequest;
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		switch (ifr->ifr_addr.sa_family) {
 
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCSIFMTU:
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	case SIOCSIFFLAGS:
 		break;
 
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
Index: head/sys/net/if_stf.c
===================================================================
--- head/sys/net/if_stf.c	(revision 120726)
+++ head/sys/net/if_stf.c	(revision 120727)
@@ -1,776 +1,777 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*
  * Copyright (C) 2000 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * 6to4 interface, based on RFC3056.
  *
  * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
  * There is no address mapping defined from IPv6 multicast address to IPv4
  * address.  Therefore, we do not have IFF_MULTICAST on the interface.
  *
  * Due to the lack of address mapping for link-local addresses, we cannot
  * throw packets toward link-local addresses (fe80::x).  Also, we cannot throw
  * packets to link-local multicast addresses (ff02::x).
  *
  * Here are interesting symptoms due to the lack of link-local address:
  *
  * Unicast routing exchange:
  * - RIPng: Impossible.  Uses link-local multicast packet toward ff02::9,
  *   and link-local addresses as nexthop.
  * - OSPFv6: Impossible.  OSPFv6 assumes that there's link-local address
  *   assigned to the link, and makes use of them.  Also, HELLO packets use
  *   link-local multicast addresses (ff02::5 and ff02::6).
  * - BGP4+: Maybe.  You can only use global address as nexthop, and global
  *   address as TCP endpoint address.
  *
  * Multicast routing protocols:
  * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
  *   Adjacent PIM routers must be configured manually (is it really spec-wise
  *   correct thing to do?).
  *
  * ICMPv6:
  * - Redirects cannot be used due to the lack of link-local address.
  *
  * stf interface does not have, and will not need, a link-local address.  
  * It seems to have no real benefit and does not help the above symptoms much.
  * Even if we assign link-locals to interface, we cannot really
  * use link-local unicast/multicast on top of 6to4 cloud (since there's no
  * encapsulation defined for link-local address), and the above analysis does
  * not change.  RFC3056 does not mandate the assignment of link-local address
  * either.
  *
  * The 6to4 interface has security issues.  Refer to
  * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
  * for details.  The code tries to filter out some malicious packets.
  * Note that there is no way to be 100% secure.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <machine/cpu.h>
 
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_types.h>
 #include <net/if_stf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip_ecn.h>
 
 #include <netinet/ip_encap.h>
 
 #include <machine/stdarg.h>
 
 #include <net/net_osdep.h>
 
 #include <net/bpf.h>
 
 /* Driver name: used for if_name, the malloc type and the cloner. */
 #define STFNAME		"stf"
 
 /* True when the IPv6 address starts with the 16-bit value 0x2002. */
 #define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
 
 /*
  * Address of the IPv4 address embedded after the leading 16 bits.
  * XXX: Returns a pointer that is only 16-bit aligned.  Don't cast it
  * to struct in_addr *; use bcopy() instead.
  */
 #define GET_V4(x)	((caddr_t)(&(x)->s6_addr16[1]))
 
 /*
  * Per-interface state.  sc_if must stay the first member: the driver
  * casts between struct ifnet * and struct stf_softc *.
  */
 struct stf_softc {
 	struct ifnet	sc_if;	   /* common area */
 	union {
 		struct route  __sc_ro4;
 		struct route_in6 __sc_ro6; /* just for safety */
 	} __sc_ro46;
 #define sc_ro	__sc_ro46.__sc_ro4	/* cached IPv4 route used by stf_output() */
 	const struct encaptab *encap_cookie;	/* from encap_attach_func() */
 	LIST_ENTRY(stf_softc) sc_list;	/* all stf's are linked */
 };
 
 static LIST_HEAD(, stf_softc) stf_softc_list;
 
 static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
 /* TTL written into the outer IPv4 header by stf_output(). */
 static int ip_stf_ttl = 40;
 
 /* Protocol switch entry passed to encap_attach_func() in stf_clone_create(). */
 extern  struct domain inetdomain;
 struct protosw in_stf_protosw =
 { SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
   in_stf_input,	(pr_output_t*)rip_output, 0,	rip_ctloutput,
   0,
   0,            0,              0,              0,
   &rip_usrreqs
 };
 
 static int stfmodevent(module_t, int, void *);
 static int stf_encapcheck(const struct mbuf *, int, int, void *);
 static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
 static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 	struct rtentry *);
 static int isrfc1918addr(struct in_addr *);
 static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
 	struct ifnet *);
 static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
 	struct ifnet *);
 static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int stf_ioctl(struct ifnet *, u_long, caddr_t);
 
 int	stf_clone_create(struct if_clone *, int);
 void	stf_clone_destroy(struct ifnet *);
 
 /* only one clone is currently allowed */
 struct if_clone stf_cloner =
     IF_CLONE_INITIALIZER(STFNAME, stf_clone_create, stf_clone_destroy, 0, 0);
 
 /*
  * Create stf interface number `unit'.
  *
  * Allocates a zeroed softc, registers stf_encapcheck() for
  * IPv6-in-IPv4 packets, fills in the embedded ifnet, attaches the
  * interface and a DLT_NULL bpf tap, and links the softc onto
  * stf_softc_list.
  *
  * ifc is not used.  Returns 0, or ENOMEM (with the softc freed) when
  * encap_attach_func() fails.
  */
 int
 stf_clone_create(ifc, unit)
 	struct if_clone *ifc;
 	int unit;
 {
 	struct stf_softc *sc;
 
 	sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO);
 	/* Name and unit are set first so if_name() works in the error path. */
 	sc->sc_if.if_name = STFNAME;
 	sc->sc_if.if_unit = unit;
 
 	sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
 	    stf_encapcheck, &in_stf_protosw, sc);
 	if (sc->encap_cookie == NULL) {
 		printf("%s: attach failed\n", if_name(&sc->sc_if));
 		free(sc, M_STF);
 		return (ENOMEM);
 	}
 
 	sc->sc_if.if_mtu    = IPV6_MMTU;
 	sc->sc_if.if_ioctl  = stf_ioctl;
 	sc->sc_if.if_output = stf_output;
 	sc->sc_if.if_type   = IFT_STF;
 	sc->sc_if.if_snd.ifq_maxlen = IFQ_MAXLEN;
 	if_attach(&sc->sc_if);
 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int));
 	LIST_INSERT_HEAD(&stf_softc_list, sc, sc_list);
 	return (0);
 }
 
 /*
  * Destroy an stf interface: unlink its softc from stf_softc_list,
  * detach the encapsulation hook, call bpfdetach() and if_detach(),
  * and free the softc.
  */
 void
 stf_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	int err;
 	/* Valid because sc_if is the first member of struct stf_softc. */
 	struct stf_softc *sc = (void *) ifp;
 
 	LIST_REMOVE(sc, sc_list);
 	err = encap_detach(sc->encap_cookie);
 	KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
 	bpfdetach(ifp);
 	if_detach(ifp);
 
 	free(sc, M_STF);
 }
 
 static int
 stfmodevent(mod, type, data)
 	module_t mod;
 	int type;
 	void *data;
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		LIST_INIT(&stf_softc_list);
 		if_clone_attach(&stf_cloner);
 
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&stf_cloner);
 
 		while (!LIST_EMPTY(&stf_softc_list))
 			stf_clone_destroy(&LIST_FIRST(&stf_softc_list)->sc_if);
 		break;
 	}
 
 	return (0);
 }
 
 /* Module description: name, event handler and (unused) private data. */
 static moduledata_t stf_mod = {
 	"if_stf",
 	stfmodevent,
 	0
 };
 
 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 /*
  * Encapsulation match callback registered by stf_clone_create().
  *
  * m     - candidate packet; its leading bytes are read as an IPv4 header
  * off   - not used here
  * proto - encapsulated protocol; only IPPROTO_IPV6 is accepted
  * arg   - the struct stf_softc given to encap_attach_func()
  *
  * Returns 0 when the packet is not for this interface, 32 when it is.
  */
 static int
 stf_encapcheck(m, off, proto, arg)
 	const struct mbuf *m;
 	int off;
 	int proto;
 	void *arg;
 {
 	struct ip ip;
 	struct in6_ifaddr *ia6;
 	struct stf_softc *sc;
 	struct in_addr a, b, mask;
 
 	sc = (struct stf_softc *)arg;
 	if (sc == NULL)
 		return 0;
 
 	if ((sc->sc_if.if_flags & IFF_UP) == 0)
 		return 0;
 
 	/* IFF_LINK0 means "no decapsulation" */
 	if ((sc->sc_if.if_flags & IFF_LINK0) != 0)
 		return 0;
 
 	if (proto != IPPROTO_IPV6)
 		return 0;
 
 	/* Work on a local copy of the header; m itself is not modified. */
 	/* LINTED const cast */
 	m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip);
 
 	if (ip.ip_v != 4)
 		return 0;
 
 	/* No usable 6to4 address on the interface: nothing can match. */
 	ia6 = stf_getsrcifa6(&sc->sc_if);
 	if (ia6 == NULL)
 		return 0;
 
 	/*
 	 * check if IPv4 dst matches the IPv4 address derived from the
 	 * local 6to4 address.
 	 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
 	 */
 	if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
 	    sizeof(ip.ip_dst)) != 0)
 		return 0;
 
 	/*
 	 * check if IPv4 src matches the IPv4 address derived from the
 	 * local 6to4 address masked by prefixmask.
 	 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
 	 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
 	 */
 	bzero(&a, sizeof(a));
 	/* bcopy() rather than a cast: GET_V4() is only 16-bit aligned. */
 	bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a));
 	bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask));
 	a.s_addr &= mask.s_addr;
 	b = ip.ip_src;
 	b.s_addr &= mask.s_addr;
 	if (a.s_addr != b.s_addr)
 		return 0;
 
 	/* stf interface makes single side match only */
 	return 32;
 }
 
 static struct in6_ifaddr *
 stf_getsrcifa6(ifp)
 	struct ifnet *ifp;
 {
 	struct ifaddr *ia;
 	struct in_ifaddr *ia4;
 	struct sockaddr_in6 *sin6;
 	struct in_addr in;
 
 	for (ia = TAILQ_FIRST(&ifp->if_addrlist);
 	     ia;
 	     ia = TAILQ_NEXT(ia, ifa_list))
 	{
 		if (ia->ifa_addr == NULL)
 			continue;
 		if (ia->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
 			continue;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
 		LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
 			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
 				break;
 		if (ia4 == NULL)
 			continue;
 
 		return (struct in6_ifaddr *)ia;
 	}
 
 	return NULL;
 }
 
 /*
  * Output routine: wrap an IPv6 packet in an IPv4 header and send it
  * with ip_output().
  *
  * ifp - stf interface (also the start of its struct stf_softc)
  * m   - IPv6 packet to send
  * dst - next-hop address, read as a struct sockaddr_in6
  * rt  - not used here
  *
  * The outer destination is the IPv4 address embedded in the inner
  * IPv6 destination if that is a 6to4 address, otherwise the one
  * embedded in dst.  The outer source comes from the address found by
  * stf_getsrcifa6().
  *
  * Returns the result of ip_output(), or ENETDOWN, ENOBUFS or
  * ENETUNREACH for the failure paths below (or the MAC check's error
  * when MAC is compiled in).
  */
 static int
 stf_output(ifp, m, dst, rt)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	struct rtentry *rt;
 {
 	struct stf_softc *sc;
 	struct sockaddr_in6 *dst6;
 	struct in_addr in4;
 	caddr_t ptr;
 	struct sockaddr_in *dst4;
 	u_int8_t tos;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
 #ifdef MAC
 	int error;
 
 	error = mac_check_ifnet_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	/* Valid because sc_if is the first member of struct stf_softc. */
 	sc = (struct stf_softc*)ifp;
 	dst6 = (struct sockaddr_in6 *)dst;
 
 	/* just in case */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETDOWN;
 	}
 
 	/*
 	 * If we don't have an ip4 address that match my inner ip6 address,
 	 * we shouldn't generate output.  Without this check, we'll end up
 	 * using wrong IPv4 source.
 	 */
 	ia6 = stf_getsrcifa6(ifp);
 	if (ia6 == NULL) {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETDOWN;
 	}
 
 	/* Make the whole IPv6 header contiguous before reading it. */
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m) {
 			ifp->if_oerrors++;
 			return ENOBUFS;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	/* Inner traffic class: bits 20-27 of the host-order flow word. */
 	tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 	/*
 	 * Pickup the right outer dst addr from the list of candidates.
 	 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
 	 */
 	ptr = NULL;
 	if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst))
 		ptr = GET_V4(&ip6->ip6_dst);
 	else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
 		ptr = GET_V4(&dst6->sin6_addr);
 	else {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETUNREACH;
 	}
 	/* Copied now: M_PREPEND below may replace the mbuf ptr points into. */
 	bcopy(ptr, &in4, sizeof(in4));
 
 	/*
 	 * NOTE(review): NBPFILTER is not defined anywhere in this chunk.  An
 	 * undefined identifier evaluates to 0 in #if, which would compile
 	 * this tap out, whereas in_stf_input() taps unconditionally --
 	 * confirm whether the guard is still wanted.
 	 */
 #if NBPFILTER > 0
 	if (ifp->if_bpf) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		struct mbuf m0;
 		u_int32_t af = AF_INET6;
 		
 		m0.m_next = m;
 		m0.m_len = 4;
 		m0.m_data = (char *)&af;
 		
 #ifdef HAVE_OLD_BPF
 		BPF_MTAP(ifp, &m0);
 #else
 		bpf_mtap(ifp->if_bpf, &m0);
 #endif
 	}
 #endif /*NBPFILTER > 0*/
 
 	/* Make room for the outer IPv4 header and keep it contiguous. */
 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 	if (m && m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		ifp->if_oerrors++;
 		return ENOBUFS;
 	}
 	ip = mtod(m, struct ip *);
 
 	bzero(ip, sizeof(*ip));
 
 	bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
 	    &ip->ip_src, sizeof(ip->ip_src));
 	bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
 	ip->ip_len = m->m_pkthdr.len;	/*host order*/
 	/* IFF_LINK1 selects ECN_ALLOWED instead of ECN_NOCARE. */
 	if (ifp->if_flags & IFF_LINK1)
 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
 	/* Reuse the cached route only if it is for this same destination. */
 	dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
 	if (dst4->sin_family != AF_INET ||
 	    bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
 		/* cache route doesn't match */
 		dst4->sin_family = AF_INET;
 		dst4->sin_len = sizeof(struct sockaddr_in);
 		bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
 		if (sc->sc_ro.ro_rt) {
 			RTFREE(sc->sc_ro.ro_rt);
 			sc->sc_ro.ro_rt = NULL;
 		}
 	}
 
 	if (sc->sc_ro.ro_rt == NULL) {
 		rtalloc(&sc->sc_ro);
 		if (sc->sc_ro.ro_rt == NULL) {
 			m_freem(m);
 			ifp->if_oerrors++;
 			return ENETUNREACH;
 		}
 	}
 
 	ifp->if_opackets++;
 	return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL);
 }
 
 /*
  * Tell whether *in (network byte order) lies in one of the RFC 1918
  * private ranges: 10.0.0.0/8, 172.16.0.0/12 or 192.168.0.0/16.
  *
  * Returns 1 for a private address and 0 otherwise.
  */
 static int
 isrfc1918addr(in)
 	struct in_addr *in;
 {
 	in_addr_t host = ntohl(in->s_addr);
 
 	if ((host >> 24) == 10)			/* 10.0.0.0/8 */
 		return 1;
 	if ((host >> 20) == 0xac1)		/* 172.16.0.0/12 */
 		return 1;
 	if ((host >> 16) == 0xc0a8)		/* 192.168.0.0/16 */
 		return 1;
 
 	return 0;
 }
 
 /*
  * Sanity-check an IPv4 address taken from a 6to4 packet.
  *
  * sc    - stf interface state; may be NULL, which skips the ingress filter
  * in    - address to check, in network byte order
  * inifp - interface the packet arrived on, or NULL to skip the
  *         ingress filter
  *
  * Returns -1 when the address is multicast, lies in 0/8, 127/8 or
  * 255/8, is an RFC 1918 private address, equals the broadcast address
  * of a broadcast-capable interface, or fails the ingress filter.
  * Returns 0 otherwise.
  */
 static int
 stf_checkaddr4(sc, in, inifp)
 	struct stf_softc *sc;
 	struct in_addr *in;
 	struct ifnet *inifp;	/* incoming interface */
 {
 	struct in_ifaddr *ia4;
 
 	/*
 	 * reject packets with the following address:
 	 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
 	 */
 	if (IN_MULTICAST(ntohl(in->s_addr)))
 		return -1;
 	switch ((ntohl(in->s_addr) & 0xff000000) >> 24) {
 	case 0: case 127: case 255:
 		return -1;
 	}
 
 	/*
 	 * reject packets with private address range.
 	 * (requirement from RFC3056 section 2 1st paragraph)
 	 */
 	if (isrfc1918addr(in))
 		return -1;
 
 	/*
 	 * reject packets with broadcast
 	 */
 	for (ia4 = TAILQ_FIRST(&in_ifaddrhead);
 	     ia4;
 	     ia4 = TAILQ_NEXT(ia4, ia_link))
 	{
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr)
 			return -1;
 	}
 
 	/*
 	 * perform ingress filter
 	 * (skipped when IFF_LINK2 is set on the stf interface)
 	 */
 	if (sc && (sc->sc_if.if_flags & IFF_LINK2) == 0 && inifp) {
 		struct sockaddr_in sin;
 		struct rtentry *rt;
 
 		bzero(&sin, sizeof(sin));
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_addr = *in;
 		/* The route back to the address must use the arrival interface. */
 		rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
 		if (!rt || rt->rt_ifp != inifp) {
 #if 0
 			log(LOG_WARNING, "%s: packet from 0x%x dropped "
 			    "due to ingress filter\n", if_name(&sc->sc_if),
 			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
 #endif
 			if (rt)
 				rtfree(rt);
 			return -1;
 		}
 		/* Release the route obtained from rtalloc1(). */
 		rtfree(rt);
 	}
 
 	return 0;
 }
 
 static int
 stf_checkaddr6(sc, in6, inifp)
 	struct stf_softc *sc;
 	struct in6_addr *in6;
 	struct ifnet *inifp;	/* incoming interface */
 {
 	/*
 	 * check 6to4 addresses
 	 */
 	if (IN6_IS_ADDR_6TO4(in6)) {
 		struct in_addr in4;
 		bcopy(GET_V4(in6), &in4, sizeof(in4));
 		return stf_checkaddr4(sc, &in4, inifp);
 	}
 
 	/*
 	 * reject anything that look suspicious.  the test is implemented
 	 * in ip6_input too, but we check here as well to
 	 * (1) reject bad packets earlier, and
 	 * (2) to be safe against future ip6_input change.
 	 */
 	if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6))
 		return -1;
 
 	return 0;
 }
 
 /*
  * Input routine for IPv6-in-IPv4 packets matched to an stf interface.
  *
  * m   - packet; its data starts with the outer IPv4 header
  * off - number of leading bytes removed to reach the inner IPv6 header
  *
  * Checks the outer and inner addresses, passes the outer TOS and the
  * inner traffic class through ip_ecn_egress() and writes the resulting
  * inner value back, taps bpf, updates the input counters and dispatches
  * the packet to NETISR_IPV6.  The packet is freed on every rejection
  * path.
  */
 void
 in_stf_input(m, off)
 	struct mbuf *m;
 	int off;
 {
 	int proto;
 	struct stf_softc *sc;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	u_int8_t otos, itos;
 	struct ifnet *ifp;
 
 	proto = mtod(m, struct ip *)->ip_p;
 
 	if (proto != IPPROTO_IPV6) {
 		m_freem(m);
 		return;
 	}
 
 	ip = mtod(m, struct ip *);
 
 	/* The softc registered with encap_attach_func() for this packet. */
 	sc = (struct stf_softc *)encap_getarg(m);
 
 	if (sc == NULL || (sc->sc_if.if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	ifp = &sc->sc_if;
 
 #ifdef MAC
 	mac_create_mbuf_from_ifnet(ifp, m);
 #endif
 
 	/*
 	 * perform sanity check against outer src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
 	    stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return;
 	}
 
 	/* Save the outer TOS before the outer header is stripped. */
 	otos = ip->ip_tos;
 	m_adj(m, off);
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m)
 			return;
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * perform sanity check against inner src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
 	    stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return;
 	}
 
 	/* Inner traffic class: bits 20-27 of the host-order flow word. */
 	itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 	if ((ifp->if_flags & IFF_LINK1) != 0)
 		ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
 	else
 		ip_ecn_egress(ECN_NOCARE, &otos, &itos);
 	/* Replace the traffic-class bits with the value left in itos. */
 	ip6->ip6_flow &= ~htonl(0xff << 20);
 	ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
 
 	/* From here on the packet counts as received on the stf interface. */
 	m->m_pkthdr.rcvif = ifp;
 	
 	if (ifp->if_bpf) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		struct mbuf m0;
 		u_int32_t af = AF_INET6;
 		
 		m0.m_next = m;
 		m0.m_len = 4;
 		m0.m_data = (char *)&af;
 		
 #ifdef HAVE_OLD_BPF
 		BPF_MTAP(ifp, &m0);
 #else
 		bpf_mtap(ifp->if_bpf, &m0);
 #endif
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * See net/if_gif.c for possible issues with packet processing
 	 * reorder due to extra queueing.
 	 */
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
 	netisr_dispatch(NETISR_IPV6, m);
 }
 
 /*
  * Route-request hook installed on stf addresses by stf_ioctl().
  *
  * Sets the route's MTU metric to IPV6_MMTU.  cmd and info are unused.
  * Does nothing when rt is NULL.
  */
 /* ARGSUSED */
 static void
 stf_rtrequest(cmd, rt, info)
 	int cmd;
 	struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
 
 	if (rt) {
+		/* Check the lock only once rt is known to be non-NULL. */
+		RT_LOCK_ASSERT(rt);
 		rt->rt_rmx.rmx_mtu = IPV6_MMTU;
 	}
 }
 
 static int
 stf_ioctl(ifp, cmd, data)
 	struct ifnet *ifp;
 	u_long cmd;
 	caddr_t data;
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	struct sockaddr_in6 *sin6;
 	struct in_addr addr;
 	int error;
 
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifa = (struct ifaddr *)data;
 		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr));
 		if (isrfc1918addr(&addr)) {
 			error = EINVAL;
 			break;
 		}
 
 		ifa->ifa_rtrequest = stf_rtrequest;
 		ifp->if_flags |= IFF_UP;
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ifr = (struct ifreq *)data;
 		if (ifr && ifr->ifr_addr.sa_family == AF_INET6)
 			;
 		else
 			error = EAFNOSUPPORT;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
Index: head/sys/net/route.c
===================================================================
--- head/sys/net/route.c	(revision 120726)
+++ head/sys/net/route.c	(revision 120727)
@@ -1,1187 +1,1220 @@
 /*
  * Copyright (c) 1980, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_mroute.h>
 
 #define	SA(p) ((struct sockaddr *)(p))
 
 static struct rtstat rtstat;
 struct radix_node_head *rt_tables[AF_MAX+1];
 
 static int	rttrash;		/* routes not in table but not freed */
 
 static void rt_maskedcopy(struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *);
 static void rtable_init(void **);
 
 static void
-rtable_init(table)
-	void **table;
+rtable_init(void **table)
 {
 	struct domain *dom;
 	for (dom = domains; dom; dom = dom->dom_next)
 		if (dom->dom_rtattach)
 			dom->dom_rtattach(&table[dom->dom_family],
 			    dom->dom_rtoffset);
 }
 
 void
 route_init()
 {
 	rn_init();	/* initialize all zeroes, all ones, mask table */
 	rtable_init((void **)rt_tables);
 }
 
 /*
  * Packet routing routines.
  */
 void
-rtalloc(ro)
-	register struct route *ro;
+rtalloc(struct route *ro)
 {
 	rtalloc_ign(ro, 0UL);
 }
 
 void
-rtalloc_ign(ro, ignore)
-	register struct route *ro;
-	u_long ignore;
+rtalloc_ign(struct route *ro, u_long ignore)
 {
 	struct rtentry *rt;
-	int s;
 
 	if ((rt = ro->ro_rt) != NULL) {
 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
 			return;
-		/* XXX - We are probably always at splnet here already. */
-		s = splnet();
 		RTFREE(rt);
 		ro->ro_rt = NULL;
-		splx(s);
 	}
 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
+	if (ro->ro_rt)
+		RT_UNLOCK(ro->ro_rt);
 }
 
 /*
  * Look up the route that matches the address given
  * Or, at least try.. Create a cloned route if needed.
+ *
+ * The returned route, if any, is locked.
  */
 struct rtentry *
-rtalloc1(dst, report, ignflags)
-	register struct sockaddr *dst;
-	int report;
-	u_long ignflags;
+rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
 {
-	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
-	register struct rtentry *rt;
-	register struct radix_node *rn;
-	struct rtentry *newrt = 0;
+	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	struct rtentry *rt;
+	struct radix_node *rn;
+	struct rtentry *newrt;
 	struct rt_addrinfo info;
 	u_long nflags;
-	int  s = splnet(), err = 0, msgtype = RTM_MISS;
+	int err = 0, msgtype = RTM_MISS;
 
+	newrt = 0;
+	bzero(&info, sizeof(info));
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
 	if (rnh == NULL) {
 		rtstat.rts_unreach++;
 		goto miss2;
 	}
 	RADIX_NODE_HEAD_LOCK(rnh);
-	if ((rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
-	    ((rn->rn_flags & RNF_ROOT) == 0)) {
+	if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
+	    (rn->rn_flags & RNF_ROOT) == 0) {
 		/*
 		 * If we find it and it's not the root node, then
 		 * get a refernce on the rtentry associated.
 		 */
 		newrt = rt = (struct rtentry *)rn;
 		nflags = rt->rt_flags & ~ignflags;
 		if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
 			/*
 			 * We are apparently adding (report = 0 in delete).
 			 * If it requires that it be cloned, do so.
 			 * (This implies it wasn't a HOST route.)
 			 */
 			err = rtrequest(RTM_RESOLVE, dst, SA(0),
 					      SA(0), 0, &newrt);
 			if (err) {
 				/*
 				 * If the cloning didn't succeed, maybe
 				 * what we have will do. Return that.
 				 */
-				newrt = rt;
-				rt->rt_refcnt++;
+				newrt = rt;		/* existing route */
+				RT_LOCK(newrt);
+				newrt->rt_refcnt++;
 				goto miss;
 			}
-			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
+			KASSERT(newrt, ("no route and no error"));
+			RT_LOCK(newrt);
+			if (newrt->rt_flags & RTF_XRESOLVE) {
 				/*
 				 * If the new route specifies it be
 				 * externally resolved, then go do that.
 				 */
 				msgtype = RTM_RESOLVE;
 				goto miss;
 			}
 			/* Inform listeners of the new route. */
-			bzero(&info, sizeof(info));
-			info.rti_info[RTAX_DST] = rt_key(rt);
-			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-			if (rt->rt_ifp != NULL) {
+			info.rti_info[RTAX_DST] = rt_key(newrt);
+			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
+			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
+			if (newrt->rt_ifp != NULL) {
 				info.rti_info[RTAX_IFP] =
-				    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
-				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+				    TAILQ_FIRST(&newrt->rt_ifp->if_addrhead)->ifa_addr;
+				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
 			}
-			rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0);
-		} else
-			rt->rt_refcnt++;
+			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
+		} else {
+			KASSERT(rt == newrt, ("locking wrong route"));
+			RT_LOCK(newrt);
+			newrt->rt_refcnt++;
+		}
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	} else {
 		/*
 		 * Either we hit the root or couldn't find any match,
 		 * Which basically means
 		 * "caint get there frm here"
 		 */
 		rtstat.rts_unreach++;
 	miss:
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	miss2:	if (report) {
 			/*
 			 * If required, report the failure to the supervising
 			 * Authorities.
 			 * For a delete, this is not an error. (report == 0)
 			 */
-			bzero((caddr_t)&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			rt_missmsg(msgtype, &info, 0, err);
 		}
 	}
-	splx(s);
+	if (newrt)
+		RT_LOCK_ASSERT(newrt);
 	return (newrt);
 }
 
 /*
  * Remove a reference count from an rtentry.
  * If the count gets low enough, take it out of the routing table
  */
 void
-rtfree(rt)
-	register struct rtentry *rt;
+rtfree(struct rtentry *rt)
 {
 	/*
 	 * find the tree for that address family
 	 */
 	struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
 
 	if (rt == 0 || rnh == 0)
 		panic("rtfree");
 
+	RT_LOCK_ASSERT(rt);
+
 	/*
 	 * decrement the reference count by one and if it reaches 0,
 	 * and there is a close function defined, call the close function
 	 */
-	rt->rt_refcnt--;
-	if (rnh->rnh_close && rt->rt_refcnt == 0) {
+	if (--rt->rt_refcnt > 0)
+		goto done;
+	/* XXX refcount==0? */
+	if (rt->rt_refcnt == 0 && rnh->rnh_close)
 		rnh->rnh_close((struct radix_node *)rt, rnh);
-	}
 
 	/*
 	 * If we are no longer "up" (and ref == 0)
 	 * then we can free the resources associated
 	 * with the route.
 	 */
-	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
+	if ((rt->rt_flags & RTF_UP) == 0) {
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtfree 2");
 		/*
 		 * the rtentry must have been removed from the routing table
 		 * so it is represented in rttrash.. remove that now.
 		 */
 		rttrash--;
-
 #ifdef	DIAGNOSTIC
 		if (rt->rt_refcnt < 0) {
 			printf("rtfree: %p not freed (neg refs)\n", rt);
-			return;
+			goto done;
 		}
 #endif
-
 		/*
 		 * release references on items we hold them on..
 		 * e.g other routes and ifaddrs.
 		 */
 		if (rt->rt_ifa)
 			IFAFREE(rt->rt_ifa);
-		if (rt->rt_parent)
-			RTFREE(rt->rt_parent);
+		rt->rt_parent = NULL;		/* NB: no refcnt on parent */
 
 		/*
 		 * The key is separatly alloc'd so free it (see rt_setgate()).
 		 * This also frees the gateway, as they are always malloc'd
 		 * together.
 		 */
 		Free(rt_key(rt));
 
 		/*
 		 * and the rtentry itself of course
 		 */
+		RT_LOCK_DESTROY(rt);
 		Free(rt);
+		return;
 	}
+done:
+	RT_UNLOCK(rt);
 }
 
 /* compare two sockaddr structures */
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
 
 /*
  * Force a routing table entry to the specified
  * destination to go through the given gateway.
  * Normally called as a result of a routing redirect
  * message from the network layer.
- *
- * N.B.: must be called at splnet
- *
  */
 void
-rtredirect(dst, gateway, netmask, flags, src, rtp)
-	struct sockaddr *dst, *gateway, *netmask, *src;
-	int flags;
-	struct rtentry **rtp;
+rtredirect(struct sockaddr *dst,
+	struct sockaddr *gateway,
+	struct sockaddr *netmask,
+	int flags,
+	struct sockaddr *src)
 {
 	struct rtentry *rt;
 	int error = 0;
 	short *stat = 0;
 	struct rt_addrinfo info;
 	struct ifaddr *ifa;
 
 	/* verify the gateway is directly reachable */
 	if ((ifa = ifa_ifwithnet(gateway)) == 0) {
 		error = ENETUNREACH;
 		goto out;
 	}
-	rt = rtalloc1(dst, 0, 0UL);
+	rt = rtalloc1(dst, 0, 0UL);	/* NB: rt is locked */
 	/*
 	 * If the redirect isn't from our current router for this dst,
 	 * it's either old or wrong.  If it redirects us to ourselves,
 	 * we have a routing loop, perhaps as a result of an interface
 	 * going down recently.
 	 */
 	if (!(flags & RTF_DONE) && rt &&
 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
 		error = EINVAL;
 	else if (ifa_ifwithaddr(gateway))
 		error = EHOSTUNREACH;
 	if (error)
 		goto done;
 	/*
 	 * Create a new entry if we just got back a wildcard entry
 	 * or the the lookup failed.  This is necessary for hosts
 	 * which use routing redirects generated by smart gateways
 	 * to dynamically build the routing tables.
 	 */
-	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
+	if (rt == 0 || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
 		goto create;
 	/*
 	 * Don't listen to the redirect if it's
 	 * for a route to an interface.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
 			/*
 			 * Changing from route to net => route to host.
 			 * Create new route, rather than smashing route to net.
 			 */
 		create:
 			if (rt)
 				rtfree(rt);
 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
 			bzero((caddr_t)&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			info.rti_info[RTAX_GATEWAY] = gateway;
 			info.rti_info[RTAX_NETMASK] = netmask;
 			info.rti_ifa = ifa;
 			info.rti_flags = flags;
 			rt = NULL;
 			error = rtrequest1(RTM_ADD, &info, &rt);
-			if (rt != NULL)
+			if (rt != NULL) {
+				RT_LOCK(rt);	/* returned unlocked; rtfree needs it held */
 				flags = rt->rt_flags;
+			}
 			stat = &rtstat.rts_dynamic;
 		} else {
 			/*
 			 * Smash the current notion of the gateway to
 			 * this destination.  Should check about netmask!!!
 			 */
 			rt->rt_flags |= RTF_MODIFIED;
 			flags |= RTF_MODIFIED;
 			stat = &rtstat.rts_newgateway;
 			/*
 			 * add the key and gateway (in one malloc'd chunk).
 			 */
 			rt_setgate(rt, rt_key(rt), gateway);
 		}
 	} else
 		error = EHOSTUNREACH;
 done:
-	if (rt) {
-		if (rtp && !error)
-			*rtp = rt;
-		else
-			rtfree(rt);
-	}
+	if (rt)
+		rtfree(rt);
 out:
 	if (error)
 		rtstat.rts_badredirect++;
 	else if (stat != NULL)
 		(*stat)++;
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_info[RTAX_AUTHOR] = src;
 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
 }
 
 /*
  * Routing table ioctl interface.
  */
 int
-rtioctl(req, data)
-	u_long req;
-	caddr_t data;
+rtioctl(u_long req, caddr_t data)
 {
 #ifdef INET
 	/* Multicast goop, grrr... */
 	return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
 #else /* INET */
 	return ENXIO;
 #endif /* INET */
 }
 
 struct ifaddr *
-ifa_ifwithroute(flags, dst, gateway)
-	int flags;
-	struct sockaddr	*dst, *gateway;
+ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
 {
 	register struct ifaddr *ifa;
+
 	if ((flags & RTF_GATEWAY) == 0) {
 		/*
 		 * If we are adding a route to an interface,
 		 * and the interface is a pt to pt link
 		 * we should search for the destination
 		 * as our clue to the interface.  Otherwise
 		 * we can use the local address.
 		 */
 		ifa = 0;
 		if (flags & RTF_HOST) {
 			ifa = ifa_ifwithdstaddr(dst);
 		}
 		if (ifa == 0)
 			ifa = ifa_ifwithaddr(gateway);
 	} else {
 		/*
 		 * If we are adding a route to a remote net
 		 * or host, the gateway may still be on the
 		 * other end of a pt to pt link.
 		 */
 		ifa = ifa_ifwithdstaddr(gateway);
 	}
 	if (ifa == 0)
 		ifa = ifa_ifwithnet(gateway);
 	if (ifa == 0) {
 		struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
 		if (rt == 0)
 			return (0);
 		--rt->rt_refcnt;
+		RT_UNLOCK(rt);
 		if ((ifa = rt->rt_ifa) == 0)
 			return (0);
 	}
 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
 		struct ifaddr *oifa = ifa;
 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
 		if (ifa == 0)
 			ifa = oifa;
 	}
 	return (ifa);
 }
 
 static int rt_fixdelete(struct radix_node *, void *);
 static int rt_fixchange(struct radix_node *, void *);
 
 struct rtfc_arg {
 	struct rtentry *rt0;
 	struct radix_node_head *rnh;
 };
 
 /*
  * Do appropriate manipulations of a routing tree given
  * all the bits of info needed
  */
 int
-rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
-	int req, flags;
-	struct sockaddr *dst, *gateway, *netmask;
-	struct rtentry **ret_nrt;
+rtrequest(int req,
+	struct sockaddr *dst,
+	struct sockaddr *gateway,
+	struct sockaddr *netmask,
+	int flags,
+	struct rtentry **ret_nrt)
 {
 	struct rt_addrinfo info;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_flags = flags;
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	return rtrequest1(req, &info, ret_nrt);
 }
 
 /*
  * These (questionable) definitions of apparent local variables apply
  * to the next two functions.  XXXXXX!!!
  */
 #define	dst	info->rti_info[RTAX_DST]
 #define	gateway	info->rti_info[RTAX_GATEWAY]
 #define	netmask	info->rti_info[RTAX_NETMASK]
 #define	ifaaddr	info->rti_info[RTAX_IFA]
 #define	ifpaddr	info->rti_info[RTAX_IFP]
 #define	flags	info->rti_flags
 
 int
-rt_getifa(info)
-	struct rt_addrinfo *info;
+rt_getifa(struct rt_addrinfo *info)
 {
 	struct ifaddr *ifa;
 	int error = 0;
 
 	/*
 	 * ifp may be specified by sockaddr_dl
 	 * when protocol address is ambiguous.
 	 */
 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
 	    ifpaddr->sa_family == AF_LINK &&
 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
 		info->rti_ifp = ifa->ifa_ifp;
 	if (info->rti_ifa == NULL && ifaaddr != NULL)
 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
 	if (info->rti_ifa == NULL) {
 		struct sockaddr *sa;
 
 		sa = ifaaddr != NULL ? ifaaddr :
 		    (gateway != NULL ? gateway : dst);
 		if (sa != NULL && info->rti_ifp != NULL)
 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
 		else if (dst != NULL && gateway != NULL)
 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
 		else if (sa != NULL)
 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
 	}
 	if ((ifa = info->rti_ifa) != NULL) {
 		if (info->rti_ifp == NULL)
 			info->rti_ifp = ifa->ifa_ifp;
 	} else
 		error = ENETUNREACH;
 	return (error);
 }
 
 int
-rtrequest1(req, info, ret_nrt)
-	int req;
-	struct rt_addrinfo *info;
-	struct rtentry **ret_nrt;
+rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
 {
-	int s = splnet(); int error = 0;
+	int error = 0;
 	register struct rtentry *rt;
 	register struct radix_node *rn;
 	register struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	struct sockaddr *ndst;
 #define senderr(x) { error = x ; goto bad; }
 
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
-	if ((rnh = rt_tables[dst->sa_family]) == 0) {
-		splx(s);
+	rnh = rt_tables[dst->sa_family];
+	if (rnh == 0)
 		return (EAFNOSUPPORT);
-	}
 	RADIX_NODE_HEAD_LOCK(rnh);
 	/*
 	 * If we are adding a host route then we don't want to put
 	 * a netmask in the tree, nor do we want to clone it.
 	 */
 	if (flags & RTF_HOST) {
 		netmask = 0;
 		flags &= ~(RTF_CLONING | RTF_PRCLONING);
 	}
 	switch (req) {
 	case RTM_DELETE:
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
 		 */
-		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
+		rn = rnh->rnh_deladdr(dst, netmask, rnh);
+		if (rn == 0)
 			senderr(ESRCH);
 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtrequest delete");
 		rt = (struct rtentry *)rn;
+		RT_LOCK(rt);
 		rt->rt_refcnt++;
 		rt->rt_flags &= ~RTF_UP;
 
 		/*
 		 * Now search what's left of the subtree for any cloned
 		 * routes which might have been formed from this node.
 		 */
 		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
 		    rt_mask(rt)) {
 			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
 					       rt_fixdelete, rt);
 		}
 
 		/*
 		 * Remove any external references we may have.
 		 * This might result in another rtentry being freed if
 		 * we held its last reference.
 		 */
 		if (rt->rt_gwroute) {
-			rt = rt->rt_gwroute;
-			RTFREE(rt);
-			(rt = (struct rtentry *)rn)->rt_gwroute = 0;
+			struct rtentry *gwrt = rt->rt_gwroute;
+			RTFREE(gwrt);
+			rt->rt_gwroute = 0;
 		}
 
 		/*
 		 * give the protocol a chance to keep things in sync.
 		 */
 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
 		/*
 		 * one more rtentry floating around that is not
 		 * linked to the routing table.
 		 */
 		rttrash++;
 
 		/*
 		 * If the caller wants it, then it can have it,
 		 * but it's up to it to free the rtentry as we won't be
 		 * doing it.
 		 */
-		if (ret_nrt)
+		if (ret_nrt) {
 			*ret_nrt = rt;
-		else
-			RTFREE(rt);
+			RT_UNLOCK(rt);
+		} else
+			RTFREE_LOCKED(rt);
 		break;
 
 	case RTM_RESOLVE:
 		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
 			senderr(EINVAL);
 		ifa = rt->rt_ifa;
+		/* XXX locking? */
 		flags = rt->rt_flags &
 		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
 		flags |= RTF_WASCLONED;
 		gateway = rt->rt_gateway;
 		if ((netmask = rt->rt_genmask) == 0)
 			flags |= RTF_HOST;
 		goto makeroute;
 
 	case RTM_ADD:
 		if ((flags & RTF_GATEWAY) && !gateway)
 			panic("rtrequest: GATEWAY but no gateway");
 
 		if (info->rti_ifa == NULL && (error = rt_getifa(info)))
 			senderr(error);
 		ifa = info->rti_ifa;
 
 	makeroute:
-		R_Malloc(rt, struct rtentry *, sizeof(*rt));
+		R_Zalloc(rt, struct rtentry *, sizeof(*rt));
 		if (rt == 0)
 			senderr(ENOBUFS);
-		Bzero(rt, sizeof(*rt));
+		RT_LOCK_INIT(rt);
 		rt->rt_flags = RTF_UP | flags;
 		/*
 		 * Add the gateway. Possibly re-malloc-ing the storage for it
 		 * also add the rt_gwroute if possible.
 		 */
+		RT_LOCK(rt);
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
+			RT_LOCK_DESTROY(rt);
 			Free(rt);
 			senderr(error);
 		}
 
 		/*
 		 * point to the (possibly newly malloc'd) dest address.
 		 */
-		ndst = rt_key(rt);
+		ndst = (struct sockaddr *)rt_key(rt);
 
 		/*
 		 * make sure it contains the value we want (masked if needed).
 		 */
 		if (netmask) {
 			rt_maskedcopy(dst, ndst, netmask);
 		} else
 			Bcopy(dst, ndst, dst->sa_len);
 
 		/*
 		 * Note that we now have a reference to the ifa.
 		 * This moved from below so that rnh->rnh_addaddr() can
 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
 		 */
 		IFAREF(ifa);
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
-		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 
-		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
-					rnh, rt->rt_nodes);
+		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
+		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 		if (rn == 0) {
 			struct rtentry *rt2;
 			/*
 			 * Uh-oh, we already have one of these in the tree.
 			 * We do a special hack: if the route that's already
 			 * there was generated by the protocol-cloning
 			 * mechanism, then we just blow it away and retry
 			 * the insertion of the new one.
 			 */
 			rt2 = rtalloc1(dst, 0, RTF_PRCLONING);
 			if (rt2 && rt2->rt_parent) {
 				rtrequest(RTM_DELETE,
-					  (struct sockaddr *)rt_key(rt2),
+					  rt_key(rt2),
 					  rt2->rt_gateway,
 					  rt_mask(rt2), rt2->rt_flags, 0);
-				RTFREE(rt2);
-				rn = rnh->rnh_addaddr((caddr_t)ndst,
-						      (caddr_t)netmask,
+				RTFREE_LOCKED(rt2);
+				rn = rnh->rnh_addaddr(ndst, netmask,
 						      rnh, rt->rt_nodes);
 			} else if (rt2) {
 				/* undo the extra ref we got */
-				RTFREE(rt2);
+				RTFREE_LOCKED(rt2);
 			}
 		}
 
 		/*
 		 * If it still failed to go into the tree,
 		 * then un-make it (this should be a function)
 		 */
 		if (rn == 0) {
 			if (rt->rt_gwroute)
-				rtfree(rt->rt_gwroute);
-			if (rt->rt_ifa) {
+				RTFREE(rt->rt_gwroute);
+			if (rt->rt_ifa)
 				IFAFREE(rt->rt_ifa);
-			}
 			Free(rt_key(rt));
+			RT_LOCK_DESTROY(rt);
 			Free(rt);
 			senderr(EEXIST);
 		}
 
 		rt->rt_parent = 0;
 
 		/*
 		 * If we got here from RESOLVE, then we are cloning
 		 * so clone the rest, and note that we
 		 * are a clone (and increment the parent's references)
 		 */
 		if (req == RTM_RESOLVE) {
+			KASSERT(ret_nrt && *ret_nrt,
+				("no route to clone from"));
 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
 			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
 			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
-				rt->rt_parent = (*ret_nrt);
-				(*ret_nrt)->rt_refcnt++;
+				/*
+				 * NB: We do not bump the refcnt on the parent
+				 * entry under the assumption that it will
+				 * remain so long as we do.  This is
+				 * important when deleting the parent route
+				 * as this operation requires traversing
+				 * the tree to delete all clones and futzing
+				 * with refcnts requires us to double-lock
+				 * parent through this back reference.
+				 */
+				rt->rt_parent = *ret_nrt;
 			}
 		}
 
 		/*
 		 * if this protocol has something to add to this then
 		 * allow it to do that as well.
 		 */
 		if (ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(req, rt, info);
 
 		/*
 		 * We repeat the same procedure from rt_setgate() here because
 		 * it doesn't fire when we call it there because the node
 		 * hasn't been added to the tree yet.
 		 */
 		if (req == RTM_ADD &&
 		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
 			struct rtfc_arg arg;
 			arg.rnh = rnh;
 			arg.rt0 = rt;
 			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 					       rt_fixchange, &arg);
 		}
 
 		/*
 		 * actually return a resultant rtentry and
 		 * give the caller a single reference.
 		 */
 		if (ret_nrt) {
 			*ret_nrt = rt;
 			rt->rt_refcnt++;
 		}
+		RT_UNLOCK(rt);
 		break;
 	default:
 		error = EOPNOTSUPP;
 	}
 bad:
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 	return (error);
+#undef senderr
+}
+
 #undef dst
 #undef gateway
 #undef netmask
 #undef ifaaddr
 #undef ifpaddr
 #undef flags
-}
 
 /*
  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
  * (i.e., the routes related to it by the operation of cloning).  This
  * routine is iterated over all potential former-child-routes by way of
  * rnh->rnh_walktree_from() above, and those that actually are children of
  * the late parent (passed in as VP here) are themselves deleted.
  */
 static int
-rt_fixdelete(rn, vp)
-	struct radix_node *rn;
-	void *vp;
+rt_fixdelete(struct radix_node *rn, void *vp)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct rtentry *rt0 = vp;
 
 	if (rt->rt_parent == rt0 &&
 	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
 		return rtrequest(RTM_DELETE, rt_key(rt),
 				 (struct sockaddr *)0, rt_mask(rt),
 				 rt->rt_flags, (struct rtentry **)0);
 	}
 	return 0;
 }
 
 /*
  * This routine is called from rt_setgate() to do the analogous thing for
  * adds and changes.  There is the added complication in this case of a
  * middle insert; i.e., insertion of a new network route between an older
  * network route and (cloned) host routes.  For this reason, a simple check
  * of rt->rt_parent is insufficient; each candidate route must be tested
  * against the (mask, value) of the new route (passed as before in vp)
  * to see if the new route matches it.
  *
  * XXX - it may be possible to do fixdelete() for changes and reserve this
  * routine just for adds.  I'm not sure why I thought it was necessary to do
  * changes this way.
  */
 #ifdef DEBUG
 static int rtfcdebug = 0;
 #endif
 
 static int
-rt_fixchange(rn, vp)
-	struct radix_node *rn;
-	void *vp;
+rt_fixchange(struct radix_node *rn, void *vp)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct rtfc_arg *ap = vp;
 	struct rtentry *rt0 = ap->rt0;
 	struct radix_node_head *rnh = ap->rnh;
 	u_char *xk1, *xm1, *xk2, *xmp;
 	int i, len, mlen;
 
 #ifdef DEBUG
 	if (rtfcdebug)
 		printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
 #endif
 
 	if (!rt->rt_parent ||
 	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
 #ifdef DEBUG
 		if(rtfcdebug) printf("no parent, pinned or cloning\n");
 #endif
 		return 0;
 	}
 
 	if (rt->rt_parent == rt0) {
 #ifdef DEBUG
 		if(rtfcdebug) printf("parent match\n");
 #endif
 		return rtrequest(RTM_DELETE, rt_key(rt),
 				 (struct sockaddr *)0, rt_mask(rt),
 				 rt->rt_flags, (struct rtentry **)0);
 	}
 
 	/*
 	 * There probably is a function somewhere which does this...
 	 * if not, there should be.
 	 */
-	len = imin(((struct sockaddr *)rt_key(rt0))->sa_len,
-		   ((struct sockaddr *)rt_key(rt))->sa_len);
+	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
 
 	xk1 = (u_char *)rt_key(rt0);
 	xm1 = (u_char *)rt_mask(rt0);
 	xk2 = (u_char *)rt_key(rt);
 
 	/* avoid applying a less specific route */
 	xmp = (u_char *)rt_mask(rt->rt_parent);
-	mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len;
-	if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) {
+	mlen = rt_key(rt->rt_parent)->sa_len;
+	if (mlen > rt_key(rt0)->sa_len) {
 #ifdef DEBUG
 		if (rtfcdebug)
 			printf("rt_fixchange: inserting a less "
 			       "specific route\n");
 #endif
 		return 0;
 	}
 	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
 		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
 #ifdef DEBUG
 			if (rtfcdebug)
 				printf("rt_fixchange: inserting a less "
 				       "specific route\n");
 #endif
 			return 0;
 		}
 	}
 
 	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
 		if ((xk2[i] & xm1[i]) != xk1[i]) {
 #ifdef DEBUG
 			if(rtfcdebug) printf("no match\n");
 #endif
 			return 0;
 		}
 	}
 
 	/*
 	 * OK, this node is a clone, and matches the node currently being
 	 * changed/added under the node's mask.  So, get rid of it.
 	 */
 #ifdef DEBUG
 	if(rtfcdebug) printf("deleting\n");
 #endif
 	return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
 			 rt_mask(rt), rt->rt_flags, (struct rtentry **)0);
 }
 
 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
 int
-rt_setgate(rt0, dst, gate)
-	struct rtentry *rt0;
-	struct sockaddr *dst, *gate;
+rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 {
+	/* XXX dst may be overwritten, can we move this to below */
+	struct radix_node_head *rnh = rt_tables[dst->sa_family];
 	caddr_t new, old;
 	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
-	register struct rtentry *rt = rt0;
-	struct radix_node_head *rnh = rt_tables[dst->sa_family];
 
+	RT_LOCK_ASSERT(rt);
+
 	/*
 	 * A host route with the destination equal to the gateway
 	 * will interfere with keeping LLINFO in the routing
 	 * table, so disallow it.
 	 */
-	if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
+	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
 					(RTF_HOST|RTF_GATEWAY)) &&
-	    (dst->sa_len == gate->sa_len) &&
-	    (bcmp(dst, gate, dst->sa_len) == 0)) {
+	    dst->sa_len == gate->sa_len &&
+	    bcmp(dst, gate, dst->sa_len) == 0) {
 		/*
 		 * The route might already exist if this is an RTM_CHANGE
 		 * or a routing redirect, so try to delete it.
 		 */
-		if (rt_key(rt0))
-			rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt0),
-			    rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0);
+		if (rt_key(rt))
+			rtrequest(RTM_DELETE, rt_key(rt),
+			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
 		return EADDRNOTAVAIL;
 	}
 
 	/*
 	 * Both dst and gateway are stored in the same malloc'd chunk
 	 * (If I ever get my hands on....)
 	 * if we need to malloc a new chunk, then keep the old one around
 	 * till we don't need it any more.
 	 */
 	if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
 		old = (caddr_t)rt_key(rt);
 		R_Malloc(new, caddr_t, dlen + glen);
 		if (new == 0)
 			return ENOBUFS;
-		rt->rt_nodes->rn_key = new;
+		rt_key(rt) = new;
 	} else {
 		/*
 		 * otherwise just overwrite the old one
 		 */
-		new = rt->rt_nodes->rn_key;
+		new = (caddr_t)rt_key(rt);
 		old = 0;
 	}
 
 	/*
 	 * copy the new gateway value into the memory chunk
 	 */
 	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
 
 	/*
 	 * if we are replacing the chunk (or it's new) we need to
 	 * replace the dst as well
 	 */
 	if (old) {
 		Bcopy(dst, new, dlen);
 		Free(old);
+		gate = rt->rt_gateway;	/* old chunk freed; use the new copy */
 	}
 
 	/*
 	 * If there is already a gwroute, it's now almost definitly wrong
 	 * so drop it.
 	 */
 	if (rt->rt_gwroute != NULL) {
 		RTFREE(rt->rt_gwroute);
 		rt->rt_gwroute = NULL;
 	}
 	/*
 	 * Cloning loop avoidance:
 	 * In the presence of protocol-cloning and bad configuration,
 	 * it is possible to get stuck in bottomless mutual recursion
 	 * (rtrequest rt_setgate rtalloc1).  We avoid this by not allowing
 	 * protocol-cloning to operate for gateways (which is probably the
 	 * correct choice anyway), and avoid the resulting reference loops
 	 * by disallowing any route to run through itself as a gateway.
 	 * This is obviously mandatory when we get rt->rt_output().
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING);
 		if (rt->rt_gwroute == rt) {
-			RTFREE(rt->rt_gwroute);
+			RTFREE_LOCKED(rt->rt_gwroute);
 			rt->rt_gwroute = 0;
 			return EDQUOT; /* failure */
 		}
+		RT_UNLOCK(rt->rt_gwroute);
 	}
 
 	/*
 	 * This isn't going to do anything useful for host routes, so
 	 * don't bother.  Also make sure we have a reasonable mask
 	 * (we don't yet have one during adds).
 	 */
 	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
 		struct rtfc_arg arg;
+
 		arg.rnh = rnh;
 		arg.rt0 = rt;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 				       rt_fixchange, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	return 0;
 }
 
 static void
-rt_maskedcopy(src, dst, netmask)
-	struct sockaddr *src, *dst, *netmask;
+rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
 {
 	register u_char *cp1 = (u_char *)src;
 	register u_char *cp2 = (u_char *)dst;
 	register u_char *cp3 = (u_char *)netmask;
 	u_char *cplim = cp2 + *cp3;
 	u_char *cplim2 = cp2 + *cp1;
 
 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
 	cp3 += 2;
 	if (cplim > cplim2)
 		cplim = cplim2;
 	while (cp2 < cplim)
 		*cp2++ = *cp1++ & *cp3++;
 	if (cp2 < cplim2)
 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
 }
 
 /*
  * Set up a routing table entry, normally
  * for an interface.
  */
 int
-rtinit(ifa, cmd, flags)
-	register struct ifaddr *ifa;
-	int cmd, flags;
+rtinit(struct ifaddr *ifa, int cmd, int flags)
 {
 	register struct rtentry *rt;
 	register struct sockaddr *dst;
 	register struct sockaddr *deldst;
 	struct sockaddr *netmask;
 	struct mbuf *m = 0;
 	struct rtentry *nrt = 0;
 	struct radix_node_head *rnh;
 	struct radix_node *rn;
 	int error;
 	struct rt_addrinfo info;
 
 	if (flags & RTF_HOST) {
 		dst = ifa->ifa_dstaddr;
 		netmask = NULL;
 	} else {
 		dst = ifa->ifa_addr;
 		netmask = ifa->ifa_netmask;
 	}
 	/*
 	 * If it's a delete, check that if it exists, it's on the correct
 	 * interface or we might scrub a route to another ifa which would
 	 * be confusing at best and possibly worse.
 	 */
 	if (cmd == RTM_DELETE) {
 		/*
 		 * It's a delete, so it should already exist..
 		 * If it's a net, mask off the host bits
 		 * (Assuming we have a mask)
 		 */
 		if (netmask != NULL) {
 			m = m_get(M_DONTWAIT, MT_SONAME);
 			if (m == NULL)
 				return(ENOBUFS);
 			deldst = mtod(m, struct sockaddr *);
 			rt_maskedcopy(dst, deldst, netmask);
 			dst = deldst;
 		}
 		/*
 		 * Look up an rtentry that is in the routing tree and
 		 * contains the correct info.
 		 */
 		if ((rnh = rt_tables[dst->sa_family]) == NULL)
 			goto bad;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
 		    (rn->rn_flags & RNF_ROOT) ||
 		    ((struct rtentry *)rn)->rt_ifa != ifa ||
 		    !sa_equal(SA(rn->rn_key), dst));
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		if (error) {
 bad:
 			if (m)
 				(void) m_free(m);
 			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 		}
 	}
 	/*
 	 * Do the actual request
 	 */
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_ifa = ifa;
 	info.rti_flags = flags | ifa->ifa_flags;
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	error = rtrequest1(cmd, &info, &nrt);
 	if (error == 0 && (rt = nrt) != NULL) {
 		/*
 		 * notify any listening routing agents of the change
 		 */
+		RT_LOCK(rt);
 		rt_newaddrmsg(cmd, ifa, error, rt);
 		if (cmd == RTM_DELETE) {
 			/*
 			 * If we are deleting, and we found an entry, then
 			 * it's been removed from the tree.. now throw it away.
 			 */
-			RTFREE(rt);
-		} else if (cmd == RTM_ADD) {
-			/*
-			 * We just wanted to add it.. we don't actually
-			 * need a reference.
-			 */
-			rt->rt_refcnt--;
+			RTFREE_LOCKED(rt);
+		} else {
+			if (cmd == RTM_ADD) {
+				/*
+				 * We just wanted to add it.. we don't actually
+				 * need a reference.
+				 */
+				rt->rt_refcnt--;
+			}
+			RT_UNLOCK(rt);
 		}
 	}
 	if (m)
 		(void) m_free(m);
 	return (error);
 }
 
+/*
+ * Validate the route rt0 to the specified destination.  If the
+ * route is marked down try to find a new route.  If the route
+ * to the gateway is gone, try to setup a new route.  Otherwise,
+ * if the route is marked for packets to be rejected, enforce that.
+ *
+ * On return lrt0 contains the route to the destination and lrt
+ * contains the route to the next hop.  Their values are meaningful
+ * ONLY if no error is returned.
+ *
+ * This routine is invoked on each layer 2 output path, prior to
+ * encapsulating outbound packets.
+ */
 int
-rt_check(lrt, lrt0, dst)
-	struct rtentry **lrt;
-	struct rtentry **lrt0;
-	struct sockaddr *dst;
+rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
 {
+#define senderr(x) { error = x ; goto bad; }
 	struct rtentry *rt;
 	struct rtentry *rt0;
 	int error;
 
-	rt = *lrt;
 	rt0 = *lrt0;
-	error = 0;
-
 	rt = rt0;
-
-	if (rt != NULL) {
+	if (rt) {
+		/* NB: the locking here is tortuous... */
+		RT_LOCK(rt);
 		if ((rt->rt_flags & RTF_UP) == 0) {
-			rt0 = rt = rtalloc1(dst, 1, 0UL);
-			if (rt0 != NULL)
+			RT_UNLOCK(rt);
+			rt = rtalloc1(dst, 1, 0UL);
+			if (rt != NULL) {
 				rt->rt_refcnt--;
-			else
+				/* NB: keep lock; dropped below */
+			} else
 				senderr(EHOSTUNREACH);
+			rt0 = rt;
 		}
+		/* XXX BSD/OS checks dst->sa_family != AF_NS */
 		if (rt->rt_flags & RTF_GATEWAY) {
-			if (rt->rt_gwroute == NULL)
+			if (rt->rt_gwroute == 0)
 				goto lookup;
-
 			rt = rt->rt_gwroute;
+			RT_LOCK(rt);		/* NB: gwroute */
 			if ((rt->rt_flags & RTF_UP) == 0) {
-				rtfree(rt);
+				rtfree(rt);	/* unlock gwroute */
 				rt = rt0;
 			lookup:
-				rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
-				rt = rt->rt_gwroute;
-				if (rt == NULL)
+				RT_UNLOCK(rt0);
+				rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+				RT_LOCK(rt0);
+				rt0->rt_gwroute = rt;
+				if (rt == 0) {
+					RT_UNLOCK(rt0);
 					senderr(EHOSTUNREACH);
+				}
 			}
+			RT_UNLOCK(rt0);
 		}
-		if (rt->rt_flags & RTF_REJECT)
-			if (rt->rt_rmx.rmx_expire == 0 ||
-				time_second < rt->rt_rmx.rmx_expire)
-				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
+		/* XXX why are we inspecting rmx_expire? */
+		error = (rt->rt_flags & RTF_REJECT) &&
+			(rt->rt_rmx.rmx_expire == 0 ||
+				time_second < rt->rt_rmx.rmx_expire);
+		RT_UNLOCK(rt);
+		if (error)
+			senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
-
-bad:
-	*lrt = rt;
+	*lrt = rt;		/* NB: return unlocked */
 	*lrt0 = rt0;
+	return (0);
+bad:
+	/* NB: lrt and lrt0 should not be interpreted if error is non-zero */
 	return (error);
+#undef senderr
 }
 
 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
 SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
Index: head/sys/net/route.h
===================================================================
--- head/sys/net/route.h	(revision 120726)
+++ head/sys/net/route.h	(revision 120727)
@@ -1,300 +1,311 @@
 /*
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NET_ROUTE_H_
 #define _NET_ROUTE_H_
 
 /*
  * Kernel resident routing tables.
  *
  * The routing tables are initialized when interface addresses
  * are set by making entries for all directly connected interfaces.
  */
 
 /*
  * A route consists of a destination address and a reference
  * to a routing entry.  These are often held by protocols
  * in their control blocks, e.g. inpcb.
  */
 struct route {
 	struct	rtentry *ro_rt;
 	struct	sockaddr ro_dst;
 };
 
 /*
  * These numbers are used by reliable protocols for determining
  * retransmission behavior and are included in the routing structure.
  */
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_hopcount;	/* max hops expected */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
 	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
 	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_pksent;	/* packets sent using this route */
 	u_long	rmx_filler[4];	/* will be used for T/TCP later */
 };
 
 /*
  * rmx_rtt and rmx_rttvar are stored as microseconds;
  * RTTTOPRHZ(rtt) converts to a value suitable for use
  * by a protocol slowtimo counter.
  */
 #define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
 /*
  * XXX kernel function pointer `rt_output' is visible to applications.
  */
 struct mbuf;
 
 /*
  * We distinguish between routes to hosts and routes to networks,
  * preferring the former if available.  For each route we infer
  * the interface to use from the gateway address supplied when
  * the route was entered.  Routes that forward packets through
  * gateways are marked so that the output routines know to address the
  * gateway rather than the ultimate destination.
  */
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #endif
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 #define	rt_key(r)	((struct sockaddr *)((r)->rt_nodes->rn_key))
 #define	rt_mask(r)	((struct sockaddr *)((r)->rt_nodes->rn_mask))
 	struct	sockaddr *rt_gateway;	/* value */
 	long	rt_refcnt;		/* # held references */
 	u_long	rt_flags;		/* up/down?, host/net */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 	struct	ifaddr *rt_ifa;		/* the answer: interface to use */
 	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
 	caddr_t	rt_llinfo;		/* pointer to link level info cache */
 	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
 	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
 	int	(*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
 					/* output routine for this (rt,if) */
 	struct	rtentry *rt_parent; 	/* cloning parent of this route */
-	struct	mtx *rt_mtx;		/* mutex for routing entry */
+#ifdef _KERNEL
+	/* XXX ugly, user apps use this definition but don't have a mtx def */
+	struct	mtx rt_mtx;		/* mutex for routing entry */
+#endif
 };
 
 /*
  * Following structure necessary for 4.3 compatibility;
  * We should eventually move it to a compat file.
  */
 struct ortentry {
 	u_long	rt_hash;		/* to speed lookups */
 	struct	sockaddr rt_dst;	/* key */
 	struct	sockaddr rt_gateway;	/* value */
 	short	rt_flags;		/* up/down?, host/net */
 	short	rt_refcnt;		/* # held references */
 	u_long	rt_use;			/* raw # packets forwarded */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 };
 
 #define rt_use rt_rmx.rmx_pksent
 
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
 #define	RTF_HOST	0x4		/* host entry (net otherwise) */
 #define	RTF_REJECT	0x8		/* host or net unreachable */
 #define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
 #define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
 #define RTF_DONE	0x40		/* message confirmed */
 /*			0x80		   unused, was RTF_DELCLONE */
 #define RTF_CLONING	0x100		/* generate new routes on use */
 #define RTF_XRESOLVE	0x200		/* external daemon resolves name */
 #define RTF_LLINFO	0x400		/* generated by link layer (e.g. ARP) */
 #define RTF_STATIC	0x800		/* manually added */
 #define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
 
 #define RTF_PRCLONING	0x10000		/* protocol requires cloning */
 #define RTF_WASCLONED	0x20000		/* route generated through cloning */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 /*			0x80000		   unused */
 #define RTF_PINNED	0x100000	/* future use */
 #define	RTF_LOCAL	0x200000 	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
 					/* 0x1000000 and up unassigned */
 
 /*
  * Routing statistics.
  */
 struct	rtstat {
 	short	rts_badredirect;	/* bogus redirect calls */
 	short	rts_dynamic;		/* routes created by redirects */
 	short	rts_newgateway;		/* routes modified by redirects */
 	short	rts_unreach;		/* lookups which failed */
 	short	rts_wildcard;		/* lookups satisfied by a wildcard */
 };
 /*
  * Structures for routing messages.
  */
 struct rt_msghdr {
 	u_short	rtm_msglen;	/* to skip over non-understood messages */
 	u_char	rtm_version;	/* future binary compatibility */
 	u_char	rtm_type;	/* message type */
 	u_short	rtm_index;	/* index for associated ifp */
 	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
 	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
 	pid_t	rtm_pid;	/* identify sender */
 	int	rtm_seq;	/* for sender to identify action */
 	int	rtm_errno;	/* why failed */
 	int	rtm_use;	/* from rtentry */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
 /*
  * Message types.
  */
 #define RTM_ADD		0x1	/* Add Route */
 #define RTM_DELETE	0x2	/* Delete Route */
 #define RTM_CHANGE	0x3	/* Change Metrics or flags */
 #define RTM_GET		0x4	/* Report Metrics */
 #define RTM_LOSING	0x5	/* Kernel Suspects Partitioning */
 #define RTM_REDIRECT	0x6	/* Told to use different route */
 #define RTM_MISS	0x7	/* Lookup failed on this address */
 #define RTM_LOCK	0x8	/* fix specified metrics */
 #define RTM_OLDADD	0x9	/* caused by SIOCADDRT */
 #define RTM_OLDDEL	0xa	/* caused by SIOCDELRT */
 #define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
 #define RTM_NEWADDR	0xc	/* address being added to iface */
 #define RTM_DELADDR	0xd	/* address being removed from iface */
 #define RTM_IFINFO	0xe	/* iface going up/down etc. */
 #define	RTM_NEWMADDR	0xf	/* mcast group membership being added to if */
 #define	RTM_DELMADDR	0x10	/* mcast group membership being deleted */
 #define	RTM_IFANNOUNCE	0x11	/* iface arrival/departure */
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
  */
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
 #define RTV_EXPIRE	0x4	/* init or lock _expire */
 #define RTV_RPIPE	0x8	/* init or lock _recvpipe */
 #define RTV_SPIPE	0x10	/* init or lock _sendpipe */
 #define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
 
 /*
  * Bitmask values for rtm_addrs.
  */
 #define RTA_DST		0x1	/* destination sockaddr present */
 #define RTA_GATEWAY	0x2	/* gateway sockaddr present */
 #define RTA_NETMASK	0x4	/* netmask sockaddr present */
 #define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
 #define RTA_IFP		0x10	/* interface name sockaddr present */
 #define RTA_IFA		0x20	/* interface addr sockaddr present */
 #define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
 #define RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
 
 /*
  * Index offsets for sockaddr array for alternate internal encoding.
  */
 #define RTAX_DST	0	/* destination sockaddr present */
 #define RTAX_GATEWAY	1	/* gateway sockaddr present */
 #define RTAX_NETMASK	2	/* netmask sockaddr present */
 #define RTAX_GENMASK	3	/* cloning mask sockaddr present */
 #define RTAX_IFP	4	/* interface name sockaddr present */
 #define RTAX_IFA	5	/* interface addr sockaddr present */
 #define RTAX_AUTHOR	6	/* sockaddr for author of redirect */
 #define RTAX_BRD	7	/* for NEWADDR, broadcast or p-p dest addr */
 #define RTAX_MAX	8	/* size of array to allocate */
 
 struct rt_addrinfo {
 	int	rti_addrs;
 	struct	sockaddr *rti_info[RTAX_MAX];
 	int	rti_flags;
 	struct	ifaddr *rti_ifa;
 	struct	ifnet *rti_ifp;
 };
 
 #ifdef _KERNEL
 
-#define	RT_LOCK_INIT(rt) \
-    mtx_init((rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
-#define	RT_LOCK(rt)		mtx_lock((rt)->rt_mtx)
-#define	RT_UNLOCK(rt)		mtx_unlock((rt)->rt_mtx)
-#define	RT_LOCK_DESTROY(rt)	mtx_destroy((rt)->rt_mtx)
+#define	RT_LOCK_INIT(_rt) \
+	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
+#define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
+#define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
+#define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
+#define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
 
-#define	RTFREE(rt) \
-	do { \
-		if ((rt)->rt_refcnt <= 1) \
-			rtfree(rt); \
-		else \
-			(rt)->rt_refcnt--; \
+#define	RTFREE_LOCKED(_rt) do {				\
+		if ((_rt)->rt_refcnt <= 1)		\
+			rtfree(_rt);			\
+		else {					\
+			(_rt)->rt_refcnt--;		\
+			RT_UNLOCK(_rt);			\
+		}					\
+		/* guard against invalid refs */	\
+		_rt = 0;				\
 	} while (0)
+#define	RTFREE(_rt) do {				\
+		RT_LOCK(_rt);				\
+		RTFREE_LOCKED(_rt);			\
+	} while (0)
 
 extern struct radix_node_head *rt_tables[AF_MAX+1];
 
 struct ifmultiaddr;
 
 void	 route_init(void);
 int	 rt_getifa(struct rt_addrinfo *);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
-int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void	 rtalloc(struct route *);
+int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void	 rtalloc_ign(struct route *, u_long);
-struct rtentry *
-	 rtalloc1(struct sockaddr *, int, u_long);
+/* NB: the rtentry is returned locked */
+struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
 void	 rtfree(struct rtentry *);
 int	 rtinit(struct ifaddr *, int, int);
 int	 rtioctl(u_long, caddr_t);
 void	 rtredirect(struct sockaddr *, struct sockaddr *,
-	    struct sockaddr *, int, struct sockaddr *, struct rtentry **);
+	    struct sockaddr *, int, struct sockaddr *);
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 int	 rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
 int	 rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
 #endif
 
 #endif
Index: head/sys/net/rtsock.c
===================================================================
--- head/sys/net/rtsock.c	(revision 120726)
+++ head/sys/net/rtsock.c	(revision 120727)
@@ -1,1089 +1,1098 @@
 /*
  * Copyright (c) 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/raw_cb.h>
 #include <net/route.h>
 
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
 static struct	sockaddr route_dst = { 2, PF_ROUTE, };
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
 static struct {
 	int	ip_count;	/* attacked w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
 	int	ipx_count;	/* attached w/ AF_IPX */
 	int	any_count;	/* total attached */
 } route_cb;
 
 struct mtx rtsock_mtx;
 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
 
 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
 
 struct walkarg {
 	int	w_tmemsize;
 	int	w_op, w_arg;
 	caddr_t	w_tmem;
 	struct sysctl_req *w_req;
 };
 
 static struct mbuf *rt_msg1(int, struct rt_addrinfo *);
 static int	rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *);
 static int	rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *, struct socket *);
 static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *);
 static void	rt_dispatch(struct mbuf *, struct sockaddr *);
 
 /*
  * It really doesn't make any sense at all for this code to share much
  * with raw_usrreq.c, since its functionality is so restricted.  XXX
  */
 static int
 rts_abort(struct socket *so)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_abort(so);
 	splx(s);
 	return error;
 }
 
 /* pru_accept is EOPNOTSUPP */
 
 static int
 rts_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct rawcb *rp;
 	int s, error;
 
 	if (sotorawcb(so) != 0)
 		return EISCONN;	/* XXX panic? */
 	/* XXX */
 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
 	if (rp == 0)
 		return ENOBUFS;
 
 	/*
 	 * The splnet() is necessary to block protocols from sending
 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
 	 * this PCB is extant but incompletely initialized.
 	 * Probably we should try to do more of this work beforehand and
 	 * eliminate the spl.
 	 */
 	s = splnet();
 	so->so_pcb = (caddr_t)rp;
 	error = raw_attach(so, proto);
 	rp = sotorawcb(so);
 	if (error) {
 		splx(s);
 		so->so_pcb = NULL;
 		free(rp, M_PCB);
 		return error;
 	}
 	RTSOCK_LOCK();
 	switch(rp->rcb_proto.sp_protocol) {
 	case AF_INET:
 		route_cb.ip_count++;
 		break;
 	case AF_INET6:
 		route_cb.ip6_count++;
 		break;
 	case AF_IPX:
 		route_cb.ipx_count++;
 		break;
 	}
 	rp->rcb_faddr = &route_src;
 	route_cb.any_count++;
 	RTSOCK_UNLOCK();
 	soisconnected(so);
 	so->so_options |= SO_USELOOPBACK;
 	splx(s);
 	return 0;
 }
 
 static int
 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
 	splx(s);
 	return error;
 }
 
 static int
 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
 	splx(s);
 	return error;
 }
 
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
 static int
 rts_detach(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
 	int s, error;
 
 	s = splnet();
 	if (rp != 0) {
 		RTSOCK_LOCK();
 		switch(rp->rcb_proto.sp_protocol) {
 		case AF_INET:
 			route_cb.ip_count--;
 			break;
 		case AF_INET6:
 			route_cb.ip6_count--;
 			break;
 		case AF_IPX:
 			route_cb.ipx_count--;
 			break;
 		}
 		route_cb.any_count--;
 		RTSOCK_UNLOCK();
 	}
 	error = raw_usrreqs.pru_detach(so);
 	splx(s);
 	return error;
 }
 
 static int
 rts_disconnect(struct socket *so)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_disconnect(so);
 	splx(s);
 	return error;
 }
 
 /* pru_listen is EOPNOTSUPP */
 
 static int
 rts_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_peeraddr(so, nam);
 	splx(s);
 	return error;
 }
 
 /* pru_rcvd is EOPNOTSUPP */
 /* pru_rcvoob is EOPNOTSUPP */
 
 static int
 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 struct mbuf *control, struct thread *td)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
 	splx(s);
 	return error;
 }
 
 /* pru_sense is null */
 
 static int
 rts_shutdown(struct socket *so)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_shutdown(so);
 	splx(s);
 	return error;
 }
 
 static int
 rts_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	int s, error;
 	s = splnet();
 	error = raw_usrreqs.pru_sockaddr(so, nam);
 	splx(s);
 	return error;
 }
 
 static struct pr_usrreqs route_usrreqs = {
 	rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
 	pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
 	pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
 	rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
 	sosend, soreceive, sopoll
 };
 
 /*ARGSUSED*/
 static int
 route_output(m, so)
 	register struct mbuf *m;
 	struct socket *so;
 {
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
 	register struct rt_msghdr *rtm = 0;
 	register struct rtentry *rt = 0;
 	struct radix_node_head *rnh;
 	struct rt_addrinfo info;
 	int len, error = 0;
 	struct ifnet *ifp = 0;
 	struct ifaddr *ifa = 0;
 
 #define senderr(e) { error = e; goto flush;}
 	if (m == 0 || ((m->m_len < sizeof(long)) &&
 		       (m = m_pullup(m, sizeof(long))) == 0))
 		return (ENOBUFS);
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("route_output");
 	len = m->m_pkthdr.len;
 	if (len < sizeof(*rtm) ||
 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
 		info.rti_info[RTAX_DST] = 0;
 		senderr(EINVAL);
 	}
 	R_Malloc(rtm, struct rt_msghdr *, len);
 	if (rtm == 0) {
 		info.rti_info[RTAX_DST] = 0;
 		senderr(ENOBUFS);
 	}
 	m_copydata(m, 0, len, (caddr_t)rtm);
 	if (rtm->rtm_version != RTM_VERSION) {
 		info.rti_info[RTAX_DST] = 0;
 		senderr(EPROTONOSUPPORT);
 	}
 	rtm->rtm_pid = curproc->p_pid;
 	bzero(&info, sizeof(info));
 	info.rti_addrs = rtm->rtm_addrs;
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
 		info.rti_info[RTAX_DST] = 0;
 		senderr(EINVAL);
 	}
 	info.rti_flags = rtm->rtm_flags;
 	if (info.rti_info[RTAX_DST] == 0 ||
 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
 	    (info.rti_info[RTAX_GATEWAY] != 0 &&
 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
 		senderr(EINVAL);
 	if (info.rti_info[RTAX_GENMASK]) {
 		struct radix_node *t;
 		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
 		if (t && Bcmp((caddr_t *) info.rti_info[RTAX_GENMASK] + 1,
 			      (caddr_t *)t->rn_key + 1,
 			      *(u_char *)t->rn_key - 1) == 0)
 			info.rti_info[RTAX_GENMASK] =
 				(struct sockaddr *)(t->rn_key);
 		else
 			senderr(ENOBUFS);
 	}
 
 	/*
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
 	 */
 	if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
 		senderr(error);
 
 	switch (rtm->rtm_type) {
 		struct rtentry *saved_nrt;
 
 	case RTM_ADD:
 		if (info.rti_info[RTAX_GATEWAY] == 0)
 			senderr(EINVAL);
 		saved_nrt = 0;
 		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
 		if (error == 0 && saved_nrt) {
+			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
 			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
 			saved_nrt->rt_rmx.rmx_locks |=
 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			saved_nrt->rt_refcnt--;
 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
+			RT_UNLOCK(saved_nrt);
 		}
 		break;
 
 	case RTM_DELETE:
 		saved_nrt = 0;
 		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
 		if (error == 0) {
+			RT_LOCK(saved_nrt);
 			rt = saved_nrt;
 			goto report;
 		}
 		break;
 
 	case RTM_GET:
 	case RTM_CHANGE:
 	case RTM_LOCK:
 		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
 		if (rnh == 0)
 			senderr(EAFNOSUPPORT);
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
 			info.rti_info[RTAX_NETMASK], rnh);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		if (rt == NULL)		/* XXX looks bogus */
 			senderr(ESRCH);
+		RT_LOCK(rt);
 		rt->rt_refcnt++;
 
 		switch(rtm->rtm_type) {
 
 		case RTM_GET:
 		report:
+			RT_LOCK_ASSERT(rt);
 			info.rti_info[RTAX_DST] = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 				ifp = rt->rt_ifp;
 				if (ifp) {
 					info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
 					info.rti_info[RTAX_IFA] =
 						rt->rt_ifa->ifa_addr;
 					if (ifp->if_flags & IFF_POINTOPOINT)
 						 info.rti_info[RTAX_BRD] =
 							rt->rt_ifa->ifa_dstaddr;
 					rtm->rtm_index = ifp->if_index;
 				} else {
 					info.rti_info[RTAX_IFP] = 0;
 					info.rti_info[RTAX_IFA] = 0;
 				}
 			}
 			len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
 				(struct walkarg *)0);
 			if (len > rtm->rtm_msglen) {
 				struct rt_msghdr *new_rtm;
 				R_Malloc(new_rtm, struct rt_msghdr *, len);
 				if (new_rtm == 0) {
+					RT_UNLOCK(rt);
 					senderr(ENOBUFS);
 				}
 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
 				Free(rtm); rtm = new_rtm;
 			}
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
 				(struct walkarg *)0);
 			rtm->rtm_flags = rt->rt_flags;
 			rtm->rtm_rmx = rt->rt_rmx;
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
 		case RTM_CHANGE:
 			/*
 			 * New gateway could require new ifaddr, ifp;
 			 * flags may also be different; ifp may be specified
 			 * by ll sockaddr when protocol address is ambiguous
 			 */
 			if (((rt->rt_flags & RTF_GATEWAY) &&
 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
 			    info.rti_info[RTAX_IFP] != NULL ||
 			    (info.rti_info[RTAX_IFA] != NULL &&
 			     !sa_equal(info.rti_info[RTAX_IFA],
 				       rt->rt_ifa->ifa_addr))) {
 				if ((error = rt_getifa(&info)) != 0) {
+					RT_UNLOCK(rt);
 					senderr(error);
 				}
 			}
 			if (info.rti_info[RTAX_GATEWAY] != NULL &&
 			    (error = rt_setgate(rt, rt_key(rt),
 					info.rti_info[RTAX_GATEWAY])) != 0) {
+				RT_UNLOCK(rt);
 				senderr(error);
 			}
 			if ((ifa = info.rti_ifa) != NULL) {
 				struct ifaddr *oifa = rt->rt_ifa;
 				if (oifa != ifa) {
 					if (oifa) {
 						if (oifa->ifa_rtrequest)
 							oifa->ifa_rtrequest(
 								RTM_DELETE, rt,
 								&info);
 						IFAFREE(oifa);
 					}
 				        IFAREF(ifa);
 				        rt->rt_ifa = ifa;
 				        rt->rt_ifp = info.rti_ifp;
 				}
 			}
 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
 					&rt->rt_rmx);
 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
 			if (info.rti_info[RTAX_GENMASK])
 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			/* FALLTHROUGH */
 		case RTM_LOCK:
 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
 			rt->rt_rmx.rmx_locks |=
 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			break;
 		}
+		RT_UNLOCK(rt);
 		break;
 
 	default:
 		senderr(EOPNOTSUPP);
 	}
 
 flush:
 	if (rtm) {
 		if (error)
 			rtm->rtm_errno = error;
 		else
 			rtm->rtm_flags |= RTF_DONE;
 	}
 	if (rt)		/* XXX can this be true? */
 		RTFREE(rt);
     {
 	register struct rawcb *rp = 0;
 	/*
 	 * Check to see if we don't want our own messages.
 	 */
 	if ((so->so_options & SO_USELOOPBACK) == 0) {
 		if (route_cb.any_count <= 1) {
 			if (rtm)
 				Free(rtm);
 			m_freem(m);
 			return (error);
 		}
 		/* There is another listener, so construct message */
 		rp = sotorawcb(so);
 	}
 	if (rtm) {
 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
 			m_freem(m);
 			m = NULL;
 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
 		Free(rtm);
 	}
 	if (m) {
 		if (rp) {
 			/*
 			 * XXX insure we don't get a copy by
 			 * invalidating our protocol
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 			rp->rcb_proto.sp_family = family;
 		} else
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 	}
     }
 	return (error);
 #undef	sa_equal
 }
 
 /*
  * Copy selected routing metrics from "in" to "out".  Only the metrics
  * whose RTV_* bit is set in "which" are overwritten; all others in "out"
  * are left untouched.
  */
 static void
 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
 {
 #define COPY_METRIC(bit, field) do {					\
 	if (which & (bit))						\
 		out->field = in->field;					\
 } while (0)
 	COPY_METRIC(RTV_RPIPE, rmx_recvpipe);
 	COPY_METRIC(RTV_SPIPE, rmx_sendpipe);
 	COPY_METRIC(RTV_SSTHRESH, rmx_ssthresh);
 	COPY_METRIC(RTV_RTT, rmx_rtt);
 	COPY_METRIC(RTV_RTTVAR, rmx_rttvar);
 	COPY_METRIC(RTV_HOPCOUNT, rmx_hopcount);
 	COPY_METRIC(RTV_MTU, rmx_mtu);
 	COPY_METRIC(RTV_EXPIRE, rmx_expire);
 #undef COPY_METRIC
 }
 
 /*
  * Round a length up to the next multiple of sizeof(long).  A length of
  * zero (or less) still yields sizeof(long).
  */
 #define ROUNDUP(a) \
 	((a) > 0 ? ((((a) - 1) | (sizeof(long) - 1)) + 1) : sizeof(long))
 
 /*
  * Walk the sockaddrs packed between cp and cplim and record a pointer to
  * each one in rtinfo->rti_info[], in the slot selected by the matching
  * bit of rtinfo->rti_addrs.  The data comes straight from userland, so a
  * little sanity checking is done to avoid bad memory references.
  *
  * Returns EINVAL if a sockaddr would run past cplim, 0 otherwise.
  */
 static int
 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 {
 	struct sockaddr *cur;
 	int slot;
 
 	for (slot = 0; slot < RTAX_MAX && cp < cplim; slot++) {
 		/* Only slots flagged in rti_addrs consume data. */
 		if (!(rtinfo->rti_addrs & (1 << slot)))
 			continue;
 		cur = (struct sockaddr *)cp;
 		/* The claimed length does not fit in the buffer. */
 		if (cp + cur->sa_len > cplim)
 			return (EINVAL);
 		/*
 		 * A zero-length sockaddr means there are no more, even if
 		 * further bits are set (route(1) can evidently generate
 		 * this, and it used to crash the kernel).  For
 		 * compatibility, point the slot at a safe address and stop
 		 * instead of failing.
 		 */
 		if (cur->sa_len == 0) {
 			rtinfo->rti_info[slot] = &sa_zero;
 			return (0); /* should be EINVAL but for compat */
 		}
 		/* Accept it and step over its padded length. */
 		rtinfo->rti_info[slot] = cur;
 		cp += ROUNDUP(cur->sa_len);
 	}
 	return (0);
 }
 
 static struct mbuf *
 rt_msg1(int type, struct rt_addrinfo *rtinfo)
 {
 	register struct rt_msghdr *rtm;
 	register struct mbuf *m;
 	register int i;
 	register struct sockaddr *sa;
 	int len, dlen;
 
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_DELMADDR:
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_IFANNOUNCE:
 		len = sizeof(struct if_announcemsghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	if (len > MCLBYTES)
 		panic("rt_msg1");
 	m = m_gethdr(M_DONTWAIT, MT_DATA);
 	if (m && len > MHLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			m = NULL;
 		}
 	}
 	if (m == 0)
 		return (m);
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = 0;
 	rtm = mtod(m, struct rt_msghdr *);
 	bzero((caddr_t)rtm, len);
 	for (i = 0; i < RTAX_MAX; i++) {
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = ROUNDUP(sa->sa_len);
 		m_copyback(m, len, dlen, (caddr_t)sa);
 		len += dlen;
 	}
 	if (m->m_pkthdr.len != len) {
 		m_freem(m);
 		return (NULL);
 	}
 	rtm->rtm_msglen = len;
 	rtm->rtm_version = RTM_VERSION;
 	rtm->rtm_type = type;
 	return (m);
 }
 
 /*
  * Compute the length of a routing message of the given type carrying the
  * sockaddrs in rtinfo, and optionally write it out.
  *
  *   cp != NULL: the header slot is skipped, the sockaddrs are copied
  *               behind it, and version/type/length are stored in the
  *               header at cp.
  *   cp == NULL, w != NULL with w->w_req set: the walkarg scratch buffer
  *               w->w_tmem is grown if needed and the message is built
  *               there by running the pass a second time.
  *   otherwise:  only the length is computed.
  *
  * rtinfo->rti_addrs is rebuilt with one bit per sockaddr present.
  * Returns the message length after ALIGN().
  */
 static int
 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
 {
 	register int i;
 	int len, dlen, second_time = 0;
 	caddr_t cp0;
 
 	rtinfo->rti_addrs = 0;
 again:
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	/* Remember the start of the buffer; the header is filled in last. */
 	cp0 = cp;
 	if (cp0)
 		cp += len;
 	for (i = 0; i < RTAX_MAX; i++) {
 		register struct sockaddr *sa;
 
 		if ((sa = rtinfo->rti_info[i]) == 0)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = ROUNDUP(sa->sa_len);
 		if (cp) {
 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
 			cp += dlen;
 		}
 		len += dlen;
 	}
 	len = ALIGN(len);
 	if (cp == 0 && w != NULL && !second_time) {
 		register struct walkarg *rw = w;
 
 		if (rw->w_req) {
 			if (rw->w_tmemsize < len) {
 				if (rw->w_tmem)
 					free(rw->w_tmem, M_RTABLE);
 				/*
 				 * The old buffer is gone; forget its size so
 				 * that a failed allocation here does not stop
 				 * later, smaller messages from retrying.
 				 */
 				rw->w_tmemsize = 0;
 				rw->w_tmem = (caddr_t)
 					malloc(len, M_RTABLE, M_NOWAIT);
 				if (rw->w_tmem)
 					rw->w_tmemsize = len;
 			}
 			if (rw->w_tmem) {
 				/* Redo the pass, this time writing to w_tmem. */
 				cp = rw->w_tmem;
 				second_time = 1;
 				goto again;
 			}
 		}
 	}
 	if (cp) {
 		register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
 
 		rtm->rtm_version = RTM_VERSION;
 		rtm->rtm_type = type;
 		rtm->rtm_msglen = len;
 	}
 	return (len);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that a redirect has occured, a routing lookup
  * has failed, or that a protocol has detected timeouts to a particular
  * destination.
  */
 void
 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
 
 	if (route_cb.any_count == 0)
 		return;
 	m = rt_msg1(type, rtinfo);
 	if (m == 0)
 		return;
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
 	rt_dispatch(m, sa);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that the status of a network interface has changed.
  */
 void
 rt_ifmsg(struct ifnet *ifp)
 {
 	struct if_msghdr *ifm;
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	if (route_cb.any_count == 0)
 		return;
 	bzero((caddr_t)&info, sizeof(info));
 	m = rt_msg1(RTM_IFINFO, &info);
 	if (m == 0)
 		return;
 	ifm = mtod(m, struct if_msghdr *);
 	ifm->ifm_index = ifp->if_index;
 	ifm->ifm_flags = ifp->if_flags;
 	ifm->ifm_data = ifp->if_data;
 	ifm->ifm_addrs = 0;
 	rt_dispatch(m, NULL);
 }
 
 /*
  * Generate routing-socket messages indicating that a network interface
  * has had an address associated with (or removed from) it.
  *
  * Two messages may be sent, in an order that depends on cmd:
  *   RTM_ADD:    pass 1 sends RTM_NEWADDR, pass 2 sends the route message.
  *   RTM_DELETE: pass 1 sends the route message, pass 2 sends RTM_DELADDR.
  * The route message is skipped when rt is NULL.
  *
  * If the logic is ever reversed so that messages sent TO the routing
  * socket request interface configuration, this routine becomes
  * unnecessary, as the routing socket will generate copies automatically.
  *
  * NOTE(review): for any cmd other than RTM_ADD/RTM_DELETE neither branch
  * runs and rt_dispatch() is reached with m still NULL -- confirm callers
  * only pass those two values.
  * NOTE(review): the first entry of ifp->if_addrhead is dereferenced
  * without a NULL check.
  */
 void
 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa = 0;
 	int pass;
 	struct mbuf *m = 0;
 	struct ifnet *ifp = ifa->ifa_ifp;
 
 	if (route_cb.any_count == 0)
 		return;
 	for (pass = 1; pass < 3; pass++) {
 		bzero((caddr_t)&info, sizeof(info));
 		/* Address message: first when adding, last when deleting. */
 		if ((cmd == RTM_ADD && pass == 1) ||
 		    (cmd == RTM_DELETE && pass == 2)) {
 			register struct ifa_msghdr *ifam;
 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
 			info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			if ((m = rt_msg1(ncmd, &info)) == NULL)
 				continue;
 			ifam = mtod(m, struct ifa_msghdr *);
 			ifam->ifam_index = ifp->if_index;
 			ifam->ifam_metric = ifa->ifa_metric;
 			ifam->ifam_flags = ifa->ifa_flags;
 			ifam->ifam_addrs = info.rti_addrs;
 		}
 		/* Route message: last when adding, first when deleting. */
 		if ((cmd == RTM_ADD && pass == 2) ||
 		    (cmd == RTM_DELETE && pass == 1)) {
 			register struct rt_msghdr *rtm;
 
 			if (rt == 0)
 				continue;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			if ((m = rt_msg1(cmd, &info)) == NULL)
 				continue;
 			rtm = mtod(m, struct rt_msghdr *);
 			rtm->rtm_index = ifp->if_index;
 			rtm->rtm_flags |= rt->rt_flags;
 			rtm->rtm_errno = error;
 			rtm->rtm_addrs = info.rti_addrs;
 		}
 		/* sa is whichever sockaddr the branch above selected. */
 		rt_dispatch(m, sa);
 	}
 }
 
 /*
  * This is the analogue to rt_newaddrmsg which performs the same
  * function but for multicast group memberships.  This is easier since
  * there is no route state to worry about.
  *
  * cmd is used as the message type; ifma supplies the group address
  * (RTAX_IFA), its link-layer address (RTAX_GATEWAY) and the interface.
  * The interface pointer may be NULL: RTAX_IFP is then omitted and the
  * interface index is reported as 0.
  */
 void
 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 {
 	struct rt_addrinfo info;
 	struct mbuf *m = 0;
 	struct ifnet *ifp = ifma->ifma_ifp;
 	struct ifma_msghdr *ifmam;
 
 	if (route_cb.any_count == 0)
 		return;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 	if (ifp && TAILQ_FIRST(&ifp->if_addrhead))
 		info.rti_info[RTAX_IFP] =
 			TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
 	else
 		info.rti_info[RTAX_IFP] = NULL;
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
 	 */
 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
 	m = rt_msg1(cmd, &info);
 	if (m == NULL)
 		return;
 	ifmam = mtod(m, struct ifma_msghdr *);
 	/* ifp was treated as possibly NULL above; do not dereference blindly. */
 	ifmam->ifmam_index = (ifp != NULL) ? ifp->if_index : 0;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	rt_dispatch(m, ifma->ifma_addr);
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * network interface arrival and departure.
  */
 void
 rt_ifannouncemsg(struct ifnet *ifp, int what)
 {
 	struct if_announcemsghdr *ifan;
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	if (route_cb.any_count == 0)
 		return;
 	bzero((caddr_t)&info, sizeof(info));
 	m = rt_msg1(RTM_IFANNOUNCE, &info);
 	if (m == NULL)
 		return;
 	ifan = mtod(m, struct if_announcemsghdr *);
 	ifan->ifan_index = ifp->if_index;
 	snprintf(ifan->ifan_name, sizeof(ifan->ifan_name),
 	    "%s%d", ifp->if_name, ifp->if_unit);
 	ifan->ifan_what = what;
 	rt_dispatch(m, NULL);
  }
 
 static void
 rt_dispatch(struct mbuf *m, struct sockaddr *sa)
 {
 	struct sockproto route_proto;
 
 	route_proto.sp_family = PF_ROUTE;
 	route_proto.sp_protocol = sa ?  sa->sa_family : 0;
 	raw_input(m, &route_proto, &route_src, &route_dst);
 }
 
 /*
  * This is used in dumping the kernel table via sysctl().
  */
 static int
 sysctl_dumpentry(struct radix_node *rn, void *vw)
 {
 	struct walkarg *w = vw;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int error = 0, size;
 	struct rt_addrinfo info;
 
 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
 		return 0;
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 	if (rt->rt_ifp) {
 		info.rti_info[RTAX_IFP] =
 			TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
 	}
 	size = rt_msg2(RTM_GET, &info, 0, w);
 	if (w->w_req && w->w_tmem) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		rtm->rtm_flags = rt->rt_flags;
 		rtm->rtm_use = rt->rt_use;
 		rtm->rtm_rmx = rt->rt_rmx;
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 		rtm->rtm_addrs = info.rti_addrs;
 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 		return (error);
 	}
 	return (error);
 }
 
 /*
  * Emit the interface list for the NET_RT_IFLIST sysctl.
  *
  * For every interface (or only the one whose index equals w->w_arg when
  * that is non-zero) an RTM_IFINFO message is copied out, followed by one
  * RTM_NEWADDR message per address after the first on the interface's
  * address list.  Addresses are filtered by family when af is non-zero,
  * and by prison_if() for jailed callers.
  *
  * Returns 0, or the first error from SYSCTL_OUT.
  *
  * NOTE(review): the first entry of if_addrhead is dereferenced without a
  * NULL check -- presumably every interface has at least one address
  * there; confirm.
  */
 static int
 sysctl_iflist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct rt_addrinfo info;
 	int len, error = 0;
 
 	bzero((caddr_t)&info, sizeof(info));
 	/* IFNET_RLOCK(); */		/* could sleep XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		/* The first address on the list is reported as RTAX_IFP. */
 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w);
 		/* Clear it so the per-address messages below omit RTAX_IFP. */
 		info.rti_info[RTAX_IFP] = 0;
 		if (w->w_req && w->w_tmem) {
 			struct if_msghdr *ifm;
 
 			ifm = (struct if_msghdr *)w->w_tmem;
 			ifm->ifm_index = ifp->if_index;
 			ifm->ifm_flags = ifp->if_flags;
 			ifm->ifm_data = ifp->if_data;
 			ifm->ifm_addrs = info.rti_addrs;
 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
 			if (error)
 				goto done;
 		}
 		/* Remaining addresses, starting after the first entry. */
 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != 0) {
 			if (af && af != ifa->ifa_addr->sa_family)
 				continue;
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, ifa->ifa_addr))
 				continue;
 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			len = rt_msg2(RTM_NEWADDR, &info, 0, w);
 			if (w->w_req && w->w_tmem) {
 				struct ifa_msghdr *ifam;
 
 				ifam = (struct ifa_msghdr *)w->w_tmem;
 				ifam->ifam_index = ifa->ifa_ifp->if_index;
 				ifam->ifam_flags = ifa->ifa_flags;
 				ifam->ifam_metric = ifa->ifa_metric;
 				ifam->ifam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error)
 					goto done;
 			}
 		}
 		/* Reset the per-address slots before the next interface. */
 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
 			info.rti_info[RTAX_BRD] = 0;
 	}
 done:
 	/* IFNET_RUNLOCK(); */ /* XXX */
 	return (error);
 }
 
 /*
  * Sysctl handler for the routing-table node.
  *
  * After the leading name component is skipped, exactly three remain:
  * address family, operation (NET_RT_DUMP, NET_RT_FLAGS or
  * NET_RT_IFLIST) and an operation-specific argument.  The node is
  * read-only: a request carrying new data fails with EPERM.
  *
  * Returns EISDIR/ENOTDIR for a wrong number of name components, EINVAL
  * for a family above AF_MAX or an unknown operation, EAFNOSUPPORT when
  * the requested family has no routing table, otherwise the result of
  * the walk or of sysctl_iflist().
  *
  * NOTE(review): af is a u_char, so name[0] is truncated before the
  * AF_MAX range check.
  */
 static int
 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 {
 	int	*name = (int *)arg1;
 	u_int	namelen = arg2;
 	struct radix_node_head *rnh;
 	int	i, s, error = EINVAL;
 	u_char  af;
 	struct	walkarg w;
 
 	/* Skip the first name component. */
 	name ++;
 	namelen--;
 	if (req->newptr)
 		return (EPERM);
 	if (namelen != 3)
 		return ((namelen < 3) ? EISDIR : ENOTDIR);
 	af = name[0];
 	if (af > AF_MAX)
 		return (EINVAL);
 	Bzero(&w, sizeof(w));
 	w.w_op = name[1];
 	w.w_arg = name[2];
 	w.w_req = req;
 
 	s = splnet();
 	switch (w.w_op) {
 
 	case NET_RT_DUMP:
 	case NET_RT_FLAGS:
 		if (af != 0) {
 			/* A single address family was requested. */
 			if ((rnh = rt_tables[af]) != NULL) {
 				/* RADIX_NODE_HEAD_LOCK(rnh); */
 			    	error = rnh->rnh_walktree(rnh,
 				    sysctl_dumpentry, &w);/* could sleep XXX */
 				/* RADIX_NODE_HEAD_UNLOCK(rnh); */
 			} else
 				error = EAFNOSUPPORT;
 		} else {
 			/* af == 0: walk every table, stopping at the first error. */
 			for (i = 1; i <= AF_MAX; i++)
 				if ((rnh = rt_tables[i]) != NULL) {
 					/* RADIX_NODE_HEAD_LOCK(rnh); */
 					error = rnh->rnh_walktree(rnh,
 					    sysctl_dumpentry, &w);
 					/* RADIX_NODE_HEAD_UNLOCK(rnh); */
 					if (error)
 						break;
 				}
 		}
 		break;
 
 	case NET_RT_IFLIST:
 		error = sysctl_iflist(af, &w);
 	}
 	splx(s);
 	/* Release the scratch buffer rt_msg2() may have allocated. */
 	if (w.w_tmem)
 		free(w.w_tmem, M_RTABLE);
 	return (error);
 }
 
 /* Read-only sysctl node under _net, numbered PF_ROUTE, served by sysctl_rtsock. */
 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
  */
 
 extern struct domain routedomain;		/* or at least forward */
 
 /*
  * The routing domain's only protocol switch entry: a SOCK_RAW protocol
  * with route_output, raw_ctlinput, raw_init and route_usrreqs hooked in.
  * The initializers are positional; see the declaration of struct protosw
  * (not visible here) for which field each slot fills.
  */
 static struct protosw routesw[] = {
 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
   0,		route_output,	raw_ctlinput,	0,
   0,
   raw_init,	0,		0,		0,
   &route_usrreqs
 }
 };
 
 /*
  * The PF_ROUTE domain itself; the last two initializers are the start of
  * routesw and the address one past its final element.
  */
 static struct domain routedomain =
     { PF_ROUTE, "route", 0, 0, 0,
       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
 
 DOMAIN_SET(route);
Index: head/sys/netinet/if_atm.c
===================================================================
--- head/sys/netinet/if_atm.c	(revision 120726)
+++ head/sys/netinet/if_atm.c	(revision 120727)
@@ -1,358 +1,360 @@
 /*      $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $       */
 
 /*
  *
  * Copyright (c) 1996 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Charles D. Cranor and 
  *	Washington University.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * IP <=> ATM address resolution.
  */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_natm.h"
 
 #if defined(INET) || defined(INET6)
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/queue.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/if_atm.h>
 
 #include <netinet/in.h>
 #include <netinet/if_atm.h>
 
 #ifdef NATM
 #include <netnatm/natm.h>
 #endif
 
 /* Cast a pointer to a link-level sockaddr_dl pointer. */
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 /*
  * GETnBYTE(V, A, L): read an n-byte value, most significant byte first,
  * from byte pointer A into V, then advance A by n and reduce the
  * remaining length L by n.  A is evaluated several times, so it must be
  * a plain lvalue with no side effects.
  */
 #define	GET3BYTE(V, A, L)	do {				\
 	(V) = ((A)[0] << 16) | ((A)[1] << 8) | (A)[2];		\
 	(A) += 3;						\
 	(L) -= 3;						\
     } while (0)
 
 #define GET2BYTE(V, A, L)	do {				\
 	(V) = ((A)[0] << 8) | (A)[1];				\
 	(A) += 2;						\
 	(L) -= 2;						\
     } while (0)
 
 #define GET1BYTE(V, A, L)	do {				\
 	(V) = *(A)++;						\
 	(L)--;							\
     } while (0)
 
 
 /*
  * atm_rtrequest: handle ATM rt request (in support of generic code)
  *   inputs: "req" = request code
  *           "rt" = route entry
  *           "info" = rt_addrinfo
  */
 void
 atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct sockaddr *gate = rt->rt_gateway;
 	struct atmio_openvcc op;
 	struct atmio_closevcc cl;
 	u_char *addr;
 	u_int alen;
 #ifdef NATM
 	struct sockaddr_in *sin;
 	struct natmpcb *npcb = NULL;
 #endif
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 
 	if (rt->rt_flags & RTF_GATEWAY)   /* link level requests only */
 		return;
 
 	switch (req) {
 
 	case RTM_RESOLVE: /* resolve: only happens when cloning */
 		printf("atm_rtrequest: RTM_RESOLVE request detected?\n");
 		break;
 
 	case RTM_ADD:
 		/*
 		 * route added by a command (e.g. ifconfig, route, arp...).
 		 *
 		 * first check to see if this is not a host route, in which
 		 * case we are being called via "ifconfig" to set the address.
 		 */
 		if ((rt->rt_flags & RTF_HOST) == 0) { 
 			rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl);
 			gate = rt->rt_gateway;
 			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 			break;
 		}
 
 		if ((rt->rt_flags & RTF_CLONING) != 0) {
 			printf("atm_rtrequest: cloning route detected?\n");
 			break;
 		}
 		if (gate->sa_family != AF_LINK ||
 		    gate->sa_len < sizeof(null_sdl)) {
 			log(LOG_DEBUG, "atm_rtrequest: bad gateway value");
 			break;
 		}
 
 		KASSERT(rt->rt_ifp->if_ioctl != NULL,
 		    ("atm_rtrequest: null ioctl"));
 
 		/*
 		 * Parse and verify the link level address as
 		 * an open request
 		 */
 		bzero(&op, sizeof(op));
 		addr = LLADDR(SDL(gate));
 		alen = SDL(gate)->sdl_alen;
 		if (alen < 4) {
 			printf("%s: bad link-level address\n", __func__);
 			goto failed;
 		}
 
 		if (alen == 4) {
 			/* old type address */
 			GET1BYTE(op.param.flags, addr, alen);
 			GET1BYTE(op.param.vpi, addr, alen);
 			GET2BYTE(op.param.vci, addr, alen);
 			op.param.traffic = ATMIO_TRAFFIC_UBR;
 			op.param.aal = (op.param.flags & ATM_PH_AAL5) ?
 			    ATMIO_AAL_5 : ATMIO_AAL_0;
 		} else {
 			/* new address */
 			op.param.aal = ATMIO_AAL_5;
 
 			GET1BYTE(op.param.flags, addr, alen);
 			op.param.flags &= ATM_PH_LLCSNAP;
 
 			GET1BYTE(op.param.vpi, addr, alen);
 			GET2BYTE(op.param.vci, addr, alen);
 
 			GET1BYTE(op.param.traffic, addr, alen);
 
 			switch (op.param.traffic) {
 
 			  case ATMIO_TRAFFIC_UBR:
 				if (alen >= 3)
 					GET3BYTE(op.param.tparam.pcr,
 					    addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_CBR:
 				if (alen < 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_VBR:
 				if (alen < 3 * 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				GET3BYTE(op.param.tparam.scr, addr, alen);
 				GET3BYTE(op.param.tparam.mbs, addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_ABR:
 				if (alen < 4 * 3 + 2 + 1 * 2 + 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				GET3BYTE(op.param.tparam.mcr, addr, alen);
 				GET3BYTE(op.param.tparam.icr, addr, alen);
 				GET3BYTE(op.param.tparam.tbe, addr, alen);
 				GET1BYTE(op.param.tparam.nrm, addr, alen);
 				GET1BYTE(op.param.tparam.trm, addr, alen);
 				GET2BYTE(op.param.tparam.adtf, addr, alen);
 				GET1BYTE(op.param.tparam.rif, addr, alen);
 				GET1BYTE(op.param.tparam.rdf, addr, alen);
 				GET1BYTE(op.param.tparam.cdf, addr, alen);
 				break;
 
 			  default:
 			  bad_param:
 				printf("%s: bad traffic params\n", __func__);
 				goto failed;
 			}
 		}
 		op.param.rmtu = op.param.tmtu = rt->rt_ifp->if_mtu;
 #ifdef NATM
 		/*
 		 * let native ATM know we are using this VCI/VPI
 		 * (i.e. reserve it)
 		 */
 		sin = (struct sockaddr_in *) rt_key(rt);
 		if (sin->sin_family != AF_INET)
 			goto failed;
 		npcb = npcb_add(NULL, rt->rt_ifp, op.param.vci,  op.param.vpi);
 		if (npcb == NULL) 
 			goto failed;
 		npcb->npcb_flags |= NPCB_IP;
 		npcb->ipaddr.s_addr = sin->sin_addr.s_addr;
 		/* XXX: move npcb to llinfo when ATM ARP is ready */
 		rt->rt_llinfo = (caddr_t) npcb;
 		rt->rt_flags |= RTF_LLINFO;
 #endif
 		/*
 		 * let the lower level know this circuit is active
 		 */
 		op.rxhand = NULL;
 		op.param.flags |= ATMIO_FLAG_ASYNC;
 		if (rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMOPENVCC, 
 		    (caddr_t)&op) != 0) {
 			printf("atm: couldn't add VC\n");
 			goto failed;
 		}
 
 		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 
 		break;
 
 failed:
 #ifdef NATM
 		if (npcb) {
 			npcb_free(npcb, NPCB_DESTROY);
 			rt->rt_llinfo = NULL;
 			rt->rt_flags &= ~RTF_LLINFO;
 		}
 #endif
 		/* mark as invalid. We cannot RTM_DELETE the route from
 		 * here, because the recursive call to rtrequest1 does
 		 * not really work. */
 		rt->rt_flags |= RTF_REJECT;
 		break;
 
 	case RTM_DELETE:
 #ifdef NATM
 		/*
 		 * tell native ATM we are done with this VC
 		 */
 		if (rt->rt_flags & RTF_LLINFO) {
 			npcb_free((struct natmpcb *)rt->rt_llinfo, 
 			    NPCB_DESTROY);
 			rt->rt_llinfo = NULL;
 			rt->rt_flags &= ~RTF_LLINFO;
 		}
 #endif
 		/*
 		 * tell the lower layer to disable this circuit
 		 */
 		bzero(&op, sizeof(op));
 		addr = LLADDR(SDL(gate));
 		addr++;
 		cl.vpi = *addr++;
 		cl.vci = *addr++ << 8;
 		cl.vci |= *addr++;
 		(void)rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMCLOSEVCC, 
 		    (caddr_t)&cl);
 		break;
 	}
 }
 
 /*
  * atmresolve:
  *   inputs:
  *     [1] "rt" = the link level route to use (or null if need to look one up)
  *     [2] "m" = mbuf containing the data to be sent
  *     [3] "dst" = sockaddr_in (IP) address of dest.
  *   output:
  *     [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info
  *   return: 
  *     0 == resolve FAILED; note that "m" gets m_freem'd in this case
  *     1 == resolve OK; desten contains result
  *
  *   Broadcast and multicast packets are always rejected.
  *
  *   XXX: will need more work if we wish to support ATMARP in the kernel,
  *   but this is enough for PVCs entered via the "route" command.
  */
 int
 atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
     struct atm_pseudohdr *desten)
 {
 	struct sockaddr_dl *sdl;
 
 	if (m->m_flags & (M_BCAST | M_MCAST)) {
 		log(LOG_INFO, "atmresolve: BCAST/MCAST packet detected/dumped");
 		goto bad;
 	}
 
 	if (rt == NULL) {
 		/* No route supplied: look one up for dst. */
 		rt = RTALLOC1(dst, 0);
 		if (rt == NULL)
 			goto bad;	/* failed */
 		rt->rt_refcnt--;	/* don't keep LL references */
 		/* Only a direct route with an AF_LINK gateway is usable. */
 		if ((rt->rt_flags & RTF_GATEWAY) != 0 || 
 		    (rt->rt_flags & RTF_LLINFO) == 0 ||
 		    /* XXX: are we using LLINFO? */
 		    rt->rt_gateway->sa_family != AF_LINK) {
+			RT_UNLOCK(rt);
 			goto bad;
 		}
+		RT_UNLOCK(rt);
 	}
 
 	/*
 	 * note that rt_gateway is a sockaddr_dl which contains the 
 	 * atm_pseudohdr data structure for this route.   we currently
 	 * don't need any rt_llinfo info (but will if we want to support
 	 * ATM ARP [c.f. if_ether.c]).
 	 */
 	sdl = SDL(rt->rt_gateway);
 
 	/*
 	 * Check that the address family and length are valid, i.e. the
 	 * address is resolved; if so, copy the pseudo header out.
 	 */
 	if (sdl->sdl_family == AF_LINK && sdl->sdl_alen >= sizeof(*desten)) {
 		bcopy(LLADDR(sdl), desten, sizeof(*desten));
 		return (1);	/* ok, go for it! */
 	}
 
 	/*
 	 * we got an entry, but it doesn't have valid link address
 	 * info in it (it is prob. the interface route, which has
 	 * sdl_alen == 0).    dump packet.  (fall through to "bad").
 	 */
 bad:
 	m_freem(m);
 	return (0);
 }
 #endif /* INET || INET6 */
Index: head/sys/netinet/if_ether.c
===================================================================
--- head/sys/netinet/if_ether.c	(revision 120726)
+++ head/sys/netinet/if_ether.c	(revision 120727)
@@ -1,970 +1,991 @@
 /*
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 /*
  * Ethernet address resolution protocol.
  * TODO:
  *	add "inuse/lock" bit (or ref. count) along with valid bit
  */
 
 #include "opt_inet.h"
 #include "opt_bdg.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_llc.h>
 #ifdef BRIDGE
 #include <net/ethernet.h>
 #include <net/bridge.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 
 #include <net/if_arc.h>
 #include <net/iso88025.h>
 
 #define SIN(s) ((struct sockaddr_in *)s)
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 SYSCTL_DECL(_net_link_ether);
 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 
 /* timer values */
 static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
 static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
 static int arpt_down = 20;	/* once declared down, don't send for 20 sec */
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
 	   &arpt_prune, 0, "");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, 
 	   &arpt_keep, 0, "");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW,
 	   &arpt_down, 0, "");
 
 #define	rt_expire rt_rmx.rmx_expire
 
 struct llinfo_arp {
 	LIST_ENTRY(llinfo_arp) la_le;
 	struct	rtentry *la_rt;
 	struct	mbuf *la_hold;	/* last packet until resolved/timeout */
 	u_short	la_preempt;	/* countdown for pre-expiry arps */
 	u_short	la_asked;	/* #times we QUERIED following expiration */
 #define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
 };
 
 static	LIST_HEAD(, llinfo_arp) llinfo_arp;
 
 static struct	ifqueue arpintrq;
-static int	arp_inuse, arp_allocated, arpinit_done;
+static int	arp_allocated;
+static int	arpinit_done;
 
 static int	arp_maxtries = 5;
 static int	useloopback = 1; /* use loopback interface for local traffic */
 static int	arp_proxyall = 0;
+static struct callout arp_callout;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
 	   &arp_maxtries, 0, "");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
 	   &useloopback, 0, "");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
 	   &arp_proxyall, 0, "");
 
 static void	arp_init(void);
 static void	arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static void	arprequest(struct ifnet *,
 			struct in_addr *, struct in_addr *, u_char *);
 static void	arpintr(struct mbuf *);
 static void	arptfree(struct llinfo_arp *);
 static void	arptimer(void *);
 static struct llinfo_arp
 		*arplookup(u_long, int, int);
 #ifdef INET
 static void	in_arpinput(struct mbuf *);
 #endif
 
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  */
 /* ARGSUSED */
 static void
 arptimer(ignored_arg)
 	void *ignored_arg;
 {
 	struct llinfo_arp *la, *ola;
-	int s = splnet();
 
 	RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET]);
 	la = LIST_FIRST(&llinfo_arp);
 	while (la != NULL) {
 		struct rtentry *rt = la->la_rt;
 		ola = la;
 		la = LIST_NEXT(la, la_le);
 		if (rt->rt_expire && rt->rt_expire <= time_second)
 			arptfree(ola);		/* timer has expired, clear */
 	}
 	RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET]);
-	splx(s);
-	timeout(arptimer, NULL, arpt_prune * hz);
+
+	callout_reset(&arp_callout, arpt_prune * hz, arptimer, NULL);
 }
 
 /*
  * Parallel to llc_rtrequest.
  */
 static void
 arp_rtrequest(req, rt, info)
 	int req;
 	register struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
-	register struct sockaddr *gate = rt->rt_gateway;
-	register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+	register struct sockaddr *gate;
+	register struct llinfo_arp *la;
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 
+	RT_LOCK_ASSERT(rt);
+
 	if (!arpinit_done) {
 		arpinit_done = 1;
-		timeout(arptimer, (caddr_t)0, hz);
+		callout_reset(&arp_callout, hz, arptimer, NULL);
 	}
 	if (rt->rt_flags & RTF_GATEWAY)
 		return;
+	gate = rt->rt_gateway;
+	la = (struct llinfo_arp *)rt->rt_llinfo;
 	switch (req) {
 
 	case RTM_ADD:
 		/*
 		 * XXX: If this is a manually added route to interface
 		 * such as older version of routed or gated might provide,
 		 * restore cloning bit.
 		 */
 		if ((rt->rt_flags & RTF_HOST) == 0 &&
 		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
 			rt->rt_flags |= RTF_CLONING;
 		if (rt->rt_flags & RTF_CLONING) {
 			/*
 			 * Case 1: This route should come from a route to iface.
 			 */
 			rt_setgate(rt, rt_key(rt),
 					(struct sockaddr *)&null_sdl);
 			gate = rt->rt_gateway;
 			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 			rt->rt_expire = time_second;
 			break;
 		}
 		/* Announce a new entry if requested. */
 		if (rt->rt_flags & RTF_ANNOUNCE)
 			arprequest(rt->rt_ifp,
 			    &SIN(rt_key(rt))->sin_addr,
 			    &SIN(rt_key(rt))->sin_addr,
 			    (u_char *)LLADDR(SDL(gate)));
 		/*FALLTHROUGH*/
 	case RTM_RESOLVE:
 		if (gate->sa_family != AF_LINK ||
 		    gate->sa_len < sizeof(null_sdl)) {
-			log(LOG_DEBUG, "arp_rtrequest: bad gateway %s%s\n",
+			log(LOG_DEBUG, "%s: bad gateway %s%s\n", __func__,
 			    inet_ntoa(SIN(rt_key(rt))->sin_addr),
 			    (gate->sa_family != AF_LINK) ?
 			    " (!AF_LINK)": "");
 			break;
 		}
 		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 		if (la != 0)
 			break; /* This happens on a route change */
 		/*
 		 * Case 2:  This route may come from cloning, or a manual route
 		 * add with a LL address.
 		 */
-		R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+		R_Zalloc(la, struct llinfo_arp *, sizeof(*la));
 		rt->rt_llinfo = (caddr_t)la;
 		if (la == 0) {
-			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+			log(LOG_DEBUG, "%s: malloc failed\n", __func__);
 			break;
 		}
-		arp_inuse++, arp_allocated++;
-		Bzero(la, sizeof(*la));
+		arp_allocated++;
 		la->la_rt = rt;
 		rt->rt_flags |= RTF_LLINFO;
 		RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
 		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
 
 #ifdef INET
 		/*
 		 * This keeps the multicast addresses from showing up
 		 * in `arp -a' listings as unresolved.  It's not actually
 		 * functional.  Then the same for broadcast.
 		 */
 		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr)) &&
 		    rt->rt_ifp->if_type != IFT_ARCNET) {
 			ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr,
 					       LLADDR(SDL(gate)));
 			SDL(gate)->sdl_alen = 6;
 			rt->rt_expire = 0;
 		}
 		if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
 			memcpy(LLADDR(SDL(gate)), rt->rt_ifp->if_broadcastaddr,
 			       rt->rt_ifp->if_addrlen);
 			SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen;
 			rt->rt_expire = 0;
 		}
 #endif
 
 		if (SIN(rt_key(rt))->sin_addr.s_addr ==
 		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
 		    /*
 		     * This test used to be
 		     *	if (loif.if_flags & IFF_UP)
 		     * It allowed local traffic to be forced
 		     * through the hardware by configuring the loopback down.
 		     * However, it causes problems during network configuration
 		     * for boards that can't receive packets they send.
 		     * It is now necessary to clear "useloopback" and remove
 		     * the route to force traffic out to the hardware.
 		     */
 			rt->rt_expire = 0;
 			Bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)),
 			      SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
 			if (useloopback)
 				rt->rt_ifp = loif;
 
 		}
 		break;
 
 	case RTM_DELETE:
 		if (la == 0)
 			break;
-		arp_inuse--;
 		RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
 		LIST_REMOVE(la, la_le);
 		rt->rt_llinfo = 0;
 		rt->rt_flags &= ~RTF_LLINFO;
 		if (la->la_hold)
 			m_freem(la->la_hold);
 		Free((caddr_t)la);
 	}
 }
 
 /*
  * Broadcast an ARP request. Caller specifies:
  *	- arp header source ip address
  *	- arp header target ip address
  *	- arp header source ethernet address
  */
 static void
 arprequest(ifp, sip, tip, enaddr)
 	register struct ifnet *ifp;
 	register struct in_addr *sip, *tip;
 	register u_char *enaddr;
 {
 	register struct mbuf *m;
 	register struct ether_header *eh;
 	register struct arc_header *arh;
 	register struct arphdr *ah;
 	struct sockaddr sa;
 	static u_char	llcx[] = { 0x82, 0x40, LLC_SNAP_LSAP, LLC_SNAP_LSAP,
 				   LLC_UI, 0x00, 0x00, 0x00, 0x08, 0x06 };
 	u_short ar_hrd;
 
 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 		return;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 	mac_create_mbuf_linklayer(ifp, m);
 #endif
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 		ar_hrd = htons(ARPHRD_ARCNET);
 
 		m->m_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 		m->m_pkthdr.len = m->m_len;
 		MH_ALIGN(m, m->m_len);
 
 		arh = (struct arc_header *)sa.sa_data;
 		arh->arc_dhost = *ifp->if_broadcastaddr;
 		arh->arc_type = ARCTYPE_ARP;
 
 		ah = mtod(m, struct arphdr *);
 		break;
 
 	case IFT_ISO88025:
 		ar_hrd = htons(ARPHRD_IEEE802);
 
 		m->m_len = sizeof(llcx) +
 		    arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 		m->m_pkthdr.len = m->m_len;
 		MH_ALIGN(m, m->m_len);
 
 		(void)memcpy(mtod(m, caddr_t), llcx, sizeof(llcx));
 		(void)memcpy(sa.sa_data, ifp->if_broadcastaddr, 6);
 		(void)memcpy(sa.sa_data + 6, enaddr, 6);
 		sa.sa_data[6] |= TR_RII;
 		sa.sa_data[12] = TR_AC;
 		sa.sa_data[13] = TR_LLC_FRAME;
 
 		ah = (struct arphdr *)(mtod(m, char *) + sizeof(llcx));
 		break;
 	case IFT_FDDI:
 	case IFT_ETHER:
 		/*
 		 * This may not be correct for types not explicitly
 		 * listed, but this is our best guess
 		 */
 	default:
 		ar_hrd = htons(ARPHRD_ETHER);
 
 		m->m_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 		m->m_pkthdr.len = m->m_len;
 		MH_ALIGN(m, m->m_len);
 
 		eh = (struct ether_header *)sa.sa_data;
 		/* if_output will not swap */
 		eh->ether_type = htons(ETHERTYPE_ARP);
 		(void)memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
 		    sizeof(eh->ether_dhost));
 
 		ah = mtod(m, struct arphdr *);
 		break;
 	}
 
 	ah->ar_hrd = ar_hrd;
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
 	(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	memset(ar_tha(ah), 0, ah->ar_hln);
 	(void)memcpy(ar_spa(ah), sip, ah->ar_pln);
 	(void)memcpy(ar_tpa(ah), tip, ah->ar_pln);
 
 	sa.sa_family = AF_UNSPEC;
 	sa.sa_len = sizeof(sa);
 	(*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0);
 }
 
 /*
  * Resolve an IP address into an ethernet address.  If success,
  * desten is filled in.  If there is no entry in arptab,
  * set one up and broadcast a request for the IP address.
  * Hold onto this mbuf and resend it once the address
  * is finally resolved.  A return value of 1 indicates
  * that desten has been filled in and the packet should be sent
  * normally; a 0 return indicates that the packet has been
  * taken over here, either now or for later transmission.
  */
 int
 arpresolve(ifp, rt, m, dst, desten, rt0)
 	register struct ifnet *ifp;
 	register struct rtentry *rt;
 	struct mbuf *m;
 	register struct sockaddr *dst;
 	register u_char *desten;
 	struct rtentry *rt0;
 {
 	struct llinfo_arp *la = 0;
 	struct sockaddr_dl *sdl;
 
 	if (m->m_flags & M_BCAST) {	/* broadcast */
 		(void)memcpy(desten, ifp->if_broadcastaddr, ifp->if_addrlen);
 		return (1);
 	}
 	if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {/* multicast */
 		ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
 		return(1);
 	}
 	if (rt)
 		la = (struct llinfo_arp *)rt->rt_llinfo;
 	if (la == 0) {
 		la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
 		if (la)
 			rt = la->la_rt;
 	}
 	if (la == 0 || rt == 0) {
 		log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s%s%s\n",
 			inet_ntoa(SIN(dst)->sin_addr), la ? "la" : "",
 				rt ? "rt" : "");
 		m_freem(m);
 		return (0);
 	}
 	sdl = SDL(rt->rt_gateway);
 	/*
 	 * Check the address family and length is valid, the address
 	 * is resolved; otherwise, try to resolve.
 	 */
 	if ((rt->rt_expire == 0 || rt->rt_expire > time_second) &&
 	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
 		/*
 		 * If entry has an expiry time and it is approaching,
 		 * see if we need to send an ARP request within this
 		 * arpt_down interval.
 		 */
 		if ((rt->rt_expire != 0) &&
 		    (time_second + la->la_preempt > rt->rt_expire)) {
 			arprequest(ifp,
 				   &SIN(rt->rt_ifa->ifa_addr)->sin_addr,
 				   &SIN(dst)->sin_addr,
 				   IF_LLADDR(ifp));
 			la->la_preempt--;
 		} 
 
 		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
 		return 1;
 	}
 	/*
 	 * If ARP is disabled or static on this interface, stop.
 	 * XXX
 	 * Probably should not allocate empty llinfo struct if we are
 	 * not going to be sending out an arp request.
 	 */
 	if (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) {
 		m_freem(m);
 		return (0);
 	}
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Replace the held mbuf with this
 	 * latest one.
 	 */
 	if (la->la_hold)
 		m_freem(la->la_hold);
 	la->la_hold = m;
 	if (rt->rt_expire) {
+		RT_LOCK(rt);
 		rt->rt_flags &= ~RTF_REJECT;
 		if (la->la_asked == 0 || rt->rt_expire != time_second) {
 			rt->rt_expire = time_second;
 			if (la->la_asked++ < arp_maxtries) {
 				arprequest(ifp,
 					   &SIN(rt->rt_ifa->ifa_addr)->sin_addr,
 					   &SIN(dst)->sin_addr,
 					   IF_LLADDR(ifp));
 			} else {
 				rt->rt_flags |= RTF_REJECT;
 				rt->rt_expire += arpt_down;
 				la->la_asked = 0;
 				la->la_preempt = arp_maxtries;
 			}
 
 		}
+		RT_UNLOCK(rt);
 	}
 	return (0);
 }
 
 /*
  * Common length and type checks are done here,
  * then the protocol-specific routine is called.
  */
 static void
 arpintr(struct mbuf *m)
 {
 	struct arphdr *ar;
 
 	if (!arpinit_done) {
+		/* NB: this race should not matter */
 		arpinit_done = 1;
-		timeout(arptimer, (caddr_t)0, hz);
+		callout_reset(&arp_callout, hz, arptimer, NULL);
 	}
 	if (m->m_len < sizeof(struct arphdr) &&
 	    ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
 		log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
 		return;
 	}
 	ar = mtod(m, struct arphdr *);
 
 	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
 	    ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
 	    ntohs(ar->ar_hrd) != ARPHRD_ARCNET) {
 		log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
 		    (unsigned char *)&ar->ar_hrd, "");
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_pkthdr.len < arphdr_len(ar) &&
 	    (m = m_pullup(m, arphdr_len(ar))) == NULL) {
 		log(LOG_ERR, "arp: runt packet\n");
 		m_freem(m);
 		return;
 	}
 
 	switch (ntohs(ar->ar_pro)) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		in_arpinput(m);
 		return;
 #endif
 	}
 	m_freem(m);
 }
 
 #ifdef INET
 /*
  * ARP for Internet protocols on 10 Mb/s Ethernet.
  * Algorithm is that given in RFC 826.
  * In addition, a sanity check is performed on the sender
  * protocol address, to catch impersonators.
  * We no longer handle negotiations for use of trailer protocol:
  * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
  * along with IP replies if we wanted trailers sent to us,
  * and also sent them in response to IP replies.
  * This allowed either end to announce the desire to receive
  * trailer packets.
  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
  * but formerly didn't normally send requests.
  */
 static int log_arp_wrong_iface = 1;
 static int log_arp_movements = 1;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
 	"log arp packets arriving on the wrong interface");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
         &log_arp_movements, 0,
         "log arp replies from MACs different than the one in the cache");
 
 
 static void
 in_arpinput(m)
 	struct mbuf *m;
 {
 	register struct arphdr *ah;
 	register struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ether_header *eh;
 	struct arc_header *arh;
 	struct iso88025_header *th = (struct iso88025_header *)0;
 	struct iso88025_sockaddr_dl_data *trld;
 	register struct llinfo_arp *la = 0;
 	register struct rtentry *rt;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct sockaddr_dl *sdl;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	int op, rif_len;
 	int req_len;
 
 	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 	if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
 		log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
 		return;
 	}
 
 	ah = mtod(m, struct arphdr *);
 	op = ntohs(ah->ar_op);
 	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
 	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
 #ifdef BRIDGE
 #define BRIDGE_TEST (do_bridge)
 #else
 #define BRIDGE_TEST (0) /* cc will optimise the test away */
 #endif
 	/*
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash)
 		if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) &&
 		    isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 	/*
 	 * No match, use the first inet address on the receive interface
 	 * as a dummy address for the rest of the function.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
 			ia = ifatoia(ifa);
 			goto match;
 		}
 	/*
 	 * If bridging, fall back to using any inet address.
 	 */
 	if (!BRIDGE_TEST ||
 	    (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL) {
 		m_freem(m);
 		return;
 	}
 match:
 	myaddr = ia->ia_addr.sin_addr;
 	if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) {
 		m_freem(m);	/* it's from me, ignore it. */
 		return;
 	}
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
 		log(LOG_ERR,
 		    "arp: link address is broadcast for IP address %s!\n",
 		    inet_ntoa(isaddr));
 		m_freem(m);
 		return;
 	}
 	if (isaddr.s_addr == myaddr.s_addr) {
 		log(LOG_ERR,
 		   "arp: %*D is using my IP address %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa(isaddr));
 		itaddr = myaddr;
 		goto reply;
 	}
 	if (ifp->if_flags & IFF_STATICARP)
 		goto reply;
 	la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
 	if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
 		/* the following is not an error when doing bridging */
 		if (!BRIDGE_TEST && rt->rt_ifp != ifp) {
 			if (log_arp_wrong_iface)
 				log(LOG_ERR, "arp: %s is on %s%d but got reply from %*D on %s%d\n",
 				    inet_ntoa(isaddr),
 				    rt->rt_ifp->if_name, rt->rt_ifp->if_unit,
 				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				    ifp->if_name, ifp->if_unit);
 			goto reply;
 		}
 		if (sdl->sdl_alen &&
 		    bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
 			if (rt->rt_expire) {
 			    if (log_arp_movements)
 			        log(LOG_INFO, "arp: %s moved from %*D to %*D on %s%d\n",
 				    inet_ntoa(isaddr),
 				    ifp->if_addrlen, (u_char *)LLADDR(sdl), ":",
 				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				    ifp->if_name, ifp->if_unit);
 			} else {
 			    log(LOG_ERR,
 				"arp: %*D attempts to modify permanent entry for %s on %s%d\n",
 				ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				inet_ntoa(isaddr), ifp->if_name, ifp->if_unit);
 			    goto reply;
 			}
 		}
 		/*
 		 * sanity check for the address length.
 		 * XXX this does not work for protocols with variable address
 		 * length. -is
 		 */
 		if (sdl->sdl_alen &&
 		    sdl->sdl_alen != ah->ar_hln) {
 			log(LOG_WARNING,
 			    "arp from %*D: new addr len %d, was %d",
 			    ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
 			    ah->ar_hln, sdl->sdl_alen);
 		}
 		if (ifp->if_addrlen != ah->ar_hln) {
 			log(LOG_WARNING,
 			    "arp from %*D: addr len: new %d, i/f %d (ignored)",
 			    ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
 			    ah->ar_hln, ifp->if_addrlen);
 			goto reply;
 		}
 		(void)memcpy(LLADDR(sdl), ar_sha(ah),
 		    sdl->sdl_alen = ah->ar_hln);
 		/*
 		 * If we receive an arp from a token-ring station over
 		 * a token-ring nic then try to save the source
 		 * routing info.
 		 */
 		if (ifp->if_type == IFT_ISO88025) {
 			th = (struct iso88025_header *)m->m_pkthdr.header;
 			trld = SDL_ISO88025(sdl);
 			rif_len = TR_RCF_RIFLEN(th->rcf);
 			if ((th->iso88025_shost[0] & TR_RII) &&
 			    (rif_len > 2)) {
 				trld->trld_rcf = th->rcf;
 				trld->trld_rcf ^= htons(TR_RCF_DIR);
 				memcpy(trld->trld_route, th->rd, rif_len - 2);
 				trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK);
 				/*
 				 * Set up source routing information for
 				 * reply packet (XXX)
 				 */
 				m->m_data -= rif_len;
 				m->m_len  += rif_len;
 				m->m_pkthdr.len += rif_len;
 			} else {
 				th->iso88025_shost[0] &= ~TR_RII;
 				trld->trld_rcf = 0;
 			}
 			m->m_data -= 8;
 			m->m_len  += 8;
 			m->m_pkthdr.len += 8;
 			th->rcf = trld->trld_rcf;
 		}
+		RT_LOCK(rt);
 		if (rt->rt_expire)
 			rt->rt_expire = time_second + arpt_keep;
 		rt->rt_flags &= ~RTF_REJECT;
+		RT_UNLOCK(rt);
 		la->la_asked = 0;
 		la->la_preempt = arp_maxtries;
 		if (la->la_hold) {
 			(*ifp->if_output)(ifp, la->la_hold,
 				rt_key(rt), rt);
 			la->la_hold = 0;
 		}
 	}
 reply:
 	if (op != ARPOP_REQUEST) {
 		m_freem(m);
 		return;
 	}
 	if (itaddr.s_addr == myaddr.s_addr) {
 		/* I am the target */
 		(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 		(void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
 	} else {
 		la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
 		if (la == NULL) {
 			struct sockaddr_in sin;
 
 			if (!arp_proxyall) {
 				m_freem(m);
 				return;
 			}
 
 			bzero(&sin, sizeof sin);
 			sin.sin_family = AF_INET;
 			sin.sin_len = sizeof sin;
 			sin.sin_addr = itaddr;
 
 			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
 			if (!rt) {
 				m_freem(m);
 				return;
 			}
 			/*
 			 * Don't send proxies for nodes on the same interface
 			 * as this one came out of, or we'll get into a fight
 			 * over who claims what Ether address.
 			 */
 			if (rt->rt_ifp == ifp) {
 				rtfree(rt);
 				m_freem(m);
 				return;
 			}
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
 			rtfree(rt);
 
 			/*
 			 * Also check that the node which sent the ARP packet
 			 * is on the the interface we expect it to be on. This
 			 * avoids ARP chaos if an interface is connected to the
 			 * wrong network.
 			 */
 			sin.sin_addr = isaddr;
 
 			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
 			if (!rt) {
 				m_freem(m);
 				return;
 			}
 			if (rt->rt_ifp != ifp) {
 				log(LOG_INFO, "arp_proxy: ignoring request"
 				    " from %s via %s%d, expecting %s%d\n",
 				    inet_ntoa(isaddr), ifp->if_name,
 				    ifp->if_unit, rt->rt_ifp->if_name,
 				    rt->rt_ifp->if_unit);
 				rtfree(rt);
 				m_freem(m);
 				return;
 			}
 			rtfree(rt);
 
 #ifdef DEBUG_PROXY
 			printf("arp: proxying for %s\n",
 			       inet_ntoa(itaddr));
 #endif
 		} else {
 			rt = la->la_rt;
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			sdl = SDL(rt->rt_gateway);
 			(void)memcpy(ar_sha(ah), LLADDR(sdl), ah->ar_hln);
 		}
 	}
 
 	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 		arh = (struct arc_header *)sa.sa_data;
 		arh->arc_dhost = *ar_tha(ah);
 		arh->arc_type = ARCTYPE_ARP;
 		break;
 
 	case IFT_ISO88025:
 		/* Re-arrange the source/dest address */
 		memcpy(th->iso88025_dhost, th->iso88025_shost,
 		    sizeof(th->iso88025_dhost));
 		memcpy(th->iso88025_shost, IF_LLADDR(ifp),
 		    sizeof(th->iso88025_shost));
 		/* Set the source routing bit if neccesary */
 		if (th->iso88025_dhost[0] & TR_RII) {
 			th->iso88025_dhost[0] &= ~TR_RII;
 			if (TR_RCF_RIFLEN(th->rcf) > 2)
 				th->iso88025_shost[0] |= TR_RII;
 		}
 		/* Copy the addresses, ac and fc into sa_data */
 		memcpy(sa.sa_data, th->iso88025_dhost,
 		    sizeof(th->iso88025_dhost) * 2);
 		sa.sa_data[(sizeof(th->iso88025_dhost) * 2)] = TR_AC;
 		sa.sa_data[(sizeof(th->iso88025_dhost) * 2) + 1] = TR_LLC_FRAME;
 		break;
 	case IFT_ETHER:
 	case IFT_FDDI:
 	/*
 	 * May not be correct for types not explictly
 	 * listed, but it is our best guess.
 	 */
 	default:
 		eh = (struct ether_header *)sa.sa_data;
 		(void)memcpy(eh->ether_dhost, ar_tha(ah),
 		    sizeof(eh->ether_dhost));
 		eh->ether_type = htons(ETHERTYPE_ARP);
 		break;
 	}
 	sa.sa_family = AF_UNSPEC;
 	sa.sa_len = sizeof(sa);
 	(*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0);
 	return;
 }
 #endif
 
 /*
  * Free an arp entry.
  */
 static void
 arptfree(la)
 	register struct llinfo_arp *la;
 {
 	register struct rtentry *rt = la->la_rt;
 	register struct sockaddr_dl *sdl;
+
 	if (rt == 0)
 		panic("arptfree");
 	if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) &&
 	    sdl->sdl_family == AF_LINK) {
 		sdl->sdl_alen = 0;
 		la->la_preempt = la->la_asked = 0;
+		RT_LOCK(rt);		/* XXX needed or move higher? */
 		rt->rt_flags &= ~RTF_REJECT;
+		RT_UNLOCK(rt);
 		return;
 	}
 	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt),
 			0, (struct rtentry **)0);
 }
 /*
  * Lookup or enter a new address in arptab.
  */
 static struct llinfo_arp *
 arplookup(addr, create, proxy)
 	u_long addr;
 	int create, proxy;
 {
 	register struct rtentry *rt;
-	static struct sockaddr_inarp sin = {sizeof(sin), AF_INET };
+	struct sockaddr_inarp sin;
 	const char *why = 0;
 
+	bzero(&sin, sizeof(sin));
+	sin.sin_len = sizeof(sin);
+	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = addr;
-	sin.sin_other = proxy ? SIN_PROXY : 0;
+	if (proxy)
+		sin.sin_other = SIN_PROXY;
 	rt = rtalloc1((struct sockaddr *)&sin, create, 0UL);
 	if (rt == 0)
 		return (0);
-	rt->rt_refcnt--;
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		why = "host is not on local network";
 	else if ((rt->rt_flags & RTF_LLINFO) == 0)
 		why = "could not allocate llinfo";
 	else if (rt->rt_gateway->sa_family != AF_LINK)
 		why = "gateway route is not ours";
 
 	if (why) {
-		if (create) {
+#define	ISDYNCLONE(_rt) \
+	(((_rt)->rt_flags & (RTF_STATIC | RTF_WASCLONED)) == RTF_WASCLONED)
+		if (create)
 			log(LOG_DEBUG, "arplookup %s failed: %s\n",
 			    inet_ntoa(sin.sin_addr), why);
-			/*
-			 * If there are no references to this Layer 2 route,
-			 * and it is a cloned route, and not static, and
-			 * arplookup() is creating the route, then purge
-			 * it from the routing table as it is probably bogus.
-			 */
-			if (((rt->rt_flags & (RTF_STATIC | RTF_WASCLONED)) ==
-			    RTF_WASCLONED) && (rt->rt_refcnt == 0))
-				rtrequest(RTM_DELETE,
-				    (struct sockaddr *)rt_key(rt),
-				    rt->rt_gateway, rt_mask(rt),
-				    rt->rt_flags, 0);
+		/*
+		 * If the only reference to this Layer 2 route is the one
+		 * taken by rtalloc1() above, and it is a cloned route, and
+		 * not static, then purge it from the routing table as it is
+		 * probably bogus (regardless of the "create" argument).
+		 */
+		RT_UNLOCK(rt);
+		if (rt->rt_refcnt == 1 && ISDYNCLONE(rt)) {
+			rtrequest(RTM_DELETE,
+					(struct sockaddr *)rt_key(rt),
+					rt->rt_gateway, rt_mask(rt),
+					rt->rt_flags, 0);
 		}
+		RTFREE(rt);
 		return (0);
+#undef ISDYNCLONE
+	} else {
+		rt->rt_refcnt--;
+		RT_UNLOCK(rt);
+		return ((struct llinfo_arp *)rt->rt_llinfo);
 	}
-	return ((struct llinfo_arp *)rt->rt_llinfo);
 }
 
 void
 arp_ifinit(ifp, ifa)
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 {
 	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
 		arprequest(ifp, &IA_SIN(ifa)->sin_addr,
 				&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
 	ifa->ifa_rtrequest = arp_rtrequest;
 	ifa->ifa_flags |= RTF_CLONING;
 }
 
 static void
 arp_init(void)
 {
 
 	arpintrq.ifq_maxlen = 50;
 	mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
 	LIST_INIT(&llinfo_arp);
+	callout_init(&arp_callout, CALLOUT_MPSAFE);
 	netisr_register(NETISR_ARP, arpintr, &arpintrq);
 }
-
 SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: head/sys/netinet/in_pcb.c
===================================================================
--- head/sys/netinet/in_pcb.c	(revision 120726)
+++ head/sys/netinet/in_pcb.c	(revision 120727)
@@ -1,1181 +1,1182 @@
 /*
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
  * $FreeBSD$
  */
 
 #include "opt_ipsec.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 #endif /* IPSEC */
 
 #ifdef FAST_IPSEC
 #if defined(IPSEC) || defined(IPSEC_ESP)
 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
 #endif
 
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #define	IPSEC
 #endif /* FAST_IPSEC */
 
 struct	in_addr zeroin_addr;
 
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
  */
 int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
 int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
 int	ipport_firstauto = IPPORT_HIFIRSTAUTO;		/* 49152 */
 int	ipport_lastauto  = IPPORT_HILASTAUTO;		/* 65535 */
 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
 
 /*
  * Reserved ports accessible only to root. There are significant
  * security considerations that must be accounted for when changing these,
  * but the security benefits can be great. Please be careful.
  */
 int	ipport_reservedhigh = IPPORT_RESERVED - 1;	/* 1023 */
 int	ipport_reservedlow = 0;
 
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
 	else if ((var) > (max)) { (var) = (max); }
 
 static int
 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp,
 		oidp->oid_arg1, oidp->oid_arg2, req);
 	if (!error) {
 		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
 		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
 		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
 		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
 		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
 	}
 	return error;
 }
 
 #undef RANGECHK
 
 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
 
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
 	   CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 	   CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
  *
  * NOTE: It is assumed that most of these functions will be called at
  * splnet(). XXX - There are, unfortunately, a few exceptions to this
  * rule that should be fixed.
  */
 
 /*
  * Allocate a PCB and associate it with the socket.
  */
 int
 in_pcballoc(so, pcbinfo, td)
 	struct socket *so;
 	struct inpcbinfo *pcbinfo;
 	struct thread *td;
 {
 	register struct inpcb *inp;
 #ifdef IPSEC
 	int error;
 #endif
-
-	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
+	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
 	if (inp == NULL)
 		return (ENOBUFS);
-	bzero((caddr_t)inp, sizeof(*inp));
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
 #ifdef IPSEC
 	error = ipsec_init_policy(so, &inp->inp_sp);
 	if (error != 0) {
 		uma_zfree(pcbinfo->ipi_zone, inp);
 		return error;
 	}
 #endif /*IPSEC*/
 #if defined(INET6)
 	if (INP_SOCKAF(so) == AF_INET6) {
 		inp->inp_vflag |= INP_IPV6PROTO;
 		if (ip6_v6only)
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
 	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
 	INP_LOCK_INIT(inp, "inp");
 #ifdef INET6
 	if (ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
 	return (0);
 }
 
 int
 in_pcbbind(inp, nam, td)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	struct thread *td;
 {
 	int anonport, error;
 
 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	anonport = inp->inp_lport == 0 && (nam == NULL ||
 	    ((struct sockaddr_in *)nam)->sin_port == 0);
 	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, td);
 	if (error)
 		return (error);
 	if (in_pcbinshash(inp) != 0) {
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
 		return (EAGAIN);
 	}
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Set up a bind operation on a PCB, performing port allocation
  * as required, but do not actually modify the PCB. Callers can
  * either complete the bind by setting inp_laddr/inp_lport and
  * calling in_pcbinshash(), or they can just use the resulting
  * port and address to authorise the sending of a once-off packet.
  *
  * On error, the values of *laddrp and *lportp are not changed.
  */
 int
 in_pcbbind_setup(inp, nam, laddrp, lportp, td)
 	struct inpcb *inp;
 	struct sockaddr *nam;
 	in_addr_t *laddrp;
 	u_short *lportp;
 	struct thread *td;
 {
 	struct socket *so = inp->inp_socket;
 	unsigned short *lastport;
 	struct sockaddr_in *sin;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct in_addr laddr;
 	u_short lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error, prison = 0;
 
 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		wild = 1;
 	if (nam) {
 		sin = (struct sockaddr_in *)nam;
 		if (nam->sa_len != sizeof (*sin))
 			return (EINVAL);
 #ifdef notdef
 		/*
 		 * We should check the family, but old programs
 		 * incorrectly fail to initialize it.
 		 */
 		if (sin->sin_family != AF_INET)
 			return (EAFNOSUPPORT);
 #endif
 		if (sin->sin_addr.s_addr != INADDR_ANY)
 			if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
 				return(EINVAL);
 		if (sin->sin_port != *lportp) {
 			/* Don't allow the port to change. */
 			if (*lportp != 0)
 				return (EINVAL);
 			lport = sin->sin_port;
 		}
 		/* NB: lport is left as 0 if the port isn't being changed. */
 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if (so->so_options & SO_REUSEADDR)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
 		if (lport) {
 			struct inpcb *t;
 			/* GROSS */
 			if (ntohs(lport) <= ipport_reservedhigh &&
 			    ntohs(lport) >= ipport_reservedlow &&
 			    td && suser_cred(td->td_ucred, PRISON_ROOT))
 				return (EACCES);
 			if (td && jailed(td->td_ucred))
 				prison = 1;
 			if (so->so_cred->cr_uid != 0 &&
 			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 				t = in_pcblookup_local(inp->inp_pcbinfo,
 				    sin->sin_addr, lport,
 				    prison ? 0 :  INPLOOKUP_WILDCARD);
 	/*
 	 * XXX
 	 * This entire block sorely needs a rewrite.
 	 */
 				if (t && (t->inp_vflag & INP_TIMEWAIT)) {
 					if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 					    ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 					    (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) &&
 					    (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid))
 						return (EADDRINUSE);
 				} else
 				if (t &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				     (t->inp_socket->so_options &
 					 SO_REUSEPORT) == 0) &&
 				    (so->so_cred->cr_uid !=
 				     t->inp_socket->so_cred->cr_uid)) {
 #if defined(INET6)
 					if (ntohl(sin->sin_addr.s_addr) !=
 					    INADDR_ANY ||
 					    ntohl(t->inp_laddr.s_addr) !=
 					    INADDR_ANY ||
 					    INP_SOCKAF(so) ==
 					    INP_SOCKAF(t->inp_socket))
 #endif /* defined(INET6) */
 					return (EADDRINUSE);
 				}
 			}
 			if (prison &&
 			    prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
 				return (EADDRNOTAVAIL);
 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 			    lport, prison ? 0 : wild);
 			if (t && (t->inp_vflag & INP_TIMEWAIT)) {
 				if ((reuseport & intotw(t)->tw_so_options) == 0)
 					return (EADDRINUSE);
 			} else
 			if (t &&
 			    (reuseport & t->inp_socket->so_options) == 0) {
 #if defined(INET6)
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
 				    ntohl(t->inp_laddr.s_addr) !=
 				    INADDR_ANY ||
 				    INP_SOCKAF(so) ==
 				    INP_SOCKAF(t->inp_socket))
 #endif /* defined(INET6) */
 				return (EADDRINUSE);
 			}
 		}
 	}
 	if (*lportp != 0)
 		lport = *lportp;
 	if (lport == 0) {
 		u_short first, last;
 		int count;
 
 		if (laddr.s_addr != INADDR_ANY)
 			if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
 				return (EINVAL);
 
 		if (inp->inp_flags & INP_HIGHPORT) {
 			first = ipport_hifirstauto;	/* sysctl */
 			last  = ipport_hilastauto;
 			lastport = &pcbinfo->lasthi;
 		} else if (inp->inp_flags & INP_LOWPORT) {
 			if (td && (error = suser_cred(td->td_ucred,
 			    PRISON_ROOT)) != 0)
 				return error;
 			first = ipport_lowfirstauto;	/* 1023 */
 			last  = ipport_lowlastauto;	/* 600 */
 			lastport = &pcbinfo->lastlow;
 		} else {
 			first = ipport_firstauto;	/* sysctl */
 			last  = ipport_lastauto;
 			lastport = &pcbinfo->lastport;
 		}
 		/*
 		 * Simple check to ensure all ports are not used up causing
 		 * a deadlock here.
 		 *
 		 * We split the two cases (up and down) so that the direction
 		 * is not being tested on each round of the loop.
 		 */
 		if (first > last) {
 			/*
 			 * counting down
 			 */
 			count = first - last;
 
 			do {
 				if (count-- < 0)	/* completely used? */
 					return (EADDRNOTAVAIL);
 				--*lastport;
 				if (*lastport > first || *lastport < last)
 					*lastport = first;
 				lport = htons(*lastport);
 			} while (in_pcblookup_local(pcbinfo, laddr, lport,
 			    wild));
 		} else {
 			/*
 			 * counting up
 			 */
 			count = last - first;
 
 			do {
 				if (count-- < 0)	/* completely used? */
 					return (EADDRNOTAVAIL);
 				++*lastport;
 				if (*lastport < first || *lastport > last)
 					*lastport = first;
 				lport = htons(*lastport);
 			} while (in_pcblookup_local(pcbinfo, laddr, lport,
 			    wild));
 		}
 	}
 	if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
 		return (EINVAL);
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	return (0);
 }
 
 /*
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument sin.
  * If don't have a local address for this socket yet,
  * then pick one.
  */
 int
 in_pcbconnect(inp, nam, td)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	struct thread *td;
 {
 	u_short lport, fport;
 	in_addr_t laddr, faddr;
 	int anonport, error;
 
 	lport = inp->inp_lport;
 	laddr = inp->inp_laddr.s_addr;
 	anonport = (lport == 0);
 	error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
 	    NULL, td);
 	if (error)
 		return (error);
 
 	/* Do the initial binding of the local address if required. */
 	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
 		inp->inp_lport = lport;
 		inp->inp_laddr.s_addr = laddr;
 		if (in_pcbinshash(inp) != 0) {
 			inp->inp_laddr.s_addr = INADDR_ANY;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 
 	/* Commit the remaining changes. */
 	inp->inp_lport = lport;
 	inp->inp_laddr.s_addr = laddr;
 	inp->inp_faddr.s_addr = faddr;
 	inp->inp_fport = fport;
 	in_pcbrehash(inp);
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Set up for a connect from a socket to the specified address.
  * On entry, *laddrp and *lportp should contain the current local
  * address and port for the PCB; these are updated to the values
  * that should be placed in inp_laddr and inp_lport to complete
  * the connect.
  *
  * On success, *faddrp and *fportp will be set to the remote address
  * and port. These are not updated in the error case.
  *
  * If the operation fails because the connection already exists,
  * *oinpp will be set to the PCB of that connection so that the
  * caller can decide to override it. In all other cases, *oinpp
  * is set to NULL.
  */
 int
 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	in_addr_t *laddrp;
 	u_short *lportp;
 	in_addr_t *faddrp;
 	u_short *fportp;
 	struct inpcb **oinpp;
 	struct thread *td;
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct in_ifaddr *ia;
 	struct sockaddr_in sa;
 	struct ucred *cred;
 	struct inpcb *oinp;
 	struct in_addr laddr, faddr;
 	u_short lport, fport;
 	int error;
 
 	if (oinpp != NULL)
 		*oinpp = NULL;
 	if (nam->sa_len != sizeof (*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (sin->sin_port == 0)
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	lport = *lportp;
 	faddr = sin->sin_addr;
 	fport = sin->sin_port;
 	cred = inp->inp_socket->so_cred;
 	if (laddr.s_addr == INADDR_ANY && jailed(cred)) {
 		bzero(&sa, sizeof(sa));
 		sa.sin_addr.s_addr = htonl(prison_getip(cred));
 		sa.sin_len = sizeof(sa);
 		sa.sin_family = AF_INET;
 		error = in_pcbbind_setup(inp, (struct sockaddr *)&sa,
 		    &laddr.s_addr, &lport, td);
 		if (error)
 			return (error);
 	}
 
 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
 		 * If the supplied address is INADDR_BROADCAST,
 		 * and the primary interface supports broadcast,
 		 * choose the broadcast address for that interface.
 		 */
 		if (faddr.s_addr == INADDR_ANY)
 			faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
 		else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
 		    (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags &
 		    IFF_BROADCAST))
 			faddr = satosin(&TAILQ_FIRST(
 			    &in_ifaddrhead)->ia_broadaddr)->sin_addr;
 	}
 	if (laddr.s_addr == INADDR_ANY) {
 		register struct route *ro;
 
 		ia = (struct in_ifaddr *)0;
 		/*
 		 * If route is known or can be allocated now,
 		 * our src addr is taken from the i/f, else punt.
 		 * Note that we should check the address family of the cached
 		 * destination, in case of sharing the cache with IPv6.
 		 */
 		ro = &inp->inp_route;
 		if (ro->ro_rt &&
 		    (ro->ro_dst.sa_family != AF_INET ||
 		     satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr ||
 		     inp->inp_socket->so_options & SO_DONTROUTE)) {
 			RTFREE(ro->ro_rt);
 			ro->ro_rt = (struct rtentry *)0;
 		}
 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
 		    (ro->ro_rt == (struct rtentry *)0 ||
 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
 			/* No route yet, so try to acquire one */
 			bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
 			ro->ro_dst.sa_family = AF_INET;
 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
 			((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr;
 			rtalloc(ro);
 		}
 		/*
 		 * If we found a route, use the address
 		 * corresponding to the outgoing interface
 		 * unless it is the loopback (in case a route
 		 * to our address on another net goes to loopback).
 		 */
 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
 			ia = ifatoia(ro->ro_rt->rt_ifa);
 		if (ia == 0) {
 			bzero(&sa, sizeof(sa));
 			sa.sin_addr = faddr;
 			sa.sin_len = sizeof(sa);
 			sa.sin_family = AF_INET;
 
 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa)));
 			if (ia == 0)
 				ia = ifatoia(ifa_ifwithnet(sintosa(&sa)));
 			if (ia == 0)
 				ia = TAILQ_FIRST(&in_ifaddrhead);
 			if (ia == 0)
 				return (EADDRNOTAVAIL);
 		}
 		/*
 		 * If the destination address is multicast and an outgoing
 		 * interface has been set as a multicast option, use the
 		 * address of that interface as our source address.
 		 */
 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
 		    inp->inp_moptions != NULL) {
 			struct ip_moptions *imo;
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
 			if (imo->imo_multicast_ifp != NULL) {
 				ifp = imo->imo_multicast_ifp;
 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
 					if (ia->ia_ifp == ifp)
 						break;
 				if (ia == 0)
 					return (EADDRNOTAVAIL);
 			}
 		}
 		laddr = ia->ia_addr.sin_addr;
 	}
 
 	oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
 	    0, NULL);
 	if (oinp != NULL) {
 		if (oinpp != NULL)
 			*oinpp = oinp;
 		return (EADDRINUSE);
 	}
 	if (lport == 0) {
 		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td);
 		if (error)
 			return (error);
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	*faddrp = faddr.s_addr;
 	*fportp = fport;
 	return (0);
 }
 
 void
 in_pcbdisconnect(inp)
 	struct inpcb *inp;
 {
 
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	inp->inp_fport = 0;
 	in_pcbrehash(inp);
 	if (inp->inp_socket->so_state & SS_NOFDREF)
 		in_pcbdetach(inp);
 }
 
 void
 in_pcbdetach(inp)
 	struct inpcb *inp;
 {
 	struct socket *so = inp->inp_socket;
 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
 
 #ifdef IPSEC
 	ipsec4_delete_pcbpolicy(inp);
 #endif /*IPSEC*/
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	if (so) {
 		so->so_pcb = 0;
 		sotryfree(so);
 	}
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	if (inp->inp_route.ro_rt)
-		rtfree(inp->inp_route.ro_rt);
+		RTFREE(inp->inp_route.ro_rt);
 	ip_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	INP_LOCK_DESTROY(inp);
 	uma_zfree(ipi->ipi_zone, inp);
 }
 
 struct sockaddr *
 in_sockaddr(port, addr_p)
 	in_port_t port;
 	struct in_addr *addr_p;
 {
 	struct sockaddr_in *sin;
 
 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
 		M_WAITOK | M_ZERO);
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = *addr_p;
 	sin->sin_port = port;
 
 	return (struct sockaddr *)sin;
 }
 
 /*
  * The wrapper function will pass down the pcbinfo for this function to lock.
  * The socket must have a valid
  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
  * except through a kernel programming error, so it is acceptable to panic
  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
  * because there actually /is/ a programming error somewhere... XXX)
  */
 int
 in_setsockaddr(so, nam, pcbinfo)
 	struct socket *so;
 	struct sockaddr **nam;
 	struct inpcbinfo *pcbinfo;
 {
 	int s;
 	register struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	s = splnet();
 	INP_INFO_RLOCK(pcbinfo);
 	inp = sotoinpcb(so);
 	if (!inp) {
 		INP_INFO_RUNLOCK(pcbinfo);
 		splx(s);
 		return ECONNRESET;
 	}
 	INP_LOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->inp_laddr;
 	INP_UNLOCK(inp);
 	INP_INFO_RUNLOCK(pcbinfo);
 	splx(s);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * The wrapper function will pass down the pcbinfo for this function to lock.
  */
 int
 in_setpeeraddr(so, nam, pcbinfo)
 	struct socket *so;
 	struct sockaddr **nam;
 	struct inpcbinfo *pcbinfo;
 {
 	int s;
 	register struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	s = splnet();
 	INP_INFO_RLOCK(pcbinfo);
 	inp = sotoinpcb(so);
 	if (!inp) {
 		INP_INFO_RUNLOCK(pcbinfo);
 		splx(s);
 		return ECONNRESET;
 	}
 	INP_LOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 	INP_UNLOCK(inp);
 	INP_INFO_RUNLOCK(pcbinfo);
 	splx(s);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 void
 in_pcbnotifyall(pcbinfo, faddr, errno, notify)
 	struct inpcbinfo *pcbinfo;
 	struct in_addr faddr;
 	int errno;
 	struct inpcb *(*notify)(struct inpcb *, int);
 {
 	struct inpcb *inp, *ninp;
 	struct inpcbhead *head;
 	int s;
 
 	s = splnet();
 	INP_INFO_WLOCK(pcbinfo);
 	head = pcbinfo->listhead;
 	for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
 		INP_LOCK(inp);
 		ninp = LIST_NEXT(inp, inp_list);
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			INP_UNLOCK(inp);
 			continue;
 		}
 #endif
 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
 		    inp->inp_socket == NULL) {
 			INP_UNLOCK(inp);
 			continue;
 		}
 		if ((*notify)(inp, errno))
 			INP_UNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 	splx(s);
 }
 
 void
 in_pcbpurgeif0(pcbinfo, ifp)
 	struct inpcbinfo *pcbinfo;
 	struct ifnet *ifp;
 {
 	struct inpcb *inp;
 	struct ip_moptions *imo;
 	int i, gap;
 
 	/* why no splnet here? XXX */
 	INP_INFO_RLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
 		INP_LOCK(inp);
 		imo = inp->inp_moptions;
 		if ((inp->inp_vflag & INP_IPV4) &&
 		    imo != NULL) {
 			/*
 			 * Unselect the outgoing interface if it is being
 			 * detached.
 			 */
 			if (imo->imo_multicast_ifp == ifp)
 				imo->imo_multicast_ifp = NULL;
 
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
 			 */
 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
 			    i++) {
 				if (imo->imo_membership[i]->inm_ifp == ifp) {
 					in_delmulti(imo->imo_membership[i]);
 					gap++;
 				} else if (gap != 0)
 					imo->imo_membership[i - gap] =
 					    imo->imo_membership[i];
 			}
 			imo->imo_num_memberships -= gap;
 		}
 		INP_UNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(pcbinfo);
 }
 
 /*
  * Check for alternatives when higher level complains
  * about service problems.  For now, invalidate cached
  * routing information.  If the route was created dynamically
  * (by a redirect), time to try a default gateway again.
  */
 void
 in_losing(inp)
 	struct inpcb *inp;
 {
 	register struct rtentry *rt;
 	struct rt_addrinfo info;
 
 	if ((rt = inp->inp_route.ro_rt)) {
+		RT_LOCK(rt);
+		inp->inp_route.ro_rt = NULL;
 		bzero((caddr_t)&info, sizeof(info));
 		info.rti_flags = rt->rt_flags;
 		info.rti_info[RTAX_DST] = rt_key(rt);
 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-		if (rt->rt_flags & RTF_DYNAMIC)
+		if (rt->rt_flags & RTF_DYNAMIC) {
+			RT_UNLOCK(rt);		/* XXX refcnt? */
 			(void) rtrequest1(RTM_DELETE, &info, NULL);
-		inp->inp_route.ro_rt = NULL;
-		rtfree(rt);
+		} else
+			rtfree(rt);
 		/*
 		 * A new route can be allocated
 		 * the next time output is attempted.
 		 */
 	}
 }
 
 /*
  * After a routing change, flush old routing
  * and allocate a (hopefully) better one.
  */
 struct inpcb *
 in_rtchange(inp, errno)
 	register struct inpcb *inp;
 	int errno;
 {
 	if (inp->inp_route.ro_rt) {
-		rtfree(inp->inp_route.ro_rt);
+		RTFREE(inp->inp_route.ro_rt);
 		inp->inp_route.ro_rt = 0;
 		/*
 		 * A new route can be allocated the next time
 		 * output is attempted.
 		 */
 	}
 	return inp;
 }
 
 /*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
 	struct inpcbinfo *pcbinfo;
 	struct in_addr laddr;
 	u_int lport_arg;
 	int wild_okay;
 {
 	register struct inpcb *inp;
 	int matchwild = 3, wildcard;
 	u_short lport = lport_arg;
 
 	if (!wild_okay) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_laddr.s_addr == laddr.s_addr &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found.
 				 */
 				return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				wildcard = 0;
 #ifdef INET6
 				if ((inp->inp_vflag & INP_IPV4) == 0)
 					continue;
 #endif
 				if (inp->inp_faddr.s_addr != INADDR_ANY)
 					wildcard++;
 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
 					if (laddr.s_addr == INADDR_ANY)
 						wildcard++;
 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
 						continue;
 				} else {
 					if (laddr.s_addr != INADDR_ANY)
 						wildcard++;
 				}
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					if (matchwild == 0) {
 						break;
 					}
 				}
 			}
 		}
 		return (match);
 	}
 }
 
 /*
  * Lookup PCB in hash list.
  */
 struct inpcb *
 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
 		  ifp)
 	struct inpcbinfo *pcbinfo;
 	struct in_addr faddr, laddr;
 	u_int fport_arg, lport_arg;
 	int wildcard;
 	struct ifnet *ifp;
 {
 	struct inpcbhead *head;
 	register struct inpcb *inp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	/*
 	 * First look for an exact match.
 	 */
 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * Found.
 			 */
 			return (inp);
 		}
 	}
 	if (wildcard) {
 		struct inpcb *local_wild = NULL;
 #if defined(INET6)
 		struct inpcb *local_wild_mapped = NULL;
 #endif /* defined(INET6) */
 
 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_lport == lport) {
 				if (ifp && ifp->if_type == IFT_FAITH &&
 				    (inp->inp_flags & INP_FAITH) == 0)
 					continue;
 				if (inp->inp_laddr.s_addr == laddr.s_addr)
 					return (inp);
 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #if defined(INET6)
 					if (INP_CHECK_SOCKAF(inp->inp_socket,
 							     AF_INET6))
 						local_wild_mapped = inp;
 					else
 #endif /* defined(INET6) */
 					local_wild = inp;
 				}
 			}
 		}
 #if defined(INET6)
 		if (local_wild == NULL)
 			return (local_wild_mapped);
 #endif /* defined(INET6) */
 		return (local_wild);
 	}
 
 	/*
 	 * Not found.
 	 */
 	return (NULL);
 }
 
 /*
  * Insert PCB onto various hash lists.
  */
 int
 in_pcbinshash(inp)
 	struct inpcb *inp;
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	u_int32_t hashkey_faddr;
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 	else
 #endif /* INET6 */
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
 		 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
 
 	pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
 	    pcbinfo->porthashmask)];
 
 	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
 		if (phd->phd_port == inp->inp_lport)
 			break;
 	}
 	/*
 	 * If none exists, malloc one and tack it on.
 	 */
 	if (phd == NULL) {
 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
 		if (phd == NULL) {
 			return (ENOBUFS); /* XXX */
 		}
 		phd->phd_port = inp->inp_lport;
 		LIST_INIT(&phd->phd_pcblist);
 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 	}
 	inp->inp_phd = phd;
 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 	return (0);
 }
 
 /*
  * Move PCB to the proper hash bucket when { faddr, fport } have  been
  * changed. NOTE: This does not handle the case of the lport changing (the
  * hashed port list would have to be updated as well), so the lport must
  * not change after in_pcbinshash() has been called.
  */
 void
 in_pcbrehash(inp)
 	struct inpcb *inp;
 {
 	struct inpcbhead *head;
 	u_int32_t hashkey_faddr;
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 	else
 #endif /* INET6 */
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
 		inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
 
 	LIST_REMOVE(inp, inp_hash);
 	LIST_INSERT_HEAD(head, inp, inp_hash);
 }
 
 /*
  * Remove PCB from various lists.
  */
 void
 in_pcbremlists(inp)
 	struct inpcb *inp;
 {
 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
 	if (inp->inp_lport) {
 		struct inpcbport *phd = inp->inp_phd;
 
 		LIST_REMOVE(inp, inp_hash);
 		LIST_REMOVE(inp, inp_portlist);
 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
 			LIST_REMOVE(phd, phd_hash);
 			free(phd, M_PCB);
 		}
 	}
 	LIST_REMOVE(inp, inp_list);
 	inp->inp_pcbinfo->ipi_count--;
 }
 
 int
 prison_xinpcb(struct thread *td, struct inpcb *inp)
 {
 	if (!jailed(td->td_ucred))
 		return (0);
 	if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred))
 		return (0);
 	return (1);
 }
Index: head/sys/netinet/in_rmx.c
===================================================================
--- head/sys/netinet/in_rmx.c	(revision 120726)
+++ head/sys/netinet/in_rmx.c	(revision 120727)
@@ -1,425 +1,432 @@
 /*
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * This code does two things necessary for the enhanced TCP metrics to
  * function in a useful manner:
  *  1) It marks all non-host routes as `cloning', thus ensuring that
  *     every actual reference to such a route actually gets turned
  *     into a reference to a host route to the specific destination
  *     requested.
  *  2) When such routes lose all their references, it arranges for them
  *     to be deleted in some random collection of circumstances, so that
  *     a large quantity of stale routing data is not kept in kernel memory
  *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
+#include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 
 extern int	in_inithead(void **head, int off);
 
 #define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
 
 /*
  * Do what we need to do when inserting a route.
  *
  * Installed as the rnh_addaddr hook of the AF_INET table by
  * in_inithead().  Before handing the node to rn_addroute() it adjusts
  * the route flags (multicast, protocol cloning, broadcast, local) and
  * fills in a default MTU from the interface.  If adding a host route
  * fails because an ARP entry is in the way, that entry is deleted and
  * the insertion is retried.  Returns the result of rn_addroute(),
  * which is NULL when the route could not be added.
  */
 static struct radix_node *
 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 	    struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
 	struct radix_node *ret;
 
 	/*
 	 * For IP, all unicast non-host routes are automatically cloning.
 	 */
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
 		rt->rt_flags |= RTF_PRCLONING;
 
 	/*
 	 * A little bit of help for both IP output and input:
 	 *   For host routes, we make sure that RTF_BROADCAST
 	 *   is set for anything that looks like a broadcast address.
 	 *   This way, we can avoid an expensive call to in_broadcast()
 	 *   in ip_output() most of the time (because the route passed
 	 *   to ip_output() is almost always a host route).
 	 *
 	 *   We also do the same for local addresses, with the thought
 	 *   that this might one day be used to speed up ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).  (This
 	 * is done above.)
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
 			rt->rt_flags |= RTF_BROADCAST;
 		} else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
 		    sin->sin_addr.s_addr) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 
 	/*
 	 * Inherit the interface MTU unless the route already has one or
 	 * the MTU metric is locked.
 	 */
 	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
 	    rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL && rt->rt_flags & RTF_HOST) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a host route, but can't.
 		 * Find out if it is because of an
 		 * ARP entry and delete it if so.
 		 */
 		rt2 = rtalloc1((struct sockaddr *)sin, 0,
 				RTF_CLONING | RTF_PRCLONING);
 		if (rt2) {
 			if (rt2->rt_flags & RTF_LLINFO &&
 			    rt2->rt_flags & RTF_HOST &&
 			    rt2->rt_gateway &&
 			    rt2->rt_gateway->sa_family == AF_LINK) {
+				/* NB: must unlock to avoid recursion */
+				RT_UNLOCK(rt2);
 				rtrequest(RTM_DELETE,
 					  (struct sockaddr *)rt_key(rt2),
 					  rt2->rt_gateway, rt_mask(rt2),
 					  rt2->rt_flags, 0);
 				ret = rn_addroute(v_arg, n_arg, head,
 						  treenodes);
+				RT_LOCK(rt2);
 			}
-			RTFREE(rt2);
+			RTFREE_LOCKED(rt2);
 		}
 	}
 
 	/*
 	 * If the new route created successfully, and we are forwarding,
 	 * and there is a cached route, free it.  Otherwise, we may end
 	 * up using the wrong route.
 	 */
 	if (ret != NULL && ipforwarding && ipforward_rt.ro_rt) {
 		RTFREE(ipforward_rt.ro_rt);
 		ipforward_rt.ro_rt = 0;
 	}
 
 	return ret;
 }
 
 /*
  * This code is the inverse of in_clsroute: on first reference, if we
  * were managing the route, stop doing so and set the expiration timer
  * back off again.
  *
  * Installed as the rnh_matchaddr hook by in_inithead().  The lookup
  * itself is done by rn_match() and its result is returned unchanged
  * (it may be NULL); only the RTPRF_OURS flag and rmx_expire of a
  * matched, unreferenced route are modified.
  */
 static struct radix_node *
 in_matroute(void *v_arg, struct radix_node_head *head)
 {
 	struct radix_node *rn = rn_match(v_arg, head);
 	struct rtentry *rt = (struct rtentry *)rn;
 
+	/*XXX locking? */
 	if (rt && rt->rt_refcnt == 0) {		/* this is first reference */
 		if (rt->rt_flags & RTPRF_OURS) {
 			/* Hand the route back: no longer ours, no expiry. */
 			rt->rt_flags &= ~RTPRF_OURS;
 			rt->rt_rmx.rmx_expire = 0;
 		}
 	}
 	return rn;
 }
 
 /*
  * Tunables for the expiry of routes marked RTPRF_OURS, each exported
  * read-write through sysctl.  rtq_reallyold is the lifetime given to a
  * route by in_clsroute() and is lowered by in_rtqtimo(), never below
  * rtq_minreallyold, when more than rtq_toomany such routes remain.
  */
 static int rtq_reallyold = 60*60;		/* one hour is "really old" */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
     &rtq_reallyold, 0, "Default expiration time on dynamically learned routes");
 
 static int rtq_minreallyold = 10;  /* never automatically crank down to less */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
     &rtq_minreallyold, 0,
     "Minimum time to attempt to hold onto dynamically learned routes");
 
 static int rtq_toomany = 128;		/* 128 cached routes is "too many" */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
     &rtq_toomany, 0, "Upper limit on dynamically learned routes");
 
 /*
  * On last reference drop, mark the route as belonging to us so that it
  * can be timed out.
  *
  * Installed as the rnh_close hook by in_inithead().  Only routes that
  * are up, are host routes without RTF_LLINFO, were cloned and are not
  * already marked RTPRF_OURS are affected.  The route lock must be held
  * on entry (asserted below) and is held again on return.
  */
 static void
 in_clsroute(struct radix_node *rn, struct radix_node_head *head)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
+	RT_LOCK_ASSERT(rt);
+
 	if (!(rt->rt_flags & RTF_UP))
 		return;			/* prophylactic measures */
 
 	/* Only host routes that carry no link-layer info qualify. */
 	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
 		return;
 
 	/* Must have been cloned and must not already be marked as ours. */
 	if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
 		return;
 
 	/*
 	 * If rtq_reallyold is 0, just delete the route without
 	 * waiting for a timeout cycle to kill it.
 	 */
 	if (rtq_reallyold != 0) {
 		rt->rt_flags |= RTPRF_OURS;
 		rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
 	} else {
+		/* NB: must unlock to avoid recursion */
+		RT_UNLOCK(rt);
 		rtrequest(RTM_DELETE,
 			  (struct sockaddr *)rt_key(rt),
 			  rt->rt_gateway, rt_mask(rt),
 			  rt->rt_flags, 0);
+		RT_LOCK(rt);
 	}
 }
 
 /*
  * Argument block passed through rnh_walktree() to in_rtqkill().
  */
 struct rtqk_arg {
 	struct radix_node_head *rnh;	/* table being walked (set by callers) */
 	int draining;		/* nonzero: delete even if not yet expired */
 	int killed;		/* out: routes successfully deleted */
 	int found;		/* out: routes seen with RTPRF_OURS set */
 	int updating;		/* nonzero: clamp expiry to rtq_reallyold */
 	time_t nextstop;	/* in/out: earliest expiry among survivors */
 };
 
 /*
  * Tree-walk callback that gets rid of old routes.  Routes not marked
  * RTPRF_OURS are ignored.  A marked route is deleted when it has
  * expired or when draining is set; otherwise, when updating is set, its
  * expiry is pulled in so that it lies no further than rtq_reallyold in
  * the future, and nextstop is lowered to the route's expiry if that is
  * sooner.  Always returns 0.
  */
 static int
 in_rtqkill(struct radix_node *rn, void *rock)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct rtqk_arg *ap = rock;
 	int err;
 
 	/* Not one of the routes we manage. */
 	if (!(rt->rt_flags & RTPRF_OURS))
 		return 0;
 
 	ap->found++;
 
 	if (!ap->draining && rt->rt_rmx.rmx_expire > time_second) {
 		/* Still alive: maybe shorten its life, note when it ends. */
 		if (ap->updating &&
 		    (rt->rt_rmx.rmx_expire - time_second >
 		     rtq_reallyold)) {
 			rt->rt_rmx.rmx_expire =
 			    time_second + rtq_reallyold;
 		}
 		ap->nextstop = lmin(ap->nextstop,
 				    rt->rt_rmx.rmx_expire);
 		return 0;
 	}
 
 	/* Expired or draining: the route must be unreferenced by now. */
 	if (rt->rt_refcnt > 0)
 		panic("rtqkill route really not free");
 
 	err = rtrequest(RTM_DELETE,
 			(struct sockaddr *)rt_key(rt),
 			rt->rt_gateway, rt_mask(rt),
 			rt->rt_flags, 0);
 	if (err)
 		log(LOG_WARNING, "in_rtqkill: error %d\n", err);
 	else
 		ap->killed++;
 
 	return 0;
 }
 
 /*
  * rtq_timeout is the upper bound, in seconds, on the delay between two
  * runs of in_rtqtimo(); rtq_timer is the callout that re-arms it.
  */
 #define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
 static int rtq_timeout = RTQ_TIMEOUT;
+static struct callout rtq_timer;
 
 /*
  * Timer handler: walk the routing table passed in `rock' with
  * in_rtqkill() to delete expired routes that we manage, shrink
  * rtq_reallyold when too many such routes remain, and re-arm rtq_timer
  * for the earliest remaining expiry, at most rtq_timeout seconds away.
  * Also called directly from in_inithead() to start the cycle.
  */
 static void
 in_rtqtimo(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
 	static time_t last_adjusted_timeout = 0;
-	int s;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = time_second + rtq_timeout;
 	arg.draining = arg.updating = 0;
-	s = splnet();
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 
 	/*
 	 * Attempt to be somewhat dynamic about this:
 	 * If there are ``too many'' routes sitting around taking up space,
 	 * then crank down the timeout, and see if we can't make some more
 	 * go away.  However, we make sure that we will never adjust more
 	 * than once in rtq_timeout seconds, to keep from cranking down too
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > rtq_toomany) &&
 	    (time_second - last_adjusted_timeout >= rtq_timeout) &&
 	    rtq_reallyold > rtq_minreallyold) {
 		/* Cut the lifetime by a third, but not below the floor. */
 		rtq_reallyold = 2 * rtq_reallyold / 3;
 		if (rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
 		last_adjusted_timeout = time_second;
 #ifdef DIAGNOSTIC
 		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    rtq_reallyold);
 #endif
 		/* Second pass: apply the shorter lifetime to the survivors. */
 		arg.found = arg.killed = 0;
 		arg.updating = 1;
-		s = splnet();
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
-		splx(s);
 	}
 
 	/* Sleep until the earliest expiry recorded by the walk. */
 	atv.tv_usec = 0;
 	atv.tv_sec = arg.nextstop - time_second;
-	timeout(in_rtqtimo, rock, tvtohz(&atv));
+	callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
 }
 
 /*
  * Walk the AF_INET routing table with in_rtqkill() in draining mode,
  * which deletes every route marked RTPRF_OURS whether or not its
  * timeout has expired.  The counters collected in `arg' are discarded.
  */
 void
 in_rtqdrain(void)
 {
 	struct radix_node_head *rnh = rt_tables[AF_INET];
 	struct rtqk_arg arg;
-	int s;
+
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = 0;
 	arg.draining = 1;
 	arg.updating = 0;
-	s = splnet();
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 }
 
 /*
  * Initialize our routing tree.
  *
  * Calls rn_inithead() and, only when `head' is the AF_INET slot of
  * rt_tables, installs the in_addroute/in_matroute/in_clsroute hooks,
  * initializes the rtq_timer callout and starts the expiry timer.
  * Returns 0 if rn_inithead() fails and 1 otherwise.
  */
 int
 in_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	if (!rn_inithead(head, off))
 		return 0;
 
 	if (head != (void **)&rt_tables[AF_INET])	/* BOGUS! */
 		return 1;	/* only do this for the real routing table */
 
 	rnh = *head;
 	rnh->rnh_addaddr = in_addroute;
 	rnh->rnh_matchaddr = in_matroute;
 	rnh->rnh_close = in_clsroute;
+	callout_init(&rtq_timer, CALLOUT_MPSAFE);
 	in_rtqtimo(rnh);	/* kick off timeout first time */
 	return 1;
 }
 
 /*
  * This zaps old routes when the interface goes down or interface
  * address is deleted.  In the latter case, it deletes static routes
  * that point to this address.  If we don't do this, we may end up
  * using the old address in the future.  The ones we always want to
  * get rid of are things like ARP entries, since the user might down
  * the interface, walk over to a completely different network, and
  * plug back in.
  *
  * struct in_ifadown_arg carries the parameters of one such sweep from
  * in_ifadown() to the in_ifadownkill() tree-walk callback.
  */
 struct in_ifadown_arg {
 	struct radix_node_head *rnh;	/* table being walked */
 	struct ifaddr *ifa;		/* address whose routes are removed */
 	int del;			/* nonzero: remove RTF_STATIC routes too */
 };
 
 /*
  * Tree-walk callback for in_ifadown(): delete the route if it uses the
  * interface address in `xap' and is either non-static or `del' is set.
  * A failed deletion is logged and otherwise ignored.  Always returns 0.
  */
 static int
 in_ifadownkill(struct radix_node *rn, void *xap)
 {
 	struct in_ifadown_arg *ap = xap;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
 	if (rt->rt_ifa == ap->ifa &&
 	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
 		/*
 		 * We need to disable the automatic prune that happens
 		 * in this case in rtrequest() because it will blow
 		 * away the pointers that rn_walktree() needs in order
 		 * to continue our descent.  We will end up deleting all
 		 * the routes that rtrequest() would have in any case,
 		 * so that behavior is not needed there.
 		 */
+		RT_LOCK(rt);
 		rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
+		RT_UNLOCK(rt);
 		err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
 				rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
 		if (err) {
 			log(LOG_WARNING, "in_ifadownkill: error %d\n", err);
 		}
 	}
 	return 0;
 }
 
 /*
  * Remove the routes that use interface address `ifa' from the AF_INET
  * routing table by walking it with in_ifadownkill(), then clear
  * IFA_ROUTE on the address.  Static routes are removed only when
  * `delete' is nonzero.  Returns 1 without doing anything if `ifa' is
  * not an AF_INET address, 0 otherwise.
  */
 int
 in_ifadown(struct ifaddr *ifa, int delete)
 {
 	struct in_ifadown_arg arg;
 	struct radix_node_head *rnh;
 
 	if (ifa->ifa_addr->sa_family != AF_INET)
 		return 1;
 
 	arg.rnh = rnh = rt_tables[AF_INET];
 	arg.ifa = ifa;
 	arg.del = delete;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	ifa->ifa_flags &= ~IFA_ROUTE;
+	ifa->ifa_flags &= ~IFA_ROUTE;		/* XXXlocking? */
 	return 0;
 }
Index: head/sys/netinet/ip_flow.c
===================================================================
--- head/sys/netinet/ip_flow.c	(revision 120726)
+++ head/sys/netinet/ip_flow.c	(revision 120727)
@@ -1,375 +1,377 @@
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the NetBSD
  *	Foundation, Inc. and its contributors.
  * 4. Neither the name of The NetBSD Foundation nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_flow.h>
 
 /*
  * IPFLOW_TIMER is the value a flow's ipf_timer is (re)set to whenever
  * the flow is created or used; ipflow_slowtimo() decrements it once per
  * call and frees the flow when it reaches zero.
  */
 #define	IPFLOW_TIMER		(5 * PR_SLOWHZ)
 #define IPFLOW_HASHBITS		6	/* should not be a multiple of 8 */
 #define	IPFLOW_HASHSIZE		(1 << IPFLOW_HASHBITS)
 #if IPFLOW_HASHSIZE > 255
 #error "make ipf_hash larger"
 #endif
 /* Hash table of flow lists, indexed by ipflow_hash(). */
 static struct ipflow_head ipflows[IPFLOW_HASHSIZE];
 static int ipflow_inuse;
 #define	IPFLOW_MAX		256
 
 /*
  * Each flow list has a lock that guards updates to the list and to
  * all entries on the list.  Flow entries hold the hash index for
  * finding the head of the list so the lock can be found quickly.
  *
  * ipflow_inuse holds a count of the number of flow entries present.
  * This is used to bound the size of the table.  When IPFLOW_MAX entries
  * are present and an additional entry is needed one is chosen for
  * replacement.  We could use atomic ops for this counter but having it
  * inconsistent doesn't appear to be a problem.
  */
 #define	IPFLOW_HEAD_LOCK(_ipfh)		mtx_lock(&(_ipfh)->ipfh_mtx)
 #define	IPFLOW_HEAD_UNLOCK(_ipfh)	mtx_unlock(&(_ipfh)->ipfh_mtx)
 /* Lock/unlock the list that a flow entry hashes to (via ipf_hash). */
 #define	IPFLOW_LOCK(_ipf) \
 	IPFLOW_HEAD_LOCK(&ipflows[(_ipf)->ipf_hash])
 #define	IPFLOW_UNLOCK(_ipf) \
 	IPFLOW_HEAD_UNLOCK(&ipflows[(_ipf)->ipf_hash])
 
 /* Nonzero enables flow lookup and creation; off by default. */
 static int ipflow_active = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
     &ipflow_active, 0, "Enable flow-based IP forwarding");
 
 static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
 
 /*
  * Fold the destination address, source address and TOS of a packet
  * into an index into ipflows[].
  *
  * dst, src: addresses taken from the IP header.
  * tos:      type-of-service value, used as the initial hash value.
  *
  * Returns a value in the range 0 .. IPFLOW_HASHSIZE-1.
  */
 static unsigned
 ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos)
 {
 	unsigned hash = tos;
 	int idx;
 
 	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) {
 		/*
 		 * On the first pass the destination shift count would be
 		 * 32, the full width of a 32-bit s_addr, which is
 		 * undefined behaviour in C.  Arithmetically nothing is
 		 * left to add after such a shift, so skip the term.
 		 */
 		if (idx != 0)
 			hash += dst.s_addr >> (32 - idx);
 		hash += src.s_addr >> idx;
 	}
 	return hash & (IPFLOW_HASHSIZE-1);
 }
 
 /*
  * Find the flow entry that matches the destination, source and TOS of
  * the given IP header.
  *
  * Returns the entry with its list head still locked, in which case the
  * caller must release it with IPFLOW_UNLOCK(), or NULL with no lock
  * held when there is no match.
  */
 static struct ipflow *
 ipflow_lookup(const struct ip *ip)
 {
 	struct ipflow_head *bucket;
 	struct ipflow *ipf;
 
 	bucket = &ipflows[ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos)];
 
 	IPFLOW_HEAD_LOCK(bucket);
 	LIST_FOREACH(ipf, &bucket->ipfh_head, ipf_next) {
 		if (ip->ip_dst.s_addr != ipf->ipf_dst.s_addr)
 			continue;
 		if (ip->ip_src.s_addr != ipf->ipf_src.s_addr)
 			continue;
 		if (ip->ip_tos != ipf->ipf_tos)
 			continue;
 		/* NB: return head locked */
 		return ipf;
 	}
 	IPFLOW_HEAD_UNLOCK(bucket);
 	return NULL;
 }
 
 /*
  * Try to forward a packet using a cached flow entry.
  *
  * Returns 0, leaving the mbuf untouched, when forwarding or the flow
  * cache is disabled, the header is not a plain option-less IPv4 header,
  * no flow matches, the cached route or its interface is down, the
  * packet exceeds the interface MTU, or the TTL is too small.  Returns 1
  * once the packet has been handed to the interface's if_output routine,
  * whether or not that call reported an error.
  *
  * The flow's list lock, taken by ipflow_lookup(), is held across the
  * if_output call and released on every return path after a match.
  */
 int
 ipflow_fastforward(struct mbuf *m)
 {
 	struct ip *ip;
 	struct ipflow *ipf;
 	struct rtentry *rt;
 	struct sockaddr *dst;
 	int error;
 
 	/*
 	 * Are we forwarding packets?  Big enough for an IP packet?
 	 */
 	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
 		return 0;
 	/*
 	 * IP header with no option and valid version and length
 	 */
 	ip = mtod(m, struct ip *);
 	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
 	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
 		return 0;
 	/*
 	 * Find a flow.
 	 */
 	if ((ipf = ipflow_lookup(ip)) == NULL)
 		return 0;
 
 	/*
 	 * Route and interface still up?
 	 */
 	rt = ipf->ipf_ro.ro_rt;
 	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) {
 		IPFLOW_UNLOCK(ipf);
 		return 0;
 	}
 
 	/*
 	 * Packet size OK?  TTL?
 	 */
 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) {
 		IPFLOW_UNLOCK(ipf);
 		return 0;
 	}
 
 	/*
 	 * Everything checks out and so we can forward this packet.
 	 * Modify the TTL and incrementally change the checksum.
 	 */
 	ip->ip_ttl -= IPTTLDEC;
 	/* The extra 1 is the end-around carry of the one's-complement sum. */
 	if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
 		ip->ip_sum += htons(IPTTLDEC << 8) + 1;
 	} else {
 		ip->ip_sum += htons(IPTTLDEC << 8);
 	}
 
 	/*
 	 * Send the packet on its way.  All we can get back is ENOBUFS
 	 */
 	ipf->ipf_uses++;
 	ipf->ipf_timer = IPFLOW_TIMER;
 
 	/* Next hop is the gateway for gateway routes, else the destination. */
 	if (rt->rt_flags & RTF_GATEWAY)
 		dst = rt->rt_gateway;
 	else
 		dst = &ipf->ipf_ro.ro_dst;
 	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
 		if (error == ENOBUFS)
 			ipf->ipf_dropped++;
 		else
 			ipf->ipf_errors++;
 	}
 	IPFLOW_UNLOCK(ipf);
 	return 1;
 }
 
 /*
  * Fold a flow's private counters into the use count of its route and
  * into the global IP statistics.  The flow's counters are not reset.
  */
 static void
 ipflow_addstats(struct ipflow *ipf)
 {
 	/* Failed sends, whatever the reason. */
 	ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
 
 	/* Packets sent through this flow. */
 	ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
 	ipstat.ips_forward += ipf->ipf_uses;
 	ipstat.ips_fastforward += ipf->ipf_uses;
 }
 
 /*
  * Evict one flow entry so that its storage can be reused, and return
  * it.
  *
  * An entry whose route is no longer RTF_UP is taken immediately.
  * Otherwise the entry with the smallest ipf_timer wins, ties being
  * broken by the lower ipf_last_uses + ipf_uses total.  The chosen entry
  * is unlinked from its list, its statistics are folded in with
  * ipflow_addstats() and its route reference is dropped.
  *
  * Locking: the list holding the current candidate stays locked while
  * later lists are scanned; every other list is unlocked as soon as it
  * has been examined.  No list lock is held on return.
  *
  * NB: assumes the table holds at least one entry; the caller reaps only
  * when ipflow_inuse has reached IPFLOW_MAX.
  *
  * XXX the locking here makes reaping an entry very expensive...
  */
 static struct ipflow *
 ipflow_reap(void)
 {
 	struct ipflow *victim = NULL;
 	struct ipflow *ipf;
 	int idx;
 
 	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
 		struct ipflow_head *head = &ipflows[idx];
 
 		IPFLOW_HEAD_LOCK(head);
 		LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) {
 			/*
 			 * If this no longer points to a valid route
 			 * reclaim it.
 			 */
 			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) {
 				/*
 				 * Let go of an earlier candidate that
 				 * lives on a different list, which is
 				 * still locked.
 				 */
 				if (victim != NULL &&
 				    victim->ipf_hash != ipf->ipf_hash)
 					IPFLOW_UNLOCK(victim);
 				goto done;
 			}
 			/*
 			 * choose the one that's been least recently used
 			 * or has had the least uses in the last 1.5
 			 * intervals.
 			 */
 			if (victim == NULL)
 				victim = ipf;
 			else if (ipf->ipf_timer < victim->ipf_timer
 			    || (ipf->ipf_timer == victim->ipf_timer
 				&& ipf->ipf_last_uses + ipf->ipf_uses <
 				    victim->ipf_last_uses + victim->ipf_uses)) {
 				if (victim->ipf_hash != ipf->ipf_hash)
 					IPFLOW_UNLOCK(victim);
 				victim = ipf;
 			}
 		}
 		/*
 		 * Keep this list locked only if it holds the candidate;
 		 * a list scanned before any candidate exists must be
 		 * released as well.
 		 */
 		if (victim == NULL || victim->ipf_hash != idx)
 			IPFLOW_HEAD_UNLOCK(head);
 	}
 	ipf = victim;
     done:
 	/*
 	 * Remove the entry from the flow table.
 	 */
 	LIST_REMOVE(ipf, ipf_next);
 	IPFLOW_UNLOCK(ipf);
 
 	ipflow_addstats(ipf);
 	RTFREE(ipf->ipf_ro.ro_rt);
 	return ipf;
 }
 
 /*
  * Dispose of a flow entry: unlink it from its hash list, fold its
  * counters into the statistics, drop its route reference and release
  * its memory.  No lock is taken here.
  */
 static void
 ipflow_free(struct ipflow *ipf)
 {
 	/* One entry fewer in the table. */
 	ipflow_inuse--;
 	LIST_REMOVE(ipf, ipf_next);
 
 	/* Account for the flow before its route reference goes away. */
 	ipflow_addstats(ipf);
 	RTFREE(ipf->ipf_ro.ro_rt);
 
 	free(ipf, M_IPFLOW);
 }
 
 /*
  * Age every flow entry by one tick.  An entry whose timer reaches zero
  * is freed; any other entry has its use count folded into the route and
  * IP statistics, saved in ipf_last_uses and reset.  Each list is
  * processed under its own lock.
  */
 void
 ipflow_slowtimo(void)
 {
 	struct ipflow_head *head;
 	struct ipflow *ipf, *next;
 	int idx;
 
 	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
 		head = &ipflows[idx];
 
 		IPFLOW_HEAD_LOCK(head);
 		for (ipf = LIST_FIRST(&head->ipfh_head); ipf != NULL;
 		    ipf = next) {
 			/* Fetch the successor first: ipf may be freed. */
 			next = LIST_NEXT(ipf, ipf_next);
 
 			ipf->ipf_timer--;
 			if (ipf->ipf_timer == 0) {
 				ipflow_free(ipf);
 				continue;
 			}
 
 			ipf->ipf_last_uses = ipf->ipf_uses;
 			ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
 			ipstat.ips_forward += ipf->ipf_uses;
 			ipstat.ips_fastforward += ipf->ipf_uses;
 			ipf->ipf_uses = 0;
 		}
 		IPFLOW_HEAD_UNLOCK(head);
 	}
 }
 
 /*
  * Create or refresh the flow entry for the packet in `m' so that it
  * caches the route `ro'.
  *
  * Nothing is done when the flow cache is disabled or the packet is
  * ICMP.  An existing entry is unlinked, its statistics are credited to
  * the old route and it is reused; otherwise an entry is allocated, or
  * reclaimed with ipflow_reap() once IPFLOW_MAX entries are in use.  The
  * entry takes its own reference on ro->ro_rt and is inserted at the
  * head of its hash list.  An allocation failure is silently ignored.
  */
 void
 ipflow_create(const struct route *ro, struct mbuf *m)
 {
 	const struct ip *const ip = mtod(m, struct ip *);
 	struct ipflow *ipf;
 
 	/*
 	 * Don't create cache entries for ICMP messages.
 	 */
 	if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
 		return;
 	/*
 	 * See if an existing flow struct exists.  If so remove it from its
 	 * list and free the old route.  If not, try to malloc a new one
 	 * (if we aren't at our limit).
 	 */
 	ipf = ipflow_lookup(ip);
 	if (ipf == NULL) {
 		/* No match: ipflow_lookup() returned with no lock held. */
 		if (ipflow_inuse == IPFLOW_MAX) {
 			ipf = ipflow_reap();
 		} else {
 			ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
 						       M_NOWAIT);
 			if (ipf == NULL)
 				return;
 			ipflow_inuse++;
 		}
 		bzero((caddr_t) ipf, sizeof(*ipf));
 
 		ipf->ipf_hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
 		ipf->ipf_dst = ip->ip_dst;
 		ipf->ipf_src = ip->ip_src;
 		ipf->ipf_tos = ip->ip_tos;
 
 		/* ipf_hash is set, so the list lock can now be found. */
 		IPFLOW_LOCK(ipf);
 	} else {
 		/* Match: the list is already locked by ipflow_lookup(). */
 		LIST_REMOVE(ipf, ipf_next);
 
 		ipflow_addstats(ipf);		/* add stats to old route */
 		RTFREE(ipf->ipf_ro.ro_rt);	/* clear reference */
 		ipf->ipf_uses = ipf->ipf_last_uses = 0;
 		ipf->ipf_errors = ipf->ipf_dropped = 0;
 	}
 
 	/*
 	 * Fill in the updated information.
 	 */
 	ipf->ipf_ro = *ro;
+	RT_LOCK(ro->ro_rt);
 	ro->ro_rt->rt_refcnt++;
+	RT_UNLOCK(ro->ro_rt);
 	ipf->ipf_timer = IPFLOW_TIMER;
 	/*
 	 * Insert into the appropriate bucket of the flow table.
 	 */
 	LIST_INSERT_HEAD(&ipflows[ipf->ipf_hash].ipfh_head, ipf, ipf_next);
 	IPFLOW_UNLOCK(ipf);
 }
 
 static void
 ipflow_init(void)
 {
 	int idx;
 
 	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
 		struct ipflow_head *head = &ipflows[idx];
 		LIST_INIT(&head->ipfh_head);
 		mtx_init(&head->ipfh_mtx, "ipflow list head", NULL, MTX_DEF);
 	}
 }
 SYSINIT(ipflow, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipflow_init, 0);
Index: head/sys/netinet/ip_icmp.c
===================================================================
--- head/sys/netinet/ip_icmp.c	(revision 120726)
+++ head/sys/netinet/ip_icmp.c	(revision 120727)
@@ -1,881 +1,881 @@
 /*
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  * $FreeBSD$
  */
 
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #include <netinet/icmp_var.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 #endif
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #define	IPSEC
 #endif
 
 #include <machine/in_cksum.h>
 
 /*
  * ICMP routines: error generation, receive packet processing, and
  * routines to turnaround packets back to the originator, and
  * host table maintenance routines.
  */
 
 /*
  * ICMP statistics and tunables, all exported read-write under the
  * net.inet.icmp sysctl node.  Several are registered with an empty
  * description string, and most are not referenced in the part of the
  * file shown here.
  * NOTE(review): confirm each knob's meaning against its users before
  * relying on the name alone.
  */
 static struct	icmpstat icmpstat;
 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
 	&icmpstat, icmpstat, "");
 
 static int	icmpmaskrepl = 0;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
 	&icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets.");
 
 static u_int	icmpmaskfake = 0;
 SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
 	&icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets.");
 
 static int	drop_redirect = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 
 	&drop_redirect, 0, "");
 
 static int	log_redirect = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 
 	&log_redirect, 0, "");
 
 static int      icmplim = 200;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
 	&icmplim, 0, "");
 
 static int	icmplim_output = 1;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
 	&icmplim_output, 0, "");
 
 /*
  * ICMP broadcast echo sysctl
  */
 
 static int	icmpbmcastecho = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
 	&icmpbmcastecho, 0, "");
 
 
 #ifdef ICMPPRINTFS
 int	icmpprintfs = 0;
 #endif
 
 static void	icmp_reflect(struct mbuf *);
 static void	icmp_send(struct mbuf *, struct mbuf *, struct route *);
 static int	ip_next_mtu(int, int);
 
 extern	struct protosw inetsw[];
 
 /*
  * Generate an error packet of type error
  * in response to bad packet ip.
  *
  * n:       mbuf holding the offending packet, IP header first.  It is
  *          always freed before returning.
  * type:    ICMP type of the message to send; panics if it exceeds
  *          ICMP_MAXTYPE.
  * code:    ICMP code; for ICMP_PARAMPROB it is stored in icmp_pptr
  *          and the code sent is 0.
  * dest:    gateway address placed in the message for ICMP_REDIRECT.
  * destifp: if not NULL, the interface whose MTU is reported for
  *          ICMP_UNREACH / ICMP_UNREACH_NEEDFRAG.
  *
  * No message is sent for non-first fragments, for ICMP packets that
  * are not of an informational type (unless a redirect is requested),
  * for broadcast or multicast packets, or when no mbuf is available.
  *
  * NOTE(review): oip->ip_len and ip_off appear to be in host byte order
  * on entry, since the quoted copy is converted with htons() below;
  * confirm against the callers.
  */
 void
 icmp_error(n, type, code, dest, destifp)
 	struct mbuf *n;
 	int type, code;
 	n_long dest;
 	struct ifnet *destifp;
 {
 	register struct ip *oip = mtod(n, struct ip *), *nip;
 	register unsigned oiplen = oip->ip_hl << 2;
 	register struct icmp *icp;
 	register struct mbuf *m;
 	unsigned icmplen;
 
 #ifdef ICMPPRINTFS
 	if (icmpprintfs)
 		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
 #endif
 	if (type != ICMP_REDIRECT)
 		icmpstat.icps_error++;
 	/*
 	 * Don't send error if not the first fragment of message.
 	 * Don't error if the old packet protocol was ICMP
 	 * error message, only known informational types.
 	 */
 	if (oip->ip_off &~ (IP_MF|IP_DF))
 		goto freeit;
 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
 	  n->m_len >= oiplen + ICMP_MINLEN &&
 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
 		icmpstat.icps_oldicmp++;
 		goto freeit;
 	}
 	/* Don't send error in response to a multicast or broadcast packet */
 	if (n->m_flags & (M_BCAST|M_MCAST))
 		goto freeit;
 	/*
 	 * First, formulate icmp message
 	 */
 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
 	if (m == NULL)
 		goto freeit;
 #ifdef MAC
 	mac_create_mbuf_netlayer(n, m);
 #endif
 	/* Quote the old IP header plus at most 8 bytes of its payload. */
 	icmplen = min(oiplen + 8, oip->ip_len);
 	if (icmplen < sizeof(struct ip))
 		panic("icmp_error: bad length");
 	m->m_len = icmplen + ICMP_MINLEN;
 	MH_ALIGN(m, m->m_len);
 	icp = mtod(m, struct icmp *);
 	if ((u_int)type > ICMP_MAXTYPE)
 		panic("icmp_error");
 	icmpstat.icps_outhist[type]++;
 	icp->icmp_type = type;
 	if (type == ICMP_REDIRECT)
 		icp->icmp_gwaddr.s_addr = dest;
 	else {
 		icp->icmp_void = 0;
 		/*
 		 * The following assignments assume an overlay with the
 		 * zeroed icmp_void field.
 		 */
 		if (type == ICMP_PARAMPROB) {
 			icp->icmp_pptr = code;
 			code = 0;
 		} else if (type == ICMP_UNREACH &&
 			code == ICMP_UNREACH_NEEDFRAG && destifp) {
 			icp->icmp_nextmtu = htons(destifp->if_mtu);
 		}
 	}
 
 	icp->icmp_code = code;
 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
 	nip = &icp->icmp_ip;
 
 	/*
 	 * Convert fields to network representation.
 	 */
 	nip->ip_len = htons(nip->ip_len);
 	nip->ip_off = htons(nip->ip_off);
 
 	/*
 	 * Now, copy old ip header (without options)
 	 * in front of icmp message.
 	 */
 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
 		panic("icmp len");
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
 	nip = mtod(m, struct ip *);
 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
 	/* Rewrite the copied header so it describes the new ICMP packet. */
 	nip->ip_len = m->m_len;
 	nip->ip_v = IPVERSION;
 	nip->ip_hl = 5;
 	nip->ip_p = IPPROTO_ICMP;
 	nip->ip_tos = 0;
 	icmp_reflect(m);
 
 freeit:
 	/* The original packet is released on every path. */
 	m_freem(n);
 }
 
 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
 
 /*
  * Process a received ICMP message.
  *
  * m   - mbuf chain holding the packet; the IP header is at the front.
  * off - length of the IP header in bytes (used as hlen below and passed
  *       on to rip_input()).
  *
  * ip->ip_len is taken as the ICMP length (the "reflect" path adds hlen
  * back "since ip_input deducts this").
  *
  * Outcome per message:
  *   - too short / bad checksum / rate limited: the chain is freed;
  *   - echo, timestamp and mask requests: turned into the matching reply
  *     in place and sent back through icmp_reflect();
  *   - unreach, time exceeded, parameter problem, source quench: mapped
  *     to a PRC_* code and passed to the pr_ctlinput routine of the
  *     protocol named in the quoted IP header, then handed to rip_input();
  *   - redirect: applied with rtredirect(), then handed to rip_input();
  *   - everything else: handed to rip_input() for raw listeners.
  */
 void
 icmp_input(m, off)
 	register struct mbuf *m;
 	int off;
 {
 	int hlen = off;
 	register struct icmp *icp;
 	register struct ip *ip = mtod(m, struct ip *);
 	int icmplen = ip->ip_len;
 	register int i;
 	struct in_ifaddr *ia;
 	void (*ctlfunc)(int, struct sockaddr *, void *);
 	int code;
 
 	/*
 	 * Locate icmp structure in mbuf, and check
 	 * that not corrupted and of at least minimum length.
 	 */
 #ifdef ICMPPRINTFS
 	if (icmpprintfs) {
 		char buf[4 * sizeof "123"];
 		strcpy(buf, inet_ntoa(ip->ip_src));
 		printf("icmp_input from %s to %s, len %d\n",
 		       buf, inet_ntoa(ip->ip_dst), icmplen);
 	}
 #endif
 	if (icmplen < ICMP_MINLEN) {
 		icmpstat.icps_tooshort++;
 		goto freeit;
 	}
 	/* Make the IP header plus the leading part of the ICMP data contiguous. */
 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
 	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
 		/*
 		 * NOTE(review): returns without m_freem(); presumably
 		 * m_pullup() has already released the chain on failure --
 		 * confirm against the mbuf API.
 		 */
 		icmpstat.icps_tooshort++;
 		return;
 	}
 	ip = mtod(m, struct ip *);
 	/*
 	 * Temporarily step over the IP header so that in_cksum() covers
 	 * only the icmplen bytes of ICMP; the header is exposed again below.
 	 */
 	m->m_len -= hlen;
 	m->m_data += hlen;
 	icp = mtod(m, struct icmp *);
 	if (in_cksum(m, icmplen)) {
 		icmpstat.icps_checksum++;
 		goto freeit;
 	}
 	m->m_len += hlen;
 	m->m_data -= hlen;
 
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
 		/*
 		 * Deliver very specific ICMP type only.
 		 */
 		switch (icp->icmp_type) {
 		case ICMP_UNREACH:
 		case ICMP_TIMXCEED:
 			break;
 		default:
 			goto freeit;
 		}
 	}
 
 #ifdef ICMPPRINTFS
 	if (icmpprintfs)
 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
 		    icp->icmp_code);
 #endif
 
 	/*
 	 * Message type specific processing.
 	 * Types beyond ICMP_MAXTYPE are not counted in icps_inhist[] and
 	 * go straight to the raw listeners.
 	 */
 	if (icp->icmp_type > ICMP_MAXTYPE)
 		goto raw;
 	icmpstat.icps_inhist[icp->icmp_type]++;
 	code = icp->icmp_code;
 	switch (icp->icmp_type) {
 
 	case ICMP_UNREACH:
 		/* Translate the ICMP subcode into a PRC_* control code. */
 		switch (code) {
 			case ICMP_UNREACH_NET:
 			case ICMP_UNREACH_HOST:
 			case ICMP_UNREACH_SRCFAIL:
 			case ICMP_UNREACH_NET_UNKNOWN:
 			case ICMP_UNREACH_HOST_UNKNOWN:
 			case ICMP_UNREACH_ISOLATED:
 			case ICMP_UNREACH_TOSNET:
 			case ICMP_UNREACH_TOSHOST:
 			case ICMP_UNREACH_HOST_PRECEDENCE:
 			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
 				code = PRC_UNREACH_NET;
 				break;
 
 			case ICMP_UNREACH_NEEDFRAG:
 				code = PRC_MSGSIZE;
 				break;
 
 			/*
 			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
 			 * Treat subcodes 2,3 as immediate RST
 			 */
 			case ICMP_UNREACH_PROTOCOL:
 			case ICMP_UNREACH_PORT:
 				code = PRC_UNREACH_PORT;
 				break;
 
 			case ICMP_UNREACH_NET_PROHIB:
 			case ICMP_UNREACH_HOST_PROHIB:
 			case ICMP_UNREACH_FILTER_PROHIB:
 				code = PRC_UNREACH_ADMIN_PROHIB;
 				break;
 
 			default:
 				goto badcode;
 		}
 		goto deliver;
 
 	case ICMP_TIMXCEED:
 		if (code > 1)
 			goto badcode;
 		/* Subcode 0 or 1 selects PRC_TIMXCEED_INTRANS or the next PRC_ value. */
 		code += PRC_TIMXCEED_INTRANS;
 		goto deliver;
 
 	case ICMP_PARAMPROB:
 		if (code > 1)
 			goto badcode;
 		code = PRC_PARAMPROB;
 		goto deliver;
 
 	case ICMP_SOURCEQUENCH:
 		if (code)
 			goto badcode;
 		code = PRC_QUENCH;
 		/* No goto needed: falls into the shared "deliver" label. */
 	deliver:
 		/*
 		 * Problem with datagram; advise higher level routines.
 		 * The message must be long enough to hold the quoted IP
 		 * header, and that header must claim at least the minimum
 		 * header length.
 		 */
 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
 			icmpstat.icps_badlen++;
 			goto freeit;
 		}
 		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
 		/* Discard ICMP's in response to multicast packets */
 		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
 			goto badcode;
 #ifdef ICMPPRINTFS
 		if (icmpprintfs)
 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
 #if 1
 		/*
 		 * MTU discovery:
 		 * If we got a needfrag and there is a host route to the
 		 * original destination, and the MTU is not locked, then
 		 * set the MTU in the route to the suggested new value
 		 * (if given) and then notify as usual.  The ULPs will
 		 * notice that the MTU has changed and adapt accordingly.
 		 * If no new MTU was suggested, then we guess a new one
 		 * less than the current value.  If the new MTU is
 		 * unreasonably small (arbitrarily set at 296), then
 		 * we enable the lock bit, indicating that we are no longer
 		 * doing MTU discovery.
 		 *
 		 * NOTE: resetting the route MTU to the interface value in
 		 * the "too small" case is commented out below, so only the
 		 * lock bit is set there and rmx_mtu is left unchanged.
 		 */
 		if (code == PRC_MSGSIZE) {
 			struct rtentry *rt;
 			int mtu;
 
 			rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
 				      RTF_CLONING | RTF_PRCLONING);
 			if (rt && (rt->rt_flags & RTF_HOST)
 			    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
 				mtu = ntohs(icp->icmp_nextmtu);
 				if (!mtu)
 					mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
 							  1);
 #ifdef DEBUG_MTUDISC
 				printf("MTU for %s reduced to %d\n",
 					inet_ntoa(icmpsrc.sin_addr), mtu);
 #endif
 				if (mtu < 296) {
 					/* rt->rt_rmx.rmx_mtu =
 						rt->rt_ifp->if_mtu; */
 					rt->rt_rmx.rmx_locks |= RTV_MTU;
 				} else if (rt->rt_rmx.rmx_mtu > mtu) {
 					/* Only ever lower the route MTU here. */
 					rt->rt_rmx.rmx_mtu = mtu;
 				}
 			}
 			if (rt)
-				RTFREE(rt);
+				rtfree(rt);
 		}
 
 #endif
 		/*
 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
 		 * notification to TCP layer.
 		 */
 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
 		if (ctlfunc)
 			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
 				   (void *)&icp->icmp_ip);
 		break;
 
 	badcode:
 		/* Counted only; the message still reaches rip_input() below. */
 		icmpstat.icps_badcode++;
 		break;
 
 	case ICMP_ECHO:
 		if (!icmpbmcastecho
 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
 			icmpstat.icps_bmcastecho++;
 			break;
 		}
 		/* Rewrite the request in place into its reply. */
 		icp->icmp_type = ICMP_ECHOREPLY;
 		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
 			goto freeit;
 		else
 			goto reflect;
 
 	case ICMP_TSTAMP:
 		if (!icmpbmcastecho
 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
 			icmpstat.icps_bmcasttstamp++;
 			break;
 		}
 		if (icmplen < ICMP_TSLEN) {
 			icmpstat.icps_badlen++;
 			break;
 		}
 		icp->icmp_type = ICMP_TSTAMPREPLY;
 		icp->icmp_rtime = iptime();
 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
 		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
 			goto freeit;
 		else
 			goto reflect;
 
 	case ICMP_MASKREQ:
 		/* Mask replies are sent only when icmpmaskrepl is non-zero. */
 		if (icmpmaskrepl == 0)
 			break;
 		/*
 		 * We are not able to respond with all ones broadcast
 		 * unless we receive it over a point-to-point interface.
 		 */
 		if (icmplen < ICMP_MASKLEN)
 			break;
 		switch (ip->ip_dst.s_addr) {
 
 		case INADDR_BROADCAST:
 		case INADDR_ANY:
 			icmpdst.sin_addr = ip->ip_src;
 			break;
 
 		default:
 			icmpdst.sin_addr = ip->ip_dst;
 		}
 		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
 			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
 		if (ia == 0)
 			break;
 		if (ia->ia_ifp == 0)
 			break;
 		icp->icmp_type = ICMP_MASKREPLY;
 		/* A non-zero icmpmaskfake replaces the interface's real mask. */
 		if (icmpmaskfake == 0)
 			icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
 		else
 			icp->icmp_mask = icmpmaskfake;
 		if (ip->ip_src.s_addr == 0) {
 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
 			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
 			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
 		}
 		/* Shared tail for echo, timestamp and mask replies. */
 reflect:
 		ip->ip_len += hlen;	/* since ip_input deducts this */
 		icmpstat.icps_reflect++;
 		icmpstat.icps_outhist[icp->icmp_type]++;
 		icmp_reflect(m);
 		return;
 
 	case ICMP_REDIRECT:
 		if (log_redirect) {
 			u_long src, dst, gw;
 
 			src = ntohl(ip->ip_src.s_addr);
 			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
 			gw = ntohl(icp->icmp_gwaddr.s_addr);
 			printf("icmp redirect from %d.%d.%d.%d: "
 			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
 			       (int)(src >> 24), (int)((src >> 16) & 0xff),
 			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
 			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
 			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
 			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
 			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
 		}
 		/* Redirects are logged above even when they are then ignored. */
 		if (drop_redirect)
 			break;
 		if (code > 3)
 			goto badcode;
 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
 			icmpstat.icps_badlen++;
 			break;
 		}
 		/*
 		 * Short circuit routing redirects to force
 		 * immediate change in the kernel's routing
 		 * tables.  The message is also handed to anyone
 		 * listening on a raw socket (e.g. the routing
 		 * daemon for use in updating its tables).
 		 */
 		icmpgw.sin_addr = ip->ip_src;
 		icmpdst.sin_addr = icp->icmp_gwaddr;
 #ifdef	ICMPPRINTFS
 		if (icmpprintfs) {
 			char buf[4 * sizeof "123"];
 			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
 
 			printf("redirect dst %s to %s\n",
 			       buf, inet_ntoa(icp->icmp_gwaddr));
 		}
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
 		rtredirect((struct sockaddr *)&icmpsrc,
 		  (struct sockaddr *)&icmpdst,
 		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
-		  (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+		  (struct sockaddr *)&icmpgw);
 		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
 #ifdef IPSEC
 		key_sa_routechange((struct sockaddr *)&icmpsrc);
 #endif
 		break;
 
 	/*
 	 * No kernel processing for the following;
 	 * just fall through to send to raw listener.
 	 */
 	case ICMP_ECHOREPLY:
 	case ICMP_ROUTERADVERT:
 	case ICMP_ROUTERSOLICIT:
 	case ICMP_TSTAMPREPLY:
 	case ICMP_IREQREPLY:
 	case ICMP_MASKREPLY:
 	default:
 		break;
 	}
 
 	/* Every path that did not free or reflect the packet ends up here. */
 raw:
 	rip_input(m, off);
 	return;
 
 freeit:
 	m_freem(m);
 }
 
 /*
  * Reflect the ip packet back to the source.
  *
  * m - mbuf chain holding the IP header followed by the ICMP message to
  *     send; the chain is either passed on to icmp_send() or freed here.
  *
  * Steps: reject unusable return addresses, swap the packet's source into
  * the destination field, choose a local source address, rebuild the IP
  * options (source route plus any record-route, timestamp or security
  * options from the incoming packet) in a separate mbuf, strip the
  * original options from the packet, and send it with icmp_send().
  */
 static void
 icmp_reflect(m)
 	struct mbuf *m;
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct in_addr t;
 	struct mbuf *opts = 0;
 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
 	struct route *ro = NULL, rt;
 
 	/*
 	 * Refuse to reply to a source that in_canforward() rejects, except
 	 * for addresses on the loopback network.
 	 */
 	if (!in_canforward(ip->ip_src) &&
 	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
 	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
 		m_freem(m);	/* Bad return address */
 		icmpstat.icps_badaddr++;
 		goto done;	/* Ip_output() will check for broadcast */
 	}
 	t = ip->ip_dst;
 	ip->ip_dst = ip->ip_src;
 	/*
 	 * From here on use the on-stack route, zeroed so that the cleanup
 	 * at "done" can tell whether a route entry was attached to it.
 	 */
 	ro = &rt;
 	bzero(ro, sizeof(*ro));
 	/*
 	 * If the incoming packet was addressed directly to us,
 	 * use dst as the src for the reply.  Otherwise (broadcast
 	 * or anonymous), use the address which corresponds
 	 * to the incoming interface.
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash)
 		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
 			goto match;
 	/* Next, try the broadcast addresses of the receiving interface. */
 	if (m->m_pkthdr.rcvif != NULL &&
 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    t.s_addr)
 				goto match;
 		}
 	}
 	/* Last resort: take the address ip_rtaddr() returns for the reply's destination. */
 	ia = ip_rtaddr(ip->ip_dst, ro);
 	/* We need a route to do anything useful. */
 	if (ia == NULL) {
 		m_freem(m);
 		icmpstat.icps_noroute++;
 		goto done;
 	}
 match:
 #ifdef MAC
 	mac_reflect_mbuf_icmp(m);
 #endif
 	/* ia now identifies the local address to use as the reply source. */
 	t = IA_SIN(ia)->sin_addr;
 	ip->ip_src = t;
 	ip->ip_ttl = ip_defttl;
 
 	if (optlen > 0) {
 		register u_char *cp;
 		int opt, cnt;
 		u_int len;
 
 		/*
 		 * Retrieve any source routing from the incoming packet;
 		 * add on any record-route or timestamp options.
 		 * If ip_srcroute() yields nothing, start a fresh mbuf
 		 * holding a single zeroed in_addr.
 		 */
 		cp = (u_char *) (ip + 1);
 		if ((opts = ip_srcroute()) == 0 &&
 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
 			opts->m_len = sizeof(struct in_addr);
 			mtod(opts, struct in_addr *)->s_addr = 0;
 		}
 		if (opts) {
 #ifdef ICMPPRINTFS
 		    if (icmpprintfs)
 			    printf("icmp_reflect optlen %d rt %d => ",
 				optlen, opts->m_len);
 #endif
 		    /*
 		     * Walk the option list; stop at end-of-list or at the
 		     * first option whose length field is inconsistent.
 		     */
 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
 			    opt = cp[IPOPT_OPTVAL];
 			    if (opt == IPOPT_EOL)
 				    break;
 			    if (opt == IPOPT_NOP)
 				    len = 1;
 			    else {
 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
 					    break;
 				    len = cp[IPOPT_OLEN];
 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
 				        len > cnt)
 					    break;
 			    }
 			    /*
 			     * Should check for overflow, but it "can't happen"
 			     */
 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
 				opt == IPOPT_SECURITY) {
 				    bcopy((caddr_t)cp,
 					mtod(opts, caddr_t) + opts->m_len, len);
 				    opts->m_len += len;
 			    }
 		    }
 		    /* Terminate & pad, if necessary, to a multiple of 4 bytes */
 		    cnt = opts->m_len % 4;
 		    if (cnt) {
 			    for (; cnt < 4; cnt++) {
 				    *(mtod(opts, caddr_t) + opts->m_len) =
 					IPOPT_EOL;
 				    opts->m_len++;
 			    }
 		    }
 #ifdef ICMPPRINTFS
 		    if (icmpprintfs)
 			    printf("%d\n", opts->m_len);
 #endif
 		}
 		/*
 		 * Now strip out original options by copying rest of first
 		 * mbuf's data back, and adjust the IP length.
 		 * This happens even when no options mbuf could be obtained.
 		 */
 		ip->ip_len -= optlen;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;
 		m->m_len -= optlen;
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len -= optlen;
 		/* optlen now becomes the offset of the data behind the old options. */
 		optlen += sizeof(struct ip);
 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
 			 (unsigned)(m->m_len - sizeof(struct ip)));
 	}
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	icmp_send(m, opts, ro);
 done:
 	/*
 	 * The options mbuf is released here (ip_output() documents that it
 	 * does not free it), and so is any route entry left in the on-stack
 	 * route.
 	 */
 	if (opts)
 		(void)m_free(opts);
 	if (ro && ro->ro_rt)
 		RTFREE(ro->ro_rt);
 }
 
 /*
  * Finish an outgoing ICMP message and hand it to the IP layer.
  *
  * m    - packet: IP header followed by the ICMP message.
  * opts - IP options mbuf to pass to ip_output(), or 0.
  * rt   - route structure to pass to ip_output().
  *
  * The ICMP checksum is computed over the ip_len - header-length bytes
  * that follow the IP header, and the receive interface recorded in the
  * packet header is cleared before the packet goes to ip_output().
  * The result of ip_output() is deliberately ignored.
  */
 static void
 icmp_send(m, opts, rt)
 	register struct mbuf *m;
 	struct mbuf *opts;
 	struct route *rt;
 {
 	register struct ip *iph = mtod(m, struct ip *);
 	register int iphlen = iph->ip_hl << 2;
 	register struct icmp *icmph;
 
 	/* Step over the IP header so the checksum sees only the ICMP part. */
 	m->m_len -= iphlen;
 	m->m_data += iphlen;
 	icmph = mtod(m, struct icmp *);
 	/* The checksum field must be zero while the sum is computed. */
 	icmph->icmp_cksum = 0;
 	icmph->icmp_cksum = in_cksum(m, iph->ip_len - iphlen);
 	/* Bring the IP header back into view. */
 	m->m_len += iphlen;
 	m->m_data -= iphlen;
 
 	m->m_pkthdr.rcvif = NULL;
 #ifdef ICMPPRINTFS
 	if (icmpprintfs) {
 		/* Keep a copy of the first address text before the second inet_ntoa() call. */
 		char dsttext[4 * sizeof "123"];
 
 		strcpy(dsttext, inet_ntoa(iph->ip_dst));
 		printf("icmp_send dst %s src %s\n",
 		       dsttext, inet_ntoa(iph->ip_src));
 	}
 #endif
 	(void) ip_output(m, opts, rt, 0, NULL, NULL);
 }
 
 n_time
 iptime()
 {
 	struct timeval atv;
 	u_long t;
 
 	getmicrotime(&atv);
 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
 	return (htonl(t));
 }
 
 #if 1
 /*
  * Return the next larger or smaller MTU plateau (table from RFC 1191)
  * given current value MTU.  If DIR is less than zero, a larger plateau
  * is returned; otherwise, a smaller value is returned.
  *
  * Returns 0 when there is no plateau in the requested direction (MTU is
  * already at or above the largest entry when growing, or at or below the
  * smallest one when shrinking).  A negative MTU also yields 0; without
  * the explicit check the table scan would run past the terminating 0
  * entry and read outside the table.
  */
 static int
 ip_next_mtu(int mtu, int dir)
 {
 	/* Plateaus in descending order; the trailing 0 is a sentinel. */
 	static const int mtutab[] = {
 		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
 		68, 0
 	};
 	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
 	int i;
 
 	/* No table entry compares <= a negative value; nothing to return. */
 	if (mtu < 0)
 		return 0;
 
 	/*
 	 * Find the first plateau that does not exceed mtu.  Since mtu >= 0
 	 * the sentinel guarantees the loop stops with i < ntab.
 	 */
 	for (i = 0; i < ntab; i++) {
 		if (mtu >= mtutab[i])
 			break;
 	}
 
 	if (dir < 0) {
 		/* Larger plateau: the entry just before the match, if any. */
 		if (i == 0) {
 			return 0;
 		} else {
 			return mtutab[i - 1];
 		}
 	} else {
 		if (mtutab[i] == 0) {
 			/* Already below the smallest plateau. */
 			return 0;
 		} else if (mtu > mtutab[i]) {
 			/* mtu lies between plateaus: drop to the one below. */
 			return mtutab[i];
 		} else {
 			/* mtu is exactly a plateau: step to the next smaller. */
 			return mtutab[i + 1];
 		}
 	}
 }
 #endif
 
 
 /*
  * badport_bandlim() - check for ICMP bandwidth limit
  *
  *	which	index into the table of rate-limited response classes
  *		below (one of the BANDLIM_* values).
  *		NOTE(review): the order of the table entries presumably
  *		matches the numeric values of the BANDLIM_* constants,
  *		which are defined elsewhere -- confirm.
  *
  *	Return 0 if it is ok to send an ICMP error response, -1 if we have
  *	hit our bandwidth limit and it is not ok.
  *
  *	If icmplim is <= 0, the feature is disabled and 0 is returned.
  *	0 is also returned when 'which' is outside the table.
  *
  *	For now we separate the TCP and UDP subsystems w/ different 'which'
  *	values.  We may eventually remove this separation (and simplify the
  *	code further).
  *
  *	Note that the printing of the error message is delayed so we can
  *	properly print the icmp error rate that the system was trying to do
  *	(i.e. 22000/100 pps, etc...).  This can cause long delays in printing
  *	the 'final' error, but it doesn't make sense to solve the printing
  *	delay with more complex code.
  */
 
 int
 badport_bandlim(int which)
 {
 #define	N(a)	(sizeof (a) / sizeof (a[0]))
 	/* Per-class rate state; persists across calls. */
 	static struct rate {
 		const char	*type;		/* text used in the log message */
 		struct timeval	lasttime;	/* state for ppsratecheck() */
 		int		curpps;		/* state for ppsratecheck() */
 	} rates[BANDLIM_MAX+1] = {
 		{ "icmp unreach response" },
 		{ "icmp ping response" },
 		{ "icmp tstamp response" },
 		{ "closed port RST response" },
 		{ "open port RST response" }
 	};
 
 	/*
 	 * Return ok status if feature disabled or argument out of range.
 	 * The unsigned cast makes a negative 'which' fail the range test.
 	 */
 	if (icmplim > 0 && (u_int) which < N(rates)) {
 		struct rate *r = &rates[which];
 		/* Remember the count before ppsratecheck() updates it. */
 		int opps = r->curpps;
 
 		if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim))
 			return -1;	/* discard packet */
 		/*
 		 * If we've dropped below the threshold after having
 		 * rate-limited traffic print the message.  This preserves
 		 * the previous behaviour at the expense of added complexity.
 		 */
 		if (icmplim_output && opps > icmplim)
 			printf("Limiting %s from %d to %d packets/sec\n",
 				r->type, opps, icmplim);
 	}
 	return 0;			/* okay to send packet */
 #undef N
 }
Index: head/sys/netinet/ip_output.c
===================================================================
--- head/sys/netinet/ip_output.c	(revision 120726)
+++ head/sys/netinet/ip_output.c	(revision 120727)
@@ -1,2260 +1,2260 @@
 /*
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  * $FreeBSD$
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipdn.h"
 #include "opt_ipdivert.h"
 #include "opt_ipfilter.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_pfil_hooks.h"
 #include "opt_random_ip_id.h"
 #include "opt_mbuf_stress_test.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 
 #ifdef PFIL_HOOKS
 #include <net/pfil.h>
 #endif
 
 #include <machine/in_cksum.h>
 
 /* Malloc type M_IPMOPTS ("ip_moptions"), described as internet multicast options. */
 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 #ifdef IPSEC_DEBUG
 #include <netkey/key_debug.h>
 #else
 #define	KEYDEBUG(lev,arg)
 #endif
 #endif /*IPSEC*/
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 #include <netipsec/key.h>
 #endif /*FAST_IPSEC*/
 
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 
 /*
  * Print prefix string x, the address a (an object with an s_addr member
  * that is passed through ntohl()) as a dotted quad, and suffix string y.
  * Arguments are parenthesized so that expressions can be passed safely,
  * and each octet is cast to int to match the %d conversions.
  * Note that `a' is evaluated four times, and that the expansion already
  * ends in a semicolon (kept for compatibility with existing callers),
  * so the macro must not be used as the unbraced body of an if/else.
  */
 #define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
 				(x), (int)((ntohl((a).s_addr)>>24)&0xFF),\
 				  (int)((ntohl((a).s_addr)>>16)&0xFF),\
 				  (int)((ntohl((a).s_addr)>>8)&0xFF),\
 				  (int)((ntohl((a).s_addr))&0xFF), (y));
 
 /*
  * Source of the IP identification field: ip_output() stores the current
  * value in outgoing headers and post-increments it, unless RANDOM_IP_ID
  * is configured, in which case ip_randomid() is used instead.
  */
 u_short ip_id;
 
 #ifdef MBUF_STRESS_TEST
 /* Read/write sysctl knob "mbuf_frag_size" under _net_inet_ip; defaults to 0. */
 int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 /* Helpers private to this file. */
 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
 static void	ip_mloopback
 	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
 static int	ip_getmoptions
 	(struct sockopt *, struct ip_moptions *);
 static int	ip_pcbopts(int, struct mbuf **, struct mbuf *);
 static int	ip_setmoptions
 	(struct sockopt *, struct ip_moptions **);
 
 /* Not static: has external linkage. */
 int	ip_optcopy(struct ip *, struct ip *);
 
 
 /* Protocol switch table, defined in another file. */
 extern	struct protosw inetsw[];
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  */
 int
 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
 	int flags, struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m;
 	int hlen = sizeof (struct ip);
 	int len, off, error = 0;
 	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
 	struct in_ifaddr *ia = NULL;
 	int isbroadcast, sw_csum;
 	struct in_addr pkt_dst;
 #ifdef IPSEC
 	struct route iproute;
 	struct secpolicy *sp = NULL;
 #endif
 #ifdef FAST_IPSEC
 	struct route iproute;
 	struct m_tag *mtag;
 	struct secpolicy *sp = NULL;
 	struct tdb_ident *tdbi;
 	int s;
 #endif /* FAST_IPSEC */
 	struct ip_fw_args args;
 	int src_was_INADDR_ANY = 0;	/* as the name says... */
 
 	args.eh = NULL;
 	args.rule = NULL;
 	args.next_hop = NULL;
 	args.divert_rule = 0;			/* divert cookie */
 
 	/* Grab info from MT_TAG mbufs prepended to the chain. */
 	for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) {
 		switch(m0->_m_tag_id) {
 		default:
 			printf("ip_output: unrecognised MT_TAG tag %d\n",
 			    m0->_m_tag_id);
 			break;
 
 		case PACKET_TAG_DUMMYNET:
 			/*
 			 * the packet was already tagged, so part of the
 			 * processing was already done, and we need to go down.
 			 * Get parameters from the header.
 			 */
 			args.rule = ((struct dn_pkt *)m0)->rule;
 			opt = NULL ;
 			ro = & ( ((struct dn_pkt *)m0)->ro ) ;
 			imo = NULL ;
 			dst = ((struct dn_pkt *)m0)->dn_dst ;
 			ifp = ((struct dn_pkt *)m0)->ifp ;
 			flags = ((struct dn_pkt *)m0)->flags ;
 			break;
 
 		case PACKET_TAG_DIVERT:
 			args.divert_rule = (intptr_t)m0->m_data & 0xffff;
 			break;
 
 		case PACKET_TAG_IPFORWARD:
 			args.next_hop = (struct sockaddr_in *)m0->m_data;
 			break;
 		}
 	}
 	m = m0;
 
 	M_ASSERTPKTHDR(m);
 #ifndef FAST_IPSEC
 	KASSERT(ro != NULL, ("ip_output: no route, proto %d",
 	    mtod(m, struct ip *)->ip_p));
 #endif
 
 	if (args.rule != NULL) {	/* dummynet already saw us */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2 ;
 		if (ro->ro_rt)
 			ia = ifatoia(ro->ro_rt->rt_ifa);
 		goto sendit;
 	}
 
 	if (opt) {
 		len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len;
 	}
 	ip = mtod(m, struct ip *);
 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
 
 	/*
 	 * Fill in IP header.  If we are not allowing fragmentation,
 	 * then the ip_id field is meaningless, so send it as zero
 	 * to reduce information leakage.  Otherwise, if we are not
 	 * randomizing ip_id, then don't bother to convert it to network
 	 * byte order -- it's just a nonce.  Note that a 16-bit counter
 	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
 	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
 	 * for Counting NATted Hosts", Proc. IMW'02, available at
 	 * <http://www.research.att.com/~smb/papers/fnat.pdf>.
 	 */
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		if ((ip->ip_off & IP_DF) == 0) {
 			ip->ip_off = 0;
 #ifdef RANDOM_IP_ID
 			ip->ip_id = ip_randomid();
 #else
 			ip->ip_id = ip_id++;
 #endif
 		} else {
 			ip->ip_off = IP_DF;
 			ip->ip_id = 0;
 		}
 		ipstat.ips_localout++;
 	} else {
 		hlen = ip->ip_hl << 2;
 	}
 
 #ifdef FAST_IPSEC
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 #endif /* FAST_IPSEC */
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
 	 * and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing the
 	 * cache with IPv6.
 	 */
 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != pkt_dst.s_addr)) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = (struct rtentry *)0;
 	}
 	if (ro->ro_rt == 0) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = pkt_dst;
 	}
 	/*
 	 * If routing to interface only,
 	 * short circuit routing lookup.
 	 */
 	if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
 			ipstat.ips_noroute++;
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * If this is the case, we probably don't want to allocate
 		 * a protocol-cloned route since we didn't get one from the
 		 * ULP.  This lets TCP do its thing, while not burdening
 		 * forwarding or ICMP with the overhead of cloning a route.
 		 * Of course, we still want to do any cloning requested by
 		 * the link layer, as this is probably required in all cases
 		 * for correct operation (as it is for ARP).
 		 */
 		if (ro->ro_rt == 0)
 			rtalloc_ign(ro, RTF_PRCLONING);
 		if (ro->ro_rt == 0) {
 			ipstat.ips_noroute++;
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_use++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 		if (ro->ro_rt->rt_flags & RTF_HOST)
 			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	}
 	if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
 		struct in_multi *inm;
 
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "dst"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		dst = (struct sockaddr_in *)&ro->ro_dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				ipstat.ips_noroute++;
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
 			/*
 			 * XXX
 			 * delayed checksums are not currently
 			 * compatible with IP multicast routing
 			 */
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				in_delayed_cksum(m);
 				m->m_pkthdr.csum_flags &=
 					~CSUM_DELAY_DATA;
 			}
 		}
 		IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
 		if (inm != NULL &&
 		   (imo == NULL || imo->imo_multicast_loop)) {
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
 			 * forbid loopback, loop back a copy.
 			 */
 			ip_mloopback(ifp, m, dst, hlen);
 		}
 		else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 #ifndef notdef
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outgoing interface.  In that case, keep note that we did
 	 * so, so that if the firewall changes the next hop, causing the
 	 * output interface to change, we can fix that.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 			src_was_INADDR_ANY = 1;
 		}
 	}
 #endif /* notdef */
 	/*
 	 * Verify that we have any chance at all of being able to queue
 	 *      the packet or packet fragments
 	 */
 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
 		ifp->if_snd.ifq_maxlen) {
 			error = ENOBUFS;
 			ipstat.ips_odropped++;
 			goto bad;
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip->ip_len > ifp->if_mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		if (flags & IP_SENDONES)
 			ip->ip_dst.s_addr = INADDR_BROADCAST;
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #ifdef IPSEC
 	/* get SP for this packet */
 	if (inp == NULL)
 		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
 	else
 		sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
 
 	if (sp == NULL) {
 		ipsecstat.out_inval++;
 		goto bad;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		ipsecstat.out_polvio++;
 		goto bad;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		goto skip_ipsec;
 	
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* acquire a policy */
 			error = key_spdacquire(sp);
 			goto bad;
 		}
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		printf("ip_output: Invalid policy found. %d\n", sp->policy);
 	}
     {
 	struct ipsec_output_state state;
 	bzero(&state, sizeof(state));
 	state.m = m;
 	if (flags & IP_ROUTETOIF) {
 		state.ro = &iproute;
 		bzero(&iproute, sizeof(iproute));
 	} else
 		state.ro = ro;
 	state.dst = (struct sockaddr *)dst;
 
 	ip->ip_sum = 0;
 
 	/*
 	 * XXX
 	 * delayed checksums are not currently compatible with IPsec
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	ip->ip_len = htons(ip->ip_len);
 	ip->ip_off = htons(ip->ip_off);
 
 	error = ipsec4_output(&state, sp, flags);
 
 	m = state.m;
 	if (flags & IP_ROUTETOIF) {
 		/*
 		 * if we have tunnel mode SA, we may need to ignore
 		 * IP_ROUTETOIF.
 		 */
 		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
 			flags &= ~IP_ROUTETOIF;
 			ro = state.ro;
 		}
 	} else
 		ro = state.ro;
 	dst = (struct sockaddr_in *)state.dst;
 	if (error) {
 		/* mbuf is already reclaimed in ipsec4_output. */
 		m0 = NULL;
 		switch (error) {
 		case EHOSTUNREACH:
 		case ENETUNREACH:
 		case EMSGSIZE:
 		case ENOBUFS:
 		case ENOMEM:
 			break;
 		default:
 			printf("ip4_output (ipsec): error code %d\n", error);
 			/*fall through*/
 		case ENOENT:
 			/* don't show these error codes to the user */
 			error = 0;
 			break;
 		}
 		goto bad;
 	}
     }
 
 	/* be sure to update variables that are affected by ipsec4_output() */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 	if (ro->ro_rt == NULL) {
 		if ((flags & IP_ROUTETOIF) == 0) {
 			printf("ip_output: "
 				"can't update route after IPsec processing\n");
 			error = EHOSTUNREACH;	/*XXX*/
 			goto bad;
 		}
 	} else {
 		ia = ifatoia(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
 	}
 
 	/* make it flipped, again. */
 	ip->ip_len = ntohs(ip->ip_len);
 	ip->ip_off = ntohs(ip->ip_off);
 skip_ipsec:
 #endif /*IPSEC*/
 #ifdef FAST_IPSEC
 	/*
 	 * Check the security policy (SP) for the packet and, if
 	 * required, do IPsec-related processing.  There are two
 	 * cases here; the first time a packet is sent through
 	 * it will be untagged and handled by ipsec4_checkpolicy.
 	 * If the packet is resubmitted to ip_output (e.g. after
 	 * AH, ESP, etc. processing), there will be a tag to bypass
 	 * the lookup and related policy checking.
 	 */
 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
 	s = splnet();
 	if (mtag != NULL) {
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
 		if (sp == NULL)
 			error = -EINVAL;	/* force silent drop */
 		m_tag_delete(m, mtag);
 	} else {
 		sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
 					&error, inp);
 	}
 	/*
 	 * There are four return cases:
 	 *    sp != NULL	 	    apply IPsec policy
 	 *    sp == NULL, error == 0	    no IPsec handling needed
 	 *    sp == NULL, error == -EINVAL  discard packet w/o error
 	 *    sp == NULL, error != 0	    discard packet, report error
 	 */
 	if (sp != NULL) {
 		/* Loop detection, check if ipsec processing already done */
 		KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
 		for (mtag = m_tag_first(m); mtag != NULL;
 		     mtag = m_tag_next(m, mtag)) {
 			if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
 				continue;
 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
 			    mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
 				continue;
 			/*
 			 * Check if policy has an SA associated with it.
 			 * This can happen when an SP has yet to acquire
 			 * an SA; e.g. on first reference.  If it occurs,
 			 * then we let ipsec4_process_packet do its thing.
 			 */
 			if (sp->req->sav == NULL)
 				break;
 			tdbi = (struct tdb_ident *)(mtag + 1);
 			if (tdbi->spi == sp->req->sav->spi &&
 			    tdbi->proto == sp->req->sav->sah->saidx.proto &&
 			    bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
 				 sizeof (union sockaddr_union)) == 0) {
 				/*
 				 * No IPsec processing is needed, free
 				 * reference to SP.
 				 *
 				 * NB: null pointer to avoid free at
 				 *     done: below.
 				 */
 				KEY_FREESP(&sp), sp = NULL;
 				splx(s);
 				goto spd_done;
 			}
 		}
 
 		/*
 		 * Do delayed checksums now because we send before
 		 * this is done in the normal processing path.
 		 */
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			in_delayed_cksum(m);
 			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		}
 
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 
 		/* NB: callee frees mbuf */
 		error = ipsec4_process_packet(m, sp->req, flags, 0);
 		/*
 		 * Preserve KAME behaviour: ENOENT can be returned
 		 * when an SA acquire is in progress.  Don't propagate
 		 * this to user-level; it confuses applications.
 		 *
 		 * XXX this will go away when the SADB is redone.
 		 */
 		if (error == ENOENT)
 			error = 0;
 		splx(s);
 		goto done;
 	} else {
 		splx(s);
 
 		if (error != 0) {
 			/*
 			 * Hack: -EINVAL is used to signal that a packet
 			 * should be silently discarded.  This is typically
 			 * because we asked key management for an SA and
 			 * it was delayed (e.g. kicked up to IKE).
 			 */
 			if (error == -EINVAL)
 				error = 0;
 			goto bad;
 		} else {
 			/* No IPsec processing for this packet. */
 		}
 #ifdef notyet
 		/*
 		 * If deferred crypto processing is needed, check that
 		 * the interface supports it.
 		 */ 
 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
 		if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
 			/* notify IPsec to do its own crypto */
 			ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 #endif
 	}
 spd_done:
 #endif /* FAST_IPSEC */
 
 	/*
 	 * IpHack's section.
 	 * - Xlate: translate packet's addr/port (NAT).
 	 * - Firewall: deny/allow/etc.
 	 * - Wrap: fake packet's addr/port <unimpl.>
 	 * - Encapsulate: put it in another IP and send out. <unimp.>
 	 */ 
 #ifdef PFIL_HOOKS
 	/*
 	 * Run through list of hooks for output packets.
 	 */
 	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT);
 	if (error != 0 || m == NULL)
 		goto done;
 	ip = mtod(m, struct ip *);
 #endif /* PFIL_HOOKS */
 
 	/*
 	 * Check with the firewall...
 	 * but not if we are already being fwd'd from a firewall.
 	 */
 	if (fw_enable && IPFW_LOADED && !args.next_hop) {
 		struct sockaddr_in *old = dst;
 
 		args.m = m;
 		args.next_hop = dst;
 		args.oif = ifp;
 		off = ip_fw_chk_ptr(&args);
 		m = args.m;
 		dst = args.next_hop;
 
                 /*
 		 * On return we must do the following:
 		 * m == NULL	-> drop the pkt (old interface, deprecated)
 		 * (off & IP_FW_PORT_DENY_FLAG)	-> drop the pkt (new interface)
 		 * 1<=off<= 0xffff		-> DIVERT
 		 * (off & IP_FW_PORT_DYNT_FLAG)	-> send to a DUMMYNET pipe
 		 * (off & IP_FW_PORT_TEE_FLAG)	-> TEE the packet
 		 * dst != old			-> IPFIREWALL_FORWARD
 		 * off==0, dst==old		-> accept
 		 * If some of the above modules are not compiled in, then
 		 * we shouldn't have to check the corresponding condition
 		 * (because the ipfw control socket should not accept
 		 * unsupported rules), but better play safe and drop
 		 * packets in case of doubt.
 		 */
 		if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
 			if (m)
 				m_freem(m);
 			error = EACCES;
 			goto done;
 		}
 		ip = mtod(m, struct ip *);
 		if (off == 0 && dst == old)		/* common case */
 			goto pass;
                 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
 			/*
 			 * pass the pkt to dummynet. Need to include
 			 * pipe number, m, ifp, ro, dst because these are
 			 * not recomputed in the next pass.
 			 * All other parameters have been already used and
 			 * so they are not needed anymore. 
 			 * XXX note: if the ifp or ro entry are deleted
 			 * while a pkt is in dummynet, we are in trouble!
 			 */ 
 			args.ro = ro;
 			args.dst = dst;
 			args.flags = flags;
 
 			error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
 				&args);
 			goto done;
 		}
 #ifdef IPDIVERT
 		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
 			struct mbuf *clone = NULL;
 
 			/* Clone packet if we're doing a 'tee' */
 			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
 				clone = m_dup(m, M_DONTWAIT);
 
 			/*
 			 * XXX
 			 * delayed checksums are not currently compatible
 			 * with divert sockets.
 			 */
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				in_delayed_cksum(m);
 				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 			}
 
 			/* Restore packet header fields to original values */
 			ip->ip_len = htons(ip->ip_len);
 			ip->ip_off = htons(ip->ip_off);
 
 			/* Deliver packet to divert input routine */
 			divert_packet(m, 0, off & 0xffff, args.divert_rule);
 
 			/* If 'tee', continue with original packet */
 			if (clone != NULL) {
 				m = clone;
 				ip = mtod(m, struct ip *);
 				goto pass;
 			}
 			goto done;
 		}
 #endif
 
 		/* IPFIREWALL_FORWARD */
 		/*
 		 * Check dst to make sure it is directly reachable on the
 		 * interface we previously thought it was.
 		 * If it isn't (which may be likely in some situations) we have
 		 * to re-route it (ie, find a route for the next-hop and the
 		 * associated interface) and set them here. This is nested
 		 * forwarding which in most cases is undesirable, except where
 		 * such control is nigh impossible. So we do it here.
 		 * And I'm babbling.
 		 */
 		if (off == 0 && old != dst) { /* FORWARD, dst has changed */
 #if 0
 			/*
 			 * XXX To improve readability, this block should be
 			 * changed into a function call as below:
 			 */
 			error = ip_ipforward(&m, &dst, &ifp);
 			if (error)
 				goto bad;
 			if (m == NULL) /* ip_input consumed the mbuf */
 				goto done;
 #else
 			struct in_ifaddr *ia;
 
 			/*
 			 * XXX sro_fwd below is static, and a pointer
 			 * to it gets passed to routines downstream.
 			 * This could have surprisingly bad results in
 			 * practice, because its content is overwritten
 			 * by subsequent packets.
 			 */
 			/* There must be a better way to do this next line... */
 			static struct route sro_fwd;
 			struct route *ro_fwd = &sro_fwd;
 
 #if 0
 			print_ip("IPFIREWALL_FORWARD: New dst ip: ",
 			    dst->sin_addr, "\n");
 #endif
 
 			/*
 			 * We need to figure out if we have been forwarded
 			 * to a local socket. If so, then we should somehow 
 			 * "loop back" to ip_input, and get directed to the
 			 * PCB as if we had received this packet. This is
 			 * because it may be difficult to identify the packets
 			 * you want to forward until they are being output
 			 * and have selected an interface. (e.g. locally
 			 * initiated packets) If we used the loopback interface,
 			 * we would not be able to control what happens 
 			 * as the packet runs through ip_input() as
 			 * it is done through an ISR.
 			 */
 			LIST_FOREACH(ia,
 			    INADDR_HASH(dst->sin_addr.s_addr), ia_hash) {
 				/*
 				 * If the addr to forward to is one
 				 * of ours, we pretend to
 				 * be the destination for this packet.
 				 */
 				if (IA_SIN(ia)->sin_addr.s_addr ==
 						 dst->sin_addr.s_addr)
 					break;
 			}
 			if (ia) {	/* tell ip_input "dont filter" */
 				struct m_hdr tag;
 
 				tag.mh_type = MT_TAG;
 				tag.mh_flags = PACKET_TAG_IPFORWARD;
 				tag.mh_data = (caddr_t)args.next_hop;
 				tag.mh_next = m;
 
 				if (m->m_pkthdr.rcvif == NULL)
 					m->m_pkthdr.rcvif = ifunit("lo0");
 				if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 					m->m_pkthdr.csum_flags |=
 					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 					m0->m_pkthdr.csum_data = 0xffff;
 				}
 				m->m_pkthdr.csum_flags |=
 				    CSUM_IP_CHECKED | CSUM_IP_VALID;
 				ip->ip_len = htons(ip->ip_len);
 				ip->ip_off = htons(ip->ip_off);
 				ip_input((struct mbuf *)&tag);
 				goto done;
 			}
 			/* Some of the logic for this was
 			 * nicked from above.
 			 *
 			 * This rewrites the cached route in a local PCB.
 			 * Is this what we want to do?
 			 */
 			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
 
 			ro_fwd->ro_rt = 0;
 			rtalloc_ign(ro_fwd, RTF_PRCLONING);
 
 			if (ro_fwd->ro_rt == 0) {
 				ipstat.ips_noroute++;
 				error = EHOSTUNREACH;
 				goto bad;
 			}
 
 			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
 			ifp = ro_fwd->ro_rt->rt_ifp;
 			ro_fwd->ro_rt->rt_use++;
 			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
 				dst = (struct sockaddr_in *)
 					ro_fwd->ro_rt->rt_gateway;
 			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
 				isbroadcast =
 				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
 			else
 				isbroadcast = in_broadcast(dst->sin_addr, ifp);
 			if (ro->ro_rt)
 				RTFREE(ro->ro_rt);
 			ro->ro_rt = ro_fwd->ro_rt;
 			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
 
 #endif	/* ... block to be put into a function */
 			/*
 			 * If we added a default src ip earlier,
 			 * which would have been gotten from the-then
 			 * interface, do it again, from the new one.
 			 */
 			if (src_was_INADDR_ANY)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 			goto pass ;
 		}
 
                 /*
                  * if we get here, none of the above matches, and 
                  * we have to drop the pkt
                  */
 		m_freem(m);
                 error = EACCES; /* not sure this is the right error msg */
                 goto done;
 	}
 
 pass:
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			ipstat.ips_badaddr++;
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
 	if (sw_csum & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m);
 		sw_csum &= ~CSUM_DELAY_DATA;
 	}
 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, can just send directly.
 	 */
 	if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) {
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			ip->ip_sum = in_cksum(m, hlen);
 
 		/* Record statistics for this interface address. */
 		if (!(flags & IP_FORWARDING) && ia) {
 			ia->ia_ifa.if_opackets++;
 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 		}
 
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
 #endif
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, ro->ro_rt);
 		goto done;
 	}
 
 	if (ip->ip_off & IP_DF) {
 		error = EMSGSIZE;
 		/*
 		 * This case can happen if the user changed the MTU
 		 * of an interface after enabling IP on it.  Because
 		 * most netifs don't keep track of routes pointing to
 		 * them, there is no way for one to update all its
 		 * routes when the MTU is changed.
 		 */
 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
 		    !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
 		    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
 		}
 		ipstat.ips_cantfrag++;
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
 	if (error)
 		goto bad;
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				ia->ia_ifa.if_opackets++;
 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 			}
 			
 			error = (*ifp->if_output)(ifp, m,
 			    (struct sockaddr *)dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		ipstat.ips_fragmented++;
 
 done:
 #ifdef IPSEC
 	if (ro == &iproute && ro->ro_rt) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = NULL;
 	}
 	if (sp != NULL) {
 		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
 			printf("DP ip_output call free SP:%p\n", sp));
 		key_freesp(sp);
 	}
 #endif
 #ifdef FAST_IPSEC
 	if (ro == &iproute && ro->ro_rt) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = NULL;
 	}
 	if (sp != NULL)
 		KEY_FREESP(&sp);
 #endif
 	return (error);
 bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
  *
  * Parameters:
  *	ip		header of the packet in *m_frag; ip_len and ip_off are
  *			read as host-order values and are rewritten with
  *			htons() on the success path.
  *	m_frag		in: the packet to split; out: the original mbuf, which
  *			on success has become the first fragment with the
  *			remaining fragments linked through m_nextpkt.
  *	mtu		maximum size of each fragment including its IP header.
  *
  * Returns 0 on success, EMSGSIZE when IP_DF is set or fewer than 8 payload
  * bytes fit in a fragment, and ENOBUFS when an mbuf allocation or copy
  * fails.  On the ENOBUFS paths the first fragment has not been trimmed or
  * byte-swapped, and the fragments built so far are still linked off
  * m0->m_nextpkt.
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags, int sw_csum)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 
 	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
 		ipstat.ips_cantfrag++;
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
 	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 		struct mbuf *m;
 
 		/* Sum the lengths of the leading mbufs that fit within mtu. */
 		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
 			off += m->m_len;
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		/* Payload size for later fragments: mtu rounded down to pages. */
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	/* off now marks the end of the first fragment within the packet. */
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
 		if (m == 0) {
 			error = ENOBUFS;
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copy().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			/* Options present: keep only those ip_optcopy() retains. */
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip->ip_off below ? */
 		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
 		if (off + len >= ip->ip_len) {	/* last fragment */
 			len = ip->ip_len - off;
 			m->m_flags |= M_LASTFRAG;
 		} else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == 0) {		/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 		mac_create_fragment(m0, m);
 #endif
 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			mhip->ip_sum = in_cksum(m, mhlen);
 		/* Append the finished fragment to the m_nextpkt list. */
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	ipstat.ips_ofragments += nfrags;
 
 	/* set first marker for fragment chain */
 	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
 	/* The fragment count is recorded in the first fragment's csum_data. */
 	m0->m_pkthdr.csum_data = nfrags;
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 */
 	/* The argument is negative here: the bytes come off the tail. */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off |= IP_MF;
 	ip->ip_off = htons(ip->ip_off);
 	ip->ip_sum = 0;
 	if (sw_csum & CSUM_DELAY_IP)
 		ip->ip_sum = in_cksum(m0, hlen);
 
 done:
 	/* Reached on success and on ENOBUFS; hand the chain head back. */
 	*m_frag = m0;
 	return error;
 }
 
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *ip;
 	u_short csum, offset;
 
 	ip = mtod(m, struct ip *);
 	offset = ip->ip_hl << 2 ;
 	csum = in_cksum_skip(m, ip->ip_len, offset);
 	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
 		csum = 0xffff;
 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
 
 	if (offset + sizeof(u_short) > m->m_len) {
 		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
 		    m->m_len, offset, ip->ip_p);
 		/*
 		 * XXX
 		 * this shouldn't happen, but if it does, the
 		 * correct behavior may be to insert the checksum
 		 * in the existing chain instead of rearranging it.
 		 */
 		m = m_pullup(m, offset + sizeof(u_short));
 	}
 	*(u_short *)(m->m_data + offset) = csum;
 }
 
 /*
  * Insert IP options into preformed packet.
  * Adjust IP destination as required for IP source routing,
  * as indicated by a non-zero in_addr at the start of the options.
  *
  * XXX This routine assumes that the packet has no options in place.
  */
 static struct mbuf *
 ip_insertoptions(m, opt, phlen)
 	register struct mbuf *m;
 	struct mbuf *opt;
 	int *phlen;
 {
 	register struct ipoption *p = mtod(opt, struct ipoption *);
 	struct mbuf *n;
 	register struct ip *ip = mtod(m, struct ip *);
 	unsigned optlen;
 
 	optlen = opt->m_len - sizeof(p->ipopt_dst);
 	if (optlen + ip->ip_len > IP_MAXPACKET) {
 		*phlen = 0;
 		return (m);		/* XXX should fail */
 	}
 	if (p->ipopt_dst.s_addr)
 		ip->ip_dst = p->ipopt_dst;
 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
 		if (n == 0) {
 			*phlen = 0;
 			return (m);
 		}
 		n->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 		mac_create_mbuf_from_mbuf(m, n);
 #endif
 		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
 		m->m_len -= sizeof(struct ip);
 		m->m_data += sizeof(struct ip);
 		n->m_next = m;
 		m = n;
 		m->m_len = optlen + sizeof(struct ip);
 		m->m_data += max_linkhdr;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	} else {
 		m->m_data -= optlen;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	}
 	ip = mtod(m, struct ip *);
 	bcopy(p->ipopt_list, ip + 1, optlen);
 	*phlen = sizeof(struct ip) + optlen;
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = *phlen >> 2;
 	ip->ip_len += optlen;
 	return (m);
 }
 
 /*
  * Copy options from ip to jp,
  * omitting those not copied during fragmentation.
  */
 int
 ip_optcopy(ip, jp)
 	struct ip *ip, *jp;
 {
 	register u_char *cp, *dp;
 	int opt, optlen, cnt;
 
 	cp = (u_char *)(ip + 1);
 	dp = (u_char *)(jp + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[0];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP) {
 			/* Preserve for IP mcast tunnel's LSRR alignment. */
 			*dp++ = IPOPT_NOP;
 			optlen = 1;
 			continue;
 		}
 
 		KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
 		    ("ip_optcopy: malformed ipv4 option"));
 		optlen = cp[IPOPT_OLEN];
 		KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
 		    ("ip_optcopy: malformed ipv4 option"));
 
 		/* bogus lengths should have been caught by ip_dooptions */
 		if (optlen > cnt)
 			optlen = cnt;
 		if (IPOPT_COPIED(opt)) {
 			bcopy(cp, dp, optlen);
 			dp += optlen;
 		}
 	}
 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
 		*dp++ = IPOPT_EOL;
 	return (optlen);
 }
 
 /*
  * IP socket option processing.
  *
  * Handles set (SOPT_SET) and get (SOPT_GET) requests at level IPPROTO_IP
  * for the socket so; any other level yields EINVAL.  Unknown option names
  * yield ENOPROTOOPT.  Returns 0 on success or an errno value.
  *
  * Setting IP_OPTIONS copies the user buffer into a fresh mbuf and hands
  * it to ip_pcbopts(), which takes ownership of the mbuf.  If the copy
  * from the caller fails, the mbuf is freed here and the error returned;
  * the options currently installed in the pcb are left untouched.  (An
  * earlier version ignored the copy error and passed the uninitialized
  * mbuf to ip_pcbopts().)
  */
 int
 ip_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		return (EINVAL);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			/* The options must fit into a single plain mbuf. */
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
 			if (m == 0) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				/*
 				 * Nothing valid was copied in; do not let
 				 * ip_pcbopts() parse uninitialized data.
 				 */
 				(void)m_free(m);
 				break;
 			}
 
 			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
 					   m));
 		}
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 			/* All of these take a single int argument. */
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 /* Set or clear a flag bit in inp_flags depending on optval. */
 #define	OPTSET(bit) \
 	if (optval) \
 		inp->inp_flags |= bit; \
 	else \
 		inp->inp_flags &= ~bit;
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_FAITH:
 				OPTSET(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			}
 			break;
 #undef OPTSET
 
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 			error = ip_setmoptions(sopt, &inp->inp_moptions);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/* INP_LOWPORT and INP_HIGHPORT are mutually exclusive. */
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			size_t len = 0;
 			int priv;
 			struct mbuf *m;
 			int optname;
 
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			/*
 			 * priv is 1 when there is no requesting thread or
 			 * suser() returns 0 for it, and 0 otherwise.
 			 */
 			priv = (sopt->sopt_td != NULL &&
 				suser(sopt->sopt_td) != 0) ? 0 : 1;
 			req = mtod(m, caddr_t);
 			len = m->m_len;
 			optname = sopt->sopt_name;
 			error = ipsec4_set_policy(inp, optname, req, len, priv);
 			m_freem(m);
 			break;
 		}
 #endif /*IPSEC*/
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			/* Report the stored option mbuf, or zero length if none. */
 			if (inp->inp_options)
 				error = sooptcopyout(sopt, 
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 /* Evaluates to 1 if the flag bit is set in inp_flags, else 0. */
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_FAITH:
 				optval = OPTBIT(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 			error = ip_getmoptions(sopt, inp->inp_moptions);
 			break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			/* m is still NULL here, so req/len stay NULL/0. */
 			if (m != 0) {
 				req = mtod(m, caddr_t);
 				len = m->m_len;
 			}
 			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /*IPSEC*/
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Set up IP options in pcb for insertion in output packets.
  * Store in mbuf with pointer in pcbopt, adding pseudo-option
  * with destination address if source routed.
  *
  * optname is not examined by this routine.  Any options previously
  * stored in *pcbopt are freed first, before the new ones are validated.
  * m is always consumed: on success it is stored in *pcbopt, otherwise
  * it is freed.  A NULL or empty m merely clears the stored options.
  *
  * Returns 0 on success and EINVAL when the option list is not a
  * multiple of four bytes, does not leave room for the leading address,
  * is malformed, or exceeds MAX_IPOPTLEN.
  */
 static int
 ip_pcbopts(optname, pcbopt, m)
 	int optname;
 	struct mbuf **pcbopt;
 	register struct mbuf *m;
 {
 	register int cnt, optlen;
 	register u_char *cp;
 	u_char opt;
 
 	/* turn off any old options */
 	if (*pcbopt)
 		(void)m_free(*pcbopt);
 	*pcbopt = 0;
 	if (m == (struct mbuf *)0 || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options.
 		 */
 		if (m)
 			(void)m_free(m);
 		return (0);
 	}
 
 	if (m->m_len % sizeof(int32_t))
 		goto bad;
 	/*
 	 * IP first-hop destination address will be stored before
 	 * actual options; move other options back
 	 * and clear it when none present.
 	 */
 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
 		goto bad;
 	cnt = m->m_len;
 	m->m_len += sizeof(struct in_addr);
 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
 	bcopy(mtod(m, void *), cp, (unsigned)cnt);
 	bzero(mtod(m, void *), sizeof(struct in_addr));
 
 	/* Walk the options; cnt is the number of bytes left from cp on. */
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < IPOPT_OLEN + sizeof(*cp))
 				goto bad;
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 				goto bad;
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 		case IPOPT_SSRR:
 			/*
 			 * user process specifies route as:
 			 *	->A->B->C->D
 			 * D must be our final destination (but we can't
 			 * check that since we may not have connected yet).
 			 * A is first hop destination, which doesn't appear in
 			 * actual IP option, but is stored before the options.
 			 */
 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 				goto bad;
 			m->m_len -= sizeof(struct in_addr);
 			cnt -= sizeof(struct in_addr);
 			optlen -= sizeof(struct in_addr);
 			cp[IPOPT_OLEN] = optlen;
 			/*
 			 * Move first hop before start of options.
 			 */
 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
 			    sizeof(struct in_addr));
 			/*
 			 * Then copy rest of options back
 			 * to close up the deleted entry.
 			 *
 			 * cnt has already been reduced by the size of the
 			 * removed address, so the data that follows that
 			 * address is cnt minus the three option header
 			 * bytes (type, length, pointer) that stay in place.
 			 * Copying more than that would run past the end of
 			 * the option data.
 			 */
 			bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
 			    &cp[IPOPT_OFFSET+1],
 			    (unsigned)cnt - (IPOPT_MINOFF - 1));
 			break;
 		}
 	}
 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 		goto bad;
 	*pcbopt = m;
 	return (0);
 
 bad:
 	(void)m_free(m);
 	return (EINVAL);
 }
 
 /*
  * XXX
  * The whole multicast option thing needs to be re-thought.
  * Several of these options are equally applicable to non-multicast
  * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
  * standard option (IP_TTL).
  */
 
 /*
  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
  */
 static struct ifnet *
 ip_multicast_if(a, ifindexp)
 	struct in_addr *a;
 	int *ifindexp;
 {
 	int ifindex;
 	struct ifnet *ifp;
 
 	if (ifindexp)
 		*ifindexp = 0;
 	if (ntohl(a->s_addr) >> 24 == 0) {
 		ifindex = ntohl(a->s_addr) & 0xffffff;
 		if (ifindex < 0 || if_index < ifindex)
 			return NULL;
 		ifp = ifnet_byindex(ifindex);
 		if (ifindexp)
 			*ifindexp = ifindex;
 	} else {
 		INADDR_TO_IFP(*a, ifp);
 	}
 	return ifp;
 }
 
 /*
  * Set the IP multicast options in response to user setsockopt().
  *
  * sopt names the option (sopt_name) and carries the user-supplied value,
  * which is fetched with sooptcopyin().  imop points at the pcb's pointer
  * to its multicast option structure; if that pointer is NULL a structure
  * is allocated here and filled with default values before the option is
  * applied.
  *
  * Options handled: IP_MULTICAST_VIF, IP_MULTICAST_IF, IP_MULTICAST_TTL,
  * IP_MULTICAST_LOOP, IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP; any other
  * name yields EOPNOTSUPP.
  *
  * On the way out, if every field of the structure holds its default
  * value and there are no memberships, the structure is freed and *imop
  * is reset to NULL.  This happens whether or not an error is returned.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 ip_setmoptions(sopt, imop)
 	struct sockopt *sopt;
 	struct ip_moptions **imop;
 {
 	int error = 0;
 	int i;
 	struct in_addr addr;
 	struct ip_mreq mreq;
 	struct ifnet *ifp;
 	struct ip_moptions *imo = *imop;
 	struct route ro;
 	struct sockaddr_in *dst;
 	int ifindex;
 	int s;
 
 	if (imo == NULL) {
 		/*
 		 * No multicast option buffer attached to the pcb;
 		 * allocate one and initialize to default values.
 		 */
 		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
 		    M_WAITOK);
 
 		if (imo == NULL)
 			return (ENOBUFS);
 		*imop = imo;
 		imo->imo_multicast_ifp = NULL;
 		imo->imo_multicast_addr.s_addr = INADDR_ANY;
 		imo->imo_multicast_vif = -1;
 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 		imo->imo_num_memberships = 0;
 	}
 
 	switch (sopt->sopt_name) {
 	/* store an index number for the vif you wanna use in the send */
 	case IP_MULTICAST_VIF:
 		/*
 		 * legal_vif_num is a function pointer; when it is unset
 		 * the option cannot be validated and is rejected.
 		 * NOTE(review): presumably it is only set while multicast
 		 * routing support is present -- confirm.
 		 */
 		if (legal_vif_num == 0) {
 			error = EOPNOTSUPP;
 			break;
 		}
 		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 		if (error)
 			break;
 		/* -1 is accepted in addition to legal vif numbers. */
 		if (!legal_vif_num(i) && (i != -1)) {
 			error = EINVAL;
 			break;
 		}
 		imo->imo_multicast_vif = i;
 		break;
 
 	case IP_MULTICAST_IF:
 		/*
 		 * Select the interface for outgoing multicast packets.
 		 */
 		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
 		if (error)
 			break;
 		/*
 		 * INADDR_ANY is used to remove a previous selection.
 		 * When no interface is selected, a default one is
 		 * chosen every time a multicast packet is sent.
 		 */
 		if (addr.s_addr == INADDR_ANY) {
 			imo->imo_multicast_ifp = NULL;
 			break;
 		}
 		/*
 		 * The selected interface is identified by its local
 		 * IP address.  Find the interface and confirm that
 		 * it supports multicasting.
 		 */
 		s = splimp();
 		ifp = ip_multicast_if(&addr, &ifindex);
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			splx(s);
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		imo->imo_multicast_ifp = ifp;
 		/*
 		 * Remember the user's value only when it was given in
 		 * interface-index form (ifindex non-zero); otherwise
 		 * record INADDR_ANY.
 		 */
 		if (ifindex)
 			imo->imo_multicast_addr = addr;
 		else
 			imo->imo_multicast_addr.s_addr = INADDR_ANY;
 		splx(s);
 		break;
 
 	case IP_MULTICAST_TTL:
 		/*
 		 * Set the IP time-to-live for outgoing multicast packets.
 		 * The original multicast API required a char argument,
 		 * which is inconsistent with the rest of the socket API.
 		 * We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == 1) {
 			u_char ttl;
 			error = sooptcopyin(sopt, &ttl, 1, 1);
 			if (error)
 				break;
 			imo->imo_multicast_ttl = ttl;
 		} else {
 			u_int ttl;
 			error = sooptcopyin(sopt, &ttl, sizeof ttl, 
 					    sizeof ttl);
 			if (error)
 				break;
 			/* An int-sized value must still fit in one octet. */
 			if (ttl > 255)
 				error = EINVAL;
 			else
 				imo->imo_multicast_ttl = ttl;
 		}
 		break;
 
 	case IP_MULTICAST_LOOP:
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.  The original multicast API required a
 		 * char argument, which is inconsistent with the rest
 		 * of the socket API.  We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == 1) {
 			u_char loop;
 			error = sooptcopyin(sopt, &loop, 1, 1);
 			if (error)
 				break;
 			/* Any non-zero value is normalized to 1. */
 			imo->imo_multicast_loop = !!loop;
 		} else {
 			u_int loop;
 			error = sooptcopyin(sopt, &loop, sizeof loop,
 					    sizeof loop);
 			if (error)
 				break;
 			imo->imo_multicast_loop = !!loop;
 		}
 		break;
 
 	case IP_ADD_MEMBERSHIP:
 		/*
 		 * Add a multicast group membership.
 		 * Group must be a valid IP multicast address.
 		 */
 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
 		if (error)
 			break;
 
 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
 			error = EINVAL;
 			break;
 		}
 		s = splimp();
 		/*
 		 * If no interface address was provided, use the interface of
 		 * the route to the given multicast address.
 		 */
 		if (mreq.imr_interface.s_addr == INADDR_ANY) {
 			bzero((caddr_t)&ro, sizeof(ro));
 			dst = (struct sockaddr_in *)&ro.ro_dst;
 			dst->sin_len = sizeof(*dst);
 			dst->sin_family = AF_INET;
 			dst->sin_addr = mreq.imr_multiaddr;
 			rtalloc(&ro);
 			if (ro.ro_rt == NULL) {
 				error = EADDRNOTAVAIL;
 				splx(s);
 				break;
 			}
 			/* Only the interface is needed; drop the route. */
 			ifp = ro.ro_rt->rt_ifp;
-			rtfree(ro.ro_rt);
+			RTFREE(ro.ro_rt);
 		}
 		else {
 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
 		}
 
 		/*
 		 * See if we found an interface, and confirm that it
 		 * supports multicast.
 		 */
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			splx(s);
 			break;
 		}
 		/*
 		 * See if the membership already exists or if all the
 		 * membership slots are full.
 		 */
 		for (i = 0; i < imo->imo_num_memberships; ++i) {
 			if (imo->imo_membership[i]->inm_ifp == ifp &&
 			    imo->imo_membership[i]->inm_addr.s_addr
 						== mreq.imr_multiaddr.s_addr)
 				break;
 		}
 		/* Loop stopped early: the same group/interface is present. */
 		if (i < imo->imo_num_memberships) {
 			error = EADDRINUSE;
 			splx(s);
 			break;
 		}
 		/* Here i equals the membership count; check for a free slot. */
 		if (i == IP_MAX_MEMBERSHIPS) {
 			error = ETOOMANYREFS;
 			splx(s);
 			break;
 		}
 		/*
 		 * Everything looks good; add a new record to the multicast
 		 * address list for the given interface.
 		 */
 		if ((imo->imo_membership[i] =
 		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
 			error = ENOBUFS;
 			splx(s);
 			break;
 		}
 		++imo->imo_num_memberships;
 		splx(s);
 		break;
 
 	case IP_DROP_MEMBERSHIP:
 		/*
 		 * Drop a multicast group membership.
 		 * Group must be a valid IP multicast address.
 		 */
 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
 		if (error)
 			break;
 
 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
 			error = EINVAL;
 			break;
 		}
 
 		s = splimp();
 		/*
 		 * If an interface address was specified, get a pointer
 		 * to its ifnet structure.
 		 */
 		if (mreq.imr_interface.s_addr == INADDR_ANY)
 			ifp = NULL;
 		else {
 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
 			if (ifp == NULL) {
 				error = EADDRNOTAVAIL;
 				splx(s);
 				break;
 			}
 		}
 		/*
 		 * Find the membership in the membership array.
 		 * A NULL ifp matches the group on any interface.
 		 */
 		for (i = 0; i < imo->imo_num_memberships; ++i) {
 			if ((ifp == NULL ||
 			     imo->imo_membership[i]->inm_ifp == ifp) &&
 			     imo->imo_membership[i]->inm_addr.s_addr ==
 			     mreq.imr_multiaddr.s_addr)
 				break;
 		}
 		if (i == imo->imo_num_memberships) {
 			error = EADDRNOTAVAIL;
 			splx(s);
 			break;
 		}
 		/*
 		 * Give up the multicast address record to which the
 		 * membership points.
 		 */
 		in_delmulti(imo->imo_membership[i]);
 		/*
 		 * Remove the gap in the membership array.
 		 */
 		for (++i; i < imo->imo_num_memberships; ++i)
 			imo->imo_membership[i-1] = imo->imo_membership[i];
 		--imo->imo_num_memberships;
 		splx(s);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	/*
 	 * If all options have default values, no need to keep the
 	 * option structure: free it and clear the pcb's pointer.
 	 */
 	if (imo->imo_multicast_ifp == NULL &&
 	    imo->imo_multicast_vif == -1 &&
 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
 	    imo->imo_num_memberships == 0) {
 		free(*imop, M_IPMOPTS);
 		*imop = NULL;
 	}
 
 	return (error);
 }
 
 /*
  * Return the IP multicast options in response to user getsockopt().
  *
  * imo may be NULL, in which case the default value of the requested
  * option is reported.  IP_MULTICAST_TTL and IP_MULTICAST_LOOP are copied
  * out as a single byte when the caller's buffer size is 1 and as an int
  * otherwise.  Unknown option names yield ENOPROTOOPT; otherwise the
  * result of sooptcopyout() is returned.
  */
 static int
 ip_getmoptions(sopt, imo)
 	struct sockopt *sopt;
 	register struct ip_moptions *imo;
 {
 	struct in_addr addr;
 	struct in_ifaddr *ia;
 	int optval;
 	u_char coptval;
 
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF:
 		optval = -1;
 		if (imo != NULL)
 			optval = imo->imo_multicast_vif;
 		return (sooptcopyout(sopt, &optval, sizeof optval));
 
 	case IP_MULTICAST_IF:
 		addr.s_addr = INADDR_ANY;
 		if (imo != NULL && imo->imo_multicast_ifp != NULL) {
 			if (imo->imo_multicast_addr.s_addr) {
 				/* hand back exactly what the user stored */
 				addr = imo->imo_multicast_addr;
 			} else {
 				/* report an address of the chosen interface */
 				IFP_TO_IA(imo->imo_multicast_ifp, ia);
 				if (ia != NULL)
 					addr.s_addr =
 					    IA_SIN(ia)->sin_addr.s_addr;
 			}
 		}
 		return (sooptcopyout(sopt, &addr, sizeof addr));
 
 	case IP_MULTICAST_TTL:
 		coptval = IP_DEFAULT_MULTICAST_TTL;
 		if (imo != NULL)
 			coptval = imo->imo_multicast_ttl;
 		optval = coptval;
 		if (sopt->sopt_valsize == 1)
 			return (sooptcopyout(sopt, &coptval, 1));
 		return (sooptcopyout(sopt, &optval, sizeof optval));
 
 	case IP_MULTICAST_LOOP:
 		coptval = IP_DEFAULT_MULTICAST_LOOP;
 		if (imo != NULL)
 			coptval = imo->imo_multicast_loop;
 		optval = coptval;
 		if (sopt->sopt_valsize == 1)
 			return (sooptcopyout(sopt, &coptval, 1));
 		return (sooptcopyout(sopt, &optval, sizeof optval));
 
 	default:
 		break;
 	}
 	return (ENOPROTOOPT);
 }
 
 /*
  * Discard the IP multicast options: release every group membership
  * recorded in imo with in_delmulti(), then free the structure itself.
  * A NULL imo is accepted and ignored.
  */
 void
 ip_freemoptions(imo)
 	register struct ip_moptions *imo;
 {
 	register int slot;
 
 	if (imo == NULL)
 		return;
 
 	slot = 0;
 	while (slot < imo->imo_num_memberships) {
 		in_delmulti(imo->imo_membership[slot]);
 		slot++;
 	}
 	free(imo, M_IPMOPTS);
 }
 
 /*
  * Called from ip_output() to hand a copy of an outgoing IP multicast
  * packet back to the input side of the given interface.  The copy is
  * delivered through if_simloop(), i.e. the loopback "driver" output
  * path, even though ifp need not be a loopback interface -- evil, but
  * easier than replicating that code here.
  *
  * m itself is left alone; only the copy is modified.  hlen is the IP
  * header length used for the pullup and for the header checksum.  If
  * the copy (or the pullup) cannot be obtained the packet is simply not
  * looped back.
  */
 static void
 ip_mloopback(ifp, m, dst, hlen)
 	struct ifnet *ifp;
 	register struct mbuf *m;
 	register struct sockaddr_in *dst;
 	int hlen;
 {
 	register struct ip *iph;
 	struct mbuf *copym;
 
 	copym = m_copy(m, 0, M_COPYALL);
 	if (copym == NULL)
 		return;
 	/* The header is rewritten below; pull it up when the check says so. */
 	if ((copym->m_flags & M_EXT) != 0 || copym->m_len < hlen) {
 		copym = m_pullup(copym, hlen);
 		if (copym == NULL)
 			return;
 	}
 
 	/*
 	 * No fragmentation is attempted even when the IP length exceeds
 	 * the interface's MTU.  Can this possibly matter?
 	 */
 	iph = mtod(copym, struct ip *);
 	iph->ip_len = htons(iph->ip_len);
 	iph->ip_off = htons(iph->ip_off);
 	iph->ip_sum = 0;
 	iph->ip_sum = in_cksum(copym, hlen);
 	/*
 	 * NB:
 	 * Whether calling ip_input directly would expose lingering
 	 * reentrancy problems elsewhere (notably in code that modifies
 	 * the packet in place) is unclear.  Delivering through the
 	 * protosw directly would be yet another option.  For now the
 	 * conservative choice is made and if_simloop() is used.
 	 */
 #if 1 /* XXX */
 	if (dst->sin_family != AF_INET) {
 		printf("ip_mloopback: bad address family %d\n",
 		    dst->sin_family);
 		dst->sin_family = AF_INET;
 	}
 #endif
 
 #ifdef notdef
 	copym->m_pkthdr.rcvif = ifp;
 	ip_input(copym);
 #else
 	/* a checksum that was left to be computed later is marked valid */
 	if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		copym->m_pkthdr.csum_flags |=
 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		copym->m_pkthdr.csum_data = 0xffff;
 	}
 	if_simloop(ifp, copym, dst->sin_family, 0);
 #endif
 }
Index: head/sys/netinet6/icmp6.c
===================================================================
--- head/sys/netinet6/icmp6.c	(revision 120726)
+++ head/sys/netinet6/icmp6.c	(revision 120727)
@@ -1,2894 +1,2892 @@
 /*	$FreeBSD$	*/
 /*	$KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/nd6.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 #endif
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #define	IPSEC
 #endif
 
 #include <net/net_osdep.h>
 
 #ifdef HAVE_NRL_INPCB
 /* inpcb members */
 #define in6pcb		inpcb
 #define in6p_laddr	inp_laddr6
 #define in6p_faddr	inp_faddr6
 #define in6p_icmp6filt	inp_icmp6filt
 #define in6p_route	inp_route
 #define in6p_socket	inp_socket
 #define in6p_flags	inp_flags
 #define in6p_moptions	inp_moptions6
 #define in6p_outputopts	inp_outputopts6
 #define in6p_ip6	inp_ipv6
 #define in6p_flowinfo	inp_flowinfo
 #define in6p_sp		inp_sp
 #define in6p_next	inp_next
 #define in6p_prev	inp_prev
 /* macro names */
 #define sotoin6pcb	sotoinpcb
 /* function names */
 #define in6_pcbdetach	in_pcbdetach
 #define in6_rtchange	in_rtchange
 
 /*
  * for KAME src sync over BSD*'s. XXX: FreeBSD (>=3) are VERY different from
  * others...
  */
 #define in6p_ip6_nxt	inp_ipv6.ip6_nxt
 #endif
 
 extern struct domain inet6domain;
 
 struct icmp6stat icmp6stat;
 
 extern struct inpcbhead ripcb;
 extern int icmp6errppslim;
 static int icmp6errpps_count = 0;
 static struct timeval icmp6errppslim_last;
 extern int icmp6_nodeinfo;
 
 static void icmp6_errcount __P((struct icmp6errstat *, int, int));
 static int icmp6_rip6_input __P((struct mbuf **, int));
 static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int));
 static const char *icmp6_redirect_diag __P((struct in6_addr *,
 	struct in6_addr *, struct in6_addr *));
 #define	HAVE_PPSRATECHECK
 #ifndef HAVE_PPSRATECHECK
 static int ppsratecheck __P((struct timeval *, int *, int));
 #endif
 static struct mbuf *ni6_input __P((struct mbuf *, int));
 static struct mbuf *ni6_nametodns __P((const char *, int, int));
 static int ni6_dnsmatch __P((const char *, int, const char *, int));
 static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
 			  struct ifnet **, char *));
 static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
 				struct ifnet *, int));
 static int icmp6_notify_error __P((struct mbuf *, int, int, int));
 
 #ifdef COMPAT_RFC1885
 static struct route_in6 icmp6_reflect_rt;
 #endif
 
 
 /*
  * ICMPv6 initialization entry point.  Its only action is to call
  * mld6_init().
  */
 void
 icmp6_init()
 {
 	mld6_init();
 }
 
 /*
  * Increment the counter in stat that corresponds to the given ICMPv6
  * type and code.  Packet-too-big and redirect have one counter each;
  * destination-unreachable, time-exceeded and parameter-problem are
  * broken down by code.  Anything without a dedicated counter is charged
  * to icp6errs_unknown.
  */
 static void
 icmp6_errcount(stat, type, code)
 	struct icmp6errstat *stat;
 	int type, code;
 {
 	if (type == ICMP6_PACKET_TOO_BIG) {
 		stat->icp6errs_packet_too_big++;
 		return;
 	}
 	if (type == ND_REDIRECT) {
 		stat->icp6errs_redirect++;
 		return;
 	}
 
 	if (type == ICMP6_DST_UNREACH) {
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			stat->icp6errs_dst_unreach_noroute++;
 			return;
 		case ICMP6_DST_UNREACH_ADMIN:
 			stat->icp6errs_dst_unreach_admin++;
 			return;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			stat->icp6errs_dst_unreach_beyondscope++;
 			return;
 		case ICMP6_DST_UNREACH_ADDR:
 			stat->icp6errs_dst_unreach_addr++;
 			return;
 		case ICMP6_DST_UNREACH_NOPORT:
 			stat->icp6errs_dst_unreach_noport++;
 			return;
 		}
 	} else if (type == ICMP6_TIME_EXCEEDED) {
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			stat->icp6errs_time_exceed_transit++;
 			return;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			stat->icp6errs_time_exceed_reassembly++;
 			return;
 		}
 	} else if (type == ICMP6_PARAM_PROB) {
 		switch (code) {
 		case ICMP6_PARAMPROB_HEADER:
 			stat->icp6errs_paramprob_header++;
 			return;
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			stat->icp6errs_paramprob_nextheader++;
 			return;
 		case ICMP6_PARAMPROB_OPTION:
 			stat->icp6errs_paramprob_option++;
 			return;
 		}
 	}
 
 	/* Unrecognized type, or recognized type with an unlisted code. */
 	stat->icp6errs_unknown++;
 }
 
 /*
  * Generate an error packet of type error in response to bad IP6 packet.
  *
  * m is the offending packet; type, code and param become the ICMPv6
  * type, code and 32-bit pointer/parameter field of the error message.
  * The error statistics are bumped first, then a series of checks decide
  * whether an error may be sent at all (decrypted packet, multicast or
  * broadcast destination, unspecified or multicast source, original is
  * itself an ICMPv6 error or redirect, rate limit).  If so, the packet is
  * trimmed to ICMPV6_PLD_MAXLEN, new IPv6 and ICMPv6 headers are
  * prepended, and the result is handed to icmp6_reflect().
  *
  * On the paths visible here m is either passed on to icmp6_reflect() or
  * freed at "freeit".
  * NOTE(review): the early returns hidden in IP6_EXTHDR_CHECK /
  * IP6_EXTHDR_GET and the failure of M_PREPEND/m_pullup presumably
  * dispose of the mbuf themselves -- confirm against those macros.
  */
 void
 icmp6_error(m, type, code, param)
 	struct mbuf *m;
 	int type, code, param;
 {
 	struct ip6_hdr *oip6, *nip6;
 	struct icmp6_hdr *icmp6;
 	u_int preplen;
 	int off;
 	int nxt;
 
 	icmp6stat.icp6s_error++;
 
 	/* count per-type-code statistics */
 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
 
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m->m_flags & M_DECRYPTED) {
 		icmp6stat.icp6s_canterror++;
 		goto freeit;
 	}
 #endif
 
 	/* Make sure the original IPv6 header can be read through mtod(). */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
 #else
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL)
 			return;
 	}
 #endif
 	oip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * Multicast destination check. For unrecognized option errors,
 	 * this check has already been done in ip6_unknown_opt(), so we
 	 * only need to check for other errors.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST) ||
 	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
 	    (type != ICMP6_PACKET_TOO_BIG &&
 	     (type != ICMP6_PARAM_PROB ||
 	      code != ICMP6_PARAMPROB_OPTION)))
 		goto freeit;
 
 	/* Source address check. XXX: the case of anycast source? */
 	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
 	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
 		goto freeit;
 
 	/*
 	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
 	 * don't do it.
 	 */
 	nxt = -1;
 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
 	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *icp;
 
 #ifndef PULLDOWN_TEST
 		IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
 		icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
 			sizeof(*icp));
 		if (icp == NULL) {
 			icmp6stat.icp6s_tooshort++;
 			return;
 		}
 #endif
 		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
 		    icp->icmp6_type == ND_REDIRECT) {
 			/*
 			 * ICMPv6 error
 			 * Special case: for redirect (which is
 			 * informational) we must not send icmp6 error.
 			 */
 			icmp6stat.icp6s_canterror++;
 			goto freeit;
 		} else {
 			/* ICMPv6 informational - send the error */
 		}
 	} else {
 		/* non-ICMPv6 - send the error */
 	}
 
 	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
 
 	/* Finally, do rate limitation check. */
 	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
 		icmp6stat.icp6s_toofreq++;
 		goto freeit;
 	}
 
 	/*
 	 * OK, ICMP6 can be generated.
 	 */
 
 	/* The negative length passed to m_adj() trims the packet's tail. */
 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
 
 	/* Make room in front for the new IPv6 and ICMPv6 headers. */
 	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 	M_PREPEND(m, preplen, M_DONTWAIT);
 	if (m && m->m_len < preplen)
 		m = m_pullup(m, preplen);
 	if (m == NULL) {
 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
 		return;
 	}
 
 	/*
 	 * The new header starts with the original source/destination;
 	 * no swap is done here.
 	 */
 	nip6 = mtod(m, struct ip6_hdr *);
 	nip6->ip6_src  = oip6->ip6_src;
 	nip6->ip6_dst  = oip6->ip6_dst;
 
 	/*
 	 * Zero the second 16-bit word of link-local addresses in the
 	 * original header.
 	 * NOTE(review): presumably this strips an embedded scope id
 	 * before the header is quoted in the error -- confirm.
 	 */
 	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
 		oip6->ip6_src.s6_addr16[1] = 0;
 	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
 		oip6->ip6_dst.s6_addr16[1] = 0;
 
 	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
 	icmp6->icmp6_type = type;
 	icmp6->icmp6_code = code;
 	icmp6->icmp6_pptr = htonl((u_int32_t)param);
 
 	/*
 	 * icmp6_reflect() is designed to be in the input path.
 	 * icmp6_error() can be called from both input and output path,
 	 * and if we are in output path rcvif could contain bogus value.
 	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
 	 * information in ip header (nip6).
 	 */
 	m->m_pkthdr.rcvif = NULL;
 
 	icmp6stat.icp6s_outhist[type]++;
 	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
 
 	return;
 
   freeit:
 	/*
 	 * If we can't tell whether or not we can generate ICMP6, free it.
 	 */
 	m_freem(m);
 }
 
 /*
  * Process a received ICMP6 message.
  */
 int
 icmp6_input(mp, offp, proto)
 	struct mbuf **mp;
 	int *offp, proto;
 {
 	struct mbuf *m = *mp, *n;
 	struct ip6_hdr *ip6, *nip6;
 	struct icmp6_hdr *icmp6, *nicmp6;
 	int off = *offp;
 	int icmp6len = m->m_pkthdr.len - *offp;
 	int code, sum, noff;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
 	/* m might change if M_LOOP.  So, call mtod after this */
 #endif
 
 	/*
 	 * Locate icmp6 structure in mbuf, and check
 	 * that not corrupted and of at least minimum length
 	 */
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (icmp6len < sizeof(struct icmp6_hdr)) {
 		icmp6stat.icp6s_tooshort++;
 		goto freeit;
 	}
 
 	/*
 	 * calculate the checksum
 	 */
 #ifndef PULLDOWN_TEST
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
 	if (icmp6 == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return IPPROTO_DONE;
 	}
 #endif
 	code = icmp6->icmp6_code;
 
 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
 		nd6log((LOG_ERR,
 		    "ICMP6 checksum error(%d|%x) %s\n",
 		    icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src)));
 		icmp6stat.icp6s_checksum++;
 		goto freeit;
 	}
 
 	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
 		/*
 		 * Deliver very specific ICMP6 type only.
 		 * This is important to deilver TOOBIG.  Otherwise PMTUD
 		 * will not work.
 		 */
 		switch (icmp6->icmp6_type) {
 		case ICMP6_DST_UNREACH:
 		case ICMP6_PACKET_TOO_BIG:
 		case ICMP6_TIME_EXCEEDED:
 			break;
 		default:
 			goto freeit;
 		}
 	}
 
 	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
 	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
 	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
 
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			code = PRC_UNREACH_NET;
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
 			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
 			break;
 		case ICMP6_DST_UNREACH_ADDR:
 			code = PRC_HOSTDEAD;
 			break;
 #ifdef COMPAT_RFC1885
 		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
 			code = PRC_UNREACH_SRCFAIL;
 			break;
 #else
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			/* I mean "source address was incorrect." */
 			code = PRC_PARAMPROB;
 			break;
 #endif
 		case ICMP6_DST_UNREACH_NOPORT:
 			code = PRC_UNREACH_PORT;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
 		if (code != 0)
 			goto badcode;
 
 		code = PRC_MSGSIZE;
 
 		/*
 		 * Updating the path MTU will be done after examining
 		 * intermediate extension headers.
 		 */
 		goto deliver;
 		break;
 
 	case ICMP6_TIME_EXCEEDED:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			code += PRC_TIMXCEED_INTRANS;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PARAM_PROB:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
 		switch (code) {
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			code = PRC_UNREACH_PROTOCOL;
 			break;
 		case ICMP6_PARAMPROB_HEADER:
 		case ICMP6_PARAMPROB_OPTION:
 			code = PRC_PARAMPROB;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_ECHO_REQUEST:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
 		if (code != 0)
 			goto badcode;
 		if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
 			/* Give up remote */
 			break;
 		}
 		if ((n->m_flags & M_EXT) != 0
 		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
 			struct mbuf *n0 = n;
 			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
 			int n0len;
 
 			/*
 			 * Prepare an internal mbuf. m_pullup() doesn't
 			 * always copy the length we specified.
 			 */
 			if (maxlen >= MCLBYTES) {
 				/* Give up remote */
 				m_freem(n0);
 				break;
 			}
 			MGETHDR(n, M_DONTWAIT, n0->m_type);
 			n0len = n0->m_pkthdr.len;	/* save for use below */
 			if (n)
 				M_MOVE_PKTHDR(n, n0);
 			if (n && maxlen >= MHLEN) {
 				MCLGET(n, M_DONTWAIT);
 				if ((n->m_flags & M_EXT) == 0) {
 					m_free(n);
 					n = NULL;
 				}
 			}
 			if (n == NULL) {
 				/* Give up remote */
 				m_freem(n0);
 				break;
 			}
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			noff = sizeof(struct ip6_hdr);
 			/* new mbuf contains only ipv6+icmpv6 headers */
 			n->m_len = noff + sizeof(struct icmp6_hdr);
 			/*
 			 * Adjust mbuf. ip6_plen will be adjusted in
 			 * ip6_output().
 			 */
 			m_adj(n0, off + sizeof(struct icmp6_hdr));
 			/* recalculate complete packet size */
 			n->m_pkthdr.len = n0len + (noff - off);
 			n->m_next = n0;
 		} else {
 			nip6 = mtod(n, struct ip6_hdr *);
 			nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
 			noff = off;
 		}
 		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
 		nicmp6->icmp6_code = 0;
 		if (n) {
 			icmp6stat.icp6s_reflect++;
 			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
 			icmp6_reflect(n, noff);
 		}
 		break;
 
 	case ICMP6_ECHO_REPLY:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
 		if (code != 0)
 			goto badcode;
 		break;
 
 	case MLD_LISTENER_QUERY:
 	case MLD_LISTENER_REPORT:
 		if (icmp6len < sizeof(struct mld_hdr))
 			goto badlen;
 		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
 			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
 		else
 			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			mld6_input(m, off);
 			m = NULL;
 			goto freeit;
 		}
 		mld6_input(n, off);
 		/* m stays. */
 		break;
 
 	case MLD_LISTENER_DONE:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
 		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
 			goto badlen;
 		break;		/* nothing to be done in kernel */
 
 	case MLD_MTRACE_RESP:
 	case MLD_MTRACE:
 		/* XXX: these two are experimental.  not officially defind. */
 		/* XXX: per-interface statistics? */
 		break;		/* just pass it to applications */
 
 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
 	    {
 		enum { WRU, FQDN } mode;
 
 		if (!icmp6_nodeinfo)
 			break;
 
 		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
 			mode = WRU;
 		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
 			mode = FQDN;
 		else
 			goto badlen;
 
 #define hostnamelen	strlen(hostname)
 		if (mode == FQDN) {
 #ifndef PULLDOWN_TEST
 			IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
 					 IPPROTO_DONE);
 #endif
 			n = m_copy(m, 0, M_COPYALL);
 			if (n)
 				n = ni6_input(n, off);
 			/* XXX meaningless if n == NULL */
 			noff = sizeof(struct ip6_hdr);
 		} else {
 			u_char *p;
 			int maxlen, maxhlen;
 
 			if ((icmp6_nodeinfo & 5) != 5) 
 				break;
 
 			if (code != 0)
 				goto badcode;
 			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
 			if (maxlen >= MCLBYTES) {
 				/* Give up remote */
 				break;
 			}
 			MGETHDR(n, M_DONTWAIT, m->m_type);
 			if (n && maxlen > MHLEN) {
 				MCLGET(n, M_DONTWAIT);
 				if ((n->m_flags & M_EXT) == 0) {
 					m_free(n);
 					n = NULL;
 				}
 			}
 			if (!m_dup_pkthdr(n, m, M_DONTWAIT)) {
 				/*
 				 * Previous code did a blind M_COPY_PKTHDR
 				 * and said "just for rcvif".  If true, then
 				 * we could tolerate the dup failing (due to
 				 * the deep copy of the tag chain).  For now
 				 * be conservative and just fail.
 				 */
 				m_free(n);
 				n = NULL;
 			}
 			if (n == NULL) {
 				/* Give up remote */
 				break;
 			}
 			n->m_pkthdr.rcvif = NULL;
 			n->m_len = 0;
 			maxhlen = M_TRAILINGSPACE(n) - maxlen;
 			if (maxhlen > hostnamelen)
 				maxhlen = hostnamelen;
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			p = (u_char *)(nicmp6 + 1);
 			bzero(p, 4);
 			bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */
 			noff = sizeof(struct ip6_hdr);
 			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 				sizeof(struct icmp6_hdr) + 4 + maxhlen;
 			nicmp6->icmp6_type = ICMP6_WRUREPLY;
 			nicmp6->icmp6_code = 0;
 		}
 #undef hostnamelen
 		if (n) {
 			icmp6stat.icp6s_reflect++;
 			icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
 			icmp6_reflect(n, noff);
 		}
 		break;
 	    }
 
 	case ICMP6_WRUREPLY:
 		if (code != 0)
 			goto badcode;
 		break;
 
 	case ND_ROUTER_SOLICIT:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_solicit))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			nd6_rs_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		nd6_rs_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_ROUTER_ADVERT:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_advert))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			nd6_ra_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		nd6_ra_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_NEIGHBOR_SOLICIT:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			nd6_ns_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		nd6_ns_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_NEIGHBOR_ADVERT:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_advert))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			nd6_na_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		nd6_na_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_REDIRECT:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_redirect))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
 			icmp6_redirect_input(m, off);
 			m = NULL;
 			goto freeit;
 		}
 		icmp6_redirect_input(n, off);
 		/* m stays. */
 		break;
 
 	case ICMP6_ROUTER_RENUMBERING:
 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
 			goto badcode;
 		if (icmp6len < sizeof(struct icmp6_router_renum))
 			goto badlen;
 		break;
 
 	default:
 		nd6log((LOG_DEBUG,
 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
 		    icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
 		    ip6_sprintf(&ip6->ip6_dst),
 		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
 			/* ICMPv6 error: MUST deliver it by spec... */
 			code = PRC_NCMDS;
 			/* deliver */
 		} else {
 			/* ICMPv6 informational: MUST not deliver */
 			break;
 		}
 	deliver:
 		if (icmp6_notify_error(m, off, icmp6len, code)) {
 			/* In this case, m should've been freed. */
 			return(IPPROTO_DONE);
 		}
 		break;
 
 	badcode:
 		icmp6stat.icp6s_badcode++;
 		break;
 
 	badlen:
 		icmp6stat.icp6s_badlen++;
 		break;
 	}
 
 	/* deliver the packet to appropriate sockets */
 	icmp6_rip6_input(&m, *offp);
 
 	return IPPROTO_DONE;
 
  freeit:
 	m_freem(m);
 	return IPPROTO_DONE;
 }
 
 /*
  * Handle an ICMPv6 error message that embeds the offending IPv6 packet.
  *
  * m        - mbuf holding the received packet
  * off      - offset of the ICMPv6 header within m
  * icmp6len - ICMPv6 message length; must cover an ICMPv6 header plus the
  *            embedded IPv6 header
  * code     - value handed to the upper-layer pr_ctlinput routine as its
  *            first argument
  *
  * Walks the extension headers of the embedded packet to find its
  * upper-layer protocol (and, for a type-0 routing header with segments
  * left, its final destination), updates path MTU information when the
  * message is ICMP6_PACKET_TOO_BIG, and finally calls the pr_ctlinput
  * handler registered for that protocol, if any.
  *
  * Returns 0 on success and -1 on failure.  The explicit failure path in
  * this function (the freeit label) frees m.
  * NOTE(review): the IP6_EXTHDR_CHECK / IP6_EXTHDR_GET failure paths are
  * presumed to release m as well (the caller assumes so) - confirm against
  * the macro definitions.
  */
 static int
 icmp6_notify_error(m, off, icmp6len, code)
 	struct mbuf *m;
 	int off, icmp6len, code;
 {
 	struct icmp6_hdr *icmp6;
 	struct ip6_hdr *eip6;
 	u_int32_t notifymtu;
 	struct sockaddr_in6 icmp6src, icmp6dst;
 
 	/* the message must at least contain the embedded IPv6 header */
 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
 		icmp6stat.icp6s_tooshort++;
 		goto freeit;
 	}
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off,
 			 sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr),
 			 -1);
 	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
 		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
 	if (icmp6 == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return(-1);
 	}
 #endif
 	/* the embedded (offending) IPv6 header follows the ICMPv6 header */
 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
 
 	/* Detect the upper level protocol */
 	{
 		void (*ctlfunc) __P((int, struct sockaddr *, void *));
 		u_int8_t nxt = eip6->ip6_nxt;
 		/* eoff: offset of the header currently being examined */
 		int eoff = off + sizeof(struct icmp6_hdr) +
 			sizeof(struct ip6_hdr);
 		struct ip6ctlparam ip6cp;
 		struct in6_addr *finaldst = NULL;
 		int icmp6type = icmp6->icmp6_type;
 		struct ip6_frag *fh;
 		struct ip6_rthdr *rth;
 		struct ip6_rthdr0 *rth0;
 		int rthlen;
 
 		while (1) { /* XXX: should avoid infinite loop explicitly? */
 			struct ip6_ext *eh;
 
 			switch (nxt) {
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_AH:
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0, eoff +
 						 sizeof(struct ip6_ext),
 						 -1);
 				eh = (struct ip6_ext *)(mtod(m, caddr_t)
 							+ eoff);
 #else
 				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
 					       eoff, sizeof(*eh));
 				if (eh == NULL) {
 					icmp6stat.icp6s_tooshort++;
 					return(-1);
 				}
 #endif
 				
 				/*
 				 * AH length is scaled by 4 bytes, the other
 				 * option headers by 8 bytes.
 				 */
 				if (nxt == IPPROTO_AH)
 					eoff += (eh->ip6e_len + 2) << 2;
 				else
 					eoff += (eh->ip6e_len + 1) << 3;
 				nxt = eh->ip6e_nxt;
 				break;
 			case IPPROTO_ROUTING:
 				/*
 				 * When the erroneous packet contains a
 				 * routing header, we should examine the
 				 * header to determine the final destination.
 				 * Otherwise, we can't properly update
 				 * information that depends on the final
 				 * destination (e.g. path MTU).
 				 */
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth),
 						 -1);
 				rth = (struct ip6_rthdr *)(mtod(m, caddr_t)
 							   + eoff);
 #else
 				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
 					       eoff, sizeof(*rth));
 				if (rth == NULL) {
 					icmp6stat.icp6s_tooshort++;
 					return(-1);
 				}
 #endif
 				rthlen = (rth->ip6r_len + 1) << 3;
 				/*
 				 * XXX: currently there is no
 				 * officially defined type other
 				 * than type-0.
 				 * Note that if the segment left field
 				 * is 0, all intermediate hops must
 				 * have been passed.
 				 */
 				if (rth->ip6r_segleft &&
 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
 					int hops;
 
 #ifndef PULLDOWN_TEST
 					IP6_EXTHDR_CHECK(m, 0, eoff + rthlen,
 							 -1);
 					rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff);
 #else
 					IP6_EXTHDR_GET(rth0,
 						       struct ip6_rthdr0 *, m,
 						       eoff, rthlen);
 					if (rth0 == NULL) {
 						icmp6stat.icp6s_tooshort++;
 						return(-1);
 					}
 #endif
 					/* just ignore a bogus header */
 					/* the last listed address is the final destination */
 					if ((rth0->ip6r0_len % 2) == 0 &&
 					    (hops = rth0->ip6r0_len/2))
 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
 				}
 				eoff += rthlen;
 				nxt = rth->ip6r_nxt;
 				break;
 			case IPPROTO_FRAGMENT:
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0, eoff +
 						 sizeof(struct ip6_frag),
 						 -1);
 				fh = (struct ip6_frag *)(mtod(m, caddr_t)
 							 + eoff);
 #else
 				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
 					       eoff, sizeof(*fh));
 				if (fh == NULL) {
 					icmp6stat.icp6s_tooshort++;
 					return(-1);
 				}
 #endif
 				/*
 				 * Data after a fragment header is meaningless
 				 * unless it is the first fragment, but
 				 * we'll go to the notify label for path MTU
 				 * discovery.
 				 */
 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
 					goto notify;
 
 				eoff += sizeof(struct ip6_frag);
 				nxt = fh->ip6f_nxt;
 				break;
 			default:
 				/*
 				 * This case includes ESP and the No Next
 				 * Header.  In such cases going to the notify
 				 * label does not have any meaning
 				 * (i.e. ctlfunc will be NULL), but we go
 				 * anyway since we might have to update
 				 * path MTU information.
 				 */
 				goto notify;
 			}
 		}
 	  notify:
 		/* re-derive the header pointers before using them below */
 #ifndef PULLDOWN_TEST
 		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
 			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
 		if (icmp6 == NULL) {
 			icmp6stat.icp6s_tooshort++;
 			return(-1);
 		}
 #endif
 
 		/*
 		 * build the destination sockaddr from the final destination
 		 * found in a routing header, or else from the inner header.
 		 */
 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
 		bzero(&icmp6dst, sizeof(icmp6dst));
 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6dst.sin6_family = AF_INET6;
 		if (finaldst == NULL)
 			icmp6dst.sin6_addr = eip6->ip6_dst;
 		else
 			icmp6dst.sin6_addr = *finaldst;
 		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 							  &icmp6dst.sin6_addr);
 #ifndef SCOPEDROUTING
 		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
 				   NULL, NULL)) {
 			/* should be impossible */
 			nd6log((LOG_DEBUG,
 			    "icmp6_notify_error: in6_embedscope failed\n"));
 			goto freeit;
 		}
 #endif
 
 		/*
 		 * retrieve parameters from the inner IPv6 header, and convert
 		 * them into sockaddr structures.
 		 */
 		bzero(&icmp6src, sizeof(icmp6src));
 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6src.sin6_family = AF_INET6;
 		icmp6src.sin6_addr = eip6->ip6_src;
 		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 							  &icmp6src.sin6_addr);
 #ifndef SCOPEDROUTING
 		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
 				   NULL, NULL)) {
 			/* should be impossible */
 			nd6log((LOG_DEBUG,
 			    "icmp6_notify_error: in6_embedscope failed\n"));
 			goto freeit;
 		}
 #endif
 		icmp6src.sin6_flowinfo =
 			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
 
 		/* collect everything the ctlinput handlers receive */
 		if (finaldst == NULL)
 			finaldst = &eip6->ip6_dst;
 		ip6cp.ip6c_m = m;
 		ip6cp.ip6c_icmp6 = icmp6;
 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
 		ip6cp.ip6c_off = eoff;
 		ip6cp.ip6c_finaldst = finaldst;
 		ip6cp.ip6c_src = &icmp6src;
 		ip6cp.ip6c_nxt = nxt;
 
 		/* Packet Too Big: record the reported MTU and update the path MTU */
 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
 			notifymtu = ntohl(icmp6->icmp6_mtu);
 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
 			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
 		}
 
 		/* dispatch to the control-input routine of the inner protocol */
 		ctlfunc = (void (*) __P((int, struct sockaddr *, void *)))
 			(inet6sw[ip6_protox[nxt]].pr_ctlinput);
 		if (ctlfunc) {
 			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
 					  &ip6cp);
 		}
 	}
 	return(0);
 
   freeit:
 	m_freem(m);
 	return(-1);
 }
 
 /*
  * Apply the MTU reported by an ICMPv6 Packet Too Big message to the host
  * route toward the final destination of the offending packet.
  *
  * ip6cp     - control parameters; the final destination, the ICMPv6
  *             header and the mbuf are read from it
  * validated - when zero, nothing is done
  *
  * Only a host route whose MTU is not locked is touched.  A reported MTU
  * below IPV6_MMTU locks the route MTU instead of lowering it; otherwise
  * the route MTU is lowered when the reported value is smaller than both
  * the interface MTU and the current route MTU.
  */
 void
 icmp6_mtudisc_update(ip6cp, validated)
 	struct ip6ctlparam *ip6cp;
 	int validated;
 {
 	struct in6_addr *dst = ip6cp->ip6c_finaldst;
 	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
 	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
 	u_int mtu = ntohl(icmp6->icmp6_mtu);
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 sin6;
 
 	if (!validated)
 		return;
 
 	/* build the lookup key from the final destination */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_family = PF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *dst;
 	/* XXX normally, this won't happen */
 	/* link-local: store the receiving interface index in the address */
 	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
 		sin6.sin6_addr.s6_addr16[1] =
 		    htons(m->m_pkthdr.rcvif->if_index);
 	}
 	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
 	rt = rtalloc1((struct sockaddr *)&sin6, 0,
 		      RTF_CLONING | RTF_PRCLONING);
 
 	if (rt && (rt->rt_flags & RTF_HOST)
 	    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
 		if (mtu < IPV6_MMTU) {
 				/* xxx */
 			/* too small to use: lock the MTU so later updates are skipped */
 			rt->rt_rmx.rmx_locks |= RTV_MTU;
 		} else if (mtu < rt->rt_ifp->if_mtu &&
 			   rt->rt_rmx.rmx_mtu > mtu) {
 			icmp6stat.icp6s_pmtuchg++;
 			rt->rt_rmx.rmx_mtu = mtu;
 		}
 	}
-	if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */
-		RTFREE(rt);
-	}
+	if (rt)
+		rtfree(rt);
 }
 
 /*
  * Process a Node Information Query packet, based on
  * draft-ietf-ipngwg-icmp-name-lookups-07.
  * 
  * Spec incompatibilities:
  * - IPv6 Subject address handling
  * - IPv4 Subject address handling support missing
  * - Proxy reply (answer even if it's not for me)
  * - joins NI group address at in6_ifattach() time only, does not cope
  *   with hostname changes by sethostname(3)
  *
  * m is the query, starting at its IPv6 header, and off is the offset of
  * the Node Information header within it.  m is consumed on every path
  * through this function.  Returns a newly allocated mbuf holding the
  * reply (IPv6 header followed by the Node Information reply), or NULL
  * when the query is rejected or an allocation fails.
  */
 #define hostnamelen	strlen(hostname)
 static struct mbuf *
 ni6_input(m, off)
 	struct mbuf *m;
 	int off;
 {
 	struct icmp6_nodeinfo *ni6, *nni6;
 	struct mbuf *n = NULL;
 	u_int16_t qtype;
 	int subjlen;
 	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 	struct ni_reply_fqdn *fqdn;
 	int addrs;		/* for NI_QTYPE_NODEADDR */
 	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
 	struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */
 	struct sockaddr_in6 sin6_d; /* XXX: we should retrieve this from m_aux */
 	struct ip6_hdr *ip6;
 	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
 	char *subj = NULL;
 	struct in6_ifaddr *ia6 = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
 	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
 	if (ni6 == NULL) {
 		/* m is already reclaimed */
 		return NULL;
 	}
 #endif
 
 	/*
 	 * Validate IPv6 destination address.
 	 *
 	 * The Responder must discard the Query without further processing
 	 * unless it is one of the Responder's unicast or anycast addresses, or
 	 * a link-local scope multicast address which the Responder has joined.
 	 * [icmp-name-lookups-07, Section 4.]
 	 */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
 	/* XXX scopeid */
 	if ((ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)&sin6)) != NULL) {
 		/* unicast/anycast, fine */
 		/* queries to a temporary address need bit 0x4 of icmp6_nodeinfo */
 		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 		    (icmp6_nodeinfo & 4) == 0) {
 			nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
 				"a temporary address in %s:%d",
 			       __FILE__, __LINE__));
 			goto bad;
 		}
 	} else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr))
 		; /* link-local multicast, fine */
 	else
 		goto bad;
 
 	/* validate query Subject field. */
 	qtype = ntohs(ni6->ni_qtype);
 	/* whatever follows the Node Information header is the Subject */
 	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 	case NI_QTYPE_SUPTYPES:
 		/* 07 draft */
 		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
 			break;
 		/* FALLTHROUGH */
 	case NI_QTYPE_FQDN:
 	case NI_QTYPE_NODEADDR:
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 #if ICMP6_NI_SUBJ_IPV6 != 0
 		case 0:
 #endif
 			/*
 			 * backward compatibility - try to accept 03 draft
 			 * format, where no Subject is present.
 			 */
 			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
 			    subjlen == 0) {
 				oldfqdn++;
 				break;
 			}
 #if ICMP6_NI_SUBJ_IPV6 != 0
 			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
 				goto bad;
 #endif
 
 			if (subjlen != sizeof(sin6.sin6_addr))
 				goto bad;
 
 			/*
 			 * Validate Subject address.
 			 *
 			 * Not sure what exactly "address belongs to the node"
 			 * means in the spec, is it just unicast, or what?
 			 *
 			 * At this moment we consider Subject address as
 			 * "belong to the node" if the Subject address equals
 			 * to the IPv6 destination address; validation for
 			 * IPv6 destination address should have done enough
 			 * check for us.
 			 *
 			 * We do not do proxy at this moment.
 			 */
 			/* m_pulldown instead of copy? */
 			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
 			    subjlen, (caddr_t)&sin6.sin6_addr);
 			sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 							      &sin6.sin6_addr);
 #ifndef SCOPEDROUTING
 			in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL);
 #endif
 			bzero(&sin6_d, sizeof(sin6_d));
 			sin6_d.sin6_family = AF_INET6; /* not used, actually */
 			sin6_d.sin6_len = sizeof(sin6_d); /* ditto */
 			sin6_d.sin6_addr = ip6->ip6_dst;
 			sin6_d.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 								&ip6->ip6_dst);
 #ifndef SCOPEDROUTING
 			in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL);
 #endif
 			/* subj now refers to the sockaddr holding the Subject address */
 			subj = (char *)&sin6;
 			if (SA6_ARE_ADDR_EQUAL(&sin6, &sin6_d))
 				break;
 
 			/*
 			 * XXX if we are to allow other cases, we should really
 			 * be careful about scope here.
 			 * basically, we should disallow queries toward IPv6
 			 * destination X with subject Y, if scope(X) > scope(Y).
 			 * if we allow scope(X) > scope(Y), it will result in
 			 * information leakage across scope boundary.
 			 */
 			goto bad;
 
 		case ICMP6_NI_SUBJ_FQDN:
 			/*
 			 * Validate Subject name with gethostname(3).
 			 *
 			 * The behavior may need some debate, since:
 			 * - we are not sure if the node has FQDN as
 			 *   hostname (returned by gethostname(3)).
 			 * - the code does wildcard match for truncated names.
 			 *   however, we are not sure if we want to perform
 			 *   wildcard match, if gethostname(3) side has
 			 *   truncated hostname.
 			 */
 			/* n temporarily holds our own name in DNS format */
 			n = ni6_nametodns(hostname, hostnamelen, 0);
 			if (!n || n->m_next || n->m_len == 0)
 				goto bad;
 			IP6_EXTHDR_GET(subj, char *, m,
 			    off + sizeof(struct icmp6_nodeinfo), subjlen);
 			if (subj == NULL)
 				goto bad;
 			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
 					n->m_len)) {
 				goto bad;
 			}
 			m_freem(n);
 			n = NULL;
 			break;
 
 		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
 		default:
 			goto bad;
 		}
 		break;
 	}
 
 	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
 	/* bit 0x1 of icmp6_nodeinfo allows FQDN replies, bit 0x2 NODEADDR replies */
 	switch (qtype) {
 	case NI_QTYPE_FQDN:
 		if ((icmp6_nodeinfo & 1) == 0)
 			goto bad;
 		break;
 	case NI_QTYPE_NODEADDR:
 		if ((icmp6_nodeinfo & 2) == 0)
 			goto bad;
 		break;
 	}
 
 	/* guess reply length */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		break;		/* no reply data */
 	case NI_QTYPE_SUPTYPES:
 		replylen += sizeof(u_int32_t);
 		break;
 	case NI_QTYPE_FQDN:
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		break;
 	case NI_QTYPE_NODEADDR:
 		addrs = ni6_addrs(ni6, m, &ifp, subj);
 		if ((replylen += addrs * (sizeof(struct in6_addr) +
 					  sizeof(u_int32_t))) > MCLBYTES)
 			replylen = MCLBYTES; /* XXX: will truncate pkt later */
 		break;
 	default:
 		/*
 		 * XXX: We must return a reply with the ICMP6 code
 		 * `unknown Qtype' in this case. However we regard the case
 		 * as an FQDN query for backward compatibility.
 		 * Older versions set a random value to this field,
 		 * so it rarely varies in the defined qtypes.
 		 * But the mechanism is not reliable...
 		 * maybe we should obsolete older versions.
 		 */
 		qtype = NI_QTYPE_FQDN;
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		oldfqdn++;
 		break;
 	}
 
 	/* allocate an mbuf to reply. */
 	MGETHDR(n, M_DONTWAIT, m->m_type);
 	if (n == NULL) {
 		m_freem(m);
 		return(NULL);
 	}
 	M_MOVE_PKTHDR(n, m); /* just for recvif */
 	if (replylen > MHLEN) {
 		if (replylen > MCLBYTES) {
 			/*
 			 * XXX: should we try to allocate more? But MCLBYTES
 			 * is probably much larger than IPV6_MMTU...
 			 */
 			goto bad;
 		}
 		MCLGET(n, M_DONTWAIT);
 		if ((n->m_flags & M_EXT) == 0) {
 			goto bad;
 		}
 	}
 	n->m_pkthdr.len = n->m_len = replylen;
 
 	/* copy mbuf header and IPv6 + Node Information base headers */
 	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
 	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
 	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
 
 	/* qtype dependent procedure */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = 0;
 		break;
 	case NI_QTYPE_SUPTYPES:
 	{
 		u_int32_t v;
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
 		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
 		v = (u_int32_t)htonl(0x0000000f);
 		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
 		break;
 	}
 	case NI_QTYPE_FQDN:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
 						sizeof(struct ip6_hdr) +
 						sizeof(struct icmp6_nodeinfo));
 		nni6->ni_flags = 0; /* XXX: meaningless TTL */
 		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
 		/*
 		 * XXX do we really have FQDN in variable "hostname"?
 		 */
 		/* the encoded name travels in a second mbuf chained to n */
 		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
 		if (n->m_next == NULL)
 			goto bad;
 		/* XXX we assume that n->m_next is not a chain */
 		if (n->m_next->m_next != NULL)
 			goto bad;
 		n->m_pkthdr.len += n->m_next->m_len;
 		break;
 	case NI_QTYPE_NODEADDR:
 	{
 		int lenlim, copied;
 
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		/* shrink to the headers so M_TRAILINGSPACE reports the room left */
 		n->m_pkthdr.len = n->m_len =
 		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 		lenlim = M_TRAILINGSPACE(n);
 		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
 		/* XXX: reset mbuf length */
 		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 			sizeof(struct icmp6_nodeinfo) + copied;
 		break;
 	}
 	default:
 		break;		/* XXX impossible! */
 	}
 
 	nni6->ni_type = ICMP6_NI_REPLY;
 	m_freem(m);
 	return(n);
 
   bad:
 	/* drop the query and any partially built reply */
 	m_freem(m);
 	if (n)
 		m_freem(n);
 	return(NULL);
 }
 #undef hostnamelen
 
 /*
  * Encode a host name into a freshly allocated mbuf.  No compression
  * support.
  *
  * name/namelen - the name to encode
  * old          - non-zero: produce a pascal string (one length byte
  *                followed by the raw name); zero: produce DNS format,
  *                one length-prefixed label per dot-separated component
  *
  * XXX in DNS format, names with less than 2 dots (like "foo" or
  * "foo.section") are treated as truncated names and get two \0 at the
  * end instead of one.  this is a wild guess.
  *
  * An empty name in DNS format yields an empty mbuf.  Returns NULL when
  * the allocation fails, the result does not fit into the mbuf, or a
  * label is empty or longer than 63 bytes.
  */
 static struct mbuf *
 ni6_nametodns(name, namelen, old)
 	const char *name;
 	int namelen;
 	int old;	/* return pascal string if non-zero */
 {
 	struct mbuf *m;
 	char *out, *limit;
 	const char *cur, *scan, *end;
 	int lablen, ndots, need, nterm;
 
 	need = old ? namelen + 1 : MCLBYTES;
 
 	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
 	MGET(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		goto fail;
 	if (need > MLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0)
 			goto fail;
 	}
 	m->m_next = NULL;
 
 	if (!old) {
 		m->m_len = 0;
 		out = mtod(m, char *);
 		limit = mtod(m, char *) + M_TRAILINGSPACE(m);
 
 		/* if not certain about my name, return empty buffer */
 		if (namelen == 0)
 			return m;
 
 		end = name + namelen;
 
 		/*
 		 * count the dots to guess whether this is a shortened
 		 * hostname (two trailing "\0") or an FQDN (one).
 		 */
 		ndots = 0;
 		for (scan = name; scan < end; scan++) {
 			if (*scan == '.')
 				ndots++;
 		}
 		nterm = (ndots < 2) ? 2 : 1;
 
 		cur = name;
 		while (out < limit && cur < end) {
 			/* find the end of the current label */
 			scan = cur;
 			while (scan < end && *scan != '\0' && *scan != '.')
 				scan++;
 			lablen = scan - cur;
 
 			/* result does not fit into mbuf */
 			if (out + lablen + 1 >= limit)
 				goto fail;
 			/*
 			 * DNS label length restriction, RFC1035 page 8.
 			 * the empty label is rejected as well, so that
 			 * "foo..bar" never produces a 0-length label.
 			 */
 			if (lablen <= 0 || lablen >= 64)
 				goto fail;
 
 			*out++ = lablen;
 			bcopy(cur, out, lablen);
 			out += lablen;
 
 			/* step over the separating dot, if any */
 			cur = scan;
 			if (cur < end && *cur == '.')
 				cur++;
 		}
 
 		/* termination */
 		if (out + nterm >= limit)
 			goto fail;
 		while (nterm-- > 0)
 			*out++ = '\0';
 		m->m_len = out - mtod(m, char *);
 		return m;
 	} else {
 		/* pascal string: length byte, then the name as is */
 		m->m_len = need;
 		*mtod(m, char *) = namelen;
 		bcopy(name, mtod(m, char *) + 1, namelen);
 		return m;
 	}
 
 	panic("should not reach here");
 	/* NOTREACHED */
 
  fail:
 	if (m)
 		m_freem(m);
 	return NULL;
 }
 
 /*
  * Decide whether two DNS-encoded names (length-prefixed labels, no
  * compression support) match.
  *
  * a/alen, b/blen - the two encoded names and their lengths in bytes
  *
  * Returns 1 on a match and 0 otherwise.  Byte-identical buffers always
  * match.  Otherwise both names must end in \0, and a name in truncated
  * form (an extra \0 in front of the terminating one) matches any name
  * that agrees with it on all labels before that point.
  * XXX labels are compared byte for byte, so upper/lowercase differences
  * do not match (see RFC2065)
  */
 static int
 ni6_dnsmatch(a, alen, b, blen)
 	const char *a;
 	int alen;
 	const char *b;
 	int blen;
 {
 	int ai, bi;	/* offsets of the current length byte in a and b */
 	int lablen;
 
 	/* simplest case - need validation? */
 	if (alen == blen && bcmp(a, b, alen) == 0)
 		return 1;
 
 	/* termination is mandatory */
 	if (alen < 2 || blen < 2)
 		return 0;
 	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
 		return 0;
 
 	/* keep the terminating \0 out of the label walk */
 	alen -= 1;
 	blen -= 1;
 
 	ai = 0;
 	bi = 0;
 	while (ai < alen && bi < blen) {
 		/* a length byte must be in the range 0..63 */
 		if ((signed char)a[ai] < 0 || (signed char)b[bi] < 0)
 			return 0;
 		/* we don't support compression yet */
 		if (a[ai] >= 64 || b[bi] >= 64)
 			return 0;
 
 		/* truncated case: a \0 right before the terminator */
 		if ((a[ai] == 0 && ai == alen - 1) ||
 		    (b[bi] == 0 && bi == blen - 1))
 			return 1;
 		/* an empty label anywhere else is invalid */
 		if (a[ai] == 0 || b[bi] == 0)
 			return 0;
 
 		/* labels must have the same length and the same bytes */
 		if (a[ai] != b[bi])
 			return 0;
 		lablen = a[ai];
 		if (ai + 1 + lablen > alen || bi + 1 + lablen > blen)
 			return 0;
 		if (bcmp(a + ai + 1, b + bi + 1, lablen) != 0)
 			return 0;
 
 		ai += 1 + lablen;
 		bi += 1 + lablen;
 	}
 
 	/* a match requires both names to be used up at the same time */
 	return (ai == alen && bi == blen) ? 1 : 0;
 }
 
 /*
  * calculate the number of addresses to be returned in the node info reply.
  *
  * ni6  - the query; its flags select the wanted address scopes
  * m    - the query mbuf (not used here)
  * ifpp - set to the interface owning the Subject address, when found
  * subj - the Subject address as a struct sockaddr_in6, or NULL
  *
  * Unless NI_NODEADDR_FLAG_ALL is set, the query must carry an IPv6
  * Subject; the count is then limited to the interface that owns the
  * Subject address.  If no interface owns it, or if the ALL flag is set,
  * the addresses of all interfaces are counted.
  */
 static int
 ni6_addrs(ni6, m, ifpp, subj)
 	struct icmp6_nodeinfo *ni6;
 	struct mbuf *m;
 	struct ifnet **ifpp;
 	char *subj;
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia6;
 	struct sockaddr_in6 *subjaddr = NULL; /* XXX pedant */
 	int total = 0, perif, matched = 0;
 	int scopeflag;
 	int niflags = ni6->ni_flags;
 	int wantall = ((niflags & NI_NODEADDR_FLAG_ALL) != 0);
 
 	if (!wantall) {
 		/*
 		 * XXX: we only support IPv6 subject address for
 		 * this Qtype.
 		 */
 		if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
 			return(0);
 		if (subj == NULL) /* must be impossible... */
 			return(0);
 		subjaddr = (struct sockaddr_in6 *)subj;
 	}
 
 	IFNET_RLOCK();
 	ifp = TAILQ_FIRST(&ifnet);
 	while (ifp) {
 		perif = 0;
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ia6 = (struct in6_ifaddr *)ifa;
 
 			/* remember that this interface owns the Subject */
 			if (!wantall &&
 			    IN6_ARE_ADDR_EQUAL(&subjaddr->sin6_addr,
 					       &ia6->ia_addr.sin6_addr))
 				matched = 1;
 
 			/*
 			 * IPv4-mapped addresses can only be returned by a
 			 * Node Information proxy, since they represent
 			 * addresses of IPv4-only nodes, which perforce do
 			 * not implement this protocol.
 			 * [icmp-name-lookups-07, Section 5.4]
 			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
 			 * this function at this moment.
 			 */
 
 			/*
 			 * What do we have to do about ::1?
 			 * Map the address scope to the query flag asking
 			 * for it; other scopes map to 0 and are skipped.
 			 */
 			switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				scopeflag = NI_NODEADDR_FLAG_LINKLOCAL;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				scopeflag = NI_NODEADDR_FLAG_SITELOCAL;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				scopeflag = NI_NODEADDR_FLAG_GLOBAL;
 				break;
 			default:
 				scopeflag = 0;
 				break;
 			}
 			if ((niflags & scopeflag) == 0)
 				continue;
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue; /* we need only unicast addresses */
 			/* temporary addresses need bit 0x4 of icmp6_nodeinfo */
 			if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (icmp6_nodeinfo & 4) == 0)
 				continue;
 
 			perif++; /* count the address */
 		}
 
 		/* the Subject's interface: report its addresses only */
 		if (matched) {
 			*ifpp = ifp;
 			IFNET_RUNLOCK();
 			return(perif);
 		}
 
 		total += perif;
 		ifp = TAILQ_NEXT(ifp, if_list);
 	}
 	IFNET_RUNLOCK();
 
 	return(total);
 }
 
 /*
  * Store the addresses for a NODEADDR reply right behind the reply header.
  *
  * ni6   - the query; its flags select the wanted address scopes
  * nni6  - the reply header; entries are written after it and
  *         NI_NODEADDR_FLAG_TRUNCATE is set on it when space runs out
  * ifp0  - interface to restrict the search to, or NULL for all interfaces
  * resid - room available for entries, in bytes
  *
  * Each entry is a 32-bit TTL followed by the IPv6 address.  Addresses not
  * marked deprecated are stored in a first pass; deprecated ones follow in
  * a second pass that starts at the first interface where one was seen.
  *
  * Returns the number of bytes stored.
  */
 static int
 ni6_store_addrs(ni6, nni6, ifp0, resid)
 	struct icmp6_nodeinfo *ni6, *nni6;
 	struct ifnet *ifp0;
 	int resid;
 {
 	struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet);
 	struct in6_ifaddr *ifa6;
 	struct ifaddr *ifa;
 	struct ifnet *ifp_dep = NULL;
 	int copied = 0, allow_deprecated = 0;
 	u_char *cp = (u_char *)(nni6 + 1);
 	int niflags = ni6->ni_flags;
 	u_int32_t ltime;
 
 	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
 		return(0);	/* needless to copy */
 		
 	IFNET_RLOCK();
   again:
 	/* pass 1: allow_deprecated == 0; pass 2: deprecated addresses only */
 	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
 	{
 		for (ifa = ifp->if_addrlist.tqh_first; ifa;
 		     ifa = ifa->ifa_list.tqe_next)
 		{
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
 			    allow_deprecated == 0) {
 				/*
 				 * preferred address should be put before
 				 * deprecated addresses.
 				 */
 
 				/* record the interface for later search */
 				if (ifp_dep == NULL)
 					ifp_dep = ifp;
 
 				continue;
 			}
 			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
 				 allow_deprecated != 0)
 				continue; /* we now collect deprecated addrs */
 
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
 					continue;
 				break;
 			default:
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental. not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue;
 			/* temporary addresses need bit 0x4 of icmp6_nodeinfo */
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (icmp6_nodeinfo & 4) == 0) {
 				continue;
 			}
 
 			/* now we can copy the address */
 			if (resid < sizeof(struct in6_addr) +
 			    sizeof(u_int32_t)) {
 				/*
 				 * We give up much more copy.
 				 * Set the truncate flag and return.
 				 */
 				nni6->ni_flags |=
 					NI_NODEADDR_FLAG_TRUNCATE;
 				IFNET_RUNLOCK();
 				return(copied);
 			}
 
 			/*
 			 * Set the TTL of the address.
 			 * The TTL value should be one of the following
 			 * according to the specification:
 			 *
 			 * 1. The remaining lifetime of a DHCP lease on the
 			 *    address, or
 			 * 2. The remaining Valid Lifetime of a prefix from
 			 *    which the address was derived through Stateless
 			 *    Autoconfiguration.
 			 *
 			 * Note that we currently do not support stateful
 			 * address configuration by DHCPv6, so the former
 			 * case can't happen.
 			 */
 			/* an expiry of 0 means the address never expires */
 			if (ifa6->ia6_lifetime.ia6t_expire == 0)
 				ltime = ND6_INFINITE_LIFETIME;
 			else {
 				if (ifa6->ia6_lifetime.ia6t_expire >
 				    time_second)
 					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
 				else
 					ltime = 0;
 			}
 			
 			bcopy(&ltime, cp, sizeof(u_int32_t));
 			cp += sizeof(u_int32_t);
 
 			/* copy the address itself */
 			bcopy(&ifa6->ia_addr.sin6_addr, cp,
 			      sizeof(struct in6_addr));
 			/* XXX: KAME link-local hack; remove ifindex */
 			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
 				((struct in6_addr *)cp)->s6_addr16[1] = 0;
 			cp += sizeof(struct in6_addr);
 			
 			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
 			copied += (sizeof(struct in6_addr) +
 				   sizeof(u_int32_t));
 		}
 		if (ifp0)	/* we need search only on the specified IF */
 			break;
 	}
 
 	/* deprecated addresses were skipped: run the second pass for them */
 	if (allow_deprecated == 0 && ifp_dep != NULL) {
 		ifp = ifp_dep;
 		allow_deprecated = 1;
 
 		goto again;
 	}
 
 	IFNET_RUNLOCK();
 
 	return(copied);
 }
 
 /*
  * XXX almost dup'ed code with rip6_input.
  */
 static int
 icmp6_rip6_input(mp, off)
 	struct	mbuf **mp;
 	int	off;
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct in6pcb *in6p;
 	struct in6pcb *last = NULL;
 	struct sockaddr_in6 rip6src;
 	struct icmp6_hdr *icmp6;
 	struct mbuf *opts = NULL;
 
 #ifndef PULLDOWN_TEST
 	/* this is assumed to be safe. */
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
 	if (icmp6 == NULL) {
 		/* m is already reclaimed */
 		return IPPROTO_DONE;
 	}
 #endif
 
 	bzero(&rip6src, sizeof(rip6src));
 	rip6src.sin6_len = sizeof(struct sockaddr_in6);
 	rip6src.sin6_family = AF_INET6;
 	/* KAME hack: recover scopeid */
 	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
 
 	LIST_FOREACH(in6p, &ripcb, inp_list)
 	{
 		if ((in6p->inp_vflag & INP_IPV6) == 0)
 			continue;
 #ifdef HAVE_NRL_INPCB
 		if (!(in6p->in6p_flags & INP_IPV6))
 			continue;
 #endif
 		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
 			continue;
 		if (in6p->in6p_icmp6filt
 		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
 				 in6p->in6p_icmp6filt))
 			continue;
 		if (last) {
 			struct	mbuf *n;
 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
 				if (last->in6p_flags & IN6P_CONTROLOPTS)
 					ip6_savecontrol(last, &opts, ip6, n);
 				/* strip intermediate headers */
 				m_adj(n, off);
 				if (sbappendaddr(&last->in6p_socket->so_rcv,
 						 (struct sockaddr *)&rip6src,
 						 n, opts) == 0) {
 					/* should notify about lost packet */
 					m_freem(n);
 					if (opts) {
 						m_freem(opts);
 					}
 				} else
 					sorwakeup(last->in6p_socket);
 				opts = NULL;
 			}
 		}
 		last = in6p;
 	}
 	if (last) {
 		if (last->in6p_flags & IN6P_CONTROLOPTS)
 			ip6_savecontrol(last, &opts, ip6, m);
 		/* strip intermediate headers */
 		m_adj(m, off);
 		if (sbappendaddr(&last->in6p_socket->so_rcv,
 				 (struct sockaddr *)&rip6src, m, opts) == 0) {
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
 		} else
 			sorwakeup(last->in6p_socket);
 	} else {
 		m_freem(m);
 		ip6stat.ip6s_delivered--;
 	}
 	return IPPROTO_DONE;
 }
 
 /*
  * Reflect the ip6 packet back to the source.
  * OFF points to the icmp6 header, counted from the top of the mbuf.
  *
  * The packet is rewritten in place: any headers between the IPv6 and
  * ICMPv6 headers are removed, the old source becomes the destination, a
  * source address is chosen, the ICMPv6 checksum is recomputed and the
  * result is passed to ip6_output().  On the "bad" path the mbuf is freed
  * here.
  */
 void
 icmp6_reflect(m, off)
 	struct	mbuf *m;
 	size_t off;
 {
 	struct ip6_hdr *ip6;
 	struct icmp6_hdr *icmp6;
 	struct in6_ifaddr *ia;
 	struct in6_addr t, *src = 0;
 	int plen;
 	int type, code;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 sa6_src, sa6_dst;
 #ifdef COMPAT_RFC1885
 	int mtu = IPV6_MMTU;
 	struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst;
 #endif
 
 	/* too short to reflect */
 	if (off < sizeof(struct ip6_hdr)) {
 		nd6log((LOG_DEBUG,
 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
 		    __FILE__, __LINE__));
 		goto bad;
 	}
 
 	/*
 	 * If there are extra headers between IPv6 and ICMPv6, strip
 	 * off that header first.
 	 */
 #ifdef DIAGNOSTIC
 	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
 		panic("assumption failed in icmp6_reflect");
 #endif
 	if (off > sizeof(struct ip6_hdr)) {
 		size_t l;
 		struct ip6_hdr nip6;
 
 		/*
 		 * Save the IPv6 header, trim the front of the chain so that
 		 * exactly sizeof(struct ip6_hdr) bytes precede the ICMPv6
 		 * header, then write the saved header back over them.
 		 */
 		l = off - sizeof(struct ip6_hdr);
 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
 		m_adj(m, l);
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			/*
 			 * NOTE(review): returns without m_freem(); presumably
 			 * m_pullup() releases the chain on failure -- confirm.
 			 */
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
 	} else /* off == sizeof(struct ip6_hdr) */ {
 		size_t l;
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 	}
 	/* plen: payload length, later used as the checksum coverage */
 	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
 	type = icmp6->icmp6_type; /* keep type for statistics */
 	code = icmp6->icmp6_code; /* ditto. */
 
 	/* t keeps the original destination; it is the preferred reply source */
 	t = ip6->ip6_dst;
 	/*
 	 * ip6_input() drops a packet if its src is multicast.
 	 * So, the src is never multicast.
 	 */
 	ip6->ip6_dst = ip6->ip6_src;
 
 	/*
 	 * XXX: make sure to embed scope zone information, using
 	 * already embedded IDs or the received interface (if any).
 	 * Note that rcvif may be NULL.
 	 * TODO: scoped routing case (XXX).
 	 */
 	bzero(&sa6_src, sizeof(sa6_src));
 	sa6_src.sin6_family = AF_INET6;
 	sa6_src.sin6_len = sizeof(sa6_src);
 	sa6_src.sin6_addr = ip6->ip6_dst;
 	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
 	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
 	bzero(&sa6_dst, sizeof(sa6_dst));
 	sa6_dst.sin6_family = AF_INET6;
 	sa6_dst.sin6_len = sizeof(sa6_dst);
 	sa6_dst.sin6_addr = t;
 	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
 	in6_embedscope(&t, &sa6_dst, NULL, NULL);
 
 #ifdef COMPAT_RFC1885
 	/*
 	 * xxx guess MTU
 	 * RFC 1885 requires that echo reply should be truncated if it
 	 * does not fit in with (return) path MTU, but the description was
 	 * removed in the new spec.
 	 */
 	/* (re)fill the cached route when it is empty or for another dst */
 	if (icmp6_reflect_rt.ro_rt == 0 ||
 	    ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) {
 		if (icmp6_reflect_rt.ro_rt) {
 			RTFREE(icmp6_reflect_rt.ro_rt);
 			icmp6_reflect_rt.ro_rt = 0;
 		}
 		bzero(sin6, sizeof(*sin6));
 		sin6->sin6_family = PF_INET6;
 		sin6->sin6_len = sizeof(struct sockaddr_in6);
 		sin6->sin6_addr = ip6->ip6_dst;
 
 		rtalloc_ign((struct route *)&icmp6_reflect_rt.ro_rt,
 			    RTF_PRCLONING);
 	}
 
 	if (icmp6_reflect_rt.ro_rt == 0)
 		goto bad;
 
 	if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST)
 	    && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu)
 		mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu;
 
 	/* a negative length makes m_adj() trim from the tail */
 	if (mtu < m->m_pkthdr.len) {
 		plen -= (m->m_pkthdr.len - mtu);
 		m_adj(m, mtu - m->m_pkthdr.len);
 	}
 #endif
 	/*
 	 * If the incoming packet was addressed directly to us(i.e. unicast),
 	 * use dst as the src for the reply.
 	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
 	 * (for example) when we encounter an error while forwarding procedure
 	 * destined to a duplicated address of ours.
 	 */
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
 		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
 		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
 			src = &t;
 			break;
 		}
 	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
 		/*
 		 * This is the case if the dst is our link-local address
 		 * and the sender is also ourselves.
 		 */
 		src = &t;
 	}
 
 	if (src == 0) {
 		int e;
 		struct route_in6 ro;
 
 		/*
 		 * This case matches to multicasts, our anycast, or unicasts
 		 * that we do not own.  Select a source address based on the
 		 * source address of the erroneous packet.
 		 */
 		bzero(&ro, sizeof(ro));
 		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
 		if (ro.ro_rt)
 			RTFREE(ro.ro_rt); /* XXX: we could use this */
 		if (src == NULL) {
 			nd6log((LOG_DEBUG,
 			    "icmp6_reflect: source can't be determined: "
 			    "dst=%s, error=%d\n",
 			    ip6_sprintf(&sa6_src.sin6_addr), e));
 			goto bad;
 		}
 	}
 
 	ip6->ip6_src = *src;
 
 	/* reset flow label/class, then re-stamp the version bits */
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	if (m->m_pkthdr.rcvif) {
 		/* XXX: This may not be the outgoing interface */
 		ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
 	} else
 		ip6->ip6_hlim = ip6_defhlim;
 
 	/* the checksum field must be zero while the sum is computed */
 	icmp6->icmp6_cksum = 0;
 	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 					sizeof(struct ip6_hdr), plen);
 
 	/*
 	 * XXX option handling
 	 */
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
 #ifdef COMPAT_RFC1885
 	ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, NULL);
 #else
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 #endif
 	/* m is not touched after ip6_output(); only saved type/code are used */
 	if (outif)
 		icmp6_ifoutstat_inc(outif, type, code);
 
 	return;
 
  bad:
 	m_freem(m);
 	return;
 }
 
 /*
  * Fast timeout entry point for ICMPv6.
  * It does nothing itself; it only forwards to mld6_fasttimeo().
  */
 void
 icmp6_fasttimo()
 {
 
 	mld6_fasttimeo();
 }
 
 /*
  * Format the three addresses of a redirect as "(src=... dst=... tgt=...)"
  * for log messages.
  *
  * The text lives in a single function-static buffer, so the returned
  * pointer is only good until the next call.
  */
 static const char *
 icmp6_redirect_diag(src6, dst6, tgt6)
 	struct in6_addr *src6;
 	struct in6_addr *dst6;
 	struct in6_addr *tgt6;
 {
 	static char diag[1024];
 
 	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)",
 	    ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6));
 
 	return diag;
 }
 
 /*
  * Process a received ND Redirect message.
  *
  * m:   the received packet, IPv6 header at the front.
  * off: offset of the ND Redirect (ICMPv6) header within the packet.
  *
  * The message is ignored when this node forwards packets or when
  * icmp6_rediraccept is off.  Otherwise it is validated (link-local
  * source, hop limit 255, sender is the current gateway for the redirected
  * destination, unicast destination, router or on-link case, sane ND
  * options).  A valid redirect updates the neighbor cache, installs a host
  * route through rtredirect() in the better-router case, and is announced
  * to the upper layers with pfctlinput(PRC_REDIRECT_HOST).
  *
  * The "freeit" and "bad" exits free m; "bad" also counts the message in
  * icp6s_badredirect.
  */
 void
 icmp6_redirect_input(m, off)
 	struct mbuf *m;
 	int off;
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_redirect *nd_rd;
 	int icmp6len = ntohs(ip6->ip6_plen);
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	u_char *redirhdr = NULL;
 	int redirhdrlen = 0;
 	struct rtentry *rt = NULL;
 	int is_router;
 	int is_onlink;
 	struct in6_addr src6 = ip6->ip6_src;
 	struct in6_addr redtgt6;
 	struct in6_addr reddst6;
 	union nd_opts ndopts;
 
 	/*
 	 * NOTE(review): m has already been dereferenced by the initializers
 	 * above, so the !m test cannot protect anything; and the !ifp case
 	 * returns without freeing m.
 	 */
 	if (!m || !ifp)
 		return;
 
 	/* XXX if we are router, we don't update route by icmp6 redirect */
 	if (ip6_forwarding)
 		goto freeit;
 	if (!icmp6_rediraccept)
 		goto freeit;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
 	if (nd_rd == NULL) {
 		/* presumably m was reclaimed by IP6_EXTHDR_GET -- confirm */
 		icmp6stat.icp6s_tooshort++;
 		return;
 	}
 #endif
 	redtgt6 = nd_rd->nd_rd_target;
 	reddst6 = nd_rd->nd_rd_dst;
 
 	/* embed the receiving interface index into link-local addresses */
 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
 		redtgt6.s6_addr16[1] = htons(ifp->if_index);
 	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
 		reddst6.s6_addr16[1] = htons(ifp->if_index);
 
 	/* validation */
 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
 		nd6log((LOG_ERR,
 			"ICMP6 redirect sent from %s rejected; "
 			"must be from linklocal\n", ip6_sprintf(&src6)));
 		goto bad;
 	}
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 			"ICMP6 redirect sent from %s rejected; "
 			"hlim=%d (must be 255)\n",
 			ip6_sprintf(&src6), ip6->ip6_hlim));
 		goto bad;
 	}
     {
 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
 	struct sockaddr_in6 sin6;
 	struct in6_addr *gw6;
 
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
 	/*
 	 * Look up the current route to the redirected destination.  When
 	 * one is found, it is released again on every path out of this
 	 * brace block.
 	 */
 	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
 	if (rt) {
 		if (rt->rt_gateway == NULL ||
 		    rt->rt_gateway->sa_family != AF_INET6) {
 			nd6log((LOG_ERR,
 			    "ICMP6 redirect rejected; no route "
 			    "with inet6 gateway found for redirect dst: %s\n",
 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
-			RTFREE(rt);
+			RTFREE_LOCKED(rt);
 			goto bad;
 		}
 
 		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
 		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
 			nd6log((LOG_ERR,
 				"ICMP6 redirect rejected; "
 				"not equal to gw-for-src=%s (must be same): "
 				"%s\n",
 				ip6_sprintf(gw6),
 				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
-			RTFREE(rt);
+			RTFREE_LOCKED(rt);
 			goto bad;
 		}
 	} else {
 		nd6log((LOG_ERR,
 			"ICMP6 redirect rejected; "
 			"no route found for redirect dst: %s\n",
 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
-	RTFREE(rt);
+	RTFREE_LOCKED(rt);
 	rt = NULL;
     }
 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
 		nd6log((LOG_ERR,
 			"ICMP6 redirect rejected; "
 			"redirect dst must be unicast: %s\n",
 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	/*
 	 * A link-local target names a better router; target == destination
 	 * means the destination itself is on-link.
 	 */
 	is_router = is_onlink = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
 		is_router = 1;	/* router case */
 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
 		is_onlink = 1;	/* on-link destination case */
 	if (!is_router && !is_onlink) {
 		nd6log((LOG_ERR,
 			"ICMP6 redirect rejected; "
 			"neither router case nor onlink case: %s\n",
 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 	/* validation passed */
 
 	/* what remains after the fixed redirect header is ND options */
 	icmp6len -= sizeof(*nd_rd);
 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO, "icmp6_redirect_input: "
 			"invalid ND option, rejected: %s\n",
 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
 		/* nd_opt_len is in units of 8 octets */
 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
 	}
 
 	/* redirhdr/redirhdrlen are recorded here but not used further below */
 	if (ndopts.nd_opts_rh) {
 		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
 		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
 	}
 
 	/* option length must equal if_addrlen + 2, rounded up to 8 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 			"icmp6_redirect_input: lladdrlen mismatch for %s "
 			"(if %d, icmp6 packet %d): %s\n",
 			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	/* RFC 2461 8.3 */
 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
 			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
 
 	if (!is_onlink) {	/* better router case.  perform rtredirect. */
 		/* perform rtredirect */
 		struct sockaddr_in6 sdst;
 		struct sockaddr_in6 sgw;
 		struct sockaddr_in6 ssrc;
 
 		bzero(&sdst, sizeof(sdst));
 		bzero(&sgw, sizeof(sgw));
 		bzero(&ssrc, sizeof(ssrc));
 		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
 		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
 			sizeof(struct sockaddr_in6);
 		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
 		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
 			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
-			   (struct sockaddr *)&ssrc,
-			   (struct rtentry **)NULL);
+			   (struct sockaddr *)&ssrc);
 	}
 	/* finally update cached route in each socket via pfctlinput */
     {
 	struct sockaddr_in6 sdst;
 
 	bzero(&sdst, sizeof(sdst));
 	sdst.sin6_family = AF_INET6;
 	sdst.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
 #ifdef IPSEC
 	key_sa_routechange((struct sockaddr *)&sdst);
 #endif
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	icmp6stat.icp6s_badredirect++;
 	m_freem(m);
 }
 
 /*
  * Build and send an ND Redirect to the source of the packet m0.
  *
  * m0: the packet that triggered the redirect.  It is always consumed:
  *     either chained behind the new message as the payload of the
  *     Redirected Header option, or freed.
  * rt: the route that m0 is being forwarded along.  With RTF_GATEWAY the
  *     redirect target is the (link-local) gateway of rt, otherwise the
  *     target is the destination itself (on-link case).
  *
  * Nothing is sent unless this node forwards packets and does not accept
  * router advertisements, rt is up and has an interface, the source of m0
  * is a neighbor on that interface, the destination of m0 is not
  * multicast, and the ICMPv6 error rate limit allows it.
  */
 void
 icmp6_redirect_output(m0, rt)
 	struct mbuf *m0;
 	struct rtentry *rt;
 {
 	struct ifnet *ifp;	/* my outgoing interface */
 	struct in6_addr *ifp_ll6;
 	struct in6_addr *router_ll6;
 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
 	struct mbuf *m = NULL;	/* newly allocated one */
 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
 	struct nd_redirect *nd_rd;
 	size_t maxlen;
 	u_char *p;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 src_sa;
 
 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
 
 	/* if we are not router, we don't send icmp6 redirect */
 	if (!ip6_forwarding || ip6_accept_rtadv)
 		goto fail;
 
 	/* sanity check */
 	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
 		goto fail;
 
 	/*
 	 * Address check:
 	 *  the source address must identify a neighbor, and
 	 *  the destination address must not be a multicast address
 	 *  [RFC 2461, sec 8.2]
 	 */
 	sip6 = mtod(m0, struct ip6_hdr *);
 	bzero(&src_sa, sizeof(src_sa));
 	src_sa.sin6_family = AF_INET6;
 	src_sa.sin6_len = sizeof(src_sa);
 	src_sa.sin6_addr = sip6->ip6_src;
 	/* we don't currently use sin6_scope_id, but eventually use it */
 	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
 		goto fail;
 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
 		goto fail;	/* what should we do here? */
 
 	/* rate limit */
 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
 		goto fail;
 
 	/*
 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
 	 * we almost always ask for an mbuf cluster for simplicity.
 	 * (MHLEN < IPV6_MMTU is almost always true)
 	 */
 #if IPV6_MMTU >= MCLBYTES
 # error assumption failed about IPV6_MMTU and MCLBYTES
 #endif
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 	if (m && IPV6_MMTU >= MHLEN)
 		MCLGET(m, M_DONTWAIT);
 	if (!m)
 		goto fail;
 	m->m_pkthdr.rcvif = NULL;
 	m->m_len = 0;
 	/* maxlen bounds everything that is written into m below */
 	maxlen = M_TRAILINGSPACE(m);
 	maxlen = min(IPV6_MMTU, maxlen);
 	/* just for safety */
 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
 		goto fail;
 	}
 
 	{
 		/* get ip6 linklocal address for ifp(my outgoing interface). */
 		struct in6_ifaddr *ia;
 		if ((ia = in6ifa_ifpforlinklocal(ifp,
 						 IN6_IFF_NOTREADY|
 						 IN6_IFF_ANYCAST)) == NULL)
 			goto fail;
 		ifp_ll6 = &ia->ia_addr.sin6_addr;
 	}
 
 	/* get ip6 linklocal address for the router. */
 	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
 		struct sockaddr_in6 *sin6;
 		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
 		router_ll6 = &sin6->sin6_addr;
 		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
 			router_ll6 = (struct in6_addr *)NULL;
 	} else
 		router_ll6 = (struct in6_addr *)NULL;
 
 	/* ip6 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	/* ip6->ip6_plen will be set later */
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
 
 	/* ND Redirect */
 	nd_rd = (struct nd_redirect *)(ip6 + 1);
 	nd_rd->nd_rd_type = ND_REDIRECT;
 	nd_rd->nd_rd_code = 0;
 	nd_rd->nd_rd_reserved = 0;
 	if (rt->rt_flags & RTF_GATEWAY) {
 		/*
 		 * nd_rd->nd_rd_target must be a link-local address in
 		 * better router cases.
 		 */
 		if (!router_ll6)
 			goto fail;
 		bcopy(router_ll6, &nd_rd->nd_rd_target,
 		      sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		      sizeof(nd_rd->nd_rd_dst));
 	} else {
 		/* make sure redtgt == reddst */
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
 		      sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		      sizeof(nd_rd->nd_rd_dst));
 	}
 
 	/* p is the write cursor for the options that follow */
 	p = (u_char *)(nd_rd + 1);
 
 	if (!router_ll6)
 		goto nolladdropt;
 
     {
 	/* target lladdr option */
 	struct rtentry *rt_router = NULL;
 	int len;
 	struct sockaddr_dl *sdl;
 	struct nd_opt_hdr *nd_opt;
 	char *lladdr;
 
 	rt_router = nd6_lookup(router_ll6, 0, ifp);
 	if (!rt_router)
 		goto nolladdropt;
 	len = sizeof(*nd_opt) + ifp->if_addrlen;
 	len = (len + 7) & ~7;	/* round by 8 */
 	/* safety check */
 	if (len + (p - (u_char *)ip6) > maxlen)
 		goto nolladdropt;
 	if (!(rt_router->rt_flags & RTF_GATEWAY) &&
 	    (rt_router->rt_flags & RTF_LLINFO) &&
 	    (rt_router->rt_gateway->sa_family == AF_LINK) &&
 	    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
 	    sdl->sdl_alen) {
 		nd_opt = (struct nd_opt_hdr *)p;
 		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
 		nd_opt->nd_opt_len = len >> 3;
 		lladdr = (char *)(nd_opt + 1);
 		bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
 		p += len;
 	}
     }
 nolladdropt:;
 
 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 	/* just to be safe */
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m0->m_flags & M_DECRYPTED)
 		goto noredhdropt;
 #endif
 	if (p - (u_char *)ip6 > maxlen)
 		goto noredhdropt;
 
     {
 	/* redirected header option */
 	int len;
 	struct nd_opt_rd_hdr *nd_opt_rh;
 
 	/*
 	 * compute the maximum size for icmp6 redirect header option.
 	 * XXX room for auth header?
 	 */
 	len = maxlen - (p - (u_char *)ip6);
 	len &= ~7;
 
 	/* This is just for simplicity. */
 	if (m0->m_pkthdr.len != m0->m_len) {
 		if (m0->m_next) {
 			m_freem(m0->m_next);
 			m0->m_next = NULL;
 		}
 		m0->m_pkthdr.len = m0->m_len;
 	}
 
 	/*
 	 * Redirected header option spec (RFC2461 4.6.3) talks nothing
 	 * about padding/truncate rule for the original IP packet.
 	 * From the discussion on IPv6imp in Feb 1999, the consensus was:
 	 * - "attach as much as possible" is the goal
 	 * - pad if not aligned (original size can be guessed by original
 	 *   ip6 header)
 	 * Following code adds the padding if it is simple enough,
 	 * and truncates if not.
 	 */
 	if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
 		panic("assumption failed in %s:%d", __FILE__, __LINE__);
 
 	if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
 		/* not enough room, truncate */
 		m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh);
 	} else {
 		/* enough room, pad or truncate */
 		size_t extra;
 
 		extra = m0->m_pkthdr.len % 8;
 		if (extra) {
 			/* pad if easy enough, truncate if not */
 			if (8 - extra <= M_TRAILINGSPACE(m0)) {
 				/* pad */
 				m0->m_len += (8 - extra);
 				m0->m_pkthdr.len += (8 - extra);
 			} else {
 				/* truncate */
 				m0->m_pkthdr.len -= extra;
 				m0->m_len -= extra;
 			}
 		}
 		len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
 		m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh);
 	}
 
 	nd_opt_rh = (struct nd_opt_rd_hdr *)p;
 	bzero(nd_opt_rh, sizeof(*nd_opt_rh));
 	nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
 	nd_opt_rh->nd_opt_rh_len = len >> 3;
 	p += sizeof(*nd_opt_rh);
 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 	/* connect m0 to m */
 	m_tag_delete_chain(m0, NULL);
 	m0->m_flags &= ~M_PKTHDR;
 	m->m_next = m0;
 	m->m_pkthdr.len = m->m_len + m0->m_len;
 	/* m0 now belongs to m; sip6 still points into its data */
 	m0 = NULL;
     }
 noredhdropt:;
 	/*
 	 * Clear the embedded link IDs of the original header while the
 	 * storage sip6 points into is still alive.  When the redirected
 	 * header option was skipped, m0 is released just below and sip6
 	 * must not be touched after that.
 	 */
 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
 		sip6->ip6_src.s6_addr16[1] = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
 		sip6->ip6_dst.s6_addr16[1] = 0;
 
 	if (m0) {
 		m_freem(m0);
 		m0 = NULL;
 	}
 
 #if 0
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
 		ip6->ip6_src.s6_addr16[1] = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
 		ip6->ip6_dst.s6_addr16[1] = 0;
 #endif
 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
 		nd_rd->nd_rd_target.s6_addr16[1] = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
 		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
 
 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
 
 	/* the checksum field must be zero while the sum is computed */
 	nd_rd->nd_rd_cksum = 0;
 	nd_rd->nd_rd_cksum
 		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
 
 	/* send the packet to outside... */
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_redirect);
 	}
 	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
 
 	return;
 
 fail:
 	if (m)
 		m_freem(m);
 	if (m0)
 		m_freem(m0);
 }
 
 #ifdef HAVE_NRL_INPCB
 #define sotoin6pcb	sotoinpcb
 #define in6pcb		inpcb
 #define in6p_icmp6filt	inp_icmp6filt
 #endif
 /*
  * ICMPv6 socket option processing.
  *
  * Only ICMP6_FILTER at level IPPROTO_ICMPV6 is handled: PRCO_SETOPT copies
  * a new filter in from the caller, PRCO_GETOPT copies the current one out.
  *
  * Returns 0 or an errno value:
  *   EINVAL       sopt is missing, the level is not IPPROTO_ICMPV6, or the
  *                pcb has no filter storage
  *   ENOPROTOOPT  any option other than ICMP6_FILTER
  *   EMSGSIZE     a set request whose size is not that of icmp6_filter
  * Any other direction is accepted and does nothing.
  */
 int
 icmp6_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	struct inpcb *inp = sotoinpcb(so);
 	int optlen;
 
 	if (sopt == NULL || sopt->sopt_level != IPPROTO_ICMPV6)
 		return EINVAL;
 	optlen = sopt->sopt_valsize;
 
 	if (sopt->sopt_dir == PRCO_SETOPT) {
 		if (sopt->sopt_name != ICMP6_FILTER)
 			return ENOPROTOOPT;
 		if (optlen != sizeof(struct icmp6_filter))
 			return EMSGSIZE;
 		if (inp->in6p_icmp6filt == NULL)
 			return EINVAL;
 		return sooptcopyin(sopt, inp->in6p_icmp6filt, optlen,
 		    optlen);
 	}
 
 	if (sopt->sopt_dir == PRCO_GETOPT) {
 		if (sopt->sopt_name != ICMP6_FILTER)
 			return ENOPROTOOPT;
 		if (inp->in6p_icmp6filt == NULL)
 			return EINVAL;
 		return sooptcopyout(sopt, inp->in6p_icmp6filt,
 		    sizeof(struct icmp6_filter));
 	}
 
 	return 0;
 }
 #ifdef HAVE_NRL_INPCB
 #undef sotoin6pcb
 #undef in6pcb
 #undef in6p_icmp6filt
 #endif
 
 #ifndef HAVE_PPSRATECHECK
 #ifndef timersub
 /*
  * Fallback timersub(): *vvp = *tvp - *uvp for struct timeval pointers.
  * A negative microsecond difference borrows one second, so tv_usec of the
  * result is brought back to a non-negative value.
  */
 #define	timersub(tvp, uvp, vvp)						\
 	do {								\
 		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
 		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
 		if ((vvp)->tv_usec < 0) {				\
 			(vvp)->tv_sec--;				\
 			(vvp)->tv_usec += 1000000;			\
 		}							\
 	} while (0)
 #endif
 
 /*
  * ppsratecheck(): packets (or events) per second limitation.
  *
  * lasttime: start of the current one-second window; rewritten here when
  *           a new window begins.
  * curpps:   number of events seen in the current window; updated here.
  * maxpps:   maximum pps allowed; a negative value means "no limit".
  *
  * Returns 1 when the event is within the limit and 0 when it should be
  * suppressed.
  */
 static int
 ppsratecheck(lasttime, curpps, maxpps)
 	struct timeval *lasttime;
 	int *curpps;
 	int maxpps;	/* maximum pps allowed */
 {
 	struct timeval tv, delta;
 	int s, rv;
 
 	s = splclock();
 	microtime(&tv);
 	splx(s);
 
 	timersub(&tv, lasttime, &delta);
 
 	/*
 	 * Check for 0,0 so that the message will be seen at least once.
 	 * If more than one second has passed since the last update of
 	 * lasttime, reset the counter.
 	 *
 	 * We do increment *curpps even in *curpps < maxpps case, as some may
 	 * try to use *curpps for stat purposes as well.
 	 */
 	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
 	    delta.tv_sec >= 1) {
 		*lasttime = tv;
 		*curpps = 0;
 		rv = 1;
 	} else if (maxpps < 0)
 		rv = 1;
 	else if (*curpps < maxpps)
 		rv = 1;
 	else
 		rv = 0;
 
 	/*
 	 * Be careful about wrap-around: the counter saturates at the
 	 * largest int value instead of overflowing.  The limit is spelled
 	 * ~0U >> 1 so that no extra header is needed.  The test must not be
 	 * written as "*curpps + 1 > *curpps": signed overflow is undefined
 	 * and a compiler may treat that comparison as always true.
 	 *
 	 * This is done unconditionally, not only under DIAGNOSTIC, because
 	 * how often this function is called depends on the caller.
 	 */
 	if (*curpps < (int)(~0U >> 1))
 		*curpps = *curpps + 1;
 
 	return (rv);
 }
 #endif
 
 /*
  * Decide whether an ICMPv6 error may be sent under the rate limit.
  *
  * Returns 0 when it is fine to send the packet and 1 when the router
  * SHOULD NOT send it.  The decision is made by ppsratecheck() using the
  * global counters and the icmp6errppslim limit.
  *
  * XXX per-destination/type check necessary?
  */
 static int
 icmp6_ratelimit(dst, type, code)
 	const struct in6_addr *dst;	/* not used at this moment */
 	const int type;			/* not used at this moment */
 	const int code;			/* not used at this moment */
 {
 
 	/* PPS limit: a non-zero answer means we are still below it */
 	if (ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
 	    icmp6errppslim))
 		return 0;	/* okay to send */
 
 	/* The packet is subject to rate limit */
 	return 1;
 }
Index: head/sys/netinet6/in6.c
===================================================================
--- head/sys/netinet6/in6.c	(revision 120726)
+++ head/sys/netinet6/in6.c	(revision 120727)
@@ -1,2455 +1,2461 @@
 /*	$FreeBSD$	*/
 /*	$KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.2 (Berkeley) 11/15/93
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #ifndef SCOPEDROUTING
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #endif
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
 #ifndef SCOPEDROUTING
 #include <netinet6/in6_pcb.h>
 #endif
 
 #include <net/net_osdep.h>
 
 MALLOC_DEFINE(M_IPMADDR, "in6_multi", "internet multicast address");
 
 /*
  * Definitions of some constant IP6 addresses.
  * Each one is initialized from the IN6ADDR_*_INIT macro of the same name.
  */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_nodelocal_allnodes =
 	IN6ADDR_NODELOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 
 /*
  * Constant netmasks, initialized from the IN6MASK* macros.  In this file
  * in6mask128 serves as the netmask of the loopback host route and
  * in6mask32 as the netmask of the multicast routes.
  */
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
 const struct in6_addr in6mask128 = IN6MASK128;
 
 /* An AF_INET6 sockaddr carrying IN6ADDR_ANY_INIT as its address. */
 const struct sockaddr_in6 sa6_any = {sizeof(sa6_any), AF_INET6,
 				     0, 0, IN6ADDR_ANY_INIT, 0};
 
 /* Prototypes of file-local helpers. */
 static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
 	struct ifnet *, struct thread *));
 static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *,
 			   struct sockaddr_in6 *, int));
 static void in6_unlink_ifa __P((struct in6_ifaddr *, struct ifnet *));
 
 struct in6_multihead in6_multihead;	/* XXX BSS initialization */
 
 /*
  * Hook taking an IPv6 address; it is not assigned in this part of the
  * file.  NOTE(review): presumably installed by the faith(4) code -- confirm.
  */
 int	(*faithprefix_p)(struct in6_addr *);
 
 /*
  * Subroutine for in6_ifaddloop() and in6_ifremloop().
  * This routine does actual work: it issues the rtrequest() selected by
  * cmd for a host route to the address of ifa, logs a failure, and reports
  * the change through rt_newaddrmsg() when rtrequest() handed back a route
  * entry.  The code expects cmd to be RTM_ADD or RTM_DELETE.
  */
 static void
 in6_ifloop_request(int cmd, struct ifaddr *ifa)
 {
 	struct sockaddr_in6 all1_sa;
 	struct rtentry *nrt = NULL;
 	int e;
 	
 	/* Build the netmask sockaddr for rtrequest() from in6mask128. */
 	bzero(&all1_sa, sizeof(all1_sa));
 	all1_sa.sin6_family = AF_INET6;
 	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
 	all1_sa.sin6_addr = in6mask128;
 
 	/*
 	 * We specify the address itself as the gateway, and set the
 	 * RTF_LLINFO flag, so that the corresponding host route would have
 	 * the flag, and thus applications that assume traditional behavior
 	 * would be happy.  Note that we assume the caller of the function
 	 * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest,
 	 * which changes the outgoing interface to the loopback interface.
 	 */
 	e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr,
 		      (struct sockaddr *)&all1_sa,
 		      RTF_UP|RTF_HOST|RTF_LLINFO, &nrt);
 	if (e != 0) {
 		/* A failure is only logged; processing continues below. */
 		log(LOG_ERR, "in6_ifloop_request: "
 		    "%s operation failed for %s (errno=%d)\n",
 		    cmd == RTM_ADD ? "ADD" : "DELETE",
 		    ip6_sprintf(&((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr),
 		    e);
 	}
 
-	/*
-	 * Make sure rt_ifa be equal to IFA, the second argument of the
-	 * function.
-	 * We need this because when we refer to rt_ifa->ia6_flags in
-	 * ip6_input, we assume that the rt_ifa points to the address instead
-	 * of the loopback address.
-	 */
-	if (cmd == RTM_ADD && nrt && ifa != nrt->rt_ifa) {
-		IFAFREE(nrt->rt_ifa);
-		IFAREF(ifa);
-		nrt->rt_ifa = ifa;
-	}
-
-	/*
-	 * Report the addition/removal of the address to the routing socket.
-	 * XXX: since we called rtinit for a p2p interface with a destination,
-	 *      we end up reporting twice in such a case.  Should we rather
-	 *      omit the second report?
-	 */
 	/* nrt is still NULL unless rtrequest() stored a route entry in it. */
 	if (nrt) {
+		RT_LOCK(nrt);
+		/*
+		 * Make sure rt_ifa be equal to IFA, the second argument of
+		 * the function.  We need this because when we refer to
+		 * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa
+		 * points to the address instead of the loopback address.
+		 */
+		if (cmd == RTM_ADD && ifa != nrt->rt_ifa) {
+			IFAFREE(nrt->rt_ifa);
+			IFAREF(ifa);
+			nrt->rt_ifa = ifa;
+		}
+
+		/*
+		 * Report the addition/removal of the address to the routing
+		 * socket.
+		 *
+		 * XXX: since we called rtinit for a p2p interface with a
+		 *      destination, we end up reporting twice in such a case.
+		 *      Should we rather omit the second report?
+		 */
 		rt_newaddrmsg(cmd, ifa, e, nrt);
 		if (cmd == RTM_DELETE) {
 			/*
 			 * NOTE(review): there is no RT_UNLOCK() on this path;
 			 * presumably rtfree() releases the lock taken above
 			 * -- confirm.
 			 */
-			RTFREE(nrt);
+			rtfree(nrt);
 		} else {
 			/* the cmd must be RTM_ADD here */
 			nrt->rt_refcnt--;
+			RT_UNLOCK(nrt);
 		}
 	}
 }
 
 /*
  * Add ownaddr as loopback rtentry.  We previously added the route only if
  * necessary (ex. on a p2p link).  However, since we now manage addresses
  * separately from prefixes, we should always add the route.  We can't
  * rely on the cloning mechanism from the corresponding interface route
  * any more.
  */
 static void
 in6_ifaddloop(struct ifaddr *ifa)
 {
 	struct rtentry *rt;
 
 	/* If there is no loopback entry, allocate one. */
 	rt = rtalloc1(ifa->ifa_addr, 0, 0);
 	/*
 	 * The route found only counts as a loopback entry when it is a host
 	 * route whose interface has IFF_LOOPBACK set; in every other case
 	 * (including no route at all) the entry is requested here.
 	 */
 	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
 	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
 		in6_ifloop_request(RTM_ADD, ifa);
 	/* Give back the route returned by the lookup, if there was one. */
 	if (rt)
-		rt->rt_refcnt--;
+		rtfree(rt);
 }
 
 /*
  * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(),
  * if it exists.  Nothing is removed while the same address is still
  * found more than once on the in6_ifaddr list.
  */
 static void
 in6_ifremloop(struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia;
 	struct rtentry *rt;
 	int ia_count = 0;
 
 	/*
 	 * Some of BSD variants do not remove cloned routes
 	 * from an interface direct route, when removing the direct route
 	 * (see comments in net/net_osdep.h).  Even for variants that do remove
 	 * cloned routes, they could fail to remove the cloned routes when
 	 * we handle multiple addresses that share a common prefix.
 	 * So, we should remove the route corresponding to the deleted address
 	 * regardless of the result of in6_is_ifloop_auto().
 	 */
 
 	/*
 	 * Delete the entry only if exactly one ifa exists.  More than one ifa
 	 * can exist if we assign a same single address to multiple
 	 * (probably p2p) interfaces.
 	 * XXX: we should avoid such a configuration in IPv6...
 	 */
 	/* Count the matches, but stop as soon as a second one shows up. */
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) {
 			ia_count++;
 			if (ia_count > 1)
 				break;
 		}
 	}
 
 	if (ia_count == 1) {
 		/*
 		 * Before deleting, check if a corresponding loopbacked host
 		 * route surely exists.  With this check, we can avoid
 		 * deleting an interface direct route whose destination is the
 		 * same as the address being removed.  This can happen when
 		 * removing a subnet-router anycast address on an interface
 		 * attached to a shared medium.
 		 */
 		rt = rtalloc1(ifa->ifa_addr, 0, 0);
-		if (rt != NULL && (rt->rt_flags & RTF_HOST) != 0 &&
-		    (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
-			rt->rt_refcnt--;
-			in6_ifloop_request(RTM_DELETE, ifa);
 		/*
 		 * NOTE(review): when the route is not a loopback host route
 		 * it is only unlocked below, not released with rtfree();
 		 * whether the lookup reference is dropped elsewhere cannot
 		 * be told from here -- confirm.
 		 */
+		if (rt != NULL) {
+			if ((rt->rt_flags & RTF_HOST) != 0 &&
+			    (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
+				rtfree(rt);
+				in6_ifloop_request(RTM_DELETE, ifa);
+			} else
+				RT_UNLOCK(rt);
 		}
 	}
 }
 
 int
 in6_ifindex2scopeid(idx)
 	int idx;
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *sin6;
 
 	if (idx < 0 || if_index < idx)
 		return -1;
 	ifp = ifnet_byindex(idx);
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
 			return sin6->sin6_scope_id & 0xffff;
 	}
 
 	return -1;
 }
 
 /*
  * Convert an IPv6 netmask into a prefix length.
  *
  * mask: the netmask to examine.
  * lim0: optional end pointer, one past the last byte to look at.  When it
  *	is NULL, lies in front of mask, or lies more than sizeof(*mask)
  *	bytes behind it, the whole address is examined.
  *
  * Returns the number of leading one bits, or -1 when the examined bytes
  * do not form a contiguous mask (a one bit follows a zero bit).
  */
 int
 in6_mask2len(mask, lim0)
 	struct in6_addr *mask;
 	u_char *lim0;
 {
 	int x = 0, y;
 	u_char *lim = lim0, *p;
 
 	/*
 	 * ignore the scope_id part.  A limit in front of the mask is named
 	 * explicitly; it used to be caught only because the negative
 	 * pointer difference wrapped around in the unsigned comparison.
 	 */
 	if (lim0 == NULL || lim0 < (u_char *)mask ||
 	    (size_t)(lim0 - (u_char *)mask) > sizeof(*mask))
 		lim = (u_char *)mask + sizeof(*mask);
 	/* x counts the leading bytes that are all ones. */
 	for (p = (u_char *)mask; p < lim; x++, p++) {
 		if (*p != 0xff)
 			break;
 	}
 	/* y counts the leading one bits of the first byte that is not 0xff. */
 	y = 0;
 	if (p < lim) {
 		for (y = 0; y < 8; y++) {
 			if ((*p & (0x80 >> y)) == 0)
 				break;
 		}
 	}
 
 	/*
 	 * Everything behind the leading run of ones has to be zero.  The
 	 * partial byte is checked for y == 0 as well, so that a byte such
 	 * as 0x7f is rejected instead of being taken for an empty one.
 	 */
 	if (p < lim) {
 		if ((*p & (0x00ff >> y)) != 0)
 			return(-1);
 		for (p = p + 1; p < lim; p++)
 			if (*p != 0)
 				return(-1);
 	}
 	
 	return x * 8 + y;
 }
 
 /*
  * Fill *mask with the netmask that has its first len bits set.
  *
  * mask: the address to overwrite; every byte of it is written.
  * len:  prefix length in bits.  Values below 0 are treated as 0 and
  *	values above the width of the address as the full width, so that
  *	a bogus length can neither write past the address nor shift by a
  *	negative count.
  */
 void
 in6_len2mask(mask, len)
 	struct in6_addr *mask;
 	int len;
 {
 	u_char *bytes = (u_char *)mask;
 	int i, nbytes, whole, rest;
 
 	nbytes = (int)sizeof(*mask);
 	if (len < 0)
 		len = 0;
 	else if (len > nbytes * 8)
 		len = nbytes * 8;
 
 	whole = len / 8;	/* bytes that are all ones */
 	rest = len % 8;		/* one bits in the byte after them */
 	for (i = 0; i < nbytes; i++) {
 		if (i < whole)
 			bytes[i] = 0xff;
 		else if (i == whole && rest != 0)
 			bytes[i] = (0xff00 >> rest) & 0xff;
 		else
 			bytes[i] = 0;
 	}
 }
 
 /*
  * Conversions between the two views of an address entry: ifa2ia6() casts
  * a struct ifaddr pointer to a struct in6_ifaddr pointer, ia62ifa() takes
  * the address of the ia_ifa member of a struct in6_ifaddr.
  */
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
 /*
  * ioctl handler for IPv6 interface requests.
  *
  * so:   socket the request came in on; only handed on to
  *       in6_lifaddr_ioctl().
  * cmd:  ioctl command.
  * data: request buffer, read as struct in6_ifreq or struct in6_aliasreq
  *       depending on cmd.
  * ifp:  interface the request refers to; may be NULL only for the
  *       multicast routing statistics requests.
  * td:   calling thread, used for the privilege check.
  *
  * Multicast routing, neighbor discovery, scope and SIOC?LIFADDR requests
  * are handed to mrt6_ioctl(), nd6_ioctl(), scope6_*() and
  * in6_lifaddr_ioctl().  Address requests are handled here, and anything
  * unknown goes to the interface's own if_ioctl routine.
  *
  * Returns 0 on success or an errno value.
  */
 int
 in6_control(so, cmd, data, ifp, td)
 	struct	socket *so;
 	u_long cmd;
 	caddr_t	data;
 	struct ifnet *ifp;
 	struct thread *td;
 {
 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	int privileged;
 
 	/* Privileged when there is no thread or suser() returns 0. */
 	privileged = 0;
 	if (td == NULL || !suser(td))
 		privileged++;
 
 	/* These two do not need an interface. */
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
 		return (mrt6_ioctl(cmd, data));
 	}
 
 	if (ifp == NULL)
 		return(EOPNOTSUPP);
 
 	/* Neighbor discovery requests; the setters need privilege. */
 	switch (cmd) {
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCSDEFIFACE_IN6:
 	case SIOCSIFINFO_FLAGS:
 		if (!privileged)
 			return(EPERM);
 		/* fall through */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
 	case SIOCGDRLST_IN6:
 	case SIOCGPRLST_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return(nd6_ioctl(cmd, data, ifp));
 	}
 
 	switch (cmd) {
 	case SIOCSIFPREFIX_IN6:
 	case SIOCDIFPREFIX_IN6:
 	case SIOCAIFPREFIX_IN6:
 	case SIOCCIFPREFIX_IN6:
 	case SIOCSGIFPREFIX_IN6:
 	case SIOCGIFPREFIX_IN6:
 		log(LOG_NOTICE,
 		    "prefix ioctls are now invalidated. "
 		    "please use ifconfig.\n");
 		return(EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCSSCOPE6:
 		if (!privileged)
 			return(EPERM);
 		return(scope6_set(ifp, ifr->ifr_ifru.ifru_scope_id));
 		break;
 	case SIOCGSCOPE6:
 		return(scope6_get(ifp, ifr->ifr_ifru.ifru_scope_id));
 		break;
 	case SIOCGSCOPE6DEF:
 		return(scope6_get_default(ifr->ifr_ifru.ifru_scope_id));
 		break;
 	}
 
 	switch (cmd) {
 	case SIOCALIFADDR:
 	case SIOCDLIFADDR:
 		if (!privileged)
 			return(EPERM);
 		/* fall through */
 	case SIOCGLIFADDR:
 		return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
 	}
 
 	/*
 	 * Find address for this interface, if it exists.
 	 */
 	if (ifra->ifra_addr.sin6_family == AF_INET6) { /* XXX */
 		struct sockaddr_in6 *sa6 =
 			(struct sockaddr_in6 *)&ifra->ifra_addr;
 
 		/*
 		 * For a link-local address the interface index is embedded
 		 * in the second 16-bit word; fill it in when the user left
 		 * it out and reject a value naming another interface.
 		 */
 		if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) {
 			if (sa6->sin6_addr.s6_addr16[1] == 0) {
 				/* link ID is not embedded by the user */
 				sa6->sin6_addr.s6_addr16[1] =
 					htons(ifp->if_index);
 			} else if (sa6->sin6_addr.s6_addr16[1] !=
 				    htons(ifp->if_index)) {
 				return(EINVAL);	/* link ID contradicts */
 			}
 			if (sa6->sin6_scope_id) {
 				if (sa6->sin6_scope_id !=
 				    (u_int32_t)ifp->if_index)
 					return(EINVAL);
 				sa6->sin6_scope_id = 0; /* XXX: good way? */
 			}
 		}
 		ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr);
 	}
 
 	/* First pass: validate the request and check privilege. */
 	switch (cmd) {
 	case SIOCSIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
 		 * on a single interface, SIOCSIFxxx ioctls are not suitable
 		 * and should be unused.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		return(EINVAL);
 
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
 		 * "ifconfig if0 delete" to remove first IPv4 address on the
 		 * interface.  For IPv6, as the spec allow multiple interface
 		 * address from the day one, we consider "remove the first one"
 		 * semantics to be not preferable.
 		 */
 		if (ia == NULL)
 			return(EADDRNOTAVAIL);
 		/* FALLTHROUGH */
 	case SIOCAIFADDR_IN6:
 		/*
 		 * We always require users to specify a valid IPv6 address for
 		 * the corresponding operation.
 		 */
 		if (ifra->ifra_addr.sin6_family != AF_INET6 ||
 		    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
 			return(EAFNOSUPPORT);
 		if (!privileged)
 			return(EPERM);
 
 		break;
 
 	case SIOCGIFADDR_IN6:
 		/* This interface is basically deprecated. use SIOCGIFCONF. */
 		/* fall through */
 	case SIOCGIFAFLAG_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFALIFETIME_IN6:
 		/* must think again about its semantics */
 		if (ia == NULL)
 			return(EADDRNOTAVAIL);
 		break;
 	case SIOCSIFALIFETIME_IN6:
 	    {
 		struct in6_addrlifetime *lt;
 
 		if (!privileged)
 			return(EPERM);
 		if (ia == NULL)
 			return(EADDRNOTAVAIL);
 		/* sanity for overflow - beware unsigned */
 		lt = &ifr->ifr_ifru.ifru_lifetime;
 		if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME
 		 && lt->ia6t_vltime + time_second < time_second) {
 			return EINVAL;
 		}
 		if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME
 		 && lt->ia6t_pltime + time_second < time_second) {
 			return EINVAL;
 		}
 		break;
 	    }
 	}
 
 	/* Second pass: carry the request out. */
 	switch (cmd) {
 
 	case SIOCGIFADDR_IN6:
 		ifr->ifr_addr = ia->ia_addr;
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			return(EINVAL);
 		/*
 		 * XXX: should we check if ifa_dstaddr is NULL and return
 		 * an error?
 		 */
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
 		break;
 
 	case SIOCGIFNETMASK_IN6:
 		ifr->ifr_addr = ia->ia_prefixmask;
 		break;
 
 	case SIOCGIFAFLAG_IN6:
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
 		break;
 
 	case SIOCGIFSTAT_IN6:
 		if (ifp == NULL)
 			return EINVAL;
 		if (in6_ifstat == NULL || ifp->if_index >= in6_ifstatmax
 		 || in6_ifstat[ifp->if_index] == NULL) {
 			/* return EAFNOSUPPORT? */
 			bzero(&ifr->ifr_ifru.ifru_stat,
 				sizeof(ifr->ifr_ifru.ifru_stat));
 		} else
 			ifr->ifr_ifru.ifru_stat = *in6_ifstat[ifp->if_index];
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
 		if (ifp == NULL)
 			return EINVAL;
 		if (icmp6_ifstat == NULL || ifp->if_index >= icmp6_ifstatmax ||
 		    icmp6_ifstat[ifp->if_index] == NULL) {
 			/* return EAFNOSUPPORT? */
 			/* clear the very field the other branch fills in */
 			bzero(&ifr->ifr_ifru.ifru_icmp6stat,
 				sizeof(ifr->ifr_ifru.ifru_icmp6stat));
 		} else
 			ifr->ifr_ifru.ifru_icmp6stat =
 				*icmp6_ifstat[ifp->if_index];
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
 		ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
 		break;
 
 	case SIOCSIFALIFETIME_IN6:
 		ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
 		/* for sanity */
 		/* An infinite lifetime is stored as expiry time 0. */
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_expire =
 				time_second + ia->ia6_lifetime.ia6t_vltime;
 		} else
 			ia->ia6_lifetime.ia6t_expire = 0;
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_preferred =
 				time_second + ia->ia6_lifetime.ia6t_pltime;
 		} else
 			ia->ia6_lifetime.ia6t_preferred = 0;
 		break;
 
 	case SIOCAIFADDR_IN6:
 	{
 		int i, error = 0;
 		struct nd_prefix pr0, *pr;
 
 		/*
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
 		if ((error = in6_update_ifa(ifp, ifra, ia)) != 0)
 			return(error);
 
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
 		 * we need at least one address to install the corresponding
 		 * interface route, so we configure the address first.
 		 */
 
 		/*
 		 * convert mask to prefix length (prefixmask has already
 		 * been validated in in6_update_ifa()).
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 					     NULL);
 		if (pr0.ndpr_plen == 128)
 			break;	/* we don't need to install a host route. */
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		pr0.ndpr_mask = ifra->ifra_prefixmask.sin6_addr;
 		/* apply the mask for safety. */
 		for (i = 0; i < 4; i++) {
 			pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 				ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
 		}
 		/*
 		 * XXX: since we don't have an API to set prefix (not address)
 		 * lifetimes, we just use the same lifetimes as addresses.
 		 * The (temporarily) installed lifetimes can be overridden by
 		 * later advertised RAs (when accept_rtadv is non 0), which is
 		 * an intended behavior.
 		 */
 		pr0.ndpr_raf_onlink = 1; /* should be configurable? */
 		pr0.ndpr_raf_auto =
 			((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
 
 		/* add the prefix if there's one. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 			/*
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
 			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
 				return(error);
 			if (pr == NULL) {
 				log(LOG_ERR, "nd6_prelist_add succedded but "
 				    "no prefix\n");
 				return(EINVAL); /* XXX panic here? */
 			}
 		}
 		/* Look the address up again now that it has been set up. */
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/* XXX: this should not happen! */
 			log(LOG_ERR, "in6_control: addition succeeded, but"
 			    " no ifaddr\n");
 		} else {
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
 			    ia->ia6_ndpr == NULL) { /* new autoconfed addr */
 				ia->ia6_ndpr = pr;
 				pr->ndpr_refcnt++;
 
 				/*
 				 * If this is the first autoconf address from
 				 * the prefix, create a temporary address
 				 * as well (when specified).
 				 */
 				if (ip6_use_tempaddr &&
 				    pr->ndpr_refcnt == 1) {
 					int e;
 					if ((e = in6_tmpifadd(ia, 1)) != 0) {
 						log(LOG_NOTICE, "in6_control: "
 						    "failed to create a "
 						    "temporary address, "
 						    "errno=%d\n",
 						    e);
 					}
 				}
 			}
 
 			/*
 			 * this might affect the status of autoconfigured
 			 * addresses, that is, this address might make
 			 * other addresses detached.
 			 */
 			pfxlist_onlink_check();
 		}
 		break;
 	}
 
 	case SIOCDIFADDR_IN6:
 	{
 		int i = 0;
 		struct nd_prefix pr0, *pr;
 
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
 		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr,
 					     NULL);
 		if (pr0.ndpr_plen == 128)
 			goto purgeaddr;
 		pr0.ndpr_prefix = ia->ia_addr;
 		pr0.ndpr_mask = ia->ia_prefixmask.sin6_addr;
 		for (i = 0; i < 4; i++) {
 			pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 				ia->ia_prefixmask.sin6_addr.s6_addr32[i];
 		}
 		/*
 		 * The logic of the following condition is a bit complicated.
 		 * We expire the prefix when
 		 * 1. the address obeys autoconfiguration and it is the
 		 *    only owner of the associated prefix, or
 		 * 2. the address does not obey autoconf and there is no
 		 *    other owner of the prefix.
 		 */
 		if ((pr = nd6_prefix_lookup(&pr0)) != NULL &&
 		    (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
 		      pr->ndpr_refcnt == 1) ||
 		     ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 &&
 		      pr->ndpr_refcnt == 0))) {
 			pr->ndpr_expire = 1; /* XXX: just for expiration */
 		}
 
 	  purgeaddr:
 		in6_purgeaddr(&ia->ia_ifa);
 		break;
 	}
 
 	default:
 		/* Not ours: let the interface's own ioctl routine decide. */
 		if (ifp == NULL || ifp->if_ioctl == 0)
 			return(EOPNOTSUPP);
 		return((*ifp->if_ioctl)(ifp, cmd, data));
 	}
 
 	return(0);
 }
 
 /*
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
  * XXX: should this be performed under splnet()?
  */
 int
 in6_update_ifa(ifp, ifra, ia)
 	struct ifnet *ifp;
 	struct in6_aliasreq *ifra;
 	struct in6_ifaddr *ia;
 {
 	int error = 0, hostIsNew = 0, plen = -1;
 	struct in6_ifaddr *oia;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
 
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return(EINVAL);
 
 	/*
 	 * The destination address for a p2p link must have a family
 	 * of AF_UNSPEC or AF_INET6.
 	 */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
 		return(EAFNOSUPPORT);
 	/*
 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
 	 * does not carry fields other than sin6_len.
 	 */
 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
 		return(EINVAL);
 	/*
 	 * Because the IPv6 address architecture is classless, we require
 	 * users to specify a (non 0) prefix length (mask) for a new address.
 	 * We also require the prefix (when specified) mask is valid, and thus
 	 * reject a non-consecutive mask.
 	 */
 	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
 		return(EINVAL);
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 				    (u_char *)&ifra->ifra_prefixmask +
 				    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
 			return(EINVAL);
 	}
 	else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.
 		 */
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
 	 * and the address is a scoped one, validate/set the scope
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) &&
 	    (dst6.sin6_family == AF_INET6)) {
 		int scopeid;
 
 #ifndef SCOPEDROUTING
 		if ((error = in6_recoverscope(&dst6,
 					      &ifra->ifra_dstaddr.sin6_addr,
 					      ifp)) != 0)
 			return(error);
 #endif
 		scopeid = in6_addr2scopeid(ifp, &dst6.sin6_addr);
 		if (dst6.sin6_scope_id == 0) /* user omit to specify the ID. */
 			dst6.sin6_scope_id = scopeid;
 		else if (dst6.sin6_scope_id != scopeid)
 			return(EINVAL); /* scope ID mismatch. */
 #ifndef SCOPEDROUTING
 		if ((error = in6_embedscope(&dst6.sin6_addr, &dst6, NULL, NULL))
 		    != 0)
 			return(error);
 		dst6.sin6_scope_id = 0; /* XXX */
 #endif
 	}
 	/*
 	 * The destination address can be specified only for a p2p or a
 	 * loopback interface.  If specified, the corresponding prefix length
 	 * must be 128.
 	 */
 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
 			/* XXX: noisy message */
 			log(LOG_INFO, "in6_update_ifa: a destination can be "
 			    "specified for a p2p or a loopback IF only\n");
 			return(EINVAL);
 		}
 		if (plen != 128) {
 			/*
 			 * The following message seems noisy, but we dare to
 			 * add it for diagnosis.
 			 */
 			log(LOG_INFO, "in6_update_ifa: prefixlen must be 128 "
 			    "when dstaddr is specified\n");
 			return(EINVAL);
 		}
 	}
 	/* lifetime consistency check */
 	lt = &ifra->ifra_lifetime;
 	if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME
 	    && lt->ia6t_vltime + time_second < time_second) {
 		return EINVAL;
 	}
 	if (lt->ia6t_vltime == 0) {
 		/*
 		 * the following log might be noisy, but this is a typical
 		 * configuration mistake or a tool's bug.
 		 */
 		log(LOG_INFO,
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(&ifra->ifra_addr.sin6_addr));
 	}
 	if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME
 	    && lt->ia6t_pltime + time_second < time_second) {
 		return EINVAL;
 	}
 
 	/*
 	 * If this is a new address, allocate a new ifaddr and link it
 	 * into chains.
 	 */
 	if (ia == NULL) {
 		hostIsNew = 1;
 		/*
 		 * When in6_update_ifa() is called in a process of a received
 		 * RA, it is called under splnet().  So, we should call malloc
 		 * with M_NOWAIT.
 		 */
 		ia = (struct in6_ifaddr *)
 			malloc(sizeof(*ia), M_IFADDR, M_NOWAIT);
 		if (ia == NULL)
 			return (ENOBUFS);
 		bzero((caddr_t)ia, sizeof(*ia));
 		/* Initialize the address and masks */
 		IFA_LOCK_INIT(&ia->ia_ifa);
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 		ia->ia_addr.sin6_family = AF_INET6;
 		ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 			/*
 			 * XXX: some functions expect that ifa_dstaddr is not
 			 * NULL for p2p interfaces.
 			 */
 			ia->ia_ifa.ifa_dstaddr
 				= (struct sockaddr *)&ia->ia_dstaddr;
 		} else {
 			ia->ia_ifa.ifa_dstaddr = NULL;
 		}
 		ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
 
 		ia->ia_ifp = ifp;
 		if ((oia = in6_ifaddr) != NULL) {
 			for ( ; oia->ia_next; oia = oia->ia_next)
 				continue;
 			oia->ia_next = ia;
 		} else
 			in6_ifaddr = ia;
 
 		ia->ia_ifa.ifa_refcnt = 1;
 		TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
 	}
 
 	/* set prefix mask */
 	if (ifra->ifra_prefixmask.sin6_len) {
 		/*
 		 * We prohibit changing the prefix length of an existing
 		 * address, because
 		 * + such an operation should be rare in IPv6, and
 		 * + the operation would confuse prefix management.
 		 */
 		if (ia->ia_prefixmask.sin6_len &&
 		    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
 			log(LOG_INFO, "in6_update_ifa: the prefix length of an"
 			    " existing (%s) address should not be changed\n",
 			    ip6_sprintf(&ia->ia_addr.sin6_addr));
 			error = EINVAL;
 			goto unlink;
 		}
 		ia->ia_prefixmask = ifra->ifra_prefixmask;
 	}
 
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
 	 * p2p or loopback (see the check above.) 
 	 */
 	if (dst6.sin6_family == AF_INET6 &&
 	    !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr,
 				&ia->ia_dstaddr.sin6_addr)) {
 		int e;
 
 		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
 		    (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST))
 		    != 0) {
 			log(LOG_ERR, "in6_update_ifa: failed to remove "
 			    "a route to the old destination: %s\n",
 			    ip6_sprintf(&ia->ia_addr.sin6_addr));
 			/* proceed anyway... */
 		}
 		else
 			ia->ia_flags &= ~IFA_ROUTE;
 		ia->ia_dstaddr = dst6;
 	}
 
 	/* reset the interface and routing table appropriately. */
 	if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
 		goto unlink;
 
 	/*
 	 * Beyond this point, we should call in6_purgeaddr upon an error,
 	 * not just go to unlink. 
 	 */
 
 #if 0				/* disable this mechanism for now */
 	/* update prefix list */
 	if (hostIsNew &&
 	    (ifra->ifra_flags & IN6_IFF_NOPFX) == 0) { /* XXX */
 		int iilen;
 
 		iilen = (sizeof(ia->ia_prefixmask.sin6_addr) << 3) - plen;
 		if ((error = in6_prefix_add_ifid(iilen, ia)) != 0) {
 			in6_purgeaddr((struct ifaddr *)ia);
 			return(error);
 		}
 	}
 #endif
 
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		struct sockaddr_in6 mltaddr, mltmask;
 		struct in6_multi *in6m;
 
 		if (hostIsNew) {
 			/*
 			 * join solicited multicast addr for new host id
 			 */
 			struct in6_addr llsol;
 			bzero(&llsol, sizeof(struct in6_addr));
 			llsol.s6_addr16[0] = htons(0xff02);
 			llsol.s6_addr16[1] = htons(ifp->if_index);
 			llsol.s6_addr32[1] = 0;
 			llsol.s6_addr32[2] = htonl(1);
 			llsol.s6_addr32[3] =
 				ifra->ifra_addr.sin6_addr.s6_addr32[3];
 			llsol.s6_addr8[12] = 0xff;
 			(void)in6_addmulti(&llsol, ifp, &error);
 			if (error != 0) {
 				log(LOG_WARNING,
 				    "in6_update_ifa: addmulti failed for "
 				    "%s on %s (errno=%d)\n",
 				    ip6_sprintf(&llsol), if_name(ifp),
 				    error);
 				in6_purgeaddr((struct ifaddr *)ia);
 				return(error);
 			}
 		}
 
 		bzero(&mltmask, sizeof(mltmask));
 		mltmask.sin6_len = sizeof(struct sockaddr_in6);
 		mltmask.sin6_family = AF_INET6;
 		mltmask.sin6_addr = in6mask32;
 
 		/*
 		 * join link-local all-nodes address
 		 */
 		bzero(&mltaddr, sizeof(mltaddr));
 		mltaddr.sin6_len = sizeof(struct sockaddr_in6);
 		mltaddr.sin6_family = AF_INET6;
 		mltaddr.sin6_addr = in6addr_linklocal_allnodes;
 		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 
 		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
 		if (in6m == NULL) {
 			rtrequest(RTM_ADD,
 				  (struct sockaddr *)&mltaddr,
 				  (struct sockaddr *)&ia->ia_addr,
 				  (struct sockaddr *)&mltmask,
 				  RTF_UP|RTF_CLONING,  /* xxx */
 				  (struct rtentry **)0);
 			(void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error);
 			if (error != 0) {
 				log(LOG_WARNING,
 				    "in6_update_ifa: addmulti failed for "
 				    "%s on %s (errno=%d)\n",
 				    ip6_sprintf(&mltaddr.sin6_addr), 
 				    if_name(ifp), error);
 			}
 		}
 
 		/*
 		 * join node information group address
 		 */
 #define hostnamelen	strlen(hostname)
 		if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr)
 		    == 0) {
 			IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
 			if (in6m == NULL && ia != NULL) {
 				(void)in6_addmulti(&mltaddr.sin6_addr,
 				    ifp, &error);
 				if (error != 0) {
 					log(LOG_WARNING, "in6_update_ifa: "
 					    "addmulti failed for "
 					    "%s on %s (errno=%d)\n",
 					    ip6_sprintf(&mltaddr.sin6_addr), 
 					    if_name(ifp), error);
 				}
 			}
 		}
 #undef hostnamelen
 
 		/*
 		 * join node-local all-nodes address, on loopback.
 		 * XXX: since "node-local" is obsoleted by interface-local,
 		 *      we have to join the group on every interface with
 		 *      some interface-boundary restriction.
 		 */
 		if (ifp->if_flags & IFF_LOOPBACK) {
 			struct in6_ifaddr *ia_loop;
 
 			struct in6_addr loop6 = in6addr_loopback;
 			ia_loop = in6ifa_ifpwithaddr(ifp, &loop6);
 
 			mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
 
 			IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
 			if (in6m == NULL && ia_loop != NULL) {
 				rtrequest(RTM_ADD,
 					  (struct sockaddr *)&mltaddr,
 					  (struct sockaddr *)&ia_loop->ia_addr,
 					  (struct sockaddr *)&mltmask,
 					  RTF_UP,
 					  (struct rtentry **)0);
 				(void)in6_addmulti(&mltaddr.sin6_addr, ifp,
 						   &error);
 				if (error != 0) {
 					log(LOG_WARNING, "in6_update_ifa: "
 					    "addmulti failed for %s on %s "
 					    "(errno=%d)\n",
 					    ip6_sprintf(&mltaddr.sin6_addr), 
 					    if_name(ifp), error);
 				}
 			}
 		}
 	}
 
 	ia->ia6_flags = ifra->ifra_flags;
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/*safety*/
 	ia->ia6_flags &= ~IN6_IFF_NODAD;	/* Mobile IPv6 */
 
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	/* for sanity */
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
 			time_second + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
 			time_second + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
 	/*
 	 * make sure to initialize ND6 information.  this is to workaround
 	 * issues with interfaces with IPv6 addresses, which have never brought
 	 * up.  We are assuming that it is safe to nd6_ifattach multiple times.
 	 */
 	nd6_ifattach(ifp);
 
 	/*
 	 * Perform DAD, if needed.
 	 * XXX It may be of use, if we can administratively
 	 * disable DAD.
 	 */
 	if (in6if_do_dad(ifp) && (ifra->ifra_flags & IN6_IFF_NODAD) == 0) {
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 		nd6_dad_start((struct ifaddr *)ia, NULL);
 	}
 
 	return(error);
 
   unlink:
 	/*
 	 * XXX: if a change of an existing address failed, keep the entry
 	 * anyway.
 	 */
 	if (hostIsNew)
 		in6_unlink_ifa(ia, ifp);
 	return(error);
 }
 
 void
 in6_purgeaddr(ifa)
 	struct ifaddr *ifa;
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
 
 	/*
 	 * delete route to the destination of the address being purged.
 	 * The interface must be p2p or loopback in this case.
 	 */
 	if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) {
 		int e;
 
 		if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST))
 		    != 0) {
 			log(LOG_ERR, "in6_purgeaddr: failed to remove "
 			    "a route to the p2p destination: %s on %s, "
 			    "errno=%d\n",
 			    ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp),
 			    e);
 			/* proceed anyway... */
 		}
 		else
 			ia->ia_flags &= ~IFA_ROUTE;
 	}
 
 	/* Remove ownaddr's loopback rtentry, if it exists. */
 	in6_ifremloop(&(ia->ia_ifa));
 
 	if (ifp->if_flags & IFF_MULTICAST) {
 		/*
 		 * delete solicited multicast addr for deleting host id
 		 */
 		struct in6_multi *in6m;
 		struct in6_addr llsol;
 		bzero(&llsol, sizeof(struct in6_addr));
 		llsol.s6_addr16[0] = htons(0xff02);
 		llsol.s6_addr16[1] = htons(ifp->if_index);
 		llsol.s6_addr32[1] = 0;
 		llsol.s6_addr32[2] = htonl(1);
 		llsol.s6_addr32[3] =
 			ia->ia_addr.sin6_addr.s6_addr32[3];
 		llsol.s6_addr8[12] = 0xff;
 
 		IN6_LOOKUP_MULTI(llsol, ifp, in6m);
 		if (in6m)
 			in6_delmulti(in6m);
 	}
 
 	in6_unlink_ifa(ia, ifp);
 }
 
 /*
  * Detach an IPv6 address from its interface's address list and from the
  * global in6_ifaddr chain, release its prefix bookkeeping, and drop the
  * reference held for the link from in6_ifaddr.  Runs at splnet.
  */
 static void
 in6_unlink_ifa(ia, ifp)
 	struct in6_ifaddr *ia;
 	struct ifnet *ifp;
 {
 	struct in6_ifaddr *prev;
 	int	s = splnet();
 
 	TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
 
 	/* Take the entry out of the singly linked global chain. */
 	if (in6_ifaddr == ia) {
 		in6_ifaddr = ia->ia_next;
 	} else {
 		prev = in6_ifaddr;
 		while (prev->ia_next != NULL && prev->ia_next != ia)
 			prev = prev->ia_next;
 		if (prev->ia_next != NULL) {
 			prev->ia_next = ia->ia_next;
 		} else {
 			/* search failed */
 			printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n");
 		}
 	}
 
 	if (ia->ia6_ifpr != NULL) {	/* check for safety */
 		int plen, iilen;
 
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 		iilen = (sizeof(ia->ia_prefixmask.sin6_addr) << 3) - plen;
 		in6_prefix_remove_ifid(iilen, ia);
 	}
 
 	/*
 	 * An autoconfigured address holds a reference on its base prefix;
 	 * give it back.  Releasing it may change the status of other
 	 * (detached) addresses, hence the pfxlist_onlink_check() call.
 	 */
 	if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0) {
 		if (ia->ia6_ndpr != NULL) {
 			ia->ia6_ndpr->ndpr_refcnt--;
 			ia->ia6_flags &= ~IN6_IFF_AUTOCONF;
 			ia->ia6_ndpr = NULL;
 		} else {
 			log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address "
 			    "%p has no prefix\n", ia);
 		}
 
 		pfxlist_onlink_check();
 	}
 
 	/*
 	 * release another refcnt for the link from in6_ifaddr.
 	 * Note that we should decrement the refcnt at least once for all *BSD.
 	 */
 	IFAFREE(&ia->ia_ifa);
 
 	splx(s);
 }
 
 /*
  * Purge every AF_INET6 address configured on ifp, then detach the
  * interface from IPv6 via in6_ifdetach().
  */
 void
 in6_purgeif(ifp)
 	struct ifnet *ifp;
 {
 	struct ifaddr *cur, *next;
 
 	cur = TAILQ_FIRST(&ifp->if_addrlist);
 	while (cur != NULL) {
 		/* Fetch the successor first: purging unlinks cur. */
 		next = TAILQ_NEXT(cur, ifa_list);
 		if (cur->ifa_addr->sa_family == AF_INET6)
 			in6_purgeaddr(cur);
 		cur = next;
 	}
 
 	in6_ifdetach(ifp);
 }
 
 /*
  * SIOC[GAD]LIFADDR.
  *	SIOCGLIFADDR: get first address. (?)
  *	SIOCGLIFADDR with IFLR_PREFIX:
  *		get first address that matches the specified prefix.
  *	SIOCALIFADDR: add the specified address.
  *	SIOCALIFADDR with IFLR_PREFIX:
  *		add the specified prefix, filling hostid part from
  *		the first link-local address.  prefixlen must be <= 64.
  *	SIOCDLIFADDR: delete the specified address.
  *	SIOCDLIFADDR with IFLR_PREFIX:
  *		delete the first address that matches the specified prefix.
  * return values:
  *	EINVAL on invalid parameters
  *	EADDRNOTAVAIL on prefix match failed/specified address not found
  *	other values may be returned from in6_ioctl()
  *
  * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
  * this is to accomodate address naming scheme other than RFC2374,
  * in the future.
  * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
  * address encoding scheme. (see figure on page 8)
  */
 static int
 in6_lifaddr_ioctl(so, cmd, data, ifp, td)
 	struct socket *so;
 	u_long cmd;
 	caddr_t	data;
 	struct ifnet *ifp;
 	struct thread *td;
 {
 	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
 	struct ifaddr *ifa;
 	struct sockaddr *sa;
 
 	/* sanity checks */
 	if (!data || !ifp) {
 		panic("invalid argument to in6_lifaddr_ioctl");
 		/*NOTRECHED*/
 	}
 
 	switch (cmd) {
 	case SIOCGLIFADDR:
 		/* address must be specified on GET with IFLR_PREFIX */
 		if ((iflr->flags & IFLR_PREFIX) == 0)
 			break;
 		/* FALLTHROUGH */
 	case SIOCALIFADDR:
 	case SIOCDLIFADDR:
 		/* address must be specified on ADD and DELETE */
 		sa = (struct sockaddr *)&iflr->addr;
 		if (sa->sa_family != AF_INET6)
 			return EINVAL;
 		if (sa->sa_len != sizeof(struct sockaddr_in6))
 			return EINVAL;
 		/* XXX need improvement */
 		sa = (struct sockaddr *)&iflr->dstaddr;
 		if (sa->sa_family && sa->sa_family != AF_INET6)
 			return EINVAL;
 		if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
 			return EINVAL;
 		break;
 	default: /* shouldn't happen */
 #if 0
 		panic("invalid cmd to in6_lifaddr_ioctl");
 		/* NOTREACHED */
 #else
 		return EOPNOTSUPP;
 #endif
 	}
 	if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
 		return EINVAL;
 
 	switch (cmd) {
 	case SIOCALIFADDR:
 	    {
 		struct in6_aliasreq ifra;
 		struct in6_addr *hostid = NULL;
 		int prefixlen;
 
 		if ((iflr->flags & IFLR_PREFIX) != 0) {
 			struct sockaddr_in6 *sin6;
 
 			/*
 			 * hostid is to fill in the hostid part of the
 			 * address.  hostid points to the first link-local
 			 * address attached to the interface.
 			 */
 			ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
 			if (!ifa)
 				return EADDRNOTAVAIL;
 			hostid = IFA_IN6(ifa);
 
 		 	/* prefixlen must be <= 64. */
 			if (64 < iflr->prefixlen)
 				return EINVAL;
 			prefixlen = iflr->prefixlen;
 
 			/* hostid part must be zero. */
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
 			if (sin6->sin6_addr.s6_addr32[2] != 0
 			 || sin6->sin6_addr.s6_addr32[3] != 0) {
 				return EINVAL;
 			}
 		} else
 			prefixlen = iflr->prefixlen;
 
 		/* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
 		bzero(&ifra, sizeof(ifra));
 		bcopy(iflr->iflr_name, ifra.ifra_name,
 			sizeof(ifra.ifra_name));
 
 		bcopy(&iflr->addr, &ifra.ifra_addr,
 			((struct sockaddr *)&iflr->addr)->sa_len);
 		if (hostid) {
 			/* fill in hostid part */
 			ifra.ifra_addr.sin6_addr.s6_addr32[2] =
 				hostid->s6_addr32[2];
 			ifra.ifra_addr.sin6_addr.s6_addr32[3] =
 				hostid->s6_addr32[3];
 		}
 
 		if (((struct sockaddr *)&iflr->dstaddr)->sa_family) {	/*XXX*/
 			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
 				((struct sockaddr *)&iflr->dstaddr)->sa_len);
 			if (hostid) {
 				ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
 					hostid->s6_addr32[2];
 				ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
 					hostid->s6_addr32[3];
 			}
 		}
 
 		ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 		in6_len2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
 
 		ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
 		return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
 	    }
 	case SIOCGLIFADDR:
 	case SIOCDLIFADDR:
 	    {
 		struct in6_ifaddr *ia;
 		struct in6_addr mask, candidate, match;
 		struct sockaddr_in6 *sin6;
 		int cmp;
 
 		bzero(&mask, sizeof(mask));
 		if (iflr->flags & IFLR_PREFIX) {
 			/* lookup a prefix rather than address. */
 			in6_len2mask(&mask, iflr->prefixlen);
 
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
 			bcopy(&sin6->sin6_addr, &match, sizeof(match));
 			match.s6_addr32[0] &= mask.s6_addr32[0];
 			match.s6_addr32[1] &= mask.s6_addr32[1];
 			match.s6_addr32[2] &= mask.s6_addr32[2];
 			match.s6_addr32[3] &= mask.s6_addr32[3];
 
 			/* if you set extra bits, that's wrong */
 			if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
 				return EINVAL;
 
 			cmp = 1;
 		} else {
 			if (cmd == SIOCGLIFADDR) {
 				/* on getting an address, take the 1st match */
 				cmp = 0;	/* XXX */
 			} else {
 				/* on deleting an address, do exact match */
 				in6_len2mask(&mask, 128);
 				sin6 = (struct sockaddr_in6 *)&iflr->addr;
 				bcopy(&sin6->sin6_addr, &match, sizeof(match));
 
 				cmp = 1;
 			}
 		}
 
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			if (!cmp)
 				break;
 
 			bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
 #ifndef SCOPEDROUTING
 			/*
 			 * XXX: this is adhoc, but is necessary to allow
 			 * a user to specify fe80::/64 (not /10) for a
 			 * link-local address.
 			 */
 			if (IN6_IS_ADDR_LINKLOCAL(&candidate))
 				candidate.s6_addr16[1] = 0;
 #endif
 			candidate.s6_addr32[0] &= mask.s6_addr32[0];
 			candidate.s6_addr32[1] &= mask.s6_addr32[1];
 			candidate.s6_addr32[2] &= mask.s6_addr32[2];
 			candidate.s6_addr32[3] &= mask.s6_addr32[3];
 			if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
 				break;
 		}
 		if (!ifa)
 			return EADDRNOTAVAIL;
 		ia = ifa2ia6(ifa);
 
 		if (cmd == SIOCGLIFADDR) {
 #ifndef SCOPEDROUTING
 			struct sockaddr_in6 *s6;
 #endif
 
 			/* fill in the if_laddrreq structure */
 			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
 #ifndef SCOPEDROUTING		/* XXX see above */
 			s6 = (struct sockaddr_in6 *)&iflr->addr;
 			if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) {
 				s6->sin6_addr.s6_addr16[1] = 0;
 				s6->sin6_scope_id =
 					in6_addr2scopeid(ifp, &s6->sin6_addr);
 			}
 #endif
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
 				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
 					ia->ia_dstaddr.sin6_len);
 #ifndef SCOPEDROUTING		/* XXX see above */
 				s6 = (struct sockaddr_in6 *)&iflr->dstaddr;
 				if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) {
 					s6->sin6_addr.s6_addr16[1] = 0;
 					s6->sin6_scope_id =
 						in6_addr2scopeid(ifp,
 								 &s6->sin6_addr);
 				}
 #endif
 			} else
 				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
 
 			iflr->prefixlen =
 				in6_mask2len(&ia->ia_prefixmask.sin6_addr,
 					     NULL);
 
 			iflr->flags = ia->ia6_flags;	/* XXX */
 
 			return 0;
 		} else {
 			struct in6_aliasreq ifra;
 
 			/* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
 			bzero(&ifra, sizeof(ifra));
 			bcopy(iflr->iflr_name, ifra.ifra_name,
 				sizeof(ifra.ifra_name));
 
 			bcopy(&ia->ia_addr, &ifra.ifra_addr,
 				ia->ia_addr.sin6_len);
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
 				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
 					ia->ia_dstaddr.sin6_len);
 			} else {
 				bzero(&ifra.ifra_dstaddr,
 				    sizeof(ifra.ifra_dstaddr));
 			}
 			bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr,
 				ia->ia_prefixmask.sin6_len);
 
 			ifra.ifra_flags = ia->ia6_flags;
 			return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
 				ifp, td);
 		}
 	    }
 	}
 
 	return EOPNOTSUPP;	/* just for safety */
 }
 
 /*
  * Initialize an interface's intetnet6 address
  * and routing table entry.
  */
 static int
 in6_ifinit(ifp, ia, sin6, newhost)
 	struct ifnet *ifp;
 	struct in6_ifaddr *ia;
 	struct sockaddr_in6 *sin6;
 	int newhost;
 {
 	int	error = 0, plen, ifacount = 0;
 	int	s = splimp();
 	struct ifaddr *ifa;
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		if (ifa->ifa_addr == NULL)
 			continue;	/* just for safety */
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ifacount++;
 	}
 
 	ia->ia_addr = *sin6;
 
 	if (ifacount <= 1 && ifp->if_ioctl &&
 	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
 		splx(s);
 		return(error);
 	}
 	splx(s);
 
 	ia->ia_ifa.ifa_metric = ifp->if_metric;
 
 	/* we could do in(6)_socktrim here, but just omit it at this moment. */
 
 	/*
 	 * Special case:
 	 * If the destination address is specified for a point-to-point
 	 * interface, install a route to the destination as an interface
 	 * direct route.
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) {
 		if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD,
 				    RTF_UP | RTF_HOST)) != 0)
 			return(error);
 		ia->ia_flags |= IFA_ROUTE;
 	}
 	if (plen < 128) {
 		/*
 		 * The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
 		 */
 		ia->ia_ifa.ifa_flags |= RTF_CLONING;
 	}
 
 	/* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */
 	if (newhost) {
 		/* set the rtrequest function to create llinfo */
 		ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
 		in6_ifaddloop(&(ia->ia_ifa));
 	}
 
 	return(error);
 }
 
 /*
  * Add an address to the list of IP6 multicast addresses for a
  * given interface.
  */
 struct	in6_multi *
 in6_addmulti(maddr6, ifp, errorp)
 	struct in6_addr *maddr6;
 	struct ifnet *ifp;
 	int *errorp;
 {
 	struct	in6_multi *in6m;
 	struct sockaddr_in6 sin6;
 	struct ifmultiaddr *ifma;
 	int	s = splnet();
 
 	*errorp = 0;
 
 	/*
 	 * Call generic routine to add membership or increment
 	 * refcount.  It wants addresses in the form of a sockaddr,
 	 * so we build one here (being careful to zero the unused bytes).
 	 */
 	bzero(&sin6, sizeof sin6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof sin6;
 	sin6.sin6_addr = *maddr6;
 	*errorp = if_addmulti(ifp, (struct sockaddr *)&sin6, &ifma);
 	if (*errorp) {
 		splx(s);
 		return 0;
 	}
 
 	/*
 	 * If ifma->ifma_protospec is null, then if_addmulti() created
 	 * a new record.  Otherwise, we are done.
 	 */
 	if (ifma->ifma_protospec != 0)
 		return ifma->ifma_protospec;
 
 	/* XXX - if_addmulti uses M_WAITOK.  Can this really be called
 	   at interrupt time?  If so, need to fix if_addmulti. XXX */
 	in6m = (struct in6_multi *)malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT);
 	if (in6m == NULL) {
 		splx(s);
 		return (NULL);
 	}
 
 	bzero(in6m, sizeof *in6m);
 	in6m->in6m_addr = *maddr6;
 	in6m->in6m_ifp = ifp;
 	in6m->in6m_ifma = ifma;
 	ifma->ifma_protospec = in6m;
 	LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
 
 	/*
 	 * Let MLD6 know that we have joined a new IP6 multicast
 	 * group.
 	 */
 	mld6_start_listening(in6m);
 	splx(s);
 	return(in6m);
 }
 
 /*
  * Delete a multicast address record.
  *
  * When the underlying ifmultiaddr holds its last reference, the in6_multi
  * record is taken off the list and freed before the membership itself is
  * dropped through if_delmulti().
  */
 void
 in6_delmulti(in6m)
 	struct in6_multi *in6m;
 {
 	struct ifmultiaddr *ifma = in6m->in6m_ifma;
 	int	s = splnet();
 	int	lastref;
 
 	lastref = (ifma->ifma_refcount == 1);
 	if (lastref) {
 		/*
 		 * Nobody else claims this record: tell MLD6 that we are
 		 * leaving the multicast group and dispose of the record.
 		 */
 		mld6_stop_listening(in6m);
 		ifma->ifma_protospec = 0;
 		LIST_REMOVE(in6m, in6m_entry);
 		free(in6m, M_IPMADDR);
 	}
 
 	/* XXX - should be separate API for when we have an ifma? */
 	if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
 
 	splx(s);
 }
 
 /*
  * Find an IPv6 interface link-local address specific to an interface.
  *
  * Returns the first link-local AF_INET6 address on ifp whose ia6_flags
  * share no bit with ignoreflags, or NULL when there is none.
  */
 struct in6_ifaddr *
 in6ifa_ifpforlinklocal(ifp, ignoreflags)
 	struct ifnet *ifp;
 	int ignoreflags;
 {
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia6;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		/* just for safety */
 		if (ifa->ifa_addr == NULL)
 			continue;
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (!IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)))
 			continue;
 
 		ia6 = (struct in6_ifaddr *)ifa;
 		if ((ia6->ia6_flags & ignoreflags) == 0)
 			return (ia6);
 	}
 
 	return (NULL);
 }
 
 
 /*
  * find the internet address corresponding to a given interface and address.
  *
  * Returns the AF_INET6 entry on ifp whose address equals *addr, or NULL
  * when the interface carries no such address.
  */
 struct in6_ifaddr *
 in6ifa_ifpwithaddr(ifp, addr)
 	struct ifnet *ifp;
 	struct in6_addr *addr;
 {
 	struct ifaddr *ifa;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		/* just for safety */
 		if (ifa->ifa_addr == NULL)
 			continue;
 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
 		    IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
 			return ((struct in6_ifaddr *)ifa);
 	}
 
 	return (NULL);
 }
 
 /*
  * Convert IP6 address to printable (loggable) representation.
  *
  * Each non-zero 16-bit group is printed as four hex digits, a zero group
  * as "0", and the first run of two or more zero groups is collapsed to
  * "::".  The result lives in one of eight static buffers used in
  * rotation, so up to eight results may be in use at the same time (e.g.
  * as arguments of a single log() call) before one is overwritten.
  */
 static char digits[] = "0123456789abcdef";
 static int ip6round = 0;
 char *
 ip6_sprintf(addr)
 	const struct in6_addr *addr;
 {
 	static char ip6buf[8][48];
 	int i;
 	char *cp;
 	/* walk the address bytewise, two bytes per 16-bit group */
 	const unsigned char *d = (const unsigned char *)addr;
 	int dcolon = 0;	/* 0: no "::" yet, 1: inside the run, 2: done */
 
 	ip6round = (ip6round + 1) & 7;
 	cp = ip6buf[ip6round];
 
 	for (i = 0; i < 8; i++, d += 2) {
 		int zero = (d[0] == 0 && d[1] == 0);
 
 		if (dcolon == 1) {
 			if (zero) {
 				/*
 				 * Still inside the collapsed run.  If it
 				 * reaches the end, emit a sacrificial ':'
 				 * for the final "*--cp = 0" to overwrite.
 				 */
 				if (i == 7)
 					*cp++ = ':';
 				continue;
 			} else
 				dcolon = 2;
 		}
 		if (zero) {
 			/*
 			 * Only look at the following group when there is
 			 * one; the last group has no successor to inspect.
 			 */
 			if (dcolon == 0 && i < 7 &&
 			    d[2] == 0 && d[3] == 0) {
 				if (i == 0)
 					*cp++ = ':';
 				*cp++ = ':';
 				dcolon = 1;
 			} else {
 				*cp++ = '0';
 				*cp++ = ':';
 			}
 			continue;
 		}
 		*cp++ = digits[d[0] >> 4];
 		*cp++ = digits[d[0] & 0xf];
 		*cp++ = digits[d[1] >> 4];
 		*cp++ = digits[d[1] & 0xf];
 		*cp++ = ':';
 	}
 	/* replace the trailing ':' with the terminator */
 	*--cp = 0;
 	return(ip6buf[ip6round]);
 }
 
 int
 in6_localaddr(in6)
 	struct in6_addr *in6;
 {
 	struct in6_ifaddr *ia;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
 		return 1;
 
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
 					      &ia->ia_prefixmask.sin6_addr))
 			return 1;
 
 	return (0);
 }
 
 int
 in6_is_addr_deprecated(sa6)
 	struct sockaddr_in6 *sa6;
 {
 	struct in6_ifaddr *ia;
 
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 				       &sa6->sin6_addr) &&
 #ifdef SCOPEDROUTING
 		    ia->ia_addr.sin6_scope_id == sa6->sin6_scope_id &&
 #endif
 		    (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			return(1); /* true */
 
 		/* XXX: do we still have to go thru the rest of the list? */
 	}
 
 	return(0);		/* false */
 }
 
 /*
  * Return the number of leading bits (0..128) that src and dst have in
  * common.
  */
 int
 in6_matchlen(src, dst)
 struct in6_addr *src, *dst;
 {
 	u_char *sp = (u_char *)src;
 	u_char *dp = (u_char *)dst;
 	u_char diff;
 	int nbits = 0;
 	int i;
 
 	for (i = 0; i < 16; i++) {
 		diff = dp[i] ^ sp[i];
 		if (diff == 0) {
 			/* whole byte identical */
 			nbits += 8;
 			continue;
 		}
 		/* count the equal bits ahead of the first differing one */
 		for (; (diff & 0x80) == 0; diff <<= 1)
 			nbits++;
 		break;
 	}
 
 	return nbits;
 }
 
 /*
  * Return 1 when the first len bits of p1 and p2 are identical, 0 when
  * they differ or when len is outside 0..128 (which is also logged).
  */
 /* XXX: to be scope conscious */
 int
 in6_are_prefix_equal(p1, p2, len)
 	struct in6_addr *p1, *p2;
 	int len;
 {
 	int bytelen, bitlen;
 
 	/* sanity check */
 	if (0 > len || len > 128) {
 		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
 		    len);
 		return(0);
 	}
 
 	bytelen = len / 8;
 	bitlen = len % 8;
 
 	/* compare the whole bytes first */
 	if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen))
 		return(0);
 	/*
 	 * Compare the leftover high bits of the next byte, but only when
 	 * there are any: with bitlen == 0 and len == 128 that byte would
 	 * lie past the end of the address.
 	 */
 	if (bitlen != 0 &&
 	    p1->s6_addr[bytelen] >> (8 - bitlen) !=
 	    p2->s6_addr[bytelen] >> (8 - bitlen))
 		return(0);
 
 	return(1);
 }
 
 /*
  * Fill *maskp with a netmask whose first len bits are set.  A len
  * outside 0..128 is logged and leaves *maskp untouched.
  */
 void
 in6_prefixlen2mask(maskp, len)
 	struct in6_addr *maskp;
 	int len;
 {
 	int nbytes, nbits, i;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
 		    len);
 		return;
 	}
 
 	nbytes = len / 8;
 	nbits = len % 8;
 
 	bzero(maskp, sizeof(*maskp));
 	for (i = 0; i < nbytes; i++)
 		maskp->s6_addr[i] = 0xff;
 	if (nbits != 0) {
 		/* the top nbits bits of the partial byte */
 		maskp->s6_addr[nbytes] = (0xff << (8 - nbits)) & 0xff;
 	}
 }
 
 /*
  * return the best address out of the same scope
  *
  * oifp is the outgoing interface and dst the destination address.
  * Every AF_INET6 address on every interface whose scope zone for dst
  * matches that of oifp is considered; the winner is chosen by the
  * scope, deprecation, temporary-address and longest-match rules
  * spelled out in the body.  Returns NULL when oifp is NULL or when no
  * usable address exists.  The ip6stat source-selection counters are
  * updated before returning (except on the early oifp == NULL return).
  */
 struct in6_ifaddr *
 in6_ifawithscope(oifp, dst)
 	struct ifnet *oifp;
 	struct in6_addr *dst;
 {
 	int dst_scope =	in6_addrscope(dst), src_scope, best_scope = 0;
 	int blen = -1;	/* match length of ifa_best against dst */
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	struct in6_ifaddr *ifa_best = NULL;
 	
 	if (oifp == NULL) {
 #if 0
 		printf("in6_ifawithscope: output interface is not specified\n");
 #endif
 		return(NULL);
 	}
 
 	/*
 	 * We search for all addresses on all interfaces from the beginning.
 	 * Comparing an interface with the outgoing interface will be done
 	 * only at the final stage of tiebreaking.
 	 */
 	IFNET_RLOCK();
 	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
 	{
 		/*
 		 * We can never take an address that breaks the scope zone
 		 * of the destination.
 		 */
 		if (in6_addr2scopeid(ifp, dst) != in6_addr2scopeid(oifp, dst))
 			continue;
 
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
 			/*
 			 * tlen stays -1 unless the match length of this
 			 * candidate is computed below; the "replace" block
 			 * computes it on demand in that case.
 			 */
 			int tlen = -1, dscopecmp, bscopecmp, matchcmp;
 
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 
 			src_scope = in6_addrscope(IFA_IN6(ifa));
 
 			/*
 			 * Don't use an address before completing DAD
 			 * nor a duplicated address.
 			 */
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
 			    IN6_IFF_NOTREADY)
 				continue;
 
 			/* XXX: is there any case to allow anycasts? */
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
 			    IN6_IFF_ANYCAST)
 				continue;
 
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
 			    IN6_IFF_DETACHED)
 				continue;
 
 			/*
 			 * If this is the first address we find,
 			 * keep it anyway.
 			 */
 			if (ifa_best == NULL)
 				goto replace;
 
 			/*
 			 * ifa_best is never NULL beyond this line except
 			 * within the block labeled "replace".
 			 */
 
 			/*
 			 * If ifa_best has a smaller scope than dst and
 			 * the current address has a larger one than
 			 * (or equal to) dst, always replace ifa_best.
 			 * Also, if the current address has a smaller scope
 			 * than dst, ignore it unless ifa_best also has a
 			 * smaller scope.
 			 * Consequently, after the two if-clause below,
 			 * the followings must be satisfied:
 			 * (scope(src) < scope(dst) &&
 			 *  scope(best) < scope(dst))
 			 *  OR
 			 * (scope(best) >= scope(dst) &&
 			 *  scope(src) >= scope(dst))
 			 */
 			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0 &&
 			    IN6_ARE_SCOPE_CMP(src_scope, dst_scope) >= 0)
 				goto replace; /* (A) */
 			if (IN6_ARE_SCOPE_CMP(src_scope, dst_scope) < 0 &&
 			    IN6_ARE_SCOPE_CMP(best_scope, dst_scope) >= 0)
 				continue; /* (B) */
 
 			/*
 			 * A deprecated address SHOULD NOT be used in new
 			 * communications if an alternate (non-deprecated)
 			 * address is available and has sufficient scope.
 			 * RFC 2462, Section 5.5.4.
 			 */
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
 			    IN6_IFF_DEPRECATED) {
 				/*
 				 * Ignore any deprecated addresses if
 				 * specified by configuration.
 				 */
 				if (!ip6_use_deprecated)
 					continue;
 
 				/*
 				 * If we have already found a non-deprecated
 				 * candidate, just ignore deprecated addresses.
 				 */
 				if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED)
 				    == 0)
 					continue;
 			}
 
 			/*
 			 * A non-deprecated address is always preferred
 			 * to a deprecated one regardless of scopes and
 			 * address matching (Note invariants ensured by the
 			 * conditions (A) and (B) above.)
 			 */
 			if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) &&
 			    (((struct in6_ifaddr *)ifa)->ia6_flags &
 			     IN6_IFF_DEPRECATED) == 0)
 				goto replace;
 
 			/*
 			 * When we use temporary addresses described in
 			 * RFC 3041, we prefer temporary addresses to
 			 * public autoconf addresses.  Again, note the
 			 * invariants from (A) and (B).  Also note that we
 			 * don't have any preference between static addresses
 			 * and autoconf addresses (despite of whether or not
 			 * the latter is temporary or public.)
 			 */
 			if (ip6_use_tempaddr) {
 				struct in6_ifaddr *ifat;
 
 				ifat = (struct in6_ifaddr *)ifa;
 				if ((ifa_best->ia6_flags &
 				     (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY))
 				     == IN6_IFF_AUTOCONF &&
 				    (ifat->ia6_flags &
 				     (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY))
 				     == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) {
 					goto replace;
 				}
 				if ((ifa_best->ia6_flags &
 				     (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY))
 				    == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY) &&
 				    (ifat->ia6_flags &
 				     (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY))
 				     == IN6_IFF_AUTOCONF) {
 					continue;
 				}
 			}
 
 			/*
 			 * At this point, we have two cases:
 			 * 1. we are looking at a non-deprecated address,
 			 *    and ifa_best is also non-deprecated.
 			 * 2. we are looking at a deprecated address,
 			 *    and ifa_best is also deprecated.
 			 * Also, we do not have to consider a case where
 			 * the scope of ifa_best is larger(smaller) than dst and
 			 * the scope of the current address is smaller(larger)
 			 * than dst. Such a case has already been covered.
 			 * Tiebreaking is done according to the following
 			 * items:
 			 * - the scope comparison between the address and
 			 *   dst (dscopecmp)
 			 * - the scope comparison between the address and
 			 *   ifa_best (bscopecmp)
 			 * - if the address match dst longer than ifa_best
 			 *   (matchcmp)
 			 * - if the address is on the outgoing I/F (outI/F)
 			 *
 			 * Roughly speaking, the selection policy is
 			 * - the most important item is scope. The same scope
 			 *   is best. Then search for a larger scope.
 			 *   Smaller scopes are the last resort.
 			 * - A deprecated address is chosen only when we have
 			 *   no address that has an enough scope, but is
 			 *   preferred to any addresses of smaller scopes
 			 *   (this must be already done above.)
 			 * - addresses on the outgoing I/F are preferred to
 			 *   ones on other interfaces if none of above
 			 *   tiebreaks.  In the table below, the column "bI"
 			 *   means if the ifa_best is on the outgoing
 			 *   interface, and the column "sI" means if the ifa
 			 *   is on the outgoing interface.
 			 * - If there is no other reasons to choose one,
 			 *   longest address match against dst is considered.
 			 *
 			 * The precise decision table is as follows:
 			 * dscopecmp bscopecmp    match  bI sI | replace?
 			 *       N/A     equal      N/A   Y  N |   No (1)
 			 *       N/A     equal      N/A   N  Y |  Yes (2)
 			 *       N/A     equal   larger    N/A |  Yes (3)
 			 *       N/A     equal  !larger    N/A |   No (4)
 			 *    larger    larger      N/A    N/A |   No (5)
 			 *    larger   smaller      N/A    N/A |  Yes (6)
 			 *   smaller    larger      N/A    N/A |  Yes (7)
 			 *   smaller   smaller      N/A    N/A |   No (8)
 			 *     equal   smaller      N/A    N/A |  Yes (9)
 			 *     equal    larger       (already done at A above)
 			 */
 			dscopecmp = IN6_ARE_SCOPE_CMP(src_scope, dst_scope);
 			bscopecmp = IN6_ARE_SCOPE_CMP(src_scope, best_scope);
 
 			if (bscopecmp == 0) {
 				struct ifnet *bifp = ifa_best->ia_ifp;
 
 				if (bifp == oifp && ifp != oifp) /* (1) */
 					continue;
 				if (bifp != oifp && ifp == oifp) /* (2) */
 					goto replace;
 
 				/*
 				 * Both bifp and ifp are on the outgoing
 				 * interface, or both two are on a different
 				 * interface from the outgoing I/F.
 				 * now we need address matching against dst
 				 * for tiebreaking.
 				 */
 				tlen = in6_matchlen(IFA_IN6(ifa), dst);
 				matchcmp = tlen - blen;
 				if (matchcmp > 0) /* (3) */
 					goto replace;
 				continue; /* (4) */
 			}
 			if (dscopecmp > 0) {
 				if (bscopecmp > 0) /* (5) */
 					continue;
 				goto replace; /* (6) */
 			}
 			if (dscopecmp < 0) {
 				if (bscopecmp > 0) /* (7) */
 					goto replace;
 				continue; /* (8) */
 			}
 
 			/*
 			 * now dscopecmp must be 0.  Note that control falls
 			 * through into "replace" below whether or not the
 			 * test succeeds.
 			 */
 			if (bscopecmp < 0)
 				goto replace; /* (9) */
 
 		  replace:
 			/*
 			 * Adopt the current address as the best candidate
 			 * and cache its match length and scope.
 			 */
 			ifa_best = (struct in6_ifaddr *)ifa;
 			blen = tlen >= 0 ? tlen :
 				in6_matchlen(IFA_IN6(ifa), dst);
 			best_scope = in6_addrscope(&ifa_best->ia_addr.sin6_addr);
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/* count statistics for future improvements */
 	if (ifa_best == NULL)
 		ip6stat.ip6s_sources_none++;
 	else {
 		if (oifp == ifa_best->ia_ifp)
 			ip6stat.ip6s_sources_sameif[best_scope]++;
 		else
 			ip6stat.ip6s_sources_otherif[best_scope]++;
 
 		if (best_scope == dst_scope)
 			ip6stat.ip6s_sources_samescope[best_scope]++;
 		else
 			ip6stat.ip6s_sources_otherscope[best_scope]++;
 
 		if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			ip6stat.ip6s_sources_deprecated[best_scope]++;
 	}
 
 	return(ifa_best);
 }
 
 /*
  * return the best address out of the same scope. if no address was
  * found, return the first valid address from designated IF.
  */
 struct in6_ifaddr *
 in6_ifawithifp(ifp, dst)
 	struct ifnet *ifp;
 	struct in6_addr *dst;
 {
 	int dst_scope =	in6_addrscope(dst), blen = -1, tlen;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *besta = 0;
 	struct in6_ifaddr *dep[2];	/* last-resort: deprecated */
 
 	dep[0] = dep[1] = NULL;
 
 	/*
 	 * We first look for addresses in the same scope.
 	 * If there is one, return it.
 	 * If two or more, return one which matches the dst longest.
 	 * If none, return one of global addresses assigned other ifs.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated)
 				dep[0] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		if (dst_scope == in6_addrscope(IFA_IN6(ifa))) {
 			/*
 			 * call in6_matchlen() as few as possible
 			 */
 			if (besta) {
 				if (blen == -1)
 					blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
 				tlen = in6_matchlen(IFA_IN6(ifa), dst);
 				if (tlen > blen) {
 					blen = tlen;
 					besta = (struct in6_ifaddr *)ifa;
 				}
 			} else
 				besta = (struct in6_ifaddr *)ifa;
 		}
 	}
 	if (besta)
 		return(besta);
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated)
 				dep[1] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		return (struct in6_ifaddr *)ifa;
 	}
 
 	/* use the last-resort values, that are, deprecated addresses */
 	if (dep[0])
 		return dep[0];
 	if (dep[1])
 		return dep[1];
 
 	return NULL;
 }
 
 /*
  * Called when an interface comes up: run the IPv6 attach step and then
  * start DAD on every IPv6 address of ifp that is still marked tentative.
  */
 void
 in6_if_up(ifp)
 	struct ifnet *ifp;
 {
 	struct ifaddr *ifa;
 	int dad_delay = 0;	/* delay ticks before DAD output */
 
 	/* special cases, like 6to4, are handled in in6_ifattach */
 	in6_ifattach(ifp, NULL);
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
 		    (((struct in6_ifaddr *)ifa)->ia6_flags &
 		    IN6_IFF_TENTATIVE) != 0)
 			nd6_dad_start(ifa, &dad_delay);
 	}
 }
 
 /*
  * Tell whether DAD should be performed on ifp.
  * Returns 0 for loopback interfaces, for IFT_FAITH (and IFT_DUMMY when
  * defined), and for interfaces that are not both IFF_UP and IFF_RUNNING;
  * returns 1 otherwise.
  */
 int
 in6if_do_dad(ifp)
 	struct ifnet *ifp;
 {
 	const int ready = IFF_UP | IFF_RUNNING;
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return(0);
 
 	switch (ifp->if_type) {
 #ifdef IFT_DUMMY
 	case IFT_DUMMY:
 #endif
 	case IFT_FAITH:
 		/*
 		 * These types lack IFF_LOOPBACK yet loop packets back, and
 		 * a looped-back NS would confuse the DAD procedure.
 		 */
 		return(0);
 	default:
 		break;
 	}
 
 	/*
 	 * The DAD routine needs the interface up and running.  Addresses
 	 * may be assigned before that point; DAD is simply skipped then.
 	 * XXX: we should rather mark "tentative" on such addresses,
 	 * and do DAD after the interface becomes ready.
 	 */
 	return((ifp->if_flags & ready) == ready);
 }
 
 /*
  * Calculate max IPv6 MTU through all the interfaces and store it
  * to in6_maxmtu.
  */
 void
 in6_setmaxmtu()
 {
 	unsigned long maxmtu = 0;
 	struct ifnet *ifp;
 
 	IFNET_RLOCK();
 	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
 	{
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
 		    nd_ifinfo[ifp->if_index].linkmtu > maxmtu)
 			maxmtu =  nd_ifinfo[ifp->if_index].linkmtu;
 	}
 	IFNET_RUNLOCK();
 	if (maxmtu)	/* update only when maxmtu is positive */
 		in6_maxmtu = maxmtu;
 }
 
 /*
  * Build a sockaddr_in in *sin from *sin6: the port is copied and the
  * IPv4 address is taken from the last 32 bits of the IPv6 address.
  * The caller must pass a v4-mapped or v4-compatible address.
  */
 void
 in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 	bzero(sin, sizeof(struct sockaddr_in));
 
 	sin->sin_len = sizeof(*sin);
 	sin->sin_family = AF_INET;
 	sin->sin_port = sin6->sin6_port;
 	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 }
 
 /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_len = sizeof(struct sockaddr_in6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_port = sin->sin_port;
 	sin6->sin6_addr.s6_addr32[0] = 0;
 	sin6->sin6_addr.s6_addr32[1] = 0;
 	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
 	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
 }
 
 /* Convert sockaddr_in6 into sockaddr_in. */
 void
 in6_sin6_2_sin_in_sock(struct sockaddr *nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 sin6;
 
 	/*
 	 * Save original sockaddr_in6 addr and convert it
 	 * to sockaddr_in.
 	 */
 	sin6 = *(struct sockaddr_in6 *)nam;
 	sin_p = (struct sockaddr_in *)nam;
 	in6_sin6_2_sin(sin_p, &sin6);
 }
 
 /*
  * Replace the sockaddr_in that *nam points to with a newly allocated
  * (M_SONAME) sockaddr_in6 holding the v4-mapped form of the same address.
  * The old sockaddr is freed and *nam is updated to the new one.
  */
 void
 in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
 {
 	struct sockaddr_in6 *mapped;
 
 	MALLOC(mapped, struct sockaddr_in6 *, sizeof *mapped, M_SONAME,
 	       M_WAITOK);
 	in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, mapped);
 
 	FREE(*nam, M_SONAME);
 	*nam = (struct sockaddr *)mapped;
 }
Index: head/sys/netinet6/in6_ifattach.c
===================================================================
--- head/sys/netinet6/in6_ifattach.c	(revision 120726)
+++ head/sys/netinet6/in6_ifattach.c	(revision 120727)
@@ -1,1053 +1,1057 @@
 /*	$FreeBSD$	*/
 /*	$KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/md5.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_pcb.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
 
 #include <net/net_osdep.h>
 
 /*
  * Per-interface statistics tables, indexed by if_index.  in6_ifattach()
  * allocates and grows them; the *max variables record the number of
  * slots currently allocated.
  */
 struct in6_ifstat **in6_ifstat = NULL;
 struct icmp6_ifstat **icmp6_ifstat = NULL;
 size_t in6_ifstatmax = 0;
 size_t icmp6_ifstatmax = 0;
 /* raised by in6_ifattach() whenever an attached interface has a larger MTU */
 unsigned long in6_maxmtu = 0;
 
 /* non-zero: in6_ifattach() assigns a link-local address automatically */
 #ifdef IP6_AUTO_LINKLOCAL
 int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
 #else
 int ip6_auto_linklocal = 1;	/* enable by default */
 #endif
 
 /* callout handle that in6_tmpaddrtimer() re-arms on every run */
 struct callout in6_tmpaddrtimer_ch;
 
 /* PCB tables walked by in6_ifdetach() to drop per-interface state */
 extern struct inpcbinfo udbinfo;
 extern struct inpcbinfo ripcbinfo;
 
 static int get_rand_ifid __P((struct ifnet *, struct in6_addr *));
 static int generate_tmp_ifid __P((u_int8_t *, const u_int8_t *, u_int8_t *));
 static int get_hw_ifid __P((struct ifnet *, struct in6_addr *));
 static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *));
 static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *));
 static int in6_ifattach_loopback __P((struct ifnet *));
 
 /*
  * Bit tests on byte 8 of an in6_addr, i.e. the first byte of the lower
  * 64 bits that hold the interface identifier.  The EUI64_* forms read
  * the "g" (group) and "u" (local) bits as they appear in an EUI64;
  * EUI64_TO_IFID() flips the "u" bit, and the IFID_* forms read that bit
  * with the inverted sense.
  */
 #define EUI64_GBIT	0x01
 #define EUI64_UBIT	0x02
 #define EUI64_TO_IFID(in6)	do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0)
 #define EUI64_GROUP(in6)	((in6)->s6_addr[8] & EUI64_GBIT)
 #define EUI64_INDIVIDUAL(in6)	(!EUI64_GROUP(in6))
 #define EUI64_LOCAL(in6)	((in6)->s6_addr[8] & EUI64_UBIT)
 #define EUI64_UNIVERSAL(in6)	(!EUI64_LOCAL(in6))
 
 #define IFID_LOCAL(in6)		(!EUI64_LOCAL(in6))
 #define IFID_UNIVERSAL(in6)	(!EUI64_UNIVERSAL(in6))
 
 /*
  * Generate a last-resort interface identifier, when the machine has no
  * IEEE802/EUI64 address sources.
  * The goal here is to get an interface identifier that is
  * (1) random enough and (2) does not change across reboot.
  * We currently use MD5(hostname) for it.
  */
 static int
 get_rand_ifid(ifp, in6)
 	struct ifnet *ifp;
 	struct in6_addr *in6;	/* upper 64bits are preserved */
 {
 	MD5_CTX ctxt;
 	u_int8_t digest[16];
 	int hostnamelen	= strlen(hostname);
 
 #if 0
 	/* we need at least several letters as seed for ifid */
 	if (hostnamelen < 3)
 		return -1;
 #endif
 
 	/* generate 8 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, hostname, hostnamelen);
 	MD5Final(digest, &ctxt);
 
 	/* assumes sizeof(digest) > sizeof(ifid) */
 	bcopy(digest, &in6->s6_addr[8], 8);
 
 	/* make sure to set "u" bit to local, and "g" bit to individual. */
 	in6->s6_addr[8] &= ~EUI64_GBIT;	/* g bit to "individual" */
 	in6->s6_addr[8] |= EUI64_UBIT;	/* u bit to "local" */
 
 	/* convert EUI64 into IPv6 interface identifier */
 	EUI64_TO_IFID(in6);
 
 	return 0;
 }
 
 /*
  * Generate a randomized interface identifier (RFC 3041 3.2.1).
  *
  * seed0: 8-byte history value; read as input and overwritten with the
  *        right-most 64 bits of the new digest.  An all-zero seed0 means
  *        "no history" and is replaced by a random seed for this round.
  * seed1: 8 bytes taken as the right-most 64 bits of the base address.
  * ret:   receives the 8-byte identifier (left-most 64 bits of the digest
  *        with EUI64_UBIT cleared in the first byte).
  *
  * Always returns 0.
  */
 static int
 generate_tmp_ifid(seed0, seed1, ret)
 	u_int8_t *seed0, *ret;
 	const u_int8_t *seed1;
 {
 	MD5_CTX ctxt;
 	u_int8_t seed[16], digest[16], nullbuf[8];
 	u_int32_t val32;
 	struct timeval tv;
 
 	/* If there's no history, start with a random seed. */
 	bzero(nullbuf, sizeof(nullbuf));
 	if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
 		int i;
 
 		for (i = 0; i < 2; i++) {
 			microtime(&tv);
 			val32 = random() ^ tv.tv_usec;
 			bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32));
 		}
 	} else {
 		bcopy(seed0, seed, 8);
 	}
 
 	/* copy the right-most 64-bits of the given address */
 	/* XXX assumption on the size of IFID */
 	bcopy(seed1, &seed[8], 8);
 
 	if (0) {		/* for debugging purposes only */
 		int i;
 
 		printf("generate_tmp_ifid: new randomized ID from: ");
 		for (i = 0; i < 16; i++)
 			printf("%02x", seed[i]);
 		printf(" ");
 	}
 
 	/* generate 16 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, seed, sizeof(seed));
 	MD5Final(digest, &ctxt);
 
 	/*
 	 * RFC 3041 3.2.1. (3)
 	 * Take the left-most 64-bits of the MD5 digest and set bit 6 (the
 	 * left-most bit is numbered 0) to zero.
 	 */
 	bcopy(digest, ret, 8);
 	ret[0] &= ~EUI64_UBIT;
 
 	/*
 	 * XXX: we'd like to ensure that the generated value is not zero
 	 * for simplicity.  If the calculated digest happens to be zero,
 	 * use a random non-zero value as the last resort.
 	 */
 	if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) {
 		log(LOG_INFO,
 		    "generate_tmp_ifid: computed MD5 value is zero.\n");
 
 		microtime(&tv);
 		val32 = random() ^ tv.tv_usec;
 		val32 = 1 + (val32 % (0xffffffff - 1));
 
 		/*
 		 * Actually store the fallback; it was computed but dropped
 		 * before, so the all-zero identifier was still returned.
 		 * It goes into the low 4 bytes so that ret[0], whose "u"
 		 * bit was just cleared, stays untouched.
 		 */
 		bcopy(&val32, &ret[4], sizeof(val32));
 	}
 
 	/*
 	 * RFC 3041 3.2.1. (4)
 	 * Take the rightmost 64-bits of the MD5 digest and save them in
 	 * stable storage as the history value to be used in the next
 	 * iteration of the algorithm.
 	 */
 	bcopy(&digest[8], seed0, 8);
 
 	if (0) {		/* for debugging purposes only */
 		int i;
 
 		printf("to: ");
 		for (i = 0; i < 16; i++)
 			printf("%02x", digest[i]);
 		printf("\n");
 	}
 
 	return 0;
 }
 
 /*
  * Get interface identifier for the specified interface.
  * XXX assumes single sockaddr_dl (AF_LINK address) per an interface
  *
  * The first AF_LINK address of ifp with a non-zero sdl_alen is used as
  * the hardware address.  On success the lower 64 bits of *in6 hold the
  * interface identifier and 0 is returned.  -1 is returned when there is
  * no such address, when the interface type is not handled, or when the
  * address fails one of the sanity checks below; the lower 64 bits of
  * *in6 may already have been overwritten in that case.
  */
 static int
 get_hw_ifid(ifp, in6)
 	struct ifnet *ifp;
 	struct in6_addr *in6;	/* upper 64bits are preserved */
 {
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	u_int8_t *addr;
 	size_t addrlen;
 	static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 	static u_int8_t allone[8] =
 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 	/* locate the link-level address of the interface */
 	for (ifa = ifp->if_addrlist.tqh_first;
 	     ifa;
 	     ifa = ifa->ifa_list.tqe_next)
 	{
 		if (ifa->ifa_addr->sa_family != AF_LINK)
 			continue;
 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 		/*
 		 * NOTE(review): ifa->ifa_addr was already dereferenced
 		 * above, so this NULL test cannot catch anything.
 		 */
 		if (sdl == NULL)
 			continue;
 		if (sdl->sdl_alen == 0)
 			continue;
 
 		goto found;
 	}
 
 	return -1;
 
 found:
 	addr = LLADDR(sdl);
 	addrlen = sdl->sdl_alen;
 
 	/* get EUI64 */
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_ISO88025:
 	case IFT_ATM:
 	case IFT_IEEE1394:
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
 		/* IEEE802/EUI64 cases - what others? */
 		/* IEEE1394 uses 16byte length address starting with EUI64 */
 		if (addrlen > 8)
 			addrlen = 8;
 
 		/* look at IEEE802/EUI64 only */
 		if (addrlen != 8 && addrlen != 6)
 			return -1;
 
 		/*
 		 * check for invalid MAC address - on bsdi, we see it a lot
 		 * since wildboar configures all-zero MAC on pccard before
 		 * card insertion.
 		 */
 		if (bcmp(addr, allzero, addrlen) == 0)
 			return -1;
 		if (bcmp(addr, allone, addrlen) == 0)
 			return -1;
 
 		/* make EUI64 address */
 		if (addrlen == 8)
 			bcopy(addr, &in6->s6_addr[8], 8);
 		else if (addrlen == 6) {
 			/* 6-byte address: insert ff:fe between bytes 3 and 4 */
 			in6->s6_addr[8] = addr[0];
 			in6->s6_addr[9] = addr[1];
 			in6->s6_addr[10] = addr[2];
 			in6->s6_addr[11] = 0xff;
 			in6->s6_addr[12] = 0xfe;
 			in6->s6_addr[13] = addr[3];
 			in6->s6_addr[14] = addr[4];
 			in6->s6_addr[15] = addr[5];
 		}
 		break;
 
 	case IFT_ARCNET:
 		/* one-byte, non-zero hardware address goes in the last byte */
 		if (addrlen != 1)
 			return -1;
 		if (!addr[0])
 			return -1;
 
 		bzero(&in6->s6_addr[8], 8);
 		in6->s6_addr[15] = addr[0];
 
 		/*
 		 * due to insufficient bitwidth, we mark it local.
 		 */
 		in6->s6_addr[8] &= ~EUI64_GBIT;	/* g bit to "individual" */
 		in6->s6_addr[8] |= EUI64_UBIT;	/* u bit to "local" */
 		break;
 
 	case IFT_GIF:
 #ifdef IFT_STF
 	case IFT_STF:
 #endif
 		/*
 		 * RFC2893 says: "SHOULD use IPv4 address as ifid source".
 		 * however, IPv4 address is not very suitable as unique
 		 * identifier source (can be renumbered).
 		 * we don't do this.
 		 */
 		return -1;
 
 	default:
 		return -1;
 	}
 
 	/* sanity check: g bit must not indicate "group" */
 	if (EUI64_GROUP(in6))
 		return -1;
 
 	/* convert EUI64 into IPv6 interface identifier */
 	EUI64_TO_IFID(in6);
 
 	/*
 	 * sanity check: ifid must not be all zero, avoid conflict with
 	 * subnet router anycast
 	 */
 	/* the u and g bits of byte 8 are ignored by this comparison */
 	if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
 	    bcmp(&in6->s6_addr[9], allzero, 7) == 0) {
 		return -1;
 	}
 
 	return 0;
 }
 
 /*
  * Get interface identifier for the specified interface.  If it is not
  * available on ifp0, borrow interface identifier from other information
  * sources.
  *
  * Sources are tried in this order: ifp0 itself, altifp (when non-NULL),
  * any other interface whose hardware identifier passes IFID_UNIVERSAL(),
  * and finally get_rand_ifid().  The lower 64 bits of *in6 receive the
  * identifier.  Returns 0 on success, -1 when every source failed.
  */
 static int
 get_ifid(ifp0, altifp, in6)
 	struct ifnet *ifp0;
 	struct ifnet *altifp;	/* secondary EUI64 source */
 	struct in6_addr *in6;
 {
 	struct ifnet *ifp;
 
 	/* first, try to get it from the interface itself */
 	if (get_hw_ifid(ifp0, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n",
 		    if_name(ifp0)));
 		goto success;
 	}
 
 	/* try secondary EUI64 source. this basically is for ATM PVC */
 	if (altifp && get_hw_ifid(altifp, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n",
 		    if_name(ifp0), if_name(altifp)));
 		goto success;
 	}
 
 	/* next, try to get it from some other hardware interface */
 	IFNET_RLOCK();
 	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next)
 	{
 		if (ifp == ifp0)
 			continue;
 		if (get_hw_ifid(ifp, in6) != 0)
 			continue;
 
 		/*
 		 * to borrow ifid from other interface, ifid needs to be
 		 * globally unique
 		 */
 		if (IFID_UNIVERSAL(in6)) {
 			nd6log((LOG_DEBUG,
 			    "%s: borrow interface identifier from %s\n",
 			    if_name(ifp0), if_name(ifp)));
 			IFNET_RUNLOCK();
 			goto success;
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/*
 	 * last resort: get from random number source.
 	 * Pass ifp0, the interface being configured; the loop cursor ifp
 	 * is always NULL once the scan above has run to completion.
 	 */
 	if (get_rand_ifid(ifp0, in6) == 0) {
 		nd6log((LOG_DEBUG,
 		    "%s: interface identifier generated by random number\n",
 		    if_name(ifp0)));
 		goto success;
 	}
 
 	printf("%s: failed to get interface identifier\n", if_name(ifp0));
 	return -1;
 
 success:
 	nd6log((LOG_INFO, "%s: ifid: "
 		"%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
 		if_name(ifp0),
 		in6->s6_addr[8], in6->s6_addr[9],
 		in6->s6_addr[10], in6->s6_addr[11],
 		in6->s6_addr[12], in6->s6_addr[13],
 		in6->s6_addr[14], in6->s6_addr[15]));
 	return 0;
 }
 
 /*
  * Configure a link-local address (fe80::/64 plus an interface
  * identifier) on ifp and register the link-local prefix as on-link.
  *
  * ifp:    interface to configure.
  * altifp: secondary EUI64 source handed to get_ifid(); may be NULL.
  *
  * Returns 0 on success, -1 when no interface identifier is available or
  * in6_update_ifa() fails, or the error from nd6_prelist_add().
  */
 static int
 in6_ifattach_linklocal(ifp, altifp)
 	struct ifnet *ifp;
 	struct ifnet *altifp;	/* secondary EUI64 source */
 {
 	struct in6_ifaddr *ia;
 	struct in6_aliasreq ifra;
 	struct nd_prefix pr0;
 	int i, error;
 
 	/*
 	 * configure link-local address.
 	 */
 	bzero(&ifra, sizeof(ifra));
 
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80);
 #ifdef SCOPEDROUTING
 	/* the terminating ';' was missing here, breaking this configuration */
 	ifra.ifra_addr.sin6_addr.s6_addr16[1] = 0;
 #else
 	ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); /* XXX */
 #endif
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/* loopback interfaces get the fixed identifier ::1 */
 		ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0;
 		ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
 	} else {
 		if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) {
 			nd6log((LOG_ERR,
 			    "%s: no ifid available\n", if_name(ifp)));
 			return -1;
 		}
 	}
 #ifdef SCOPEDROUTING
 	ifra.ifra_addr.sin6_scope_id =
 		in6_addr2scopeid(ifp,  &ifra.ifra_addr.sin6_addr);
 #endif
 
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	ifra.ifra_prefixmask.sin6_addr = in6mask64;
 #ifdef SCOPEDROUTING
 	/* take into account the sin6_scope_id field for routing */
 	ifra.ifra_prefixmask.sin6_scope_id = 0xffffffff;
 #endif
 	/* link-local addresses should NEVER expire. */
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
 	/*
 	 * Do not let in6_update_ifa() do DAD, since we need a random delay
 	 * before sending an NS at the first time the interface becomes up.
 	 * Instead, in6_if_up() will start DAD with a proper random delay.
 	 */
 	ifra.ifra_flags |= IN6_IFF_NODAD;
 
 	/*
 	 * Now call in6_update_ifa() to do a bunch of procedures to configure
 	 * a link-local address. We can set NULL to the 3rd argument, because
 	 * we know there's no other link-local address on the interface
 	 * and therefore we are adding one (instead of updating one).
 	 */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) {
 		/*
 		 * XXX: When the interface does not support IPv6, this call
 		 * would fail in the SIOCSIFADDR ioctl.  I believe the
 		 * notification is rather confusing in this case, so just
 		 * suppress it.  (jinmei@kame.net 20010130)
 		 */
 		if (error != EAFNOSUPPORT)
 			log(LOG_NOTICE, "in6_ifattach_linklocal: failed to "
 			    "configure a link-local address on %s "
 			    "(errno=%d)\n",
 			    if_name(ifp), error);
 		return(-1);
 	}
 
 	/*
 	 * Adjust ia6_flags so that in6_if_up will perform DAD.
 	 * XXX: Some P2P interfaces seem not to send packets just after
 	 * becoming up, so we skip p2p interfaces for safety.
 	 */
 	ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */
 #ifdef DIAGNOSTIC
 	if (!ia) {
 		panic("ia == NULL in in6_ifattach_linklocal");
 		/* NOTREACHED */
 	}
 #endif
 	if (in6if_do_dad(ifp) && (ifp->if_flags & IFF_POINTOPOINT) == 0) {
 		ia->ia6_flags &= ~IN6_IFF_NODAD;
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 	}
 
 	/*
 	 * Make the link-local prefix (fe80::/64%link) as on-link.
 	 * Since we'd like to manage prefixes separately from addresses,
 	 * we make an ND6 prefix structure for the link-local prefix,
 	 * and add it to the prefix list as a never-expire prefix.
 	 * XXX: this change might affect some existing code base...
 	 */
 	bzero(&pr0, sizeof(pr0));
 	pr0.ndpr_ifp = ifp;
 	/* this should be 64 at this moment. */
 	pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
 	pr0.ndpr_mask = ifra.ifra_prefixmask.sin6_addr;
 	pr0.ndpr_prefix = ifra.ifra_addr;
 	/* apply the mask for safety. (nd6_prelist_add will apply it again) */
 	for (i = 0; i < 4; i++) {
 		pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 			in6mask64.s6_addr32[i];
 	}
 	/*
 	 * Initialize parameters.  The link-local prefix must always be
 	 * on-link, and its lifetimes never expire.
 	 */
 	pr0.ndpr_raf_onlink = 1;
 	pr0.ndpr_raf_auto = 1;	/* probably meaningless */
 	pr0.ndpr_vltime = ND6_INFINITE_LIFETIME;
 	pr0.ndpr_pltime = ND6_INFINITE_LIFETIME;
 	/*
 	 * Since there is no other link-local addresses, nd6_prefix_lookup()
 	 * probably returns NULL.  However, we cannot always expect the result.
 	 * For example, if we first remove the (only) existing link-local
 	 * address, and then reconfigure another one, the prefix is still
 	 * valid with referring to the old link-local address.
 	 */
 	if (nd6_prefix_lookup(&pr0) == NULL) {
 		if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0)
 			return(error);
 	}
 
 	return 0;
 }
 
 static int
 in6_ifattach_loopback(ifp)
 	struct ifnet *ifp;	/* must be IFT_LOOP */
 {
 	struct in6_aliasreq ifra;
 	int error;
 
 	bzero(&ifra, sizeof(ifra));
 
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	ifra.ifra_prefixmask.sin6_addr = in6mask128;
 
 	/*
 	 * Always initialize ia_dstaddr (= broadcast address) to loopback
 	 * address.  Follows IPv4 practice - see in_ifinit().
 	 */
 	ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_dstaddr.sin6_family = AF_INET6;
 	ifra.ifra_dstaddr.sin6_addr = in6addr_loopback;
 
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_addr = in6addr_loopback;
 
 	/* the loopback  address should NEVER expire. */
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
 	/* we don't need to perform DAD on loopback interfaces. */
 	ifra.ifra_flags |= IN6_IFF_NODAD;
 
 	/* skip registration to the prefix list. XXX should be temporary. */
 	ifra.ifra_flags |= IN6_IFF_NOPFX;
 
 	/*
 	 * We are sure that this is a newly assigned address, so we can set
 	 * NULL to the 3rd arg.
 	 */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) {
 		log(LOG_ERR, "in6_ifattach_loopback: failed to configure "
 		    "the loopback address on %s (errno=%d)\n",
 		    if_name(ifp), error);
 		return(-1);
 	}
 
 	return 0;
 }
 
 /*
  * compute NI group address, based on the current hostname setting.
  * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later).
  *
  * when ifp == NULL, the caller is responsible for filling scopeid.
  */
 int
 in6_nigroup(ifp, name, namelen, in6)
 	struct ifnet *ifp;
 	const char *name;
 	int namelen;
 	struct in6_addr *in6;
 {
 	const char *p;
 	u_char *q;
 	MD5_CTX ctxt;
 	u_int8_t digest[16];
 	char l;
 	char n[64];	/* a single label must not exceed 63 chars */
 
 	if (!namelen || !name)
 		return -1;
 
 	p = name;
 	while (p && *p && *p != '.' && p - name < namelen)
 		p++;
 	if (p - name > sizeof(n) - 1)
 		return -1;	/* label too long */
 	l = p - name;
 	strncpy(n, name, l);
 	n[(int)l] = '\0';
 	for (q = n; *q; q++) {
 		if ('A' <= *q && *q <= 'Z')
 			*q = *q - 'A' + 'a';
 	}
 
 	/* generate 8 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, &l, sizeof(l));
 	MD5Update(&ctxt, n, l);
 	MD5Final(digest, &ctxt);
 
 	bzero(in6, sizeof(*in6));
 	in6->s6_addr16[0] = htons(0xff02);
 	if (ifp)
 		in6->s6_addr16[1] = htons(ifp->if_index);
 	in6->s6_addr8[11] = 2;
 	bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
 
 	return 0;
 }
 
 void
 in6_nigroup_attach(name, namelen)
 	const char *name;
 	int namelen;
 {
 	struct ifnet *ifp;
 	struct sockaddr_in6 mltaddr;
 	struct in6_multi *in6m;
 	int error;
 
 	bzero(&mltaddr, sizeof(mltaddr));
 	mltaddr.sin6_family = AF_INET6;
 	mltaddr.sin6_len = sizeof(struct sockaddr_in6);
 	if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0)
 		return;
 
 	IFNET_RLOCK();
 	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next)
 	{
 		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
 		if (!in6m) {
 			if (!in6_addmulti(&mltaddr.sin6_addr, ifp, &error)) {
 				nd6log((LOG_ERR, "%s: failed to join %s "
 				    "(errno=%d)\n", if_name(ifp),
 				    ip6_sprintf(&mltaddr.sin6_addr), 
 				    error));
 			}
 		}
 	}
 	IFNET_RUNLOCK();
 }
 
 void
 in6_nigroup_detach(name, namelen)
 	const char *name;
 	int namelen;
 {
 	struct ifnet *ifp;
 	struct sockaddr_in6 mltaddr;
 	struct in6_multi *in6m;
 
 	bzero(&mltaddr, sizeof(mltaddr));
 	mltaddr.sin6_family = AF_INET6;
 	mltaddr.sin6_len = sizeof(struct sockaddr_in6);
 	if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0)
 		return;
 
 	IFNET_RLOCK();
 	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next)
 	{
 		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
 		if (in6m)
 			in6_delmulti(in6m);
 	}
 	IFNET_RUNLOCK();	
 }
 
 /*
  * Prepare ifp for IPv6: grow the per-interface statistics tables when
  * needed, set up scope identifiers, assign the loopback and link-local
  * addresses where applicable, allocate this interface's statistics
  * slots, and initialize NDP state.
  *
  * XXX multiple loopback interface needs more care.  for instance,
  * nodelocal address needs to be configured onto only one of them.
  * XXX multiple link-local address case
  */
 void
 in6_ifattach(ifp, altifp)
 	struct ifnet *ifp;
 	struct ifnet *altifp;	/* secondary EUI64 source */
 {
 	/* current capacity of the stat tables; persists across calls */
 	static size_t if_indexlim = 8;
 	struct in6_ifaddr *ia;
 	struct in6_addr in6;
 
 	/* some of the interfaces are inherently not IPv6 capable */
 	switch (ifp->if_type) {
 #ifdef IFT_BRIDGE	/*OpenBSD 2.8*/
 	case IFT_BRIDGE:
 		return;
 #endif
 	}
 
 	/*
 	 * We have some arrays that should be indexed by if_index.
 	 * since if_index will grow dynamically, they should grow too.
 	 *	struct in6_ifstat **in6_ifstat
 	 *	struct icmp6_ifstat **icmp6_ifstat
 	 */
 	if (in6_ifstat == NULL || icmp6_ifstat == NULL ||
 	    if_index >= if_indexlim) {
 		size_t n;
 		caddr_t q;
 		size_t olim;
 
 		/* double the limit until it exceeds the current if_index */
 		olim = if_indexlim;
 		while (if_index >= if_indexlim)
 			if_indexlim <<= 1;
 
 		/* grow in6_ifstat */
 		n = if_indexlim * sizeof(struct in6_ifstat *);
 		q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK);
 		bzero(q, n);
 		if (in6_ifstat) {
 			/* carry over the old pointers, then drop the old table */
 			bcopy((caddr_t)in6_ifstat, q,
 				olim * sizeof(struct in6_ifstat *));
 			free((caddr_t)in6_ifstat, M_IFADDR);
 		}
 		in6_ifstat = (struct in6_ifstat **)q;
 		in6_ifstatmax = if_indexlim;
 
 		/* grow icmp6_ifstat */
 		n = if_indexlim * sizeof(struct icmp6_ifstat *);
 		q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK);
 		bzero(q, n);
 		if (icmp6_ifstat) {
 			bcopy((caddr_t)icmp6_ifstat, q,
 				olim * sizeof(struct icmp6_ifstat *));
 			free((caddr_t)icmp6_ifstat, M_IFADDR);
 		}
 		icmp6_ifstat = (struct icmp6_ifstat **)q;
 		icmp6_ifstatmax = if_indexlim;
 	}
 
 	/* initialize scope identifiers */
 	scope6_ifattach(ifp);
 
 	/*
 	 * quirks based on interface type
 	 */
 	switch (ifp->if_type) {
 #ifdef IFT_STF
 	case IFT_STF:
 		/*
 		 * 6to4 interface is a very special kind of beast.
 		 * no multicast, no linklocal.  RFC2529 specifies how to make
 		 * linklocals for 6to4 interface, but there's no use and
 		 * it is rather harmful to have one.
 		 */
 		goto statinit;
 #endif
 	default:
 		break;
 	}
 
 	/*
 	 * usually, we require multicast capability to the interface
 	 */
 	/*
 	 * NOTE: this return happens before the statinit section below, so
 	 * such interfaces get neither statistics slots nor nd6_ifattach().
 	 */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 		log(LOG_INFO, "in6_ifattach: "
 		    "%s is not multicast capable, IPv6 not enabled\n",
 		    if_name(ifp));
 		return;
 	}
 
 	/*
 	 * assign loopback address for loopback interface.
 	 * XXX multiple loopback interface case.
 	 */
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		in6 = in6addr_loopback;
 		if (in6ifa_ifpwithaddr(ifp, &in6) == NULL) {
 			if (in6_ifattach_loopback(ifp) != 0)
 				return;
 		}
 	}
 
 	/*
 	 * assign a link-local address, if there's none. 
 	 */
 	if (ip6_auto_linklocal) {
 		ia = in6ifa_ifpforlinklocal(ifp, 0);
 		if (ia == NULL) {
 			/* the result is deliberately ignored either way */
 			if (in6_ifattach_linklocal(ifp, altifp) == 0) {
 				/* linklocal address assigned */
 			} else {
 				/* failed to assign linklocal address. bark? */
 			}
 		}
 	}
 
 #ifdef IFT_STF			/* XXX */
 statinit:	
 #endif
 
 	/* update dynamically. */
 	if (in6_maxmtu < ifp->if_mtu)
 		in6_maxmtu = ifp->if_mtu;
 
 	/* allocate zeroed statistics blocks on first attach of this index */
 	if (in6_ifstat[ifp->if_index] == NULL) {
 		in6_ifstat[ifp->if_index] = (struct in6_ifstat *)
 			malloc(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK);
 		bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat));
 	}
 	if (icmp6_ifstat[ifp->if_index] == NULL) {
 		icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *)
 			malloc(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK);
 		bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat));
 	}
 
 	/* initialize NDP variables */
 	nd6_ifattach(ifp);
 }
 
 /*
  * Remove the IPv6 state attached to ifp: prefixes, neighbor entries,
  * IPv6 addresses, multicast memberships, and the route to the
  * link-local all-nodes multicast address.
  *
  * NOTE: in6_ifdetach() does not support loopback if at this moment.
  * We don't need this function in bsdi, because interfaces are never removed
  * from the ifnet list in bsdi.
  */
 void
 in6_ifdetach(ifp)
 	struct ifnet *ifp;
 {
 	struct in6_ifaddr *ia, *oia;
 	struct ifaddr *ifa, *next;
 	struct rtentry *rt;
 	short rtflags;
 	struct sockaddr_in6 sin6;
 	struct in6_multi *in6m;
 	struct in6_multi *in6m_next;
 
 	/* nuke prefix list.  this may try to remove some of ifaddrs as well */
 	in6_purgeprefix(ifp);
 
 	/* remove neighbor management table */
 	nd6_purge(ifp);
 
 	/* nuke any of IPv6 addresses we have */
 	/* 'next' is saved first because the current entry may be removed */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next)
 	{
 		next = ifa->ifa_list.tqe_next;
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		in6_purgeaddr(ifa);
 	}
 
 	/* undo everything done by in6_ifattach(), just in case */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next)
 	{
 		next = ifa->ifa_list.tqe_next;
 
 
 		/*
 		 * NOTE(review): ifa->ifa_addr is itself a pointer (it is
 		 * dereferenced on the line below), so &ifa->ifa_addr hands
 		 * satosin6() the address of that pointer rather than the
 		 * sockaddr.  Looks unintended - confirm against satosin6().
 		 */
 		if (ifa->ifa_addr->sa_family != AF_INET6
 		 || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) {
 			continue;
 		}
 
 		ia = (struct in6_ifaddr *)ifa;
 
 		/* remove from the routing table */
 		/* the lookup is only used to learn the flags for RTM_DELETE */
 		if ((ia->ia_flags & IFA_ROUTE)
 		 && (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) {
 			rtflags = rt->rt_flags;
 			rtfree(rt);
 			rtrequest(RTM_DELETE,
 				(struct sockaddr *)&ia->ia_addr,
 				(struct sockaddr *)&ia->ia_addr,
 				(struct sockaddr *)&ia->ia_prefixmask,
 				rtflags, (struct rtentry **)0);
 		}
 
 		/* remove from the linked list */
 		TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list);
 		IFAFREE(&ia->ia_ifa);
 
 		/* also remove from the IPv6 address chain(itojun&jinmei) */
 		/* oia keeps the entry being removed; ia becomes the cursor */
 		oia = ia;
 		if (oia == (ia = in6_ifaddr))
 			in6_ifaddr = ia->ia_next;
 		else {
 			while (ia->ia_next && (ia->ia_next != oia))
 				ia = ia->ia_next;
 			if (ia->ia_next)
 				ia->ia_next = oia->ia_next;
 			else {
 				nd6log((LOG_ERR, 
 				    "%s: didn't unlink in6ifaddr from "
 				    "list\n", if_name(ifp)));
 			}
 		}
 
 		/* second IFAFREE on the same address, after the chain unlink */
 		IFAFREE(&oia->ia_ifa);
 	}
 
 	/* leave from all multicast groups joined */
 	if (udbinfo.listhead != NULL)
 		in6_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp);
 	if (ripcbinfo.listhead != NULL)
 		in6_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp);
 	/* the successor is fetched before in6_delmulti() is called */
 	for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) {
 		in6m_next = LIST_NEXT(in6m, in6m_entry);
 		if (in6m->in6m_ifp != ifp)
 			continue;
 		in6_delmulti(in6m);
 		in6m = NULL;
 	}
 
 	/*
 	 * remove neighbor management table.  we call it twice just to make
 	 * sure we nuke everything.  maybe we need just one call.
 	 * XXX: since the first call did not release addresses, some prefixes
 	 * might remain.  We should call nd6_purge() again to release the
 	 * prefixes after removing all addresses above.
 	 * (Or can we just delay calling nd6_purge until at this point?)
 	 */
 	nd6_purge(ifp);
 
 	/* remove route to link-local allnodes multicast (ff02::1) */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = in6addr_linklocal_allnodes;
 	sin6.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
-	if (rt && rt->rt_ifp == ifp) {
-		rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
-			rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
-		rtfree(rt);
+	if (rt) {
+		if (rt->rt_ifp == ifp) {
+			RT_UNLOCK(rt);
+			rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
+				rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+			RTFREE(rt);
+		} else
+			rtfree(rt);
 	}
 }
 
 /*
  * Copy the interface's current randomized interface identifier into
  * retbuf (8 bytes).  A new identifier is generated first when the caller
  * asks for it (generate != 0) or when the interface has none yet (its
  * stored identifier is all zero); baseid is then saved as randomseed1.
  */
 void
 in6_get_tmpifid(ifp, retbuf, baseid, generate)
 	struct ifnet *ifp;
 	u_int8_t *retbuf;
 	const u_int8_t *baseid;
 	int generate;
 {
 	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
 	u_int8_t zeros[8];
 
 	bzero(zeros, sizeof(zeros));
 
 	if (generate || bcmp(ndi->randomid, zeros, sizeof(zeros)) == 0) {
 		bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1));
 
 		/* generate_tmp_ifid updates randomseed0 and randomid */
 		(void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1,
 					ndi->randomid);
 	}
 
 	bcopy(ndi->randomid, retbuf, 8);
 }
 
 /*
  * Callout handler: re-arm itself, then regenerate the randomized
  * interface identifier of every interface index that already has one
  * (a non-zero randomid).  The argument is unused.
  */
 void
 in6_tmpaddrtimer(ignored_arg)
 	void *ignored_arg;
 {
 	u_int8_t zeros[8];
 	struct nd_ifinfo *ndi;
 	int idx;
 	int s;
 
 	s = splnet();
 
 	/* schedule the next run */
 	callout_reset(&in6_tmpaddrtimer_ch,
 		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
 		       ip6_temp_regen_advance) * hz,
 		      in6_tmpaddrtimer, NULL);
 
 	bzero(zeros, sizeof(zeros));
 	for (idx = 1; idx <= if_index; idx++) {
 		ndi = &nd_ifinfo[idx];
 
 		/* an all-zero ID means none was ever generated here */
 		if (bcmp(ndi->randomid, zeros, sizeof(zeros)) == 0)
 			continue;
 
 		(void)generate_tmp_ifid(ndi->randomseed0,
 					ndi->randomseed1,
 					ndi->randomid);
 	}
 
 	splx(s);
 }
Index: head/sys/netinet6/in6_pcb.c
===================================================================
--- head/sys/netinet6/in6_pcb.c	(revision 120726)
+++ head/sys/netinet6/in6_pcb.c	(revision 120727)
@@ -1,1163 +1,1167 @@
 /*	$FreeBSD$	*/
 /*	$KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $	*/
   
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 /*
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_random_ip_id.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/tcp_var.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_pcb.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #ifdef INET6
 #include <netinet6/ipsec6.h>
 #endif
 #include <netinet6/ah.h>
 #ifdef INET6
 #include <netinet6/ah6.h>
 #endif
 #include <netkey/key.h>
 #endif /* IPSEC */
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #define	IPSEC
 #endif /* FAST_IPSEC */
 
 struct	in6_addr zeroin6_addr;
 
 int
 in6_pcbbind(inp, nam, td)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	struct thread *td;
 {
 	struct socket *so = inp->inp_socket;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	u_short	lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 
 	if (!in6_ifaddr) /* XXX broken! */
 		return (EADDRNOTAVAIL);
 	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 		return(EINVAL);
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		wild = 1;
 	if (nam) {
 		sin6 = (struct sockaddr_in6 *)nam;
 		if (nam->sa_len != sizeof(*sin6))
 			return(EINVAL);
 		/*
 		 * family check.
 		 */
 		if (nam->sa_family != AF_INET6)
 			return(EAFNOSUPPORT);
 
 		/* KAME hack: embed scopeid */
 		if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0)
 			return EINVAL;
 		/* this must be cleared for ifa_ifwithaddr() */
 		sin6->sin6_scope_id = 0;
 
 		lport = sin6->sin6_port;
 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow compepte duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if (so->so_options & SO_REUSEADDR)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			struct ifaddr *ia = NULL;
 
 			sin6->sin6_port = 0;		/* yech... */
 			if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0)
 				return(EADDRNOTAVAIL);
 
 			/*
 			 * XXX: bind to an anycast address might accidentally
 			 * cause sending a packet with anycast source address.
 			 * We should allow to bind to a deprecated address, since
 			 * the application dare to use it.
 			 */
 			if (ia &&
 			    ((struct in6_ifaddr *)ia)->ia6_flags &
 			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
 				return(EADDRNOTAVAIL);
 			}
 		}
 		if (lport) {
 			struct inpcb *t;
 
 			/* GROSS */
 			if (ntohs(lport) < IPV6PORT_RESERVED && td &&
 			    suser_cred(td->td_ucred, PRISON_ROOT))
 				return(EACCES);
 			if (so->so_cred->cr_uid != 0 &&
 			    !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 				t = in6_pcblookup_local(pcbinfo,
 				    &sin6->sin6_addr, lport,
 				    INPLOOKUP_WILDCARD);
 				if (t && (t->inp_vflag & INP_TIMEWAIT)) {
 					if ((!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 					    !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
 					    !(intotw(t)->tw_so_options & SO_REUSEPORT))
 					    && so->so_cred->cr_uid != 
 					    intotw(t)->tw_cred->cr_uid)
 						return (EADDRINUSE);
 				} else if (t &&
 				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 			    	     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
 				     (t->inp_socket->so_options & SO_REUSEPORT) 
 				      == 0) && (so->so_cred->cr_uid !=
 				     t->inp_socket->so_cred->cr_uid))
 					return (EADDRINUSE);
 				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
 				    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 					struct sockaddr_in sin;
 
 					in6_sin6_2_sin(&sin, sin6);
 					t = in_pcblookup_local(pcbinfo,
 						sin.sin_addr, lport,
 						INPLOOKUP_WILDCARD);
 					if (t && (t->inp_vflag & INP_TIMEWAIT)) {
 						if (so->so_cred->cr_uid !=
 						    intotw(t)->tw_cred->cr_uid &&
 						    (ntohl(t->inp_laddr.s_addr) !=
 						     INADDR_ANY || 
 						     ((inp->inp_vflag & 
 						       INP_IPV6PROTO) == 
 						      (t->inp_vflag & 
 						       INP_IPV6PROTO))))
 					    return (EADDRINUSE);
 					} else if (t && 
 					    (so->so_cred->cr_uid !=
 					     t->inp_socket->so_cred->cr_uid) &&
 					    (ntohl(t->inp_laddr.s_addr) !=
 					     INADDR_ANY ||
 					     INP_SOCKAF(so) ==
 					     INP_SOCKAF(t->inp_socket)))
 						return (EADDRINUSE);
 				}
 			}
 			t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
 						lport, wild);
 			if (t && (reuseport & ((t->inp_vflag & INP_TIMEWAIT) ?
 			    intotw(t)->tw_so_options : 
 			    t->inp_socket->so_options)) == 0)
 				return(EADDRINUSE);
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
 			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				struct sockaddr_in sin;
 
 				in6_sin6_2_sin(&sin, sin6);
 				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
 						       lport, wild);
 				if (t && t->inp_vflag & INP_TIMEWAIT) {
 					if ((reuseport & 
 					    intotw(t)->tw_so_options) == 0 &&
 					    (ntohl(t->inp_laddr.s_addr) !=
 					     INADDR_ANY || ((inp->inp_vflag & 
 					     INP_IPV6PROTO) == 
 					     (t->inp_vflag & INP_IPV6PROTO))))
 						return (EADDRINUSE);
 				}
 				else if (t && 
 				    (reuseport & t->inp_socket->so_options) 
 				    == 0 && (ntohl(t->inp_laddr.s_addr) != 
 				    INADDR_ANY || INP_SOCKAF(so) ==
 				     INP_SOCKAF(t->inp_socket)))
 					return (EADDRINUSE);
 			}
 		}
 		inp->in6p_laddr = sin6->sin6_addr;
 	}
 	if (lport == 0) {
 		int e;
 		if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, td)) != 0)
 			return(e);
 	}
 	else {
 		inp->inp_lport = lport;
 		if (in_pcbinshash(inp) != 0) {
 			inp->in6p_laddr = in6addr_any;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 	return(0);
 }
 
 /*
  *   Transform old in6_pcbconnect() into an inner subroutine for new
  *   in6_pcbconnect(): Do some validity-checking on the remote
  *   address (in mbuf 'nam') and then determine local host address
  *   (i.e., which interface) to use to access that remote host.
  *
  *   This preserves definition of in6_pcbconnect(), while supporting a
  *   slightly different version for T/TCP.  (This is more than
  *   a bit of a kludge, but cleaning up the internal interfaces would
  *   have forced minor changes in every protocol).
  */
 
 int
 in6_pcbladdr(inp, nam, plocal_addr6)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	struct in6_addr **plocal_addr6;
 {
 	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	struct ifnet *ifp = NULL;
 	int error = 0;
 
 	if (nam->sa_len != sizeof (*sin6))
 		return (EINVAL);
 	if (sin6->sin6_family != AF_INET6)
 		return (EAFNOSUPPORT);
 	if (sin6->sin6_port == 0)
 		return (EADDRNOTAVAIL);
 
 	/* KAME hack: embed scopeid */
 	if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0)
 		return EINVAL;
 
 	if (in6_ifaddr) {
 		/*
 		 * If the destination address is UNSPECIFIED addr,
 		 * use the loopback addr, e.g ::1.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			sin6->sin6_addr = in6addr_loopback;
 	}
 	{
 		/*
 		 * XXX: in6_selectsrc might replace the bound local address
 		 * with the address specified by setsockopt(IPV6_PKTINFO).
 		 * Is it the intended behavior?
 		 */
 		*plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
 					      inp->in6p_moptions,
 					      &inp->in6p_route,
 					      &inp->in6p_laddr, &error);
 		if (*plocal_addr6 == 0) {
 			if (error == 0)
 				error = EADDRNOTAVAIL;
 			return(error);
 		}
 		/*
 		 * Don't do pcblookup call here; return interface in
 		 * plocal_addr6
 		 * and exit to caller, that will do the lookup.
 		 */
 	}
 
 	if (inp->in6p_route.ro_rt)
 		ifp = inp->in6p_route.ro_rt->rt_ifp;
 
 	return(0);
 }
 
 /*
  * Outer subroutine:
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument sin.
  * If don't have a local address for this socket yet,
  * then pick one.
  */
 int
 in6_pcbconnect(inp, nam, td)
 	register struct inpcb *inp;
 	struct sockaddr *nam;
 	struct thread *td;
 {
 	struct in6_addr *addr6;
 	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	int error;
 
 	/*
 	 * Call inner routine, to assign local interface address.
 	 * in6_pcbladdr() may automatically fill in sin6_scope_id.
 	 */
 	if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
 		return(error);
 
 	if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
 			       sin6->sin6_port,
 			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
 			      ? addr6 : &inp->in6p_laddr,
 			      inp->inp_lport, 0, NULL) != NULL) {
 		return (EADDRINUSE);
 	}
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 		if (inp->inp_lport == 0) {
 			error = in6_pcbbind(inp, (struct sockaddr *)0, td);
 			if (error)
 				return (error);
 		}
 		inp->in6p_laddr = *addr6;
 	}
 	inp->in6p_faddr = sin6->sin6_addr;
 	inp->inp_fport = sin6->sin6_port;
 	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
 	inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
 	if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
 		inp->in6p_flowinfo |=
 #ifdef RANDOM_IP_ID
 		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 #else
 		    (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK);
 #endif
 
 	in_pcbrehash(inp);
 	return (0);
 }
 
 #if 0
 /*
  * Return an IPv6 address, which is the most appropriate for given
  * destination and user specified options.
  * If necessary, this function lookups the routing table and return
  * an entry to the caller for later use.
  */
 struct in6_addr *
 in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp)
 	struct sockaddr_in6 *dstsock;
 	struct ip6_pktopts *opts;
 	struct ip6_moptions *mopts;
 	struct route_in6 *ro;
 	struct in6_addr *laddr;
 	int *errorp;
 {
 	struct in6_addr *dst;
 	struct in6_ifaddr *ia6 = 0;
 	struct in6_pktinfo *pi = NULL;
 
 	dst = &dstsock->sin6_addr;
 	*errorp = 0;
 
 	/*
 	 * If the source address is explicitly specified by the caller,
 	 * use it.
 	 */
 	if (opts && (pi = opts->ip6po_pktinfo) &&
 	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
 		return(&pi->ipi6_addr);
 
 	/*
 	 * If the source address is not specified but the socket(if any)
 	 * is already bound, use the bound address.
 	 */
 	if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
 		return(laddr);
 
 	/*
 	 * If the caller doesn't specify the source address but
 	 * the outgoing interface, use an address associated with
 	 * the interface.
 	 */
 	if (pi && pi->ipi6_ifindex) {
 		/* XXX boundary check is assumed to be already done. */
 		ia6 = in6_ifawithscope(ifnet_byindex(pi->ipi6_ifindex), dst);
 		if (ia6 == 0) {
 			*errorp = EADDRNOTAVAIL;
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	/*
 	 * If the destination address is a link-local unicast address or
 	 * a multicast address, and if the outgoing interface is specified
 	 * by the sin6_scope_id filed, use an address associated with the
 	 * interface.
 	 * XXX: We're now trying to define more specific semantics of
 	 *      sin6_scope_id field, so this part will be rewritten in
 	 *      the near future.
 	 */
 	if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
 	    dstsock->sin6_scope_id) {
 		/*
 		 * I'm not sure if boundary check for scope_id is done
 		 * somewhere...
 		 */
 		if (dstsock->sin6_scope_id < 0 ||
 		    if_index < dstsock->sin6_scope_id) {
 			*errorp = ENXIO; /* XXX: better error? */
 			return(0);
 		}
 		ia6 = in6_ifawithscope(ifnet_byindex(dstsock->sin6_scope_id),
 				       dst);
 		if (ia6 == 0) {
 			*errorp = EADDRNOTAVAIL;
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	/*
 	 * If the destination address is a multicast address and
 	 * the outgoing interface for the address is specified
 	 * by the caller, use an address associated with the interface.
 	 * There is a sanity check here; if the destination has node-local
 	 * scope, the outgoing interfacde should be a loopback address.
 	 * Even if the outgoing interface is not specified, we also
 	 * choose a loopback interface as the outgoing interface.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(dst)) {
 		struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
 
 		if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
 			ifp = &loif[0];
 		}
 
 		if (ifp) {
 			ia6 = in6_ifawithscope(ifp, dst);
 			if (ia6 == 0) {
 				*errorp = EADDRNOTAVAIL;
 				return(0);
 			}
 			return(&ia6->ia_addr.sin6_addr);
 		}
 	}
 
 	/*
 	 * If the next hop address for the packet is specified
 	 * by caller, use an address associated with the route
 	 * to the next hop.
 	 */
 	{
 		struct sockaddr_in6 *sin6_next;
 		struct rtentry *rt;
 
 		if (opts && opts->ip6po_nexthop) {
 			sin6_next = satosin6(opts->ip6po_nexthop);
 			rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL);
 			if (rt) {
 				ia6 = in6_ifawithscope(rt->rt_ifp, dst);
 				if (ia6 == 0)
 					ia6 = ifatoia6(rt->rt_ifa);
 			}
 			if (ia6 == 0) {
 				*errorp = EADDRNOTAVAIL;
 				return(0);
 			}
 			return(&satosin6(&ia6->ia_addr)->sin6_addr);
 		}
 	}
 
 	/*
 	 * If route is known or can be allocated now,
 	 * our src addr is taken from the i/f, else punt.
 	 */
 	if (ro) {
 		if (ro->ro_rt &&
 		    !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) {
 			RTFREE(ro->ro_rt);
 			ro->ro_rt = (struct rtentry *)0;
 		}
 		if (ro->ro_rt == (struct rtentry *)0 ||
 		    ro->ro_rt->rt_ifp == (struct ifnet *)0) {
 			struct sockaddr_in6 *dst6;
 
 			/* No route yet, so try to acquire one */
 			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
 			dst6 = (struct sockaddr_in6 *)&ro->ro_dst;
 			dst6->sin6_family = AF_INET6;
 			dst6->sin6_len = sizeof(struct sockaddr_in6);
 			dst6->sin6_addr = *dst;
 			if (IN6_IS_ADDR_MULTICAST(dst)) {
 				ro->ro_rt = rtalloc1(&((struct route *)ro)
 						     ->ro_dst, 0, 0UL);
+				if (ro->ro_rt) RT_UNLOCK(ro->ro_rt);
 			} else {
 				rtalloc((struct route *)ro);
 			}
 		}
 
 		/*
 		 * in_pcbconnect() checks out IFF_LOOPBACK to skip using
 		 * the address. But we don't know why it does so.
 		 * It is necessary to ensure the scope even for lo0
 		 * so doesn't check out IFF_LOOPBACK.
 		 */
 
 		if (ro->ro_rt) {
 			ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
 			if (ia6 == 0) /* xxx scope error ?*/
 				ia6 = ifatoia6(ro->ro_rt->rt_ifa);
 		}
 		if (ia6 == 0) {
 			*errorp = EHOSTUNREACH;	/* no route */
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	*errorp = EADDRNOTAVAIL;
 	return(0);
 }
 
 /*
  * Default hop limit selection. The precedence is as follows:
  * 1. Hoplimit valued specified via ioctl.
  * 2. (If the outgoing interface is detected) the current
  *     hop limit of the interface specified by router advertisement.
  * 3. The system default hoplimit.
 */
 int
 in6_selecthlim(in6p, ifp)
 	struct in6pcb *in6p;
 	struct ifnet *ifp;
 {
 	if (in6p && in6p->in6p_hops >= 0)
 		return(in6p->in6p_hops);
 	else if (ifp)
 		return(nd_ifinfo[ifp->if_index].chlim);
 	else
 		return(ip6_defhlim);
 }
 #endif
 
 void
 in6_pcbdisconnect(inp)
 	struct inpcb *inp;
 {
 	bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
 	inp->inp_fport = 0;
 	/* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
 	inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
 	in_pcbrehash(inp);
 	if (inp->inp_socket->so_state & SS_NOFDREF)
 		in6_pcbdetach(inp);
 }
 
 void
 in6_pcbdetach(inp)
 	struct inpcb *inp;
 {
 	struct socket *so = inp->inp_socket;
 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
 
 #ifdef IPSEC
 	if (inp->in6p_sp != NULL)
 		ipsec6_delete_pcbpolicy(inp);
 #endif /* IPSEC */
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	if (so) {
 		so->so_pcb = NULL;
 		sotryfree(so);
 	}
 	if (inp->in6p_options)
 		m_freem(inp->in6p_options);
  	ip6_freepcbopts(inp->in6p_outputopts);
  	ip6_freemoptions(inp->in6p_moptions);
 	if (inp->in6p_route.ro_rt)
-		rtfree(inp->in6p_route.ro_rt);
+		RTFREE(inp->in6p_route.ro_rt);
 	/* Check and free IPv4 related resources in case of mapped addr */
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	ip_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	INP_LOCK_DESTROY(inp);
 	uma_zfree(ipi->ipi_zone, inp);
 }
 
 struct sockaddr *
 in6_sockaddr(port, addr_p)
 	in_port_t port;
 	struct in6_addr *addr_p;
 {
 	struct sockaddr_in6 *sin6;
 
 	MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK);
 	bzero(sin6, sizeof *sin6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(*sin6);
 	sin6->sin6_port = port;
 	sin6->sin6_addr = *addr_p;
 	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
 		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
 	else
 		sin6->sin6_scope_id = 0;	/*XXX*/
 	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
 		sin6->sin6_addr.s6_addr16[1] = 0;
 
 	return (struct sockaddr *)sin6;
 }
 
 struct sockaddr *
 in6_v4mapsin6_sockaddr(port, addr_p)
 	in_port_t port;
 	struct in_addr *addr_p;
 {
 	struct sockaddr_in sin;
 	struct sockaddr_in6 *sin6_p;
 
 	bzero(&sin, sizeof sin);
 	sin.sin_family = AF_INET;
 	sin.sin_len = sizeof(sin);
 	sin.sin_port = port;
 	sin.sin_addr = *addr_p;
 
 	MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME,
 		M_WAITOK);
 	in6_sin_2_v4mapsin6(&sin, sin6_p);
 
 	return (struct sockaddr *)sin6_p;
 }
 
 /*
  * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was
  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
  * in struct pr_usrreqs, so that protocols can just reference then directly
  * without the need for a wrapper function.  The socket must have a valid
  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
  * except through a kernel programming error, so it is acceptable to panic
  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
  * because there actually /is/ a programming error somewhere... XXX)
  */
 int
 in6_setsockaddr(so, nam)
 	struct socket *so;
 	struct sockaddr **nam;
 {
 	int s;
 	register struct inpcb *inp;
 	struct in6_addr addr;
 	in_port_t port;
 
 	s = splnet();
 	inp = sotoinpcb(so);
 	if (!inp) {
 		splx(s);
 		return EINVAL;
 	}
 	port = inp->inp_lport;
 	addr = inp->in6p_laddr;
 	splx(s);
 
 	*nam = in6_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in6_setpeeraddr(so, nam)
 	struct socket *so;
 	struct sockaddr **nam;
 {
 	int s;
 	struct inpcb *inp;
 	struct in6_addr addr;
 	in_port_t port;
 
 	s = splnet();
 	inp = sotoinpcb(so);
 	if (!inp) {
 		splx(s);
 		return EINVAL;
 	}
 	port = inp->inp_fport;
 	addr = inp->in6p_faddr;
 	splx(s);
 
 	*nam = in6_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error;
 
 	if (inp == NULL)
 		return EINVAL;
 	if (inp->inp_vflag & INP_IPV4) {
 		error = in_setsockaddr(so, nam, &tcbinfo);
 		if (error == 0)
 			in6_sin_2_v4mapsin6_in_sock(nam);
 	} else
 	/* scope issues will be handled in in6_setsockaddr(). */
 	error = in6_setsockaddr(so, nam);
 
 	return error;
 }
 
 int
 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error;
 
 	if (inp == NULL)
 		return EINVAL;
 	if (inp->inp_vflag & INP_IPV4) {
 		error = in_setpeeraddr(so, nam, &tcbinfo);
 		if (error == 0)
 			in6_sin_2_v4mapsin6_in_sock(nam);
 	} else
 	/* scope issues will be handled in in6_setpeeraddr(). */
 	error = in6_setpeeraddr(so, nam);
 
 	return error;
 }
 
 /*
  * Pass some notification to all connections of a protocol
  * associated with address dst.  The local address and/or port numbers
  * may be specified to limit the search.  The "usual action" will be
  * taken, depending on the ctlinput cmd.  The caller must filter any
  * cmds that are uninteresting (e.g., no error in the map).
  * Call the protocol specific routine (if any) to report
  * any errors for each matching socket.
  *
  * Must be called at splnet.
  */
 void
 in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify)
 	struct inpcbhead *head;
 	struct sockaddr *dst;
 	const struct sockaddr *src;
 	u_int fport_arg, lport_arg;
 	int cmd;
 	struct inpcb *(*notify) __P((struct inpcb *, int));
 {
 	struct inpcb *inp, *ninp;
 	struct sockaddr_in6 sa6_src, *sa6_dst;
 	u_short	fport = fport_arg, lport = lport_arg;
 	u_int32_t flowinfo;
 	int errno, s;
 
 	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
 		return;
 
 	sa6_dst = (struct sockaddr_in6 *)dst;
 	if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
 		return;
 
 	/*
 	 * note that src can be NULL when we get notify by local fragmentation.
 	 */
 	sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
 	flowinfo = sa6_src.sin6_flowinfo;
 
 	/*
 	 * Redirects go to all references to the destination,
 	 * and use in6_rtchange to invalidate the route cache.
 	 * Dead host indications: also use in6_rtchange to invalidate
 	 * the cache, and deliver the error to all the sockets.
 	 * Otherwise, if we have knowledge of the local port and address,
 	 * deliver only to that socket.
 	 */
 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
 		fport = 0;
 		lport = 0;
 		bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
 
 		if (cmd != PRC_HOSTDEAD)
 			notify = in6_rtchange;
 	}
 	errno = inet6ctlerrmap[cmd];
 	s = splnet();
  	for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
  		ninp = LIST_NEXT(inp, inp_list);
 
  		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 
 		/*
 		 * Detect if we should notify the error. If no source and
 		 * destination ports are specifed, but non-zero flowinfo and
 		 * local address match, notify the error. This is the case
 		 * when the error is delivered with an encrypted buffer
 		 * by ESP. Otherwise, just compare addresses and ports
 		 * as usual.
 		 */
 		if (lport == 0 && fport == 0 && flowinfo &&
 		    inp->inp_socket != NULL &&
 		    flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
 			goto do_notify;
 		else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
 					     &sa6_dst->sin6_addr) ||
 			 inp->inp_socket == 0 ||
 			 (lport && inp->inp_lport != lport) ||
 			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
 			  !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
 					      &sa6_src.sin6_addr)) ||
 			 (fport && inp->inp_fport != fport))
 			continue;
 
 	  do_notify:
 		if (notify)
 			(*notify)(inp, errno);
 	}
 	splx(s);
 }
 
 /*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
 in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
 	struct inpcbinfo *pcbinfo;
 	struct in6_addr *laddr;
 	u_int lport_arg;
 	int wild_okay;
 {
 	register struct inpcb *inp;
 	int matchwild = 3, wildcard;
 	u_short lport = lport_arg;
 
 	if (!wild_okay) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
 						      pcbinfo->hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 			    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found.
 				 */
 				return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				wildcard = 0;
 				if ((inp->inp_vflag & INP_IPV6) == 0)
 					continue;
 				if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
 					wildcard++;
 				if (!IN6_IS_ADDR_UNSPECIFIED(
 					&inp->in6p_laddr)) {
 					if (IN6_IS_ADDR_UNSPECIFIED(laddr))
 						wildcard++;
 					else if (!IN6_ARE_ADDR_EQUAL(
 						&inp->in6p_laddr, laddr))
 						continue;
 				} else {
 					if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
 						wildcard++;
 				}
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					if (matchwild == 0) {
 						break;
 					}
 				}
 			}
 		}
 		return (match);
 	}
 }
 
 void
 in6_pcbpurgeif0(head, ifp)
 	struct in6pcb *head;
 	struct ifnet *ifp;
 {
 	struct in6pcb *in6p;
 	struct ip6_moptions *im6o;
 	struct in6_multi_mship *imm, *nimm;
 
 	for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
 		im6o = in6p->in6p_moptions;
 		if ((in6p->inp_vflag & INP_IPV6) &&
 		    im6o) {
 			/*
 			 * Unselect the outgoing interface if it is being
 			 * detached.
 			 */
 			if (im6o->im6o_multicast_ifp == ifp)
 				im6o->im6o_multicast_ifp = NULL;
 
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
 			 * XXX controversial - is it really legal for kernel
 			 * to force this?
 			 */
 			for (imm = im6o->im6o_memberships.lh_first;
 			     imm != NULL; imm = nimm) {
 				nimm = imm->i6mm_chain.le_next;
 				if (imm->i6mm_maddr->in6m_ifp == ifp) {
 					LIST_REMOVE(imm, i6mm_chain);
 					in6_delmulti(imm->i6mm_maddr);
 					free(imm, M_IPMADDR);
 				}
 			}
 		}
 	}
 }
 
 /*
  * Check for alternatives when higher level complains
  * about service problems.  For now, invalidate cached
  * routing information.  If the route was created dynamically
  * (by a redirect), time to try a default gateway again.
  */
 void
 in6_losing(in6p)
 	struct inpcb *in6p;
 {
 	struct rtentry *rt;
 	struct rt_addrinfo info;
 
 	if ((rt = in6p->in6p_route.ro_rt) != NULL) {
+		RT_LOCK(rt);
+		in6p->in6p_route.ro_rt = NULL;
 		bzero((caddr_t)&info, sizeof(info));
 		info.rti_flags = rt->rt_flags;
 		info.rti_info[RTAX_DST] = rt_key(rt);
 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-		if (rt->rt_flags & RTF_DYNAMIC)
+		if (rt->rt_flags & RTF_DYNAMIC) {
+			RT_UNLOCK(rt);		/* XXX refcnt? */
 			(void)rtrequest1(RTM_DELETE, &info, NULL);
-		in6p->in6p_route.ro_rt = NULL;
-		rtfree(rt);
+		} else
+			rtfree(rt);
 		/*
 		 * A new route can be allocated
 		 * the next time output is attempted.
 		 */
 	}
 }
 
 /*
  * After a routing change, flush old routing
  * and allocate a (hopefully) better one.
  */
 struct inpcb *
 in6_rtchange(inp, errno)
 	struct inpcb *inp;
 	int errno;
 {
 	if (inp->in6p_route.ro_rt) {
-		rtfree(inp->in6p_route.ro_rt);
+		RTFREE(inp->in6p_route.ro_rt);
 		inp->in6p_route.ro_rt = 0;
 		/*
 		 * A new route can be allocated the next time
 		 * output is attempted.
 		 */
 	}
 	return inp;
 }
 
 /*
  * Lookup PCB in hash list.
  */
 struct inpcb *
 in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
 	struct inpcbinfo *pcbinfo;
 	struct in6_addr *faddr, *laddr;
 	u_int fport_arg, lport_arg;
 	int wildcard;
 	struct ifnet *ifp;
 {
 	struct inpcbhead *head;
 	register struct inpcb *inp;
 	u_short fport = fport_arg, lport = lport_arg;
 	int faith;
 
 	if (faithprefix_p != NULL)
 		faith = (*faithprefix_p)(laddr);
 	else
 		faith = 0;
 
 	/*
 	 * First look for an exact match.
 	 */
 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
 					      lport, fport,
 					      pcbinfo->hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * Found.
 			 */
 			return (inp);
 		}
 	}
 	if (wildcard) {
 		struct inpcb *local_wild = NULL;
 
 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
 						      pcbinfo->hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 			    inp->inp_lport == lport) {
 				if (faith && (inp->inp_flags & INP_FAITH) == 0)
 					continue;
 				if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
 						       laddr))
 					return (inp);
 				else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 					local_wild = inp;
 			}
 		}
 		return (local_wild);
 	}
 
 	/*
 	 * Not found.
 	 */
 	return (NULL);
 }
 
 void
 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
 {
 	struct ip6_hdr *ip;
 
 	ip = mtod(m, struct ip6_hdr *);
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_len = sizeof(*sin6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_addr = ip->ip6_src;
 	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
 		sin6->sin6_addr.s6_addr16[1] = 0;
 	sin6->sin6_scope_id =
 		(m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
 		? m->m_pkthdr.rcvif->if_index : 0;
 
 	return;
 }
Index: head/sys/netinet6/in6_rmx.c
===================================================================
--- head/sys/netinet6/in6_rmx.c	(revision 120726)
+++ head/sys/netinet6/in6_rmx.c	(revision 120727)
@@ -1,487 +1,490 @@
 /*	$FreeBSD$	*/
 /*	$KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 /*
  * This code does two things necessary for the enhanced TCP metrics to
  * function in a useful manner:
  *  1) It marks all non-host routes as `cloning', thus ensuring that
  *     every actual reference to such a route actually gets turned
  *     into a reference to a host route to the specific destination
  *     requested.
  *  2) When such routes lose all their references, it arranges for them
  *     to be deleted in some random collection of circumstances, so that
  *     a large quantity of stale routing data is not kept in kernel memory
  *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
+#include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 
 #include <netinet/icmp6.h>
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 
 extern int	in6_inithead __P((void **head, int off));
 
 #define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
 
 /*
  * Do what we need to do when inserting a route.
  */
 static struct radix_node *
 in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 	    struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
 	struct radix_node *ret;
 
 	/*
 	 * For IPv6, all unicast non-host routes are automatically cloning.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
 		rt->rt_flags |= RTF_PRCLONING;
 	}
 
 	/*
 	 * A little bit of help for both IPv6 output and input:
 	 *   For local addresses, we make sure that RTF_LOCAL is set,
 	 *   with the thought that this might one day be used to speed up
 	 *   ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).  (This
 	 * is done above.)
 	 *
 	 * XXX
 	 * should elaborate the code.
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
 					->sin6_addr,
 				       &sin6->sin6_addr)) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 
 	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
 	    && rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL && rt->rt_flags & RTF_HOST) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a host route, but can't.
 		 * Find out if it is because of an
 		 * ARP entry and delete it if so.
 		 */
 		rt2 = rtalloc1((struct sockaddr *)sin6, 0,
 				RTF_CLONING | RTF_PRCLONING);
 		if (rt2) {
 			if (rt2->rt_flags & RTF_LLINFO &&
 				rt2->rt_flags & RTF_HOST &&
 				rt2->rt_gateway &&
 				rt2->rt_gateway->sa_family == AF_LINK) {
+				/* NB: must unlock to avoid recursion */
+				RT_UNLOCK(rt2);
 				rtrequest(RTM_DELETE,
 					  (struct sockaddr *)rt_key(rt2),
 					  rt2->rt_gateway,
 					  rt_mask(rt2), rt2->rt_flags, 0);
 				ret = rn_addroute(v_arg, n_arg, head,
 					treenodes);
+				RT_LOCK(rt2);
 			}
-			RTFREE(rt2);
+			RTFREE_LOCKED(rt2);
 		}
 	} else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a net route, but can't.
 		 * The following case should be allowed, so we'll make a
 		 * special check for this:
 		 *	Two IPv6 addresses with the same prefix is assigned
 		 *	to a single interrface.
 		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
 		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
 		 *	In this case, (*1) and (*2) want to add the same
 		 *	net route entry, 3ffe:0501:: -> if0.
 		 *	This case should not raise an error.
 		 */
 		rt2 = rtalloc1((struct sockaddr *)sin6, 0,
 				RTF_CLONING | RTF_PRCLONING);
 		if (rt2) {
 			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
 					== RTF_CLONING
 			 && rt2->rt_gateway
 			 && rt2->rt_gateway->sa_family == AF_LINK
 			 && rt2->rt_ifp == rt->rt_ifp) {
 				ret = rt2->rt_nodes;
 			}
-			RTFREE(rt2);
+			RTFREE_LOCKED(rt2);
 		}
 	}
 	return ret;
 }
 
 /*
  * This code is the inverse of in6_clsroute: on first reference, if we
  * were managing the route, stop doing so and set the expiration timer
  * back off again.
  */
 static struct radix_node *
 in6_matroute(void *v_arg, struct radix_node_head *head)
 {
 	struct radix_node *rn = rn_match(v_arg, head);
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	if (rt && rt->rt_refcnt == 0) { /* this is first reference */
 		if (rt->rt_flags & RTPRF_OURS) {
 			rt->rt_flags &= ~RTPRF_OURS;
 			rt->rt_rmx.rmx_expire = 0;
 		}
 	}
 	return rn;
 }
 
 SYSCTL_DECL(_net_inet6_ip6);
 
 static int rtq_reallyold = 60*60;
 	/* one hour is ``really old'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
 	CTLFLAG_RW, &rtq_reallyold , 0, "");
 				
 static int rtq_minreallyold = 10;
 	/* never automatically crank down to less */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
 	CTLFLAG_RW, &rtq_minreallyold , 0, "");
 				
 static int rtq_toomany = 128;
 	/* 128 cached routes is ``too many'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
 	CTLFLAG_RW, &rtq_toomany , 0, "");
 				
 
 /*
  * On last reference drop, mark the route as belong to us so that it can be
  * timed out.
  */
 static void
 in6_clsroute(struct radix_node *rn, struct radix_node_head *head)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
+	RT_LOCK_ASSERT(rt);
+
 	if (!(rt->rt_flags & RTF_UP))
 		return;		/* prophylactic measures */
 
 	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
 		return;
 
 	if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
 		return;
 
 	/*
 	 * As requested by David Greenman:
 	 * If rtq_reallyold is 0, just delete the route without
 	 * waiting for a timeout cycle to kill it.
 	 */
 	if (rtq_reallyold != 0) {
 		rt->rt_flags |= RTPRF_OURS;
 		rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
 	} else {
+		/* NB: must unlock to avoid recursion */
+		RT_UNLOCK(rt);
 		rtrequest(RTM_DELETE,
 			  (struct sockaddr *)rt_key(rt),
 			  rt->rt_gateway, rt_mask(rt),
 			  rt->rt_flags, 0);
+		RT_LOCK(rt);
 	}
 }
 
 struct rtqk_arg {
 	struct radix_node_head *rnh;
 	int mode;
 	int updating;
 	int draining;
 	int killed;
 	int found;
 	time_t nextstop;
 };
 
 /*
  * Get rid of old routes.  When draining, this deletes everything, even when
  * the timeout is not expired yet.  When updating, this makes sure that
  * nothing has a timeout longer than the current value of rtq_reallyold.
  */
 static int
 in6_rtqkill(struct radix_node *rn, void *rock)
 {
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
 		if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
 			err = rtrequest(RTM_DELETE,
 					(struct sockaddr *)rt_key(rt),
 					rt->rt_gateway, rt_mask(rt),
 					rt->rt_flags, 0);
 			if (err) {
 				log(LOG_WARNING, "in6_rtqkill: error %d", err);
 			} else {
 				ap->killed++;
 			}
 		} else {
 			if (ap->updating
 			   && (rt->rt_rmx.rmx_expire - time_second
 			       > rtq_reallyold)) {
 				rt->rt_rmx.rmx_expire = time_second
 					+ rtq_reallyold;
 			}
 			ap->nextstop = lmin(ap->nextstop,
 					    rt->rt_rmx.rmx_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
 static int rtq_timeout = RTQ_TIMEOUT;
+static struct callout rtq_timer;
 
 static void
 in6_rtqtimo(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
 	static time_t last_adjusted_timeout = 0;
-	int s;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = time_second + rtq_timeout;
 	arg.draining = arg.updating = 0;
-	s = splnet();
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 
 	/*
 	 * Attempt to be somewhat dynamic about this:
 	 * If there are ``too many'' routes sitting around taking up space,
 	 * then crank down the timeout, and see if we can't make some more
 	 * go away.  However, we make sure that we will never adjust more
 	 * than once in rtq_timeout seconds, to keep from cranking down too
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > rtq_toomany)
 	   && (time_second - last_adjusted_timeout >= rtq_timeout)
 	   && rtq_reallyold > rtq_minreallyold) {
 		rtq_reallyold = 2*rtq_reallyold / 3;
 		if (rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
 		last_adjusted_timeout = time_second;
 #ifdef DIAGNOSTIC
 		log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
 		    rtq_reallyold);
 #endif
 		arg.found = arg.killed = 0;
 		arg.updating = 1;
-		s = splnet();
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
-		splx(s);
 	}
 
 	atv.tv_usec = 0;
 	atv.tv_sec = arg.nextstop;
-	timeout(in6_rtqtimo, rock, tvtohz(&atv));
+	callout_reset(&rtq_timer, tvtohz(&atv), in6_rtqtimo, rock);
 }
 
 /*
  * Age old PMTUs.
  */
 struct mtuex_arg {
 	struct radix_node_head *rnh;
 	time_t nextstop;
 };
+static struct callout rtq_mtutimer;
 
 static int
 in6_mtuexpire(struct radix_node *rn, void *rock)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct mtuex_arg *ap = rock;
 
 	/* sanity */
 	if (!rt)
 		panic("rt == NULL in in6_mtuexpire");
 
 	if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
 		if (rt->rt_rmx.rmx_expire <= time_second) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
 			ap->nextstop = lmin(ap->nextstop,
 					rt->rt_rmx.rmx_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define	MTUTIMO_DEFAULT	(60*1)
 
 static void
 in6_mtutimo(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct mtuex_arg arg;
 	struct timeval atv;
-	int s;
 
 	arg.rnh = rnh;
 	arg.nextstop = time_second + MTUTIMO_DEFAULT;
-	s = splnet();
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 
 	atv.tv_usec = 0;
 	atv.tv_sec = arg.nextstop;
 	if (atv.tv_sec < time_second) {
 		printf("invalid mtu expiration time on routing table\n");
 		arg.nextstop = time_second + 30;	/* last resort */
 	}
-	timeout(in6_mtutimo, rock, tvtohz(&atv));
+	callout_reset(&rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
 }
 
 #if 0
 void
 in6_rtqdrain()
 {
 	struct radix_node_head *rnh = rt_tables[AF_INET6];
 	struct rtqk_arg arg;
-	int s;
+
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = 0;
 	arg.draining = 1;
 	arg.updating = 0;
-	s = splnet();
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 }
 #endif
 
 /*
  * Initialize our routing tree.
  */
 int
 in6_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	if (!rn_inithead(head, off))
 		return 0;
 
 	if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */
 		return 1;	/* only do this for the real routing table */
 
 	rnh = *head;
 	rnh->rnh_addaddr = in6_addroute;
 	rnh->rnh_matchaddr = in6_matroute;
 	rnh->rnh_close = in6_clsroute;
+	callout_init(&rtq_timer, CALLOUT_MPSAFE);
 	in6_rtqtimo(rnh);	/* kick off timeout first time */
+	callout_init(&rtq_mtutimer, CALLOUT_MPSAFE);
 	in6_mtutimo(rnh);	/* kick off timeout first time */
 	return 1;
 }
Index: head/sys/netinet6/in6_src.c
===================================================================
--- head/sys/netinet6/in6_src.c	(revision 120726)
+++ head/sys/netinet6/in6_src.c	(revision 120727)
@@ -1,558 +1,559 @@
 /*	$FreeBSD$	*/
 /*	$KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #ifdef ENABLE_DEFAULT_SCOPE
 #include <netinet6/scope6_var.h> 
 #endif
 
 #include <net/net_osdep.h>
 
 /*
  * Return an IPv6 address, which is the most appropriate for a given
  * destination and user specified options.
  * If necessary, this function lookups the routing table and returns
  * an entry to the caller for later use.
  */
 struct in6_addr *
 in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp)
 	struct sockaddr_in6 *dstsock;
 	struct ip6_pktopts *opts;
 	struct ip6_moptions *mopts;
 	struct route_in6 *ro;
 	struct in6_addr *laddr;
 	int *errorp;
 {
 	struct in6_addr *dst;
 	struct in6_ifaddr *ia6 = 0;
 	struct in6_pktinfo *pi = NULL;
 
 	dst = &dstsock->sin6_addr;
 	*errorp = 0;
 
 	/*
 	 * If the source address is explicitly specified by the caller,
 	 * use it.
 	 */
 	if (opts && (pi = opts->ip6po_pktinfo) &&
 	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
 		return(&pi->ipi6_addr);
 
 	/*
 	 * If the source address is not specified but the socket(if any)
 	 * is already bound, use the bound address.
 	 */
 	if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
 		return(laddr);
 
 	/*
 	 * If the caller doesn't specify the source address but
 	 * the outgoing interface, use an address associated with
 	 * the interface.
 	 */
 	if (pi && pi->ipi6_ifindex) {
 		/* XXX boundary check is assumed to be already done. */
 		ia6 = in6_ifawithscope(ifnet_byindex(pi->ipi6_ifindex), dst);
 		if (ia6 == 0) {
 			*errorp = EADDRNOTAVAIL;
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	/*
 	 * If the destination address is a link-local unicast address or
 	 * a multicast address, and if the outgoing interface is specified
 	 * by the sin6_scope_id filed, use an address associated with the
 	 * interface.
 	 * XXX: We're now trying to define more specific semantics of
 	 *      sin6_scope_id field, so this part will be rewritten in
 	 *      the near future.
 	 */
 	if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
 	    dstsock->sin6_scope_id) {
 		/*
 		 * I'm not sure if boundary check for scope_id is done
 		 * somewhere...
 		 */
 		if (dstsock->sin6_scope_id < 0 ||
 		    if_index < dstsock->sin6_scope_id) {
 			*errorp = ENXIO; /* XXX: better error? */
 			return(0);
 		}
 		ia6 = in6_ifawithscope(ifnet_byindex(dstsock->sin6_scope_id),
 				       dst);
 		if (ia6 == 0) {
 			*errorp = EADDRNOTAVAIL;
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	/*
 	 * If the destination address is a multicast address and
 	 * the outgoing interface for the address is specified
 	 * by the caller, use an address associated with the interface.
 	 * There is a sanity check here; if the destination has node-local
 	 * scope, the outgoing interfacde should be a loopback address.
 	 * Even if the outgoing interface is not specified, we also
 	 * choose a loopback interface as the outgoing interface.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(dst)) {
 		struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
 
 		if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
 			ifp = &loif[0];
 		}
 
 		if (ifp) {
 			ia6 = in6_ifawithscope(ifp, dst);
 			if (ia6 == 0) {
 				*errorp = EADDRNOTAVAIL;
 				return(0);
 			}
 			return(&satosin6(&ia6->ia_addr)->sin6_addr);
 		}
 	}
 
 	/*
 	 * If the next hop address for the packet is specified
 	 * by caller, use an address associated with the route
 	 * to the next hop.
 	 */
 	{
 		struct sockaddr_in6 *sin6_next;
 		struct rtentry *rt;
 
 		if (opts && opts->ip6po_nexthop) {
 			sin6_next = satosin6(opts->ip6po_nexthop);
 			rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL);
 			if (rt) {
 				ia6 = in6_ifawithscope(rt->rt_ifp, dst);
 				if (ia6 == 0)
 					ia6 = ifatoia6(rt->rt_ifa);
 			}
 			if (ia6 == 0) {
 				*errorp = EADDRNOTAVAIL;
 				return(0);
 			}
 			return(&satosin6(&ia6->ia_addr)->sin6_addr);
 		}
 	}
 
 	/*
 	 * If route is known or can be allocated now,
 	 * our src addr is taken from the i/f, else punt.
 	 */
 	if (ro) {
 		if (ro->ro_rt &&
 		    (!(ro->ro_rt->rt_flags & RTF_UP) ||
 		     satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || 
 		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
 					 dst))) {
 			RTFREE(ro->ro_rt);
 			ro->ro_rt = (struct rtentry *)0;
 		}
 		if (ro->ro_rt == (struct rtentry *)0 ||
 		    ro->ro_rt->rt_ifp == (struct ifnet *)0) {
 			struct sockaddr_in6 *sa6;
 
 			/* No route yet, so try to acquire one */
 			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
 			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
 			sa6->sin6_family = AF_INET6;
 			sa6->sin6_len = sizeof(struct sockaddr_in6);
 			sa6->sin6_addr = *dst;
 			sa6->sin6_scope_id = dstsock->sin6_scope_id;
 			if (IN6_IS_ADDR_MULTICAST(dst)) {
 				ro->ro_rt = rtalloc1(&((struct route *)ro)
 						     ->ro_dst, 0, 0UL);
+				if (ro->ro_rt != NULL) RT_UNLOCK(ro->ro_rt);
 			} else {
 				rtalloc((struct route *)ro);
 			}
 		}
 
 		/*
 		 * in_pcbconnect() checks out IFF_LOOPBACK to skip using
 		 * the address. But we don't know why it does so.
 		 * It is necessary to ensure the scope even for lo0
 		 * so doesn't check out IFF_LOOPBACK.
 		 */
 
 		if (ro->ro_rt) {
 			ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
 			if (ia6 == 0) /* xxx scope error ?*/
 				ia6 = ifatoia6(ro->ro_rt->rt_ifa);
 		}
 #if 0
 		/*
 		 * xxx The followings are necessary? (kazu)
 		 * I don't think so.
 		 * It's for SO_DONTROUTE option in IPv4.(jinmei)
 		 */
 		if (ia6 == 0) {
 			struct sockaddr_in6 sin6 = {sizeof(sin6), AF_INET6, 0};
 
 			sin6->sin6_addr = *dst;
 
 			ia6 = ifatoia6(ifa_ifwithdstaddr(sin6tosa(&sin6)));
 			if (ia6 == 0)
 				ia6 = ifatoia6(ifa_ifwithnet(sin6tosa(&sin6)));
 			if (ia6 == 0)
 				return(0);
 			return(&satosin6(&ia6->ia_addr)->sin6_addr);
 		}
 #endif /* 0 */
 		if (ia6 == 0) {
 			*errorp = EHOSTUNREACH;	/* no route */
 			return(0);
 		}
 		return(&satosin6(&ia6->ia_addr)->sin6_addr);
 	}
 
 	*errorp = EADDRNOTAVAIL;
 	return(0);
 }
 
 /*
  * Default hop limit selection. The precedence is as follows:
  * 1. Hoplimit value specified via ioctl.
  * 2. (If the outgoing interface is detected) the current
  *     hop limit of the interface specified by router advertisement.
  * 3. The system default hoplimit.
 */
 int
 in6_selecthlim(in6p, ifp)
 	struct in6pcb *in6p;
 	struct ifnet *ifp;
 {
 	if (in6p && in6p->in6p_hops >= 0)
 		return(in6p->in6p_hops);
 	else if (ifp)
 		return(nd_ifinfo[ifp->if_index].chlim);
 	else
 		return(ip6_defhlim);
 }
 
 /*
  * XXX: this is borrowed from in6_pcbbind(). If possible, we should
  * share this function by all *bsd*...
  */
 int
 in6_pcbsetport(laddr, inp, td)
 	struct in6_addr *laddr;
 	struct inpcb *inp;
 	struct thread *td;
 {
 	struct socket *so = inp->inp_socket;
 	u_int16_t lport = 0, first, last, *lastport;
 	int count, error = 0, wild = 0;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
 	/* XXX: this is redundant when called from in6_pcbbind */
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		wild = INPLOOKUP_WILDCARD;
 
 	inp->inp_flags |= INP_ANONPORT;
 
 	if (inp->inp_flags & INP_HIGHPORT) {
 		first = ipport_hifirstauto;	/* sysctl */
 		last  = ipport_hilastauto;
 		lastport = &pcbinfo->lasthi;
 	} else if (inp->inp_flags & INP_LOWPORT) {
 		if (td && (error = suser(td)))
 			return error;
 		first = ipport_lowfirstauto;	/* 1023 */
 		last  = ipport_lowlastauto;	/* 600 */
 		lastport = &pcbinfo->lastlow;
 	} else {
 		first = ipport_firstauto;	/* sysctl */
 		last  = ipport_lastauto;
 		lastport = &pcbinfo->lastport;
 	}
 	/*
 	 * Simple check to ensure all ports are not used up causing
 	 * a deadlock here.
 	 *
 	 * We split the two cases (up and down) so that the direction
 	 * is not being tested on each round of the loop.
 	 */
 	if (first > last) {
 		/*
 		 * counting down
 		 */
 		count = first - last;
 
 		do {
 			if (count-- < 0) {	/* completely used? */
 				/*
 				 * Undo any address bind that may have
 				 * occurred above.
 				 */
 				inp->in6p_laddr = in6addr_any;
 				return (EAGAIN);
 			}
 			--*lastport;
 			if (*lastport > first || *lastport < last)
 				*lastport = first;
 			lport = htons(*lastport);
 		} while (in6_pcblookup_local(pcbinfo,
 					     &inp->in6p_laddr, lport, wild));
 	} else {
 		/*
 			 * counting up
 			 */
 		count = last - first;
 
 		do {
 			if (count-- < 0) {	/* completely used? */
 				/*
 				 * Undo any address bind that may have
 				 * occurred above.
 				 */
 				inp->in6p_laddr = in6addr_any;
 				return (EAGAIN);
 			}
 			++*lastport;
 			if (*lastport < first || *lastport > last)
 				*lastport = first;
 			lport = htons(*lastport);
 		} while (in6_pcblookup_local(pcbinfo,
 					     &inp->in6p_laddr, lport, wild));
 	}
 
 	inp->inp_lport = lport;
 	if (in_pcbinshash(inp) != 0) {
 		inp->in6p_laddr = in6addr_any;
 		inp->inp_lport = 0;
 		return (EAGAIN);
 	}
 
 	return(0);
 }
 
 /*
  * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
  * If the address scope of is link-local, embed the interface index in the
  * address.  The routine determines our precedence
  * between advanced API scope/interface specification and basic API
  * specification.
  *
  * this function should be nuked in the future, when we get rid of
  * embedded scopeid thing.
  *
  * XXX actually, it is over-specification to return ifp against sin6_scope_id.
  * there can be multiple interfaces that belong to a particular scope zone
  * (in specification, we have 1:N mapping between a scope zone and interfaces).
  * we may want to change the function to return something other than ifp.
  */
 int
 in6_embedscope(in6, sin6, in6p, ifpp)
 	struct in6_addr *in6;
 	const struct sockaddr_in6 *sin6;
 #ifdef HAVE_NRL_INPCB
 	struct inpcb *in6p;
 #define in6p_outputopts	inp_outputopts6
 #define in6p_moptions	inp_moptions6
 #else
 	struct in6pcb *in6p;
 #endif
 	struct ifnet **ifpp;
 {
 	struct ifnet *ifp = NULL;
 	u_int32_t scopeid;
 
 	*in6 = sin6->sin6_addr;
 	scopeid = sin6->sin6_scope_id;
 	if (ifpp)
 		*ifpp = NULL;
 
 	/*
 	 * don't try to read sin6->sin6_addr beyond here, since the caller may
 	 * ask us to overwrite existing sockaddr_in6
 	 */
 
 #ifdef ENABLE_DEFAULT_SCOPE
 	if (scopeid == 0)
 		scopeid = scope6_addr2default(in6);
 #endif
 
 	if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
 		struct in6_pktinfo *pi;
 
 		/*
 		 * KAME assumption: link id == interface id
 		 */
 
 		if (in6p && in6p->in6p_outputopts &&
 		    (pi = in6p->in6p_outputopts->ip6po_pktinfo) &&
 		    pi->ipi6_ifindex) {
 			ifp = ifnet_byindex(pi->ipi6_ifindex);
 			in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
 		} else if (in6p && IN6_IS_ADDR_MULTICAST(in6) &&
 			   in6p->in6p_moptions &&
 			   in6p->in6p_moptions->im6o_multicast_ifp) {
 			ifp = in6p->in6p_moptions->im6o_multicast_ifp;
 			in6->s6_addr16[1] = htons(ifp->if_index);
 		} else if (scopeid) {
 			/* boundary check */
 			if (scopeid < 0 || if_index < scopeid)
 				return ENXIO;  /* XXX EINVAL? */
 			ifp = ifnet_byindex(scopeid);
 			/*XXX assignment to 16bit from 32bit variable */
 			in6->s6_addr16[1] = htons(scopeid & 0xffff);
 		}
 
 		if (ifpp)
 			*ifpp = ifp;
 	}
 
 	return 0;
 }
 #ifdef HAVE_NRL_INPCB
 #undef in6p_outputopts
 #undef in6p_moptions
 #endif
 
 /*
  * generate standard sockaddr_in6 from embedded form.
  * touches sin6_addr and sin6_scope_id only.
  *
  * this function should be nuked in the future, when we get rid of
  * embedded scopeid thing.
  */
 int
 in6_recoverscope(sin6, in6, ifp)
 	struct sockaddr_in6 *sin6;
 	const struct in6_addr *in6;
 	struct ifnet *ifp;
 {
 	u_int32_t scopeid;
 
 	sin6->sin6_addr = *in6;
 
 	/*
 	 * don't try to read *in6 beyond here, since the caller may
 	 * ask us to overwrite existing sockaddr_in6
 	 */
 
 	sin6->sin6_scope_id = 0;
 	if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
 		/*
 		 * KAME assumption: link id == interface id
 		 */
 		scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
 		if (scopeid) {
 			/* sanity check */
 			if (scopeid < 0 || if_index < scopeid)
 				return ENXIO;
 			if (ifp && ifp->if_index != scopeid)
 				return ENXIO;
 			sin6->sin6_addr.s6_addr16[1] = 0;
 			sin6->sin6_scope_id = scopeid;
 		}
 	}
 
 	return 0;
 }
 
 /*
  * just clear the embedded scope identifer.
  * XXX: currently used for bsdi4 only as a supplement function.
  */
 void
 in6_clearscope(addr)
 	struct in6_addr *addr;
 {
 	if (IN6_IS_SCOPE_LINKLOCAL(addr))
 		addr->s6_addr16[1] = 0;
 }
Index: head/sys/netinet6/ip6_output.c
===================================================================
--- head/sys/netinet6/ip6_output.c	(revision 120726)
+++ head/sys/netinet6/ip6_output.c	(revision 120727)
@@ -1,2604 +1,2605 @@
 /*	$FreeBSD$	*/
 /*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include "opt_ip6fw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_pfil_hooks.h"
 #include "opt_random_ip_id.h"
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #ifdef PFIL_HOOKS
 #include <net/pfil.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/nd6.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #ifdef INET6
 #include <netinet6/ipsec6.h>
 #endif
 #include <netkey/key.h>
 #endif /* IPSEC */
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #endif /* FAST_IPSEC */
 
 #include <netinet6/ip6_fw.h>
 
 #include <net/net_osdep.h>
 
 #include <netinet6/ip6protosw.h>
 
 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
 
 /*
  * Working set of mbufs used by ip6_output() while it assembles an
  * outgoing packet: one mbuf per optional extension header plus the
  * mbuf holding the IPv6 header itself.  Unused slots are NULL.
  */
 struct ip6_exthdrs {
 	struct mbuf *ip6e_ip6;		/* IPv6 header (set by ip6_splithdr) */
 	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
 	struct mbuf *ip6e_dest1;	/* Destination options, 1st part */
 	struct mbuf *ip6e_rthdr;	/* Routing header */
 	struct mbuf *ip6e_dest2;	/* Destination options, 2nd part */
 };
 
 /* Forward declarations of helpers private to this file. */
 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
 			    struct socket *, struct sockopt *sopt));
 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
 				  struct ip6_frag **));
 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
 
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
  * This function may modify ver and hlim only.
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  *
  * Arguments:
  *	m0	packet to send.
  *	opt	optional packet options; its extension headers are copied
  *		into fresh mbufs and inserted into the packet.
  *	ro	optional route cache; a local one is used when NULL.
  *	flags	output flags; IPV6_DADOUTPUT, IPV6_FORWARDING and
  *		IPV6_MINMTU are examined here.
  *	im6o	optional multicast options (hop limit, interface, loop).
  *	ifpp	if non-NULL, receives the outgoing interface.
  *	inp	optional PCB, used for the IPsec policy lookup.
  *
  * Returns 0 or an errno value.
  *
  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
  * which is rt_rmx.rmx_mtu.
  */
 int
 ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
 	struct mbuf *m0;
 	struct ip6_pktopts *opt;
 	struct route_in6 *ro;
 	int flags;
 	struct ip6_moptions *im6o;
 	struct ifnet **ifpp;		/* XXX: just for statistics */
 	struct inpcb *inp;
 {
 	struct ip6_hdr *ip6, *mhip6;
 	struct ifnet *ifp, *origifp;
 	struct mbuf *m = m0;
 	int hlen, tlen, len, off;
 	struct route_in6 ip6route;
 	struct sockaddr_in6 *dst;
 	int error = 0;
 	struct in6_ifaddr *ia = NULL;
 	u_long mtu;
 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
 	struct ip6_exthdrs exthdrs;
 	struct in6_addr finaldst;
 	struct route_in6 *ro_pmtu = NULL;
 	int hdrsplit = 0;
 	int needipsec = 0;
 #ifdef IPSEC
 	int needipsectun = 0;
 	struct secpolicy *sp = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* IPSEC */
 #ifdef FAST_IPSEC
 	int needipsectun = 0;
 	struct secpolicy *sp = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* FAST_IPSEC */
 
 /*
  * Copy the extension header hp (if any) into a freshly allocated mbuf
  * stored at *mp.  The length is taken from the header's own length field.
  */
 #define MAKE_EXTHDR(hp, mp)						\
     do {								\
 	if (hp) {							\
 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
 		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
 				       ((eh)->ip6e_len + 1) << 3);	\
 		if (error)						\
 			goto freehdrs;					\
 	}								\
     } while (0)
 
 	bzero(&exthdrs, sizeof(exthdrs));
 
 	if (opt) {
 		/* Hop-by-Hop options header */
 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
 		/* Destination options header(1st part) */
 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
 		/* Routing header */
 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
 		/* Destination options header(2nd part) */
 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
 	}
 
 #ifdef IPSEC
 	/* get a security policy for this packet */
 	if (inp == NULL)
 		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
 	else
 		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
 
 	if (sp == NULL) {
 		ipsec6stat.out_inval++;
 		goto freehdrs;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		ipsec6stat.out_polvio++;
 		goto freehdrs;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		needipsec = 0;
 		break;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* acquire a policy */
 			error = key_spdacquire(sp);
 			goto freehdrs;
 		}
 		needipsec = 1;
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
 	}
 #endif /* IPSEC */
 #ifdef FAST_IPSEC
 	/* get a security policy for this packet */
 	if (inp == NULL)
 		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
 	else
 		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
 
 	if (sp == NULL) {
 		newipsecstat.ips_out_inval++;
 		goto freehdrs;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		newipsecstat.ips_out_polvio++;
 		goto freehdrs;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		needipsec = 0;
 		break;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* acquire a policy */
 			error = key_spdacquire(sp);
 			goto freehdrs;
 		}
 		needipsec = 1;
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
 	}
 #endif /* FAST_IPSEC */
 
 	/*
 	 * Calculate the total length of the extension header chain.
 	 * Keep the length of the unfragmentable part for fragmentation.
 	 */
 	optlen = 0;
 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
 	/* NOTE: we don't add AH/ESP length here. do that later. */
 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
 
 	/*
 	 * If we need IPsec, or there is at least one extension header,
 	 * separate IP6 header from the payload.
 	 */
 	if ((needipsec || optlen) && !hdrsplit) {
 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 			m = NULL;
 			goto freehdrs;
 		}
 		m = exthdrs.ip6e_ip6;
 		hdrsplit++;
 	}
 
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* adjust mbuf packet header length */
 	m->m_pkthdr.len += optlen;
 	plen = m->m_pkthdr.len - sizeof(*ip6);
 
 	/* If this is a jumbo payload, insert a jumbo payload option. */
 	if (plen > IPV6_MAXPACKET) {
 		if (!hdrsplit) {
 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 				m = NULL;
 				goto freehdrs;
 			}
 			m = exthdrs.ip6e_ip6;
 			hdrsplit++;
 		}
 		/* adjust pointer */
 		ip6 = mtod(m, struct ip6_hdr *);
 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
 			goto freehdrs;
 		ip6->ip6_plen = 0;
 	} else
 		ip6->ip6_plen = htons(plen);
 
 	/*
 	 * Concatenate headers and fill in next header fields.
 	 * Here we have, on "m"
 	 *	IPv6 payload
 	 * and we insert headers accordingly.  Finally, we should be getting:
 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
 	 *
 	 * during the header composing process, "m" points to IPv6 header.
 	 * "mprev" points to an extension header prior to esp.
 	 */
 	{
 		u_char *nexthdrp = &ip6->ip6_nxt;
 		struct mbuf *mprev = m;
 
 		/*
 		 * we treat dest2 specially.  this makes IPsec processing
 		 * much easier.  the goal here is to make mprev point the
 		 * mbuf prior to dest2.
 		 *
 		 * result: IPv6 dest2 payload
 		 * m and mprev will point to IPv6 header.
 		 */
 		if (exthdrs.ip6e_dest2) {
 			if (!hdrsplit)
 				panic("assumption failed: hdr not split");
 			exthdrs.ip6e_dest2->m_next = m->m_next;
 			m->m_next = exthdrs.ip6e_dest2;
 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
 		}
 
 /*
  * Link header mbuf m after mp, store the previous "next header" value in
  * its first byte, record protocol i where p pointed, then advance both
  * p and mp to the newly inserted header.
  */
 #define MAKE_CHAIN(m, mp, p, i)\
     do {\
 	if (m) {\
 		if (!hdrsplit) \
 			panic("assumption failed: hdr not split"); \
 		*mtod((m), u_char *) = *(p);\
 		*(p) = (i);\
 		p = mtod((m), u_char *);\
 		(m)->m_next = (mp)->m_next;\
 		(mp)->m_next = (m);\
 		(mp) = (m);\
 	}\
     } while (0)
 		/*
 		 * result: IPv6 hbh dest1 rthdr dest2 payload
 		 * m will point to IPv6 header.  mprev will point to the
 		 * extension header prior to dest2 (rthdr in the above case).
 		 */
 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
 			   nexthdrp, IPPROTO_HOPOPTS);
 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
 			   nexthdrp, IPPROTO_DSTOPTS);
 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
 			   nexthdrp, IPPROTO_ROUTING);
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		if (!needipsec)
 			goto skip_ipsec2;
 
 		/*
 		 * pointers after IPsec headers are not valid any more.
 		 * other pointers need a great care too.
 		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
 		 */
 		exthdrs.ip6e_dest2 = NULL;
 
 	    {
 		struct ip6_rthdr *rh = NULL;
 		int segleft_org = 0;
 		struct ipsec_output_state state;
 
 		/*
 		 * segleft is zeroed for the duration of transport-mode
 		 * processing and restored afterwards.
 		 */
 		if (exthdrs.ip6e_rthdr) {
 			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
 			segleft_org = rh->ip6r_segleft;
 			rh->ip6r_segleft = 0;
 		}
 
 		bzero(&state, sizeof(state));
 		state.m = m;
 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
 			&needipsectun);
 		m = state.m;
 		if (error) {
 			/* mbuf is already reclaimed in ipsec6_output_trans. */
 			m = NULL;
 			switch (error) {
 			case EHOSTUNREACH:
 			case ENETUNREACH:
 			case EMSGSIZE:
 			case ENOBUFS:
 			case ENOMEM:
 				break;
 			default:
 				printf("ip6_output (ipsec): error code %d\n", error);
 				/* fall through */
 			case ENOENT:
 				/* don't show these error codes to the user */
 				error = 0;
 				break;
 			}
 			goto bad;
 		}
 		if (exthdrs.ip6e_rthdr) {
 			/* ah6_output doesn't modify mbuf chain */
 			rh->ip6r_segleft = segleft_org;
 		}
 	    }
 skip_ipsec2:;
 #endif
 	}
 
 	/*
 	 * If there is a routing header, replace destination address field
 	 * with the first hop of the routing header.
 	 */
 	if (exthdrs.ip6e_rthdr) {
 		struct ip6_rthdr *rh =
 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
 						  struct ip6_rthdr *));
 		struct ip6_rthdr0 *rh0;
 
 		finaldst = ip6->ip6_dst;
 		switch (rh->ip6r_type) {
 		case IPV6_RTHDR_TYPE_0:
 			 /*
 			  * Shift the address list up by one and append the
 			  * final destination as the last entry.
 			  */
 			 rh0 = (struct ip6_rthdr0 *)rh;
 			 ip6->ip6_dst = rh0->ip6r0_addr[0];
 			 bcopy((caddr_t)&rh0->ip6r0_addr[1],
 			       (caddr_t)&rh0->ip6r0_addr[0],
 			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
 				 );
 			 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
 			 break;
 		default:	/* is it possible? */
 			 error = EINVAL;
 			 goto bad;
 		}
 	}
 
 	/* Source address validation */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
 	    (flags & IPV6_DADOUTPUT) == 0) {
 		error = EOPNOTSUPP;
 		ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 		error = EOPNOTSUPP;
 		ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 
 	ip6stat.ip6s_localout++;
 
 	/*
 	 * Route packet.
 	 */
 	if (ro == 0) {
 		ro = &ip6route;
 		bzero((caddr_t)ro, sizeof(*ro));
 	}
 	/*
 	 * ro_pmtu keeps the route towards the final destination; with a
 	 * routing header "ro" is switched to the first-hop route instead.
 	 */
 	ro_pmtu = ro;
 	if (opt && opt->ip6po_rthdr)
 		ro = &opt->ip6po_route;
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
 	 * and is still up. If not, free it and try again.
 	 */
 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
 			 dst->sin6_family != AF_INET6 ||
 			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = (struct rtentry *)0;
 	}
 	if (ro->ro_rt == 0) {
 		bzero(dst, sizeof(*dst));
 		dst->sin6_family = AF_INET6;
 		dst->sin6_len = sizeof(struct sockaddr_in6);
 		dst->sin6_addr = ip6->ip6_dst;
 #ifdef SCOPEDROUTING
 		/* XXX: sin6_scope_id should already be fixed at this point */
 		if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
 			dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]);
 #endif
 	}
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	if (needipsec && needipsectun) {
 		struct ipsec_output_state state;
 
 		/*
 		 * All the extension headers will become inaccessible
 		 * (since they can be encrypted).
 		 * Don't panic, we need no more updates to extension headers
 		 * on inner IPv6 packet (since they are now encapsulated).
 		 *
 		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 		 */
 		bzero(&exthdrs, sizeof(exthdrs));
 		exthdrs.ip6e_ip6 = m;
 
 		bzero(&state, sizeof(state));
 		state.m = m;
 		state.ro = (struct route *)ro;
 		state.dst = (struct sockaddr *)dst;
 
 		error = ipsec6_output_tunnel(&state, sp, flags);
 
 		m = state.m;
 		ro = (struct route_in6 *)state.ro;
 		dst = (struct sockaddr_in6 *)state.dst;
 		if (error) {
 			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
 			m0 = m = NULL;
 			switch (error) {
 			case EHOSTUNREACH:
 			case ENETUNREACH:
 			case EMSGSIZE:
 			case ENOBUFS:
 			case ENOMEM:
 				break;
 			default:
 				printf("ip6_output (ipsec): error code %d\n", error);
 				/* fall through */
 			case ENOENT:
 				/* don't show these error codes to the user */
 				error = 0;
 				break;
 			}
 			goto bad;
 		}
 
 		exthdrs.ip6e_ip6 = m;
 	}
 #endif /* IPSEC */
 
 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		/* Unicast */
 
 #define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define sin6tosa(sin6)	((struct sockaddr *)(sin6))
 		/* xxx
 		 * interface selection comes here
 		 * if an interface is specified from an upper layer,
 		 * ifp must point it.
 		 */
 		if (ro->ro_rt == 0) {
 			/*
 			 * non-bsdi always clone routes, if parent is
 			 * PRF_CLONING.
 			 */
 			rtalloc((struct route *)ro);
 		}
 		if (ro->ro_rt == 0) {
 			ip6stat.ip6s_noroute++;
 			error = EHOSTUNREACH;
 			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
 			goto bad;
 		}
 		ia = ifatoia6(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_use++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
 
 		in6_ifstat_inc(ifp, ifs6_out_request);
 
 		/*
 		 * Check if the outgoing interface conflicts with
 		 * the interface specified by ifi6_ifindex (if specified).
 		 * Note that loopback interface is always okay.
 		 * (this may happen when we are sending a packet to one of
 		 *  our own addresses.)
 		 */
 		if (opt && opt->ip6po_pktinfo
 		 && opt->ip6po_pktinfo->ipi6_ifindex) {
 			if (!(ifp->if_flags & IFF_LOOPBACK)
 			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
 				ip6stat.ip6s_noroute++;
 				in6_ifstat_inc(ifp, ifs6_out_discard);
 				error = EHOSTUNREACH;
 				goto bad;
 			}
 		}
 
 		if (opt && opt->ip6po_hlim != -1)
 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
 	} else {
 		/* Multicast */
 		struct	in6_multi *in6m;
 
 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
 
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		ifp = NULL;
 		if (im6o != NULL) {
 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
 			if (im6o->im6o_multicast_ifp != NULL)
 				ifp = im6o->im6o_multicast_ifp;
 		} else
 			ip6->ip6_hlim = ip6_defmcasthlim;
 
 		/*
 		 * See if the caller provided the outgoing interface
 		 * as an ancillary data.
 		 * Boundary check for ifindex is assumed to be already done.
 		 */
 		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
 			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
 
 		/*
 		 * If the destination is a node-local scope multicast,
 		 * the packet should be loop-backed only.
 		 */
 		if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
 			/*
 			 * If the outgoing interface is already specified,
 			 * it should be a loopback interface.
 			 */
 			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
 				ip6stat.ip6s_badscope++;
 				error = ENETUNREACH; /* XXX: better error? */
 				/* XXX correct ifp? */
 				in6_ifstat_inc(ifp, ifs6_out_discard);
 				goto bad;
 			} else {
 				ifp = &loif[0];
 			}
 		}
 
 		if (opt && opt->ip6po_hlim != -1)
 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
 
 		/*
 		 * If caller did not provide an interface lookup a
 		 * default in the routing table.  This is either a
 		 * default for the specified group (i.e. a host
 		 * route), or a multicast default (a route for the
 		 * ``net'' ff00::/8).
 		 */
 		if (ifp == NULL) {
 			if (ro->ro_rt == 0) {
 				ro->ro_rt = rtalloc1((struct sockaddr *)
 						&ro->ro_dst, 0, 0UL);
 			}
 			if (ro->ro_rt == 0) {
 				ip6stat.ip6s_noroute++;
 				error = EHOSTUNREACH;
 				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
 				goto bad;
 			}
 			ia = ifatoia6(ro->ro_rt->rt_ifa);
 			ifp = ro->ro_rt->rt_ifp;
 			ro->ro_rt->rt_use++;
 			/*
 			 * NOTE(review): this unlock presumably pairs with a
 			 * lock taken inside rtalloc1(); when ro->ro_rt was
 			 * already cached on entry no rtalloc1() call is made
 			 * above, so confirm the unlock is balanced on that
 			 * path.
 			 */
+			RT_UNLOCK(ro->ro_rt);
 		}
 
 		if ((flags & IPV6_FORWARDING) == 0)
 			in6_ifstat_inc(ifp, ifs6_out_request);
 		in6_ifstat_inc(ifp, ifs6_out_mcast);
 
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(ifp, ifs6_out_discard);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
 		if (in6m != NULL &&
 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
 			 * forbid loopback, loop back a copy.
 			 */
 			ip6_mloopback(ifp, m, dst);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IPV6_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip6_mloopback(),
 			 * above, will be forwarded by the ip6_input() routine,
 			 * if necessary.
 			 */
 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
 				if (ip6_mforward(ip6, ifp, m) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 		/*
 		 * Multicasts with a hoplimit of zero may be looped back,
 		 * above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip6_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
 			m_freem(m);
 			goto done;
 		}
 	}
 
 	/*
 	 * Fill the outgoing interface to tell the upper layer
 	 * to increment per-interface statistics.
 	 */
 	if (ifpp)
 		*ifpp = ifp;
 
 	/*
 	 * Determine path MTU.
 	 */
 	if (ro_pmtu != ro) {
 		/* The first hop and the final destination may differ. */
 		struct sockaddr_in6 *sin6_fin =
 			(struct sockaddr_in6 *)&ro_pmtu->ro_dst;
 
 		/*
 		 * Validate the cached path-MTU route through ro_pmtu
 		 * itself.  "ro" is a different (first hop) route here and
 		 * ro->ro_rt may even be NULL, e.g. for a multicast packet
 		 * whose outgoing interface was chosen explicitly, in which
 		 * case no route is looked up on "ro" at all.
 		 */
 		if (ro_pmtu->ro_rt &&
 		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
 		     !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr,
 					 &finaldst))) {
 			RTFREE(ro_pmtu->ro_rt);
 			ro_pmtu->ro_rt = (struct rtentry *)0;
 		}
 		if (ro_pmtu->ro_rt == 0) {
 			bzero(sin6_fin, sizeof(*sin6_fin));
 			sin6_fin->sin6_family = AF_INET6;
 			sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
 			sin6_fin->sin6_addr = finaldst;
 
 			rtalloc((struct route *)ro_pmtu);
 		}
 	}
 	if (ro_pmtu->ro_rt != NULL) {
 		u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu;
 
 		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
 		if (mtu > ifmtu || mtu == 0) {
 			/*
 			 * The MTU on the route is larger than the MTU on
 			 * the interface!  This shouldn't happen, unless the
 			 * MTU of the interface has been changed after the
 			 * interface was brought up.  Change the MTU in the
 			 * route to match the interface MTU (as long as the
 			 * field isn't locked).
 			 *
 			 * if MTU on the route is 0, we need to fix the MTU.
 			 * this case happens with path MTU discovery timeouts.
 			 */
 			 mtu = ifmtu;
 			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
 				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
 		}
 	} else {
 		mtu = nd_ifinfo[ifp->if_index].linkmtu;
 	}
 
 	/*
 	 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
 	 */
 	if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
 		mtu = IPV6_MMTU;
 
 	/* Fake scoped addresses */
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/*
 		 * If source or destination address is a scoped address, and
 		 * the packet is going to be sent to a loopback interface,
 		 * we should keep the original interface.
 		 */
 
 		/*
 		 * XXX: this is a very experimental and temporary solution.
 		 * We eventually have sockaddr_in6 and use the sin6_scope_id
 		 * field of the structure here.
 		 * We rely on the consistency between two scope zone ids
 		 * of source and destination, which should already be assured.
 		 * Larger scopes than link will be supported in the future.
 		 */
 		origifp = NULL;
 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
 			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
 		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
 			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
 		/*
 		 * XXX: origifp can be NULL even in those two cases above.
 		 * For example, if we remove the (only) link-local address
 		 * from the loopback interface, and try to send a link-local
 		 * address without link-id information.  Then the source
 		 * address is ::1, and the destination address is the
 		 * link-local address with its s6_addr16[1] being zero.
 		 * What is worse, if the packet goes to the loopback interface
 		 * by a default rejected route, the null pointer would be
 		 * passed to looutput, and the kernel would hang.
 		 * The following last resort would prevent such disaster.
 		 */
 		if (origifp == NULL)
 			origifp = ifp;
 	}
 	else
 		origifp = ifp;
 #ifndef SCOPEDROUTING
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
 #endif
 
 	/*
 	 * Check with the firewall...
 	 */
 	if (ip6_fw_enable && ip6_fw_chk_ptr) {
 		u_short port = 0;
 		m->m_pkthdr.rcvif = NULL;	/* XXX */
 		/* If ipfw says divert, we have to just drop packet */
 		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
 			m_freem(m);
 			goto done;
 		}
 		if (!m) {
 			error = EACCES;
 			goto done;
 		}
 	}
 
 	/*
 	 * If the outgoing packet contains a hop-by-hop options header,
 	 * it must be examined and processed even by the source node.
 	 * (RFC 2460, section 4.)
 	 */
 	if (exthdrs.ip6e_hbh) {
 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
 		u_int32_t dummy1; /* XXX unused */
 		u_int32_t dummy2; /* XXX unused */
 
 #ifdef DIAGNOSTIC
 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
 			panic("ip6e_hbh is not continuous");
 #endif
 		/*
 		 *  XXX: if we have to send an ICMPv6 error to the sender,
 		 *       we need the M_LOOP flag since icmp6_error() expects
 		 *       the IPv6 and the hop-by-hop options header are
 		 *       continuous unless the flag is set.
 		 */
 		m->m_flags |= M_LOOP;
 		m->m_pkthdr.rcvif = ifp;
 		if (ip6_process_hopopts(m,
 					(u_int8_t *)(hbh + 1),
 					((hbh->ip6h_len + 1) << 3) -
 					sizeof(struct ip6_hbh),
 					&dummy1, &dummy2) < 0) {
 			/* m was already freed at this point */
 			error = EINVAL;/* better error? */
 			goto done;
 		}
 		m->m_flags &= ~M_LOOP; /* XXX */
 		m->m_pkthdr.rcvif = NULL;
 	}
 
 #ifdef PFIL_HOOKS
 	/*
 	 * Run through list of hooks for output packets.
 	 */
 	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
 	if (error != 0 || m == NULL)
 		goto done;
 	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* PFIL_HOOKS */
 	/*
 	 * Send the packet to the outgoing interface.
 	 * If necessary, do IPv6 fragmentation before sending.
 	 */
 	tlen = m->m_pkthdr.len;
 	if (tlen <= mtu
 #ifdef notyet
 	    /*
 	     * On any link that cannot convey a 1280-octet packet in one piece,
 	     * link-specific fragmentation and reassembly must be provided at
 	     * a layer below IPv6. [RFC 2460, sec.5]
 	     * Thus if the interface has ability of link-level fragmentation,
 	     * we can just send the packet even if the packet size is
 	     * larger than the link's MTU.
 	     * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
 	     */
 
 	    || ifp->if_flags & IFF_FRAGMENTABLE
 #endif
 	    )
 	{
 		/* Record statistics for this interface address. */
 		if (ia && !(flags & IPV6_FORWARDING)) {
 			ia->ia_ifa.if_opackets++;
 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 		}
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		goto done;
 	} else if (mtu < IPV6_MMTU) {
 		/*
 		 * note that path MTU is never less than IPV6_MMTU
 		 * (see icmp6_input).
 		 */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else {
 		struct mbuf **mnext, *m_frgpart;
 		struct ip6_frag *ip6f;
 #ifdef RANDOM_IP_ID
 		u_int32_t id = htonl(ip6_randomid());
 #else
 		u_int32_t id = htonl(ip6_id++);
 #endif
 		u_char nextproto;
 
 		/*
 		 * Too large for the destination or interface;
 		 * fragment if possible.
 		 * Must be able to put at least 8 bytes per fragment.
 		 */
 		hlen = unfragpartlen;
 		if (mtu > IPV6_MAXPACKET)
 			mtu = IPV6_MAXPACKET;
 
 		/* payload bytes per fragment, rounded down to 8 octets */
 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
 		if (len < 8) {
 			error = EMSGSIZE;
 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
 			goto bad;
 		}
 
 		mnext = &m->m_nextpkt;
 
 		/*
 		 * Change the next header field of the last header in the
 		 * unfragmentable part.
 		 */
 		if (exthdrs.ip6e_rthdr) {
 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_dest1) {
 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_hbh) {
 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
 		} else {
 			nextproto = ip6->ip6_nxt;
 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
 		}
 
 		/*
 		 * Loop through length of segment after first fragment,
 		 * make new header and copy data of each part and link onto
 		 * chain.
 		 */
 		m0 = m;
 		for (off = hlen; off < tlen; off += len) {
 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
 			if (!m) {
 				error = ENOBUFS;
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			m->m_pkthdr.rcvif = NULL;
 			m->m_flags = m0->m_flags & M_COPYFLAGS;
 			*mnext = m;
 			mnext = &m->m_nextpkt;
 			m->m_data += max_linkhdr;
 			mhip6 = mtod(m, struct ip6_hdr *);
 			*mhip6 = *ip6;
 			m->m_len = sizeof(*mhip6);
 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
 			if (error) {
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
 			/* the last fragment is shorter and has no "more" bit */
 			if (off + len >= tlen)
 				len = tlen - off;
 			else
 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
 			mhip6->ip6_plen = htons((u_short)(len + hlen +
 							  sizeof(*ip6f) -
 							  sizeof(struct ip6_hdr)));
 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
 				error = ENOBUFS;
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			m_cat(m, m_frgpart);
 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
 			m->m_pkthdr.rcvif = (struct ifnet *)0;
 			ip6f->ip6f_reserved = 0;
 			ip6f->ip6f_ident = id;
 			ip6f->ip6f_nxt = nextproto;
 			ip6stat.ip6s_ofragments++;
 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
 		}
 
 		in6_ifstat_inc(ifp, ifs6_out_fragok);
 	}
 
 	/*
 	 * Remove leading garbage: the original packet (m0) is freed, then
 	 * each fragment on its m_nextpkt list is either sent or, once an
 	 * error has been seen, freed.
 	 */
 sendorfree:
 	m = m0->m_nextpkt;
 	m0->m_nextpkt = 0;
 	m_freem(m0);
 	for (m0 = m; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia) {
 				ia->ia_ifa.if_opackets++;
 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 			}
 #ifdef IPSEC
 			/* clean ipsec history once it goes out of the node */
 			ipsec_delaux(m);
 #endif
 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		ip6stat.ip6s_fragmented++;
 
 done:
 	/* release a route held in the local (non-caller) route cache */
 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
 		RTFREE(ro->ro_rt);
 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
 		RTFREE(ro_pmtu->ro_rt);
 	}
 
 #ifdef IPSEC
 	if (sp != NULL)
 		key_freesp(sp);
 #endif /* IPSEC */
 #ifdef FAST_IPSEC
 	if (sp != NULL)
 		KEY_FREESP(&sp);
 #endif /* FAST_IPSEC */
 
 	return(error);
 
 freehdrs:
 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
 	m_freem(exthdrs.ip6e_dest1);
 	m_freem(exthdrs.ip6e_rthdr);
 	m_freem(exthdrs.ip6e_dest2);
 	/* fall through */
 bad:
 	m_freem(m);
 	goto done;
 }
 
 static int
 ip6_copyexthdr(mp, hdr, hlen)
 	struct mbuf **mp;
 	caddr_t hdr;
 	int hlen;
 {
 	struct mbuf *m;
 
 	if (hlen > MCLBYTES)
 		return(ENOBUFS); /* XXX */
 
 	MGET(m, M_DONTWAIT, MT_DATA);
 	if (!m)
 		return(ENOBUFS);
 
 	if (hlen > MLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			return(ENOBUFS);
 		}
 	}
 	m->m_len = hlen;
 	if (hdr)
 		bcopy(hdr, mtod(m, caddr_t), hlen);
 
 	*mp = m;
 	return(0);
 }
 
 /*
  * Insert a jumbo payload option carrying plen into the hop-by-hop
  * options header kept in exthdrs, creating or enlarging that header as
  * needed.  The packet header length of exthdrs->ip6e_ip6 grows by the
  * size of the inserted option.  Returns 0 on success or ENOBUFS.
  */
 static int
 ip6_insert_jumboopt(exthdrs, plen)
 	struct ip6_exthdrs *exthdrs;
 	u_int32_t plen;
 {
 	struct mbuf *hbhm, *cl;
 	struct ip6_hbh *hbhdr;
 	u_char *opt;
 	u_int32_t jumbolen;
 	int curlen;
 
 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
 
 	hbhm = exthdrs->ip6e_hbh;
 	if (hbhm == NULL) {
 		/* No hop-by-hop options header yet: build a fresh one. */
 		MGET(hbhm, M_DONTWAIT, MT_DATA);
 		if (hbhm == NULL)
 			return (ENOBUFS);
 		hbhm->m_len = JUMBOOPTLEN;
 		opt = mtod(hbhm, u_char *);
 		opt[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
 		exthdrs->ip6e_hbh = hbhm;
 	} else {
 		if (M_TRAILINGSPACE(hbhm) >= JUMBOOPTLEN) {
 			/* The existing mbuf has room: append in place. */
 			opt = mtod(hbhm, u_char *) + hbhm->m_len;
 			hbhm->m_len += JUMBOOPTLEN;
 		} else {
 			/*
 			 * Not enough room: move the whole header into a
 			 * cluster.
 			 * XXX assumption:
 			 * - exthdrs->ip6e_hbh is not referenced from places
 			 *   other than exthdrs.
 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
 			 */
 			curlen = hbhm->m_len;
 
 			/*
 			 * XXX: give up if the whole (new) hbh header does
 			 * not fit even in an mbuf cluster.
 			 */
 			if (curlen + JUMBOOPTLEN > MCLBYTES)
 				return (ENOBUFS);
 
 			/* A cluster is always required at this point. */
 			MGET(cl, M_DONTWAIT, MT_DATA);
 			if (cl == NULL)
 				return (ENOBUFS);
 			MCLGET(cl, M_DONTWAIT);
 			if ((cl->m_flags & M_EXT) == 0) {
 				m_freem(cl);
 				return (ENOBUFS);
 			}
 
 			cl->m_len = curlen + JUMBOOPTLEN;
 			bcopy(mtod(hbhm, caddr_t), mtod(cl, caddr_t), curlen);
 			opt = mtod(cl, u_char *) + curlen;
 			m_freem(hbhm);
 			hbhm = exthdrs->ip6e_hbh = cl;
 		}
 
 		/* two bytes of PadN precede the option itself */
 		opt[0] = IP6OPT_PADN;
 		opt[1] = 1;
 
 		/*
 		 * Account for the pad and the jumbo payload option in the
 		 * header's own length field.
 		 */
 		hbhdr = mtod(hbhm, struct ip6_hbh *);
 		hbhdr->ip6h_len += (JUMBOOPTLEN >> 3);
 	}
 
 	/* Write the jumbo payload option proper. */
 	opt[2] = IP6OPT_JUMBO;
 	opt[3] = 4;
 	jumbolen = (u_int32_t)htonl(plen + JUMBOOPTLEN);
 	bcopy(&jumbolen, &opt[4], sizeof(u_int32_t));
 
 	/* Lastly, the packet as a whole became longer. */
 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
 
 	return (0);
 #undef JUMBOOPTLEN
 }
 
 /*
  * Insert fragment header and copy unfragmentable header portions.
  */
 static int
 ip6_insertfraghdr(m0, m, hlen, frghdrp)
 	struct mbuf *m0, *m;
 	int hlen;
 	struct ip6_frag **frghdrp;
 {
 	struct mbuf *n, *mlast;
 
 	if (hlen > sizeof(struct ip6_hdr)) {
 		n = m_copym(m0, sizeof(struct ip6_hdr),
 			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
 		if (n == 0)
 			return(ENOBUFS);
 		m->m_next = n;
 	} else
 		n = m;
 
 	/* Search for the last mbuf of unfragmentable part. */
 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
 		;
 
 	if ((mlast->m_flags & M_EXT) == 0 &&
 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
 		/* use the trailing space of the last mbuf for the fragment hdr */
 		*frghdrp =
 			(struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
 		mlast->m_len += sizeof(struct ip6_frag);
 		m->m_pkthdr.len += sizeof(struct ip6_frag);
 	} else {
 		/* allocate a new mbuf for the fragment header */
 		struct mbuf *mfrg;
 
 		MGET(mfrg, M_DONTWAIT, MT_DATA);
 		if (mfrg == 0)
 			return(ENOBUFS);
 		mfrg->m_len = sizeof(struct ip6_frag);
 		*frghdrp = mtod(mfrg, struct ip6_frag *);
 		mlast->m_next = mfrg;
 	}
 
 	return(0);
 }
 
 /*
  * IP6 socket option processing.
  *
  * Carries out the set/getsockopt request described by sopt on the IPv6
  * pcb attached to so.  Only IPPROTO_IPV6 level requests are handled;
  * any other level yields EINVAL and an unknown option name yields
  * ENOPROTOOPT.  Returns 0 on success or an errno value.
  */
 int
 ip6_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	int privileged;
 	struct inpcb *in6p = sotoinpcb(so);
 	int error, optval;
 	int level, op, optname;
 	int optlen;
 	struct thread *td;
 
 	if (sopt) {
 		level = sopt->sopt_level;
 		op = sopt->sopt_dir;
 		optname = sopt->sopt_name;
 		optlen = sopt->sopt_valsize;
 		td = sopt->sopt_td;
 	} else {
 		panic("ip6_ctloutput: arg soopt is NULL");
 	}
 	error = optval = 0;
 
 	/*
 	 * privileged is 1 only when the request carries a thread and
 	 * suser() returns 0 for it.
 	 */
 	privileged = (td == 0 || suser(td)) ? 0 : 1;
 
 	if (level == IPPROTO_IPV6) {
 		switch (op) {
 
 		case SOPT_SET:
 			switch (optname) {
 			case IPV6_PKTOPTIONS:
 			{
 				struct mbuf *m;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				error = ip6_pcbopts(&in6p->in6p_outputopts,
 						    m, so, sopt);
 				m_freem(m); /* XXX */
 				break;
 			}
 
 			/*
 			 * Use of some Hop-by-Hop options or some
 			 * Destination options, might require special
 			 * privilege.  That is, normal applications
 			 * (without special privilege) might be forbidden
 			 * from setting certain options in outgoing packets,
 			 * and might never see certain options in received
 			 * packets. [RFC 2292 Section 6]
 			 * KAME specific note:
 			 *  KAME prevents non-privileged users from sending or
 			 *  receiving ANY hbh/dst options in order to avoid
 			 *  overhead of parsing options in the kernel.
 			 */
 			case IPV6_UNICAST_HOPS:
 			case IPV6_CHECKSUM:
 			case IPV6_FAITH:
 
 			case IPV6_V6ONLY:
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 
 				case IPV6_UNICAST_HOPS:
 					if (optval < -1 || optval >= 256)
 						error = EINVAL;
 					else {
 						/* -1 = kernel default */
 						in6p->in6p_hops = optval;
 
 						if ((in6p->in6p_vflag &
 						     INP_IPV4) != 0)
 							in6p->inp_ip_ttl = optval;
 					}
 					break;
 #define OPTSET(bit) \
 do { \
 	if (optval) \
 		in6p->in6p_flags |= (bit); \
 	else \
 		in6p->in6p_flags &= ~(bit); \
 } while (0)
 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
 
 				case IPV6_CHECKSUM:
 					in6p->in6p_cksum = optval;
 					break;
 
 				case IPV6_FAITH:
 					OPTSET(IN6P_FAITH);
 					break;
 
 				case IPV6_V6ONLY:
 					/*
 					 * make setsockopt(IPV6_V6ONLY)
 					 * available only prior to bind(2).
 					 * see ipng mailing list, Jun 22 2001.
 					 */
 					if (in6p->in6p_lport ||
 					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
 					{
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_IPV6_V6ONLY);
 					if (optval)
 						in6p->in6p_vflag &= ~INP_IPV4;
 					else
 						in6p->in6p_vflag |= INP_IPV4;
 					break;
 				}
 				break;
 
 			case IPV6_PKTINFO:
 			case IPV6_HOPLIMIT:
 			case IPV6_HOPOPTS:
 			case IPV6_DSTOPTS:
 			case IPV6_RTHDR:
 				/* RFC 2292 */
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 				case IPV6_PKTINFO:
 					OPTSET(IN6P_PKTINFO);
 					break;
 				case IPV6_HOPLIMIT:
 					OPTSET(IN6P_HOPLIMIT);
 					break;
 				case IPV6_HOPOPTS:
 					/*
 					 * Check super-user privilege.
 					 * See comments for IPV6_RECVHOPOPTS.
 					 */
 					if (!privileged)
 						return(EPERM);
 					OPTSET(IN6P_HOPOPTS);
 					break;
 				case IPV6_DSTOPTS:
 					if (!privileged)
 						return(EPERM);
 					OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
 					break;
 				case IPV6_RTHDR:
 					OPTSET(IN6P_RTHDR);
 					break;
 				}
 				break;
 #undef OPTSET
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
 			    {
 				struct mbuf *m;
 				if (sopt->sopt_valsize > MLEN) {
 					error = EMSGSIZE;
 					break;
 				}
 				/* XXX */
 				MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
 				if (m == 0) {
 					error = ENOBUFS;
 					break;
 				}
 				m->m_len = sopt->sopt_valsize;
 				error = sooptcopyin(sopt, mtod(m, char *),
 						    m->m_len, m->m_len);
 				/*
 				 * Do not hand an mbuf with unspecified
 				 * contents to ip6_setmoptions() when the
 				 * copyin failed; report the copyin error.
 				 */
 				if (error) {
 					(void)m_free(m);
 					break;
 				}
 				error =	ip6_setmoptions(sopt->sopt_name,
 							&in6p->in6p_moptions,
 							m);
 				(void)m_free(m);
 			    }
 				break;
 
 			case IPV6_PORTRANGE:
 				error = sooptcopyin(sopt, &optval,
 				    sizeof optval, sizeof optval);
 				if (error)
 					break;
 
 				switch (optval) {
 				case IPV6_PORTRANGE_DEFAULT:
 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 					break;
 
 				case IPV6_PORTRANGE_HIGH:
 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
 					in6p->in6p_flags |= IN6P_HIGHPORT;
 					break;
 
 				case IPV6_PORTRANGE_LOW:
 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 					in6p->in6p_flags |= IN6P_LOWPORT;
 					break;
 
 				default:
 					error = EINVAL;
 					break;
 				}
 				break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 			case IPV6_IPSEC_POLICY:
 			    {
 				caddr_t req = NULL;
 				size_t len = 0;
 				struct mbuf *m;
 
 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 					break;
 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 					break;
 				if (m) {
 					req = mtod(m, caddr_t);
 					len = m->m_len;
 				}
 				error = ipsec6_set_policy(in6p, optname, req,
 				                          len, privileged);
 				m_freem(m);
 			    }
 				break;
 #endif /* KAME IPSEC */
 
 			case IPV6_FW_ADD:
 			case IPV6_FW_DEL:
 			case IPV6_FW_FLUSH:
 			case IPV6_FW_ZERO:
 			    {
 				struct mbuf *m;
 				struct mbuf **mp = &m;
 
 				/* no firewall control hook is registered */
 				if (ip6_fw_ctl_ptr == NULL)
 					return EINVAL;
 				/* XXX */
 				if ((error = soopt_getm(sopt, &m)) != 0)
 					break;
 				/* XXX */
 				if ((error = soopt_mcopyin(sopt, m)) != 0)
 					break;
 				error = (*ip6_fw_ctl_ptr)(optname, mp);
 				m = *mp;
 			    }
 				break;
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 
 		case SOPT_GET:
 			switch (optname) {
 
 			case IPV6_PKTOPTIONS:
 				if (in6p->in6p_options) {
 					struct mbuf *m;
 					m = m_copym(in6p->in6p_options,
 					    0, M_COPYALL, M_TRYWAIT);
 					error = soopt_mcopyout(sopt, m);
 					if (error == 0)
 						m_freem(m);
 				} else
 					sopt->sopt_valsize = 0;
 				break;
 
 			case IPV6_UNICAST_HOPS:
 			case IPV6_CHECKSUM:
 
 			case IPV6_FAITH:
 			case IPV6_V6ONLY:
 			case IPV6_PORTRANGE:
 				switch (optname) {
 
 				case IPV6_UNICAST_HOPS:
 					optval = in6p->in6p_hops;
 					break;
 
 				case IPV6_CHECKSUM:
 					optval = in6p->in6p_cksum;
 					break;
 
 				case IPV6_FAITH:
 					optval = OPTBIT(IN6P_FAITH);
 					break;
 
 				case IPV6_V6ONLY:
 					optval = OPTBIT(IN6P_IPV6_V6ONLY);
 					break;
 
 				case IPV6_PORTRANGE:
 				    {
 					int flags;
 					flags = in6p->in6p_flags;
 					if (flags & IN6P_HIGHPORT)
 						optval = IPV6_PORTRANGE_HIGH;
 					else if (flags & IN6P_LOWPORT)
 						optval = IPV6_PORTRANGE_LOW;
 					else
 						optval = 0;
 					break;
 				    }
 				}
 				error = sooptcopyout(sopt, &optval,
 					sizeof optval);
 				break;
 
 			case IPV6_PKTINFO:
 			case IPV6_HOPLIMIT:
 			case IPV6_HOPOPTS:
 			case IPV6_RTHDR:
 			case IPV6_DSTOPTS:
 				/*
 				 * Only the hop-by-hop and destination
 				 * options flags are restricted to privileged
 				 * callers, mirroring the SOPT_SET side; the
 				 * remaining flags may be read by anyone.
 				 */
 				if ((optname == IPV6_HOPOPTS ||
 				     optname == IPV6_DSTOPTS) &&
 				    !privileged)
 					return(EPERM);
 				switch (optname) {
 				case IPV6_PKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
 				case IPV6_HOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
 				case IPV6_HOPOPTS:
 					if (!privileged)
 						return(EPERM);
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
 				case IPV6_RTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
 				case IPV6_DSTOPTS:
 					if (!privileged)
 						return(EPERM);
 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
 					break;
 				}
 				error = sooptcopyout(sopt, &optval,
 					sizeof optval);
 				break;
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
 			    {
 				struct mbuf *m;
 				error = ip6_getmoptions(sopt->sopt_name,
 						in6p->in6p_moptions, &m);
 				if (error == 0)
 					error = sooptcopyout(sopt,
 						mtod(m, char *), m->m_len);
 				m_freem(m);
 			    }
 				break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 			case IPV6_IPSEC_POLICY:
 			  {
 				caddr_t req = NULL;
 				size_t len = 0;
 				struct mbuf *m = NULL;
 				struct mbuf **mp = &m;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				if (m) {
 					req = mtod(m, caddr_t);
 					len = m->m_len;
 				}
 				error = ipsec6_get_policy(in6p, req, len, mp);
 				if (error == 0)
 					error = soopt_mcopyout(sopt, m); /*XXX*/
 				if (error == 0 && m)
 					m_freem(m);
 				break;
 			  }
 #endif /* KAME IPSEC */
 
 			case IPV6_FW_GET:
 			  {
 				struct mbuf *m;
 				struct mbuf **mp = &m;
 
 				/* no firewall control hook is registered */
 				if (ip6_fw_ctl_ptr == NULL)
 			        {
 					return EINVAL;
 				}
 				error = (*ip6_fw_ctl_ptr)(optname, mp);
 				if (error == 0)
 					error = soopt_mcopyout(sopt, m); /* XXX */
 				if (error == 0 && m)
 					m_freem(m);
 			  }
 				break;
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 		}
 	} else {
 		error = EINVAL;
 	}
 	return(error);
 }
 
 /*
  * Set up IP6 options in pcb for insertion in output packets or
  * specifying behavior of outgoing packets.
  */
 static int
 ip6_pcbopts(pktopt, m, so, sopt)
 	struct ip6_pktopts **pktopt;
 	struct mbuf *m;
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	struct ip6_pktopts *opt = *pktopt;
 	int error = 0;
 	struct thread *td = sopt->sopt_td;
 	int priv = 0;
 
 	/* turn off any old options. */
 	if (opt) {
 #ifdef DIAGNOSTIC
 		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
 		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
 		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
 			printf("ip6_pcbopts: all specified options are cleared.\n");
 #endif
 		ip6_clearpktopts(opt, 1, -1);
 	} else
 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
 	*pktopt = NULL;
 
 	if (!m || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options, regardless of
 		 * whether the opt is just created or given.
 		 */
 		free(opt, M_IP6OPT);
 		return(0);
 	}
 
 	/*  set options specified by user. */
 	if (td && !suser(td))
 		priv = 1;
 	if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) {
 		ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */
 		free(opt, M_IP6OPT);
 		return(error);
 	}
 	*pktopt = opt;
 	return(0);
 }
 
 /*
  * Put an ip6_pktopts structure into its pristine state.  Zeroing alone
  * is not enough: the hop limit field uses -1, not 0, to mean "use the
  * default".
  */
 void
 init_ip6pktopts(opt)
 	struct ip6_pktopts *opt;
 {
 
 	bzero(opt, sizeof(struct ip6_pktopts));
 
 	/* -1 selects the default hop limit */
 	opt->ip6po_hlim = -1;
 }
 
 /*
  * Reset every option held in pktopt.  Nothing is done unless optname
  * is -1.  When needfree is non-zero the storage behind each option
  * pointer is released before the pointer is cleared; a cached route is
  * released regardless of needfree.
  */
 void
 ip6_clearpktopts(pktopt, needfree, optname)
 	struct ip6_pktopts *pktopt;
 	int needfree, optname;
 {
 	/* only the "clear everything" request (-1) has any effect */
 	if (pktopt == NULL || optname != -1)
 		return;
 
 	if (needfree && pktopt->ip6po_pktinfo != NULL)
 		free(pktopt->ip6po_pktinfo, M_IP6OPT);
 	pktopt->ip6po_pktinfo = NULL;
 
 	pktopt->ip6po_hlim = -1;
 
 	if (needfree && pktopt->ip6po_nexthop != NULL)
 		free(pktopt->ip6po_nexthop, M_IP6OPT);
 	pktopt->ip6po_nexthop = NULL;
 
 	if (needfree && pktopt->ip6po_hbh != NULL)
 		free(pktopt->ip6po_hbh, M_IP6OPT);
 	pktopt->ip6po_hbh = NULL;
 
 	if (needfree && pktopt->ip6po_dest1 != NULL)
 		free(pktopt->ip6po_dest1, M_IP6OPT);
 	pktopt->ip6po_dest1 = NULL;
 
 	if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr != NULL)
 		free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
 	pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
 	/* drop the route cached alongside the routing header */
 	if (pktopt->ip6po_route.ro_rt) {
 		RTFREE(pktopt->ip6po_route.ro_rt);
 		pktopt->ip6po_route.ro_rt = NULL;
 	}
 
 	if (needfree && pktopt->ip6po_dest2 != NULL)
 		free(pktopt->ip6po_dest2, M_IP6OPT);
 	pktopt->ip6po_dest2 = NULL;
 }
 
 /*
  * Duplicate one extension header of src into dst.  The length is taken
  * from the header's own length field (in 8-byte units, not counting
  * the first unit).  Jumps to the enclosing function's "bad" label when
  * the allocation fails.
  */
 #define PKTOPT_EXTHDRCPY(type) \
 do {\
 	if (src->type) {\
 		int hlen =\
 			(((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
 		if (dst->type == NULL)\
 			goto bad;\
 		bcopy(src->type, dst->type, hlen);\
 	}\
 } while (0)
 
 /*
  * Make a deep copy of the packet options in src.
  *
  * canwait is passed to malloc() as its flags argument.  The route
  * cached in src is not copied.  Returns the new structure, or NULL when
  * src is NULL or an allocation fails; in the failure case everything
  * allocated so far is released.
  */
 struct ip6_pktopts *
 ip6_copypktopts(src, canwait)
 	struct ip6_pktopts *src;
 	int canwait;
 {
 	struct ip6_pktopts *dst;
 
 	if (src == NULL) {
 		printf("ip6_copypktopts: invalid argument\n");
 		return(NULL);
 	}
 
 	/*
 	 * Treat every NULL return as a failure, whatever flags were
 	 * passed, so that a NULL pointer is never written through.
 	 */
 	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
 	if (dst == NULL)
 		return (NULL);
 	bzero(dst, sizeof(*dst));
 
 	dst->ip6po_hlim = src->ip6po_hlim;
 	if (src->ip6po_pktinfo) {
 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
 					    M_IP6OPT, canwait);
 		if (dst->ip6po_pktinfo == NULL)
 			goto bad;
 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
 	}
 	if (src->ip6po_nexthop) {
 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
 					    M_IP6OPT, canwait);
 		if (dst->ip6po_nexthop == NULL)
 			goto bad;
 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
 		      src->ip6po_nexthop->sa_len);
 	}
 	PKTOPT_EXTHDRCPY(ip6po_hbh);
 	PKTOPT_EXTHDRCPY(ip6po_dest1);
 	PKTOPT_EXTHDRCPY(ip6po_dest2);
 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
 	return(dst);
 
   bad:
 	/* dst was zeroed above, so untouched members are still NULL */
 	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
 	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
 	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
 	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
 	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
 	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
 	free(dst, M_IP6OPT);
 	return(NULL);
 }
 #undef PKTOPT_EXTHDRCPY
 
 /*
  * Release a packet options structure together with everything it
  * holds.  A NULL pointer is accepted and ignored.
  */
 void
 ip6_freepcbopts(pktopt)
 	struct ip6_pktopts *pktopt;
 {
 	if (pktopt != NULL) {
 		/* first the options themselves, then the container */
 		ip6_clearpktopts(pktopt, 1, -1);
 		free(pktopt, M_IP6OPT);
 	}
 }
 
 /*
  * Set the IP6 multicast options in response to user setsockopt().
  *
  * optname selects the option, m holds the value supplied by the user
  * and im6op points at the pcb's multicast option pointer.  The option
  * structure is allocated on first use; on the way out it is freed again
  * and *im6op is set to NULL when every field holds its default value.
  * Returns 0 on success or an errno value.
  */
 static int
 ip6_setmoptions(optname, im6op, m)
 	int optname;
 	struct ip6_moptions **im6op;
 	struct mbuf *m;
 {
 	int error = 0;
 	u_int loop, ifindex;
 	struct ipv6_mreq *mreq;
 	struct ifnet *ifp;
 	struct ip6_moptions *im6o = *im6op;
 	struct route_in6 ro;
 	struct sockaddr_in6 *dst;
 	struct in6_multi_mship *imm;
 	struct thread *td = curthread;	/* XXX */
 
 	if (im6o == NULL) {
 		/*
 		 * No multicast option buffer attached to the pcb;
 		 * allocate one and initialize to default values.
 		 */
 		im6o = (struct ip6_moptions *)
 			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
 
 		if (im6o == NULL)
 			return(ENOBUFS);
 		*im6op = im6o;
 		im6o->im6o_multicast_ifp = NULL;
 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
 		LIST_INIT(&im6o->im6o_memberships);
 	}
 
 	switch (optname) {
 
 	case IPV6_MULTICAST_IF:
 		/*
 		 * Select the interface for outgoing multicast packets.
 		 */
 		if (m == NULL || m->m_len != sizeof(u_int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
 		/*
 		 * ifindex is declared u_int, so only the upper bound of
 		 * this range check can ever trigger.
 		 */
 		if (ifindex < 0 || if_index < ifindex) {
 			error = ENXIO;	/* XXX EINVAL? */
 			break;
 		}
 		ifp = ifnet_byindex(ifindex);
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		im6o->im6o_multicast_ifp = ifp;
 		break;
 
 	case IPV6_MULTICAST_HOPS:
 	    {
 		/*
 		 * Set the IP6 hoplimit for outgoing multicast packets.
 		 * A value of -1 selects the default, ip6_defmcasthlim.
 		 */
 		int optval;
 		if (m == NULL || m->m_len != sizeof(int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
 		if (optval < -1 || optval >= 256)
 			error = EINVAL;
 		else if (optval == -1)
 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 		else
 			im6o->im6o_multicast_hlim = optval;
 		break;
 	    }
 
 	case IPV6_MULTICAST_LOOP:
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.
 		 */
 		if (m == NULL || m->m_len != sizeof(u_int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
 		if (loop > 1) {
 			error = EINVAL;
 			break;
 		}
 		im6o->im6o_multicast_loop = loop;
 		break;
 
 	case IPV6_JOIN_GROUP:
 		/*
 		 * Add a multicast group membership.
 		 * Group must be a valid IP6 multicast address.
 		 */
 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
 			error = EINVAL;
 			break;
 		}
 		mreq = mtod(m, struct ipv6_mreq *);
 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
 			/*
 			 * We use the unspecified address to specify to accept
 			 * all multicast addresses. Only super user is allowed
 			 * to do this.
 			 */
 			if (suser(td))
 			{
 				error = EACCES;
 				break;
 			}
 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
 			error = EINVAL;
 			break;
 		}
 
 		/*
 		 * If the interface is specified, validate it.
 		 */
 		if (mreq->ipv6mr_interface < 0
 		 || if_index < mreq->ipv6mr_interface) {
 			error = ENXIO;	/* XXX EINVAL? */
 			break;
 		}
 		/*
 		 * If no interface was explicitly specified, choose an
 		 * appropriate one according to the given multicast address.
 		 */
 		if (mreq->ipv6mr_interface == 0) {
 			/*
 			 * If the multicast address is in node-local scope,
 			 * the interface should be a loopback interface.
 			 * Otherwise, look up the routing table for the
 			 * address, and choose the outgoing interface.
 			 *   XXX: is it a good approach?
 			 */
 			if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
 				ifp = &loif[0];
 			} else {
 				ro.ro_rt = NULL;
 				dst = (struct sockaddr_in6 *)&ro.ro_dst;
 				bzero(dst, sizeof(*dst));
 				dst->sin6_len = sizeof(struct sockaddr_in6);
 				dst->sin6_family = AF_INET6;
 				dst->sin6_addr = mreq->ipv6mr_multiaddr;
 				rtalloc((struct route *)&ro);
 				if (ro.ro_rt == NULL) {
 					error = EADDRNOTAVAIL;
 					break;
 				}
 				/* only the interface is kept; the route is released */
 				ifp = ro.ro_rt->rt_ifp;
-				rtfree(ro.ro_rt);
+				RTFREE(ro.ro_rt);
 			}
 		} else
 			ifp = ifnet_byindex(mreq->ipv6mr_interface);
 
 		/*
 		 * See if we found an interface, and confirm that it
 		 * supports multicast
 		 */
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		/*
 		 * Put interface index into the multicast address,
 		 * if the address has link-local scope.
 		 * NOTE(review): the index written is ipv6mr_interface, which
 		 * is 0 when the interface was picked by the lookup above --
 		 * confirm that this is intended.
 		 */
 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
 			mreq->ipv6mr_multiaddr.s6_addr16[1]
 				= htons(mreq->ipv6mr_interface);
 		}
 		/*
 		 * See if the membership already exists.
 		 */
 		for (imm = im6o->im6o_memberships.lh_first;
 		     imm != NULL; imm = imm->i6mm_chain.le_next)
 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 					       &mreq->ipv6mr_multiaddr))
 				break;
 		if (imm != NULL) {
 			error = EADDRINUSE;
 			break;
 		}
 		/*
 		 * Everything looks good; add a new record to the multicast
 		 * address list for the given interface.
 		 */
 		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
 		if (imm == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 		/* on failure in6_addmulti() reports the reason through error */
 		if ((imm->i6mm_maddr =
 		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
 			free(imm, M_IPMADDR);
 			break;
 		}
 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
 		break;
 
 	case IPV6_LEAVE_GROUP:
 		/*
 		 * Drop a multicast group membership.
 		 * Group must be a valid IP6 multicast address.
 		 */
 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
 			error = EINVAL;
 			break;
 		}
 		mreq = mtod(m, struct ipv6_mreq *);
 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
 			if (suser(td)) {
 				error = EACCES;
 				break;
 			}
 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
 			error = EINVAL;
 			break;
 		}
 		/*
 		 * If an interface address was specified, get a pointer
 		 * to its ifnet structure.
 		 */
 		if (mreq->ipv6mr_interface < 0
 		 || if_index < mreq->ipv6mr_interface) {
 			error = ENXIO;	/* XXX EINVAL? */
 			break;
 		}
 		ifp = ifnet_byindex(mreq->ipv6mr_interface);
 		/*
 		 * Put interface index into the multicast address,
 		 * if the address has link-local scope.
 		 */
 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
 			mreq->ipv6mr_multiaddr.s6_addr16[1]
 				= htons(mreq->ipv6mr_interface);
 		}
 		/*
 		 * Find the membership in the membership list.
 		 * A NULL ifp matches a membership on any interface.
 		 */
 		for (imm = im6o->im6o_memberships.lh_first;
 		     imm != NULL; imm = imm->i6mm_chain.le_next) {
 			if ((ifp == NULL ||
 			     imm->i6mm_maddr->in6m_ifp == ifp) &&
 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 					       &mreq->ipv6mr_multiaddr))
 				break;
 		}
 		if (imm == NULL) {
 			/* Unable to resolve interface */
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		/*
 		 * Give up the multicast address record to which the
 		 * membership points.
 		 */
 		LIST_REMOVE(imm, i6mm_chain);
 		in6_delmulti(imm->i6mm_maddr);
 		free(imm, M_IPMADDR);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	/*
 	 * If all options have default values, no need to keep the option
 	 * structure; this also releases one allocated above when the
 	 * request failed before changing anything.
 	 */
 	if (im6o->im6o_multicast_ifp == NULL &&
 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
 	    im6o->im6o_memberships.lh_first == NULL) {
 		free(*im6op, M_IPMOPTS);
 		*im6op = NULL;
 	}
 
 	return(error);
 }
 
 /*
  * Return the IP6 multicast options in response to user getsockopt().
  */
 static int
 ip6_getmoptions(optname, im6o, mp)
 	int optname;
 	struct ip6_moptions *im6o;
 	struct mbuf **mp;
 {
 	u_int *hlim, *loop, *ifindex;
 
 	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
 
 	switch (optname) {
 
 	case IPV6_MULTICAST_IF:
 		ifindex = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
 			*ifindex = 0;
 		else
 			*ifindex = im6o->im6o_multicast_ifp->if_index;
 		return(0);
 
 	case IPV6_MULTICAST_HOPS:
 		hlim = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL)
 			*hlim = ip6_defmcasthlim;
 		else
 			*hlim = im6o->im6o_multicast_hlim;
 		return(0);
 
 	case IPV6_MULTICAST_LOOP:
 		loop = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL)
 			*loop = ip6_defmcasthlim;
 		else
 			*loop = im6o->im6o_multicast_loop;
 		return(0);
 
 	default:
 		return(EOPNOTSUPP);
 	}
 }
 
 /*
  * Tear down a set of IP6 multicast options: every group membership on
  * the list is given up and its record freed, then the option structure
  * itself is freed.  A NULL pointer is accepted and ignored.
  */
 void
 ip6_freemoptions(im6o)
 	struct ip6_moptions *im6o;
 {
 	struct in6_multi_mship *mship;
 
 	if (im6o == NULL)
 		return;
 
 	/* keep taking the head of the list until nothing is left */
 	for (;;) {
 		mship = im6o->im6o_memberships.lh_first;
 		if (mship == NULL)
 			break;
 		LIST_REMOVE(mship, i6mm_chain);
 		if (mship->i6mm_maddr != NULL)
 			in6_delmulti(mship->i6mm_maddr);
 		free(mship, M_IPMADDR);
 	}
 
 	free(im6o, M_IPMOPTS);
 }
 
 /*
  * Set IPv6 outgoing packet options based on advanced API.
  */
 int
 ip6_setpktoptions(control, opt, priv, needcopy)
 	struct mbuf *control;
 	struct ip6_pktopts *opt;
 	int priv, needcopy;
 {
 	struct cmsghdr *cm = 0;
 
 	if (control == 0 || opt == 0)
 		return(EINVAL);
 
 	init_ip6pktopts(opt);
 
 	/*
 	 * XXX: Currently, we assume all the optional information is stored
 	 * in a single mbuf.
 	 */
 	if (control->m_next)
 		return(EINVAL);
 
 	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
 		     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 		cm = mtod(control, struct cmsghdr *);
 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
 			return(EINVAL);
 		if (cm->cmsg_level != IPPROTO_IPV6)
 			continue;
 
 		/*
 		 * XXX should check if RFC2292 API is mixed with 2292bis API
 		 */
 		switch (cm->cmsg_type) {
 		case IPV6_PKTINFO:
 			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
 				return(EINVAL);
 			if (needcopy) {
 				/* XXX: Is it really WAITOK? */
 				opt->ip6po_pktinfo =
 					malloc(sizeof(struct in6_pktinfo),
 					       M_IP6OPT, M_WAITOK);
 				bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo,
 				    sizeof(struct in6_pktinfo));
 			} else
 				opt->ip6po_pktinfo =
 					(struct in6_pktinfo *)CMSG_DATA(cm);
 			if (opt->ip6po_pktinfo->ipi6_ifindex &&
 			    IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
 				opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
 					htons(opt->ip6po_pktinfo->ipi6_ifindex);
 
 			if (opt->ip6po_pktinfo->ipi6_ifindex > if_index
 			 || opt->ip6po_pktinfo->ipi6_ifindex < 0) {
 				return(ENXIO);
 			}
 
 			/*
 			 * Check if the requested source address is indeed a
 			 * unicast address assigned to the node, and can be
 			 * used as the packet's source address.
 			 */
 			if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
 				struct in6_ifaddr *ia6;
 				struct sockaddr_in6 sin6;
 
 				bzero(&sin6, sizeof(sin6));
 				sin6.sin6_len = sizeof(sin6);
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_addr =
 					opt->ip6po_pktinfo->ipi6_addr;
 				ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
 				if (ia6 == NULL ||
 				    (ia6->ia6_flags & (IN6_IFF_ANYCAST |
 						       IN6_IFF_NOTREADY)) != 0)
 					return(EADDRNOTAVAIL);
 			}
 			break;
 
 		case IPV6_HOPLIMIT:
 			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
 				return(EINVAL);
 
 			opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
 			if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
 				return(EINVAL);
 			break;
 
 		case IPV6_NEXTHOP:
 			if (!priv)
 				return(EPERM);
 
 			if (cm->cmsg_len < sizeof(u_char) ||
 			    /* check if cmsg_len is large enough for sa_len */
 			    cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
 				return(EINVAL);
 
 			if (needcopy) {
 				opt->ip6po_nexthop =
 					malloc(*CMSG_DATA(cm),
 					       M_IP6OPT, M_WAITOK);
 				bcopy(CMSG_DATA(cm),
 				      opt->ip6po_nexthop,
 				      *CMSG_DATA(cm));
 			} else
 				opt->ip6po_nexthop =
 					(struct sockaddr *)CMSG_DATA(cm);
 			break;
 
 		case IPV6_HOPOPTS:
 		{
 			struct ip6_hbh *hbh;
 			int hbhlen;
 
 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
 				return(EINVAL);
 			hbh = (struct ip6_hbh *)CMSG_DATA(cm);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 			if (cm->cmsg_len != CMSG_LEN(hbhlen))
 				return(EINVAL);
 
 			if (needcopy) {
 				opt->ip6po_hbh =
 					malloc(hbhlen, M_IP6OPT, M_WAITOK);
 				bcopy(hbh, opt->ip6po_hbh, hbhlen);
 			} else
 				opt->ip6po_hbh = hbh;
 			break;
 		}
 
 		case IPV6_DSTOPTS:
 		{
 			struct ip6_dest *dest, **newdest;
 			int destlen;
 
 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
 				return(EINVAL);
 			dest = (struct ip6_dest *)CMSG_DATA(cm);
 			destlen = (dest->ip6d_len + 1) << 3;
 			if (cm->cmsg_len != CMSG_LEN(destlen))
 				return(EINVAL);
 
 			/* 
 			 * The old advanced API is ambiguous on this
 			 * point. Our approach is to determine the
 			 * position based according to the existence
 			 * of a routing header. Note, however, that
 			 * this depends on the order of the extension
 			 * headers in the ancillary data; the 1st part
 			 * of the destination options header must
 			 * appear before the routing header in the
 			 * ancillary data, too.
 			 * RFC2292bis solved the ambiguity by
 			 * introducing separate cmsg types.
 			 */
 			if (opt->ip6po_rthdr == NULL)
 				newdest = &opt->ip6po_dest1;
 			else
 				newdest = &opt->ip6po_dest2;
 
 			if (needcopy) {
 				*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
 				bcopy(dest, *newdest, destlen);
 			} else
 				*newdest = dest;
 
 			break;
 		}
 
 		case IPV6_RTHDR:
 		{
 			struct ip6_rthdr *rth;
 			int rthlen;
 
 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
 				return(EINVAL);
 			rth = (struct ip6_rthdr *)CMSG_DATA(cm);
 			rthlen = (rth->ip6r_len + 1) << 3;
 			if (cm->cmsg_len != CMSG_LEN(rthlen))
 				return(EINVAL);
 
 			switch (rth->ip6r_type) {
 			case IPV6_RTHDR_TYPE_0:
 				/* must contain one addr */
 				if (rth->ip6r_len == 0)
 					return(EINVAL);
 				/* length must be even */
 				if (rth->ip6r_len % 2)
 					return(EINVAL);
 				if (rth->ip6r_len / 2 != rth->ip6r_segleft)
 					return(EINVAL);
 				break;
 			default:
 				return(EINVAL);	/* not supported */
 			}
 
 			if (needcopy) {
 				opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT,
 							  M_WAITOK);
 				bcopy(rth, opt->ip6po_rthdr, rthlen);
 			} else
 				opt->ip6po_rthdr = rth;
 
 			break;
 		}
 
 		default:
 			return(ENOPROTOOPT);
 		}
 	}
 
 	return(0);
 }
 
 /*
  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
  * packet to the input queue of a specified interface.  Note that this
  * calls the output routine of the loopback "driver", but with an interface
  * pointer that might NOT be &loif -- easier than replicating that code here.
  */
 void
 ip6_mloopback(ifp, m, dst)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr_in6 *dst;
 {
 	struct mbuf *dup;
 	struct ip6_hdr *hdr;
 
 	/* All work is done on a copy; give up quietly if none is available. */
 	dup = m_copy(m, 0, M_COPYALL);
 	if (dup == NULL)
 		return;
 
 	/*
 	 * The IPv6 header is overwritten below, so it has to be deep-copied
 	 * when the data sits in an mbuf cluster, and it has to be contiguous
 	 * in the first mbuf.
 	 */
 	if ((dup->m_flags & M_EXT) != 0 ||
 	    dup->m_len < sizeof(struct ip6_hdr)) {
 		if ((dup = m_pullup(dup, sizeof(struct ip6_hdr))) == NULL)
 			return;
 	}
 
 #ifdef DIAGNOSTIC
 	/* drop the copy if the header still is not completely there */
 	if (dup->m_len < sizeof(*hdr)) {
 		m_freem(dup);
 		return;
 	}
 #endif
 
 	hdr = mtod(dup, struct ip6_hdr *);
 #ifndef SCOPEDROUTING
 	/*
 	 * Strip embedded scope identifiers from both addresses;
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&hdr->ip6_src);
 	in6_clearscope(&hdr->ip6_dst);
 #endif
 
 	/* hand the copy to the loopback path; its result is ignored */
 	(void)if_simloop(ifp, dup, dst->sin6_family, 0);
 }
 
 /*
  * Chop IPv6 header off from the payload.
  */
 static int
 ip6_splithdr(m, exthdrs)
 	struct mbuf *m;
 	struct ip6_exthdrs *exthdrs;
 {
 	struct mbuf *mh;
 	struct ip6_hdr *ip6;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (m->m_len > sizeof(*ip6)) {
 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
 		if (mh == 0) {
 			m_freem(m);
 			return ENOBUFS;
 		}
 		M_MOVE_PKTHDR(mh, m);
 		MH_ALIGN(mh, sizeof(*ip6));
 		m->m_len -= sizeof(*ip6);
 		m->m_data += sizeof(*ip6);
 		mh->m_next = m;
 		m = mh;
 		m->m_len = sizeof(*ip6);
 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
 	}
 	exthdrs->ip6e_ip6 = m;
 	return 0;
 }
 
 /*
  * Compute IPv6 extension header length.
  */
 int
 ip6_optlen(in6p)
 	struct in6pcb *in6p;
 {
 	int len;
 
 	if (!in6p->in6p_outputopts)
 		return 0;
 
 	len = 0;
 #define elen(x) \
     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
 
 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
 	if (in6p->in6p_outputopts->ip6po_rthdr)
 		/* dest1 is valid with rthdr only */
 		len += elen(in6p->in6p_outputopts->ip6po_dest1);
 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
 	return len;
 #undef elen
 }
Index: head/sys/netinet6/nd6.c
===================================================================
--- head/sys/netinet6/nd6.c	(revision 120726)
+++ head/sys/netinet6/nd6.c	(revision 120727)
@@ -1,2261 +1,2265 @@
 /*	$FreeBSD$	*/
 /*	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * XXX
  * KAME 970409 note:
  * BSD/OS version heavily modifies this code, related to llinfo.
  * Since we don't have BSD/OS version of net/route.c in our hand,
  * I left the code mostly as it was in 970310.  -- itojun
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_atm.h>
 #include <net/iso88025.h>
 #include <net/fddi.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/in6_prefix.h>
 #include <netinet/icmp6.h>
 
 #include <net/net_osdep.h>
 
 /* period, in seconds, with which nd6_init() arms nd6_slowtimo_ch */
 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
 /* shorthand casts to specific sockaddr flavours */
 #define SIN6(s) ((struct sockaddr_in6 *)s)
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 /* timer values */
 int	nd6_prune	= 1;	/* walk list every 1 seconds */
 int	nd6_delay	= 5;	/* delay first probe time 5 second */
 int	nd6_umaxtries	= 3;	/* maximum unicast query */
 int	nd6_mmaxtries	= 3;	/* maximum multicast query */
 int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
 int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
 
 /* preventing too many loops in ND option parsing */
 int nd6_maxndopt = 10;	/* max # of ND options allowed */
 
 int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */
 
 /* nd6_debug defaults to on only when the kernel is built with ND6_DEBUG */
 #ifdef ND6_DEBUG
 int nd6_debug = 1;
 #else
 int nd6_debug = 0;
 #endif
 
 /* for debugging? */
 static int nd6_inuse, nd6_allocated;
 
 /*
  * Sentinel of the neighbor cache list: it is initialized pointing at itself,
  * and the walkers in this file follow ln_next until they are back at it.
  */
 struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6};
 /* number of entries nd_ifinfo is sized for; doubled by nd6_ifattach() */
 static size_t nd_ifinfo_indexlim = 8;
 /* per-interface ND state, indexed by if_index; allocated in nd6_ifattach() */
 struct nd_ifinfo *nd_ifinfo = NULL;
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
 
 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
 /*
  * All-ones IPv6 sockaddr, filled in by nd6_init() and passed as the netmask
  * when nd6_lookup() adds a host route.
  */
 static struct sockaddr_in6 all1_sa;
 
 static void nd6_slowtimo __P((void *));
 static int regen_tmpaddr __P((struct in6_ifaddr *));
 
 /* nd6_slowtimo_ch is armed in nd6_init(); nd6_timer_ch by nd6_timer() itself */
 struct callout nd6_slowtimo_ch;
 struct callout nd6_timer_ch;
 extern struct callout in6_tmpaddrtimer_ch;
 
 void
 nd6_init()
 {
 	static int initialized = 0;
 	int idx;
 
 	/* a second call is reported and otherwise ignored */
 	if (initialized != 0) {
 		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
 		return;
 	}
 
 	/* build the all-ones IPv6 socket address */
 	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
 	all1_sa.sin6_family = AF_INET6;
 	idx = 0;
 	while (idx < sizeof(all1_sa.sin6_addr)) {
 		all1_sa.sin6_addr.s6_addr[idx] = 0xff;
 		idx++;
 	}
 
 	/* the default router list starts out empty */
 	TAILQ_INIT(&nd_defrouter);
 
 	initialized = 1;
 
 	/* schedule the first run of the slow timer */
 	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, NULL);
 }
 
 /*
  * Set up the per-interface ND state (nd_ifinfo[ifp->if_index]) for ifp,
  * growing the nd_ifinfo array first when if_index does not fit.
  * Calling it again for an interface that is already set up is a no-op.
  */
 void
 nd6_ifattach(ifp)
 	struct ifnet *ifp;
 {
 
 	/*
 	 * We have some arrays that should be indexed by if_index.
 	 * since if_index will grow dynamically, they should grow too.
 	 */
 	if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
 		size_t n, oldn;
 		caddr_t q;
 
 		/*
 		 * Size of the array currently in use.  The limit may be
 		 * doubled more than once below, so the old size is not
 		 * necessarily half of the new one.
 		 */
 		oldn = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
 
 		while (if_index >= nd_ifinfo_indexlim)
 			nd_ifinfo_indexlim <<= 1;
 
 		/* grow nd_ifinfo */
 		n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
 		q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK);
 		bzero(q, n);
 		if (nd_ifinfo) {
 			/* carry over exactly what the old array held */
 			bcopy((caddr_t)nd_ifinfo, q, oldn);
 			free((caddr_t)nd_ifinfo, M_IP6NDP);
 		}
 		nd_ifinfo = (struct nd_ifinfo *)q;
 	}
 
 #define ND nd_ifinfo[ifp->if_index]
 
 	/*
 	 * Don't initialize if called twice.
 	 * XXX: to detect this, we should choose a member that is never set
 	 * before initialization of the ND structure itself.  We formerly used
 	 * the linkmtu member, which was not suitable because it could be
 	 * initialized via "ifconfig mtu".
 	 */
 	if (ND.basereachable)
 		return;
 
 	ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu;
 	ND.chlim = IPV6_DEFHLIM;
 	ND.basereachable = REACHABLE_TIME;
 	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
 	ND.retrans = RETRANS_TIMER;
 	ND.receivedra = 0;
 	/*
 	 * Note that the default value of ip6_accept_rtadv is 0, which means
 	 * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
 	 * here.
 	 */
 	ND.flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
 	/* derive maxmtu (and possibly adjust linkmtu) from the link type */
 	nd6_setmtu(ifp);
 #undef ND
 }
 
 /*
  * Reset ND level link MTU. This function is called when the physical MTU
  * changes, which means we might have to adjust the ND level MTU.
  */
 void
 nd6_setmtu(ifp)
 	struct ifnet *ifp;
 {
 	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
 	u_long prev_maxmtu = ndi->maxmtu;
 	u_long prev_linkmtu = ndi->linkmtu;
 
 	/* the interface MTU, capped by a per link type ceiling where known */
 	switch (ifp->if_type) {
 	case IFT_ARCNET:	/* XXX MTU handling needs more work */
 		ndi->maxmtu = MIN(60480, ifp->if_mtu);
 		break;
 	case IFT_ETHER:
 	case IFT_IEEE1394:	/* XXX should be IEEE1394MTU(1500) */
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:	/* XXX should be IEEE80211MTU(1500) */
 #endif
 		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
 		break;
 	case IFT_FDDI:
 		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
 		break;
 	case IFT_ATM:
 		ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu);
 		break;
 	case IFT_ISO88025:
 		ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
 		break;
 	default:
 		ndi->maxmtu = ifp->if_mtu;
 		break;
 	}
 
 	/* nothing else to do unless maxmtu actually changed */
 	if (prev_maxmtu == ndi->maxmtu)
 		return;
 
 	/*
 	 * The ND level MTU is reset only when it is not set yet, or when
 	 * maxmtu has dropped below it.
 	 */
 	if (ndi->linkmtu != 0 && ndi->maxmtu >= ndi->linkmtu)
 		return;
 	ndi->linkmtu = ndi->maxmtu;
 
 	/* also adjust in6_maxmtu if necessary. */
 	if (prev_linkmtu != 0) {
 		in6_setmaxmtu();
 		return;
 	}
 
 	/*
 	 * XXX: the case analysis is grotty, but it is not efficient to call
 	 * in6_setmaxmtu() here when we are during the initialization
 	 * procedure.
 	 */
 	if (in6_maxmtu < ndi->linkmtu)
 		in6_maxmtu = ndi->linkmtu;
 
 /* NOTE(review): no matching #define MIN is visible in this function */
 #undef MIN
 }
 
 /*
  * Prepare ndopts for parsing the icmp6len bytes of ND options that start
  * at opt.  With icmp6len == 0 the parse state is marked as done right away.
  */
 void
 nd6_option_init(opt, icmp6len, ndopts)
 	void *opt;
 	int icmp6len;
 	union nd_opts *ndopts;
 {
 	bzero(ndopts, sizeof(*ndopts));
 
 	/* the end marker is recorded even when there are no options */
 	ndopts->nd_opts_last =
 	    (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
 
 	if (icmp6len == 0) {
 		ndopts->nd_opts_done = 1;
 		ndopts->nd_opts_search = NULL;
 	} else
 		ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
 }
 
 /*
  * Take one ND option.
  */
 struct nd_opt_hdr *
 nd6_option(ndopts)
 	union nd_opts *ndopts;
 {
 	struct nd_opt_hdr *nd_opt;
 	int olen;
 
 	if (!ndopts)
 		panic("ndopts == NULL in nd6_option");
 	if (!ndopts->nd_opts_last)
 		panic("uninitialized ndopts in nd6_option");
 	if (!ndopts->nd_opts_search)
 		return NULL;
 	if (ndopts->nd_opts_done)
 		return NULL;
 
 	nd_opt = ndopts->nd_opts_search;
 
 	/* make sure nd_opt_len is inside the buffer */
 	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	}
 
 	olen = nd_opt->nd_opt_len << 3;
 	if (olen == 0) {
 		/*
 		 * Message validation requires that all included
 		 * options have a length that is greater than zero.
 		 */
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	}
 
 	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
 	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
 		/* option overruns the end of buffer, invalid */
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
 		/* reached the end of options chain */
 		ndopts->nd_opts_done = 1;
 		ndopts->nd_opts_search = NULL;
 	}
 	return nd_opt;
 }
 
 /*
  * Parse multiple ND options.
  * This function is much easier to use, for ND routines that do not need
  * multiple options of the same type.
  */
 int
 nd6_options(ndopts)
 	union nd_opts *ndopts;
 {
 	struct nd_opt_hdr *nd_opt;
 	int i = 0;
 
 	if (!ndopts)
 		panic("ndopts == NULL in nd6_options");
 	if (!ndopts->nd_opts_last)
 		panic("uninitialized ndopts in nd6_options");
 	if (!ndopts->nd_opts_search)
 		return 0;
 
 	while (1) {
 		nd_opt = nd6_option(ndopts);
 		if (!nd_opt && !ndopts->nd_opts_last) {
 			/*
 			 * Message validation requires that all included
 			 * options have a length that is greater than zero.
 			 */
 			icmp6stat.icp6s_nd_badopt++;
 			bzero(ndopts, sizeof(*ndopts));
 			return -1;
 		}
 
 		if (!nd_opt)
 			goto skip1;
 
 		switch (nd_opt->nd_opt_type) {
 		case ND_OPT_SOURCE_LINKADDR:
 		case ND_OPT_TARGET_LINKADDR:
 		case ND_OPT_MTU:
 		case ND_OPT_REDIRECTED_HEADER:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 				nd6log((LOG_INFO,
 				    "duplicated ND6 option found (type=%d)\n",
 				    nd_opt->nd_opt_type));
 				/* XXX bark? */
 			} else {
 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
 					= nd_opt;
 			}
 			break;
 		case ND_OPT_PREFIX_INFORMATION:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
 					= nd_opt;
 			}
 			ndopts->nd_opts_pi_end =
 				(struct nd_opt_prefix_info *)nd_opt;
 			break;
 		default:
 			/*
 			 * Unknown options must be silently ignored,
 			 * to accomodate future extension to the protocol.
 			 */
 			nd6log((LOG_DEBUG,
 			    "nd6_options: unsupported option %d - "
 			    "option ignored\n", nd_opt->nd_opt_type));
 		}
 
 skip1:
 		i++;
 		if (i > nd6_maxndopt) {
 			icmp6stat.icp6s_nd_toomanyopt++;
 			nd6log((LOG_INFO, "too many loop in nd opt\n"));
 			break;
 		}
 
 		if (ndopts->nd_opts_done)
 			break;
 	}
 
 	return 0;
 }
 
 /*
  * ND6 timer routine to expire default route list and prefix list
  */
 void
 nd6_timer(ignored_arg)
 	void	*ignored_arg;
 {
 	int s;
 	struct llinfo_nd6 *ln;
 	struct nd_defrouter *dr;
 	struct nd_prefix *pr;
 	struct ifnet *ifp;
 	struct in6_ifaddr *ia6, *nia6;
 	struct in6_addrlifetime *lt6;
 
 	s = splnet();
 	/* re-arm ourselves: this routine runs every nd6_prune seconds */
 	callout_reset(&nd6_timer_ch, nd6_prune * hz,
 		      nd6_timer, NULL);
 
 	/* walk the neighbor cache and act on every expired entry */
 	ln = llinfo_nd6.ln_next;
 	while (ln && ln != &llinfo_nd6) {
 		struct rtentry *rt;
 		struct sockaddr_in6 *dst;
 		/* saved now; replaced by nd6_free()'s result on deletion */
 		struct llinfo_nd6 *next = ln->ln_next;
 		/* XXX: used for the DELAY case only: */
 		struct nd_ifinfo *ndi = NULL;
 
 		if ((rt = ln->ln_rt) == NULL) {
 			ln = next;
 			continue;
 		}
 		if ((ifp = rt->rt_ifp) == NULL) {
 			ln = next;
 			continue;
 		}
 		ndi = &nd_ifinfo[ifp->if_index];
 		dst = (struct sockaddr_in6 *)rt_key(rt);
 
 		/* not expired yet */
 		if (ln->ln_expire > time_second) {
 			ln = next;
 			continue;
 		}
 
 		/* sanity check */
 		if (!rt)
 			panic("rt=0 in nd6_timer(ln=%p)", ln);
 		if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
 			panic("rt_llinfo(%p) is not equal to ln(%p)",
 			      rt->rt_llinfo, ln);
 		if (!dst)
 			panic("dst=0 in nd6_timer(ln=%p)", ln);
 
 		switch (ln->ln_state) {
 		case ND6_LLINFO_INCOMPLETE:
 			if (ln->ln_asked < nd6_mmaxtries) {
 				/* retry the multicast solicitation */
 				ln->ln_asked++;
 				ln->ln_expire = time_second +
 					nd_ifinfo[ifp->if_index].retrans / 1000;
 				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
 					ln, 0);
 			} else {
 				/* give up: report the held packet, drop entry */
 				struct mbuf *m = ln->ln_hold;
 				if (m) {
 					if (rt->rt_ifp) {
 						/*
 						 * Fake rcvif to make ICMP error
 						 * more helpful in diagnosing
 						 * for the receiver.
 						 * XXX: should we consider
 						 * older rcvif?
 						 */
 						m->m_pkthdr.rcvif = rt->rt_ifp;
 					}
 					icmp6_error(m, ICMP6_DST_UNREACH,
 						    ICMP6_DST_UNREACH_ADDR, 0);
 					ln->ln_hold = NULL;
 				}
 				next = nd6_free(rt);
 			}
 			break;
 		case ND6_LLINFO_REACHABLE:
 			if (ln->ln_expire) {
 				ln->ln_state = ND6_LLINFO_STALE;
 				ln->ln_expire = time_second + nd6_gctimer;
 			}
 			break;
 
 		case ND6_LLINFO_STALE:
 			/* Garbage Collection(RFC 2461 5.3) */
 			if (ln->ln_expire)
 				next = nd6_free(rt);
 			break;
 
 		case ND6_LLINFO_DELAY:
 			if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
 				/* We need NUD */
 				ln->ln_asked = 1;
 				ln->ln_state = ND6_LLINFO_PROBE;
 				ln->ln_expire = time_second +
 					ndi->retrans / 1000;
 				nd6_ns_output(ifp, &dst->sin6_addr,
 					      &dst->sin6_addr,
 					      ln, 0);
 			} else {
 				ln->ln_state = ND6_LLINFO_STALE; /* XXX */
 				ln->ln_expire = time_second + nd6_gctimer;
 			}
 			break;
 		case ND6_LLINFO_PROBE:
 			if (ln->ln_asked < nd6_umaxtries) {
 				/* retry the unicast probe */
 				ln->ln_asked++;
 				ln->ln_expire = time_second +
 					nd_ifinfo[ifp->if_index].retrans / 1000;
 				nd6_ns_output(ifp, &dst->sin6_addr,
 					       &dst->sin6_addr, ln, 0);
 			} else {
 				next = nd6_free(rt);
 			}
 			break;
 		}
 		ln = next;
 	}
 
 	/* expire default router list */
 	dr = TAILQ_FIRST(&nd_defrouter);
 	while (dr) {
 		if (dr->expire && dr->expire < time_second) {
 			struct nd_defrouter *t;
 			t = TAILQ_NEXT(dr, dr_entry);
 			defrtrlist_del(dr);
 			dr = t;
 		} else {
 			dr = TAILQ_NEXT(dr, dr_entry);
 		}
 	}
 
 	/*
 	 * expire interface addresses.
 	 * in the past the loop was inside prefix expiry processing.
 	 * However, from a stricter spec-conformance standpoint, we should
 	 * rather separate address lifetimes and prefix lifetimes.
 	 */
   addrloop:
 	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
 		nia6 = ia6->ia_next;
 		/* check address lifetime */
 		lt6 = &ia6->ia6_lifetime;
 		if (IFA6_IS_INVALID(ia6)) {
 			int regen = 0;
 
 			/*
 			 * If the expiring address is temporary, try
 			 * regenerating a new one.  This would be useful when
 			 * we suspended a laptop PC, then turned it on after a
 			 * period that could invalidate all temporary
 			 * addresses.  Although we may have to restart the
 			 * loop (see below), it must be after purging the
 			 * address.  Otherwise, we'd see an infinite loop of
 			 * regeneration.
 			 */
 			if (ip6_use_tempaddr &&
 			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 				if (regen_tmpaddr(ia6) == 0)
 					regen = 1;
 			}
 
 			in6_purgeaddr(&ia6->ia_ifa);
 
 			if (regen)
 				goto addrloop; /* XXX: see below */
 
 			/*
 			 * The address has just been purged (and may have
 			 * been released), so it must not be examined or
 			 * modified any further; go on with the successor
 			 * saved above.
 			 */
 			continue;
 		}
 		if (IFA6_IS_DEPRECATED(ia6)) {
 			int oldflags = ia6->ia6_flags;
 
 			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
 
 			/*
 			 * If a temporary address has just become deprecated,
 			 * regenerate a new one if possible.
 			 */
 			if (ip6_use_tempaddr &&
 			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
 
 				if (regen_tmpaddr(ia6) == 0) {
 					/*
 					 * A new temporary address is
 					 * generated.
 					 * XXX: this means the address chain
 					 * has changed while we are still in
 					 * the loop.  Although the change
 					 * would not cause disaster (because
 					 * it's not a deletion, but an
 					 * addition,) we'd rather restart the
 					 * loop just for safety.  Or does this
 					 * significantly reduce performance??
 					 */
 					goto addrloop;
 				}
 			}
 		} else {
 			/*
 			 * A new RA might have made a deprecated address
 			 * preferred.
 			 */
 			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
 		}
 	}
 
 	/* expire prefix list */
 	pr = nd_prefix.lh_first;
 	while (pr) {
 		/*
 		 * check prefix lifetime.
 		 * since pltime is just for autoconf, pltime processing for
 		 * prefix is not necessary.
 		 */
 		if (pr->ndpr_expire && pr->ndpr_expire < time_second) {
 			struct nd_prefix *t;
 			t = pr->ndpr_next;
 
 			/*
 			 * address expiration and prefix expiration are
 			 * separate.  NEVER perform in6_purgeaddr here.
 			 */
 
 			prelist_remove(pr);
 			pr = t;
 		} else
 			pr = pr->ndpr_next;
 	}
 	splx(s);
 }
 
 static int
 regen_tmpaddr(ia6)
 	struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	struct in6_ifaddr *public_ifa6 = NULL;
 
 	ifp = ia6->ia_ifa.ifa_ifp;
 	for (ifa = ifp->if_addrlist.tqh_first; ifa;
 	     ifa = ifa->ifa_list.tqe_next)
 	{
 		struct in6_ifaddr *it6;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		it6 = (struct in6_ifaddr *)ifa;
 
 		/* ignore no autoconf addresses. */
 		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 			continue;
 
 		/* ignore autoconf addresses with different prefixes. */
 		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
 			continue;
 
 		/*
 		 * Now we are looking at an autoconf address with the same
 		 * prefix as ours.  If the address is temporary and is still
 		 * preferred, do not create another one.  It would be rare, but
 		 * could happen, for example, when we resume a laptop PC after
 		 * a long period.
 		 */
 		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 		    !IFA6_IS_DEPRECATED(it6)) {
 			public_ifa6 = NULL;
 			break;
 		}
 
 		/*
 		 * This is a public autoconf address that has the same prefix
 		 * as ours.  If it is preferred, keep it.  We can't break the
 		 * loop here, because there may be a still-preferred temporary
 		 * address with the prefix.
 		 */
 		if (!IFA6_IS_DEPRECATED(it6))
 		    public_ifa6 = it6;
 	}
 
 	if (public_ifa6 != NULL) {
 		int e;
 
 		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
 			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
 			    " tmp addr,errno=%d\n", e);
 			return(-1);
 		}
 		return(0);
 	}
 
 	return(-1);
 }
 
 /*
  * Nuke neighbor cache/prefix/default router management table, right before
  * ifp goes away.
  */
 void
 nd6_purge(ifp)
 	struct ifnet *ifp;
 {
 	struct llinfo_nd6 *ln, *nln;
 	struct nd_defrouter *dr, *ndr, drany;
 	struct nd_prefix *pr, *npr;
 
 	/* Nuke default router list entries toward ifp */
 	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
 		/*
 		 * The first entry of the list may be stored in
 		 * the routing table, so we'll delete it later.
 		 */
 		/* the successor is fetched before dr may be deleted */
 		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
 			ndr = TAILQ_NEXT(dr, dr_entry);
 			if (dr->ifp == ifp)
 				defrtrlist_del(dr);
 		}
 		/* now take care of the head entry that was skipped above */
 		dr = TAILQ_FIRST(&nd_defrouter);
 		if (dr->ifp == ifp)
 			defrtrlist_del(dr);
 	}
 
 	/* Nuke prefix list entries toward ifp */
 	for (pr = nd_prefix.lh_first; pr; pr = npr) {
 		/* the successor is fetched before pr may be removed */
 		npr = pr->ndpr_next;
 		if (pr->ndpr_ifp == ifp) {
 			/*
 			 * Previously, pr->ndpr_addr is removed as well,
 			 * but I strongly believe we don't have to do it.
 			 * nd6_purge() is only called from in6_ifdetach(),
 			 * which removes all the associated interface addresses
 			 * by itself.
 			 * (jinmei@kame.net 20010129)
 			 */
 			prelist_remove(pr);
 		}
 	}
 
 	/* cancel default outgoing interface setting */
 	if (nd6_defifindex == ifp->if_index)
 		nd6_setdefaultiface(0);
 
 	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
 		/* refresh default router list */
 		bzero(&drany, sizeof(drany));
 		defrouter_delreq(&drany, 0);
 		defrouter_select();
 	}
 
 	/*
 	 * Nuke neighbor cache entries for the ifp.
 	 * Note that rt->rt_ifp may not be the same as ifp,
 	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
 	 * nd6_rtrequest(), and ip6_input().
 	 */
 	ln = llinfo_nd6.ln_next;
 	while (ln && ln != &llinfo_nd6) {
 		struct rtentry *rt;
 		struct sockaddr_dl *sdl;
 
 		nln = ln->ln_next;
 		rt = ln->ln_rt;
 		/* the interface is identified by the link-level gateway index */
 		if (rt && rt->rt_gateway &&
 		    rt->rt_gateway->sa_family == AF_LINK) {
 			sdl = (struct sockaddr_dl *)rt->rt_gateway;
 			if (sdl->sdl_index == ifp->if_index)
 				/* nd6_free() returns the entry to continue with */
 				nln = nd6_free(rt);
 		}
 		ln = nln;
 	}
 }
 
 /*
  * Find the neighbor cache route for addr6.
  *
  * addr6:  the IPv6 address to look up.
  * create: when non-zero, a route that is not a neighbor entry is dropped
  *         and, if ifp is given, a host route with RTF_LLINFO is added
  *         for the address instead.
  * ifp:    interface the neighbor is expected on; may be NULL, in which
  *         case nothing is created and the interface is not checked.
  *
  * Returns the route entry, or NULL when there is no usable neighbor entry.
  * The reference obtained on the route is dropped again (rt_refcnt--) before
  * returning.
  */
 struct rtentry *
 nd6_lookup(addr6, create, ifp)
 	struct in6_addr *addr6;
 	int create;
 	struct ifnet *ifp;
 {
 	struct rtentry *rt;
 	struct sockaddr_in6 sin6;
 
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = *addr6;
 #ifdef SCOPEDROUTING
 	sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
 #endif
 	rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL);
-	if (rt && (rt->rt_flags & RTF_LLINFO) == 0) {
-		/*
-		 * This is the case for the default route.
-		 * If we want to create a neighbor cache for the address, we
-		 * should free the route for the destination and allocate an
-		 * interface route.
-		 */
-		if (create) {
-			RTFREE(rt);
+	if (rt) {
+		if ((rt->rt_flags & RTF_LLINFO) == 0 && create) {
+			/*
+			 * This is the case for the default route.
+			 * If we want to create a neighbor cache for the
+			 * address, we should free the route for the
+			 * destination and allocate an interface route.
+			 */
+			RTFREE_LOCKED(rt);
 			rt = 0;
 		}
+		/*
+		 * rt has been reset to NULL if the route was released just
+		 * above; only a route we still hold may be unlocked.
+		 */
+		if (rt)
+			RT_UNLOCK(rt);
 	}
 	if (!rt) {
 		if (create && ifp) {
 			int e;
 
 			/*
 			 * If no route is available and create is set,
 			 * we allocate a host route for the destination
 			 * and treat it like an interface route.
 			 * This hack is necessary for a neighbor which can't
 			 * be covered by our own prefix.
 			 */
 			struct ifaddr *ifa =
 				ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
 			if (ifa == NULL)
 				return(NULL);
 
 			/*
 			 * Create a new route.  RTF_LLINFO is necessary
 			 * to create a Neighbor Cache entry for the
 			 * destination in nd6_rtrequest which will be
 			 * called in rtrequest via ifa->ifa_rtrequest.
 			 */
 			if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6,
 					   ifa->ifa_addr,
 					   (struct sockaddr *)&all1_sa,
 					   (ifa->ifa_flags |
 					    RTF_HOST | RTF_LLINFO) &
 					   ~RTF_CLONING,
 					   &rt)) != 0)
 				log(LOG_ERR,
 				    "nd6_lookup: failed to add route for a "
 				    "neighbor(%s), errno=%d\n",
 				    ip6_sprintf(addr6), e);
 			if (rt == NULL)
 				return(NULL);
 			if (rt->rt_llinfo) {
 				struct llinfo_nd6 *ln =
 					(struct llinfo_nd6 *)rt->rt_llinfo;
 				ln->ln_state = ND6_LLINFO_NOSTATE;
 			}
 		} else
 			return(NULL);
 	}
 	rt->rt_refcnt--;
 	/*
 	 * Validation for the entry.
 	 * Note that the check for rt_llinfo is necessary because a cloned
 	 * route from a parent route that has the L flag (e.g. the default
 	 * route to a p2p interface) may have the flag, too, while the
 	 * destination is not actually a neighbor.
 	 * XXX: we can't use rt->rt_ifp to check for the interface, since
 	 *      it might be the loopback interface if the entry is for our
 	 *      own address on a non-loopback interface. Instead, we should
 	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
 	 *      interface.
 	 */
 	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
 	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
 	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
 		if (create) {
 			log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n",
 			    ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec");
 			/* xxx more logs... kazu */
 		}
 		return(NULL);
 	}
 	return(rt);
 }
 
 /*
  * Detect if a given IPv6 address identifies a neighbor on a given link.
  * XXX: should take care of the destination of a p2p link?
  */
 int
 nd6_is_addr_neighbor(addr, ifp)
 	struct sockaddr_in6 *addr;
 	struct ifnet *ifp;
 {
 	struct ifaddr *ifa;
 	int word, differs;
 
 #define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
 #define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
 
 	/*
 	 * A link-local address whose embedded interface index is ours is
 	 * always a neighbor.
 	 * XXX: we should use the sin6_scope_id field rather than the embedded
 	 * interface index.
 	 */
 	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
 	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
 		return(1);
 
 	/*
 	 * An address covered by the prefix of one of the interface's own
 	 * IPv6 addresses is a neighbor.
 	 */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa != NULL;
 	     ifa = ifa->ifa_list.tqe_next) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		/* compare under the prefix mask, 32 bits at a time */
 		differs = 0;
 		for (word = 0; word < 4 && !differs; word++) {
 			if ((IFADDR6(ifa).s6_addr32[word] ^
 			     addr->sin6_addr.s6_addr32[word]) &
 			    IFMASK6(ifa).s6_addr32[word])
 				differs = 1;
 		}
 		if (!differs)
 			return(1);
 	}
 
 	/*
 	 * Even if the address matches none of our addresses, it might be
 	 * in the neighbor cache.
 	 */
 	return((nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) ? 1 : 0);
 #undef IFADDR6
 #undef IFMASK6
 }
 
 /*
  * Free an nd6 llinfo entry.
  */
 /*
  * rt:      route entry whose rt_llinfo is the neighbor cache entry to free.
  * returns: the list entry that followed it (ln_next, read just before the
  *          route is deleted), so that callers walking the list can go on.
  */
 struct llinfo_nd6 *
 nd6_free(rt)
 	struct rtentry *rt;
 {
 	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
 	/* private copy of the destination address */
 	struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
 	struct nd_defrouter *dr;
 
 	/*
 	 * we used to have pfctlinput(PRC_HOSTDEAD) here. 
 	 * even though it is not harmful, it was not really necessary.
 	 */
 
 	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
 		int s;
 		s = splnet();
 		/* is the neighbor on the default router list? */
 		dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
 				      rt->rt_ifp);
 
 		if (ln->ln_router || dr) {
 			/*
 			 * rt6_flush must be called whether or not the neighbor
 			 * is in the Default Router List.
 			 * See a corresponding comment in nd6_na_input().
 			 */
 			rt6_flush(&in6, rt->rt_ifp);
 		}
 
 		if (dr) {
 			/*
 			 * Unreachability of a router might affect the default
 			 * router selection and on-link detection of advertised
 			 * prefixes.
 			 */
 
 			/*
 			 * Temporarily fake the state to choose a new default
 			 * router and to perform on-link determination of
 			 * prefixes correctly.
 			 * Below the state will be set correctly,
 			 * or the entry itself will be deleted.
 			 */
 			ln->ln_state = ND6_LLINFO_INCOMPLETE;
 
 			/*
 			 * Since defrouter_select() does not affect the
 			 * on-link determination and MIP6 needs the check
 			 * before the default router selection, we perform
 			 * the check now.
 			 */
 			pfxlist_onlink_check();
 
 			if (dr == TAILQ_FIRST(&nd_defrouter)) {
 				/*
 				 * It is used as the current default router,
 				 * so we have to move it to the end of the
 				 * list and choose a new one.
 				 * XXX: it is not very efficient if this is
 				 *      the only router.
 				 */
 				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
 				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
 
 				defrouter_select();
 			}
 		}
 		splx(s);
 	}
 
 	/*
 	 * Before deleting the entry, remember the next entry as the
 	 * return value.  We need this because pfxlist_onlink_check() above
 	 * might have freed other entries (particularly the old next entry) as
 	 * a side effect (XXX).
 	 */
 	next = ln->ln_next;
 
 	/*
 	 * Detach the route from the routing tree and the list of neighbor
 	 * caches, and disable the route entry not to be used in already
 	 * cached routes.
 	 */
 	/* the result of rtrequest() is not checked here */
 	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
 		  rt_mask(rt), 0, (struct rtentry **)0);
 
 	return(next);
 }
 
 /*
  * Upper-layer reachability hint for Neighbor Unreachability Detection.
  *
  * rt is the neighbor's route; when it is NULL the route is looked up from
  * dst6 instead (and nothing happens if dst6 is NULL too or the lookup
  * fails).  Unless "force" is set, each call bumps ln_byhint and the hint
  * is ignored once that counter exceeds nd6_maxnudhint.
  *
  * XXX cost-effective methods?
  */
 void
 nd6_nud_hint(rt, dst6, force)
 	struct rtentry *rt;
 	struct in6_addr *dst6;
 	int force;
 {
 	struct llinfo_nd6 *entry;
 
 	/* No route from the caller: resolve one from the destination. */
 	if (rt == NULL) {
 		if (dst6 == NULL)
 			return;
 		rt = nd6_lookup(dst6, 0, NULL);
 		if (rt == NULL)
 			return;
 	}
 
 	/*
 	 * Only a non-gateway LLINFO route that carries both an llinfo and
 	 * an AF_LINK gateway is a host (neighbor) route.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY)
 		return;
 	if (!(rt->rt_flags & RTF_LLINFO))
 		return;
 	if (rt->rt_llinfo == NULL || rt->rt_gateway == NULL)
 		return;
 	if (rt->rt_gateway->sa_family != AF_LINK)
 		return;
 
 	/* Entries below REACHABLE are left untouched. */
 	entry = (struct llinfo_nd6 *)rt->rt_llinfo;
 	if (entry->ln_state < ND6_LLINFO_REACHABLE)
 		return;
 
 	/*
 	 * Too many consecutive upper-layer confirmations may mean the
 	 * information is false, so stop trusting them past the limit.
 	 */
 	if (!force && ++entry->ln_byhint > nd6_maxnudhint)
 		return;
 
 	entry->ln_state = ND6_LLINFO_REACHABLE;
 	if (entry->ln_expire != 0) {
 		entry->ln_expire = time_second +
 		    nd_ifinfo[rt->rt_ifp->if_index].reachable;
 	}
 }
 
 /*
  * Routing-table request hook for IPv6 neighbor cache routes.
  *
  * req is the routing request (RTM_ADD, RTM_RESOLVE and RTM_DELETE are
  * handled, any other value falls through the switch untouched), rt is the
  * affected route and info is unused.  Gateway routes are ignored.  On
  * add/resolve the AF_LINK gateway is filled in and an llinfo_nd6 is
  * allocated, linked into the llinfo_nd6 list and attached to
  * rt->rt_llinfo; on delete that entry is unlinked and freed together with
  * any packet held on it.
  */
 void
 nd6_rtrequest(req, rt, info)
 	int	req;
 	struct rtentry *rt;
 	struct rt_addrinfo *info; /* xxx unused */
 {
 	struct sockaddr *gate = rt->rt_gateway;
 	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 	struct ifnet *ifp = rt->rt_ifp;
 	struct ifaddr *ifa;
 
+	RT_LOCK_ASSERT(rt);
+
 	if ((rt->rt_flags & RTF_GATEWAY))
 		return;
 
 	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
 		/*
 		 * This is probably an interface direct route for a link
 		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
 		 * We do not need special treatment below for such a route.
 		 * Moreover, the RTF_LLINFO flag which would be set below
 		 * would annoy the ndp(8) command.
 		 */
 		return;
 	}
 
 	if (req == RTM_RESOLVE &&
 	    (nd6_need_cache(ifp) == 0 || /* stf case */
 	     !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) {
 		/*
 		 * FreeBSD and BSD/OS often make a cloned host route based
 		 * on a less-specific route (e.g. the default route).
 		 * If the less specific route does not have a "gateway"
 		 * (this is the case when the route just goes to a p2p or an
 		 * stf interface), we'll mistakenly make a neighbor cache for
 		 * the host route, and will see strange neighbor solicitation
 		 * for the corresponding destination.  In order to avoid the
 		 * confusion, we check if the destination of the route is
 		 * a neighbor in terms of neighbor discovery, and stop the
 		 * process if not.  Additionally, we remove the LLINFO flag
 		 * so that ndp(8) will not try to get the neighbor information
 		 * of the destination.
 		 */
 		rt->rt_flags &= ~RTF_LLINFO;
 		return;
 	}
 
 	switch (req) {
 	case RTM_ADD:
 		/*
 		 * There is no backward compatibility :)
 		 *
 		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
 		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
 		 *	   rt->rt_flags |= RTF_CLONING;
 		 */
 		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
 			/*
 			 * Case 1: This route should come from
 			 * a route to interface.  RTF_LLINFO flag is set
 			 * for a host route whose destination should be
 			 * treated as on-link.
 			 */
 			rt_setgate(rt, rt_key(rt),
 				   (struct sockaddr *)&null_sdl);
 			/* rt_setgate() may have replaced the gateway storage. */
 			gate = rt->rt_gateway;
 			SDL(gate)->sdl_type = ifp->if_type;
 			SDL(gate)->sdl_index = ifp->if_index;
 			if (ln)
 				ln->ln_expire = time_second;
 #if 1
 			if (ln && ln->ln_expire == 0) {
 				/* kludge for desktops */
 #if 0
 				printf("nd6_rtequest: time.tv_sec is zero; "
 				       "treat it as 1\n");
 #endif
 				ln->ln_expire = 1;
 			}
 #endif
 			/* A cloning route gets no llinfo of its own. */
 			if ((rt->rt_flags & RTF_CLONING))
 				break;
 		}
 		/*
 		 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
 		 * We don't do that here since llinfo is not ready yet.
 		 *
 		 * There are also couple of other things to be discussed:
 		 * - unsolicited NA code needs improvement beforehand
 		 * - RFC2461 says we MAY send multicast unsolicited NA
 		 *   (7.2.6 paragraph 4), however, it also says that we
 		 *   SHOULD provide a mechanism to prevent multicast NA storm.
 		 *   we don't have anything like it right now.
 		 *   note that the mechanism needs a mutual agreement
 		 *   between proxies, which means that we need to implement
 		 *   a new protocol, or a new kludge.
 		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
 		 *   we need to check ip6forwarding before sending it.
 		 *   (or should we allow proxy ND configuration only for
 		 *   routers?  there's no mention about proxy ND from hosts)
 		 */
 #if 0
 		/* XXX it does not work */
 		if (rt->rt_flags & RTF_ANNOUNCE)
 			nd6_na_output(ifp,
 			      &SIN6(rt_key(rt))->sin6_addr,
 			      &SIN6(rt_key(rt))->sin6_addr,
 			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
 			      1, NULL);
 #endif
 		/* FALLTHROUGH */
 	case RTM_RESOLVE:
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
 			/*
 			 * Address resolution isn't necessary for a point to
 			 * point link, so we can skip this test for a p2p link.
 			 */
 			if (gate->sa_family != AF_LINK ||
 			    gate->sa_len < sizeof(null_sdl)) {
 				log(LOG_DEBUG,
 				    "nd6_rtrequest: bad gateway value: %s\n",
 				    if_name(ifp));
 				break;
 			}
 			SDL(gate)->sdl_type = ifp->if_type;
 			SDL(gate)->sdl_index = ifp->if_index;
 		}
 		if (ln != NULL)
 			break;	/* This happens on a route change */
 		/*
 		 * Case 2: This route may come from cloning, or a manual route
 		 * add with a LL address.
 		 */
 		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
 		/* Stored before the NULL check: a failed allocation leaves rt_llinfo NULL. */
 		rt->rt_llinfo = (caddr_t)ln;
 		if (!ln) {
 			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
 			break;
 		}
 		nd6_inuse++;
 		nd6_allocated++;
 		Bzero(ln, sizeof(*ln));
 		ln->ln_rt = rt;
 		/* this is required for "ndp" command. - shin */
 		if (req == RTM_ADD) {
 		        /*
 			 * gate should have some valid AF_LINK entry,
 			 * and ln->ln_expire should have some lifetime
 			 * which is specified by ndp command.
 			 */
 			ln->ln_state = ND6_LLINFO_REACHABLE;
 			ln->ln_byhint = 0;
 		} else {
 		        /*
 			 * When req == RTM_RESOLVE, rt is created and
 			 * initialized in rtrequest(), so rt_expire is 0.
 			 */
 			ln->ln_state = ND6_LLINFO_NOSTATE;
 			ln->ln_expire = time_second;
 		}
 		rt->rt_flags |= RTF_LLINFO;
 		/* Insert the new entry right after the llinfo_nd6 list head. */
 		ln->ln_next = llinfo_nd6.ln_next;
 		llinfo_nd6.ln_next = ln;
 		ln->ln_prev = &llinfo_nd6;
 		ln->ln_next->ln_prev = ln;
 
 		/*
 		 * check if rt_key(rt) is one of my address assigned
 		 * to the interface.
 		 */
 		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
 					  &SIN6(rt_key(rt))->sin6_addr);
 		if (ifa) {
 			caddr_t macp = nd6_ifptomac(ifp);
 			/* Our own address: never expires and is always reachable. */
 			ln->ln_expire = 0;
 			ln->ln_state = ND6_LLINFO_REACHABLE;
 			ln->ln_byhint = 0;
 			if (macp) {
 				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
 				SDL(gate)->sdl_alen = ifp->if_addrlen;
 			}
 			if (nd6_useloopback) {
 				rt->rt_ifp = &loif[0];	/* XXX */
 				/*
 				 * Make sure rt_ifa be equal to the ifaddr
 				 * corresponding to the address.
 				 * We need this because when we refer
 				 * rt_ifa->ia6_flags in ip6_input, we assume
 				 * that the rt_ifa points to the address instead
 				 * of the loopback address.
 				 */
 				if (ifa != rt->rt_ifa) {
 					IFAFREE(rt->rt_ifa);
 					IFAREF(ifa);
 					rt->rt_ifa = ifa;
 				}
 			}
 		} else if (rt->rt_flags & RTF_ANNOUNCE) {
 			ln->ln_expire = 0;
 			ln->ln_state = ND6_LLINFO_REACHABLE;
 			ln->ln_byhint = 0;
 
 			/* join solicited node multicast for proxy ND */
 			if (ifp->if_flags & IFF_MULTICAST) {
 				struct in6_addr llsol;
 				int error;
 
 				/*
 				 * Build the group address from the route key:
 				 * prefix word 0xff02, interface index embedded
 				 * in the second 16-bit word, low 24 bits kept
 				 * from the key.
 				 */
 				llsol = SIN6(rt_key(rt))->sin6_addr;
 				llsol.s6_addr16[0] = htons(0xff02);
 				llsol.s6_addr16[1] = htons(ifp->if_index);
 				llsol.s6_addr32[1] = 0;
 				llsol.s6_addr32[2] = htonl(1);
 				llsol.s6_addr8[12] = 0xff;
 
 				if (!in6_addmulti(&llsol, ifp, &error)) {
 					nd6log((LOG_ERR, "%s: failed to join "
 					    "%s (errno=%d)\n", if_name(ifp),
 					    ip6_sprintf(&llsol), error));
 				}
 			}
 		}
 		break;
 
 	case RTM_DELETE:
 		if (!ln)
 			break;
 		/* leave from solicited node multicast for proxy ND */
 		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
 		    (ifp->if_flags & IFF_MULTICAST) != 0) {
 			struct in6_addr llsol;
 			struct in6_multi *in6m;
 
 			/* Same group address as the one joined on add/resolve. */
 			llsol = SIN6(rt_key(rt))->sin6_addr;
 			llsol.s6_addr16[0] = htons(0xff02);
 			llsol.s6_addr16[1] = htons(ifp->if_index);
 			llsol.s6_addr32[1] = 0;
 			llsol.s6_addr32[2] = htonl(1);
 			llsol.s6_addr8[12] = 0xff;
 
 			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
 			if (in6m)
 				in6_delmulti(in6m);
 		}
 		nd6_inuse--;
 		/* Unlink from the llinfo_nd6 list and detach from the route. */
 		ln->ln_next->ln_prev = ln->ln_prev;
 		ln->ln_prev->ln_next = ln->ln_next;
 		ln->ln_prev = NULL;
 		rt->rt_llinfo = 0;
 		rt->rt_flags &= ~RTF_LLINFO;
 		/* Drop any packet still waiting for address resolution. */
 		if (ln->ln_hold)
 			m_freem(ln->ln_hold);
 		Free((caddr_t)ln);
 	}
 }
 
 /*
  * Neighbor discovery ioctl handler.
  *
  * cmd is the ioctl command, data the command-specific argument buffer
  * (viewed as in6_drlist, in6_prlist, in6_ndireq, in6_nbrinfo or
  * in6_ndifreq depending on cmd) and ifp the interface the request refers
  * to.  Returns 0 on success or an errno value; commands not listed in the
  * switch return 0 without doing anything.
  */
 int
 nd6_ioctl(cmd, data, ifp)
 	u_long cmd;
 	caddr_t	data;
 	struct ifnet *ifp;
 {
 	struct in6_drlist *drl = (struct in6_drlist *)data;
 	struct in6_prlist *prl = (struct in6_prlist *)data;
 	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
 	struct nd_defrouter *dr, any;
 	struct nd_prefix *pr;
 	struct rtentry *rt;
 	int i = 0, error = 0;
 	int s;
 
 	switch (cmd) {
 	case SIOCGDRLST_IN6:
 		/*
 		 * obsolete API, use sysctl under net.inet6.icmp6
 		 */
 		bzero(drl, sizeof(*drl));
 		s = splnet();
 		dr = TAILQ_FIRST(&nd_defrouter);
 		/* Copy at most DRLSTSIZ routers; the rest are silently dropped. */
 		while (dr && i < DRLSTSIZ) {
 			drl->defrouter[i].rtaddr = dr->rtaddr;
 			if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) {
 				/* XXX: need to this hack for KAME stack */
 				drl->defrouter[i].rtaddr.s6_addr16[1] = 0;
 			} else
 				log(LOG_ERR,
 				    "default router list contains a "
 				    "non-linklocal address(%s)\n",
 				    ip6_sprintf(&drl->defrouter[i].rtaddr));
 
 			drl->defrouter[i].flags = dr->flags;
 			drl->defrouter[i].rtlifetime = dr->rtlifetime;
 			drl->defrouter[i].expire = dr->expire;
 			drl->defrouter[i].if_index = dr->ifp->if_index;
 			i++;
 			dr = TAILQ_NEXT(dr, dr_entry);
 		}
 		splx(s);
 		break;
 	case SIOCGPRLST_IN6:
 		/*
 		 * obsolete API, use sysctl under net.inet6.icmp6
 		 */
 		/*
 		 * XXX meaning of fields, especially "raflags", is very
 		 * different between RA prefix list and RR/static prefix list.
 		 * how about separating ioctls into two?
 		 */
 		bzero(prl, sizeof(*prl));
 		s = splnet();
 		pr = nd_prefix.lh_first;
 		while (pr && i < PRLSTSIZ) {
 			struct nd_pfxrouter *pfr;
 			int j;
 
 			(void)in6_embedscope(&prl->prefix[i].prefix,
 			    &pr->ndpr_prefix, NULL, NULL);
 			prl->prefix[i].raflags = pr->ndpr_raf;
 			prl->prefix[i].prefixlen = pr->ndpr_plen;
 			prl->prefix[i].vltime = pr->ndpr_vltime;
 			prl->prefix[i].pltime = pr->ndpr_pltime;
 			prl->prefix[i].if_index = pr->ndpr_ifp->if_index;
 			prl->prefix[i].expire = pr->ndpr_expire;
 
 			/*
 			 * Store the first DRLSTSIZ advertising routers, but
 			 * keep counting so advrtrs reports the total.
 			 */
 			pfr = pr->ndpr_advrtrs.lh_first;
 			j = 0;
 			while (pfr) {
 				if (j < DRLSTSIZ) {
 #define RTRADDR prl->prefix[i].advrtr[j]
 					RTRADDR = pfr->router->rtaddr;
 					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
 						/* XXX: hack for KAME */
 						RTRADDR.s6_addr16[1] = 0;
 					} else
 						log(LOG_ERR,
 						    "a router(%s) advertises "
 						    "a prefix with "
 						    "non-link local address\n",
 						    ip6_sprintf(&RTRADDR));
 #undef RTRADDR
 				}
 				j++;
 				pfr = pfr->pfr_next;
 			}
 			prl->prefix[i].advrtrs = j;
 			prl->prefix[i].origin = PR_ORIG_RA;
 
 			i++;
 			pr = pr->ndpr_next;
 		}
 		{
 			struct rr_prefix *rpp;
 
 			/* Append RR/static prefixes while slots remain. */
 			for (rpp = LIST_FIRST(&rr_prefix); rpp;
 			     rpp = LIST_NEXT(rpp, rp_entry)) {
 				if (i >= PRLSTSIZ)
 					break;
 				/*
 				 * Use this entry's own prefix.  "pr" belongs
 				 * to the loop above and is NULL here whenever
 				 * that loop ran off the end of its list.
 				 */
 				(void)in6_embedscope(&prl->prefix[i].prefix,
 				    &rpp->rp_prefix, NULL, NULL);
 				prl->prefix[i].raflags = rpp->rp_raf;
 				prl->prefix[i].prefixlen = rpp->rp_plen;
 				prl->prefix[i].vltime = rpp->rp_vltime;
 				prl->prefix[i].pltime = rpp->rp_pltime;
 				prl->prefix[i].if_index = rpp->rp_ifp->if_index;
 				prl->prefix[i].expire = rpp->rp_expire;
 				prl->prefix[i].advrtrs = 0;
 				prl->prefix[i].origin = rpp->rp_origin;
 				i++;
 			}
 		}
 		splx(s);
 
 		break;
 	case OSIOCGIFINFO_IN6:
 		/* The table is indexed by if_index, so that is what is bounded. */
 		if (!nd_ifinfo || ifp->if_index >= nd_ifinfo_indexlim) {
 			error = EINVAL;
 			break;
 		}
 		ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu;
 		ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu;
 		ndi->ndi.basereachable =
 		    nd_ifinfo[ifp->if_index].basereachable;
 		ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable;
 		ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans;
 		ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags;
 		ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm;
 		ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim;
 		ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra;
 		break;
 	case SIOCGIFINFO_IN6:
 		if (!nd_ifinfo || ifp->if_index >= nd_ifinfo_indexlim) {
 			error = EINVAL;
 			break;
 		}
 		ndi->ndi = nd_ifinfo[ifp->if_index];
 		break;
 	case SIOCSIFINFO_FLAGS:
 		/* XXX: almost all other fields of ndi->ndi is unused */
 		if (!nd_ifinfo || ifp->if_index >= nd_ifinfo_indexlim) {
 			error = EINVAL;
 			break;
 		}
 		nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags;
 		break;
 	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
 		/* flush default router list */
 		/*
 		 * xxx sumikawa: should not delete route if default
 		 * route equals to the top of default router list
 		 */
 		bzero(&any, sizeof(any));
 		defrouter_delreq(&any, 0);
 		defrouter_select();
 		/* xxx sumikawa: flush prefix list */
 		break;
 	case SIOCSPFXFLUSH_IN6:
 	    {
 		/* flush all the prefix advertised by routers */
 		struct nd_prefix *pr, *next;
 
 		s = splnet();
 		for (pr = nd_prefix.lh_first; pr; pr = next) {
 			struct in6_ifaddr *ia, *ia_next;
 
 			/* prelist_remove() below invalidates pr. */
 			next = pr->ndpr_next;
 
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue; /* XXX */
 
 			/* do we really have to remove addresses as well? */
 			for (ia = in6_ifaddr; ia; ia = ia_next) {
 				/* ia might be removed.  keep the next ptr. */
 				ia_next = ia->ia_next;
 
 				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 					continue;
 
 				if (ia->ia6_ndpr == pr)
 					in6_purgeaddr(&ia->ia_ifa);
 			}
 			prelist_remove(pr);
 		}
 		splx(s);
 		break;
 	    }
 	case SIOCSRTRFLUSH_IN6:
 	    {
 		/* flush all the default routers */
 		struct nd_defrouter *dr, *next;
 
 		s = splnet();
 		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
 			/*
 			 * The first entry of the list may be stored in
 			 * the routing table, so we'll delete it later.
 			 */
 			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
 				next = TAILQ_NEXT(dr, dr_entry);
 				defrtrlist_del(dr);
 			}
 			defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
 		}
 		splx(s);
 		break;
 	    }
 	case SIOCGNBRINFO_IN6:
 	    {
 		struct llinfo_nd6 *ln;
 		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
 
 		/*
 		 * XXX: KAME specific hack for scoped addresses
 		 *      XXXX: for other scopes than link-local?
 		 */
 		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
 		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
 			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
 
 			/* Embed the interface index when the caller left it out. */
 			if (*idp == 0)
 				*idp = htons(ifp->if_index);
 		}
 
 		s = splnet();
 		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
 			error = EINVAL;
 			splx(s);
 			break;
 		}
 		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 		nbi->state = ln->ln_state;
 		nbi->asked = ln->ln_asked;
 		nbi->isrouter = ln->ln_router;
 		nbi->expire = ln->ln_expire;
 		splx(s);
 
 		break;
 	    }
 	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
 		ndif->ifindex = nd6_defifindex;
 		break;
 	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
 		return(nd6_setdefaultiface(ndif->ifindex));
 	}
 	return(error);
 }
 
 /*
  * Create neighbor cache entry and cache link-layer address,
  * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
  *
  * ifp is the receiving interface, from the sender's address, lladdr and
  * lladdrlen the link-layer address option (lladdr may be NULL), type the
  * ICMP6 type and code its type-dependent information.
  *
  * Returns the neighbor's route, or NULL when nothing was recorded: "from"
  * is the unspecified address, the existing entry is static, no route
  * could be found or created, or the route is not a usable neighbor cache
  * entry (in which case it is handed to nd6_free()).
  * Panics when ifp or from is NULL.
  */
 struct rtentry *
 nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
 	struct ifnet *ifp;
 	struct in6_addr *from;
 	char *lladdr;
 	int lladdrlen;
 	int type;	/* ICMP6 type */
 	int code;	/* type dependent information */
 {
 	struct rtentry *rt = NULL;
 	struct llinfo_nd6 *ln = NULL;
 	int is_newentry;
 	struct sockaddr_dl *sdl = NULL;
 	int do_update;
 	int olladdr;
 	int llchange;
 	int newstate = 0;
 
 	if (!ifp)
 		panic("ifp == NULL in nd6_cache_lladdr");
 	if (!from)
 		panic("from == NULL in nd6_cache_lladdr");
 
 	/* nothing must be updated for unspecified address */
 	if (IN6_IS_ADDR_UNSPECIFIED(from))
 		return NULL;
 
 	/*
 	 * Validation about ifp->if_addrlen and lladdrlen must be done in
 	 * the caller.
 	 *
 	 * XXX If the link does not have link-layer address, what should
 	 * we do? (ifp->if_addrlen == 0)
 	 * Spec says nothing in sections for RA, RS and NA.  There's small
 	 * description on it in NS section (RFC 2461 7.2.3).
 	 */
 
 	/* Look for an existing entry first; create one only if that fails. */
 	rt = nd6_lookup(from, 0, ifp);
 	if (!rt) {
 #if 0
 		/* nothing must be done if there's no lladdr */
 		if (!lladdr || !lladdrlen)
 			return NULL;
 #endif
 
 		rt = nd6_lookup(from, 1, ifp);
 		is_newentry = 1;
 	} else {
 		/* do nothing if static ndp is set */
 		if (rt->rt_flags & RTF_STATIC)
 			return NULL;
 		is_newentry = 0;
 	}
 
 	if (!rt)
 		return NULL;
 	/*
 	 * The route must be a non-gateway LLINFO route with an llinfo and an
 	 * AF_LINK gateway; anything else is discarded through "fail".
 	 */
 	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
 fail:
 		(void)nd6_free(rt);
 		return NULL;
 	}
 	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 	if (!ln)
 		goto fail;
 	if (!rt->rt_gateway)
 		goto fail;
 	if (rt->rt_gateway->sa_family != AF_LINK)
 		goto fail;
 	sdl = SDL(rt->rt_gateway);
 
 	/* olladdr: an address was already cached; llchange: it differs. */
 	olladdr = (sdl->sdl_alen) ? 1 : 0;
 	if (olladdr && lladdr) {
 		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
 			llchange = 1;
 		else
 			llchange = 0;
 	} else
 		llchange = 0;
 
 	/*
 	 * newentry olladdr  lladdr  llchange	(*=record)
 	 *	0	n	n	--	(1)
 	 *	0	y	n	--	(2)
 	 *	0	n	y	--	(3) * STALE
 	 *	0	y	y	n	(4) *
 	 *	0	y	y	y	(5) * STALE
 	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
 	 *	1	--	y	--	(7) * STALE
 	 */
 
 	if (lladdr) {		/* (3-5) and (7) */
 		/*
 		 * Record source link-layer address
 		 * XXX is it dependent to ifp->if_type?
 		 */
 		sdl->sdl_alen = ifp->if_addrlen;
 		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
 	}
 
 	if (!is_newentry) {
 		if ((!olladdr && lladdr)		/* (3) */
 		 || (olladdr && lladdr && llchange)) {	/* (5) */
 			do_update = 1;
 			newstate = ND6_LLINFO_STALE;
 		} else					/* (1-2,4) */
 			do_update = 0;
 	} else {
 		do_update = 1;
 		if (!lladdr)				/* (6) */
 			newstate = ND6_LLINFO_NOSTATE;
 		else					/* (7) */
 			newstate = ND6_LLINFO_STALE;
 	}
 
 	if (do_update) {
 		/*
 		 * Update the state of the neighbor cache.
 		 */
 		ln->ln_state = newstate;
 
 		if (ln->ln_state == ND6_LLINFO_STALE) {
 			/*
 			 * XXX: since nd6_output() below will cause
 			 * state transition to DELAY and reset the timer,
 			 * we must set the timer now, although it is actually
 			 * meaningless.
 			 */
 			ln->ln_expire = time_second + nd6_gctimer;
 
 			if (ln->ln_hold) {
 				/*
 				 * we assume ifp is not a p2p here, so just
 				 * set the 2nd argument as the 1st one.
 				 */
 				nd6_output(ifp, ifp, ln->ln_hold,
 					   (struct sockaddr_in6 *)rt_key(rt),
 					   rt);
 				/* The held packet now belongs to nd6_output(). */
 				ln->ln_hold = NULL;
 			}
 		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 			/* probe right away */
 			ln->ln_expire = time_second;
 		}
 	}
 
 	/*
 	 * ICMP6 type dependent behavior.
 	 *
 	 * NS: clear IsRouter if new entry
 	 * RS: clear IsRouter
 	 * RA: set IsRouter if there's lladdr
 	 * redir: clear IsRouter if new entry
 	 *
 	 * RA case, (1):
 	 * The spec says that we must set IsRouter in the following cases:
 	 * - If lladdr exist, set IsRouter.  This means (3-5) and (7).
 	 * - If it is old entry (!newentry), set IsRouter.  This means (1-5).
 	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
 	 * A question arises for (1) case.  (1) case has no lladdr in the
 	 * neighbor cache, this is similar to (6).
 	 * This case is rare but we figured that we MUST NOT set IsRouter.
 	 *
 	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
 	 *							D R
 	 *	0	n	n	--	(1)	c   ?     s
 	 *	0	y	n	--	(2)	c   s     s
 	 *	0	n	y	--	(3)	c   s     s
 	 *	0	y	y	n	(4)	c   s     s
 	 *	0	y	y	y	(5)	c   s     s
 	 *	1	--	n	--	(6) c	c	c s
 	 *	1	--	y	--	(7) c	c   s	c s
 	 *
 	 *					(c=clear s=set)
 	 */
 	switch (type & 0xff) {
 	case ND_NEIGHBOR_SOLICIT:
 		/*
 		 * New entry must have is_router flag cleared.
 		 */
 		if (is_newentry)	/* (6-7) */
 			ln->ln_router = 0;
 		break;
 	case ND_REDIRECT:
 		/*
 		 * If the icmp is a redirect to a better router, always set the
 		 * is_router flag. Otherwise, if the entry is newly created,
 		 * clear the flag. [RFC 2461, sec 8.3]
 		 */
 		if (code == ND_REDIRECT_ROUTER)
 			ln->ln_router = 1;
 		else if (is_newentry) /* (6-7) */
 			ln->ln_router = 0;
 		break;
 	case ND_ROUTER_SOLICIT:
 		/*
 		 * is_router flag must always be cleared.
 		 */
 		ln->ln_router = 0;
 		break;
 	case ND_ROUTER_ADVERT:
 		/*
 		 * Mark an entry with lladdr as a router.
 		 */
 		if ((!is_newentry && (olladdr || lladdr))	/* (2-5) */
 		 || (is_newentry && lladdr)) {			/* (7) */
 			ln->ln_router = 1;
 		}
 		break;
 	}
 
 	/*
 	 * When the link-layer address of a router changes, select the
 	 * best router again.  In particular, when the neighbor entry is newly
 	 * created, it might affect the selection policy.
 	 * Question: can we restrict the first condition to the "is_newentry"
 	 * case?
 	 * XXX: when we hear an RA from a new router with the link-layer
 	 * address option, defrouter_select() is called twice, since
 	 * defrtrlist_update called the function as well.  However, I believe
 	 * we can compromise the overhead, since it only happens the first
 	 * time.
 	 * XXX: although defrouter_select() should not have a bad effect
 	 * for those are not autoconfigured hosts, we explicitly avoid such
 	 * cases for safety.
 	 */
 	if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv)
 		defrouter_select();
 
 	return rt;
 }
 
 /*
  * Slow periodic timer: re-arms itself every ND6_SLOWTIMER_INTERVAL seconds
  * and counts down each interface's recalctm, drawing a fresh randomized
  * reachable time from basereachable whenever the countdown runs out.
  * The argument is not used.
  */
 static void
 nd6_slowtimo(ignored_arg)
     void *ignored_arg;
 {
 	struct nd_ifinfo *ndi;
 	int idx;
 	int s;
 
 	s = splnet();
 	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, NULL);
 
 	for (idx = 1; idx <= if_index; idx++) {
 		/* No table, or this index lies beyond it. */
 		if (nd_ifinfo == NULL || idx >= nd_ifinfo_indexlim)
 			continue;
 
 		ndi = &nd_ifinfo[idx];
 		if (!ndi->basereachable)
 			continue;	/* not initialized yet */
 
 		ndi->recalctm -= ND6_SLOWTIMER_INTERVAL;
 		if (ndi->recalctm > 0)
 			continue;
 
 		/*
 		 * Reachable time rarely changes through router
 		 * advertisements, so a new random value SHOULD be
 		 * recomputed at least once every few hours.
 		 * (RFC 2461, 6.3.4)
 		 */
 		ndi->recalctm = nd6_recalc_reachtm_interval;
 		ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 	}
 	splx(s);
 }
 
 #define senderr(e) { error = (e); goto bad;}
 /*
  * Send an IPv6 packet out of ifp after next-hop determination, address
  * resolution and Neighbor Unreachability Detection state handling.
  *
  * ifp is the outgoing interface; origifp is passed to if_output in its
  * place when ifp is a loopback interface.  m0 is the packet, dst its
  * destination and rt0 the route to it (may be NULL).
  *
  * Multicast destinations and interfaces that need no neighbor cache are
  * sent straight away.  When the neighbor's link-layer address is not yet
  * resolved the packet is parked on the entry (ln_hold, replacing any
  * packet already held) and 0 is returned.  Otherwise the result of
  * if_output is returned.  On error the mbuf is freed and an errno value
  * (EHOSTUNREACH or EIO) is returned.
  */
 int
 nd6_output(ifp, origifp, m0, dst, rt0)
 	struct ifnet *ifp;
 	struct ifnet *origifp;
 	struct mbuf *m0;
 	struct sockaddr_in6 *dst;
 	struct rtentry *rt0;
 {
 	struct mbuf *m = m0;
 	struct rtentry *rt = rt0;
 	struct sockaddr_in6 *gw6 = NULL;
 	struct llinfo_nd6 *ln = NULL;
 	int error = 0;
 
 	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 		goto sendpkt;
 
 	if (nd6_need_cache(ifp) == 0)
 		goto sendpkt;
 
 	/*
 	 * next hop determination.  This routine is derived from ether_output.
 	 */
 	if (rt) {
 		/* The supplied route went down: look the destination up again. */
 		if ((rt->rt_flags & RTF_UP) == 0) {
-			if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) !=
-				NULL)
-			{
+			rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL);
+			if (rt != NULL) {
 				rt->rt_refcnt--;
+				RT_UNLOCK(rt);
 				if (rt->rt_ifp != ifp) {
 					/* XXX: loop care? */
 					return nd6_output(ifp, origifp, m0,
 							  dst, rt);
 				}
 			} else
 				senderr(EHOSTUNREACH);
 		}
 
 		if (rt->rt_flags & RTF_GATEWAY) {
 			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;
 
 			/*
 			 * We skip link-layer address resolution and NUD
 			 * if the gateway is not a neighbor from ND point
 			 * of view, regardless of the value of nd_ifinfo.flags.
 			 * The second condition is a bit tricky; we skip
 			 * if the gateway is our own address, which is
 			 * sometimes used to install a route to a p2p link.
 			 */
 			if (!nd6_is_addr_neighbor(gw6, ifp) ||
 			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
 				/*
 				 * We allow this kind of tricky route only
 				 * when the outgoing interface is p2p.
 				 * XXX: we may need a more generic rule here.
 				 */
 				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 					senderr(EHOSTUNREACH);
 
 				goto sendpkt;
 			}
 
 			/*
 			 * Switch to the cached route to the gateway,
 			 * (re)allocating it when it is missing or down.
 			 */
 			if (rt->rt_gwroute == 0)
 				goto lookup;
 			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
 				rtfree(rt); rt = rt0;
 			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
 				if ((rt = rt->rt_gwroute) == 0)
 					senderr(EHOSTUNREACH);
+				RT_UNLOCK(rt);
 			}
 		}
 	}
 
 	/*
 	 * Address resolution or Neighbor Unreachability Detection
 	 * for the next hop.
 	 * At this point, the destination of the packet must be a unicast
 	 * or an anycast address(i.e. not a multicast).
 	 */
 
 	/* Look up the neighbor cache for the nexthop */
 	if (rt && (rt->rt_flags & RTF_LLINFO) != 0)
 		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 	else {
 		/*
 		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 		 * the condition below is not very efficient.  But we believe
 		 * it is tolerable, because this should be a rare case.
 		 */
 		if (nd6_is_addr_neighbor(dst, ifp) &&
 		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
 			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 	}
 	if (!ln || !rt) {
 		/*
 		 * No neighbor entry.  This is an error only on a non-p2p
 		 * interface whose ND6_IFF_PERFORMNUD flag is clear.
 		 */
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
 		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
 			log(LOG_DEBUG,
 			    "nd6_output: can't allocate llinfo for %s "
 			    "(ln=%p, rt=%p)\n",
 			    ip6_sprintf(&dst->sin6_addr), ln, rt);
 			senderr(EIO);	/* XXX: good error? */
 		}
 
 		goto sendpkt;	/* send anyway */
 	}
 
 	/* We don't have to do link-layer address resolution on a p2p link. */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ln->ln_state < ND6_LLINFO_REACHABLE) {
 		ln->ln_state = ND6_LLINFO_STALE;
 		ln->ln_expire = time_second + nd6_gctimer;
 	}
 
 	/*
 	 * The first time we send a packet to a neighbor whose entry is
 	 * STALE, we have to change the state to DELAY and set a timer to
 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure we do
 	 * neighbor unreachability detection on expiration.
 	 * (RFC 2461 7.3.3)
 	 */
 	if (ln->ln_state == ND6_LLINFO_STALE) {
 		ln->ln_asked = 0;
 		ln->ln_state = ND6_LLINFO_DELAY;
 		ln->ln_expire = time_second + nd6_delay;
 	}
 
 	/*
 	 * If the neighbor cache entry has a state other than INCOMPLETE
 	 * (i.e. its link-layer address is already resolved), just
 	 * send the packet.
 	 */
 	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 		goto sendpkt;
 
 	/*
 	 * There is a neighbor cache entry, but no ethernet address
 	 * response yet.  Replace the held mbuf (if any) with this
 	 * latest one.
 	 *
 	 * This code conforms to the rate-limiting rule described in Section
 	 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
 	 * an NS below.
 	 */
 	if (ln->ln_state == ND6_LLINFO_NOSTATE)
 		ln->ln_state = ND6_LLINFO_INCOMPLETE;
 	if (ln->ln_hold)
 		m_freem(ln->ln_hold);
 	ln->ln_hold = m;
 	/* Solicit only when the timer has run out and tries remain. */
 	if (ln->ln_expire) {
 		if (ln->ln_asked < nd6_mmaxtries &&
 		    ln->ln_expire < time_second) {
 			ln->ln_asked++;
 			ln->ln_expire = time_second +
 				nd_ifinfo[ifp->if_index].retrans / 1000;
 			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
 		}
 	}
 	return(0);
 	
   sendpkt:
 
 #ifdef MAC
 	mac_create_mbuf_linklayer(ifp, m);
 #endif
 	/* A loopback ifp is handed origifp instead of itself. */
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
 					 rt));
 	}
 	return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt));
 
   bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }	
 #undef senderr
 
 /*
  * Report whether neighbor cache entries are kept for interface "ifp".
  * Returns 1 for the link types listed below and 0 for every other type.
  */
 int
 nd6_need_cache(ifp)
 	struct ifnet *ifp;
 {
 	int needed;
 
 	/*
 	 * XXX: we currently do not make neighbor cache on any interface
 	 * other than ARCnet, Ethernet, FDDI and GIF.
 	 *
 	 * RFC2893 says:
 	 * - unidirectional tunnels needs no ND
 	 */
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_IEEE1394:
 #ifdef IFT_L2VLAN
 	case IFT_L2VLAN:
 #endif
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
 	case IFT_GIF:		/* XXX need more cases? */
 		needed = 1;
 		break;
 	default:
 		needed = 0;
 		break;
 	}
 
 	return(needed);
 }
 
 /*
  * Write the link-layer destination for packet "m" into "desten".
  *
  * Multicast packets get an address derived from the interface type (and,
  * for the Ethernet-style types, from dst); all other packets get the
  * address cached in the AF_LINK gateway of "rt".
  *
  * Returns 1 when desten was filled in.  Returns 0 after freeing m when
  * the interface type is not handled for multicast, rt is NULL, or the
  * gateway carries no usable link-layer address.
  */
 int
 nd6_storelladdr(ifp, rt, m, dst, desten)
 	struct ifnet *ifp;
 	struct rtentry *rt;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	u_char *desten;
 {
 	struct sockaddr_dl *gw_sdl;
 	int pos;
 
 	if ((m->m_flags & M_MCAST) != 0) {
 		switch (ifp->if_type) {
 		case IFT_ETHER:
 		case IFT_FDDI:
 #ifdef IFT_L2VLAN
 		case IFT_L2VLAN:
 #endif
 #ifdef IFT_IEEE80211
 		case IFT_IEEE80211:
 #endif
 		case IFT_ISO88025:
 			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
 						 desten);
 			return(1);
 		case IFT_IEEE1394:
 			/*
 			 * netbsd can use if_broadcastaddr, but we don't do so
 			 * to reduce # of ifdef.
 			 */
 			for (pos = 0; pos < ifp->if_addrlen; pos++)
 				desten[pos] = ~0;
 			return(1);
 		case IFT_ARCNET:
 			*desten = 0;
 			return(1);
 		default:
 			goto drop;
 		}
 	}
 
 	/* this could happen, if we could not allocate memory */
 	if (rt == NULL)
 		goto drop;
 
 	if (rt->rt_gateway->sa_family != AF_LINK) {
 		printf("nd6_storelladdr: something odd happens\n");
 		goto drop;
 	}
 
 	gw_sdl = SDL(rt->rt_gateway);
 	if (gw_sdl->sdl_alen == 0) {
 		/* this should be impossible, but we bark here for debugging */
 		printf("nd6_storelladdr: sdl_alen == 0\n");
 		goto drop;
 	}
 
 	bcopy(LLADDR(gw_sdl), desten, gw_sdl->sdl_alen);
 	return(1);
 
 drop:
 	/* Every failure path consumes the packet. */
 	m_freem(m);
 	return(0);
 }
 
 /*
  * Read-only sysctl nodes under net.inet6.icmp6 that export the default
  * router list and the prefix list; the handlers are defined below.
  */
 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet6_icmp6);
 #endif
 /* default router list, served by nd6_sysctl_drlist() */
 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
 	CTLFLAG_RD, nd6_sysctl_drlist, "");
 /* prefix list, served by nd6_sysctl_prlist() */
 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
 	CTLFLAG_RD, nd6_sysctl_prlist, "");
 
 static int
 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	char buf[1024];
 	struct in6_defrouter *d, *de;
 	struct nd_defrouter *dr;
 
 	if (req->newptr)
 		return EPERM;
 	error = 0;
 
 	for (dr = TAILQ_FIRST(&nd_defrouter);
 	     dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
 		d = (struct in6_defrouter *)buf;
 		de = (struct in6_defrouter *)(buf + sizeof(buf));
 
 		if (d + 1 <= de) {
 			bzero(d, sizeof(*d));
 			d->rtaddr.sin6_family = AF_INET6;
 			d->rtaddr.sin6_len = sizeof(d->rtaddr);
 			if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
 			    dr->ifp) != 0)
 				log(LOG_ERR,
 				    "scope error in "
 				    "default router list (%s)\n",
 				    ip6_sprintf(&dr->rtaddr));
 			d->flags = dr->flags;
 			d->rtlifetime = dr->rtlifetime;
 			d->expire = dr->expire;
 			d->if_index = dr->ifp->if_index;
 		} else
 			panic("buffer too short");
 
 		error = SYSCTL_OUT(req, buf, sizeof(*d));
 		if (error)
 			break;
 	}
 	return error;
 }
 
 /*
  * sysctl handler exporting the prefix list.
  *
  * For every entry of nd_prefix it emits one struct in6_prefix followed by
  * one struct sockaddr_in6 per advertising router, staged in a 1024-byte
  * buffer.  Routers that do not fit into the buffer are left out, and
  * p->advrtrs always equals the number of sockaddr_in6 records that
  * actually follow, so no more than what was filled in is ever copied out.
  *
  * Write attempts are rejected with EPERM.  Returns 0, or the first error
  * reported by SYSCTL_OUT.
  */
 static int
 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	char buf[1024];
 	struct in6_prefix *p, *pe;
 	struct nd_prefix *pr;
 
 	if (req->newptr)
 		return EPERM;
 	error = 0;
 
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		u_short advrtrs;
 		size_t advance;
 		struct sockaddr_in6 *sin6, *s6;
 		struct nd_pfxrouter *pfr;
 
 		p = (struct in6_prefix *)buf;
 		pe = (struct in6_prefix *)(buf + sizeof(buf));
 
 		if (p + 1 <= pe) {
 			bzero(p, sizeof(*p));
 			/* Router addresses are laid out right behind the header. */
 			sin6 = (struct sockaddr_in6 *)(p + 1);
 
 			p->prefix = pr->ndpr_prefix;
 			if (in6_recoverscope(&p->prefix,
 			    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
 				log(LOG_ERR,
 				    "scope error in prefix list (%s)\n",
 				    ip6_sprintf(&p->prefix.sin6_addr));
 			p->raflags = pr->ndpr_raf;
 			p->prefixlen = pr->ndpr_plen;
 			p->vltime = pr->ndpr_vltime;
 			p->pltime = pr->ndpr_pltime;
 			p->if_index = pr->ndpr_ifp->if_index;
 			p->expire = pr->ndpr_expire;
 			p->refcnt = pr->ndpr_refcnt;
 			p->flags = pr->ndpr_stateflags;
 			p->origin = PR_ORIG_RA;
 			advrtrs = 0;
 			for (pfr = pr->ndpr_advrtrs.lh_first;
 			     pfr;
 			     pfr = pfr->pfr_next) {
 				/*
 				 * Stop once the next record would run past
 				 * the end of buf.  Counting on would make
 				 * "advance" below exceed the buffer and copy
 				 * stale stack contents out.
 				 */
 				if ((void *)&sin6[advrtrs + 1] >
 				    (void *)pe)
 					break;
 				s6 = &sin6[advrtrs];
 				bzero(s6, sizeof(*s6));
 				s6->sin6_family = AF_INET6;
 				s6->sin6_len = sizeof(*sin6);
 				if (in6_recoverscope(s6,
 				    &pfr->router->rtaddr,
 				    pfr->router->ifp) != 0)
 					log(LOG_ERR,
 					    "scope error in "
 					    "prefix list (%s)\n",
 					    ip6_sprintf(&pfr->router->rtaddr));
 				advrtrs++;
 			}
 			p->advrtrs = advrtrs;
 		} else
 			panic("buffer too short");
 
 		/* Header plus exactly the router records stored above. */
 		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
 		error = SYSCTL_OUT(req, buf, advance);
 		if (error)
 			break;
 	}
 	return error;
 }
Index: head/sys/netinet6/nd6_rtr.c
===================================================================
--- head/sys/netinet6/nd6_rtr.c	(revision 120726)
+++ head/sys/netinet6/nd6_rtr.c	(revision 120727)
@@ -1,1989 +1,1990 @@
 /*	$FreeBSD$	*/
 /*	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/radix.h>
 
 #include <netinet/in.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 
 #include <net/net_osdep.h>
 
 #define SDL(s)	((struct sockaddr_dl *)s)
 
 static struct nd_defrouter *defrtrlist_update __P((struct nd_defrouter *));
 static struct in6_ifaddr *in6_ifadd __P((struct nd_prefix *,
 	struct in6_addr *));
 static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
 	struct nd_defrouter *));
 static void pfxrtr_add __P((struct nd_prefix *, struct nd_defrouter *));
 static void pfxrtr_del __P((struct nd_pfxrouter *));
 static struct nd_pfxrouter *find_pfxlist_reachable_router
 	__P((struct nd_prefix *));
 static void defrouter_addifreq __P((struct ifnet *));
 static void nd6_rtmsg __P((int, struct rtentry *));
 
 static void in6_init_address_ltimes __P((struct nd_prefix *ndpr,
 					 struct in6_addrlifetime *lt6));
 
 static int rt6_deleteroute __P((struct radix_node *, void *));
 
 extern int nd6_recalc_reachtm_interval;
 
 /*
  * Default interface: defrouter_select() installs a default route to it
  * when the default router list is empty and we are not forwarding.
  */
 static struct ifnet *nd6_defifp;
 /* NOTE(review): presumably the if_index matching nd6_defifp - confirm. */
 int nd6_defifindex;
 
 /*
  * When non-zero, prelist_update() also creates a temporary address
  * (in6_tmpifadd()) each time it autoconfigures a new address.
  */
 int ip6_use_tempaddr = 0;
 
 /* Temporary address tunables; not read by the code visible in this chunk. */
 int ip6_desync_factor;
 u_int32_t ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
 u_int32_t ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
 /*
  * shorter lifetimes for debugging purposes.
 int ip6_temp_preferred_lifetime = 800;
 static int ip6_temp_valid_lifetime = 1800;
 */
 int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
 
 /*
  * Receive Router Solicitation Message - just for routers.
  * Router solicitation/advertisement is mostly managed by userland program
  * (rtadvd) so here we have no function like nd6_ra_output().
  *
  * Based on RFC 2461
  *
  * m        - the received packet; released with m_freem() on both the
  *            "freeit" and "bad" exits.
  * off      - byte offset of the RS message from the start of the IPv6 header.
  * icmp6len - length of the ICMPv6 message; what remains after the fixed RS
  *            header is handed to the option parser.
  *
  * The only effect on success is a neighbor cache update for the sender.
  */
 void
 nd6_rs_input(m, off, icmp6len)
 	struct	mbuf *m;
 	int off, icmp6len;
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_solicit *nd_rs;
 	struct in6_addr saddr6 = ip6->ip6_src;
 #if 0
 	struct in6_addr daddr6 = ip6->ip6_dst;
 #endif
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 #if 0
 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)NULL;
 	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)NULL;
 	struct rtentry *rt = NULL;
 	int is_newentry;
 #endif
 	union nd_opts ndopts;
 
 	/* If I'm not a router, ignore it. */
 	if (ip6_accept_rtadv != 0 || ip6_forwarding != 1)
 		goto freeit;
 
 	/* Sanity checks */
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
 		    ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	/*
 	 * Don't update the neighbor cache, if src = ::.
 	 * This indicates that the src has no IP address assigned yet.
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
 		goto freeit;
 
 	/*
 	 * NOTE(review): both variants below can leave the function without
 	 * reaching m_freem(); presumably the macros have already disposed of
 	 * the mbuf in that case - confirm against their definitions.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
 	if (nd_rs == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return;
 	}
 #endif
 
 	/* Options start right after the fixed RS header. */
 	icmp6len -= sizeof(*nd_rs);
 	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_src_lladdr) {
 		/* address bytes follow the option header */
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		/* nd_opt_len is shifted left by 3, i.e. multiplied by 8 */
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * Expected option size: interface address length plus 2, rounded up
 	 * to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: lladdrlen mismatch for %s "
 		    "(if %d, RS packet %d)\n",
 			ip6_sprintf(&saddr6), ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	/* same as freeit, but counted as a bad RS */
 	icmp6stat.icp6s_badrs++;
 	m_freem(m);
 }
 
 /*
  * Receive Router Advertisement Message.
  *
  * Based on RFC 2461
  * TODO: on-link bit on prefix information
  * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
  *
  * m        - the received packet; released with m_freem() on both the
  *            "freeit" and "bad" exits.
  * off      - byte offset of the RA message from the start of the IPv6 header.
  * icmp6len - length of the ICMPv6 message; what remains after the fixed RA
  *            header is handed to the option parser.
  *
  * In order, the function updates the per-interface ND parameters and the
  * default router list, feeds each prefix information option to
  * prelist_update(), applies the MTU option, and finally records the
  * sender's link-layer address.
  */
 void
 nd6_ra_input(m, off, icmp6len)
 	struct	mbuf *m;
 	int off, icmp6len;
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_advert *nd_ra;
 	struct in6_addr saddr6 = ip6->ip6_src;
 #if 0
 	struct in6_addr daddr6 = ip6->ip6_dst;
 	int flags; /* = nd_ra->nd_ra_flags_reserved; */
 	int is_managed = ((flags & ND_RA_FLAG_MANAGED) != 0);
 	int is_other = ((flags & ND_RA_FLAG_OTHER) != 0);
 #endif
 	union nd_opts ndopts;
 	struct nd_defrouter *dr;
 
 	/*
 	 * We accept RAs only when
 	 * the system-wide variable allows the acceptance, and
 	 * per-interface variable allows RAs on the receiving interface.
 	 */
 	if (ip6_accept_rtadv == 0)
 		goto freeit;
 	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
 		goto freeit;
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
 		    ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: src %s is not link-local\n",
 		    ip6_sprintf(&saddr6)));
 		goto bad;
 	}
 
 	/*
 	 * NOTE(review): both variants below can leave the function without
 	 * reaching m_freem(); presumably the macros have already disposed of
 	 * the mbuf in that case - confirm against their definitions.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
 	if (nd_ra == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return;
 	}
 #endif
 
 	/* Options start right after the fixed RA header. */
 	icmp6len -= sizeof(*nd_ra);
 	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	/*
 	 * Build a template router entry from the RA and merge it into the
 	 * default router list; also pick up the advertised reachable time,
 	 * retransmit timer and hop limit when they are non-zero.
 	 */
     {
 	struct nd_defrouter dr0;
 	u_int32_t advreachable = nd_ra->nd_ra_reachable;
 
 	dr0.rtaddr = saddr6;
 	dr0.flags  = nd_ra->nd_ra_flags_reserved;
 	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
 	dr0.expire = time_second + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	dr0.advint = 0;		/* Mobile IPv6 */
 	dr0.advint_expire = 0;	/* Mobile IPv6 */
 	dr0.advints_lost = 0;	/* Mobile IPv6 */
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
 		advreachable = ntohl(advreachable);
 		if (advreachable <= MAX_REACHABLE_TIME &&
 		    ndi->basereachable != advreachable) {
 			ndi->basereachable = advreachable;
 			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 			ndi->recalctm = nd6_recalc_reachtm_interval; /* reset */
 		}
 	}
 	if (nd_ra->nd_ra_retransmit)
 		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
 	if (nd_ra->nd_ra_curhoplimit)
 		ndi->chlim = nd_ra->nd_ra_curhoplimit;
 	/* dr may be NULL (lifetime 0 or allocation failure) */
 	dr = defrtrlist_update(&dr0);
     }
 
 	/*
 	 * prefix
 	 */
 	if (ndopts.nd_opts_pi) {
 		struct nd_opt_hdr *pt;
 		struct nd_opt_prefix_info *pi = NULL;
 		struct nd_prefix pr;
 
 		/*
 		 * Walk the options from the first to the last prefix
 		 * information option, stepping by each option's own length
 		 * (nd_opt_len << 3 bytes) and skipping other option types.
 		 */
 		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
 		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
 		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
 						(pt->nd_opt_len << 3))) {
 			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
 				continue;
 			pi = (struct nd_opt_prefix_info *)pt;
 
 			if (pi->nd_opt_pi_len != 4) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid option "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_len));
 				continue;
 			}
 
 			if (128 < pi->nd_opt_pi_prefix_len) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_prefix_len));
 				continue;
 			}
 
 			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
 			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "%s, ignored\n",
 				    ip6_sprintf(&pi->nd_opt_pi_prefix)));
 				continue;
 			}
 
 			/* aggregatable unicast address, rfc2374 */
 			if ((pi->nd_opt_pi_prefix.s6_addr8[0] & 0xe0) == 0x20
 			 && pi->nd_opt_pi_prefix_len != 64) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefixlen "
 				    "%d for rfc2374 prefix %s, ignored\n",
 				    pi->nd_opt_pi_prefix_len,
 				    ip6_sprintf(&pi->nd_opt_pi_prefix)));
 				continue;
 			}
 
 			/* translate the option into an nd_prefix template */
 			bzero(&pr, sizeof(pr));
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
 			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
 
 			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
 					      ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
 			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
 					    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
 			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
 			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
 			pr.ndpr_pltime =
 				ntohl(pi->nd_opt_pi_preferred_time);
 
 			if (in6_init_prefix_ltimes(&pr))
 				continue; /* prefix lifetime init failed */
 
 			(void)prelist_update(&pr, dr, m);
 		}
 	}
 
 	/*
 	 * MTU
 	 */
 	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
 		u_int32_t mtu = ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
 
 		/* lower bound */
 		if (mtu < IPV6_MMTU) {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
 			    "mtu=%d sent from %s, ignoring\n",
 			    mtu, ip6_sprintf(&ip6->ip6_src)));
 			goto skip;
 		}
 
 		/* upper bound */
 		if (ndi->maxmtu) {
 			if (mtu <= ndi->maxmtu) {
 				int change = (ndi->linkmtu != mtu);
 
 				ndi->linkmtu = mtu;
 				if (change) /* in6_maxmtu may change */
 					in6_setmaxmtu();
 			} else {
 				nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
 				    "mtu=%d sent from %s; "
 				    "exceeds maxmtu %d, ignoring\n",
 				    mtu, ip6_sprintf(&ip6->ip6_src),
 				    ndi->maxmtu));
 			}
 		} else {
 			nd6log((LOG_INFO, "nd6_ra_input: mtu option "
 			    "mtu=%d sent from %s; maxmtu unknown, "
 			    "ignoring\n",
 			    mtu, ip6_sprintf(&ip6->ip6_src)));
 		}
 	}
 
  skip:
 	
 	/*
 	 * Source link layer address
 	 */
     {
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	
 	if (ndopts.nd_opts_src_lladdr) {
 		/* address bytes follow the option header */
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		/* nd_opt_len is shifted left by 3, i.e. multiplied by 8 */
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * Expected option size: interface address length plus 2, rounded up
 	 * to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: lladdrlen mismatch for %s "
 		    "(if %d, RA packet %d)\n",
 			ip6_sprintf(&saddr6), ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0);
 
 	/*
 	 * Installing a link-layer address might change the state of the
 	 * router's neighbor cache, which might also affect our on-link
 	 * detection of advertised prefixes.
 	 */
 	pfxlist_onlink_check();
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	/* same as freeit, but counted as a bad RA */
 	icmp6stat.icp6s_badra++;
 	m_freem(m);
 }
 
 /*
  * default router list processing subroutines
  */
 
 /* tell the change to user processes watching the routing socket. */
 static void
 nd6_rtmsg(cmd, rt)
 	int cmd;
 	struct rtentry *rt;
 {
 	struct rt_addrinfo info;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info.rti_info[RTAX_IFP] =
 		(struct sockaddr *)TAILQ_FIRST(&rt->rt_ifp->if_addrlist);
 	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 
 	rt_missmsg(cmd, &info, rt->rt_flags, 0);
 }
 
 /*
  * Install an IPv6 default route through the given router and announce it
  * on the routing socket.
  *
  * new - default router entry; only new->rtaddr is used, as the gateway.
  *
  * The result of rtrequest() is ignored; nothing is reported when no route
  * entry comes back.
  */
 void
 defrouter_addreq(new)
 	struct nd_defrouter *new;
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
-	int s;
 
 	/* all-zero destination and mask: the default route */
 	Bzero(&def, sizeof(def));
 	Bzero(&mask, sizeof(mask));
 	Bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len
 		= sizeof(struct sockaddr_in6);
 	def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
-	s = splnet();
 	(void)rtrequest(RTM_ADD, (struct sockaddr *)&def,
 		(struct sockaddr *)&gate, (struct sockaddr *)&mask,
 		RTF_GATEWAY, &newrt);
 	if (newrt) {
+		RT_LOCK(newrt);
 		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
 		/* drop the reference handed back through &newrt */
 		newrt->rt_refcnt--;
+		RT_UNLOCK(newrt);
 	}
-	splx(s);
 	return;
 }
 
 /*
  * Add a route to a given interface as default.
  *
  * ifp - interface the default route should point at.
  *
  * Logs and returns without installing anything when no suitable ifaddr is
  * found on the interface or when rtrequest() fails.
  */
 void
 defrouter_addifreq(ifp)
 	struct ifnet *ifp;
 {
 	struct sockaddr_in6 def, mask;
 	struct ifaddr *ifa;
 	struct rtentry *newrt = NULL;
 	int error, flags;
 
 	/* all-zero destination and mask: the default route */
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 
 	def.sin6_len = mask.sin6_len = sizeof(struct sockaddr_in6);
 	def.sin6_family = mask.sin6_family = AF_INET6;
 
 	/*
 	 * Search for an ifaddr belonging to the specified interface.
 	 * XXX: An IPv6 address is required to be assigned on the interface.
 	 */
 	if ((ifa = ifaof_ifpforaddr((struct sockaddr *)&def, ifp)) == NULL) {
 		nd6log((LOG_ERR,	/* better error? */
 		    "defrouter_addifreq: failed to find an ifaddr "
 		    "to install a route to interface %s\n",
 		    if_name(ifp)));
 		return;
 	}
 
 	/* the interface address itself is passed as the gateway argument */
 	flags = ifa->ifa_flags;
 	error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr,
 			  (struct sockaddr *)&mask, flags, &newrt);
 	if (error != 0) {
 		nd6log((LOG_ERR,
 		    "defrouter_addifreq: failed to install a route to "
 		    "interface %s (errno = %d)\n",
 		    if_name(ifp), error));
-
-		if (newrt)	/* maybe unnecessary, but do it for safety */
-			newrt->rt_refcnt--;
 	} else {
 		if (newrt) {
+			RT_LOCK(newrt);
 			nd6_rtmsg(RTM_ADD, newrt);
 			/* drop the reference handed back through &newrt */
 			newrt->rt_refcnt--;
+			RT_UNLOCK(newrt);
 		}
 	}
 }
 
 struct nd_defrouter *
 defrouter_lookup(addr, ifp)
 	struct in6_addr *addr;
 	struct ifnet *ifp;
 {
 	struct nd_defrouter *dr;
 
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
 		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
 			return(dr);
 	}
 
 	return(NULL);		/* search failed */
 }
 
 void
 defrouter_delreq(dr, dofree)
 	struct nd_defrouter *dr;
 	int dofree;
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *oldrt = NULL;
 
 	Bzero(&def, sizeof(def));
 	Bzero(&mask, sizeof(mask));
 	Bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len
 		= sizeof(struct sockaddr_in6);
 	def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = dr->rtaddr;
 
 	rtrequest(RTM_DELETE, (struct sockaddr *)&def,
 		  (struct sockaddr *)&gate,
 		  (struct sockaddr *)&mask,
 		  RTF_GATEWAY, &oldrt);
 	if (oldrt) {
 		nd6_rtmsg(RTM_DELETE, oldrt);
 		RTFREE(oldrt);
 	}
 
 	if (dofree)		/* XXX: necessary? */
 		free(dr, M_IP6NDP);
 }
 
 void
 defrtrlist_del(dr)
 	struct nd_defrouter *dr;
 {
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
 
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
 	 */
 	if (!ip6_forwarding && ip6_accept_rtadv) {
 		/* above is a good condition? */
 		rt6_flush(&dr->rtaddr, dr->ifp);
 	}
 
 	if (dr == TAILQ_FIRST(&nd_defrouter))
 		deldr = dr;	/* The router is primary. */
 
 	TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		struct nd_pfxrouter *pfxrtr;
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
 			pfxrtr_del(pfxrtr);
 	}
 	pfxlist_onlink_check();
 
 	/*
 	 * If the router is the primary one, choose a new one.
 	 * Note that defrouter_select() will remove the current gateway
 	 * from the routing table.
 	 */
 	if (deldr)
 		defrouter_select();
 
 	free(dr, M_IP6NDP);
 }
 
 /*
  * Default Router Selection according to Section 6.3.6 of RFC 2461:
  * 1) Routers that are reachable or probably reachable should be
  *    preferred.
  * 2) When no routers on the list are known to be reachable or
  *    probably reachable, routers SHOULD be selected in a round-robin
  *    fashion.
  * 3) If the Default Router List is empty, assume that all
  *    destinations are on-link.
  */
 void
 defrouter_select()
 {
 	int s = splnet();
 	struct nd_defrouter *dr, anydr;
 	struct rtentry *rt = NULL;
 	struct llinfo_nd6 *ln = NULL;
 
 	/*
 	 * Search for a (probably) reachable router from the list.
 	 */
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
 		if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
 		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
 		    ND6_IS_LLINFO_PROBREACH(ln)) {
 			/* Got it, and move it to the head */
 			TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
 			TAILQ_INSERT_HEAD(&nd_defrouter, dr, dr_entry);
 			break;
 		}
 	}
 
 	if ((dr = TAILQ_FIRST(&nd_defrouter))) {
 		/*
 		 * De-install the previous default gateway and install
 		 * a new one.
 		 * Note that if there is no reachable router in the list,
 		 * the head entry will be used anyway.
 		 * XXX: do we have to check the current routing table entry?
 		 */
 		bzero(&anydr, sizeof(anydr));
 		defrouter_delreq(&anydr, 0);
 		defrouter_addreq(dr);
 	}
 	else {
 		/*
 		 * The Default Router List is empty, so install the default
 		 * route to an inteface.
 		 * XXX: The specification does not say this mechanism should
 		 * be restricted to hosts, but this would be not useful
 		 * (even harmful) for routers.
 		 */
 		if (!ip6_forwarding) {
 			/*
 			 * De-install the current default route
 			 * in advance.
 			 */
 			bzero(&anydr, sizeof(anydr));
 			defrouter_delreq(&anydr, 0);
 			if (nd6_defifp) {
 				/*
 				 * Install a route to the default interface
 				 * as default route.
 				 * XXX: we enable this for host only, because
 				 * this may override a default route installed
 				 * a user process (e.g. routing daemon) in a
 				 * router case.
 				 */
 				defrouter_addifreq(nd6_defifp);
 			} else {
 				nd6log((LOG_INFO, "defrouter_select: "
 				    "there's no default router and no default"
 				    " interface\n"));
 			}
 		}
 	}
 
 	splx(s);
 	return;
 }
 
 /*
  * Merge a router description into the default router list.
  *
  * new - template entry (caller-owned, copied when a list entry is created)
  *
  * An existing entry is refreshed, or deleted when the advertised lifetime
  * is zero.  An unknown router with a non-zero lifetime is appended to the
  * list.  Returns the list entry, or NULL when the router was deleted, had
  * a zero lifetime, or memory could not be allocated.  Runs under splnet().
  */
 static struct nd_defrouter *
 defrtrlist_update(new)
 	struct nd_defrouter *new;
 {
 	struct nd_defrouter *ret, *fresh;
 	int s = splnet();
 
 	ret = defrouter_lookup(&new->rtaddr, new->ifp);
 	if (ret != NULL) {
 		/* entry exists */
 		if (new->rtlifetime != 0) {
 			/* override */
 			ret->flags = new->flags; /* xxx flag check */
 			ret->rtlifetime = new->rtlifetime;
 			ret->expire = new->expire;
 		} else {
 			defrtrlist_del(ret);
 			ret = NULL;
 		}
 		goto done;
 	}
 
 	/* entry does not exist; ret is NULL from here on unless we add one */
 	if (new->rtlifetime == 0)
 		goto done;
 
 	fresh = (struct nd_defrouter *)malloc(sizeof(*fresh), M_IP6NDP,
 	    M_NOWAIT);
 	if (fresh == NULL)
 		goto done;
 	bzero(fresh, sizeof(*fresh));
 	*fresh = *new;
 
 	/*
 	 * Insert the new router at the end of the Default Router List.
 	 * If there is no other router, install it anyway. Otherwise,
 	 * just continue to use the current default router.
 	 */
 	TAILQ_INSERT_TAIL(&nd_defrouter, fresh, dr_entry);
 	if (TAILQ_FIRST(&nd_defrouter) == fresh)
 		defrouter_select();
 	ret = fresh;
 
  done:
 	splx(s);
 	return(ret);
 }
 
 static struct nd_pfxrouter *
 pfxrtr_lookup(pr, dr)
 	struct nd_prefix *pr;
 	struct nd_defrouter *dr;
 {
 	struct nd_pfxrouter *search;
 	
 	for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
 		if (search->router == dr)
 			break;
 	}
 
 	return(search);
 }
 
 /*
  * Record that router dr advertises prefix pr, then re-run the on-link
  * check.  Does nothing at all when the record cannot be allocated.
  */
 static void
 pfxrtr_add(pr, dr)
 	struct nd_prefix *pr;
 	struct nd_defrouter *dr;
 {
 	struct nd_pfxrouter *entry;
 
 	entry = (struct nd_pfxrouter *)malloc(sizeof(*entry), M_IP6NDP,
 	    M_NOWAIT);
 	if (entry != NULL) {
 		bzero(entry, sizeof(*entry));
 		entry->router = dr;
 
 		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, entry, pfr_entry);
 
 		pfxlist_onlink_check();
 	}
 }
 
 /*
  * Unlink a prefix/router record from the list it is on and free it.
  * pfr must not be used by the caller afterwards.
  */
 static void
 pfxrtr_del(pfr)
 	struct nd_pfxrouter *pfr;
 {
 	LIST_REMOVE(pfr, pfr_entry);
 	free(pfr, M_IP6NDP);
 }
 
 struct nd_prefix *
 nd6_prefix_lookup(pr)
 	struct nd_prefix *pr;
 {
 	struct nd_prefix *search;
 
 	for (search = nd_prefix.lh_first; search; search = search->ndpr_next) {
 		if (pr->ndpr_ifp == search->ndpr_ifp &&
 		    pr->ndpr_plen == search->ndpr_plen &&
 		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 					 &search->ndpr_prefix.sin6_addr,
 					 pr->ndpr_plen)
 		    ) {
 			break;
 		}
 	}
 
 	return(search);
 }
 
 /*
  * Add a copy of a prefix to the global prefix list.
  *
  * pr   - template to copy (caller-owned)
  * dr   - router that advertised the prefix, or NULL
  * newp - if not NULL, receives the newly allocated list entry
  *
  * Returns ENOMEM when the entry cannot be allocated, 0 otherwise.  A
  * failure to make the prefix on-link is logged but does not fail the call.
  */
 int
 nd6_prelist_add(pr, dr, newp)
 	struct nd_prefix *pr, **newp;
 	struct nd_defrouter *dr;
 {
 	struct nd_prefix *npr;
 	int word, s, e;
 
 	npr = (struct nd_prefix *)malloc(sizeof(*npr), M_IP6NDP, M_NOWAIT);
 	if (npr == NULL)
 		return ENOMEM;
 	bzero(npr, sizeof(*npr));
 	*npr = *pr;
 	if (newp != NULL)
 		*newp = npr;
 
 	/* initialization */
 	LIST_INIT(&npr->ndpr_advrtrs);
 	in6_prefixlen2mask(&npr->ndpr_mask, npr->ndpr_plen);
 	/* make prefix in the canonical form: clear the bits outside the mask */
 	word = 0;
 	while (word < 4) {
 		npr->ndpr_prefix.sin6_addr.s6_addr32[word] &=
 		    npr->ndpr_mask.s6_addr32[word];
 		word++;
 	}
 
 	/* link ndpr_entry to nd_prefix list */
 	s = splnet();
 	LIST_INSERT_HEAD(&nd_prefix, npr, ndpr_entry);
 	splx(s);
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (npr->ndpr_raf_onlink) {
 		e = nd6_prefix_onlink(npr);
 		if (e != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 			/* proceed anyway. XXX: is it correct? */
 		}
 	}
 
 	if (dr != NULL)
 		pfxrtr_add(npr, dr);
 
 	return 0;
 }
 
 void
 prelist_remove(pr)
 	struct nd_prefix *pr;
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e, s;
 
 	/* make sure to invalidate the prefix until it is really freed. */
 	pr->ndpr_vltime = 0;
 	pr->ndpr_pltime = 0;
 #if 0
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * not to confuse users when executing "ndp -p".
 	 */
 	pr->ndpr_raf_onlink = 0;
 	pr->ndpr_raf_auto = 0;
 #endif
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
 	    (e = nd6_prefix_offlink(pr)) != 0) {
 		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
 		    "on %s, errno=%d\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 		/* what should we do? */
 	}
 
 	if (pr->ndpr_refcnt > 0)
 		return;		/* notice here? */
 
 	s = splnet();
 
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
 
 	/* free list of routers that adversed the prefix */
 	for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
 		next = pfr->pfr_next;
 
 		free(pfr, M_IP6NDP);
 	}
 	splx(s);
 
 	free(pr, M_IP6NDP);
 
 	pfxlist_onlink_check();
 }
 
 int
 prelist_update(new, dr, m)
 	struct nd_prefix *new;
 	struct nd_defrouter *dr; /* may be NULL */
 	struct mbuf *m;
 {
 	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp = new->ndpr_ifp;
 	struct nd_prefix *pr;
 	int s = splnet();
 	int error = 0;
 	int newprefix = 0;
 	int auth;
 	struct in6_addrlifetime lt6_tmp;
 
 	auth = 0;
 	if (m) {
 		/*
 		 * Authenticity for NA consists authentication for
 		 * both IP header and IP datagrams, doesn't it ?
 		 */
 #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
 		auth = (m->m_flags & M_AUTHIPHDR
 		     && m->m_flags & M_AUTHIPDGM) ? 1 : 0;
 #endif
 	}
 
 
 	if ((pr = nd6_prefix_lookup(new)) != NULL) {
 		/*
 		 * nd6_prefix_lookup() ensures that pr and new have the same
 		 * prefix on a same interface.
 		 */
 
 		/*
 		 * Update prefix information.  Note that the on-link (L) bit
 		 * and the autonomous (A) bit should NOT be changed from 1
 		 * to 0.
 		 */
 		if (new->ndpr_raf_onlink == 1)
 			pr->ndpr_raf_onlink = 1;
 		if (new->ndpr_raf_auto == 1)
 			pr->ndpr_raf_auto = 1;
 		if (new->ndpr_raf_onlink) {
 			pr->ndpr_vltime = new->ndpr_vltime;
 			pr->ndpr_pltime = new->ndpr_pltime;
 			pr->ndpr_preferred = new->ndpr_preferred;
 			pr->ndpr_expire = new->ndpr_expire;
 		}
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			int e;
 
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 				/* proceed anyway. XXX: is it correct? */
 			}
 		}
 
 		if (dr && pfxrtr_lookup(pr, dr) == NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		struct nd_prefix *newpr = NULL;
 
 		newprefix = 1;
 
 		if (new->ndpr_vltime == 0)
 			goto end;
 		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
 			goto end;
 
 		bzero(&new->ndpr_addr, sizeof(struct in6_addr));
 
 		error = nd6_prelist_add(new, dr, &newpr);
 		if (error != 0 || newpr == NULL) {
 			nd6log((LOG_NOTICE, "prelist_update: "
 			    "nd6_prelist_add failed for %s/%d on %s "
 			    "errno=%d, returnpr=%p\n",
 			    ip6_sprintf(&new->ndpr_prefix.sin6_addr),
 					new->ndpr_plen, if_name(new->ndpr_ifp),
 					error, newpr));
 			goto end; /* we should just give up in this case. */
 		}
 
 		/*
 		 * XXX: from the ND point of view, we can ignore a prefix
 		 * with the on-link bit being zero.  However, we need a
 		 * prefix structure for references from autoconfigured
 		 * addresses.  Thus, we explicitly make suret that the prefix
 		 * itself expires now.
 		 */
 		if (newpr->ndpr_raf_onlink == 0) {
 			newpr->ndpr_vltime = 0;
 			newpr->ndpr_pltime = 0;
 			in6_init_prefix_ltimes(newpr);
 		}
 
 		pr = newpr;
 	}
 
 	/*
 	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
 	 * Note that pr must be non NULL at this point.
 	 */
 
 	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
 	if (!new->ndpr_raf_auto)
 		goto afteraddrconf;
 
 	/*
 	 * 5.5.3 (b). the link-local prefix should have been ignored in
 	 * nd6_ra_input.
 	 */
 
 	/*
 	 * 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime.
 	 * This should have been done in nd6_ra_input.
 	 */
 
  	/*
 	 * 5.5.3 (d). If the prefix advertised does not match the prefix of an
 	 * address already in the list, and the Valid Lifetime is not 0,
 	 * form an address.  Note that even a manually configured address
 	 * should reject autoconfiguration of a new address.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		struct in6_ifaddr *ifa6;
 		int ifa_plen;
 		u_int32_t storedlifetime;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		ifa6 = (struct in6_ifaddr *)ifa;
 
 		/*
 		 * Spec is not clear here, but I believe we should concentrate
 		 * on unicast (i.e. not anycast) addresses.
 		 * XXX: other ia6_flags? detached or duplicated?
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
 			continue;
 		
 		ifa_plen = in6_mask2len(&ifa6->ia_prefixmask.sin6_addr, NULL);
 		if (ifa_plen != new->ndpr_plen ||
 		    !in6_are_prefix_equal(&ifa6->ia_addr.sin6_addr,
 					  &new->ndpr_prefix.sin6_addr,
 					  ifa_plen))
 			continue;
 
 		if (ia6_match == NULL) /* remember the first one */
 			ia6_match = ifa6;
 
 		if ((ifa6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 			continue;
 
 		/*
 		 * An already autoconfigured address matched.  Now that we
 		 * are sure there is at least one matched address, we can
 		 * proceed to 5.5.3. (e): update the lifetimes according to the
 		 * "two hours" rule and the privacy extension.
 		 */
 #define TWOHOUR		(120*60)
 		lt6_tmp = ifa6->ia6_lifetime;
 
 		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
 			storedlifetime = ND6_INFINITE_LIFETIME;
 		else if (IFA6_IS_INVALID(ifa6))
 			storedlifetime = 0;
 		else
 			storedlifetime = lt6_tmp.ia6t_expire - time_second;
 
 		/* when not updating, keep the current stored lifetime. */
 		lt6_tmp.ia6t_vltime = storedlifetime;
 
 		if (TWOHOUR < new->ndpr_vltime ||
 		    storedlifetime < new->ndpr_vltime) {
 			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 		} else if (storedlifetime <= TWOHOUR
 #if 0
 			   /*
 			    * This condition is logically redundant, so we just
 			    * omit it.
 			    * See IPng 6712, 6717, and 6721.
 			    */
 			   && new->ndpr_vltime <= storedlifetime
 #endif
 			) {
 			if (auth) {
 				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 			}
 		} else {
 			/*
 			 * new->ndpr_vltime <= TWOHOUR &&
 			 * TWOHOUR < storedlifetime
 			 */
 			lt6_tmp.ia6t_vltime = TWOHOUR;
 		}
 
 		/* The 2 hour rule is not imposed for preferred lifetime. */
 		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
 
 		in6_init_address_ltimes(pr, &lt6_tmp);
 
 		/*
 		 * When adjusting the lifetimes of an existing temporary
 		 * address, only lower the lifetimes.
 		 * RFC 3041 3.3. (1).
 		 * XXX: how should we modify ia6t_[pv]ltime?
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			if (lt6_tmp.ia6t_expire == 0 || /* no expire */
 			    lt6_tmp.ia6t_expire >
 			    ifa6->ia6_lifetime.ia6t_expire) {
 				lt6_tmp.ia6t_expire =
 					ifa6->ia6_lifetime.ia6t_expire;
 			}
 			if (lt6_tmp.ia6t_preferred == 0 || /* no expire */
 			    lt6_tmp.ia6t_preferred >
 			    ifa6->ia6_lifetime.ia6t_preferred) {
 				lt6_tmp.ia6t_preferred =
 					ifa6->ia6_lifetime.ia6t_preferred;
 			}
 		}
 
 		ifa6->ia6_lifetime = lt6_tmp;
 	}
 	if (ia6_match == NULL && new->ndpr_vltime) {
 		/*
 		 * No address matched and the valid lifetime is non-zero.
 		 * Create a new address.
 		 */
 		if ((ia6 = in6_ifadd(new, NULL)) != NULL) {
 			/*
 			 * note that we should use pr (not new) for reference.
 			 */
 			pr->ndpr_refcnt++;
 			ia6->ia6_ndpr = pr;
 
 #if 0
 			/* XXXYYY Don't do this, according to Jinmei. */
 			pr->ndpr_addr = new->ndpr_addr;
 #endif
 
 			/*
 			 * RFC 3041 3.3 (2).
 			 * When a new public address is created as described
 			 * in RFC2462, also create a new temporary address.
 			 *
 			 * RFC 3041 3.5.
 			 * When an interface connects to a new link, a new
 			 * randomized interface identifier should be generated
 			 * immediately together with a new set of temporary
 			 * addresses.  Thus, we specify 1 as the 2nd arg of
 			 * in6_tmpifadd().
 			 */
 			if (ip6_use_tempaddr) {
 				int e;
 				if ((e = in6_tmpifadd(ia6, 1)) != 0) {
 					nd6log((LOG_NOTICE, "prelist_update: "
 					    "failed to create a temporary "
 					    "address, errno=%d\n",
 					    e));
 				}
 			}
 
 			/*
 			 * A newly added address might affect the status
 			 * of other addresses, so we check and update it.
 			 * XXX: what if address duplication happens?
 			 */
 			pfxlist_onlink_check();
 		} else {
 			/* just set an error. do not bark here. */
 			error = EADDRNOTAVAIL; /* XXX: might be unused. */
 		}
 	}
 
   afteraddrconf:
 
  end:
 	splx(s);
 	return error;
 }
 
 /*
  * A supplement function used in the on-link detection below;
  * detect if a given prefix has a (probably) reachable advertising router.
  * XXX: lengthy function name...
  */
 static struct nd_pfxrouter *
 find_pfxlist_reachable_router(pr)
 	struct nd_prefix *pr;
 {
 	struct nd_pfxrouter *pfxrtr;
 	struct rtentry *rt;
 	struct llinfo_nd6 *ln;
 
 	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr;
 	     pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
 		if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0,
 				     pfxrtr->router->ifp)) &&
 		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
 		    ND6_IS_LLINFO_PROBREACH(ln))
 			break;	/* found */
 	}
 
 	return(pfxrtr);
 
 }
 
 /*
  * Re-evaluate which prefixes and which autoconfigured addresses are
  * attached.
  *
  * Check if each prefix in the prefix list has at least one available router
  * that advertised the prefix (a router is "available" if its neighbor cache
  * entry is reachable or probably reachable).
  * If the check fails, the prefix may be off-link, because, for example,
  * we have moved from the network but the lifetime of the prefix has not
  * expired yet.  So we should not use the prefix if there is another prefix
  * that has an available router.
  * But, if there is no prefix that has an available router, we still regard
  * all the prefixes as on-link.  This is because we can't tell if all the
  * routers are simply dead or if we really moved from the network and there
  * is no router around us.
  *
  * The function works in three passes:
  *  1. set or clear NDPRF_DETACHED on every non-link-local prefix that has
  *     the L bit set;
  *  2. remove the interface route of prefixes that are now detached, and
  *     install it for prefixes that are attached but not yet on-link;
  *  3. set or clear IN6_IFF_DETACHED on autoconfigured addresses with the
  *     same "any reachable router at all?" logic.
  */
 void
 pfxlist_onlink_check()
 {
 	struct nd_prefix *pr;
 	struct in6_ifaddr *ifa;
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
 			break;
 	}
 
 	if (pr) {
 		/*
 		 * There is at least one prefix that has a reachable router.
 		 * Detach prefixes which have no reachable advertising
 		 * router, and attach other prefixes.
 		 */
 		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 			/* XXX: a link-local prefix should never be detached */
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			/*
 			 * we aren't interested in prefixes without the L bit
 			 * set.
 			 */
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 			    find_pfxlist_reachable_router(pr) == NULL)
 				pr->ndpr_stateflags |= NDPRF_DETACHED;
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 			    find_pfxlist_reachable_router(pr) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	} else {
 		/* there is no prefix that has a reachable router */
 		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	}
 
 	/*
 	 * Remove each interface route associated with a (just) detached
 	 * prefix, and reinstall the interface route for a (just) attached
 	 * prefix.  Note that not every reinstallation attempt necessarily
 	 * succeeds when the same prefix is shared among multiple
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		int e;
 
 		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 			continue;
 
 		if (pr->ndpr_raf_onlink == 0)
 			continue;
 
 		/* detached but still has its interface route: remove it */
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 			if ((e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, e));
 			}
 		}
 		/* attached but has no interface route yet: install it */
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
 		    pr->ndpr_raf_onlink) {
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				/* this is the on-link path; say so in the log */
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
 				    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, e));
 			}
 		}
 	}
 
 	/*
 	 * Changes on the prefix status might affect address status as well.
 	 * Make sure that all addresses derived from an attached prefix are
 	 * attached, and that all addresses derived from a detached prefix are
 	 * detached.  Note, however, that a manually configured address should
 	 * always be attached.
 	 * The precise detection logic is same as the one for prefixes.
 	 */
 	for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 		if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 			continue;
 
 		if (ifa->ia6_ndpr == NULL) {
 			/*
 			 * This can happen when we first configure the address
 			 * (i.e. the address exists, but the prefix does not).
 			 * XXX: complicated relationships...
 			 */
 			continue;
 		}
 
 		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
 			break;
 	}
 	if (ifa) {
 		/*
 		 * At least one autoconfigured address has a prefix with a
 		 * reachable router: judge every such address individually.
 		 */
 		for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
 				continue;
 
 			if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
 				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 			else
 				ifa->ia6_flags |= IN6_IFF_DETACHED;
 		}
 	}
 	else {
 		/* no reachable router anywhere: keep every address attached */
 		for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 		}
 	}
 }
 
 int
 nd6_prefix_onlink(pr)
 	struct nd_prefix *pr;
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct sockaddr_in6 mask6;
 	struct nd_prefix *opr;
 	u_long rtflags;
 	int error = 0;
 	struct rtentry *rt = NULL;
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_onlink: %s/%d is already on-link\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen);
 		return(EEXIST));
 	}
 
 	/*
 	 * Add the interface route associated with the prefix.  Before
 	 * installing the route, check if there's the same prefix on another
 	 * interface, and the prefix has already installed the interface route.
 	 * Although such a configuration is expected to be rare, we explicitly
 	 * allow it.
 	 */
 	for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
 		if (opr == pr)
 			continue;
 
 		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
 			continue;
 
 		if (opr->ndpr_plen == pr->ndpr_plen &&
 		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 					 &opr->ndpr_prefix.sin6_addr,
 					 pr->ndpr_plen))
 			return(0);
 	}
 
 	/*
 	 * We prefer link-local addresses as the associated interface address. 
 	 */
 	/* search for a link-local addr */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
 						      IN6_IFF_NOTREADY|
 						      IN6_IFF_ANYCAST);
 	if (ifa == NULL) {
 		/* XXX: freebsd does not have ifa_ifwithaf */
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
 			if (ifa->ifa_addr->sa_family == AF_INET6)
 				break;
 		}
 		/* should we care about ia6_flags? */
 	}
 	if (ifa == NULL) {
 		/*
 		 * This can still happen, when, for example, we receive an RA
 		 * containing a prefix with the L bit set and the A bit clear,
 		 * after removing all IPv6 addresses on the receiving
 		 * interface.  This should, of course, be rare though.
 		 */
 		nd6log((LOG_NOTICE,
 		    "nd6_prefix_onlink: failed to find any ifaddr"
 		    " to add route for a prefix(%s/%d) on %s\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp)));
 		return(0);
 	}
 
 	/*
 	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
 	 * ifa->ifa_rtrequest = nd6_rtrequest;
 	 */
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_len = sizeof(mask6);
 	mask6.sin6_addr = pr->ndpr_mask;
 	rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP;
 	if (nd6_need_cache(ifp)) {
 		/* explicitly set in case ifa_flags does not set the flag. */
 		rtflags |= RTF_CLONING;
 	} else {
 		/*
 		 * explicitly clear the cloning bit in case ifa_flags sets it.
 		 */
 		rtflags &= ~RTF_CLONING;
 	}
 	error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
 			  ifa->ifa_addr, (struct sockaddr *)&mask6,
 			  rtflags, &rt);
 	if (error == 0) {
 		if (rt != NULL) /* this should be non NULL, though */
 			nd6_rtmsg(RTM_ADD, rt);
 		pr->ndpr_stateflags |= NDPRF_ONLINK;
 	}
 	else {
 		nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
 		    " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
 		    "errno = %d\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp),
 		    ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
 		    ip6_sprintf(&mask6.sin6_addr), rtflags, error));
 	}
 
-	if (rt != NULL)
+	if (rt != NULL) {
+		RT_LOCK(rt);
 		rt->rt_refcnt--;
+		RT_UNLOCK(rt);
+	}
 
 	return(error);
 }
 
 /*
  * Take a prefix off-link by deleting its interface route.
  *
  * Returns EEXIST when the prefix is not flagged NDPRF_ONLINK; otherwise
  * returns the result of rtrequest(RTM_DELETE).  On success the flag is
  * cleared, the deletion is reported through nd6_rtmsg(), and every other
  * attached, not-yet-on-link entry for the same prefix is handed to
  * nd6_prefix_onlink().
  */
 int
 nd6_prefix_offlink(pr)
 	struct nd_prefix *pr;
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct sockaddr_in6 sa6, mask6;
 	struct rtentry *rt = NULL;
 	struct nd_prefix *opr;
 	int error;
 
 	/* sanity check: only an on-link prefix can be taken off-link */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: %s/%d is already off-link\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
 		return(EEXIST);
 	}
 
 	/* destination of the interface route to remove */
 	bzero(&sa6, sizeof(sa6));
 	sa6.sin6_family = AF_INET6;
 	sa6.sin6_len = sizeof(sa6);
 	sa6.sin6_addr = pr->ndpr_prefix.sin6_addr;
 
 	/* and its netmask */
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_family = AF_INET6;
 	mask6.sin6_len = sizeof(sa6);
 	mask6.sin6_addr = pr->ndpr_mask;
 
 	error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
 	    (struct sockaddr *)&mask6, 0, &rt);
 	if (error != 0) {
 		/* XXX: can we still set the NDPRF_ONLINK flag? */
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: failed to delete route: "
 		    "%s/%d on %s (errno = %d)\n",
 		    ip6_sprintf(&sa6.sin6_addr), pr->ndpr_plen, if_name(ifp),
 		    error));
 	} else {
 		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
 
 		/* report the route deletion to the routing socket. */
 		if (rt != NULL)
 			nd6_rtmsg(RTM_DELETE, rt);
 
 		/*
 		 * The same prefix may exist on another interface and may have
 		 * been kept off-link only because this entry owned the
 		 * interface route (see comments in nd6_prefix_onlink).
 		 * Give every such entry a chance to become on-link now.
 		 */
 		for (opr = nd_prefix.lh_first; opr != NULL;
 		    opr = opr->ndpr_next) {
 			int e;
 
 			if (opr == pr)
 				continue;
 
 			/*
 			 * Skip entries that are already on-link, and
 			 * (KAME specific) detached ones, which should not
 			 * be on-link.
 			 */
 			if ((opr->ndpr_stateflags &
 			    (NDPRF_ONLINK | NDPRF_DETACHED)) != 0)
 				continue;
 
 			/* only entries for the very same prefix qualify */
 			if (opr->ndpr_plen != pr->ndpr_plen ||
 			    !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
 				continue;
 
 			e = nd6_prefix_onlink(opr);
 			if (e != 0) {
 				nd6log((LOG_ERR,
 				    "nd6_prefix_offlink: failed to "
 				    "recover a prefix %s/%d from %s "
 				    "to %s (errno = %d)\n",
 				    ip6_sprintf(&opr->ndpr_prefix.sin6_addr),
 				    opr->ndpr_plen, if_name(ifp),
 				    if_name(opr->ndpr_ifp), e));
 			}
 		}
 	}
 
 	/* release the route entry returned through &rt, if any */
 	if (rt != NULL)
 		RTFREE(rt);
 
 	return(error);
 }
 
 static struct in6_ifaddr *
 in6_ifadd(pr, ifid)
 	struct nd_prefix *pr;
 	struct in6_addr  *ifid;   /* Mobile IPv6 addition */
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct ifaddr *ifa;
 	struct in6_aliasreq ifra;
 	struct in6_ifaddr *ia, *ib;
 	int error, plen0;
 	struct in6_addr mask;
 	int prefixlen = pr->ndpr_plen;
 
 	in6_len2mask(&mask, prefixlen);
 
 	/*
 	 * find a link-local address (will be interface ID).
 	 * Is it really mandatory? Theoretically, a global or a site-local
 	 * address can be configured without a link-local address, if we
 	 * have a unique interface identifier...
 	 *
 	 * it is not mandatory to have a link-local address, we can generate
 	 * interface identifier on the fly.  we do this because:
 	 * (1) it should be the easiest way to find interface identifier.
 	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
 	 * for multiple addresses on a single interface, and possible shortcut
 	 * of DAD.  we omitted DAD for this reason in the past.
 	 * (3) a user can prevent autoconfiguration of global address 
 	 * by removing link-local address by hand (this is partly because we
 	 * don't have other way to control the use of IPv6 on an interface.
 	 * this has been our design choice - cf. NRL's "ifconfig auto").
 	 * (4) it is easier to manage when an interface has addresses
 	 * with the same interface identifier, than to have multiple addresses
 	 * with different interface identifiers.
 	 *
 	 * Mobile IPv6 addition: allow for caller to specify a wished interface
 	 * ID. This is to not break connections when moving addresses between
 	 * interfaces.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);/* 0 is OK? */
 	if (ifa)
 		ib = (struct in6_ifaddr *)ifa;
 	else
 		return NULL;
 
 #if 0 /* don't care link local addr state, and always do DAD */
 	/* if link-local address is not eligible, do not autoconfigure. */
 	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) {
 		printf("in6_ifadd: link-local address not ready\n");
 		return NULL;
 	}
 #endif
 
 	/* prefixlen + ifidlen must be equal to 128 */
 	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
 	if (prefixlen != plen0) {
 		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
 		    "(prefix=%d ifid=%d)\n",
 		    if_name(ifp), prefixlen, 128 - plen0));
 		return NULL;
 	}
 
 	/* make ifaddr */
 
 	bzero(&ifra, sizeof(ifra));
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	/* prefix */
 	bcopy(&pr->ndpr_prefix.sin6_addr, &ifra.ifra_addr.sin6_addr,
 	      sizeof(ifra.ifra_addr.sin6_addr));
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
 	/* interface ID */
 	if (ifid == NULL || IN6_IS_ADDR_UNSPECIFIED(ifid))
 		ifid = &ib->ia_addr.sin6_addr;
 	ifra.ifra_addr.sin6_addr.s6_addr32[0]
 		|= (ifid->s6_addr32[0] & ~mask.s6_addr32[0]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[1]
 		|= (ifid->s6_addr32[1] & ~mask.s6_addr32[1]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[2]
 		|= (ifid->s6_addr32[2] & ~mask.s6_addr32[2]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[3]
 		|= (ifid->s6_addr32[3] & ~mask.s6_addr32[3]);
 	    
 	/* new prefix mask. */
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
 	      sizeof(ifra.ifra_prefixmask.sin6_addr));
 
 	/*
 	 * lifetime.
 	 * XXX: in6_init_address_ltimes would override these values later.
 	 * We should reconsider this logic. 
 	 */
 	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
 	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
 	/*
 	 * temporarily set the nopfx flag to avoid conflict.
 	 * XXX: we should reconsider the entire mechanism about prefix
 	 * manipulation.
 	 */
 	ifra.ifra_flags |= IN6_IFF_NOPFX;
 
 	/*
 	 * keep the new address, regardless of the result of in6_update_ifa.
 	 * XXX: this address is now meaningless.
 	 * We should reconsider its role.
 	 */
 	pr->ndpr_addr = ifra.ifra_addr.sin6_addr;
 
 	/* allocate ifaddr structure, link into chain, etc. */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) {
 		nd6log((LOG_ERR,
 		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
 		    ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp),
 		    error));
 		return(NULL);	/* ifaddr must not have been allocated. */
 	}
 
 	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 
 	return(ia);		/* this must NOT be NULL. */
 }
 
 /*
  * Create a temporary (RFC 3041) address that shares the prefix of the
  * public address ia0.
  *
  * ia0      - the corresponding public address.
  * forcegen - passed to in6_get_tmpifid(); forced to 1 on retries.
  *
  * Returns 0 on success, and also when no address is created because the
  * computed preferred lifetime is not greater than ip6_temp_regen_advance.
  * Returns EEXIST when no unused random interface ID was found within the
  * retry limit, the error from in6_update_ifa() when that fails, and EINVAL
  * when the freshly added address cannot be looked up afterwards.
  */
 int
 in6_tmpifadd(ia0, forcegen)
 	const struct in6_ifaddr *ia0; /* corresponding public address */
 	int forcegen;
 {
 	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
 	struct in6_ifaddr *newia;
 	struct in6_aliasreq ifra;
 	int i, error;
 	int trylimit = 3;	/* XXX: adhoc value */
 	u_int32_t randid[2];
 	time_t vltime0, pltime0;
 
 	bzero(&ifra, sizeof(ifra));
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr = ia0->ia_addr;
 	/* copy prefix mask */
 	ifra.ifra_prefixmask = ia0->ia_prefixmask;
 
   again:
 	/*
 	 * Clear the interface ID on every attempt, not only the first one:
 	 * the random ID is OR-ed in below, so bits left over from a rejected
 	 * attempt would otherwise accumulate in the retried address.
 	 */
 	for (i = 0; i < 4; i++) {
 		ifra.ifra_addr.sin6_addr.s6_addr32[i]
 			&= ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
 	}
 
 	in6_get_tmpifid(ifp, (u_int8_t *)randid,
 			(const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8],
 			forcegen);
 	ifra.ifra_addr.sin6_addr.s6_addr32[2]
 		|= (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
 	ifra.ifra_addr.sin6_addr.s6_addr32[3]
 		|= (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
 
 	/*
 	 * If by chance the new temporary address is the same as an address
 	 * already assigned to the interface, generate a new randomized
 	 * interface identifier and repeat this step.
 	 * RFC 3041 3.3 (4).
 	 */
 	if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
 		if (trylimit-- == 0) {
 			nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find "
 			    "a unique random IFID\n"));
 			return(EEXIST);
 		}
 		forcegen = 1;
 		goto again;
 	}
 
 	/*
 	 * The Valid Lifetime is the lower of the Valid Lifetime of the
 	 * public address or TEMP_VALID_LIFETIME.
 	 * The Preferred Lifetime is the lower of the Preferred Lifetime
 	 * of the public address or TEMP_PREFERRED_LIFETIME -
 	 * DESYNC_FACTOR.
 	 * An expire/preferred time of 0 on the public address means that
 	 * the respective lifetime does not run out.
 	 */
 	if (ia0->ia6_lifetime.ia6t_expire != 0) {
 		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
 			(ia0->ia6_lifetime.ia6t_expire - time_second);
 		if (vltime0 > ip6_temp_valid_lifetime)
 			vltime0 = ip6_temp_valid_lifetime;
 	} else
 		vltime0 = ip6_temp_valid_lifetime;
 	if (ia0->ia6_lifetime.ia6t_preferred != 0) {
 		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
 			(ia0->ia6_lifetime.ia6t_preferred - time_second);
 		if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor){
 			pltime0 = ip6_temp_preferred_lifetime -
 				ip6_desync_factor;
 		}
 	} else
 		pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor;
 	ifra.ifra_lifetime.ia6t_vltime = vltime0;
 	ifra.ifra_lifetime.ia6t_pltime = pltime0;
 
 	/*
 	 * A temporary address is created only if this calculated Preferred
 	 * Lifetime is greater than REGEN_ADVANCE time units.
 	 */
 	if (ifra.ifra_lifetime.ia6t_pltime <= ip6_temp_regen_advance)
 		return(0);
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
 
 	/* allocate ifaddr structure, link into chain, etc. */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0)
 		return(error);
 
 	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	if (newia == NULL) {	/* XXX: can it happen? */
 		nd6log((LOG_ERR,
 		    "in6_tmpifadd: ifa update succeeded, but we got "
 		    "no ifaddr\n"));
 		return(EINVAL); /* XXX */
 	}
 	/* the temporary address refers to the same prefix as ia0 */
 	newia->ia6_ndpr = ia0->ia6_ndpr;
 	newia->ia6_ndpr->ndpr_refcnt++;
 
 	/*
 	 * A newly added address might affect the status of other addresses.
 	 * XXX: when the temporary address is generated with a new public
 	 * address, the onlink check is redundant.  However, it would be safe
 	 * to do the check explicitly everywhere a new address is generated,
 	 * and, in fact, we surely need the check when we create a new
 	 * temporary address due to deprecation of an old temporary address.
 	 */
 	pfxlist_onlink_check();
 
 	return(0);
 }
 
 /*
  * Convert the relative lifetimes of a prefix (ndpr_pltime, ndpr_vltime)
  * into the absolute times ndpr_preferred and ndpr_expire, based on
  * time_second.  An infinite lifetime is stored as 0.
  *
  * Returns EINVAL, leaving the prefix untouched, when the preferred
  * lifetime exceeds the valid lifetime; returns 0 otherwise.
  */
 int
 in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 {
 	/* check if preferred lifetime > valid lifetime.  RFC2462 5.5.3 (c) */
 	if (ndpr->ndpr_pltime > ndpr->ndpr_vltime) {
 		/* the values are cast to u_int, so print them as unsigned */
 		nd6log((LOG_INFO, "in6_init_prefix_ltimes: preferred lifetime"
 		    "(%u) is greater than valid lifetime(%u)\n",
 		    (u_int)ndpr->ndpr_pltime, (u_int)ndpr->ndpr_vltime));
 		return (EINVAL);
 	}
 	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
 		ndpr->ndpr_preferred = 0;
 	else
 		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
 	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 		ndpr->ndpr_expire = 0;
 	else
 		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
 
 	return 0;
 }
 
 /*
  * Fill in the absolute times of an address lifetime (ia6t_expire and
  * ia6t_preferred) from its relative ia6t_vltime / ia6t_pltime, counted
  * from time_second.  An infinite lifetime is stored as 0.
  * The prefix argument is not used by this function.
  */
 static void
 in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 {
 	/* valid lifetime -> ia6t_expire */
 	if (lt6->ia6t_vltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = time_second + lt6->ia6t_vltime;
 	else
 		lt6->ia6t_expire = 0;
 
 	/* preferred lifetime -> ia6t_preferred */
 	if (lt6->ia6t_pltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = time_second + lt6->ia6t_pltime;
 	else
 		lt6->ia6t_preferred = 0;
 }
 
 /*
  * Walk the whole AF_INET6 routing table and hand every entry to
  * rt6_deleteroute() together with the given gateway address.
  * Nothing is done unless the gateway is a link-local address.
  * Note that the caller's gateway address is modified: the interface index
  * is written into its second 16-bit word before the walk.
  * XXX: this function causes search through all entries of routing table, so
  * it shouldn't be called when acting as a router.
  */
 void
 rt6_flush(gateway, ifp)
 	struct in6_addr *gateway;
 	struct ifnet *ifp;
 {
 	struct radix_node_head *rnh = rt_tables[AF_INET6];
 	int s;
 
 	s = splnet();
 
 	/* We'll care only link-local addresses */
 	if (IN6_IS_ADDR_LINKLOCAL(gateway)) {
 		/* XXX: hack for KAME's link-local address kludge */
 		gateway->s6_addr16[1] = htons(ifp->if_index);
 
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	splx(s);
 }
 
 static int
 rt6_deleteroute(rn, arg)
 	struct radix_node *rn;
 	void *arg;
 {
 #define SIN6(s)	((struct sockaddr_in6 *)s)
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct in6_addr *gate = (struct in6_addr *)arg;
 
 	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
 		return(0);
 
 	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr))
 		return(0);
 
 	/*
 	 * Do not delete a static route.
 	 * XXX: this seems to be a bit ad-hoc. Should we consider the
 	 * 'cloned' bit instead?
 	 */
 	if ((rt->rt_flags & RTF_STATIC) != 0)
 		return(0);
 
 	/*
 	 * We delete only host route. This means, in particular, we don't
 	 * delete default route.
 	 */
 	if ((rt->rt_flags & RTF_HOST) == 0)
 		return(0);
 
 	return(rtrequest(RTM_DELETE, rt_key(rt),
 			 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0));
 #undef SIN6
 }
 
 /*
  * Select the default interface by index (0 cancels the default interface).
  * Returns EINVAL when the index is negative or larger than if_index,
  * and 0 otherwise.  Nothing is touched when the index is unchanged.
  */
 int
 nd6_setdefaultiface(ifindex)
 	int ifindex;
 {
 	if (ifindex < 0 || if_index < ifindex)
 		return(EINVAL);
 
 	/* selecting the current default again is a no-op */
 	if (nd6_defifindex == ifindex)
 		return(0);
 
 	nd6_defifindex = ifindex;
 	nd6_defifp = (nd6_defifindex > 0) ?
 	    ifnet_byindex(nd6_defifindex) : NULL;
 
 	/*
 	 * If the Default Router List is empty, install a route
 	 * to the specified interface as default or remove the default
 	 * route when the default interface becomes canceled.
 	 * The check for the queue is actually redundant, but
 	 * we do this here to avoid re-installing the default route
 	 * if the list is NOT empty.
 	 */
 	if (TAILQ_FIRST(&nd_defrouter) == NULL)
 		defrouter_select();
 
 	/*
 	 * Our current implementation assumes one-to-one mapping between
 	 * interfaces and links, so it would be natural to use the
 	 * default interface as the default link.
 	 */
 	scope6_setdefault(nd6_defifp);
 
 	return(0);
 }