diff --git a/sys/net/pfil.c b/sys/net/pfil.c
index 35ac338095b6..78b87f3179c5 100644
--- a/sys/net/pfil.c
+++ b/sys/net/pfil.c
@@ -1,385 +1,307 @@
 /*	$FreeBSD$ */
 /*	$NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $	*/
 
 /*-
  * Copyright (c) 1996 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/pfil.h>
 
 static struct mtx pfil_global_lock;
 
 MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock", MTX_DEF);
 
 static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
 
 static int pfil_list_remove(pfil_list_t *,
     int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), void *);
 
 LIST_HEAD(, pfil_head) pfil_head_list =
     LIST_HEAD_INITIALIZER(&pfil_head_list);
 
-static __inline void
-PFIL_RLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_busy_count++;
-	mtx_unlock(&ph->ph_mtx);
-}
-
-static __inline void
-PFIL_RUNLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_busy_count--;
-	if (ph->ph_busy_count == 0 && ph->ph_want_write)
-		cv_signal(&ph->ph_cv);
-	mtx_unlock(&ph->ph_mtx);
-}
-
-static __inline void
-PFIL_WLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_want_write = 1;
-	while (ph->ph_busy_count > 0)
-		cv_wait(&ph->ph_cv, &ph->ph_mtx);
-}
-
-static __inline int
-PFIL_TRY_WLOCK(struct pfil_head *ph)
-{
-	mtx_lock(&ph->ph_mtx);
-	ph->ph_want_write = 1;
-	if (ph->ph_busy_count > 0) {
-		ph->ph_want_write = 0;
-		mtx_unlock(&ph->ph_mtx);
-		return EBUSY;
-	}
-	return 0;
-}
-
-static __inline void
-PFIL_WUNLOCK(struct pfil_head *ph)
-{
-	ph->ph_want_write = 0;
-	cv_signal(&ph->ph_cv);
-	mtx_unlock(&ph->ph_mtx);
-}
-
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-
 /*
  * pfil_run_hooks() runs the specified packet filter hooks.
  */
 int
 pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
     int dir, struct inpcb *inp)
 {
 	struct packet_filter_hook *pfh;
 	struct mbuf *m = *mp;
 	int rv = 0;
 
-	if (ph->ph_busy_count == -1)
-		return (0);
-	/*
-	 * Prevent packet filtering from starving the modification of
-	 * the packet filters. We would prefer a reader/writer locking
-	 * mechanism with guaranteed ordering, though.
-	 */
-	if (ph->ph_want_write) {
-		m_freem(*mp);
-		*mp = NULL;
-		return (ENOBUFS);
-	}
-
 	PFIL_RLOCK(ph);
+	KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
 	for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
 	     pfh = TAILQ_NEXT(pfh, pfil_link)) {
 		if (pfh->pfil_func != NULL) {
 			rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, inp);
 			if (rv != 0 || m == NULL)
 				break;
 		}
 	}
 	PFIL_RUNLOCK(ph);
 	
 	*mp = m;
 	return (rv);
 }
 
 /*
  * pfil_head_register() registers a pfil_head with the packet filter
  * hook mechanism.
  */
 int
 pfil_head_register(struct pfil_head *ph)
 {
 	struct pfil_head *lph;
 
 	PFIL_LIST_LOCK();
 	LIST_FOREACH(lph, &pfil_head_list, ph_list)
 		if (ph->ph_type == lph->ph_type &&
 		    ph->ph_un.phu_val == lph->ph_un.phu_val) {
 			PFIL_LIST_UNLOCK();
 			return EEXIST;
 		}
 	PFIL_LIST_UNLOCK();
 
-	if (mtx_initialized(&ph->ph_mtx)) {	/* should not happen */
-		KASSERT((0), ("%s: allready initialized!", __func__));
-		return EBUSY;
-	} else {
-		ph->ph_busy_count = -1;
-		ph->ph_want_write = 1;
-		mtx_init(&ph->ph_mtx, "pfil_head_mtx", NULL, MTX_DEF);
-		cv_init(&ph->ph_cv, "pfil_head_cv");
-		mtx_lock(&ph->ph_mtx);			/* XXX: race? */
-	}
+	rw_init(&ph->ph_mtx, "PFil hook read/write mutex");
+	PFIL_WLOCK(ph);
+	ph->ph_nhooks = 0;
 
 	TAILQ_INIT(&ph->ph_in);
 	TAILQ_INIT(&ph->ph_out);
 
 	PFIL_LIST_LOCK();
 	LIST_INSERT_HEAD(&pfil_head_list, ph, ph_list);
 	PFIL_LIST_UNLOCK();
-	
+
 	PFIL_WUNLOCK(ph);
-	
+
 	return (0);
 }
 
 /*
  * pfil_head_unregister() removes a pfil_head from the packet filter
  * hook mechanism.
  */
 int
 pfil_head_unregister(struct pfil_head *ph)
 {
 	struct packet_filter_hook *pfh, *pfnext;
 		
 	PFIL_LIST_LOCK();
 	/* 
 	 * LIST_REMOVE is safe for unlocked pfil_heads in ph_list.
 	 * No need to WLOCK all of them.
 	 */
 	LIST_REMOVE(ph, ph_list);
 	PFIL_LIST_UNLOCK();
 
-	PFIL_WLOCK(ph);			/* XXX: may sleep (cv_wait)! */
+	PFIL_WLOCK(ph);
 	
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
 		free(pfh, M_IFADDR);
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
 		free(pfh, M_IFADDR);
-	cv_destroy(&ph->ph_cv);
-	mtx_destroy(&ph->ph_mtx);
+	PFIL_WUNLOCK(ph); rw_destroy(&ph->ph_mtx); /* destroy unlocked */
 	
 	return (0);
 }
 
 /*
  * pfil_head_get() returns the pfil_head for a given key/dlt.
  */
 struct pfil_head *
 pfil_head_get(int type, u_long val)
 {
 	struct pfil_head *ph;
 
 	PFIL_LIST_LOCK();
 	LIST_FOREACH(ph, &pfil_head_list, ph_list)
 		if (ph->ph_type == type && ph->ph_un.phu_val == val)
 			break;
 	PFIL_LIST_UNLOCK();
 	
 	return (ph);
 }
 
 /*
  * pfil_add_hook() adds a function to the packet filter hook.  the
  * flags are:
  *	PFIL_IN		call me on incoming packets
  *	PFIL_OUT	call me on outgoing packets
  *	PFIL_ALL	call me on all of the above
  *	PFIL_WAITOK	OK to call malloc with M_WAITOK.
  */
 int
 pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
     void *arg, int flags, struct pfil_head *ph)
 {
 	struct packet_filter_hook *pfh1 = NULL;
 	struct packet_filter_hook *pfh2 = NULL;
 	int err;
 
 	/* Get memory */
 	if (flags & PFIL_IN) {
 		pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1), 
 		    M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
 		if (pfh1 == NULL) {
 			err = ENOMEM;
 			goto error;
 		}
 	}
 	if (flags & PFIL_OUT) {
 		pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
 		    M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
 		if (pfh2 == NULL) {
 			err = ENOMEM;
 			goto error;
 		}
 	}
 
 	/* Lock */
-	if (flags & PFIL_WAITOK)
-		PFIL_WLOCK(ph);
-	else {
-		err = PFIL_TRY_WLOCK(ph);
-		if (err)
-			goto error;
-	}
+	PFIL_WLOCK(ph);
 
 	/* Add */
 	if (flags & PFIL_IN) {
 		pfh1->pfil_func = func;
 		pfh1->pfil_arg = arg;
 		err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
 		if (err)
 			goto done;
+		ph->ph_nhooks++;	/* XXX: not undone on OUT failure */
 	}
 	if (flags & PFIL_OUT) {
 		pfh2->pfil_func = func;
 		pfh2->pfil_arg = arg;
 		err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
 		if (err) {
 			if (flags & PFIL_IN)
 				pfil_list_remove(&ph->ph_in, func, arg);
 			goto done;
 		}
+		ph->ph_nhooks++;
 	}
 
-	ph->ph_busy_count = 0;
 	PFIL_WUNLOCK(ph);
 
 	return 0;
 done:
 	PFIL_WUNLOCK(ph);
 error:
 	if (pfh1 != NULL)
 		free(pfh1, M_IFADDR);
 	if (pfh2 != NULL)
 		free(pfh2, M_IFADDR);
 	return err;
 }
 
 /*
  * pfil_remove_hook removes a specific function from the packet filter
  * hook list.
  */
 int
 pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
     void *arg, int flags, struct pfil_head *ph)
 {
 	int err = 0;
 
-	if (flags & PFIL_WAITOK)
-		PFIL_WLOCK(ph);
-	else {
-		err = PFIL_TRY_WLOCK(ph);
-		if (err)
-			return err;
-	}
+	PFIL_WLOCK(ph);
 
-	if (flags & PFIL_IN)
+	if (flags & PFIL_IN) {
 		err = pfil_list_remove(&ph->ph_in, func, arg);
-	if ((err == 0) && (flags & PFIL_OUT))
+		if (err == 0)
+			ph->ph_nhooks--;
+	}
+	if ((err == 0) && (flags & PFIL_OUT)) {
 		err = pfil_list_remove(&ph->ph_out, func, arg);
-
-	if (TAILQ_EMPTY(&ph->ph_in) && TAILQ_EMPTY(&ph->ph_out))
-		ph->ph_busy_count = -1;
-
+		if (err == 0)
+			ph->ph_nhooks--;
+	}
 	PFIL_WUNLOCK(ph);
 	
 	return err;
 }
 
 static int
 pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
 {
 	struct packet_filter_hook *pfh;
 
 	/*
 	 * First make sure the hook is not already there.
 	 */
 	TAILQ_FOREACH(pfh, list, pfil_link)
 		if (pfh->pfil_func == pfh1->pfil_func &&
 		    pfh->pfil_arg == pfh1->pfil_arg)
 			return EEXIST;
 	/*
 	 * insert the input list in reverse order of the output list
 	 * so that the same path is followed in or out of the kernel.
 	 */
 	if (flags & PFIL_IN)
 		TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
 	else
 		TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
 
 	return 0;
 }
 
 /*
  * pfil_list_remove is an internal function that takes a function off the
  * specified list.
  */
 static int
 pfil_list_remove(pfil_list_t *list,
     int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), void *arg)
 {
 	struct packet_filter_hook *pfh;
 
 	TAILQ_FOREACH(pfh, list, pfil_link)
 		if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
 			TAILQ_REMOVE(list, pfh, pfil_link);
 			free(pfh, M_IFADDR);
 			return 0;
 		}
 	return ENOENT;
 }
diff --git a/sys/net/pfil.h b/sys/net/pfil.h
index da14f5b4801c..ccf7a651717a 100644
--- a/sys/net/pfil.h
+++ b/sys/net/pfil.h
@@ -1,113 +1,113 @@
 /*	$FreeBSD$ */
 /*	$NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $	*/
 
 /*-
  * Copyright (c) 1996 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _NET_PFIL_H_
 #define _NET_PFIL_H_
 
 #include <sys/systm.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
-#include <sys/condvar.h>	/* XXX */
+#include <sys/rwlock.h>
 
 struct mbuf;
 struct ifnet;
 struct inpcb;
 
 /*
  * The packet filter hooks are designed for anything to call them to
  * possibly intercept the packet.
  */
 struct packet_filter_hook {
         TAILQ_ENTRY(packet_filter_hook) pfil_link;
 	int	(*pfil_func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *);
 	void	*pfil_arg;
 	int	pfil_flags;
 };
 
 #define PFIL_IN		0x00000001
 #define PFIL_OUT	0x00000002
 #define PFIL_WAITOK	0x00000004
 #define PFIL_ALL	(PFIL_IN|PFIL_OUT)
 
 typedef	TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
 
 #define	PFIL_TYPE_AF		1	/* key is AF_* type */
 #define	PFIL_TYPE_IFNET		2	/* key is ifnet pointer */
 
 struct pfil_head {
 	pfil_list_t	ph_in;
 	pfil_list_t	ph_out;
 	int		ph_type;
-	/*
-	 * Locking: use a busycounter per pfil_head.
-	 * Use ph_busy_count = -1 to indicate pfil_head is empty.
-	 */
-	int		ph_busy_count;	/* count of threads with read lock */
-	int		ph_want_write;	/* want write lock flag */
-	struct cv	ph_cv;		/* for waking up writers */
-	struct mtx	ph_mtx;		/* mutex on locking state */
+	int		ph_nhooks;	/* count of hooks on ph_in and ph_out */
+	struct rwlock	ph_mtx;		/* rwlock guarding the hook lists */
 	union {
 		u_long		phu_val;
 		void		*phu_ptr;
 	} ph_un;
 #define	ph_af		ph_un.phu_val
 #define	ph_ifnet	ph_un.phu_ptr
 	LIST_ENTRY(pfil_head) ph_list;
 };
 
 int	pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
 	    int, struct inpcb *inp);
 
 int	pfil_add_hook(int (*func)(void *, struct mbuf **,
 	    struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);
 int	pfil_remove_hook(int (*func)(void *, struct mbuf **,
 	    struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);
 
 int	pfil_head_register(struct pfil_head *);
 int	pfil_head_unregister(struct pfil_head *);
 
 struct pfil_head *pfil_head_get(int, u_long);
 
+#define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)	/* any hooks registered? */
+#define PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx)
+#define PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx)
+#define PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx)
+#define PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx)
+#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
+
 static __inline struct packet_filter_hook *
 pfil_hook_get(int dir, struct pfil_head *ph)
 {
-	KASSERT(ph->ph_busy_count > 0, 
-	    ("pfil_hook_get: called on unbusy pfil_head"));
 	if (dir == PFIL_IN)
 		return (TAILQ_FIRST(&ph->ph_in));
 	else if (dir == PFIL_OUT)
 		return (TAILQ_FIRST(&ph->ph_out));
 	else
 		return (NULL);
 }
 
 #endif /* _NET_PFIL_H_ */
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 6ad8e9d85fe6..94da84b2645e 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -1,607 +1,607 @@
 /*-
  * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * ip_fastforward gets its speed from processing the forwarded packet to
  * completion (if_output on the other side) without any queues or netisr's.
  * The receiving interface DMAs the packet into memory, the upper half of
  * driver calls ip_fastforward, we do our routing table lookup and directly
  * send it off to the outgoing interface, which DMAs the packet to the
  * network card. The only part of the packet we touch with the CPU is the
  * IP header (unless there are complex firewall rules touching other parts
  * of the packet, but that is up to you). We are essentially limited by bus
  * bandwidth and how fast the network card/driver can set up receives and
  * transmits.
  *
  * We handle basic errors, IP header errors, checksum errors,
  * destination unreachable, fragmentation and fragmentation needed and
  * report them via ICMP to the sender.
  *
  * Else if something is not pure IPv4 unicast forwarding we fall back to
  * the normal ip_input processing path. We should only be called from
  * interfaces connected to the outside world.
  *
  * Firewalling is fully supported including divert, ipfw fwd and ipfilter
  * ipnat and address rewrite.
  *
  * IPSEC is not supported if this host is a tunnel broker. IPSEC is
  * supported for connections to/from local host.
  *
  * We try to do the least expensive (in CPU ops) checks and operations
  * first to catch junk with as little overhead as possible.
  * 
  * We take full advantage of hardware support for IP checksum and
  * fragmentation offloading.
  *
  * We don't do ICMP redirect in the fast forwarding path. I have had my own
  * cases where two core routers with Zebra routing suite would send millions
  * ICMP redirects to connected hosts if the destination router was not the
  * default gateway. In one case it was filling the routing table of a host
  * with approximately 300.000 cloned redirect entries until it ran out of
  * kernel memory. However the networking code proved very robust and it didn't
  * crash or fail in other ways.
  */
 
 /*
  * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
  * is being followed here.
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 
 #include <machine/in_cksum.h>
 
 static int ipfastforward_active = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
     &ipfastforward_active, 0, "Enable fast IP forwarding");
 
 static struct sockaddr_in *
 ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
 {
 	struct sockaddr_in *dst;
 	struct rtentry *rt;
 
 	/*
 	 * Find route to destination.
 	 */
 	bzero(ro, sizeof(*ro));
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr.s_addr = dest.s_addr;
 	rtalloc_ign(ro, RTF_CLONING);
 
 	/*
 	 * Route there and interface still up?
 	 */
 	rt = ro->ro_rt;
 	if (rt && (rt->rt_flags & RTF_UP) &&
 	    (rt->rt_ifp->if_flags & IFF_UP) &&
 	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 		if (rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)rt->rt_gateway;
 	} else {
 		ipstat.ips_noroute++;
 		ipstat.ips_cantforward++;
 		if (rt)
 			RTFREE(rt);
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return NULL;
 	}
 	return dst;
 }
 
 /*
  * Try to forward a packet based on the destination address.
  * This is a fast path optimized for the plain forwarding case.
  * If the packet is handled (and consumed) here then we return 1;
  * otherwise 0 is returned and the packet should be delivered
  * to ip_input for full processing.
  */
 struct mbuf *
 ip_fastforward(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *m0 = NULL;
 	struct route ro;
 	struct sockaddr_in *dst = NULL;
 	struct ifnet *ifp;
 	struct in_addr odest, dest;
 	u_short sum, ip_len;
 	int error = 0;
 	int hlen, mtu;
 #ifdef IPFIREWALL_FORWARD
 	struct m_tag *fwd_tag;
 #endif
 
 	/*
 	 * Are we active and forwarding packets?
 	 */
 	if (!ipfastforward_active || !ipforwarding)
 		return m;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ro.ro_rt = NULL;
 
 	/*
 	 * Step 1: check for packet drop conditions (and sanity checks)
 	 */
 
 	/*
 	 * Is entire packet big enough?
 	 */
 	if (m->m_pkthdr.len < sizeof(struct ip)) {
 		ipstat.ips_tooshort++;
 		goto drop;
 	}
 
 	/*
 	 * Is first mbuf large enough for ip header and is header present?
 	 */
 	if (m->m_len < sizeof (struct ip) &&
 	   (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		ipstat.ips_toosmall++;
 		return NULL;	/* mbuf already free'd */
 	}
 
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Is it IPv4?
 	 */
 	if (ip->ip_v != IPVERSION) {
 		ipstat.ips_badvers++;
 		goto drop;
 	}
 
 	/*
 	 * Is IP header length correct and is it in first mbuf?
 	 */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		ipstat.ips_badlen++;
 		goto drop;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			ipstat.ips_badhlen++;
 			return NULL;	/* mbuf already free'd */
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	/*
 	 * Checksum correct?
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	else {
 		if (hlen == sizeof(struct ip))
 			sum = in_cksum_hdr(ip);
 		else
 			sum = in_cksum(m, hlen);
 	}
 	if (sum) {
 		ipstat.ips_badsum++;
 		goto drop;
 	}
 
 	/*
 	 * Remember that we have checked the IP header and found it valid.
 	 */
 	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
 
 	ip_len = ntohs(ip->ip_len);
 
 	/*
 	 * Is IP length longer than packet we have got?
 	 */
 	if (m->m_pkthdr.len < ip_len) {
 		ipstat.ips_tooshort++;
 		goto drop;
 	}
 
 	/*
 	 * Is packet longer than IP header tells us? If yes, truncate packet.
 	 */
 	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip_len;
 			m->m_pkthdr.len = ip_len;
 		} else
 			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 
 	/*
 	 * Is packet from or to 127/8?
 	 */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		ipstat.ips_badaddr++;
 		goto drop;
 	}
 
 #ifdef ALTQ
 	/*
 	 * Is packet dropped by traffic conditioner?
 	 */
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		goto drop;
 #endif
 
 	/*
 	 * Step 2: fallback conditions to normal ip_input path processing
 	 */
 
 	/*
 	 * Only IP packets without options
 	 */
 	if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
 		if (ip_doopts == 1)
 			return m;
 		else if (ip_doopts == 2) {
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
 				0, 0);
 			return NULL;	/* mbuf already free'd */
 		}
 		/* else ignore IP options and continue */
 	}
 
 	/*
 	 * Only unicast IP, not from loopback, no L2 or IP broadcast,
 	 * no multicast, no INADDR_ANY
 	 *
 	 * XXX: Probably some of these checks could be direct drop
 	 * conditions.  However it is not clear whether there are some
 	 * hacks or obscure behaviours which make it neccessary to
 	 * let ip_input handle it.  We play safe here and let ip_input
 	 * deal with it until it is proven that we can directly drop it.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) ||
 	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
 	    ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
 	    ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
 	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 	    IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    ip->ip_src.s_addr == INADDR_ANY ||
 	    ip->ip_dst.s_addr == INADDR_ANY )
 		return m;
 
 	/*
 	 * Is it for a local address on this host?
 	 */
 	if (in_localip(ip->ip_dst))
 		return m;
 
 	ipstat.ips_total++;
 
 	/*
 	 * Step 3: incoming packet firewall processing
 	 */
 
 	/*
 	 * Convert to host representation
 	 */
 	ip->ip_len = ntohs(ip->ip_len);
 	ip->ip_off = ntohs(ip->ip_off);
 
 	odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Run through list of ipfilter hooks for input packets
 	 */
-	if (inet_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passin;
 
 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) ||
 	    m == NULL)
 		goto drop;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);	/* m may have changed by pfil hook */
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 	if (odest.s_addr != dest.s_addr) {
 		/*
 		 * Is it now for a local address on this host?
 		 */
 		if (in_localip(dest))
 			goto forwardlocal;
 		/*
 		 * Go on with new destination address
 		 */
 	}
 #ifdef IPFIREWALL_FORWARD
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * ipfw changed it for a local address on this host.
 		 */
 		goto forwardlocal;
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passin:
 	/*
 	 * Step 4: decrement TTL and look up route
 	 */
 
 	/*
 	 * Check TTL
 	 */
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 	if (ip->ip_ttl <= IPTTLDEC) {
 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
 		return NULL;	/* mbuf already free'd */
 	}
 
 	/*
 	 * Decrement the TTL and incrementally change the IP header checksum.
 	 * Don't bother doing this with hw checksum offloading, it's faster
 	 * doing it right here.
 	 */
 	ip->ip_ttl -= IPTTLDEC;
 	if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
 		ip->ip_sum -= ~htons(IPTTLDEC << 8);
 	else
 		ip->ip_sum += htons(IPTTLDEC << 8);
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Find route to destination.
 	 */
 	if ((dst = ip_findroute(&ro, dest, m)) == NULL)
 		return NULL;	/* icmp unreach already sent */
 	ifp = ro.ro_rt->rt_ifp;
 
 	/*
 	 * Immediately drop blackholed traffic.
 	 */
 	if (ro.ro_rt->rt_flags & RTF_BLACKHOLE)
 		goto drop;
 
 	/*
 	 * Step 5: outgoing firewall packet processing
 	 */
 
 	/*
 	 * Run through list of hooks for output packets.
 	 */
-	if (inet_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passout;
 
 	if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) {
 		goto drop;
 	}
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 #ifndef IPFIREWALL_FORWARD
 	if (odest.s_addr != dest.s_addr) {
 #else
 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 	if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
 #endif /* IPFIREWALL_FORWARD */
 		/*
 		 * Is it now for a local address on this host?
 		 */
 #ifndef IPFIREWALL_FORWARD
 		if (in_localip(dest)) {
 #else
 		if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) {
 #endif /* IPFIREWALL_FORWARD */
 forwardlocal:
 			/*
 			 * Return packet for processing by ip_input().
 			 * Keep host byte order as expected at ip_input's
 			 * "ours"-label.
 			 */
 			m->m_flags |= M_FASTFWD_OURS;
 			if (ro.ro_rt)
 				RTFREE(ro.ro_rt);
 			return m;
 		}
 		/*
 		 * Redo route lookup with new destination address
 		 */
 #ifdef IPFIREWALL_FORWARD
 		if (fwd_tag) {
 			if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst))
 				dest.s_addr = ((struct sockaddr_in *)(fwd_tag+1))->sin_addr.s_addr;
 			m_tag_delete(m, fwd_tag);
 		}
 #endif /* IPFIREWALL_FORWARD */
 		RTFREE(ro.ro_rt);
 		if ((dst = ip_findroute(&ro, dest, m)) == NULL)
 			return NULL;	/* icmp unreach already sent */
 		ifp = ro.ro_rt->rt_ifp;
 	}
 
 passout:
 	/*
 	 * Step 6: send off the packet
 	 */
 
 	/*
 	 * Check if route is dampned (when ARP is unable to resolve)
 	 */
 	if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
 	    ro.ro_rt->rt_rmx.rmx_expire >= time_uptime) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		goto consumed;
 	}
 
 #ifndef ALTQ
 	/*
 	 * Check if there is enough space in the interface queue
 	 */
 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen) {
 		ipstat.ips_odropped++;
 		/* would send source quench here but that is depreciated */
 		goto drop;
 	}
 #endif
 
 	/*
 	 * Check if media link state of interface is not down
 	 */
 	if (ifp->if_link_state == LINK_STATE_DOWN) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		goto consumed;
 	}
 
 	/*
 	 * Check if packet fits MTU or if hardware will fragment for us
 	 */
 	if (ro.ro_rt->rt_rmx.rmx_mtu)
 		mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
 	else
 		mtu = ifp->if_mtu;
 
 	if (ip->ip_len <= mtu ||
 	    (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
 		/*
 		 * Restore packet header fields to original values
 		 */
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 		/*
 		 * Send off the packet via outgoing interface
 		 */
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, ro.ro_rt);
 	} else {
 		/*
 		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
 		 */
 		if (ip->ip_off & IP_DF) {
 			ipstat.ips_cantfrag++;
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
 				0, mtu);
 			goto consumed;
 		} else {
 			/*
 			 * We have to fragment the packet
 			 */
 			m->m_pkthdr.csum_flags |= CSUM_IP;
 			/*
 			 * ip_fragment expects ip_len and ip_off in host byte
 			 * order but returns all packets in network byte order
 			 */
 			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
 					(~ifp->if_hwassist & CSUM_DELAY_IP))) {
 				goto drop;
 			}
 			KASSERT(m != NULL, ("null mbuf and no error"));
 			/*
 			 * Send off the fragments via outgoing interface
 			 */
 			error = 0;
 			do {
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
 
 				error = (*ifp->if_output)(ifp, m,
 					(struct sockaddr *)dst, ro.ro_rt);
 				if (error)
 					break;
 			} while ((m = m0) != NULL);
 			if (error) {
 				/* Reclaim remaining fragments */
 				for (m = m0; m; m = m0) {
 					m0 = m->m_nextpkt;
 					m_freem(m);
 				}
 			} else
 				ipstat.ips_fragmented++;
 		}
 	}
 
 	if (error != 0)
 		ipstat.ips_odropped++;
 	else {
 		ro.ro_rt->rt_rmx.rmx_pksent++;
 		ipstat.ips_forward++;
 		ipstat.ips_fastforward++;
 	}
 consumed:
 	RTFREE(ro.ro_rt);
 	return NULL;
 drop:
 	if (m)
 		m_freem(m);
 	if (ro.ro_rt)
 		RTFREE(ro.ro_rt);
 	return NULL;
 }
diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c
index e28695198c73..ea43ece3e42d 100644
--- a/sys/netinet/ip_fw2.c
+++ b/sys/netinet/ip_fw2.c
@@ -1,4262 +1,4227 @@
 /*-
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #define        DEB(x)
 #define        DDB(x) x
 
 /*
  * Implement IP packet firewall (new version)
  */
 
 #if !defined(KLD_MODULE)
 #include "opt_ipfw.h"
 #include "opt_ip6fw.h"
 #include "opt_ipdn.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #endif
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/proc.h>
+#include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/if.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_divert.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netgraph/ng_ipfw.h>
 
 #include <altq/if_altq.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #endif
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #ifdef INET6
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 /*
  * set_disable contains one bit per set value (0..31).
  * If the bit is set, all rules with the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted explicitly.
  */
 static u_int32_t set_disable;
 
 static int fw_verbose;
 static int verbose_limit;
 
 static struct callout ipfw_timeout;
 static uma_zone_t ipfw_dyn_rule_zone;
 #define	IPFW_DEFAULT_RULE	65535
 
 /*
  * Data structure to cache our ucred related
  * information. This structure only gets used if
  * the user specified UID/GID based constraints in
  * a firewall rule.
  */
 struct ip_fw_ugid {
 	gid_t		fw_groups[NGROUPS];
 	int		fw_ngroups;
 	uid_t		fw_uid;
 	int		fw_prid;
 };
 
 #define	IPFW_TABLES_MAX		128
 struct ip_fw_chain {
 	struct ip_fw	*rules;		/* list of rules */
 	struct ip_fw	*reap;		/* list of rules to reap */
 	struct radix_node_head *tables[IPFW_TABLES_MAX]; /* one slot per table */
-	struct mtx	mtx;		/* lock guarding rule list */
-	int		busy_count;	/* busy count for rw locks */
-	int		want_write;
-	struct cv	cv;
+	struct rwlock	rwmtx;		/* taken via IPFW_RLOCK/IPFW_WLOCK */
 };
 #define	IPFW_LOCK_INIT(_chain) \
-	mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \
-		MTX_DEF | MTX_RECURSE)
-#define	IPFW_LOCK_DESTROY(_chain)	mtx_destroy(&(_chain)->mtx)
+	rw_init(&(_chain)->rwmtx, "IPFW static rules")
+#define	IPFW_LOCK_DESTROY(_chain)	rw_destroy(&(_chain)->rwmtx)
 #define	IPFW_WLOCK_ASSERT(_chain)	do {				\
-	mtx_assert(&(_chain)->mtx, MA_OWNED);				\
+	rw_assert(&(_chain)->rwmtx, RA_WLOCKED); /* chain's own lock */	\
 	NET_ASSERT_GIANT();						\
 } while (0)
 
-static __inline void
-IPFW_RLOCK(struct ip_fw_chain *chain)
-{
-	mtx_lock(&chain->mtx);
-	chain->busy_count++;
-	mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_RUNLOCK(struct ip_fw_chain *chain)
-{
-	mtx_lock(&chain->mtx);
-	chain->busy_count--;
-	if (chain->busy_count == 0 && chain->want_write)
-		cv_signal(&chain->cv);
-	mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_WLOCK(struct ip_fw_chain *chain)
-{
-	mtx_lock(&chain->mtx);
-	chain->want_write++;
-	while (chain->busy_count > 0)
-		cv_wait(&chain->cv, &chain->mtx);
-}
-
-static __inline void
-IPFW_WUNLOCK(struct ip_fw_chain *chain)
-{
-	chain->want_write--;
-	cv_signal(&chain->cv);
-	mtx_unlock(&chain->mtx);
-}
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
 
 /*
  * list of rules for layer 3
  */
 static struct ip_fw_chain layer3_chain;
 
 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
 
 struct table_entry {
 	struct radix_node	rn[2];
 	struct sockaddr_in	addr, mask;
 	u_int32_t		value;
 };
 
 static int fw_debug = 1;
 static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
 
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_enable, 0, "Enable ipfw");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
     &autoinc_step, 0, "Rule number autincrement step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_one_pass, 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
     &fw_debug, 0, "Enable printing of debug ip_fw statements");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_verbose, 0, "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
     &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
 
 /*
  * Description of dynamic rules.
  *
  * Dynamic rules are stored in lists accessed through a hash table
  * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
  * be modified through the sysctl variable dyn_buckets which is
  * updated when the table becomes empty.
  *
  * XXX currently there is only one list, ipfw_dyn.
  *
  * When a packet is received, its address fields are first masked
  * with the mask defined for the rule, then hashed, then matched
  * against the entries in the corresponding list.
  * Dynamic rules can be used for different purposes:
  *  + stateful rules;
  *  + enforcing limits on the number of sessions;
  *  + in-kernel NAT (not implemented yet)
  *
  * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
  * measured in seconds and depending on the flags.
  *
  * The total number of dynamic rules is stored in dyn_count.
  * The max number of dynamic rules is dyn_max. When we reach
  * the maximum number of rules we do not create anymore. This is
  * done to avoid consuming too much memory, but also too much
  * time when searching on each packet (ideally, we should try instead
  * to put a limit on the length of the list on each bucket...).
  *
  * Each dynamic rule holds a pointer to the parent ipfw rule so
  * we know what action to perform. Dynamic rules are removed when
  * the parent rule is deleted. XXX we should make them survive.
  *
  * There are some limitations with dynamic rules -- we do not
  * obey the 'randomized match', and we do not do multiple
  * passes through the firewall. XXX check the latter!!!
  */
 static ipfw_dyn_rule **ipfw_dyn_v = NULL;
 static u_int32_t dyn_buckets = 256; /* must be power of 2 */
 static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */
 
 static struct mtx ipfw_dyn_mtx;		/* mutex guarding dynamic rules */
 #define	IPFW_DYN_LOCK_INIT() \
 	mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
 #define	IPFW_DYN_LOCK_DESTROY()	mtx_destroy(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK()		mtx_lock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_UNLOCK()	mtx_unlock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK_ASSERT()	mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
 
 /*
  * Timeouts for various events in handling dynamic rules.
  */
 static u_int32_t dyn_ack_lifetime = 300;
 static u_int32_t dyn_syn_lifetime = 20;
 static u_int32_t dyn_fin_lifetime = 1;
 static u_int32_t dyn_rst_lifetime = 1;
 static u_int32_t dyn_udp_lifetime = 10;
 static u_int32_t dyn_short_lifetime = 5;
 
 /*
  * Keepalives are sent if dyn_keepalive is set. They are sent every
  * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
  * seconds of lifetime of a rule.
  * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
  * than dyn_keepalive_period.
  */
 
 static u_int32_t dyn_keepalive_interval = 20;
 static u_int32_t dyn_keepalive_period = 5;
 static u_int32_t dyn_keepalive = 1;	/* do send keepalives */
 
 static u_int32_t static_count;	/* # of static rules */
 static u_int32_t static_len;	/* size in bytes of static rules */
 static u_int32_t dyn_count;		/* # of dynamic rules */
 static u_int32_t dyn_max = 4096;	/* max # of dynamic rules */
 
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW,
     &dyn_buckets, 0, "Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
     &curr_dyn_buckets, 0, "Current Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
     &dyn_count, 0, "Number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
     &dyn_max, 0, "Max number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
     &static_count, 0, "Number of static rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
     &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
     &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
     &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
     &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
     &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
     &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
     &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
 
 #ifdef INET6
 /*
  * IPv6 specific variables
  */
 SYSCTL_DECL(_net_inet6_ip6);
 
 static struct sysctl_ctx_list ip6_fw_sysctl_ctx;
 static struct sysctl_oid *ip6_fw_sysctl_tree;
 #endif /* INET6 */
 #endif /* SYSCTL_NODE */
 
 static int fw_deny_unknown_exthdrs = 1;
 
 
 /*
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
  * Other macros just cast void * into the appropriate type
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Return non-zero if the ICMP type of 'icmp' is selected in the bitmap
  * held in cmd->d[0] (bit N set means type N matches).
  *
  * Only one 32-bit word is consulted, so types 32 and above can never be
  * selected.  They are rejected up front: shifting by 32 or more is
  * undefined and on common CPUs wraps around onto a low-numbered bit.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	if (type > ICMP_MAXTYPE || type >= 32)
 		return (0);
 	return ((cmd->d[0] & (1U << type)) != 0);
 }
 
 #define TT	( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
     (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
 
 /*
  * Return non-zero if 'icmp' carries one of the request types collected
  * in TT (echo, router solicitation, timestamp, information request and
  * address mask request).
  *
  * TT is a 32-bit mask, so a type of 32 or more is never a query; it is
  * rejected before the shift, which would otherwise be undefined and
  * could alias one of the low bits set in TT.
  */
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	if (type > ICMP_MAXTYPE || type >= 32)
 		return (0);
 	return ((TT & (1U << type)) != 0);
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 /*
  * Compare the option/flag bits found in a packet with an instruction.
  *
  * The low byte of cmd->arg1 lists bits that must be present in 'bits',
  * the high byte lists bits that must be absent.  Returns 1 when both
  * conditions hold, 0 otherwise.
  */
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	u_int8_t missing = ~bits;	/* bits NOT present in the packet */
 	u_char must_set = cmd->arg1 & 0xff;
 	u_char must_clear = (cmd->arg1 >> 8) & 0xff;
 
 	/* a required bit is among the missing ones */
 	if ((must_set & missing) != 0)
 		return 0;
 	/* every forbidden bit has to be among the missing ones */
 	return ((must_clear & missing) == must_clear);
 }
 
 /*
  * Walk the IPv4 options that follow the fixed header, collect an
  * IP_FW_IPOPT_* bit for each LSRR, SSRR, RR and TS option seen, and
  * test the result with flags_match().  The walk stops at an EOL
  * option; an option whose length is zero or exceeds the space left
  * makes the whole match fail.
  */
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	u_char *optp = (u_char *)(ip + 1);
 	int left = (ip->ip_hl << 2) - sizeof (struct ip);
 	int seen = 0;
 	int olen;
 
 	while (left > 0) {
 		int kind = optp[IPOPT_OPTVAL];
 
 		if (kind == IPOPT_EOL)
 			break;
 		if (kind == IPOPT_NOP) {
 			olen = 1;	/* NOP has no length byte */
 		} else {
 			olen = optp[IPOPT_OLEN];
 			if (olen <= 0 || olen > left)
 				return 0; /* invalid or truncated */
 		}
 
 		if (kind == IPOPT_LSRR)
 			seen |= IP_FW_IPOPT_LSRR;
 		else if (kind == IPOPT_SSRR)
 			seen |= IP_FW_IPOPT_SSRR;
 		else if (kind == IPOPT_RR)
 			seen |= IP_FW_IPOPT_RR;
 		else if (kind == IPOPT_TS)
 			seen |= IP_FW_IPOPT_TS;
 
 		left -= olen;
 		optp += olen;
 	}
 	return (flags_match(cmd, seen));
 }
 
 /*
  * Walk the TCP options that follow the fixed header, collect an
  * IP_FW_TCPOPT_* bit for each MSS, window-scale, SACK and timestamp
  * option seen, and test the result with flags_match().
  *
  * Parsing stops (keeping the bits gathered so far) at an EOL option or
  * at the first malformed option: one whose length byte would lie
  * outside the option space, whose length is below 2 (kind + length),
  * or whose length runs past the end of the header.
  */
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(tcp + 1);
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* the length byte itself must be inside the header */
 			if (x < 2)
 				break;
 			optlen = cp[1];
 			/* length counts kind+len and must fit in what is left */
 			if (optlen < 2 || optlen > x)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Decide whether interface 'ifp' satisfies an interface instruction.
  *
  * With a non-empty cmd->name the interface name is compared, as an
  * fnmatch() pattern when cmd->p.glob is set and with strncmp()
  * otherwise.  With an empty name the interface's AF_INET addresses are
  * searched for cmd->p.ip.  Returns 1 on match, 0 otherwise (including
  * when no interface is attached to the packet).
  */
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
 {
 	struct ifaddr *cur;
 
 	if (ifp == NULL)	/* no iface with this packet, match fails */
 		return 0;
 
 	if (cmd->name[0] != '\0') {	/* match by name */
 		if (cmd->p.glob)
 			return (fnmatch(cmd->name, ifp->if_xname, 0) == 0);
 		return (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0);
 	}
 
 	/* match by IPv4 address; XXX lock? */
 	TAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
 		if (cur->ifa_addr == NULL ||
 		    cur->ifa_addr->sa_family != AF_INET)
 			continue;
 		if (((struct sockaddr_in *)(cur->ifa_addr))->sin_addr.s_addr ==
 		    cmd->p.ip.s_addr)
 			return 1;	/* match */
 	}
 	return 0;	/* no match, fail ... */
 }
 
 /*
  * The verify_path function checks if a route to the src exists and
  * if it is reachable via ifp (when provided).
  * 
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of.  The 'versrcreach'
  * option just checks that the source address is reachable via any route
  * (except default) in the routing table.  These two are a measure to block
  * forged packets.  This is also commonly known as "anti-spoofing" or Unicast
  * Reverse Path Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
  * is purposely reminiscent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implements the same functionality. But note that syntax is
  * misleading. The check may be performed on all IP packets whether unicast,
  * multicast, or broadcast.
  */
 /*
  * Look up a route towards 'src' and report whether it is acceptable.
  *
  * With 'ifp' given, the route must leave through that interface.
  * Without it, any route will do except the default route and routes
  * flagged reject or blackhole.  Returns 1 if acceptable, 0 otherwise.
  */
 static int
 verify_path(struct in_addr src, struct ifnet *ifp)
 {
 	struct route ro;
 	struct sockaddr_in *key;
 	int ok;
 
 	bzero(&ro, sizeof(ro));
 
 	key = (struct sockaddr_in *)&(ro.ro_dst);
 	key->sin_family = AF_INET;
 	key->sin_len = sizeof(*key);
 	key->sin_addr = src;
 	rtalloc_ign(&ro, RTF_CLONING);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	if (ifp != NULL) {
 		/*
 		 * Compare against rt->rt_ifa->ifa_ifp rather than rt->rt_ifp
 		 * so that packets injected back by if_simloop() pass: with
 		 * useloopback == 1 a route via lo0 may exist for our own
 		 * address, and that routing asymmetry must be tolerated.
 		 */
 		ok = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
 	} else if (satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
 		/* only the default route matched */
 		ok = 0;
 	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		/* blackhole/reject route */
 		ok = 0;
 	} else {
 		/* found valid route */
 		ok = 1;
 	}
 
 	RTFREE(ro.ro_rt);
 	return ok;
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 /*
  * Test an ICMPv6 type against the multi-word bitmap in cmd->d[]:
  * word type/32, bit type%32.  Types above ICMP6_MAXTYPE never match.
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	if (type > ICMP6_MAXTYPE)
 		return (0);
 	return ((cmd->d[type/32] & (1<<(type%32))) != 0);
 }
 
 /*
  * Return 1 if 'curr_flow' equals one of the values stored in cmd->d[],
  * 0 otherwise.
  * NOTE(review): the bound is inclusive, so entries 0..arg1 (arg1 + 1 of
  * them) are examined -- confirm this matches how the instruction is
  * built.
  */
 static int
 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
 {
 	int idx = 0;
 
 	while (idx <= cmd->o.arg1) {
 		if (cmd->d[idx] == curr_flow)
 			return 1;
 		++idx;
 	}
 	return 0;
 }
 
 /* support for IP6_*_ME opcodes */
 static int
 search_ip6_addr_net (struct in6_addr * ip6_addr)
 {
 	struct ifnet *mdc;
 	struct ifaddr *mdc2;
 	struct in6_ifaddr *fdm;
 	struct in6_addr copia;
 
 	TAILQ_FOREACH(mdc, &ifnet, if_link)
 		for (mdc2 = mdc->if_addrlist.tqh_first; mdc2;
 		    mdc2 = mdc2->ifa_list.tqe_next) {
 			if (!mdc2->ifa_addr)
 				continue;
 			if (mdc2->ifa_addr->sa_family == AF_INET6) {
 				fdm = (struct in6_ifaddr *)mdc2;
 				copia = fdm->ia_addr.sin6_addr;
 				/* need for leaving scope_id in the sock_addr */
 				in6_clearscope(&copia);
 				if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia))
 					return 1;
 			}
 		}
 	return 0;
 }
 
 /*
  * IPv6 counterpart of verify_path(): look up a route towards 'src'
  * and report whether it is acceptable.
  *
  * With 'ifp' given, the route must leave through that interface.
  * Without it, any route will do except the default (unspecified key)
  * route and routes flagged reject or blackhole.  Returns 1 or 0.
  */
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp)
 {
 	struct route_in6 ro;
 	struct sockaddr_in6 *key;
 	int ok;
 
 	bzero(&ro, sizeof(ro));
 
 	key = (struct sockaddr_in6 * )&(ro.ro_dst);
 	key->sin6_family = AF_INET6;
 	key->sin6_len = sizeof(*key);
 	key->sin6_addr = *src;
 	rtalloc_ign((struct route *)&ro, RTF_CLONING);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	if (ifp != NULL) {
 		/*
 		 * Compare against rt->rt_ifa->ifa_ifp rather than rt->rt_ifp
 		 * to support sending packets to an address of our own
 		 * (the former is the interface passed to if_simloop(),
 		 * the latter is lo0).
 		 */
 		ok = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
 	} else if (IN6_IS_ADDR_UNSPECIFIED(
 	    &satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
 		/* only the default route matched */
 		ok = 0;
 	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		/* blackhole/reject route */
 		ok = 0;
 	} else {
 		/* found valid route */
 		ok = 1;
 	}
 
 	RTFREE(ro.ro_rt);
 	return ok;
 
 }
 /*
  * Hash an IPv6 flow id for the dynamic-rule table.
  *
  * Dynamic rules are bidirectional and both directions of a flow are
  * looked up in the same bucket, so the value must not change when
  * (src_ip6, src_port) and (dst_ip6, dst_port) are swapped.  Source and
  * destination words are therefore XORed together.  The flow label is
  * left out because each sender picks its own, so it may differ between
  * the two directions.
  */
 static __inline int
 hash_packet6(struct ipfw_flow_id *id)
 {
 	u_int32_t i = 0;
 	int w;
 
 	for (w = 0; w < 4; w++)
 		i ^= id->dst_ip6.__u6_addr.__u6_addr32[w] ^
 		    id->src_ip6.__u6_addr.__u6_addr32[w];
 	i ^= (id->dst_port) ^ (id->src_port);
 	return i;
 }
 
 /*
  * Return 1 if 'icmp6_type' is one of the ICMPv6 request types listed
  * below and does not exceed ICMP6_MAXTYPE, 0 otherwise.
  */
 static int
 is_icmp6_query(int icmp6_type)
 {
 	int query;
 
 	if (icmp6_type > ICMP6_MAXTYPE)
 		return (0);
 
 	query = icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY;
 
 	return (query ? 1 : 0);
 }
 
 /*
  * Answer a rejected IPv6 packet and dispose of args->m.
  *
  *  - code == ICMP6_UNREACH_RST on a TCP packet with offset == 0:
  *    build a RST from the offending segment and hand it, together with
  *    the mbuf, to tcp_respond().  A segment that already carries RST,
  *    or whose payload length cannot be determined, is simply freed.
  *  - any other code: pass the mbuf to icmp6_error() as a destination
  *    unreachable with that code.
  *  - ICMP6_UNREACH_RST on anything else: free the packet.
  *
  * 'hlen' is the offset of the TCP header from the start of the mbuf
  * data.  On return args->m is always NULL, including on the early-exit
  * paths that free the mbuf, so the caller cannot touch or free a stale
  * pointer.
  */
 static void
 send_reject6(struct ip_fw_args *args, int code, u_short offset, u_int hlen)
 {
 	if (code == ICMP6_UNREACH_RST && offset == 0 &&
 	    args->f_id.proto == IPPROTO_TCP) {
 		struct ip6_hdr *ip6;
 		struct tcphdr *tcp;
 		tcp_seq ack, seq;
 		int flags;
 		struct {
 			struct ip6_hdr ip6;
 			struct tcphdr th;
 		} ti;
 
 		/* make the IPv6 and TCP headers contiguous */
 		if (args->m->m_len < (hlen+sizeof(struct tcphdr))) {
 			args->m = m_pullup(args->m, hlen+sizeof(struct tcphdr));
 			if (args->m == NULL)
 				return;
 		}
 
 		ip6 = mtod(args->m, struct ip6_hdr *);
 		tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
 
 		/* never answer a RST with a RST */
 		if ((tcp->th_flags & TH_RST) != 0) {
 			m_freem(args->m);
 			args->m = NULL;
 			return;
 		}
 
 		/* work on a copy with seq/ack in host byte order */
 		ti.ip6 = *ip6;
 		ti.th = *tcp;
 		ti.th.th_seq = ntohl(ti.th.th_seq);
 		ti.th.th_ack = ntohl(ti.th.th_ack);
 		ti.ip6.ip6_nxt = IPPROTO_TCP;
 
 		if (ti.th.th_flags & TH_ACK) {
 			ack = 0;
 			seq = ti.th.th_ack;
 			flags = TH_RST;
 		} else {
 			/* acknowledge everything the segment carried */
 			ack = ti.th.th_seq;
 			if (((args->m)->m_flags & M_PKTHDR) != 0) {
 				ack += (args->m)->m_pkthdr.len - hlen
 					- (ti.th.th_off << 2);
 			} else if (ip6->ip6_plen) {
 				ack += ntohs(ip6->ip6_plen) + sizeof(*ip6)
 					- hlen - (ti.th.th_off << 2);
 			} else {
 				/* payload length unknown, give up */
 				m_freem(args->m);
 				args->m = NULL;
 				return;
 			}
 			if (tcp->th_flags & TH_SYN)
 				ack++;
 			seq = 0;
 			flags = TH_RST|TH_ACK;
 		}
 		/* place the TCP header directly after the IPv6 header */
 		bcopy(&ti, ip6, sizeof(ti));
 		tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1),
 			args->m, ack, seq, flags);
 
 	} else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
 		icmp6_error(args->m, ICMP6_DST_UNREACH, code, 0);
 
 	} else
 		m_freem(args->m);
 
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 static u_int64_t norule_counter;	/* counter for ipfw_log(NULL...) */
 
 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
 #define SNP(buf) buf, sizeof(buf)
 
 /*
  * We enter here when we have a rule with O_LOG.
  * XXX this function alone takes about 2Kbytes of code!
  *
  * Formats one log(9) line describing the packet and the action taken:
  *   f      - matching rule, or NULL for a packet refused without a rule
  *            (logged as rule -1, action "Refuse")
  *   hlen   - offset of the transport header; 0 means a non-IP frame,
  *            logged as "MAC"
  *   args   - supplies the flow id (protocol, address family, IPv6
  *            addresses), the mbuf used for IPv6 header access and the
  *            ethernet header pointer
  *   m      - mbuf holding the packet
  *   oif    - output interface, NULL when the packet is inbound
  *   offset - fragment offset information
  *
  * Logging is rate limited: by verbose_limit/norule_counter when f is
  * NULL, by the rule's own max_log/log_left otherwise.  When a limit is
  * hit, a second "limit reached" line is emitted.
  */
 static void
 ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
 	struct mbuf *m, struct ifnet *oif, u_short offset)
 {
 	struct ether_header *eh = args->eh;
 	char *action;
 	int limit_reached = 0;
 	char action2[40], proto[128], fragment[32];
 
 	fragment[0] = '\0';
 	proto[0] = '\0';
 
 	if (f == NULL) {	/* bogus pkt */
 		/* global limit for rule-less packets; 0 means unlimited */
 		if (verbose_limit != 0 && norule_counter >= verbose_limit)
 			return;
 		norule_counter++;
 		if (norule_counter == verbose_limit)
 			limit_reached = verbose_limit;
 		action = "Refuse";
 	} else {	/* O_LOG is the first action, find the real one */
 		ipfw_insn *cmd = ACTION_PTR(f);
 		ipfw_insn_log *l = (ipfw_insn_log *)cmd;
 
 		/* per-rule limit; max_log == 0 means unlimited */
 		if (l->max_log != 0 && l->log_left == 0)
 			return;
 		l->log_left--;
 		if (l->log_left == 0)
 			limit_reached = l->max_log;
 		cmd += F_LEN(cmd);	/* point to first action */
 		if (cmd->opcode == O_ALTQ) {
 			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 			snprintf(SNPARGS(action2, 0), "Altq %d",
 				altq->qid);
 			cmd += F_LEN(cmd);
 		}
 		/* skip over a probability instruction, it is not an action */
 		if (cmd->opcode == O_PROB)
 			cmd += F_LEN(cmd);
 
 		/* default to the scratch buffer; constant names override it */
 		action = action2;
 		switch (cmd->opcode) {
 		case O_DENY:
 			action = "Deny";
 			break;
 
 		case O_REJECT:
 			if (cmd->arg1==ICMP_REJECT_RST)
 				action = "Reset";
 			else if (cmd->arg1==ICMP_UNREACH_HOST)
 				action = "Reject";
 			else
 				snprintf(SNPARGS(action2, 0), "Unreach %d",
 					cmd->arg1);
 			break;
 
 		case O_UNREACH6:
 			if (cmd->arg1==ICMP6_UNREACH_RST)
 				action = "Reset";
 			else
 				snprintf(SNPARGS(action2, 0), "Unreach %d",
 					cmd->arg1);
 			break;
 
 		case O_ACCEPT:
 			action = "Accept";
 			break;
 		case O_COUNT:
 			action = "Count";
 			break;
 		case O_DIVERT:
 			snprintf(SNPARGS(action2, 0), "Divert %d",
 				cmd->arg1);
 			break;
 		case O_TEE:
 			snprintf(SNPARGS(action2, 0), "Tee %d",
 				cmd->arg1);
 			break;
 		case O_SKIPTO:
 			snprintf(SNPARGS(action2, 0), "SkipTo %d",
 				cmd->arg1);
 			break;
 		case O_PIPE:
 			snprintf(SNPARGS(action2, 0), "Pipe %d",
 				cmd->arg1);
 			break;
 		case O_QUEUE:
 			snprintf(SNPARGS(action2, 0), "Queue %d",
 				cmd->arg1);
 			break;
 		case O_FORWARD_IP: {
 			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
 			int len;
 
 			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
 				inet_ntoa(sa->sa.sin_addr));
 			/* NOTE(review): port is printed as stored, without a
 			 * byte-order conversion -- confirm its byte order */
 			if (sa->sa.sin_port)
 				snprintf(SNPARGS(action2, len), ":%d",
 				    sa->sa.sin_port);
 			}
 			break;
 		case O_NETGRAPH:
 			snprintf(SNPARGS(action2, 0), "Netgraph %d",
 				cmd->arg1);
 			break;
 		case O_NGTEE:
 			snprintf(SNPARGS(action2, 0), "Ngtee %d",
 				cmd->arg1);
 			break;
 		default:
 			action = "UNKNOWN";
 			break;
 		}
 	}
 
 	if (hlen == 0) {	/* non-ip */
 		snprintf(SNPARGS(proto, 0), "MAC");
 
 	} else {
 		int len;
 		char src[48], dst[48];
 		struct icmphdr *icmp;
 		struct tcphdr *tcp;
 		struct udphdr *udp;
 		/* Initialize to make compiler happy. */
 		struct ip *ip = NULL;
 #ifdef INET6
 		struct ip6_hdr *ip6 = NULL;
 		struct icmp6_hdr *icmp6;
 #endif
 		src[0] = '\0';
 		dst[0] = '\0';
 #ifdef INET6
 		if (args->f_id.addr_type == 6) {
 			/* IPv6: addresses come from the flow id, in brackets */
 			snprintf(src, sizeof(src), "[%s]",
 			    ip6_sprintf(&args->f_id.src_ip6));
 			snprintf(dst, sizeof(dst), "[%s]",
 			    ip6_sprintf(&args->f_id.dst_ip6));
 
 			ip6 = (struct ip6_hdr *)mtod(m, struct ip6_hdr *);
 			tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
 			udp = (struct udphdr *)(mtod(args->m, char *) + hlen);
 		} else
 #endif
 		{
 			/* IPv4: transport header located through ip_hl */
 			ip = mtod(m, struct ip *);
 			tcp = L3HDR(struct tcphdr, ip);
 			udp = L3HDR(struct udphdr, ip);
 
 			inet_ntoa_r(ip->ip_src, src);
 			inet_ntoa_r(ip->ip_dst, dst);
 		}
 
 		/* ports/types are only printed for offset == 0 */
 		switch (args->f_id.proto) {
 		case IPPROTO_TCP:
 			len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(tcp->th_sport),
 				    dst,
 				    ntohs(tcp->th_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 
 		case IPPROTO_UDP:
 			len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(udp->uh_sport),
 				    dst,
 				    ntohs(udp->uh_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 
 		case IPPROTO_ICMP:
 			/* NOTE(review): 'ip' is still NULL when addr_type is 6;
 			 * confirm an IPv6 flow can never reach this case */
 			icmp = L3HDR(struct icmphdr, ip);
 			if (offset == 0)
 				len = snprintf(SNPARGS(proto, 0),
 				    "ICMP:%u.%u ",
 				    icmp->icmp_type, icmp->icmp_code);
 			else
 				len = snprintf(SNPARGS(proto, 0), "ICMP ");
 			len += snprintf(SNPARGS(proto, len), "%s", src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			icmp6 = (struct icmp6_hdr *)(mtod(args->m, char *) + hlen);
 			if (offset == 0)
 				len = snprintf(SNPARGS(proto, 0),
 				    "ICMPv6:%u.%u ",
 				    icmp6->icmp6_type, icmp6->icmp6_code);
 			else
 				len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
 			len += snprintf(SNPARGS(proto, len), "%s", src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 #endif
 		default:
 			len = snprintf(SNPARGS(proto, 0), "P:%d %s",
 			    args->f_id.proto, src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 		}
 
 		/* append fragment details when the packet is a fragment */
 #ifdef INET6
 		if (args->f_id.addr_type == 6) {
 			if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
 				snprintf(SNPARGS(fragment, 0),
 				    " (frag %08x:%d@%d%s)",
 				    args->f_id.frag_id6,
 				    ntohs(ip6->ip6_plen) - hlen,
 				    ntohs(offset & IP6F_OFF_MASK) << 3,
 				    (offset & IP6F_MORE_FRAG) ? "+" : "");
 		} else
 #endif
 		{
 			int ip_off, ip_len;
 			if (eh != NULL) { /* layer 2 packets are as on the wire */
 				ip_off = ntohs(ip->ip_off);
 				ip_len = ntohs(ip->ip_len);
 			} else {
 				/* otherwise the fields are used as stored */
 				ip_off = ip->ip_off;
 				ip_len = ip->ip_len;
 			}
 			if (ip_off & (IP_MF | IP_OFFMASK))
 				snprintf(SNPARGS(fragment, 0),
 				    " (frag %d:%d@%d%s)",
 				    ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
 				    offset << 3,
 				    (ip_off & IP_MF) ? "+" : "");
 		}
 	}
 	/* name the output interface if there is one, else the receiving one */
 	if (oif || m->m_pkthdr.rcvif)
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s %s via %s%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, oif ? "out" : "in",
 		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
 		    fragment);
 	else
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s [no if info]%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, fragment);
 	if (limit_reached)
 		log(LOG_SECURITY | LOG_NOTICE,
 		    "ipfw: limit %d reached on entry %d\n",
 		    limit_reached, f ? f->rulenum : -1);
 }
 
 /*
  * IMPORTANT: the hash function for dynamic rules must be commutative
  * in source and destination (ip,port), because rules are bidirectional
  * and we want to find both in the same bucket.
  */
 /*
  * Map a flow id to a bucket index in the dynamic-rule hash table.
  * IPv6 flow ids are hashed by hash_packet6(); IPv4 ones by XORing both
  * addresses and both ports.  The result is masked with
  * curr_dyn_buckets - 1.
  */
 static __inline int
 hash_packet(struct ipfw_flow_id *id)
 {
 	u_int32_t bucket;
 
 #ifdef INET6
 	if (IS_IP6_FLOW_ID(id)) {
 		bucket = hash_packet6(id);
 	} else
 #endif /* INET6 */
 	{
 		bucket = (id->dst_ip) ^ (id->src_ip) ^
 		    (id->dst_port) ^ (id->src_port);
 	}
 	return (bucket & (curr_dyn_buckets - 1));
 }
 
 /**
  * Unlink a dynamic rule from a chain and free it.
  *
  * prev is a pointer to the previous rule in the chain (NULL when q is
  * the first one), head is the head of the chain and q is the rule to
  * delete.  The macro is a bare brace block that names its arguments
  * directly, so prev, head and q must be plain lvalues.
  *
  * Side effects:
  *  - an O_LIMIT rule drops its reference on the parent (parent->count);
  *  - q is advanced to the rule that followed it, and head is updated
  *    when the first rule is removed;
  *  - dyn_count is decremented and the old rule is returned to
  *    ipfw_dyn_rule_zone.
  */
 #define UNLINK_DYN_RULE(prev, head, q) {				\
 	ipfw_dyn_rule *old_q = q;					\
 									\
 	/* remove a refcount to the parent */				\
 	if (q->dyn_type == O_LIMIT)					\
 		q->parent->count--;					\
 	DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\
 		(q->id.src_ip), (q->id.src_port),			\
 		(q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); )	\
 	if (prev != NULL)						\
 		prev->next = q = q->next;				\
 	else								\
 		head = q = q->next;					\
 	dyn_count--;							\
 	uma_zfree(ipfw_dyn_rule_zone, old_q); }
 
 #define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
 
 /**
  * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
  *
  * If keep_me == NULL, rules are deleted even if not expired,
  * otherwise only expired rules are removed.
  *
  * The second parameter also identifies a rule we absolutely do not
  * want to remove (e.g. because we are holding a reference to it --
  * this is the case with O_LIMIT_PARENT rules). The pointer is only
  * used for comparison, so any non-null value will do.
  *
  * Must be called with the dynamic-rule lock held.  When not forced,
  * the scan runs at most once per second of time_uptime.
  */
 static void
 remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
 {
 	/* time_uptime of the most recent scan */
 	static u_int32_t last_remove = 0;
 
 #define FORCE (keep_me == NULL)
 
 	ipfw_dyn_rule *prev, *q;
 	int i, pass = 0, max_pass = 0;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL || dyn_count == 0)
 		return;
 	/* do not expire more than once per second, it is useless */
 	if (!FORCE && last_remove == time_uptime)
 		return;
 	last_remove = time_uptime;
 
 	/*
 	 * because O_LIMIT refer to parent rules, during the first pass only
 	 * remove child and mark any pending LIMIT_PARENT, and remove
 	 * them in a second pass.
 	 */
 next_pass:
 	for (i = 0 ; i < curr_dyn_buckets ; i++) {
 		/* q is advanced either at 'next' or by UNLINK_DYN_RULE */
 		for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) {
 			/*
 			 * Logic can become complex here, so we split tests.
 			 */
 			if (q == keep_me)
 				goto next;
 			if (rule != NULL && rule != q->rule)
 				goto next; /* not the one we are looking for */
 			if (q->dyn_type == O_LIMIT_PARENT) {
 				/*
 				 * handle parent in the second pass,
 				 * record we need one.
 				 */
 				max_pass = 1;
 				if (pass == 0)
 					goto next;
 				if (FORCE && q->count != 0 ) {
 					/* XXX should not happen! */
 					printf("ipfw: OUCH! cannot remove rule,"
 					     " count %d\n", q->count);
 				}
 			} else {
 				/* ordinary rule: keep it unless forced or expired */
 				if (!FORCE &&
 				    !TIME_LEQ( q->expire, time_uptime ))
 					goto next;
 			}
              /* a parent is only unlinked once no child refers to it */
              if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
                      UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
                      continue;
              }
 next:
 			prev=q;
 			q=q->next;
 		}
 	}
 	/* run the second pass only if a LIMIT_PARENT rule was seen */
 	if (pass++ < max_pass)
 		goto next_pass;
 }
 
 
 /**
  * lookup a dynamic rule.
  *
  * Searches the bucket selected by hash_packet(pkt) for a non-parent
  * entry whose protocol, addresses and ports equal those of pkt, either
  * as stored (MATCH_FORWARD) or with source and destination swapped
  * (MATCH_REVERSE). Expired entries met on the way are unlinked, except
  * O_LIMIT_PARENT entries whose count is non-zero, which are skipped.
  *
  * On a hit the entry is moved to the front of its bucket and its expire
  * time is refreshed: for TCP according to the SYN/FIN/RST flags seen so
  * far in each direction, for UDP and other protocols with a fixed
  * lifetime. If tcp is non-NULL, a segment whose ack is behind the one
  * recorded for its direction does not refresh an established entry.
  *
  * The direction (or MATCH_NONE) is stored through match_direction when
  * that pointer is non-NULL. Returns the entry, or NULL when nothing
  * matched. The dynamic-rule lock must be held (asserted below).
  */
 static ipfw_dyn_rule *
 lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
 	struct tcphdr *tcp)
 {
 	/*
 	 * stateful ipfw extensions.
 	 * Lookup into dynamic session queue
 	 */
 #define MATCH_REVERSE	0
 #define MATCH_FORWARD	1
 #define MATCH_NONE	2
 #define MATCH_UNKNOWN	3
 	int i, dir = MATCH_NONE;
 	ipfw_dyn_rule *prev, *q=NULL;

 	IPFW_DYN_LOCK_ASSERT();

 	if (ipfw_dyn_v == NULL)
 		goto done;	/* not found */
 	i = hash_packet( pkt );
 	for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
 		/* parents with children are neither expired nor matched */
 		if (q->dyn_type == O_LIMIT_PARENT && q->count)
 			goto next;
 		if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */
 			/* the macro leaves q on the successor */
 			UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
 			continue;
 		}
 		if (pkt->proto == q->id.proto &&
 		    q->dyn_type != O_LIMIT_PARENT) {
 			if (IS_IP6_FLOW_ID(pkt)) {
 			    /* IPv6: same orientation as stored */
 			    if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 				&(q->id.src_ip6)) &&
 			    IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 				&(q->id.dst_ip6)) &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port ) {
 				dir = MATCH_FORWARD;
 				break;
 			    }
 			    /* IPv6: endpoints swapped */
 			    if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 				    &(q->id.dst_ip6)) &&
 				IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 				    &(q->id.src_ip6)) &&
 				pkt->src_port == q->id.dst_port &&
 				pkt->dst_port == q->id.src_port ) {
 				    dir = MATCH_REVERSE;
 				    break;
 			    }
 			} else {
 			    /* not IPv6: same orientation as stored */
 			    if (pkt->src_ip == q->id.src_ip &&
 				pkt->dst_ip == q->id.dst_ip &&
 				pkt->src_port == q->id.src_port &&
 				pkt->dst_port == q->id.dst_port ) {
 				    dir = MATCH_FORWARD;
 				    break;
 			    }
 			    /* not IPv6: endpoints swapped */
 			    if (pkt->src_ip == q->id.dst_ip &&
 				pkt->dst_ip == q->id.src_ip &&
 				pkt->src_port == q->id.dst_port &&
 				pkt->dst_port == q->id.src_port ) {
 				    dir = MATCH_REVERSE;
 				    break;
 			    }
 			}
 		}
 next:
 		prev = q;
 		q = q->next;
 	}
 	if (q == NULL)
 		goto done; /* q = NULL, not found */

 	if ( prev != NULL) { /* found and not in front */
 		/* move the hit to the head of its bucket */
 		prev->next = q->next;
 		q->next = ipfw_dyn_v[i];
 		ipfw_dyn_v[i] = q;
 	}
 	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
 		u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);

 		/*
 		 * q->state keeps forward-direction flags in the low byte
 		 * and reverse-direction flags shifted left by 8.
 		 */
 #define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
 #define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
 		q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
 		switch (q->state) {
 		case TH_SYN:				/* opening */
 			q->expire = time_uptime + dyn_syn_lifetime;
 			break;

 		case BOTH_SYN:			/* move to established */
 		case BOTH_SYN | TH_FIN :	/* one side tries to close */
 		case BOTH_SYN | (TH_FIN << 8) :
  			if (tcp) {
 #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
 			    /*
 			     * Record the newest ack per direction; an
 			     * older one leaves expire untouched.
 			     */
 			    u_int32_t ack = ntohl(tcp->th_ack);
 			    if (dir == MATCH_FORWARD) {
 				if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd))
 				    q->ack_fwd = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    } else {
 				if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev))
 				    q->ack_rev = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    }
 			}
 			q->expire = time_uptime + dyn_ack_lifetime;
 			break;

 		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
 			/* note: this clamps the global lifetime itself */
 			if (dyn_fin_lifetime >= dyn_keepalive_period)
 				dyn_fin_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_uptime + dyn_fin_lifetime;
 			break;

 		default:
 #if 0
 			/*
 			 * reset or some invalid combination, but can also
 			 * occur if we use keep-state the wrong way.
 			 */
 			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
 				printf("invalid state: 0x%x\n", q->state);
 #endif
 			/* note: this clamps the global lifetime itself */
 			if (dyn_rst_lifetime >= dyn_keepalive_period)
 				dyn_rst_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_uptime + dyn_rst_lifetime;
 			break;
 		}
 	} else if (pkt->proto == IPPROTO_UDP) {
 		q->expire = time_uptime + dyn_udp_lifetime;
 	} else {
 		/* other protocols */
 		q->expire = time_uptime + dyn_short_lifetime;
 	}
 done:
 	/* report the direction only when the caller asked for it */
 	if (match_direction)
 		*match_direction = dir;
 	return q;
 }
 
 /*
  * Locking wrapper around lookup_dyn_rule_locked().
  * On a hit the dynamic-rule lock is still held when this returns and
  * the caller has to release it; on a miss it is dropped here.
  */
 static ipfw_dyn_rule *
 lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
 	struct tcphdr *tcp)
 {
 	ipfw_dyn_rule *found;

 	IPFW_DYN_LOCK();
 	found = lookup_dyn_rule_locked(pkt, match_direction, tcp);
 	if (found != NULL)
 		return (found);		/* NB: table stays locked */

 	IPFW_DYN_UNLOCK();
 	return (NULL);
 }
 
 /*
  * Replace the dynamic-rule bucket array with one of dyn_buckets slots.
  *
  * The requested size must be a non-zero power of 2; anything above
  * 65536 is replaced by 1024. An invalid request is undone by copying
  * curr_dyn_buckets back into dyn_buckets and the table is left alone.
  * Otherwise the old array is freed without looking at its contents and
  * a zeroed one is allocated with M_NOWAIT, halving the size after each
  * failed attempt until the size is 2 or less. ipfw_dyn_v may therefore
  * be NULL on return and callers have to check it.
  *
  * The dynamic-rule lock must be held (asserted below).
  */
 static void
 realloc_dynamic_table(void)
 {
 	IPFW_DYN_LOCK_ASSERT();

 	/*
 	 * Try reallocation, make sure we have a power of 2 and do
 	 * not allow more than 64k entries. In case of overflow,
 	 * default to 1024.
 	 */

 	if (dyn_buckets > 65536)
 		dyn_buckets = 1024;
 	/*
 	 * Zero would slip through the power-of-2 test below and yield an
 	 * array with no slots, which callers then index; reject it too.
 	 */
 	if (dyn_buckets == 0 ||
 	    (dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */
 		dyn_buckets = curr_dyn_buckets; /* reset */
 		return;
 	}
 	curr_dyn_buckets = dyn_buckets;
 	if (ipfw_dyn_v != NULL)
 		free(ipfw_dyn_v, M_IPFW);
 	for (;;) {
 		ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
 		       M_IPFW, M_NOWAIT | M_ZERO);
 		if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2)
 			break;
 		/* could not get that much, retry with half the buckets */
 		curr_dyn_buckets /= 2;
 	}
 }
 
 /**
  * Install state of type 'type' for a dynamic session.
  * The hash table contains two type of rules:
  * - regular rules (O_KEEP_STATE)
  * - rules for sessions with limited number of sess per user
  *   (O_LIMIT). When they are created, the parent is
  *   increased by 1, and decreased on delete. In this case,
  *   the third parameter is the parent rule and not the chain.
  * - "parent" rules for the above (O_LIMIT_PARENT).
  */
 static ipfw_dyn_rule *
 add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *entry;
 	int slot;

 	IPFW_DYN_LOCK_ASSERT();

 	/*
 	 * (Re)build the bucket array when there is none yet, or when a
 	 * different size was requested and the table is currently empty.
 	 */
 	if (ipfw_dyn_v == NULL ||
 	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
 		realloc_dynamic_table();
 		if (ipfw_dyn_v == NULL)
 			return (NULL);	/* no table, no state */
 	}
 	slot = hash_packet(id);

 	entry = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
 	if (entry == NULL) {
 		printf ("ipfw: sorry cannot allocate state\n");
 		return (NULL);
 	}

 	/*
 	 * For O_LIMIT the "rule" argument is really the parent entry:
 	 * take a reference on it and continue with the parent's rule.
 	 */
 	if (dyn_type == O_LIMIT) {
 		ipfw_dyn_rule *limit_parent = (ipfw_dyn_rule *)rule;

 		if (limit_parent->dyn_type != O_LIMIT_PARENT)
 			panic("invalid parent");
 		limit_parent->count++;
 		entry->parent = limit_parent;
 		rule = limit_parent->rule;
 	}

 	/* fill in the new entry */
 	entry->dyn_type = dyn_type;
 	entry->rule = rule;
 	entry->id = *id;
 	entry->expire = time_uptime + dyn_syn_lifetime;
 	entry->bcnt = 0;
 	entry->pcnt = 0;
 	entry->count = 0;
 	entry->bucket = slot;

 	/* push it on the front of its bucket */
 	entry->next = ipfw_dyn_v[slot];
 	ipfw_dyn_v[slot] = entry;
 	dyn_count++;
 	DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
 	   dyn_type,
 	   (entry->id.src_ip), (entry->id.src_port),
 	   (entry->id.dst_ip), (entry->id.dst_port),
 	   dyn_count ); )
 	return (entry);
 }
 
 /**
  * lookup dynamic parent rule using pkt and rule as search keys.
  * If the lookup fails, then install one.
  */
 static ipfw_dyn_rule *
 lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *q;
 	int i;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v) {
 		int is_v6 = IS_IP6_FLOW_ID(pkt);
 		i = hash_packet( pkt );
 		for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next)
 			if (q->dyn_type == O_LIMIT_PARENT &&
 			    rule== q->rule &&
 			    pkt->proto == q->id.proto &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port &&
 			    (
 				(is_v6 &&
 				 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 					&(q->id.src_ip6)) &&
 				 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 					&(q->id.dst_ip6))) ||
 				(!is_v6 &&
 				 pkt->src_ip == q->id.src_ip &&
 				 pkt->dst_ip == q->id.dst_ip)
 			    )
 			) {
 				q->expire = time_uptime + dyn_short_lifetime;
 				DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
 				return q;
 			}
 	}
 	return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
 }
 
 /**
  * Install dynamic state for rule type cmd->o.opcode
  *
  * rule is the static rule that asked for state, cmd its O_KEEP_STATE or
  * O_LIMIT instruction, and args->f_id the flow of the current packet.
  * The dynamic-rule lock is taken here and released before returning.
  *
  * Returns 1 (failure) if state is not installed because of errors or because
  * session limitations are enforced. Returns 0 otherwise, including when
  * an entry for the flow is already present.
  */
 static int
 install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 	struct ip_fw_args *args)
 {
 	/* time_uptime of the last message, so we print once per tick */
 	static int last_log;

 	ipfw_dyn_rule *q;

 	DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n",
 	    cmd->o.opcode,
 	    (args->f_id.src_ip), (args->f_id.src_port),
 	    (args->f_id.dst_ip), (args->f_id.dst_port) );)

 	IPFW_DYN_LOCK();

 	q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);

 	if (q != NULL) { /* should never occur */
 		if (last_log != time_uptime) {
 			last_log = time_uptime;
 			printf("ipfw: install_state: entry already present, done\n");
 		}
 		IPFW_DYN_UNLOCK();
 		return 0;
 	}

 	if (dyn_count >= dyn_max)
 		/*
 		 * Run out of slots, try to remove any expired rule.
 		 * The non-NULL dummy pointer only disables forced removal.
 		 */
 		remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);

 	if (dyn_count >= dyn_max) {
 		if (last_log != time_uptime) {
 			last_log = time_uptime;
 			printf("ipfw: install_state: Too many dynamic rules\n");
 		}
 		IPFW_DYN_UNLOCK();
 		return 1; /* cannot install, notify caller */
 	}

 	switch (cmd->o.opcode) {
 	case O_KEEP_STATE: /* bidir rule */
 		add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
 		break;

 	case O_LIMIT: /* limit number of sessions */
 	    {
 		u_int16_t limit_mask = cmd->limit_mask;
 		struct ipfw_flow_id id;
 		ipfw_dyn_rule *parent;

 		DEB(printf("ipfw: installing dyn-limit rule %d\n",
 		    cmd->conn_limit);)

 		/*
 		 * Build the key of the parent entry. Start from an
 		 * all-zero key so that every field not selected by
 		 * limit_mask (including the IPv6 addresses) compares
 		 * equal between lookups, and carry over addr_type, which
 		 * lookup_dyn_parent() consults through IS_IP6_FLOW_ID().
 		 */
 		bzero(&id, sizeof(id));
 		id.proto = args->f_id.proto;
 		id.addr_type = args->f_id.addr_type;

 		if (IS_IP6_FLOW_ID (&(args->f_id))) {
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip6 = args->f_id.src_ip6;
 			if (limit_mask & DYN_DST_ADDR)
 				id.dst_ip6 = args->f_id.dst_ip6;
 		} else {
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip = args->f_id.src_ip;
 			if (limit_mask & DYN_DST_ADDR)
 				id.dst_ip = args->f_id.dst_ip;
 		}
 		if (limit_mask & DYN_SRC_PORT)
 			id.src_port = args->f_id.src_port;
 		if (limit_mask & DYN_DST_PORT)
 			id.dst_port = args->f_id.dst_port;
 		parent = lookup_dyn_parent(&id, rule);
 		if (parent == NULL) {
 			printf("ipfw: add parent failed\n");
 			IPFW_DYN_UNLOCK();
 			return 1;
 		}
 		if (parent->count >= cmd->conn_limit) {
 			/*
 			 * See if we can remove some expired rule.
 			 * Passing the parent as keep_me protects it.
 			 */
 			remove_dyn_rule(rule, parent);
 			if (parent->count >= cmd->conn_limit) {
 				if (fw_verbose && last_log != time_uptime) {
 					last_log = time_uptime;
 					log(LOG_SECURITY | LOG_DEBUG,
 					    "drop session, too many entries\n");
 				}
 				IPFW_DYN_UNLOCK();
 				return 1;
 			}
 		}
 		/* the parent entry travels in the "rule" argument here */
 		add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
 	    }
 		break;
 	default:
 		printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode);
 		IPFW_DYN_UNLOCK();
 		return 1;
 	}
 	lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */
 	IPFW_DYN_UNLOCK();
 	return 0;
 }
 
 /*
  * Build a bare TCP segment (IP header without options plus TCP header,
  * no payload) for the flow described by id.
  * When flags & TH_RST, the segment is a RST answering a packet that
  * matched a "reset" action. Otherwise it is a keepalive, and
  * flags & TH_SYN selects its direction: forward if set, reverse if
  * clear. Returns the new mbuf, or NULL if none could be obtained.
  */
 static struct mbuf *
 send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
 {
 	struct mbuf *pkt;
 	struct ip *iph;
 	struct tcphdr *th;
 	int forward;

 	MGETHDR(pkt, M_DONTWAIT, MT_DATA);
 	if (pkt == NULL)
 		return (NULL);
 	pkt->m_pkthdr.rcvif = (struct ifnet *)0;
 	pkt->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
 	pkt->m_pkthdr.len = pkt->m_len;
 	pkt->m_data += max_linkhdr;

 	iph = mtod(pkt, struct ip *);
 	bzero(iph, pkt->m_len);
 	th = (struct tcphdr *)(iph + 1); /* no IP options */
 	iph->ip_p = IPPROTO_TCP;
 	th->th_off = 5;

 	/*
 	 * Only a keepalive with TH_SYN set travels in the direction
 	 * stored in id; a RST and a reverse keepalive go the other way,
 	 * so their addresses and ports are swapped.
 	 */
 	forward = !(flags & TH_RST) && (flags & TH_SYN);
 	if (forward) {
 		iph->ip_src.s_addr = htonl(id->src_ip);
 		iph->ip_dst.s_addr = htonl(id->dst_ip);
 		th->th_sport = htons(id->src_port);
 		th->th_dport = htons(id->dst_port);
 	} else {
 		iph->ip_src.s_addr = htonl(id->dst_ip);
 		iph->ip_dst.s_addr = htonl(id->src_ip);
 		th->th_sport = htons(id->dst_port);
 		th->th_dport = htons(id->src_port);
 	}

 	if (!(flags & TH_RST)) {
 		/*
 		 * Keepalive. NOTE: seq and ack are always assumed to be
 		 * correct as set by the caller.
 		 */
 		th->th_seq = htonl(seq);
 		th->th_ack = htonl(ack);
 		th->th_flags = TH_ACK;
 	} else if (flags & TH_ACK) {
 		/* RST: the offending packet's ack becomes our seq */
 		th->th_seq = htonl(ack);
 		th->th_ack = htonl(0);
 		th->th_flags = TH_RST;
 	} else {
 		/* RST: no ack seen, acknowledge its seq (+1 for SYN) */
 		if (flags & TH_SYN)
 			seq++;
 		th->th_seq = htonl(0);
 		th->th_ack = htonl(seq);
 		th->th_flags = TH_RST | TH_ACK;
 	}

 	/*
 	 * ip_len temporarily carries the TCP length so that the checksum
 	 * below is taken over the pseudoheader as well.
 	 * XXX check this, could save a couple of words ?
 	 */
 	iph->ip_len = htons(sizeof(struct tcphdr));
 	th->th_sum = in_cksum(pkt, pkt->m_pkthdr.len);

 	/* the remaining IP fields are filled in only now */
 	iph->ip_ttl = ip_defttl;
 	iph->ip_len = pkt->m_pkthdr.len;
 	pkt->m_flags |= M_SKIP_FIREWALL;
 	return (pkt);
 }
 
 /*
  * sends a reject message, consuming the mbuf passed as an argument.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, u_short offset, int ip_len)
 {

 	if (code != ICMP_REJECT_RST) {
 		/*
 		 * ICMP unreachable. icmp_error() wants the IP header in
 		 * host order, so convert it for layer2 packets first.
 		 */
 		if (args->eh != NULL) {
 			struct ip *iph = mtod(args->m, struct ip *);

 			iph->ip_len = ntohs(iph->ip_len);
 			iph->ip_off = ntohs(iph->ip_off);
 		}
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 		args->m = NULL;
 		return;
 	}

 	/*
 	 * RST requested: answer only an unfragmented (or first fragment)
 	 * TCP segment that is not itself a RST.
 	 */
 	if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const th =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));

 		if (!(th->th_flags & TH_RST)) {
 			struct mbuf *rst;

 			rst = send_pkt(&(args->f_id), ntohl(th->th_seq),
 				ntohl(th->th_ack),
 				th->th_flags | TH_RST);
 			if (rst != NULL)
 				ip_output(rst, NULL, NULL, 0, NULL, NULL);
 		}
 	}

 	/* in every RST case the offending packet is dropped here */
 	m_freem(args->m);
 	args->m = NULL;
 }
 
 /**
  *
  * Given an ip_fw *, lookup_next_rule will return a pointer
  * to the next rule, which can be either the jump
  * target (for skipto instructions) or the next one in the list (in
  * all other cases including a missing jump target).
  * The result is also written in the "next_rule" field of the rule.
  * Backward jumps are not allowed, so start looking from the next
  * rule...
  *
  * This never returns NULL -- in case we do not have an exact match,
  * the next rule is returned. When the ruleset is changed,
  * pointers are flushed so we are always correct.
  */
 
 static struct ip_fw *
 lookup_next_rule(struct ip_fw *me)
 {
 	struct ip_fw *target;
 	ipfw_insn *act;

 	/* the action may be preceded by a log and/or an altq opcode */
 	act = ACTION_PTR(me);
 	if (act->opcode == O_LOG)
 		act += F_LEN(act);
 	if (act->opcode == O_ALTQ)
 		act += F_LEN(act);

 	/* skipto: first later rule whose number reaches the argument */
 	target = NULL;
 	if (act->opcode == O_SKIPTO) {
 		target = me->next;
 		while (target != NULL && target->rulenum < act->arg1)
 			target = target->next;
 	}

 	/* not a skipto, or no such rule: fall back to the successor */
 	if (target == NULL)
 		target = me->next;

 	me->next_rule = target;
 	return (target);
 }
 
 /*
  * Insert the prefix addr/mlen, carrying "value", into lookup table tbl
  * of chain ch.
  *
  * addr is masked with the netmask derived from mlen before insertion.
  * Returns 0 on success, EINVAL if tbl is out of range, ENOMEM if no
  * entry could be allocated, and EEXIST if the radix tree refused the
  * insertion. The chain's write lock is taken around the insertion.
  */
 static int
 add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
 	uint8_t mlen, uint32_t value)
 {
 	struct radix_node_head *rnh;
 	struct table_entry *ent;

 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
 	rnh = ch->tables[tbl];
 	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
 	if (ent == NULL)
 		return (ENOMEM);
 	ent->value = value;
 	ent->addr.sin_len = ent->mask.sin_len = 8;
 	/* unsigned constant: mlen == 1 must not shift into the sign bit */
 	ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
 	ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
 	/* lock the chain whose table is modified, as del_table_entry does */
 	IPFW_WLOCK(ch);
 	if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) ==
 	    NULL) {
 		IPFW_WUNLOCK(ch);
 		free(ent, M_IPFW_TBL);
 		return (EEXIST);
 	}
 	IPFW_WUNLOCK(ch);
 	return (0);
 }
 
 /*
  * Remove the prefix addr/mlen from lookup table tbl of chain ch.
  * Returns 0 on success, EINVAL if tbl is out of range and ESRCH if the
  * radix tree holds no such entry.
  */
 static int
 del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
 	uint8_t mlen)
 {
 	struct radix_node_head *head;
 	struct table_entry *victim;
 	struct sockaddr_in key, netmask;

 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
 	head = ch->tables[tbl];

 	/* build the masked key the same way it was built on insertion */
 	netmask.sin_len = 8;
 	key.sin_len = 8;
 	netmask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
 	key.sin_addr.s_addr = addr & netmask.sin_addr.s_addr;

 	IPFW_WLOCK(ch);
 	victim = (struct table_entry *)head->rnh_deladdr(&key, &netmask, head);
 	IPFW_WUNLOCK(ch);

 	if (victim == NULL)
 		return (ESRCH);
 	free(victim, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * Tree-walk callback: delete node rn from the radix head passed in arg
  * and free the entry the deletion hands back. Always returns 0.
  */
 static int
 flush_table_entry(struct radix_node *rn, void *arg)
 {
 	struct radix_node_head * const head = arg;
 	struct table_entry *gone;

 	gone = (struct table_entry *)
 	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
 	if (gone == NULL)
 		return (0);
 	free(gone, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * Empty lookup table tbl of chain ch by walking it with
  * flush_table_entry(). The chain's write lock must be held.
  * Returns EINVAL if tbl is out of range, 0 otherwise.
  */
 static int
 flush_table(struct ip_fw_chain *ch, uint16_t tbl)
 {
 	struct radix_node_head *head;

 	IPFW_WLOCK_ASSERT(ch);

 	if (tbl >= IPFW_TABLES_MAX) {
 		return (EINVAL);
 	}

 	head = ch->tables[tbl];
 	KASSERT(head != NULL, ("NULL IPFW table"));
 	head->rnh_walktree(head, flush_table_entry, head);

 	return (0);
 }
 
 /*
  * Empty every lookup table of chain ch. The chain's write lock must be
  * held.
  */
 static void
 flush_tables(struct ip_fw_chain *ch)
 {
 	uint16_t idx;

 	IPFW_WLOCK_ASSERT(ch);

 	idx = 0;
 	while (idx < IPFW_TABLES_MAX) {
 		flush_table(ch, idx);
 		idx++;
 	}
 }
 
 /*
  * Create the radix head of every lookup table of chain ch.
  * If one of them cannot be set up, the tables initialised so far are
  * flushed and ENOMEM is returned; otherwise 0.
  */
 static int
 init_tables(struct ip_fw_chain *ch)
 {
 	int i;
 	uint16_t j;

 	for (i = 0; i < IPFW_TABLES_MAX; i++) {
 		if (rn_inithead((void **)&ch->tables[i], 32))
 			continue;

 		/* failure: flush what was set up before this one */
 		for (j = 0; j < i; j++)
 			(void) flush_table(ch, j);
 		return (ENOMEM);
 	}
 	return (0);
 }
 
 /*
  * Look addr up in table tbl of chain ch.
  * Returns 1 and stores the entry's value in *val on a hit; returns 0,
  * leaving *val alone, on a miss or when tbl is out of range.
  */
 static int
 lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
 	uint32_t *val)
 {
 	struct radix_node_head *head;
 	struct table_entry *hit;
 	struct sockaddr_in key;

 	if (tbl >= IPFW_TABLES_MAX)
 		return (0);

 	head = ch->tables[tbl];
 	key.sin_len = 8;
 	key.sin_addr.s_addr = addr;

 	hit = (struct table_entry *)(head->rnh_lookup(&key, NULL, head));
 	if (hit == NULL)
 		return (0);

 	*val = hit->value;
 	return (1);
 }
 
 /*
  * Tree-walk callback: add one to the u_int32_t counter that arg points
  * to. The node itself is not looked at. Always returns 0.
  */
 static int
 count_table_entry(struct radix_node *rn, void *arg)
 {
 	u_int32_t * const counter = arg;

 	*counter += 1;
 	return (0);
 }
 
 /*
  * Count the entries of table tbl of chain ch into *cnt.
  * Returns EINVAL (without touching *cnt) if tbl is out of range,
  * 0 otherwise.
  */
 static int
 count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
 {
 	struct radix_node_head *head;

 	if (tbl >= IPFW_TABLES_MAX) {
 		return (EINVAL);
 	}

 	*cnt = 0;
 	head = ch->tables[tbl];
 	head->rnh_walktree(head, count_table_entry, cnt);

 	return (0);
 }
 
 /*
  * Tree-walk callback: copy one table entry into the next free slot of
  * the ipfw_table passed in arg.
  * Returns 1 when the destination is already full (cnt == size),
  * 0 after a slot has been filled.
  */
 static int
 dump_table_entry(struct radix_node *rn, void *arg)
 {
 	struct table_entry * const src = (struct table_entry *)rn;
 	ipfw_table * const out = arg;
 	ipfw_table_entry *slot;

 	if (out->cnt == out->size)
 		return (1);

 	slot = &out->ent[out->cnt];
 	slot->tbl = out->tbl;
 	/* turn the netmask back into a prefix length */
 	slot->masklen = in_nullhost(src->mask.sin_addr) ?
 	    0 : 33 - ffs(ntohl(src->mask.sin_addr.s_addr));
 	slot->addr = src->addr.sin_addr.s_addr;
 	slot->value = src->value;

 	out->cnt++;
 	return (0);
 }
 
 /*
  * Fill tbl->ent[] with the entries of table tbl->tbl of chain ch,
  * restarting tbl->cnt from 0. The chain's write lock must be held.
  * Returns EINVAL if the table number is out of range, 0 otherwise.
  */
 static int
 dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
 {
 	struct radix_node_head *head;

 	IPFW_WLOCK_ASSERT(ch);

 	if (tbl->tbl >= IPFW_TABLES_MAX) {
 		return (EINVAL);
 	}

 	head = ch->tables[tbl->tbl];
 	tbl->cnt = 0;
 	head->rnh_walktree(head, dump_table_entry, tbl);

 	return (0);
 }
 
 /*
  * Copy the credentials of the socket attached to inp (jail id or -1,
  * uid, group count and group list) into *ugp. Does nothing when the
  * pcb has no socket.
  */
 static void
 fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp)
 {
 	struct ucred *cred;

 	if (inp->inp_socket == NULL)
 		return;

 	cred = inp->inp_socket->so_cred;
 	if (jailed(cred))
 		ugp->fw_prid = cred->cr_prison->pr_id;
 	else
 		ugp->fw_prid = -1;
 	ugp->fw_uid = cred->cr_uid;
 	ugp->fw_ngroups = cred->cr_ngroups;
 	bcopy(cred->cr_groups, ugp->fw_groups,
 	    sizeof(ugp->fw_groups));
 }
 
 /*
  * Evaluate an O_UID, O_GID or O_JAIL instruction against the
  * credentials of the socket the packet belongs to.
  *
  * The credentials are cached in *ugp, and *lookup records the state of
  * that cache: 0 means not looked up yet, 1 means *ugp is filled in,
  * -1 means an earlier lookup found nothing. If inp is supplied and
  * nothing was looked up yet, it is used directly (its lock must be
  * held, see the assertion); otherwise the TCP or UDP pcb table is
  * searched with the given addresses and ports.
  *
  * Returns non-zero when the instruction matches, 0 when it does not,
  * when no credentials could be found, or when proto is neither TCP
  * nor UDP.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn,
 	int proto, struct ifnet *oif,
 	struct in_addr dst_ip, u_int16_t dst_port,
 	struct in_addr src_ip, u_int16_t src_port,
 	struct ip_fw_ugid *ugp, int *lookup, struct inpcb *inp)
 {
 	struct inpcbinfo *pi;
 	int wildcard;
 	struct inpcb *pcb;
 	int match;
 	gid_t *gp;

 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather then holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *lookup == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			fill_ugid_cache(inp, ugp);
 			*lookup = 1;
 		}
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*lookup == -1)
 		return (0);
 	/* choose the pcb table; only the UDP search allows wildcards */
 	if (proto == IPPROTO_TCP) {
 		wildcard = 0;
 		pi = &tcbinfo;
 	} else if (proto == IPPROTO_UDP) {
 		wildcard = 1;
 		pi = &udbinfo;
 	} else
 		return 0;
 	match = 0;
 	if (*lookup == 0) {
 		INP_INFO_RLOCK(pi);
 		/*
 		 * The address/port pairs are passed in swapped order
 		 * depending on whether an outgoing interface is set.
 		 */
 		pcb =  (oif) ?
 			in_pcblookup_hash(pi,
 				dst_ip, htons(dst_port),
 				src_ip, htons(src_port),
 				wildcard, oif) :
 			in_pcblookup_hash(pi,
 				src_ip, htons(src_port),
 				dst_ip, htons(dst_port),
 				wildcard, NULL);
 		if (pcb != NULL) {
 			/* hold the pcb lock while its socket is examined */
 			INP_LOCK(pcb);
 			if (pcb->inp_socket != NULL) {
 				fill_ugid_cache(pcb, ugp);
 				*lookup = 1;
 			}
 			INP_UNLOCK(pcb);
 		}
 		INP_INFO_RUNLOCK(pi);
 		if (*lookup == 0) {
 			/*
 			 * If the lookup did not yield any results, there
 			 * is no sense in coming back and trying again. So
 			 * we can set lookup to -1 and ensure that we wont
 			 * bother the pcb system again.
 			 */
 			*lookup = -1;
 			return (0);
 		}
 	} 
 	/* credentials are cached by now; compare against insn->d[0] */
 	if (insn->o.opcode == O_UID)
 		match = (ugp->fw_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID) {
 		/* any of the cached groups may match */
 		for (gp = ugp->fw_groups;
 			gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++)
 			if (*gp == (gid_t)insn->d[0]) {
 				match = 1;
 				break;
 			}
 	} else if (insn->o.opcode == O_JAIL)
 		match = (ugp->fw_prid == (int)insn->d[0]);
 	return match;
 }
 
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, or NULL for layer3 packet.
  *	args->oif	Outgoing interface, or NULL if packet is incoming.
  *		The incoming interface is in the mbuf. (in)
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->cookie	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *
  */
 
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 	/*
 	 * Local variables hold state during the processing of a packet.
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is simply an alias of the value of m, and it is kept
 	 *	in sync with it (the packet is	supposed to start with
 	 *	the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 	struct ip_fw_ugid fw_ugid_cache;
 	int ugid_lookup = 0;
 
 	/*
 	 * divinput_flags	If non-zero, set to the IP_FW_DIVERT_*_FLAG
 	 *	associated with a packet input on a divert socket.  This
 	 *	will allow to distinguish traffic and its direction when
 	 *	it originates from a divert socket.
 	 */
 	u_int divinput_flags = 0;
 
 	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
 	 *	(ether_output, ip_output).
 	 */
 	struct ifnet *oif = args->oif;
 
 	struct ip_fw *f = NULL;		/* matching rule */
 	int retval = 0;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset == 0 means there is no Fragment Header. 
 	 *	If offset != 0 for IPv6 always use correct mask to
 	 *	get the correct offset because we add IP6F_MORE_FRAG
 	 *	to be able to dectect the first fragment which would
 	 *	otherwise have offset = 0.
 	 */
 	u_short offset = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	u_int8_t proto;
 	u_int16_t src_port = 0, dst_port = 0;	/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	u_int16_t ip_len=0;
 	int pktlen;
 
 	/*
 	 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
 	 * 	MATCH_NONE when checked and not matched (q = NULL),
 	 *	MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
 	 */
 	int dyn_dir = MATCH_UNKNOWN;
 	ipfw_dyn_rule *q = NULL;
 	struct ip_fw_chain *chain = &layer3_chain;
 	struct m_tag *mtag;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	u_int16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 	int is_ipv4 = 0;
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return (IP_FW_PASS);	/* accept */
 
 	pktlen = m->m_pkthdr.len;
 	proto = args->f_id.proto = 0;	/* mark f_id invalid */
 		/* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
 
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define PULLUP_TO(len, p, T)						\
 do {									\
 	int x = (len) + sizeof(T);					\
 	if ((m)->m_len < x) {						\
 		args->m = m = m_pullup(m, x);				\
 		if (m == NULL)						\
 			goto pullup_failed;				\
 	}								\
 	p = (mtod(m, char *) + (len));					\
 } while (0)
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (args->eh == NULL || ntohs(args->eh->ether_type)==ETHERTYPE_IPV6) &&
 	    mtod(m, struct ip *)->ip_v == 6) {
 		is_ipv6 = 1;
 		args->f_id.addr_type = 6;
 		hlen = sizeof(struct ip6_hdr);
 		proto = mtod(m, struct ip6_hdr *)->ip6_nxt;
 
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				args->f_id.flags = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				args->f_id.flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				if (((struct ip6_rthdr *)ulp)->ip6r_type != 0) {
 					printf("IPFW2: IPV6 - Unknown Routing "
 					    "Header type(%d)\n",
 					    ((struct ip6_rthdr *)ulp)->ip6r_type);
 					if (fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				/* Add IP6F_MORE_FRAG for offset of first
 				 * fragment to be != 0. */
 				offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (offset == 0) {
 					printf("IPFW2: IPV6 - Invalid Fragment "
 					    "Header\n");
 					if (fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.frag_id6 =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				/* Packet ends here. if ip6e_len!=0 octets
 				 * must be ignored. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			default:
 				printf("IPFW2: IPV6 - Unknown Extension "
 				    "Header(%d), ext_hd=%x\n", proto, ext_hd);
 				if (fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				break;
 			} /*switch */
 		}
 		args->f_id.src_ip6 = mtod(m,struct ip6_hdr *)->ip6_src;
 		args->f_id.dst_ip6 = mtod(m,struct ip6_hdr *)->ip6_dst;
 		args->f_id.src_ip = 0;
 		args->f_id.dst_ip = 0;
 		args->f_id.flow_id6 = ntohl(mtod(m, struct ip6_hdr *)->ip6_flow);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (args->eh == NULL || ntohs(args->eh->ether_type) == ETHERTYPE_IP) &&
 	    mtod(m, struct ip *)->ip_v == 4) {
 	    	is_ipv4 = 1;
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		args->f_id.addr_type = 4;
 
 		/*
 		 * Collect parameters into local variables for faster matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		if (args->eh != NULL) { /* layer 2 packets are as on the wire */
 			offset = ntohs(ip->ip_off) & IP_OFFMASK;
 			ip_len = ntohs(ip->ip_len);
 		} else {
 			offset = ip->ip_off & IP_OFFMASK;
 			ip_len = ip->ip_len;
 		}
 		pktlen = ip_len < pktlen ? ip_len : pktlen;
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				args->f_id.flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		}
 
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	}
 #undef PULLUP_TO
 	if (proto) { /* we may have port numbers, store them */
 		args->f_id.proto = proto;
 		args->f_id.src_port = src_port = ntohs(src_port);
 		args->f_id.dst_port = dst_port = ntohs(dst_port);
 	}
 
 	IPFW_RLOCK(chain);
 	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
 	if (args->rule) {
 		/*
 		 * Packet has already been tagged. Look for the next rule
 		 * to restart processing.
 		 *
 		 * If fw_one_pass != 0 then just accept it.
 		 * XXX should not happen here, but optimized out in
 		 * the caller.
 		 */
 		if (fw_one_pass) {
 			IPFW_RUNLOCK(chain);
 			return (IP_FW_PASS);
 		}
 
 		f = args->rule->next_rule;
 		if (f == NULL)
 			f = lookup_next_rule(args->rule);
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
 		 * one, or the one after divert_rule if asked so.
 		 */
 		int skipto = mtag ? divert_cookie(mtag) : 0;
 
 		f = chain->rules;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY); /* invalid */
 			}
 			while (f && f->rulenum <= skipto)
 				f = f->next;
 			if (f == NULL) {	/* drop packet */
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY);
 			}
 		}
 	}
 	/* reset divert rule to avoid confusion later */
 	if (mtag) {
 		divinput_flags = divert_info(mtag) &
 		    (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG);
 		m_tag_delete(m, mtag);
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 */
 	for (; f; f = f->next) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 
 again:
 		if (set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 check_body:
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset!=0)
 					break;
 				if (is_ipv6) /* XXX to be fixed later */
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    proto, oif,
 						    dst_ip, dst_port,
 						    src_ip, src_port, &fw_ugid_cache,
 						    &ugid_lookup, args->inp);
 				break;
 
 			case O_RECV:
 				match = iface_match(m->m_pkthdr.rcvif,
 				    (ipfw_insn_if *)cmd);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_VIA:
 				match = iface_match(oif ? oif :
 				    m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_MACADDR2:
 				if (args->eh != NULL) {	/* have MAC header */
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->eh != NULL) {
 					u_int16_t t =
 					    ntohs(args->eh->ether_type);
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (t>=p[0] && t<=p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->eh != NULL);
 				break;
 
 			case O_DIVERTED:
 				match = (cmd->arg1 & 1 && divinput_flags &
 				    IP_FW_DIVERT_LOOPBACK_FLAG) ||
 					(cmd->arg1 & 2 && divinput_flags &
 				    IP_FW_DIVERT_OUTPUT_FLAG);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_SRC_LOOKUP:
 			case O_IP_DST_LOOKUP:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_LOOKUP) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t v;
 
 				    match = lookup_table(chain, cmd->arg1, a,
 					&v);
 				    if (!match)
 					break;
 				    if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 					match =
 					    ((ipfw_insn_u32 *)cmd)->d[0] == v;
 				    else
 					tablearg = v;
 				}
 				break;
 
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(src_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(dst_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
 				    && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(mtod(m, struct ip *), cmd) );
 				break;
 
 			case O_IPVER:
 				match = (is_ipv4 &&
 				    cmd->arg1 == mtod(m, struct ip *)->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (is_ipv4) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = ip_len;
 				    else if (cmd->opcode == O_IPTTL)
 					x = mtod(m, struct ip *)->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(mtod(m, struct ip *)->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (mtod(m, struct ip *)->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, mtod(m, struct ip *)->ip_tos));
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    tcp = TCP(ulp);
 				    x = ip_len -
 					((ip->ip_hl + tcp->th_off) << 2);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    tcpopts_match(TCP(ulp), cmd));
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    cmd->arg1 == TCP(ulp)->th_win);
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct altq_tag *at;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				match = 1;
 				mtag = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
 				if (mtag != NULL)
 					break;
 				mtag = m_tag_get(PACKET_TAG_PF_QID,
 						sizeof(struct altq_tag),
 						M_NOWAIT);
 				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				at = (struct altq_tag *)(mtag+1);
 				at->qid = altq->qid;
 				if (is_ipv4)
 					at->af = AF_INET;
 				else
 					at->af = AF_LINK;
 				at->hdr = ip;
 				m_tag_prepend(m, mtag);
 				break;
 			}
 
 			case O_LOG:
 				if (fw_verbose)
 					ipfw_log(f, hlen, args, m, oif, offset);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = ((oif != NULL) ||
 				    (m->m_pkthdr.rcvif == NULL) ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    m->m_pkthdr.rcvif) :
 #endif
 				    verify_path(src_ip, m->m_pkthdr.rcvif)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) ||
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL) :
 #endif
 				    verify_path(src_ip, NULL)));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6),
 					        m->m_pkthdr.rcvif) :
 #endif
 					    verify_path(src_ip,
 					        m->m_pkthdr.rcvif);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 #ifdef FAST_IPSEC
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 #endif
 #ifdef IPSEC
 				match = (ipsec_getnhist(m) != 0);
 #endif
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 				if (is_ipv6) {
 					ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
 					struct in6_addr p = args->f_id.src_ip6;
 
 					APPLY_MASK(&p, &te->mask6);
 					match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
 				}
 				break;
 
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
 					struct in6_addr p = args->f_id.dst_ip6;
 
 					APPLY_MASK(&p, &te->mask6);
 					match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
 				}
 				break;
 
 			case O_IP6_SRC_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
 				break;
 
 			case O_IP6_DST_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to do a 'goto done').
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   ('goto next_rule', equivalent to a 'break 2'),
 			 *   or to the SKIPTO target ('goto again' after
 			 *   having set f, cmd and l), respectively.
 			 *
 			 * O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule.
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped
 			 *   ('goto done' after setting retval);
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   ('goto check_body') if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found ('goto next_rule').
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes are
 			 *   effectively NOPs.
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (install_state(f,
 				    (ipfw_insn_limit *)cmd, args)) {
 					retval = IP_FW_DENY;
 					goto done; /* error/limit violation */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_dir.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (dyn_dir == MATCH_UNKNOWN &&
 				    (q = lookup_dyn_rule(&args->f_id,
 				     &dyn_dir, proto == IPPROTO_TCP ?
 					TCP(ulp) : NULL))
 					!= NULL) {
 					/*
 					 * Found dynamic entry, update stats
 					 * and jump to the 'action' part of
 					 * the parent rule.
 					 */
 					q->pcnt++;
 					q->bcnt += pktlen;
 					f = q->rule;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					IPFW_DYN_UNLOCK();
 					goto check_body;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					goto next_rule;
 				match = 1;
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				goto done;
 
 			case O_PIPE:
 			case O_QUEUE:
 				args->rule = f; /* report matching rule */
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
 					args->cookie = cmd->arg1;
 				retval = IP_FW_DUMMYNET;
 				goto done;
 
 			case O_DIVERT:
 			case O_TEE: {
 				struct divert_tag *dt;
 
 				if (args->eh) /* not on layer 2 */
 					break;
 				mtag = m_tag_get(PACKET_TAG_DIVERT,
 						sizeof(struct divert_tag),
 						M_NOWAIT);
 				if (mtag == NULL) {
 					/* XXX statistic */
 					/* drop packet */
 					IPFW_RUNLOCK(chain);
 					return (IP_FW_DENY);
 				}
 				dt = (struct divert_tag *)(mtag+1);
 				dt->cookie = f->rulenum;
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					dt->info = tablearg;
 				else
 					dt->info = cmd->arg1;
 				m_tag_prepend(m, mtag);
 				retval = (cmd->opcode == O_DIVERT) ?
 				    IP_FW_DIVERT : IP_FW_TEE;
 				goto done;
 			}
 
 			case O_COUNT:
 			case O_SKIPTO:
 				f->pcnt++;	/* update stats */
 				f->bcnt += pktlen;
 				f->timestamp = time_uptime;
 				if (cmd->opcode == O_COUNT)
 					goto next_rule;
 				/* handle skipto */
 				if (f->next_rule == NULL)
 					lookup_next_rule(f);
 				f = f->next_rule;
 				goto again;
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1,
 					    offset,ip_len);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(args->f_id.flags) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
 					send_reject6(args, cmd->arg1,
 					    offset, hlen);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				goto done;
 
 			case O_FORWARD_IP:
 				if (args->eh)	/* not valid on layer2 pkts */
 					break;
 				if (!q || dyn_dir == MATCH_FORWARD)
 					args->next_hop =
 					    &((ipfw_insn_sa *)cmd)->sa;
 				retval = IP_FW_PASS;
 				goto done;
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				args->rule = f;	/* report matching rule */
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
 					args->cookie = cmd->arg1;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				goto done;
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner for, scan opcodes */
 
 next_rule:;		/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 	printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	IPFW_RUNLOCK(chain);
 	return (IP_FW_DENY);
 
 done:
 	/* Update statistics */
 	f->pcnt++;
 	f->bcnt += pktlen;
 	f->timestamp = time_uptime;
 	IPFW_RUNLOCK(chain);
 	return (retval);
 
 pullup_failed:
 	if (fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Invalidate the cached next_rule pointer of every rule on the chain.
  * This is done whenever the rule list changes; the pointers are filled
  * in again lazily while packets are being matched.
  *
  * The chain must be write-locked by the caller (asserted here).
  */
 static void
 flush_rule_ptrs(struct ip_fw_chain *chain)
 {
 	struct ip_fw *cur;
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	cur = chain->rules;
 	while (cur != NULL) {
 		cur->next_rule = NULL;
 		cur = cur->next;
 	}
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
  *
  * The chain write lock is taken and released internally.
  *
  * Returns:
  *	0	the rule was copied and linked into the chain;
  *	EINVAL	the chain is empty and the rule is not the default rule, or
  *		the chain holds no rule with a larger number so there is no
  *		place to insert the new one;
  *	ENOSPC	the private copy of the rule could not be allocated.
  */
 static int
 add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
 	struct ip_fw *rule, *f, *prev;
 	int l = RULESIZE(input_rule);
 
 	/* The very first rule installed must be the default rule. */
 	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
 		return (EINVAL);
 
 	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
 
 	bcopy(input_rule, rule, l);
 
 	/* Do not trust link pointers or counters supplied by the caller. */
 	rule->next = NULL;
 	rule->next_rule = NULL;
 
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
 	IPFW_WLOCK(chain);
 
 	if (chain->rules == NULL) {	/* default rule */
 		chain->rules = rule;
 		goto done;
 	}
 
 	/*
 	 * If rulenum is 0, find highest numbered rule before the
 	 * default rule, and add autoinc_step
 	 */
 	if (autoinc_step < 1)
 		autoinc_step = 1;
 	else if (autoinc_step > 1000)
 		autoinc_step = 1000;
 	if (rule->rulenum == 0) {
 		/*
 		 * locate the highest numbered rule before default
 		 */
 		for (f = chain->rules; f; f = f->next) {
 			if (f->rulenum == IPFW_DEFAULT_RULE)
 				break;
 			rule->rulenum = f->rulenum;
 		}
 		if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step)
 			rule->rulenum += autoinc_step;
 		input_rule->rulenum = rule->rulenum;
 	}
 
 	/*
 	 * Now find the right place in the sorted list: the new rule goes
 	 * immediately before the first rule with a larger number.
 	 */
 	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
 		if (f->rulenum > rule->rulenum)
 			break;
 	}
 	if (f == NULL) {
 		/*
 		 * No rule has a larger number, so the new rule cannot be
 		 * placed in the list.  Do not leak the copy or account for
 		 * a rule that was never installed.
 		 */
 		IPFW_WUNLOCK(chain);
 		free(rule, M_IPFW);
 		return (EINVAL);
 	}
 	rule->next = f;
 	if (prev != NULL)
 		prev->next = rule;
 	else			/* head insert */
 		chain->rules = rule;
 	flush_rule_ptrs(chain);
 done:
 	static_count++;
 	static_len += l;
 	IPFW_WUNLOCK(chain);
 	DEB(printf("ipfw: installed rule %d, static count now %d\n",
 		rule->rulenum, static_count);)
 	return (0);
 }
 
 /**
  * Unlink a static rule from the chain (also removing the dynamic rules
  * derived from it) and push it onto chain->reap for later reclamation.
  * The static rule accounting (static_count, static_len) is adjusted.
  *
  * The chain must be write-locked by the caller (asserted here).
  * The caller is in charge of clearing rule pointers to avoid
  * dangling pointers.
  *
  * @arg prev is the rule preceding 'rule' in the chain, or NULL when
  * 'rule' is the head of the list.
  * @return a pointer to the entry that followed 'rule'.
  * Arguments are not checked, so they better be correct.
  */
 static struct ip_fw *
 remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev)
 {
 	const int rule_bytes = RULESIZE(rule);
 	struct ip_fw *successor;
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	successor = rule->next;
 
 	IPFW_DYN_LOCK();
 	remove_dyn_rule(rule, NULL /* force removal */);
 	IPFW_DYN_UNLOCK();
 
 	/* Splice the rule out of the active list. */
 	if (prev != NULL)
 		prev->next = successor;
 	else
 		chain->rules = successor;
 
 	static_len -= rule_bytes;
 	static_count--;
 
 	/* Queue the rule on the reap list; it is not freed here. */
 	rule->next = chain->reap;
 	chain->reap = rule;
 
 	return (successor);
 }
 
 /**
  * Free every rule on a singly linked list of rules, typically the
  * list built up by remove_rule.  When dummynet is loaded it is told
  * about each rule before the rule's memory is released.
  *
  * @arg head is the first rule of the list; NULL means nothing to do.
  */
 static void
 reap_rules(struct ip_fw *head)
 {
 	struct ip_fw *doomed, *following;
 
 	for (doomed = head; doomed != NULL; doomed = following) {
 		/* Fetch the link first: the rule is freed below. */
 		following = doomed->next;
 		if (DUMMYNET_LOADED)
 			ip_dn_ruledel_ptr(doomed);
 		free(doomed, M_IPFW);
 	}
 }
 
 /*
  * Empty a chain.  Rules belonging to set RESVD_SET are kept unless
  * kill_default is non-zero, in which case everything goes.
  *
  * Removed rules are only moved to chain->reap; the caller is
  * responsible for reclaiming their storage.  The chain must be
  * write-locked by the caller (asserted here).
  */
 static void
 free_chain(struct ip_fw_chain *chain, int kill_default)
 {
 	struct ip_fw *cur, *last_kept;
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	/* Flush once up front rather than once per removed rule. */
 	flush_rule_ptrs(chain);
 
 	last_kept = NULL;
 	cur = chain->rules;
 	while (cur != NULL) {
 		if (!kill_default && cur->set == RESVD_SET) {
 			/* Protected rule: leave it and move on. */
 			last_kept = cur;
 			cur = cur->next;
 			continue;
 		}
 		cur = remove_rule(chain, cur, last_kept);
 	}
 }
 
 /**
  * Delete rules by number or by set, or move/swap rules between sets.
  *
  * The request is packed into a single u_int32_t:
  *	bits  0..15	rule number, or (old) set number
  *	bits 16..23	new set
  *	bits 24..31	command:
  *
  *	0	delete rules with given number
  *	1	delete rules with given set number
  *	2	move rules with given number to new set
  *	3	move rules with given set number to new set
  *	4	swap sets with given numbers
  *
  * The chain write lock is taken and released internally.  The list
  * walks below do not test for a NULL link; they stop at a rule whose
  * number is at least the one searched for (at the latest a rule
  * numbered IPFW_DEFAULT_RULE), so the chain must not be empty and is
  * expected to end with such a rule.
  *
  * @return 0 on success, EINVAL for a malformed request or when no rule
  * with the given number exists (command 0).
  */
 static int
 del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 {
 	const u_int16_t target = arg & 0xffff;	/* rule or old_set */
 	const u_int8_t op = (arg >> 24) & 0xff;
 	const u_int8_t new_set = (arg >> 16) & 0xff;
 	struct ip_fw *cur, *before = NULL;
 
 	/* Validate the request before touching the chain. */
 	if (op > 4 || new_set > RESVD_SET)
 		return (EINVAL);
 	if (op == 0 || op == 2) {
 		/* 'target' is a rule number; the default rule is off limits. */
 		if (target >= IPFW_DEFAULT_RULE)
 			return (EINVAL);
 	} else if (target > RESVD_SET) {
 		/* 'target' is a set number. */
 		return (EINVAL);
 	}
 
 	IPFW_WLOCK(chain);
 	cur = chain->rules;
 	chain->reap = NULL;
 
 	switch (op) {
 	case 0:	/* delete rules with given number */
 		/* Advance to the first rule numbered >= target. */
 		while (cur->rulenum < target) {
 			before = cur;
 			cur = cur->next;
 		}
 		if (cur->rulenum != target) {
 			IPFW_WUNLOCK(chain);
 			return (EINVAL);
 		}
 
 		/*
 		 * Flush the cached pointers once, then drop every rule
 		 * carrying this number.  'before' never changes because
 		 * all the victims are adjacent.
 		 */
 		flush_rule_ptrs(chain);
 		do {
 			cur = remove_rule(chain, cur, before);
 		} while (cur->rulenum == target);
 		break;
 
 	case 1:	/* delete all rules with given set number */
 		flush_rule_ptrs(chain);
 		cur = chain->rules;
 		while (cur->rulenum < IPFW_DEFAULT_RULE) {
 			if (cur->set != target) {
 				before = cur;
 				cur = cur->next;
 			} else {
 				cur = remove_rule(chain, cur, before);
 			}
 		}
 		break;
 
 	case 2:	/* move rules with given number to new set */
 		cur = chain->rules;
 		while (cur->rulenum < IPFW_DEFAULT_RULE) {
 			if (cur->rulenum == target)
 				cur->set = new_set;
 			cur = cur->next;
 		}
 		break;
 
 	case 3: /* move rules with given set number to new set */
 		while (cur->rulenum < IPFW_DEFAULT_RULE) {
 			if (cur->set == target)
 				cur->set = new_set;
 			cur = cur->next;
 		}
 		break;
 
 	case 4: /* swap two sets */
 		while (cur->rulenum < IPFW_DEFAULT_RULE) {
 			if (cur->set == target)
 				cur->set = new_set;
 			else if (cur->set == new_set)
 				cur->set = target;
 			cur = cur->next;
 		}
 		break;
 
 	default:	/* not reached: op was validated above */
 		break;
 	}
 
 	/*
 	 * Detach the list of removed rules while still holding the
 	 * lock, then free them after dropping it to avoid a LOR with
 	 * dummynet.
 	 */
 	cur = chain->reap;
 	chain->reap = NULL;
 	IPFW_WUNLOCK(chain);
 	if (cur != NULL)
 		reap_rules(cur);
 	return (0);
 }
 
 /*
  * Reset the counters of a single rule.
  *
  * When log_only is zero the packet/byte counters and the timestamp are
  * zeroed.  In either case, if the instruction at the start of the
  * rule's action part is O_LOG, its log_left is reset to max_log.
  *
  * The enclosing "table" is assumed locked.
  */
 static void
 clear_counters(struct ip_fw *rule, int log_only)
 {
 	ipfw_insn_log *logcmd;
 
 	if (!log_only) {
 		rule->timestamp = 0;
 		rule->pcnt = 0;
 		rule->bcnt = 0;
 	}
 
 	logcmd = (ipfw_insn_log *)ACTION_PTR(rule);
 	if (logcmd->o.opcode == O_LOG)
 		logcmd->log_left = logcmd->max_log;
 }
 
 /**
  * Reset some or all counters on firewall rules.
  * @arg frwl is null to clear all entries, or contains a specific
  * rule number.
  * @arg log_only is 1 if we only want to reset logs, zero otherwise.
  */
 static int
 zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only)
 {
 	struct ip_fw *rule;
 	char *msg;
 
 	IPFW_WLOCK(chain);
 	if (rulenum == 0) {
 		norule_counter = 0;
 		for (rule = chain->rules; rule; rule = rule->next)
 			clear_counters(rule, log_only);
 		msg = log_only ? "ipfw: All logging counts reset.\n" :
 				"ipfw: Accounting cleared.\n";
 	} else {
 		int cleared = 0;
 		/*
 		 * We can have multiple rules with the same number, so we
 		 * need to clear them all.
 		 */
 		for (rule = chain->rules; rule; rule = rule->next)
 			if (rule->rulenum == rulenum) {
 				while (rule && rule->rulenum == rulenum) {
 					clear_counters(rule, log_only);
 					rule = rule->next;
 				}
 				cleared = 1;
 				break;
 			}
 		if (!cleared) {	/* we did not find any matching rules */
 			IPFW_WUNLOCK(chain);
 			return (EINVAL);
 		}
 		msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
 				"ipfw: Entry %d cleared.\n";
 	}
 	IPFW_WUNLOCK(chain);
 
 	if (fw_verbose)
 		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
 	return (0);
 }
 
 /*
  * Check validity of the structure before insert.
  * Fortunately rules are simple, so this mostly needs to check rule sizes.
  *
  * rule is the candidate rule and size the number of bytes supplied for
  * it; size must equal RULESIZE(rule).
  *
  * Returns 0 when the rule is acceptable.  Returns EINVAL when a size or
  * offset is wrong, an opcode is unknown, the action is missing, duplicated
  * or not the last instruction, or the action needs a facility that is not
  * available (divert, netgraph, IPFIREWALL_FORWARD).  Returns
  * EPROTONOSUPPORT for IPv6 opcodes when the kernel is built without INET6.
  *
  * Side effect: each O_LOG instruction gets log_left reset to max_log.
  */
 static int
 check_ipfw_struct(struct ip_fw *rule, int size)
 {
 	int l, cmdlen = 0;
 	int have_action=0;
 	ipfw_insn *cmd;
 
 	if (size < sizeof(*rule)) {
 		printf("ipfw: rule too short\n");
 		return (EINVAL);
 	}
 	/* first, check for valid size */
 	l = RULESIZE(rule);
 	if (l != size) {
 		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
 		return (EINVAL);
 	}
 	/* the action must start inside the instruction array */
 	if (rule->act_ofs >= rule->cmd_len) {
 		printf("ipfw: bogus action offset (%u > %u)\n",
 		    rule->act_ofs, rule->cmd_len - 1);
 		return (EINVAL);
 	}
 	/*
 	 * Now go for the individual checks. Very simple ones, basically only
 	 * instruction sizes.
 	 *
 	 * l is the number of ipfw_insn-sized units still to be examined and
 	 * cmdlen the length of the current instruction in the same units
 	 * (cmd is advanced by cmdlen on every iteration).
 	 */
 	for (l = rule->cmd_len, cmd = rule->cmd ;
 			l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 		if (cmdlen > l) {
 			printf("ipfw: opcode %d size truncated\n",
 			    cmd->opcode);
 			return EINVAL;
 		}
 		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
 		switch (cmd->opcode) {
 		/* opcodes that must be exactly one plain ipfw_insn */
 		case O_PROBE_STATE:
 		case O_KEEP_STATE:
 		case O_PROTO:
 		case O_IP_SRC_ME:
 		case O_IP_DST_ME:
 		case O_LAYER2:
 		case O_IN:
 		case O_FRAG:
 		case O_DIVERTED:
 		case O_IPOPT:
 		case O_IPTOS:
 		case O_IPPRECEDENCE:
 		case O_IPVER:
 		case O_TCPWIN:
 		case O_TCPFLAGS:
 		case O_TCPOPTS:
 		case O_ESTAB:
 		case O_VERREVPATH:
 		case O_VERSRCREACH:
 		case O_ANTISPOOF:
 		case O_IPSEC:
 #ifdef INET6
 		case O_IP6_SRC_ME:
 		case O_IP6_DST_ME:
 		case O_EXT_HDR:
 		case O_IP6:
 #endif
 		case O_IP4:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		/* opcodes that must be exactly one ipfw_insn_u32 */
 		case O_UID:
 		case O_GID:
 		case O_JAIL:
 		case O_IP_SRC:
 		case O_IP_DST:
 		case O_TCPSEQ:
 		case O_TCPACK:
 		case O_PROB:
 		case O_ICMPTYPE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_LIMIT:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
 				goto bad_size;
 			break;
 
 		case O_LOG:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
 				goto bad_size;
 
 			/* start the rule with its full logging budget */
 			((ipfw_insn_log *)cmd)->log_left =
 			    ((ipfw_insn_log *)cmd)->max_log;
 
 			break;
 
 		case O_IP_SRC_MASK:
 		case O_IP_DST_MASK:
 			/* only odd command lengths */
 			if ( !(cmdlen & 1) || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_SET:
 		case O_IP_DST_SET:
 			/* arg1 is the set size; one 32-bit word per 32 entries */
 			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
 				printf("ipfw: invalid set size %d\n",
 					cmd->arg1);
 				return EINVAL;
 			}
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    (cmd->arg1+31)/32 )
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_LOOKUP:
 		case O_IP_DST_LOOKUP:
 			/* arg1 is the table number */
 			if (cmd->arg1 >= IPFW_TABLES_MAX) {
 				printf("ipfw: invalid table number %d\n",
 				    cmd->arg1);
 				return (EINVAL);
 			}
 			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
 			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_MACADDR2:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
 				goto bad_size;
 			break;
 
 		/* variable-length opcodes, only bounded here */
 		case O_NOP:
 		case O_IPID:
 		case O_IPTTL:
 		case O_IPLEN:
 		case O_TCPDATALEN:
 			if (cmdlen < 1 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_MAC_TYPE:
 		case O_IP_SRCPORT:
 		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
 			if (cmdlen < 2 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_RECV:
 		case O_XMIT:
 		case O_VIA:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
 				goto bad_size;
 			break;
 
 		case O_ALTQ:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
 				goto bad_size;
 			break;
 
 		/*
 		 * Everything from here down to check_action is an action
 		 * opcode: after its own size check it goes through the
 		 * shared "single action, and it is last" test.
 		 */
 		case O_PIPE:
 		case O_QUEUE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			goto check_action;
 
 		case O_FORWARD_IP:
 #ifdef	IPFIREWALL_FORWARD
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
 				goto bad_size;
 			goto check_action;
 #else
 			return EINVAL;
 #endif
 
 		case O_DIVERT:
 		case O_TEE:
 			/* rejected while no divert handler is registered */
 			if (ip_divert_ptr == NULL)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_NETGRAPH:
 		case O_NGTEE:
 			if (!NG_IPFW_LOADED)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_FORWARD_MAC: /* XXX not implemented yet */
 		case O_CHECK_STATE:
 		case O_COUNT:
 		case O_ACCEPT:
 		case O_DENY:
 		case O_REJECT:
 #ifdef INET6
 		case O_UNREACH6:
 #endif
 		case O_SKIPTO:
 check_size:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 check_action:
 			if (have_action) {
 				printf("ipfw: opcode %d, multiple actions"
 					" not allowed\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			have_action = 1;
 			/* l == cmdlen means nothing follows this instruction */
 			if (l != cmdlen) {
 				printf("ipfw: opcode %d, action must be"
 					" last opcode\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			break;
 #ifdef INET6
 		case O_IP6_SRC:
 		case O_IP6_DST:
 			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
 			    F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		case O_FLOW6ID:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    ((ipfw_insn_u32 *)cmd)->o.arg1)
 				goto bad_size;
 			break;
 
 		case O_IP6_SRC_MASK:
 		case O_IP6_DST_MASK:
 			if ( !(cmdlen & 1) || cmdlen > 127)
 				goto bad_size;
 			break;
 		case O_ICMP6TYPE:
 			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
 				goto bad_size;
 			break;
 #endif
 
 		default:
 			/*
 			 * Without INET6 the IPv6 opcodes end up here and get
 			 * a dedicated error instead of "unknown opcode".
 			 */
 			switch (cmd->opcode) {
 #ifndef INET6
 			case O_IP6_SRC_ME:
 			case O_IP6_DST_ME:
 			case O_EXT_HDR:
 			case O_IP6:
 			case O_UNREACH6:
 			case O_IP6_SRC:
 			case O_IP6_DST:
 			case O_FLOW6ID:
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 			case O_ICMP6TYPE:
 				printf("ipfw: no IPv6 support in kernel\n");
 				return EPROTONOSUPPORT;
 #endif
 			default:
 				printf("ipfw: opcode %d, unknown opcode\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 		}
 	}
 	if (have_action == 0) {
 		printf("ipfw: missing action\n");
 		return EINVAL;
 	}
 	return 0;
 
 	/* shared exit for every per-opcode size check above */
 bad_size:
 	printf("ipfw: opcode %d size %d wrong\n",
 		cmd->opcode, cmdlen);
 	return EINVAL;
 }
 
 /*
  * Serialize the static rules of "chain" followed by every dynamic rule
  * into "buf", which holds "space" bytes, and return how many bytes were
  * written.  An entry that does not fit in the remaining room is left out.
  */
 static size_t
 ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
 {
 	char *out = buf;
 	char *limit = out + space;
 	struct ip_fw *rule;
 	ipfw_dyn_rule *dyn, *copy, *tail;
 	int len, bucket;
 
 	/* XXX this can take a long time and locking will block packet flow */
 	IPFW_RLOCK(chain);
 	for (rule = chain->rules; rule != NULL; rule = rule->next) {
 		/*
 		 * The rule set may have grown since the caller sized the
 		 * buffer, so re-check the room for each entry.  A generation
 		 * number would be nicer, but this suffices for now.
 		 */
 		len = RULESIZE(rule);
 		if (out + len > limit)
 			continue;
 		bcopy(rule, out, len);
 		/* the next_rule slot of the copy carries set_disable */
 		bcopy(&set_disable, &(((struct ip_fw *)out)->next_rule),
 		    sizeof(set_disable));
 		out += len;
 	}
 	IPFW_RUNLOCK(chain);
 
 	if (ipfw_dyn_v == NULL)
 		return (out - (char *)buf);
 
 	tail = NULL;
 	IPFW_DYN_LOCK();
 	for (bucket = 0; bucket < curr_dyn_buckets; bucket++) {
 		for (dyn = ipfw_dyn_v[bucket]; dyn != NULL; dyn = dyn->next) {
 			if (out + sizeof *dyn > limit)
 				continue;
 			copy = (ipfw_dyn_rule *)out;
 			bcopy(dyn, copy, sizeof *dyn);
 			/* the "rule" slot carries the parent rule number */
 			bcopy(&(dyn->rule->rulenum), &(copy->rule),
 			    sizeof(dyn->rule->rulenum));
 			/*
 			 * Put something non-NULL in "next": userland treats
 			 * a NULL there as the end-of-dynamic-rules marker.
 			 */
 			bcopy(&copy, &copy->next, sizeof(copy));
 			tail = copy;
 			/* report remaining lifetime, not absolute uptime */
 			if (TIME_LEQ(copy->expire, time_uptime))
 				copy->expire = 0;
 			else
 				copy->expire = copy->expire - time_uptime;
 			out += sizeof(ipfw_dyn_rule);
 		}
 	}
 	IPFW_DYN_UNLOCK();
 	/* mark last dynamic rule */
 	if (tail != NULL)
 		bzero(&tail->next, sizeof(tail));
 	return (out - (char *)buf);
 }
 
 
 /**
  * {set|get}sockopt parser.
  */
 static int
 ipfw_ctl(struct sockopt *sopt)
 {
 #define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
 	int error, rule_num;
 	size_t size;
 	struct ip_fw *buf, *rule;
 	u_int32_t rulenum[2];
 
 	error = suser(sopt->sopt_td);
 	if (error)
 		return (error);
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
 	if (sopt->sopt_name == IP_FW_ADD ||
 	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
 		if (error)
 			return (error);
 	}
 
 	error = 0;
 
 	switch (sopt->sopt_name) {
 	case IP_FW_GET:
 		/*
 		 * pass up a copy of the current rules. Static rules
 		 * come first (the last of which has number IPFW_DEFAULT_RULE),
 		 * followed by a possibly empty list of dynamic rule.
 		 * The last dynamic rule has NULL in the "next" field.
 		 *
 		 * Note that the calculated size is used to bound the
 		 * amount of data returned to the user.  The rule set may
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
 		size = static_len;	/* size of static rules */
 		if (ipfw_dyn_v)		/* add size of dyn.rules */
 			size += (dyn_count * sizeof(ipfw_dyn_rule));
 
 		/*
 		 * XXX todo: if the user passes a short length just to know
 		 * how much room is needed, do not bother filling up the
 		 * buffer, just jump to the sooptcopyout.
 		 */
 		buf = malloc(size, M_TEMP, M_WAITOK);
 		error = sooptcopyout(sopt, buf,
 				ipfw_getrules(&layer3_chain, buf, size));
 		free(buf, M_TEMP);
 		break;
 
 	case IP_FW_FLUSH:
 		/*
 		 * Normally we cannot release the lock on each iteration.
 		 * We could do it here only because we start from the head all
 		 * the times so there is no risk of missing some entries.
 		 * On the other hand, the risk is that we end up with
 		 * a very inconsistent ruleset, so better keep the lock
 		 * around the whole cycle.
 		 *
 		 * XXX this code can be improved by resetting the head of
 		 * the list to point to the default rule, and then freeing
 		 * the old list without the need for a lock.
 		 */
 
 		IPFW_WLOCK(&layer3_chain);
 		layer3_chain.reap = NULL;
 		free_chain(&layer3_chain, 0 /* keep default rule */);
 		rule = layer3_chain.reap, layer3_chain.reap = NULL;
 		IPFW_WUNLOCK(&layer3_chain);
 		if (layer3_chain.reap != NULL)
 			reap_rules(rule);
 		break;
 
 	case IP_FW_ADD:
 		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
 		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
 			sizeof(struct ip_fw) );
 		if (error == 0)
 			error = check_ipfw_struct(rule, sopt->sopt_valsize);
 		if (error == 0) {
 			error = add_rule(&layer3_chain, rule);
 			size = RULESIZE(rule);
 			if (!error && sopt->sopt_dir == SOPT_GET)
 				error = sooptcopyout(sopt, rule, size);
 		}
 		free(rule, M_TEMP);
 		break;
 
 	case IP_FW_DEL:
 		/*
 		 * IP_FW_DEL is used for deleting single rules or sets,
 		 * and (ab)used to atomically manipulate sets. Argument size
 		 * is used to distinguish between the two:
 		 *    sizeof(u_int32_t)
 		 *	delete single rule or set of rules,
 		 *	or reassign rules (or sets) to a different set.
 		 *    2*sizeof(u_int32_t)
 		 *	atomic disable/enable sets.
 		 *	first u_int32_t contains sets to be disabled,
 		 *	second u_int32_t contains sets to be enabled.
 		 */
 		error = sooptcopyin(sopt, rulenum,
 			2*sizeof(u_int32_t), sizeof(u_int32_t));
 		if (error)
 			break;
 		size = sopt->sopt_valsize;
 		if (size == sizeof(u_int32_t))	/* delete or reassign */
 			error = del_entry(&layer3_chain, rulenum[0]);
 		else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */
 			set_disable =
 			    (set_disable | rulenum[0]) & ~rulenum[1] &
 			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
 		else
 			error = EINVAL;
 		break;
 
 	case IP_FW_ZERO:
 	case IP_FW_RESETLOG: /* argument is an int, the rule number */
 		rule_num = 0;
 		if (sopt->sopt_val != 0) {
 		    error = sooptcopyin(sopt, &rule_num,
 			    sizeof(int), sizeof(int));
 		    if (error)
 			break;
 		}
 		error = zero_entry(&layer3_chain, rule_num,
 			sopt->sopt_name == IP_FW_RESETLOG);
 		break;
 
 	case IP_FW_TABLE_ADD:
 		{
 			ipfw_table_entry ent;
 
 			error = sooptcopyin(sopt, &ent,
 			    sizeof(ent), sizeof(ent));
 			if (error)
 				break;
 			error = add_table_entry(&layer3_chain, ent.tbl,
 			    ent.addr, ent.masklen, ent.value);
 		}
 		break;
 
 	case IP_FW_TABLE_DEL:
 		{
 			ipfw_table_entry ent;
 
 			error = sooptcopyin(sopt, &ent,
 			    sizeof(ent), sizeof(ent));
 			if (error)
 				break;
 			error = del_table_entry(&layer3_chain, ent.tbl,
 			    ent.addr, ent.masklen);
 		}
 		break;
 
 	case IP_FW_TABLE_FLUSH:
 		{
 			u_int16_t tbl;
 
 			error = sooptcopyin(sopt, &tbl,
 			    sizeof(tbl), sizeof(tbl));
 			if (error)
 				break;
 			IPFW_WLOCK(&layer3_chain);
 			error = flush_table(&layer3_chain, tbl);
 			IPFW_WUNLOCK(&layer3_chain);
 		}
 		break;
 
 	case IP_FW_TABLE_GETSIZE:
 		{
 			u_int32_t tbl, cnt;
 
 			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
 			    sizeof(tbl))))
 				break;
 			IPFW_RLOCK(&layer3_chain);
 			if ((error = count_table(&layer3_chain, tbl, &cnt)))
 				break;
 			IPFW_RUNLOCK(&layer3_chain);
 			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
 		}
 		break;
 
 	case IP_FW_TABLE_LIST:
 		{
 			ipfw_table *tbl;
 
 			if (sopt->sopt_valsize < sizeof(*tbl)) {
 				error = EINVAL;
 				break;
 			}
 			size = sopt->sopt_valsize;
 			tbl = malloc(size, M_TEMP, M_WAITOK);
 			if (tbl == NULL) {
 				error = ENOMEM;
 				break;
 			}
 			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
 			if (error) {
 				free(tbl, M_TEMP);
 				break;
 			}
 			tbl->size = (size - sizeof(*tbl)) /
 			    sizeof(ipfw_table_entry);
 			IPFW_WLOCK(&layer3_chain);
 			error = dump_table(&layer3_chain, tbl);
 			if (error) {
 				IPFW_WUNLOCK(&layer3_chain);
 				free(tbl, M_TEMP);
 				break;
 			}
 			IPFW_WUNLOCK(&layer3_chain);
 			error = sooptcopyout(sopt, tbl, size);
 			free(tbl, M_TEMP);
 		}
 		break;
 
 	default:
 		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
 		error = EINVAL;
 	}
 
 	return (error);
 #undef RULE_MAXSIZE
 }
 
 /**
  * dummynet needs a reference to the default rule, because rules can be
  * deleted while packets hold a reference to them. When this happens,
  * dummynet changes the reference to the default rule (it could well be a
  * NULL pointer, but this way we do not need to check for the special
  * case, plus here we have info on the default behaviour).
  */
 struct ip_fw *ip_fw_default_rule;
 
 /*
  * Keepalive timer for dynamic rules.  Each run re-arms the callout for
  * dyn_keepalive_period seconds later, whether or not any work was done.
  */
 static void
 ipfw_tick(void * __unused unused)
 {
 	struct mbuf *head, *pkt, *following, **tailp;
 	ipfw_dyn_rule *dyn;
 	int bucket;
 
 	if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0)
 		goto done;
 
 	/*
 	 * Keepalives are only queued while the dynamic rule lock is held
 	 * and transmitted after it is dropped; sending them under the lock
 	 * would create a lock order reversal with the normal
 	 * packet input -> ipfw call stack.
 	 */
 	head = NULL;
 	tailp = &head;
 	IPFW_DYN_LOCK();
 	for (bucket = 0; bucket < curr_dyn_buckets; bucket++) {
 		for (dyn = ipfw_dyn_v[bucket]; dyn != NULL; dyn = dyn->next) {
 			/* only TCP flows that saw a SYN in both directions */
 			if (dyn->dyn_type == O_LIMIT_PARENT ||
 			    dyn->id.proto != IPPROTO_TCP ||
 			    (dyn->state & BOTH_SYN) != BOTH_SYN)
 				continue;
 			/* too early */
 			if (TIME_LEQ(time_uptime + dyn_keepalive_interval,
 			    dyn->expire))
 				continue;
 			/* too late, rule expired */
 			if (TIME_LEQ(dyn->expire, time_uptime))
 				continue;
 
 			/* one probe per direction, appended to the chain */
 			*tailp = send_pkt(&(dyn->id), dyn->ack_rev - 1,
 			    dyn->ack_fwd, TH_SYN);
 			if (*tailp != NULL)
 				tailp = &(*tailp)->m_nextpkt;
 			*tailp = send_pkt(&(dyn->id), dyn->ack_fwd - 1,
 			    dyn->ack_rev, 0);
 			if (*tailp != NULL)
 				tailp = &(*tailp)->m_nextpkt;
 		}
 	}
 	IPFW_DYN_UNLOCK();
 
 	/* unlink each queued packet and hand it to ip_output() */
 	pkt = head;
 	while (pkt != NULL) {
 		following = pkt->m_nextpkt;
 		pkt->m_nextpkt = NULL;
 		ip_output(pkt, NULL, NULL, 0, NULL, NULL);
 		pkt = following;
 	}
 done:
 	callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL);
 }
 
 /*
  * Bring up the firewall: create the IPv6 sysctl subtree (INET6 only),
  * initialize the layer3 chain lock, the dynamic rule zone, lock and
  * callout, install the default rule, initialize the tables and finally
  * publish ipfw_ctl/ipfw_chk through ip_fw_ctl_ptr/ip_fw_chk_ptr.
  *
  * Returns 0 on success, otherwise the error from add_rule() or
  * init_tables() after destroying the locks and the zone.
  *
  * NOTE(review): the error paths do not call sysctl_ctx_free() on
  * ip6_fw_sysctl_ctx although ipfw_destroy() does; confirm that is intended.
  */
 int
 ipfw_init(void)
 {
 	struct ip_fw default_rule;
 	int error;
 
 #ifdef INET6
 	/* Setup IPv6 fw sysctl tree. */
 	sysctl_ctx_init(&ip6_fw_sysctl_ctx);
 	ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx,
 		SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
 		CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
 	SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
 		OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
 		&fw_deny_unknown_exthdrs, 0,
 		"Deny packets with unknown IPv6 Extension Headers");
 #endif
 
 	layer3_chain.rules = NULL;
-	layer3_chain.want_write = 0;
-	layer3_chain.busy_count = 0;
-	cv_init(&layer3_chain.cv, "Condition variable for IPFW rw locks");
 	IPFW_LOCK_INIT(&layer3_chain);
 	ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule zone",
 	    sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	IPFW_DYN_LOCK_INIT();
 	callout_init(&ipfw_timeout, NET_CALLOUT_MPSAFE);
 
 	/* Build the default rule: a single action instruction. */
 	bzero(&default_rule, sizeof default_rule);
 
 	default_rule.act_ofs = 0;
 	default_rule.rulenum = IPFW_DEFAULT_RULE;
 	default_rule.cmd_len = 1;
 	default_rule.set = RESVD_SET;
 
 	default_rule.cmd[0].len = 1;
 	/* O_ACCEPT with IPFIREWALL_DEFAULT_TO_ACCEPT, O_DENY otherwise */
 	default_rule.cmd[0].opcode =
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 				1 ? O_ACCEPT :
 #endif
 				O_DENY;
 
 	error = add_rule(&layer3_chain, &default_rule);
 	if (error != 0) {
 		printf("ipfw2: error %u initializing default rule "
 			"(support disabled)\n", error);
 		IPFW_DYN_LOCK_DESTROY();
 		IPFW_LOCK_DESTROY(&layer3_chain);
 		uma_zdestroy(ipfw_dyn_rule_zone);
 		return (error);
 	}
 
 	/* The chain holds only the default rule at this point. */
 	ip_fw_default_rule = layer3_chain.rules;
 	/* The "logging" part of this banner is completed further below. */
 	printf("ipfw2 (+ipv6) initialized, divert %s, "
 		"rule-based forwarding "
 #ifdef IPFIREWALL_FORWARD
 		"enabled, "
 #else
 		"disabled, "
 #endif
 		"default to %s, logging ",
 #ifdef IPDIVERT
 		"enabled",
 #else
 		"loadable",
 #endif
 		default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny");
 
 #ifdef IPFIREWALL_VERBOSE
 	fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 	if (fw_verbose == 0)
 		printf("disabled\n");
 	else if (verbose_limit == 0)
 		printf("unlimited\n");
 	else
 		printf("limited to %d packets/entry by default\n",
 		    verbose_limit);
 
 	error = init_tables(&layer3_chain);
 	if (error) {
 		IPFW_DYN_LOCK_DESTROY();
 		IPFW_LOCK_DESTROY(&layer3_chain);
 		uma_zdestroy(ipfw_dyn_rule_zone);
 		return (error);
 	}
 	/* Publish the entry points and start the keepalive timer. */
 	ip_fw_ctl_ptr = ipfw_ctl;
 	ip_fw_chk_ptr = ipfw_chk;
 	callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
 
 	return (0);
 }
 
 /*
  * Tear the firewall down: unhook the entry points, drain the keepalive
  * callout, flush tables and rules (default rule included), then destroy
  * the locks, the dynamic rule zone and, with INET6, the sysctl subtree.
  */
 void
 ipfw_destroy(void)
 {
 	struct ip_fw *doomed;
 
 	ip_fw_chk_ptr = NULL;
 	ip_fw_ctl_ptr = NULL;
 	callout_drain(&ipfw_timeout);
 
 	IPFW_WLOCK(&layer3_chain);
 	flush_tables(&layer3_chain);
 	layer3_chain.reap = NULL;
 	free_chain(&layer3_chain, 1 /* kill default rule */);
 	/* detach the reap list so it can be released after unlocking */
 	doomed = layer3_chain.reap;
 	layer3_chain.reap = NULL;
 	IPFW_WUNLOCK(&layer3_chain);
 
 	if (doomed != NULL) {
 		reap_rules(doomed);
 	}
 
 	IPFW_DYN_LOCK_DESTROY();
 	uma_zdestroy(ipfw_dyn_rule_zone);
 	IPFW_LOCK_DESTROY(&layer3_chain);
 
 #ifdef INET6
 	/* Free IPv6 fw sysctl tree. */
 	sysctl_ctx_free(&ip6_fw_sysctl_ctx);
 #endif
 
 	printf("IP firewall unloaded\n");
 }
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 6f1130ca1114..2250116c6edd 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1,1579 +1,1579 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  * $FreeBSD$
  */
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 #if defined(IPSEC) || defined(FAST_IPSEC)
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 
 #include <sys/socketvar.h>
 
 /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 
 /* When non-zero, ip_input() sends every IPPROTO_RSVP packet to "ours". */
 int rsvp_on = 0;
 
 /*
  * Run-time tunables.  Each variable below is paired with the SYSCTL_INT
  * that exports it under _net_inet_ip.
  */
 int	ipforwarding = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
     &ipforwarding, 0, "Enable IP forwarding between interfaces");
 
 static int	ipsendredirects = 1; /* XXX */
 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
     &ipsendredirects, 0, "Enable sending IP redirects");
 
 int	ip_defttl = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
     &ip_defttl, 0, "Maximum TTL on IP packets");
 
 static int	ip_keepfaith = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
 	&ip_keepfaith,	0,
 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
 
 static int	ip_sendsourcequench = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
 	&ip_sendsourcequench, 0,
 	"Enable the transmission of source quench packets");
 
 int	ip_do_randomid = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
 	&ip_do_randomid, 0,
 	"Assign random ip_id values");
 
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 static int	ip_checkinterface = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
 struct pfil_head inet_pfil_hook;	/* Packet filter hooks */
 
 /* IP input queue; ip_init() registers it with netisr for NETISR_IP. */
 static struct	ifqueue ipintrq;
 static int	ipqmaxlen = IFQ_MAXLEN;
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 /* Maps an IP protocol number to an index into inetsw[]; see ip_init(). */
 u_char	ip_protox[IPPROTO_MAX];
 struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
 struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
 u_long 	in_ifaddrhmask;				/* mask for hash table */
 
 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
 
 /* IP statistics counters, exported as one structure. */
 struct ipstat ipstat;
 SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 /*
  * IP datagram reassembly.
  *
  * Reassembly queues live in IPREASS_NHASH hash buckets; IPREASS_HASH()
  * combines bits of its two arguments and masks the result to a bucket
  * index.
  */
 #define IPREASS_NHASH_LOG2      6
 #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
 #define IPREASS_HMASK           (IPREASS_NHASH - 1)
 #define IPREASS_HASH(x,y) \
 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
 
 static uma_zone_t ipq_zone;
 static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
 static struct mtx ipqlock;
 
 /* Wrappers around the ipqlock mutex. */
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
 #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
 #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
 
 static void	maxnipq_update(void);
 
 static int	maxnipq;	/* Administrative limit on # reass queues. */
 static int	nipq = 0;	/* Total # of reass queues */
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
 	"Current number of IPv4 fragment reassembly queue entries");
 
 static int	maxfragsperpacket;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
 	&maxfragsperpacket, 0,
 	"Maximum number of IPv4 fragments allowed per packet");
 
 /* Callout initialized in ip_init() and stopped in ip_fini(). */
 struct callout	ipport_tick_callout;
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 int	ipstealth = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
     &ipstealth, 0, "");
 #endif
 
 /*
  * ipfw_ether and ipfw_bridge hooks.
  * XXX: Temporary until those are converted to pfil_hooks as well.
  */
 ip_fw_chk_t *ip_fw_chk_ptr = NULL;
 ip_dn_io_t *ip_dn_io_ptr = NULL;
 int fw_enable = 1;
 int fw_one_pass = 1;
 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 /*
  * IP initialization: build the ip_protox[] protocol switch index, with
  * every protocol that has no in-kernel handler mapped to raw IP, then
  * register the pfil head, set up fragment reassembly, start ipport_tick
  * and register the IP input queue with netisr.
  */
 void
 ip_init()
 {
 	struct protosw *pr;
 	int i;
 
 	TAILQ_INIT(&in_ifaddrhead);
 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
 
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Start with every slot of ip_protox[] pointing at IPPROTO_RAW. */
 	i = 0;
 	while (i < IPPROTO_MAX) {
 		ip_protox[i] = pr - inetsw;
 		i++;
 	}
 
 	/*
 	 * Walk the inet protocol switch and give each real IP protocol
 	 * its own slot in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++) {
 		if (pr->pr_domain->dom_family != PF_INET)
 			continue;
 		if (pr->pr_protocol == 0 || pr->pr_protocol == IPPROTO_RAW)
 			continue;
 		/* Be careful to only index valid IP protocols. */
 		if (pr->pr_protocol >= IPPROTO_MAX)
 			continue;
 		ip_protox[pr->pr_protocol] = pr - inetsw;
 	}
 
 	/* Initialize packet filter hooks. */
 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	inet_pfil_hook.ph_af = AF_INET;
 	i = pfil_head_register(&inet_pfil_hook);
 	if (i != 0) {
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 	}
 
 	/* Initialize IP reassembly queue. */
 	IPQ_LOCK_INIT();
 	i = 0;
 	while (i < IPREASS_NHASH) {
 		TAILQ_INIT(&ipq[i]);
 		i++;
 	}
 	maxnipq = nmbclusters / 32;
 	maxfragsperpacket = 16;
 	ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
 	    NULL, UMA_ALIGN_PTR, 0);
 	maxnipq_update();
 
 	/* Start ipport_tick. */
 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
 	ipport_tick(NULL);
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 
 	/* Initialize various other remaining things. */
 	ip_id = time_second & 0xffff;
 	ipintrq.ifq_maxlen = ipqmaxlen;
 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
 }
 
 /*
  * Handler registered by ip_init() for shutdown_pre_sync: stops the
  * ipport_tick callout.  The argument is not used.
  */
 void
 ip_fini(void *xtp)
 {
 	callout_stop(&ipport_tick_callout);
 }
 
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
  */
 void
 ip_input(struct mbuf *m)
 {
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	int    checkif, hlen = 0;
 	u_short sum;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
   	M_ASSERTPKTHDR(m);
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * Firewall or NAT changed destination to local.
 		 * We expect ip_len and ip_off to be in host byte order.
 		 */
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
   		goto ours;
   	}
 
 	ipstat.ips_total++;
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		ipstat.ips_toosmall++;
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		ipstat.ips_badvers++;
 		goto bad;
 	}
 
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		ipstat.ips_badhlen++;
 		goto bad;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			ipstat.ips_badhlen++;
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	/* 127/8 must not appear on wire - RFC1122 */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
 			ipstat.ips_badaddr++;
 			goto bad;
 		}
 	}
 
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		ipstat.ips_badsum++;
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	/*
 	 * Convert fields to host representation.
 	 */
 	ip->ip_len = ntohs(ip->ip_len);
 	if (ip->ip_len < hlen) {
 		ipstat.ips_badlen++;
 		goto bad;
 	}
 	ip->ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip->ip_len) {
 tooshort:
 		ipstat.ips_tooshort++;
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip->ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip->ip_len;
 			m->m_pkthdr.len = ip->ip_len;
 		} else
 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
 	}
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	/*
 	 * Bypass packet filtering for packets from a tunnel (gif).
 	 */
 	if (ip_ipsec_filtergif(m))
 		goto passin;
 #endif /* IPSEC */
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
-	if (inet_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
 	    PFIL_IN, NULL) != 0)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 
 #ifdef IPFIREWALL_FORWARD
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 #ifndef IPFIREWALL_FORWARD_EXTENDED
 	dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
 #else
 	if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
 		/*
 		 * Directly ship on the packet.  This allows to forward packets
 		 * that were destined for us to some other directly connected
 		 * host.
 		 */
 		ip_forward(m, dchg);
 		return;
 	}
 #endif /* IPFIREWALL_FORWARD_EXTENDED */
 #endif /* IPFIREWALL_FORWARD */
 
 passin:
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
          * matter if it is destined to another node, or whether it is 
          * a multicast one, RSVP wants it! and prevents it from being forwarded
          * anywhere else. Also checks if the rsvp daemon is running before
 	 * grabbing the packet.
          */
 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = ip_checkinterface && (ipforwarding == 0) && 
 	    m->m_pkthdr.rcvif != NULL &&
 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
 #ifdef DEV_CARP
 	    !m->m_pkthdr.rcvif->if_carp &&
 #endif
 	    (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
 			goto ours;
 	}
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (m->m_pkthdr.rcvif != NULL &&
 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr)
 				goto ours;
 			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
 				goto ours;
 #ifdef BOOTP_COMPAT
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
 				goto ours;
 #endif
 		}
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		struct in_multi *inm;
 		if (ip_mrouter) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward &&
 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
 				ipstat.ips_cantforward++;
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP)
 				goto ours;
 			ipstat.ips_forward++;
 		}
 		/*
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
 		IN_MULTI_LOCK();
 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
 		IN_MULTI_UNLOCK();
 		if (inm == NULL) {
 			ipstat.ips_notmember++;
 			m_freem(m);
 			return;
 		}
 		goto ours;
 	}
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 
 	/*
 	 * FAITH(Firewall Aided Internet Translator)
 	 */
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
 		if (ip_keepfaith) {
 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
 				goto ours;
 		}
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (ipforwarding == 0) {
 		ipstat.ips_cantforward++;
 		m_freem(m);
 	} else {
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		if (ip_ipsec_fwd(m))
 			goto bad;
 #endif /* IPSEC */
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (ipstealth && hlen > sizeof (struct ip) &&
 	    ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/* Count the packet in the ip address stats */
 	if (ia != NULL) {
 		ia->ia_ifa.if_ipackets++;
 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
 	}
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * Further protocols expect the packet length to be w/o the
 	 * IP header.
 	 */
 	ip->ip_len -= hlen;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
 	 * code - like udp/tcp/raw ip.
 	 */
 	if (ip_ipsec_input(m))
 		goto bad;
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	ipstat.ips_delivered++;
 
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
 	return;
 bad:
 	m_freem(m);
 }
 
 /*
  * After maxnipq has been updated, propagate the change to UMA.  The UMA zone
  * max has slightly different semantics than the sysctl, for historical
  * reasons.
  */
 static void
 maxnipq_update(void)
 {
 	int zone_max;
 
 	/*
 	 * Translate the sysctl value into the bound handed to UMA:
 	 *   negative -> 0, i.e. unlimited allocation;
 	 *   zero     -> 1, a very low bound; the code elsewhere is relied
 	 *               upon to actually refuse new queues and reclaim
 	 *               the current ones;
 	 *   positive -> that specific bound.
 	 */
 	if (maxnipq < 0)
 		zone_max = 0;
 	else if (maxnipq == 0)
 		zone_max = 1;
 	else
 		zone_max = maxnipq;
 
 	uma_zone_set_max(ipq_zone, zone_max);
 }
 
 /*
  * Sysctl handler for the fragment queue limit: reports the current value
  * of maxnipq and, on a write, validates the new value, stores it and
  * pushes it to UMA through maxnipq_update().
  */
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
 	int requested, rc;
 
 	/* Work on a copy so a rejected value never reaches maxnipq. */
 	requested = maxnipq;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc || !req->newptr)
 		return (rc);
 
 	/*
 	 * XXXRW: Might be a good idea to sanity check the argument and place
 	 * an extreme upper bound.
 	 */
 	if (requested < -1)
 		return (EINVAL);
 
 	maxnipq = requested;
 	maxnipq_update();
 	return (0);
 }
 
 /*
  * Register sysctl_maxnipq() as the handler of the read/write integer
  * sysctl "maxfragpackets" under the _net_inet_ip node.
  */
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
  * in between the function will return NULL and store the mbuf
  * in the fragment chain.  If the argument is the last fragment
  * the packet will be reassembled and the pointer to the new
  * mbuf returned for further processing.  Only m_tags attached
  * to the first packet/fragment are preserved.
  * The IP header is *NOT* adjusted out of iplen.
  *
  * The mbuf passed in is always consumed: on a NULL return it has either
  * been linked into a reassembly queue or freed, so the caller must not
  * touch it again.  The fragment queues are protected by IPQ_LOCK, which
  * is taken and released inside this function.
  */
 
 struct mbuf *
 ip_reass(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *p, *q, *nq, *t;
 	struct ipq *fp = NULL;
 	struct ipqhead *head;
 	int i, hlen, next;
 	u_int8_t ecn, ecn0;
 	u_short hash;
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
 	if (maxnipq == 0 || maxfragsperpacket == 0) {
 		ipstat.ips_fragments++;
 		ipstat.ips_fragdropped++;
 		m_freem(m);
 		return (NULL);
 	}
 
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	head = &ipq[hash];
 	IPQ_LOCK();
 
 	/*
 	 * Look for queue of fragments
 	 * of this datagram.
 	 */
 	TAILQ_FOREACH(fp, head, ipq_list)
 		if (ip->ip_id == fp->ipq_id &&
 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
 #ifdef MAC
 		    mac_fragment_match(m, fp) &&
 #endif
 		    ip->ip_p == fp->ipq_p)
 			goto found;
 
 	fp = NULL;
 
 	/*
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
 	if ((nipq > maxnipq) && (maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
 		 */
 		struct ipq *victim = TAILQ_LAST(head, ipqhead);
 		if (victim == NULL) {   /* gak */
 			/* Our bucket is empty: evict from the first non-empty one. */
 			for (i = 0; i < IPREASS_NHASH; i++) {
 				struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
 				if (r) {
 					ipstat.ips_fragtimeout += r->ipq_nfrags;
 					ip_freef(&ipq[i], r);
 					break;
 				}
 			}
 		} else {
 			ipstat.ips_fragtimeout += victim->ipq_nfrags;
 			ip_freef(head, victim);
 		}
 	}
 
 found:
 	/*
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
 	ip->ip_len -= hlen;
 	if (ip->ip_off & IP_MF) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
 			ipstat.ips_toosmall++; /* XXX */
 			/*
 			 * This fragment has not been counted in
 			 * fp->ipq_nfrags yet, so forget the queue to keep
 			 * dropfrag from decrementing a count that was
 			 * never incremented.
 			 */
 			fp = NULL;
 			goto dropfrag;
 		}
 		m->m_flags |= M_FRAG;
 	} else
 		m->m_flags &= ~M_FRAG;
 	ip->ip_off <<= 3;
 
 
 	/*
 	 * Count the fragment and remember where its IP header lives, so
 	 * that GETIP() can find it once the header is hidden below.
 	 */
 	ipstat.ips_fragments++;
 	m->m_pkthdr.header = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
 		fp = uma_zalloc(ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
 			uma_zfree(ipq_zone, fp);
 			/* fp is gone; dropfrag must not dereference it. */
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_create_ipq(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
 		nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
 		fp->ipq_id = ip->ip_id;
 		fp->ipq_src = ip->ip_src;
 		fp->ipq_dst = ip->ip_dst;
 		fp->ipq_frags = m;
 		m->m_nextpkt = NULL;
 		goto done;
 	} else {
 		/* From here on dropfrag has to undo this increment. */
 		fp->ipq_nfrags++;
 #ifdef MAC
 		mac_update_ipq(m, fp);
 #endif
 	}
 
 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT)
 			goto dropfrag;
 		if (ecn0 != IPTOS_ECN_CE)
 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
 		goto dropfrag;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
 		if (GETIP(q)->ip_off > ip->ip_off)
 			break;
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us, otherwise
 	 * stick new segment in the proper place.
 	 *
 	 * If some of the data is dropped from the preceding
 	 * segment, then its checksum is invalidated.
 	 */
 	if (p) {
 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
 		if (i > 0) {
 			if (i >= ip->ip_len)
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
 			ip->ip_off += i;
 			ip->ip_len -= i;
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
 	} else {
 		m->m_nextpkt = fp->ipq_frags;
 		fp->ipq_frags = m;
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
 	     q = nq) {
 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
 		if (i < GETIP(q)->ip_len) {
 			GETIP(q)->ip_len -= i;
 			GETIP(q)->ip_off += i;
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
 		}
 		nq = q->m_nextpkt;
 		m->m_nextpkt = nq;
 		ipstat.ips_fragdropped++;
 		fp->ipq_nfrags--;
 		m_freem(q);
 	}
 
 	/*
 	 * Check for complete reassembly and perform frag per packet
 	 * limiting.
 	 *
 	 * Frag limiting is performed here so that the nth frag has
 	 * a chance to complete the packet before we drop the packet.
 	 * As a result, n+1 frags are actually allowed per packet, but
 	 * only n will ever be stored. (n = maxfragsperpacket.)
 	 *
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		if (GETIP(q)->ip_off != next) {
 			/* There is a hole: the datagram is not complete yet. */
 			if (fp->ipq_nfrags > maxfragsperpacket) {
 				ipstat.ips_fragdropped += fp->ipq_nfrags;
 				ip_freef(head, fp);
 			}
 			goto done;
 		}
 		next += GETIP(q)->ip_len;
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_FRAG) {
 		if (fp->ipq_nfrags > maxfragsperpacket) {
 			ipstat.ips_fragdropped += fp->ipq_nfrags;
 			ip_freef(head, fp);
 		}
 		goto done;
 	}
 
 	/*
 	 * Reassembly is complete.  Make sure the packet is a sane size.
 	 */
 	q = fp->ipq_frags;
 	ip = GETIP(q);
 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
 		ipstat.ips_toolong++;
 		ipstat.ips_fragdropped += fp->ipq_nfrags;
 		ip_freef(head, fp);
 		goto done;
 	}
 
 	/*
 	 * Concatenate fragments.
 	 */
 	m = q;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = q->m_nextpkt;
 	q->m_nextpkt = NULL;
 	for (q = nq; q != NULL; q = nq) {
 		nq = q->m_nextpkt;
 		q->m_nextpkt = NULL;
 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
 		m_cat(m, q);
 	}
 #ifdef MAC
 	mac_create_datagram_from_ipq(fp, m);
 	mac_destroy_ipq(fp);
 #endif
 
 	/*
 	 * Create header for new ip packet by modifying header of first
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
 	ip->ip_len = (ip->ip_hl << 2) + next;
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
 	nipq--;
 	uma_zfree(ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);
 	ipstat.ips_reassembled++;
 	IPQ_UNLOCK();
 	return (m);
 
 dropfrag:
 	/*
 	 * fp is non-NULL here only when it refers to a live queue whose
 	 * ipq_nfrags already accounts for the fragment being dropped.
 	 */
 	ipstat.ips_fragdropped++;
 	if (fp != NULL)
 		fp->ipq_nfrags--;
 	m_freem(m);
 done:
 	IPQ_UNLOCK();
 	return (NULL);
 
 #undef GETIP
 }
 
 /*
  * Free a fragment reassembly header and all
  * associated datagrams.
  */
 static void
 ip_freef(fhp, fp)
 	struct ipqhead *fhp;
 	struct ipq *fp;
 {
 	register struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
 
 	while (fp->ipq_frags) {
 		q = fp->ipq_frags;
 		fp->ipq_frags = q->m_nextpkt;
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
 	uma_zfree(ipq_zone, fp);
 	nipq--;
 }
 
 /*
  * IP timer processing;
  * if a timer expires on a reassembly
  * queue, discard it.
  */
 void
 ip_slowtimo()
 {
 	register struct ipq *fp;
 	int i;
 
 	IPQ_LOCK();
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
 			struct ipq *fpp;
 
 			fpp = fp;
 			fp = TAILQ_NEXT(fp, ipq_list);
 			if(--fpp->ipq_ttl == 0) {
 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
 				ip_freef(&ipq[i], fpp);
 			}
 		}
 	}
 	/*
 	 * If we are over the maximum number of fragments
 	 * (due to the limit being lowered), drain off
 	 * enough to get down to the new limit.
 	 */
 	if (maxnipq >= 0 && nipq > maxnipq) {
 		for (i = 0; i < IPREASS_NHASH; i++) {
 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
 				ipstat.ips_fragdropped +=
 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
 			}
 		}
 	}
 	IPQ_UNLOCK();
 }
 
 /*
  * Drain off all datagram fragments.
  */
 void
 ip_drain()
 {
 	int     i;
 
 	IPQ_LOCK();
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		while(!TAILQ_EMPTY(&ipq[i])) {
 			ipstat.ips_fragdropped +=
 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
 		}
 	}
 	IPQ_UNLOCK();
 	in_rtqdrain();
 }
 
 /*
  * The protocol to be inserted into ip_protox[] must be already registered
  * in inetsw[], either statically or through pf_proto_register().
  */
 int
 ipproto_register(u_char ipproto)
 {
 	struct protosw *raw, *cand;
 
 	/* Protocol number 0 can never be registered. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * A free slot in ip_protox[] is one that still holds the index of
 	 * the IPPROTO_RAW entry; anything else means the slot is taken.
 	 */
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != raw - inetsw)	/* IPPROTO_RAW */
 		return (EEXIST);
 
 	/* Walk the inet protosw table looking for the requested protocol. */
 	for (cand = inetdomain.dom_protosw;
 	     cand < inetdomain.dom_protoswNPROTOSW; cand++) {
 		if (cand->pr_domain->dom_family != PF_INET ||
 		    !cand->pr_protocol || cand->pr_protocol != ipproto)
 			continue;
 
 		/* Be careful to only index valid IP protocols. */
 		if (cand->pr_protocol >= IPPROTO_MAX)
 			return (EINVAL);
 
 		ip_protox[cand->pr_protocol] = cand - inetsw;
 		return (0);
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Undo ipproto_register(): point the ip_protox[] slot of the given
  * protocol back at the IPPROTO_RAW entry.
  */
 int
 ipproto_unregister(u_char ipproto)
 {
 	struct protosw *raw;
 
 	/* Protocol number 0 is never registered. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 
 	/* A slot differing from the IPPROTO_RAW index is a registered one. */
 	if (ip_protox[ipproto] != raw - inetsw) {
 		ip_protox[ipproto] = raw - inetsw;
 		return (0);
 	}
 
 	/* The slot already holds IPPROTO_RAW: nothing was registered. */
 	return (ENOENT);
 }
 
 /*
  * Given address of next destination (final or next hop),
  * return internet address info of interface to be used to get there.
  */
 struct in_ifaddr *
 ip_rtaddr(dst)
 	struct in_addr dst;
 {
 	struct route sro;
 	struct sockaddr_in *sin;
 	struct in_ifaddr *ifa;
 
 	bzero(&sro, sizeof(sro));
 	sin = (struct sockaddr_in *)&sro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = dst;
 	rtalloc_ign(&sro, RTF_CLONING);
 
 	if (sro.ro_rt == NULL)
 		return (NULL);
 
 	ifa = ifatoia(sro.ro_rt->rt_ifa);
 	RTFREE(sro.ro_rt);
 	return (ifa);
 }
 
 /*
  * Table of PRC_NCMDS errno values; a 0 entry carries no error.
  * NOTE(review): presumably indexed by PRC_* control command and used to
  * translate such a command into the errno reported to a socket -- confirm
  * against the users of this table.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * The srcrt parameter indicates whether the packet is being forwarded
  * via a source route.
  *
  * The mbuf `m' is consumed on every path.  Once it has been handed to
  * ip_output() neither `m' nor the `ip' pointer into it may be used
  * again; anything needed afterwards is taken from the private copy
  * `mcopy' made beforehand.
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia = NULL;
 	struct mbuf *mcopy;
 	struct in_addr dest;
 	int error, type = 0, code = 0, mtu = 0;
 
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		ipstat.ips_cantforward++;
 		m_freem(m);
 		return;
 	}
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 		if (ip->ip_ttl <= IPTTLDEC) {
 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
 			    0, 0);
 			return;
 		}
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/* Unless source routed, ia is guaranteed non-NULL past this point. */
 	if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return;
 	}
 
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copy() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct sockaddr_in *sin;
 		struct route ro;
 		struct rtentry *rt;
 
 		bzero(&ro, sizeof(ro));
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_addr = ip->ip_dst;
 		rtalloc_ign(&ro, RTF_CLONING);
 
 		rt = ro.ro_rt;
 
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			if (RTA(rt) &&
 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 				if (rt->rt_flags & RTF_GATEWAY)
 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 				else
 					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
 		if (rt)
 			RTFREE(rt);
 	}
 
 	/* `m' (and therefore `ip') is no longer ours after this call. */
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 	if (error)
 		ipstat.ips_cantforward++;
 	else {
 		ipstat.ips_forward++;
 		if (type)
 			ipstat.ips_redirectsent++;
 		else {
 			if (mcopy)
 				m_freem(mcopy);
 			return;
 		}
 	}
 	/* Without the saved copy there is nothing to build an ICMP from. */
 	if (mcopy == NULL)
 		return;
 
 	switch (error) {
 
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:		/* shouldn't happen, checked above */
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		/*
 		 * Consult the saved copy: the original mbuf was given
 		 * to ip_output() above and must not be touched.
 		 */
 		mtu = ip_ipsec_mtu(mcopy);
 #endif /* IPSEC */
 		/*
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu == 0) {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
 				/* Length as saved in mcopy before ip_output(). */
 				mtu = ip_next_mtu(
 				    mtod(mcopy, struct ip *)->ip_len, 0);
 		}
 		ipstat.ips_cantfrag++;
 		break;
 
 	case ENOBUFS:
 		/*
 		 * A router should not generate ICMP_SOURCEQUENCH as
 		 * required in RFC1812 Requirements for IP Version 4 Routers.
 		 * Source quench could be a big problem under DoS attacks,
 		 * or if the underlying interface is rate-limited.
 		 * Those who need source quench packets may re-enable them
 		 * via the net.inet.ip.sendsourcequench sysctl.
 		 */
 		if (ip_sendsourcequench == 0) {
 			m_freem(mcopy);
 			return;
 		} else {
 			type = ICMP_SOURCEQUENCH;
 			code = 0;
 		}
 		break;
 
 	case EACCES:			/* ipfw denied packet */
 		m_freem(mcopy);
 		return;
 	}
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 void
 ip_savecontrol(inp, mp, ip, m)
 	register struct inpcb *inp;
 	register struct mbuf **mp;
 	register struct ip *ip;
 	register struct mbuf *m;
 {
 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 		struct bintime bt;
 
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
 			SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 				SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		if (((ifp = m->m_pkthdr.rcvif)) 
 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if ((sdp->sdl_family != AF_LINK)
 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:	
 			sdl2->sdl_len
 				= offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
 			IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 }
 
 /*
  * XXX these routines are called from the upper part of the kernel.
  * They need to be locked when we remove Giant.
  *
  * They could also be moved to ip_mroute.c, since all the RSVP
  *  handling is done there already.
  */
 /* Nonzero while this file's contribution to rsvp_on is in effect. */
 static int ip_rsvp_on;
 /* Socket registered through ip_rsvp_init(), or NULL when there is none. */
 struct socket *ip_rsvpd;
 
 /*
  * Register `so' as the RSVP socket.  Only a raw IPPROTO_RSVP socket is
  * accepted, and only one socket may be registered at a time.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
 		return (EOPNOTSUPP);
 
 	if (ip_rsvpd != NULL)
 		return (EADDRINUSE);
 
 	ip_rsvpd = so;
 
 	/*
 	 * This may seem silly, but we need to be sure we don't over-increment
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (ip_rsvp_on == 0) {
 		ip_rsvp_on = 1;
 		rsvp_on++;
 	}
 
 	return (0);
 }
 
 /*
  * Forget the registered RSVP socket and withdraw this file's
  * contribution to rsvp_on.  Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 	ip_rsvpd = NULL;
 
 	/*
 	 * This may seem silly, but we need to be sure we don't over-decrement
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (ip_rsvp_on != 0) {
 		ip_rsvp_on = 0;
 		rsvp_on--;
 	}
 
 	return (0);
 }
 
 void
 rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
 {
 	/*
 	 * Dispatch order:
 	 *  - a loaded rsvp_input_p hook gets the packet;
 	 *  - otherwise it goes to rip_input() when RSVP is on and a socket
 	 *    has been registered;
 	 *  - otherwise it is dropped.  Packets can still get here with
 	 *    rsvp_on = 0 if there is a local member of the group to which
 	 *    the RSVP packet is addressed, and those are thrown away.
 	 */
 	if (rsvp_input_p)
 		rsvp_input_p(m, off);
 	else if (rsvp_on && ip_rsvpd != NULL)
 		rip_input(m, off);
 	else
 		m_freem(m);
 }
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index a3fa63bee6fe..915512e56669 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1,1609 +1,1609 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  * $FreeBSD$
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_mbuf_stress_test.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 #include <netinet/ip_ipsec.h>
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #endif
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #endif
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 /* malloc(9) type used in this file for struct ip_moptions allocations. */
 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
 
 /*
  * Debug helper: prints the string x, the struct in_addr a as a dotted
  * quad, then the string y.  Note that "a" is evaluated several times and
  * that the expansion already ends in a semicolon.
  */
 #define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
 				x, (ntohl(a.s_addr)>>24)&0xFF,\
 				  (ntohl(a.s_addr)>>16)&0xFF,\
 				  (ntohl(a.s_addr)>>8)&0xFF,\
 				  (ntohl(a.s_addr))&0xFF, y);
 
 /* NOTE(review): presumably the state behind ip_newid(); confirm. */
 u_short ip_id;
 
 #ifdef MBUF_STRESS_TEST
 /*
  * When nonzero, ip_output() re-fragments outgoing mbuf chains longer
  * than this many bytes via m_fragment(); tunable through sysctl.
  */
 int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 /* Forward declarations of file-local helpers. */
 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
 static void	ip_mloopback
 	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
 static int	ip_getmoptions(struct inpcb *, struct sockopt *);
 static int	ip_setmoptions(struct inpcb *, struct sockopt *);
 
 
 extern	struct protosw inetsw[];
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  *
  * ro may be NULL, in which case a temporary route on the stack is used
  * and released before returning.  If inp is non-NULL its lock must be
  * held (asserted below).  Returns 0 or an errno value.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
 	int flags, struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int len, error = 0;
 	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
 	struct in_ifaddr *ia = NULL;
 	int isbroadcast, sw_csum;
 	struct route iproute;
 	struct in_addr odst;
 #ifdef IPFIREWALL_FORWARD
 	struct m_tag *fwd_tag = NULL;
 #endif
 	M_ASSERTPKTHDR(m);
 
 	/* No caller-supplied route cache: use a zeroed one on the stack. */
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 
 	if (inp != NULL)
 		INP_LOCK_ASSERT(inp);
 
 	if (opt) {
 		len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len;
 	}
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Fill in IP header.  If we are not allowing fragmentation,
 	 * then the ip_id field is meaningless, but we don't set it
 	 * to zero.  Doing so causes various problems when devices along
 	 * the path (routers, load balancers, firewalls, etc.) illegally
 	 * disable DF on our packet.  Note that a 16-bit counter
 	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
 	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
 	 * for Counting NATted Hosts", Proc. IMW'02, available at
 	 * <http://www.research.att.com/~smb/papers/fnat.pdf>.
 	 */
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		ip->ip_id = ip_newid();
 		ipstat.ips_localout++;
 	} else {
 		/* Header is already complete; trust its length field. */
 		hlen = ip->ip_hl << 2;
 	}
 
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 again:
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
 	 * and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing the
 	 * cache with IPv6.
 	 */
 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = (struct rtentry *)0;
 	}
 #ifdef IPFIREWALL_FORWARD
 	if (ro->ro_rt == NULL && fwd_tag == NULL) {
 #else
 	if (ro->ro_rt == NULL) {
 #endif
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 	/*
 	 * If routing to interface only,
 	 * short circuit routing lookup.
 	 */
 	if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
 			ipstat.ips_noroute++;
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * We want to do any cloning requested by the link layer,
 		 * as this is probably required in all cases for correct
 		 * operation (as it is for ARP).
 		 */
 		if (ro->ro_rt == NULL)
 			rtalloc_ign(ro, 0);
 		if (ro->ro_rt == NULL) {
 			ipstat.ips_noroute++;
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_rmx.rmx_pksent++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 		if (ro->ro_rt->rt_flags & RTF_HOST)
 			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		struct in_multi *inm;
 
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "dst"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		dst = (struct sockaddr_in *)&ro->ro_dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				ipstat.ips_noroute++;
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		IN_MULTI_LOCK();
 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
 		if (inm != NULL &&
 		   (imo == NULL || imo->imo_multicast_loop)) {
 			IN_MULTI_UNLOCK();
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
 			 * forbid loopback, loop back a copy.
 			 */
 			ip_mloopback(ifp, m, dst, hlen);
 		}
 		else {
 			IN_MULTI_UNLOCK();
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 #ifndef notdef
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outgoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 	}
 #endif /* notdef */
 	/*
 	 * Verify that we have any chance at all of being able to queue the
 	 * packet or packet fragments, unless ALTQ is enabled on the given
 	 * interface in which case packetdrop should be done by queueing.
 	 */
 #ifdef ALTQ
 	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
 	    ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen))
 #else
 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen)
 #endif /* ALTQ */
 	{
 		error = ENOBUFS;
 		ipstat.ips_odropped++;
 		ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
 		goto bad;
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip->ip_len > ifp->if_mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		if (flags & IP_SENDONES)
 			ip->ip_dst.s_addr = INADDR_BROADCAST;
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
 	case 1:
 		goto bad;
 	case -1:
 		goto done;
 	case 0:
 	default:
 		break;	/* Continue with packet processing. */
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
-	if (inet_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passout;
 
 	/* Run through list of hooks for output packets. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 
 	/* The hooks may have replaced the mbuf; reload the header pointer. */
 	ip = mtod(m, struct ip *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 			error = netisr_queue(NETISR_IP, m);
 			goto done;
 		} else
 			goto again;	/* Redo the routing table lookup. */
 	}
 
 #ifdef IPFIREWALL_FORWARD
 	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 		m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		error = netisr_queue(NETISR_IP, m);
 		goto done;
 	}
 	/* Or forward to some other address? */
 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 	if (fwd_tag) {
 #ifndef IPFIREWALL_FORWARD_EXTENDED
 		if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst)) {
 #endif
 			dst = (struct sockaddr_in *)&ro->ro_dst;
 			bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 			m->m_flags |= M_SKIP_FIREWALL;
 			m_tag_delete(m, fwd_tag);
 			goto again;
 #ifndef IPFIREWALL_FORWARD_EXTENDED
 		} else {
 			m_tag_delete(m, fwd_tag);
 			/* Continue. */
 		}
 #endif
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passout:
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			ipstat.ips_badaddr++;
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	/*
 	 * sw_csum collects the checksum work the interface cannot offload;
 	 * only the offloadable flags are left on the mbuf.
 	 */
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
 	if (sw_csum & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m);
 		sw_csum &= ~CSUM_DELAY_DATA;
 	}
 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, can just send directly.
 	 */
 	if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
 	    ((ip->ip_off & IP_DF) == 0))) {
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			ip->ip_sum = in_cksum(m, hlen);
 
 		/* Record statistics for this interface address. */
 		if (!(flags & IP_FORWARDING) && ia) {
 			ia->ia_ifa.if_opackets++;
 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 		}
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m->m_flags &= ~(M_PROTOFLAGS);
 
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, ro->ro_rt);
 		goto done;
 	}
 
 	if (ip->ip_off & IP_DF) {
 		error = EMSGSIZE;
 		/*
 		 * This case can happen if the user changed the MTU
 		 * of an interface after enabling IP on it.  Because
 		 * most netifs don't keep track of routes pointing to
 		 * them, there is no way for one to update all its
 		 * routes when the MTU is changed.
 		 *
 		 * There may be no route at all here: the IP_ROUTETOIF
 		 * and explicit-multicast-interface paths above never
 		 * look one up, so ro->ro_rt must be checked before it
 		 * is dereferenced.
 		 */
 		if (ro != NULL && ro->ro_rt != NULL &&
 		    (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
 		    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
 		}
 		ipstat.ips_cantfrag++;
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 *
 	 * NOTE(review): on failure ip_fragment() may leave already-built
 	 * fragments linked through m_nextpkt; verify that the m_freem()
 	 * at "bad" releases that chain as well.
 	 */
 	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
 	if (error)
 		goto bad;
 	/*
 	 * Send each fragment in turn.  After the first if_output failure
 	 * the remaining fragments are freed rather than sent.
 	 */
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				ia->ia_ifa.if_opackets++;
 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing lower layers.
 			 */
 			m->m_flags &= ~(M_PROTOFLAGS);
 
 			error = (*ifp->if_output)(ifp, m,
 			    (struct sockaddr *)dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		ipstat.ips_fragmented++;
 
 done:
 	/* Release the route only if it lives in our private cache. */
 	if (ro == &iproute && ro->ro_rt) {
 		RTFREE(ro->ro_rt);
 	}
 	return (error);
 bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * ip is the IP header of the packet being fragmented; its ip_len and
  * ip_off fields are read in host byte order here and are converted to
  * network byte order in the first fragment on success.
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
  *
  * Returns EMSGSIZE when IP_DF is set or fewer than 8 payload bytes fit in
  * a fragment, and ENOBUFS when an mbuf allocation or copy fails.
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags, int sw_csum)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	/* Payload per fragment, rounded down to a multiple of 8 bytes. */
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 
 	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
 		ipstat.ips_cantfrag++;
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
 	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 		struct mbuf *m;
 
 		/* Sum the leading mbufs that fit entirely within one mtu. */
 		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
 			off += m->m_len;
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		/* Largest page multiple not exceeding the mtu. */
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	/* "off" is now the byte offset at which the second fragment starts. */
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		MGETHDR(m, M_DONTWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		/* Carry over the multicast flag and mark as a fragment. */
 		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copy().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			/* Options present: header length follows ip_optcopy(). */
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip->ip_off below ? */
 		/* Fragment offset is expressed in units of 8 bytes. */
 		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
 		if (off + len >= ip->ip_len) {	/* last fragment */
 			len = ip->ip_len - off;
 			m->m_flags |= M_LASTFRAG;
 		} else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_create_fragment(m0, m);
 #endif
 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			mhip->ip_sum = in_cksum(m, mhlen);
 		/* Append the finished fragment to the m_nextpkt chain. */
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	ipstat.ips_ofragments += nfrags;
 
 	/* set first marker for fragment chain */
 	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
 	/* The first fragment's csum_data field carries the fragment count. */
 	m0->m_pkthdr.csum_data = nfrags;
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.  The count passed to m_adj() is negative
 	 * here; see mbuf(9) for how negative counts are handled.
 	 */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off |= IP_MF;
 	ip->ip_off = htons(ip->ip_off);
 	ip->ip_sum = 0;
 	if (sw_csum & CSUM_DELAY_IP)
 		ip->ip_sum = in_cksum(m0, hlen);
 
 done:
 	/* On both success and failure, hand the chain head back to the caller. */
 	*m_frag = m0;
 	return error;
 }
 
 /*
  * Compute the checksum that was deferred for the packet in m and store it
  * in the packet.  The checksum covers the data following the IP header;
  * m_pkthdr.csum_data gives the offset of the checksum field relative to
  * the end of the IP header.  If that field does not lie within the first
  * mbuf, a diagnostic is printed and nothing is written.
  */
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *iph = mtod(m, struct ip *);
 	u_short hdrlen = iph->ip_hl << 2;
 	u_short sum = in_cksum_skip(m, iph->ip_len, hdrlen);
 	u_short where;
 
 	/* For UDP a computed value of zero is stored as 0xffff. */
 	if ((m->m_pkthdr.csum_flags & CSUM_UDP) && sum == 0)
 		sum = 0xffff;
 
 	/* Offset of the checksum field from the start of the mbuf data. */
 	where = hdrlen + m->m_pkthdr.csum_data;
 
 	if (where + sizeof(u_short) <= m->m_len) {
 		*(u_short *)(m->m_data + where) = sum;
 		return;
 	}
 
 	/*
 	 * XXX
 	 * this shouldn't happen, but if it does, the
 	 * correct behavior may be to insert the checksum
 	 * in the appropriate next mbuf in the chain.
 	 */
 	printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
 	    m->m_len, where, iph->ip_p);
 }
 
 /*
  * IP socket option processing.
  *
  * Handles setsockopt (SOPT_SET) and getsockopt (SOPT_GET) requests at
  * level IPPROTO_IP for the socket so.  Returns 0 on success, EINVAL for
  * any other level, ENOPROTOOPT for an option name that is not handled
  * here, or the errno produced while copying or applying the option.
  */
 int
 ip_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		return (EINVAL);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			/* The options must fit in a single mbuf. */
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			/*
 			 * Do not install options from a buffer that was
 			 * never filled in: report the copy failure and
 			 * release the mbuf instead.
 			 */
 			if (error) {
 				m_free(m);
 				break;
 			}
 			INP_LOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_UNLOCK(inp);
 			return (error);
 		}
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 			/* All of these take a single int argument. */
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval > 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 /* Set or clear one inp_flags bit under the inpcb lock. */
 #define	OPTSET(bit) do {						\
 	INP_LOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_UNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_FAITH:
 				OPTSET(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			}
 			break;
 #undef OPTSET
 
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 			error = ip_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/* INP_HIGHPORT and INP_LOWPORT are mutually exclusive. */
 			INP_LOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_UNLOCK(inp);
 			break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			size_t len = 0;
 			int priv;
 			struct mbuf *m;
 			int optname;
 
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			/*
 			 * priv is 1 when there is no requesting thread or
 			 * suser() succeeds for it, 0 otherwise.
 			 */
 			priv = (sopt->sopt_td != NULL &&
 				suser(sopt->sopt_td) != 0) ? 0 : 1;
 			req = mtod(m, caddr_t);
 			len = m->m_len;
 			optname = sopt->sopt_name;
 			error = ipsec4_set_policy(inp, optname, req, len, priv);
 			m_freem(m);
 			break;
 		}
 #endif /*IPSEC*/
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			/* No stored options: report a zero-length value. */
 			if (inp->inp_options)
 				error = sooptcopyout(sopt,
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 /* 1 if the given inp_flags bit is set, 0 otherwise. */
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_FAITH:
 				optval = OPTBIT(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 			error = ip_getmoptions(inp, sopt);
 			break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			/* m starts out NULL, so req/len stay NULL/0 here. */
 			if (m != 0) {
 				req = mtod(m, caddr_t);
 				len = m->m_len;
 			}
 			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /*IPSEC*/
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * XXX
  * The whole multicast option thing needs to be re-thought.
  * Several of these options are equally applicable to non-multicast
  * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
  * standard option (IP_TTL).
  */
 
 /*
  * Map the address *a to an interface.
  *
  * Following RFC1724 section 3.3, an address in 0.0.0.0/8 is interpreted
  * as an interface index held in its low 24 bits; any other address is
  * resolved with INADDR_TO_IFP().  When ifindexp is non-NULL it receives
  * the interface index used for the lookup, or 0 if the lookup was by
  * address or the index was out of range.  Returns NULL for an index
  * outside [0, if_index].
  */
 static struct ifnet *
 ip_multicast_if(a, ifindexp)
 	struct in_addr *a;
 	int *ifindexp;
 {
 	struct ifnet *ifp;
 	int ifindex;
 
 	if (ifindexp != NULL)
 		*ifindexp = 0;
 
 	/* Ordinary address: look the interface up by address. */
 	if ((ntohl(a->s_addr) >> 24) != 0) {
 		INADDR_TO_IFP(*a, ifp);
 		return (ifp);
 	}
 
 	/* 0.0.0.0/8: the low 24 bits carry an interface index. */
 	ifindex = ntohl(a->s_addr) & 0xffffff;
 	if (ifindex < 0 || ifindex > if_index)
 		return (NULL);
 	ifp = ifnet_byindex(ifindex);
 	if (ifindexp != NULL)
 		*ifindexp = ifindex;
 	return (ifp);
 }
 
 /*
  * Return the multicast options structure of inp, creating one with
  * default values if the inpcb has none yet.  The inpcb must be unlocked
  * on entry and is locked on return.  May sleep: the lock is dropped
  * around the M_WAITOK allocation.
  */
 static struct ip_moptions *
 ip_findmoptions(struct inpcb *inp)
 {
 	struct ip_moptions *fresh;
 
 	INP_LOCK(inp);
 	if (inp->inp_moptions == NULL) {
 		/* Allocate without holding the inpcb lock. */
 		INP_UNLOCK(inp);
 
 		fresh = malloc(sizeof(*fresh), M_IPMOPTS, M_WAITOK);
 		fresh->imo_multicast_ifp = NULL;
 		fresh->imo_multicast_addr.s_addr = INADDR_ANY;
 		fresh->imo_multicast_vif = -1;
 		fresh->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 		fresh->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 		fresh->imo_num_memberships = 0;
 
 		/*
 		 * Options may have been installed while the lock was
 		 * dropped; if so keep those and discard ours.
 		 */
 		INP_LOCK(inp);
 		if (inp->inp_moptions == NULL)
 			inp->inp_moptions = fresh;
 		else
 			free(fresh, M_IPMOPTS);
 	}
 	return (inp->inp_moptions);
 }
 
 /*
  * Set the IP multicast options in response to user setsockopt().
  *
  * inp  - inpcb of the socket; passed in unlocked and returned unlocked.
  * sopt - socket option request; sopt_name selects the option and the
  *        new value is fetched with sooptcopyin().
  *
  * Returns 0 on success or an errno value.
  *
  * Locking: each case obtains the options structure via ip_findmoptions(),
  * which returns with the inpcb locked (and may sleep while allocating).
  * Every exit path taken after that call must therefore INP_UNLOCK(inp);
  * paths that bail out before the call must not.
  */
 static int
 ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	int error = 0;
 	int i;
 	struct in_addr addr;
 	struct ip_mreq mreq;
 	struct ifnet *ifp;
 	struct ip_moptions *imo;
 	struct route ro;
 	struct sockaddr_in *dst;
 	int ifindex;
 	int s;
 
 	switch (sopt->sopt_name) {
 	/* store an index number for the vif you wanna use in the send */
 	case IP_MULTICAST_VIF:
 		/*
 		 * legal_vif_num is a function pointer; when it is unset
 		 * the option cannot be validated and is refused.
 		 */
 		if (legal_vif_num == 0) {
 			error = EOPNOTSUPP;
 			break;
 		}
 		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 		if (error)
 			break;
 		/* -1 is accepted in addition to any legal vif number. */
 		if (!legal_vif_num(i) && (i != -1)) {
 			error = EINVAL;
 			break;
 		}
 		imo = ip_findmoptions(inp);
 		imo->imo_multicast_vif = i;
 		INP_UNLOCK(inp);
 		break;
 
 	case IP_MULTICAST_IF:
 		/*
 		 * Select the interface for outgoing multicast packets.
 		 */
 		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
 		if (error)
 			break;
 		/*
 		 * INADDR_ANY is used to remove a previous selection.
 		 * When no interface is selected, a default one is
 		 * chosen every time a multicast packet is sent.
 		 */
 		imo = ip_findmoptions(inp);
 		if (addr.s_addr == INADDR_ANY) {
 			imo->imo_multicast_ifp = NULL;
 			INP_UNLOCK(inp);
 			break;
 		}
 		/*
 		 * The selected interface is identified by its local
 		 * IP address.  Find the interface and confirm that
 		 * it supports multicasting.
 		 */
 		s = splimp();
 		ifp = ip_multicast_if(&addr, &ifindex);
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			INP_UNLOCK(inp);
 			splx(s);
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		imo->imo_multicast_ifp = ifp;
 		/*
 		 * Remember the user-supplied address only when
 		 * ip_multicast_if() reported a non-zero ifindex;
 		 * otherwise store INADDR_ANY.
 		 */
 		if (ifindex)
 			imo->imo_multicast_addr = addr;
 		else
 			imo->imo_multicast_addr.s_addr = INADDR_ANY;
 		INP_UNLOCK(inp);
 		splx(s);
 		break;
 
 	case IP_MULTICAST_TTL:
 		/*
 		 * Set the IP time-to-live for outgoing multicast packets.
 		 * The original multicast API required a char argument,
 		 * which is inconsistent with the rest of the socket API.
 		 * We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == 1) {
 			u_char ttl;
 			error = sooptcopyin(sopt, &ttl, 1, 1);
 			if (error)
 				break;
 			imo = ip_findmoptions(inp);
 			imo->imo_multicast_ttl = ttl;
 			INP_UNLOCK(inp);
 		} else {
 			u_int ttl;
 			error = sooptcopyin(sopt, &ttl, sizeof ttl, 
 					    sizeof ttl);
 			if (error)
 				break;
 			/* The int form is accepted only for values 0..255. */
 			if (ttl > 255)
 				error = EINVAL;
 			else {
 				imo = ip_findmoptions(inp);
 				imo->imo_multicast_ttl = ttl;
 				INP_UNLOCK(inp);
 			}
 		}
 		break;
 
 	case IP_MULTICAST_LOOP:
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.  The original multicast API required a
 		 * char argument, which is inconsistent with the rest
 		 * of the socket API.  We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == 1) {
 			u_char loop;
 			error = sooptcopyin(sopt, &loop, 1, 1);
 			if (error)
 				break;
 			imo = ip_findmoptions(inp);
 			/* !! normalizes any non-zero value to 1. */
 			imo->imo_multicast_loop = !!loop;
 			INP_UNLOCK(inp);
 		} else {
 			u_int loop;
 			error = sooptcopyin(sopt, &loop, sizeof loop,
 					    sizeof loop);
 			if (error)
 				break;
 			imo = ip_findmoptions(inp);
 			imo->imo_multicast_loop = !!loop;
 			INP_UNLOCK(inp);
 		}
 		break;
 
 	case IP_ADD_MEMBERSHIP:
 		/*
 		 * Add a multicast group membership.
 		 * Group must be a valid IP multicast address.
 		 */
 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
 		if (error)
 			break;
 
 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
 			error = EINVAL;
 			break;
 		}
 		s = splimp();
 		/*
 		 * If no interface address was provided, use the interface of
 		 * the route to the given multicast address.
 		 */
 		if (mreq.imr_interface.s_addr == INADDR_ANY) {
 			bzero((caddr_t)&ro, sizeof(ro));
 			dst = (struct sockaddr_in *)&ro.ro_dst;
 			dst->sin_len = sizeof(*dst);
 			dst->sin_family = AF_INET;
 			dst->sin_addr = mreq.imr_multiaddr;
 			rtalloc_ign(&ro, RTF_CLONING);
 			if (ro.ro_rt == NULL) {
 				error = EADDRNOTAVAIL;
 				splx(s);
 				break;
 			}
 			/*
 			 * Only the interface pointer is kept; the route
 			 * itself is released immediately.
 			 */
 			ifp = ro.ro_rt->rt_ifp;
 			RTFREE(ro.ro_rt);
 		}
 		else {
 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
 		}
 
 		/*
 		 * See if we found an interface, and confirm that it
 		 * supports multicast.
 		 */
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			splx(s);
 			break;
 		}
 		/*
 		 * See if the membership already exists or if all the
 		 * membership slots are full.
 		 */
 		imo = ip_findmoptions(inp);
 		for (i = 0; i < imo->imo_num_memberships; ++i) {
 			if (imo->imo_membership[i]->inm_ifp == ifp &&
 			    imo->imo_membership[i]->inm_addr.s_addr
 						== mreq.imr_multiaddr.s_addr)
 				break;
 		}
 		/* The loop stopped early: this (ifp, group) pair is present. */
 		if (i < imo->imo_num_memberships) {
 			INP_UNLOCK(inp);
 			error = EADDRINUSE;
 			splx(s);
 			break;
 		}
 		/*
 		 * Here i equals imo_num_memberships, so this tests whether
 		 * the array is already full.
 		 */
 		if (i == IP_MAX_MEMBERSHIPS) {
 			INP_UNLOCK(inp);
 			error = ETOOMANYREFS;
 			splx(s);
 			break;
 		}
 		/*
 		 * Everything looks good; add a new record to the multicast
 		 * address list for the given interface.
 		 */
 		if ((imo->imo_membership[i] =
 		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
 			INP_UNLOCK(inp);
 			error = ENOBUFS;
 			splx(s);
 			break;
 		}
 		++imo->imo_num_memberships;
 		INP_UNLOCK(inp);
 		splx(s);
 		break;
 
 	case IP_DROP_MEMBERSHIP:
 		/*
 		 * Drop a multicast group membership.
 		 * Group must be a valid IP multicast address.
 		 */
 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
 		if (error)
 			break;
 
 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
 			error = EINVAL;
 			break;
 		}
 
 		s = splimp();
 		/*
 		 * If an interface address was specified, get a pointer
 		 * to its ifnet structure.
 		 */
 		if (mreq.imr_interface.s_addr == INADDR_ANY)
 			ifp = NULL;
 		else {
 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
 			if (ifp == NULL) {
 				error = EADDRNOTAVAIL;
 				splx(s);
 				break;
 			}
 		}
 		/*
 		 * Find the membership in the membership array.
 		 * A NULL ifp acts as a wildcard: the first entry whose
 		 * group address matches is selected.
 		 */
 		imo = ip_findmoptions(inp);
 		for (i = 0; i < imo->imo_num_memberships; ++i) {
 			if ((ifp == NULL ||
 			     imo->imo_membership[i]->inm_ifp == ifp) &&
 			     imo->imo_membership[i]->inm_addr.s_addr ==
 			     mreq.imr_multiaddr.s_addr)
 				break;
 		}
 		/* The loop ran to completion: no matching membership. */
 		if (i == imo->imo_num_memberships) {
 			INP_UNLOCK(inp);
 			error = EADDRNOTAVAIL;
 			splx(s);
 			break;
 		}
 		/*
 		 * Give up the multicast address record to which the
 		 * membership points.
 		 */
 		in_delmulti(imo->imo_membership[i]);
 		/*
 		 * Remove the gap in the membership array.
 		 */
 		for (++i; i < imo->imo_num_memberships; ++i)
 			imo->imo_membership[i-1] = imo->imo_membership[i];
 		--imo->imo_num_memberships;
 		INP_UNLOCK(inp);
 		splx(s);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Return the IP multicast options in response to user getsockopt().
  */
 static int
 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip_moptions *imo;
 	struct in_addr addr;
 	struct in_ifaddr *ia;
 	int error, optval;
 	u_char coptval;
 
 	INP_LOCK(inp);
 	imo = inp->inp_moptions;
 
 	error = 0;
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF: 
 		if (imo != NULL)
 			optval = imo->imo_multicast_vif;
 		else
 			optval = -1;
 		INP_UNLOCK(inp);
 		error = sooptcopyout(sopt, &optval, sizeof optval);
 		break;
 
 	case IP_MULTICAST_IF:
 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
 			addr.s_addr = INADDR_ANY;
 		else if (imo->imo_multicast_addr.s_addr) {
 			/* return the value user has set */
 			addr = imo->imo_multicast_addr;
 		} else {
 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
 			addr.s_addr = (ia == NULL) ? INADDR_ANY
 				: IA_SIN(ia)->sin_addr.s_addr;
 		}
 		INP_UNLOCK(inp);
 		error = sooptcopyout(sopt, &addr, sizeof addr);
 		break;
 
 	case IP_MULTICAST_TTL:
 		if (imo == 0)
 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
 		else
 			optval = coptval = imo->imo_multicast_ttl;
 		INP_UNLOCK(inp);
 		if (sopt->sopt_valsize == 1)
 			error = sooptcopyout(sopt, &coptval, 1);
 		else
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 		break;
 
 	case IP_MULTICAST_LOOP:
 		if (imo == 0)
 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
 		else
 			optval = coptval = imo->imo_multicast_loop;
 		INP_UNLOCK(inp);
 		if (sopt->sopt_valsize == 1)
 			error = sooptcopyout(sopt, &coptval, 1);
 		else
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 		break;
 
 	default:
 		INP_UNLOCK(inp);
 		error = ENOPROTOOPT;
 		break;
 	}
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
 
 /*
  * Release an IP multicast options structure.
  *
  * Each recorded group membership is given up with in_delmulti() before
  * the structure itself is freed.  A NULL pointer is accepted and ignored.
  */
 void
 ip_freemoptions(struct ip_moptions *imo)
 {
 	int slot;
 
 	if (imo == NULL)
 		return;
 
 	for (slot = 0; slot < imo->imo_num_memberships; slot++)
 		in_delmulti(imo->imo_membership[slot]);
 	free(imo, M_IPMOPTS);
 }
 
 /*
  * Loop a copy of an outgoing IP multicast packet back to the input side
  * of the given interface; called from ip_output().
  *
  * Delivery goes through if_simloop(), i.e. the loopback "driver" output
  * path, even though ifp need not be a loopback interface -- ugly, but it
  * avoids duplicating that code here.  If the packet cannot be copied the
  * loopback is silently skipped; the caller's mbuf m is never modified.
  */
 static void
 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
     int hlen)
 {
 	struct mbuf *dup;
 	struct ip *iph;
 
 	dup = m_copy(m, 0, M_COPYALL);
 	if (dup == NULL)
 		return;
 
 	/*
 	 * Pull the IP header up when the first mbuf uses external storage
 	 * or holds fewer than hlen bytes (presumably so that the header
 	 * edits below land in storage private to the copy).
 	 */
 	if ((dup->m_flags & M_EXT) != 0 || dup->m_len < hlen) {
 		dup = m_pullup(dup, hlen);
 		if (dup == NULL)
 			return;
 	}
 
 	/* Finish any deferred payload checksum and flag it as verified. */
 	if (dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(dup);
 		dup->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		dup->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		dup->m_pkthdr.csum_data = 0xffff;
 	}
 
 	/*
 	 * No fragmentation is attempted even if the IP length exceeds
 	 * the interface MTU.  Convert the length and offset fields with
 	 * htons() and recompute the header checksum.
 	 */
 	iph = mtod(dup, struct ip *);
 	iph->ip_len = htons(iph->ip_len);
 	iph->ip_off = htons(iph->ip_off);
 	iph->ip_sum = 0;
 	iph->ip_sum = in_cksum(dup, hlen);
 
 	/*
 	 * NB:
 	 * Calling ip_input() directly might expose lingering reentrancy
 	 * problems elsewhere (in particular, in anything that modifies
 	 * the packet in place).  Delivering through the protosw would be
 	 * yet another option.  For now, stay on the safe side and use
 	 * if_simloop().
 	 */
 #if 1 /* XXX */
 	if (dst->sin_family != AF_INET) {
 		printf("ip_mloopback: bad address family %d\n",
 		    dst->sin_family);
 		dst->sin_family = AF_INET;
 	}
 #endif
 
 #ifdef notdef
 	dup->m_pkthdr.rcvif = ifp;
 	ip_input(dup);
 #else
 	if_simloop(ifp, dup, dst->sin_family, 0);
 #endif
 }
diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c
index 5041fe87ff49..3b542cf6845c 100644
--- a/sys/netinet6/ip6_forward.c
+++ b/sys/netinet6/ip6_forward.c
@@ -1,675 +1,675 @@
 /*	$FreeBSD$	*/
 /*	$KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_ip6fw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 
 #include <netinet/in_pcb.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #ifdef INET6
 #include <netinet6/ipsec6.h>
 #endif
 #include <netkey/key.h>
 #endif /* IPSEC */
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #define	IPSEC
 #endif /* FAST_IPSEC */
 
 #include <netinet6/ip6_fw.h>
 
 #include <net/net_osdep.h>
 
 #include <netinet6/ip6protosw.h>
 
 struct	route_in6 ip6_forward_rt;
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  */
 
 void
 ip6_forward(m, srcrt)
 	struct mbuf *m;
 	int srcrt;
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct sockaddr_in6 *dst = NULL;
 	struct rtentry *rt = NULL;
 	int error, type = 0, code = 0;
 	struct mbuf *mcopy = NULL;
 	struct ifnet *origifp;	/* maybe unnecessary */
 	u_int32_t inzone, outzone;
 	struct in6_addr src_in6, dst_in6;
 #ifdef IPSEC
 	struct secpolicy *sp = NULL;
 	int ipsecrt = 0;
 #endif
 
 #ifdef IPSEC
 	/*
 	 * Check AH/ESP integrity.
 	 */
 	/*
 	 * Don't increment ip6s_cantforward because this is the check
 	 * before forwarding packet actually.
 	 */
 	if (ipsec6_in_reject(m, NULL)) {
 #if !defined(FAST_IPSEC)
 		ipsec6stat.in_polvio++;
 #endif
 		m_freem(m);
 		return;
 	}
 #endif /* IPSEC */
 
 	/*
 	 * Do not forward packets to multicast destination (should be handled
 	 * by ip6_mforward().
 	 * Do not forward packets with unspecified source.  It was discussed
 	 * in July 2000, on the ipngwg mailing list.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 ||
 	    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		ip6stat.ip6s_cantforward++;
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
 		if (ip6_log_time + ip6_log_interval < time_second) {
 			ip6_log_time = time_second;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
 			    ip6_sprintf(&ip6->ip6_src),
 			    ip6_sprintf(&ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif));
 		}
 		m_freem(m);
 		return;
 	}
 
 #ifdef IPSTEALTH
 	if (!ip6stealth) {
 #endif
 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
 		icmp6_error(m, ICMP6_TIME_EXCEEDED,
 				ICMP6_TIME_EXCEED_TRANSIT, 0);
 		return;
 	}
 	ip6->ip6_hlim -= IPV6_HLIMDEC;
 
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU -
 	 * size of IPv6 + ICMPv6 headers) bytes of the packet in case
 	 * we need to generate an ICMP6 message to the src.
 	 * Thanks to M_EXT, in most cases copy will not occur.
 	 *
 	 * It is important to save it before IPsec processing as IPsec
 	 * processing may modify the mbuf.
 	 */
 	mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN));
 
 #ifdef IPSEC
 	/* get a security policy for this packet */
 	sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
 	    IP_FORWARDING, &error);
 	if (sp == NULL) {
 		ipsec6stat.out_inval++;
 		ip6stat.ip6s_cantforward++;
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		m_freem(m);
 		return;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		ipsec6stat.out_polvio++;
 		ip6stat.ip6s_cantforward++;
 		key_freesp(sp);
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		m_freem(m);
 		return;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		key_freesp(sp);
 		goto skip_ipsec;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* XXX should be panic ? */
 			printf("ip6_forward: No IPsec request specified.\n");
 			ip6stat.ip6s_cantforward++;
 			key_freesp(sp);
 			if (mcopy) {
 #if 0
 				/* XXX: what icmp ? */
 #else
 				m_freem(mcopy);
 #endif
 			}
 			m_freem(m);
 			return;
 		}
 		/* do IPsec */
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		/* should be panic ?? */
 		printf("ip6_forward: Invalid policy found. %d\n", sp->policy);
 		key_freesp(sp);
 		goto skip_ipsec;
 	}
 
     {
 	struct ipsecrequest *isr = NULL;
 	struct ipsec_output_state state;
 
 	/*
 	 * when the kernel forwards a packet, it is not proper to apply
 	 * IPsec transport mode to the packet is not proper.  this check
 	 * avoid from this.
 	 * at present, if there is even a transport mode SA request in the
 	 * security policy, the kernel does not apply IPsec to the packet.
 	 * this check is not enough because the following case is valid.
 	 *      ipsec esp/tunnel/xxx-xxx/require esp/transport//require;
 	 */
 	for (isr = sp->req; isr; isr = isr->next) {
 		if (isr->saidx.mode == IPSEC_MODE_ANY)
 			goto doipsectunnel;
 		if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
 			goto doipsectunnel;
 	}
 
 	/*
 	 * if there's no need for tunnel mode IPsec, skip.
 	 */
 	if (!isr)
 		goto skip_ipsec;
 
     doipsectunnel:
 	/*
 	 * All the extension headers will become inaccessible
 	 * (since they can be encrypted).
 	 * Don't panic, we need no more updates to extension headers
 	 * on inner IPv6 packet (since they are now encapsulated).
 	 *
 	 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 	 */
 	bzero(&state, sizeof(state));
 	state.m = m;
 	state.ro = NULL;	/* update at ipsec6_output_tunnel() */
 	state.dst = NULL;	/* update at ipsec6_output_tunnel() */
 
 	error = ipsec6_output_tunnel(&state, sp, 0);
 
 	m = state.m;
 	key_freesp(sp);
 
 	if (error) {
 		/* mbuf is already reclaimed in ipsec6_output_tunnel. */
 		switch (error) {
 		case EHOSTUNREACH:
 		case ENETUNREACH:
 		case EMSGSIZE:
 		case ENOBUFS:
 		case ENOMEM:
 			break;
 		default:
 			printf("ip6_output (ipsec): error code %d\n", error);
 			/* FALLTHROUGH */
 		case ENOENT:
 			/* don't show these error codes to the user */
 			break;
 		}
 		ip6stat.ip6s_cantforward++;
 		if (mcopy) {
 #if 0
 			/* XXX: what icmp ? */
 #else
 			m_freem(mcopy);
 #endif
 		}
 		m_freem(m);
 		return;
 	}
 
 	if (ip6 != mtod(m, struct ip6_hdr *)) {
 		/*
 		 * now tunnel mode headers are added.  we are originating
 		 * packet instead of forwarding the packet.
 		 */
 		ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL,
 		    NULL);
 		goto freecopy;
 	}
 
 	/* adjust pointer */
 	dst = (struct sockaddr_in6 *)state.dst;
 	rt = state.ro ? state.ro->ro_rt : NULL;
 	if (dst != NULL && rt != NULL)
 		ipsecrt = 1;
     }
     skip_ipsec:
 #endif /* IPSEC */
 
 #ifdef IPSEC
 	if (ipsecrt)
 		goto skip_routing;
 #endif
 
 	dst = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
 	if (!srcrt) {
 		/* ip6_forward_rt.ro_dst.sin6_addr is equal to ip6->ip6_dst */
 		if (ip6_forward_rt.ro_rt == 0 ||
 		    (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) == 0) {
 			if (ip6_forward_rt.ro_rt) {
 				RTFREE(ip6_forward_rt.ro_rt);
 				ip6_forward_rt.ro_rt = 0;
 			}
 
 			/* this probably fails but give it a try again */
 			rtalloc((struct route *)&ip6_forward_rt);
 		}
 
 		if (ip6_forward_rt.ro_rt == 0) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
 			if (mcopy) {
 				icmp6_error(mcopy, ICMP6_DST_UNREACH,
 					    ICMP6_DST_UNREACH_NOROUTE, 0);
 			}
 			m_freem(m);
 			return;
 		}
 	} else if ((rt = ip6_forward_rt.ro_rt) == 0 ||
 		   !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr)) {
 		if (ip6_forward_rt.ro_rt) {
 			RTFREE(ip6_forward_rt.ro_rt);
 			ip6_forward_rt.ro_rt = 0;
 		}
 		bzero(dst, sizeof(*dst));
 		dst->sin6_len = sizeof(struct sockaddr_in6);
 		dst->sin6_family = AF_INET6;
 		dst->sin6_addr = ip6->ip6_dst;
 
   		rtalloc((struct route *)&ip6_forward_rt);
 		if (ip6_forward_rt.ro_rt == 0) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
 			if (mcopy) {
 				icmp6_error(mcopy, ICMP6_DST_UNREACH,
 					    ICMP6_DST_UNREACH_NOROUTE, 0);
 			}
 			m_freem(m);
 			return;
 		}
 	}
 	rt = ip6_forward_rt.ro_rt;
 #ifdef IPSEC
     skip_routing:;
 #endif
 
 	/*
 	 * Source scope check: if a packet can't be delivered to its
 	 * destination for the reason that the destination is beyond the scope
 	 * of the source address, discard the packet and return an icmp6
 	 * destination unreachable error with Code 2 (beyond scope of source
 	 * address).  We use a local copy of ip6_src, since in6_setscope()
 	 * will possibly modify its first argument.
 	 * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1]
 	 */
 	src_in6 = ip6->ip6_src;
 	if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) {
 		/* XXX: this should not happen */
 		ip6stat.ip6s_cantforward++;
 		ip6stat.ip6s_badscope++;
 		m_freem(m);
 		return;
 	}
 	if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
 		ip6stat.ip6s_cantforward++;
 		ip6stat.ip6s_badscope++;
 		m_freem(m);
 		return;
 	}
 	if (inzone != outzone
 #ifdef IPSEC
 	    && !ipsecrt
 #endif
 	    ) {
 		ip6stat.ip6s_cantforward++;
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
 
 		if (ip6_log_time + ip6_log_interval < time_second) {
 			ip6_log_time = time_second;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
 			    ip6_sprintf(&ip6->ip6_src),
 			    ip6_sprintf(&ip6->ip6_dst),
 			    ip6->ip6_nxt,
 			    if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp));
 		}
 		if (mcopy)
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Destination scope check: if a packet is going to break the scope
 	 * zone of packet's destination address, discard it.  This case should
 	 * usually be prevented by appropriately-configured routing table, but
 	 * we need an explicit check because we may mistakenly forward the
 	 * packet to a different zone by (e.g.) a default route.
 	 */
 	dst_in6 = ip6->ip6_dst;
 	if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 ||
 	    in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 ||
 	    inzone != outzone) {
 		ip6stat.ip6s_cantforward++;
 		ip6stat.ip6s_badscope++;
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
 		if (mcopy) {
 			u_long mtu;
 #ifdef IPSEC
 			struct secpolicy *sp;
 			int ipsecerror;
 			size_t ipsechdrsiz;
 #endif
 
 			mtu = IN6_LINKMTU(rt->rt_ifp);
 #ifdef IPSEC
 			/*
 			 * When we do IPsec tunnel ingress, we need to play
 			 * with the link value (decrement IPsec header size
 			 * from mtu value).  The code is much simpler than v4
 			 * case, as we have the outgoing interface for
 			 * encapsulated packet as "rt->rt_ifp".
 			 */
 			sp = ipsec6_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
 				IP_FORWARDING, &ipsecerror);
 			if (sp) {
 				ipsechdrsiz = ipsec6_hdrsiz(mcopy,
 					IPSEC_DIR_OUTBOUND, NULL);
 				if (ipsechdrsiz < mtu)
 					mtu -= ipsechdrsiz;
 			}
 
 			/*
 			 * if mtu becomes less than minimum MTU,
 			 * tell minimum MTU (and I'll need to fragment it).
 			 */
 			if (mtu < IPV6_MMTU)
 				mtu = IPV6_MMTU;
 #endif
 			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
 		}
 		m_freem(m);
 		return;
 	}
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		dst = (struct sockaddr_in6 *)rt->rt_gateway;
 
 	/*
 	 * If we are to forward the packet using the same interface
 	 * as one we got the packet from, perhaps we should send a redirect
 	 * to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a route
 	 * modified by a redirect.
 	 */
 	if (rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
 #ifdef IPSEC
 	    !ipsecrt &&
 #endif
 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
 		if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
 			/*
 			 * If the incoming interface is equal to the outgoing
 			 * one, and the link attached to the interface is
 			 * point-to-point, then it will be highly probable
 			 * that a routing loop occurs. Thus, we immediately
 			 * drop the packet and send an ICMPv6 error message.
 			 *
 			 * type/code is based on suggestion by Rich Draves.
 			 * not sure if it is the best pick.
 			 */
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_ADDR, 0);
 			m_freem(m);
 			return;
 		}
 		type = ND_REDIRECT;
 	}
 
 	/*
 	 * Check with the firewall...
 	 */
 	if (ip6_fw_enable && ip6_fw_chk_ptr) {
 		u_short port = 0;
 		/* If ipfw says divert, we have to just drop packet */
 		if ((*ip6_fw_chk_ptr)(&ip6, rt->rt_ifp, &port, &m)) {
 			m_freem(m);
 			goto freecopy;
 		}
 		if (!m)
 			goto freecopy;
 	}
 
 	/*
 	 * Fake scoped addresses. Note that even link-local source or
 	 * destinaion can appear, if the originating node just sends the
 	 * packet to us (without address resolution for the destination).
 	 * Since both icmp6_error and icmp6_redirect_output fill the embedded
 	 * link identifiers, we can do this stuff after making a copy for
 	 * returning an error.
 	 */
 	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/*
 		 * See corresponding comments in ip6_output.
 		 * XXX: but is it possible that ip6_forward() sends a packet
 		 *      to a loopback interface? I don't think so, and thus
 		 *      I bark here. (jinmei@kame.net)
 		 * XXX: it is common to route invalid packets to loopback.
 		 *	also, the codepath will be visited on use of ::1 in
 		 *	rthdr. (itojun)
 		 */
 #if 1
 		if (0)
 #else
 		if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0)
 #endif
 		{
 			printf("ip6_forward: outgoing interface is loopback. "
 			       "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
 			       ip6_sprintf(&ip6->ip6_src),
 			       ip6_sprintf(&ip6->ip6_dst),
 			       ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif),
 			       if_name(rt->rt_ifp));
 		}
 
 		/* we can just use rcvif in forwarding. */
 		origifp = m->m_pkthdr.rcvif;
 	}
 	else
 		origifp = rt->rt_ifp;
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
 
 	/* Jump over all PFIL processing if hooks are not active. */
-	if (inet6_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet6_pfil_hook))
 		goto pass;
 
 	/* Run through list of hooks for output packets. */
 	error = pfil_run_hooks(&inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL);
 	if (error != 0)
 		goto senderr;
 	if (m == NULL)
 		goto freecopy;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 pass:
 	error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
 	if (error) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
 		ip6stat.ip6s_cantforward++;
 	} else {
 		ip6stat.ip6s_forward++;
 		in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward);
 		if (type)
 			ip6stat.ip6s_redirectsent++;
 		else {
 			if (mcopy)
 				goto freecopy;
 		}
 	}
 
 senderr:
 	if (mcopy == NULL)
 		return;
 	switch (error) {
 	case 0:
 		if (type == ND_REDIRECT) {
 			icmp6_redirect_output(mcopy, rt);
 			return;
 		}
 		goto freecopy;
 
 	case EMSGSIZE:
 		/* xxx MTU is constant in PPP? */
 		goto freecopy;
 
 	case ENOBUFS:
 		/* Tell source to slow down like source quench in IP? */
 		goto freecopy;
 
 	case ENETUNREACH:	/* shouldn't happen, checked above */
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP6_DST_UNREACH;
 		code = ICMP6_DST_UNREACH_ADDR;
 		break;
 	}
 	icmp6_error(mcopy, type, code, 0);
 	return;
 
  freecopy:
 	m_freem(mcopy);
 	return;
 }
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 3b345570c78c..a7f5a7c73ced 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1,1610 +1,1610 @@
 /*	$FreeBSD$	*/
 /*	$KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include "opt_ip6fw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #endif /* INET */
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/nd6.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #ifdef INET6
 #include <netinet6/ipsec6.h>
 #endif
 #endif
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #define	IPSEC
 #endif /* FAST_IPSEC */
 
 #include <netinet6/ip6_fw.h>
 
 #include <netinet6/ip6protosw.h>
 
 #include <net/net_osdep.h>
 
 /* Protocol domain whose dom_protosw table is walked by ip6_init(). */
 extern struct domain inet6domain;
 
 /*
  * ip6_protox[] maps an IP protocol number to an index into inet6sw[];
  * it is filled in by ip6_init() and consulted by ip6_input().
  */
 u_char ip6_protox[IPPROTO_MAX];
 /* Input queue handed to netisr_register() for NETISR_IPV6. */
 static struct ifqueue ip6intrq;
 /* Value copied into ip6intrq.ifq_maxlen by ip6_init(). */
 static int ip6qmaxlen = IFQ_MAXLEN;
 struct in6_ifaddr *in6_ifaddr;
 
 /* Callout armed by ip6_init2() to run in6_tmpaddrtimer(). */
 extern struct callout in6_tmpaddrtimer_ch;
 
 int ip6_forward_srcrt;			/* XXX */
 int ip6_sourcecheck;			/* XXX */
 int ip6_sourcecheck_interval;		/* XXX */
 
 int ip6_ours_check_algorithm;
 
 /* pfil head for AF_INET6; registered in ip6_init(), run from ip6_input(). */
 struct pfil_head inet6_pfil_hook;
 
 /* firewall hooks */
 ip6_fw_chk_t *ip6_fw_chk_ptr;
 ip6_fw_ctl_t *ip6_fw_ctl_ptr;
 int ip6_fw_enable = 1;
 
 /* IPv6 statistics counters, bumped throughout the input path. */
 struct ip6stat ip6stat;
 
 /* Forward declarations of file-local helpers. */
 static void ip6_init2 __P((void *));
 static struct ip6aux *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *));
 static int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *));
 #ifdef PULLDOWN_TEST
 static struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int));
 #endif
 
 /*
  * IP6 initialization: fill in IP6 protocol switch table.
  * All protocols not implemented in kernel go to raw IP6 protocol handler.
  */
 void
 ip6_init()
 {
 	struct ip6protosw *pr;
 	int i;
 
 #ifdef DIAGNOSTIC
 	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
 		panic("sizeof(protosw) != sizeof(ip6protosw)");
 #endif
 	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
 	if (pr == 0)
 		panic("ip6_init");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip6_protox[i] = pr - inet6sw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip6_protox[].
 	 */
 	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
 	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET6 &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip6_protox[pr->pr_protocol] = pr - inet6sw;
 		}
 
 	/* Initialize packet filter hooks. */
 	inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
 	inet6_pfil_hook.ph_af = AF_INET6;
 	if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	ip6intrq.ifq_maxlen = ip6qmaxlen;
 	mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
 	scope6_init();
 	addrsel_policy_init();
 	nd6_init();
 	frag6_init();
 	ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
 }
 
 /*
  * Second-stage IPv6 initialization: start the periodic ND timer and the
  * timer that regenerates the randomized ID of temporary addresses.
  * The argument is unused.
  */
 static void
 ip6_init2(dummy)
 	void *dummy;
 {
 
 	/* prepare both callouts ... */
 	callout_init(&nd6_timer_ch, 0);
 	callout_init(&in6_tmpaddrtimer_ch, 0);
 
 	/* ... then arm them: nd6_timer() fires after hz ticks */
 	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
 
 	/* timer for regeneration of the temporary addresses' randomized ID */
 	callout_reset(&in6_tmpaddrtimer_ch,
 	    (ip6_temp_preferred_lifetime - ip6_desync_factor -
 	     ip6_temp_regen_advance) * hz,
 	    in6_tmpaddrtimer, NULL);
 }
 
 /* cheat */
 /* This must be after route_init(), which is now SI_ORDER_THIRD */
 /* Registers ip6_init2() to run at SI_SUB_PROTO_DOMAIN / SI_ORDER_MIDDLE. */
 SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
 
 /* Cached route used by ip6_input() for its unicast destination lookup. */
 extern struct	route_in6 ip6_forward_rt;
 
 /*
  * IPv6 input routine (registered with netisr for NETISR_IPV6 by ip6_init()).
  *
  * Takes ownership of the packet `m'.  In order, it:
  *   - updates mbuf statistics and drops packets on IPv6-disabled interfaces;
  *   - makes the IPv6 header contiguous and validates version and addresses;
  *   - runs the inet6 pfil input hooks and the ip6fw hook, if active;
  *   - fixes up address scope zones;
  *   - decides whether the packet is ours (multicast membership, loopback
  *     host route, FAITH) or should be forwarded;
  *   - processes a Hop-by-Hop options header and checks the payload length;
  *   - either forwards the packet or walks the header chain through
  *     inet6sw[].pr_input until IPPROTO_DONE.
  * Every exit path either frees the mbuf, hands it to another routine, or
  * returns after a callee has disposed of it.
  */
 void
 ip6_input(m)
 	struct mbuf *m;
 {
 	struct ip6_hdr *ip6;
 	int off = sizeof(struct ip6_hdr), nest;
 	u_int32_t plen;
 	u_int32_t rtalert = ~0;	/* ~0: no router alert option seen */
 	int nxt, ours = 0;
 	struct ifnet *deliverifp = NULL;
 	struct in6_addr odst;
 	int srcrt = 0;
 
 	GIANT_REQUIRED;			/* XXX for now */
 #ifdef IPSEC
 	/*
 	 * should the inner packet be considered authentic?
 	 * see comment in ah4_input().
 	 */
 	if (m) {
 		m->m_flags &= ~M_AUTHIPHDR;
 		m->m_flags &= ~M_AUTHIPDGM;
 	}
 #endif
 
 	/*
 	 * make sure we don't have onion peering information into m_tag.
 	 */
 	ip6_delaux(m);
 
 	/*
 	 * mbuf statistics
 	 */
 	if (m->m_flags & M_EXT) {
 		if (m->m_next)
 			ip6stat.ip6s_mext2m++;
 		else
 			ip6stat.ip6s_mext1++;
 	} else {
 #define M2MMAX	(sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0]))
 		if (m->m_next) {
 			if (m->m_flags & M_LOOP) {
 				ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */
 			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
 				ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
 			else
 				ip6stat.ip6s_m2m[0]++;
 		} else
 			ip6stat.ip6s_m1++;
 #undef M2MMAX
 	}
 
 	/* drop the packet if IPv6 operation is disabled on the IF */
 	if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
 		m_freem(m);
 		return;
 	}
 
 	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
 	ip6stat.ip6s_total++;
 
 #ifndef PULLDOWN_TEST
 	/*
 	 * L2 bridge code and some other code can return mbuf chain
 	 * that does not conform to KAME requirement.  too bad.
 	 * XXX: fails to join if interface MTU > MCLBYTES.  jumbogram?
 	 */
 	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
 		struct mbuf *n;
 
 		/* copy the whole chain into a single (cluster) mbuf */
 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
 		if (n)
 			M_MOVE_PKTHDR(n, m);
 		if (n && n->m_pkthdr.len > MHLEN) {
 			MCLGET(n, M_DONTWAIT);
 			if ((n->m_flags & M_EXT) == 0) {
 				m_freem(n);
 				n = NULL;
 			}
 		}
 		if (n == NULL) {
 			m_freem(m);
 			return;	/* ENOBUFS */
 		}
 
 		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
 		n->m_len = n->m_pkthdr.len;
 		m_freem(m);
 		m = n;
 	}
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
 #endif
 
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		struct ifnet *inifp;
 		/* remember rcvif: m is gone if m_pullup() fails */
 		inifp = m->m_pkthdr.rcvif;
 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
 			ip6stat.ip6s_toosmall++;
 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
 			return;
 		}
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 		ip6stat.ip6s_badvers++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
 		goto bad;
 	}
 
 	ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
 
 	/*
 	 * Check against address spoofing/corruption.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
 		/*
 		 * XXX: "badscope" is not very suitable for a multicast source.
 		 */
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
 	    !(m->m_flags & M_LOOP)) {
 		/*
 		 * In this case, the packet should come from the loopback
 		 * interface.  However, we cannot just check the if_flags,
 		 * because ip6_mloopback() passes the "actual" interface
 		 * as the outgoing/incoming interface.
 		 */
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
 		/* packet is dropped by traffic conditioner */
 		return;
 	}
 #endif
 	/*
 	 * The following check is not documented in specs.  A malicious
 	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
 	 * and bypass security checks (act as if it was from 127.0.0.1 by using
 	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
 	 *
 	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
 	 * support IPv4-less kernel compilation, we cannot support SIIT
 	 * environment at all.  So, it makes more sense for us to reject any
 	 * malicious packets for non-SIIT environment, than try to do a
 	 * partial support for SIIT environment.
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 #if 0
 	/*
 	 * Reject packets with IPv4 compatible addresses (auto tunnel).
 	 *
 	 * The code forbids auto tunnel relay case in RFC1933 (the check is
 	 * stronger than RFC1933).  We may want to re-enable it if mech-xx
 	 * is revised to forbid relaying case.
 	 */
 	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 #endif
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing
 	 *     (e.g. by NAT rewriting).  When this happens,
 	 *     tell ip6_forward to do the right thing.
 	 */
 	odst = ip6->ip6_dst;
 
 	/* Jump over all PFIL processing if hooks are not active. */
-	if (inet6_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet6_pfil_hook))
 		goto passin;
 
 	if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 	/* the hooks may have replaced the mbuf; reload the header pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 	/* non-zero if a hook rewrote the destination; passed to ip6_forward() */
 	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
 
 passin:
 	/*
 	 * Check with the firewall...
 	 */
 	if (ip6_fw_enable && ip6_fw_chk_ptr) {
 		u_short port = 0;
 		/* If ipfw says divert, we have to just drop packet */
 		/* use port as a dummy argument */
 		if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
 			m_freem(m);
 			m = NULL;
 		}
 		if (!m)
 			return;
 	}
 
 	/*
 	 * Disambiguate address scope zones (if there is ambiguity).
 	 * We first make sure that the original source or destination address
 	 * is not in our internal form for scoped addresses.  Such addresses
 	 * are not necessarily invalid spec-wise, but we cannot accept them due
 	 * to the usage conflict.
 	 * in6_setscope() then also checks and rejects the cases where src or
 	 * dst are the loopback address and the receiving interface
 	 * is not loopback.
 	 */
 	if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
 		ip6stat.ip6s_badscope++; /* XXX */
 		goto bad;
 	}
 	if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
 	    in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
 		ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 
 	/*
 	 * Multicast check
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 	  	struct in6_multi *in6m = 0;
 
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
 		/*
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
 		IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
 		if (in6m)
 			ours = 1;
 		else if (!ip6_mrouter) {
 			ip6stat.ip6s_notmember++;
 			ip6stat.ip6s_cantforward++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			goto bad;
 		}
 		deliverifp = m->m_pkthdr.rcvif;
 		goto hbhcheck;
 	}
 
 	/*
 	 *  Unicast check
 	 */
 	/* reuse the cached forwarding route if it is up and matches dst */
 	if (ip6_forward_rt.ro_rt != NULL &&
 	    (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
 	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
 	    &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr))
 		ip6stat.ip6s_forward_cachehit++;
 	else {
 		struct sockaddr_in6 *dst6;
 
 		if (ip6_forward_rt.ro_rt) {
 			/* route is down or destination is different */
 			ip6stat.ip6s_forward_cachemiss++;
 			RTFREE(ip6_forward_rt.ro_rt);
 			ip6_forward_rt.ro_rt = 0;
 		}
 
 		bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
 		dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
 		dst6->sin6_len = sizeof(struct sockaddr_in6);
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_addr = ip6->ip6_dst;
 
 		rtalloc((struct route *)&ip6_forward_rt);
 	}
 
 #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
 
 	/*
 	 * Accept the packet if the forwarding interface to the destination
 	 * according to the routing table is the loopback interface,
 	 * unless the associated route has a gateway.
 	 * Note that this approach causes to accept a packet if there is a
 	 * route to the loopback interface for the destination of the packet.
 	 * But we think it's even useful in some situations, e.g. when using
 	 * a special daemon which wants to intercept the packet.
 	 *
 	 * XXX: some OSes automatically make a cloned route for the destination
 	 * of an outgoing packet.  If the outgoing interface of the packet
 	 * is a loopback one, the kernel would consider the packet to be
 	 * accepted, even if we have no such address assigned on the interface.
 	 * We check the cloned flag of the route entry to reject such cases,
 	 * assuming that route entries for our own addresses are not made by
 	 * cloning (it should be true because in6_addloop explicitly installs
 	 * the host route).  However, we might have to do an explicit check
 	 * while it would be less efficient.  Or, should we rather install a
 	 * reject route for such a case?
 	 */
 	if (ip6_forward_rt.ro_rt &&
 	    (ip6_forward_rt.ro_rt->rt_flags &
 	     (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
 #ifdef RTF_WASCLONED
 	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
 #endif
 #ifdef RTF_CLONED
 	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
 #endif
 #if 0
 	    /*
 	     * The check below is redundant since the comparison of
 	     * the destination and the key of the rtentry has
 	     * already done through looking up the routing table.
 	     */
 	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
 	    &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr)
 #endif
 	    ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
 		struct in6_ifaddr *ia6 =
 			(struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
 
 		/*
 		 * record address information into m_tag.
 		 */
 		(void)ip6_setdstifaddr(m, ia6);
 
 		/*
 		 * packets to a tentative, duplicated, or somehow invalid
 		 * address must not be accepted.
 		 */
 		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
 			/* this address is ready */
 			ours = 1;
 			deliverifp = ia6->ia_ifp;	/* correct? */
 			/* Count the packet in the ip address stats */
 			ia6->ia_ifa.if_ipackets++;
 			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
 			goto hbhcheck;
 		} else {
 			/* address is not ready, so discard the packet. */
 			nd6log((LOG_INFO,
 			    "ip6_input: packet to an unready address %s->%s\n",
 			    ip6_sprintf(&ip6->ip6_src),
 			    ip6_sprintf(&ip6->ip6_dst)));
 
 			goto bad;
 		}
 	}
 
 	/*
 	 * FAITH (Firewall Aided Internet Translator)
 	 */
 	if (ip6_keepfaith) {
 		if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp
 		 && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
 			/* XXX do we need more sanity checks? */
 			ours = 1;
 			deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */
 			goto hbhcheck;
 		}
 	}
 
 	/*
 	 * Now there is no reason to process the packet if it's not our own
 	 * and we're not a router.
 	 */
 	if (!ip6_forwarding) {
 		ip6stat.ip6s_cantforward++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 		goto bad;
 	}
 
   hbhcheck:
 	/*
 	 * record address information into m_tag, if we don't have one yet.
 	 * note that we are unable to record it, if the address is not listed
 	 * as our interface address (e.g. multicast addresses, addresses
 	 * within FAITH prefixes and such).
 	 */
 	if (deliverifp && !ip6_getdstifaddr(m)) {
 		struct in6_ifaddr *ia6;
 
 		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
 		if (ia6) {
 			if (!ip6_setdstifaddr(m, ia6)) {
 				/*
 				 * XXX maybe we should drop the packet here,
 				 * as we could not provide enough information
 				 * to the upper layers.
 				 */
 			}
 		}
 	}
 
 	/*
 	 * Process Hop-by-Hop options header if it's contained.
 	 * m may be modified in ip6_hopopts_input().
 	 * If a JumboPayload option is included, plen will also be modified.
 	 */
 	plen = (u_int32_t)ntohs(ip6->ip6_plen);
 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
 		struct ip6_hbh *hbh;
 
 		if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
 #if 0	/*touches NULL pointer*/
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 #endif
 			return;	/* m have already been freed */
 		}
 
 		/* adjust pointer */
 		ip6 = mtod(m, struct ip6_hdr *);
 
 		/*
 		 * if the payload length field is 0 and the next header field
 		 * indicates Hop-by-Hop Options header, then a Jumbo Payload
 		 * option MUST be included.
 		 */
 		if (ip6->ip6_plen == 0 && plen == 0) {
 			/*
 			 * Note that if a valid jumbo payload option is
 			 * contained, ip6_hopopts_input() must set a valid
 			 * (non-zero) payload length to the variable plen.
 			 */
 			ip6stat.ip6s_badoptions++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
 			icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
 			return;
 		}
 #ifndef PULLDOWN_TEST
 		/* ip6_hopopts_input() ensures that mbuf is contiguous */
 		hbh = (struct ip6_hbh *)(ip6 + 1);
 #else
 		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
 			sizeof(struct ip6_hbh));
 		if (hbh == NULL) {
 			ip6stat.ip6s_tooshort++;
 			return;
 		}
 #endif
 		nxt = hbh->ip6h_nxt;
 
 		/*
 		 * accept the packet if a router alert option is included
 		 * and we act as an IPv6 router.
 		 */
 		if (rtalert != ~0 && ip6_forwarding)
 			ours = 1;
 	} else
 		nxt = ip6->ip6_nxt;
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IPv6 header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
 		ip6stat.ip6s_tooshort++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
 		if (m->m_len == m->m_pkthdr.len) {
 			/* single mbuf: just shorten the lengths */
 			m->m_len = sizeof(struct ip6_hdr) + plen;
 			m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
 		} else
 			/* negative length: m_adj() trims from the tail */
 			m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
 	}
 
 	/*
 	 * Forward if desirable.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		/*
 		 * If we are acting as a multicast router, all
 		 * incoming multicast packets are passed to the
 		 * kernel-level multicast forwarding function.
 		 * The packet is returned (relatively) intact; if
 		 * ip6_mforward() returns a non-zero value, the packet
 		 * must be discarded, else it may be accepted below.
 		 */
 		if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
 			ip6stat.ip6s_cantforward++;
 			m_freem(m);
 			return;
 		}
 		if (!ours) {
 			m_freem(m);
 			return;
 		}
 	} else if (!ours) {
 		/* unicast and not for us: hand the packet to the forwarder */
 		ip6_forward(m, srcrt);
 		return;
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * Malicious party may be able to use IPv4 mapped addr to confuse
 	 * tcp/udp stack and bypass security checks (act as if it was from
 	 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1).  Be cautious.
 	 *
 	 * For SIIT end node behavior, you may want to disable the check.
 	 * However, you will  become vulnerable to attacks using IPv4 mapped
 	 * source.
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 
 	/*
 	 * Tell launch routine the next header
 	 */
 	ip6stat.ip6s_delivered++;
 	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
 	nest = 0;
 
 	/*
 	 * Walk the header chain: each pr_input handler returns the next
 	 * protocol number and may update m and off; IPPROTO_DONE ends it.
 	 */
 	while (nxt != IPPROTO_DONE) {
 		if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
 			ip6stat.ip6s_toomanyhdr++;
 			goto bad;
 		}
 
 		/*
 		 * protection against faulty packet - there should be
 		 * more sanity checks in header chain processing.
 		 */
 		if (m->m_pkthdr.len < off) {
 			ip6stat.ip6s_tooshort++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
 			goto bad;
 		}
 
 #ifdef IPSEC
 		/*
 		 * enforce IPsec policy checking if we are seeing last header.
 		 * note that we do not visit this with protocols with pcb layer
 		 * code - like udp/tcp/raw ip.
 		 */
 		if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
 		    ipsec6_in_reject(m, NULL)) {
 			ipsec6stat.in_polvio++;
 			goto bad;
 		}
 #endif
 		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
 	}
 	return;
  bad:
 	/* common drop path: release the packet */
 	m_freem(m);
 }
 
 /*
  * set/grab in6_ifaddr correspond to IPv6 destination address.
  * XXX backward compatibility wrapper
  */
 static struct ip6aux *
 ip6_setdstifaddr(m, ia6)
 	struct mbuf *m;
 	struct in6_ifaddr *ia6;
 {
 	struct ip6aux *ip6a;
 
 	ip6a = ip6_addaux(m);
 	if (ip6a)
 		ip6a->ip6a_dstia6 = ia6;
 	return ip6a;	/* NULL if failed to set */
 }
 
 /*
  * Return the destination in6_ifaddr stored in the packet's ip6aux data,
  * or NULL when ip6_findaux() finds no ip6aux for the packet.
  */
 struct in6_ifaddr *
 ip6_getdstifaddr(m)
 	struct mbuf *m;
 {
 	struct ip6aux *aux = ip6_findaux(m);
 
 	return (aux != NULL) ? aux->ip6a_dstia6 : NULL;
 }
 
 /*
  * Hop-by-Hop options header processing.
  *
  * plenp:    in/out payload length; if a valid jumbo payload option is
  *           included, the real payload length is stored here.
  * rtalertp: out; receives the router alert value if that option is present.
  * mp:       in/out pointer to the packet; written back on success.
  * offp:     in/out offset of the header; on success it is advanced past
  *           the whole Hop-by-Hop options header.
  *
  * Returns 0 on success and -1 on failure.  After a -1 return the caller
  * must not touch the mbuf: ip6_process_hopopts() has already freed it or
  * handed it to icmp6_error().  (The IP6_EXTHDR_CHECK/IP6_EXTHDR_GET failure
  * paths are presumed to dispose of the mbuf as well -- verify against the
  * macro definitions.)
  */
 static int
 ip6_hopopts_input(plenp, rtalertp, mp, offp)
 	u_int32_t *plenp;
 	u_int32_t *rtalertp;	/* XXX: should be stored more smart way */
 	struct mbuf **mp;
 	int *offp;
 {
 	struct mbuf *m = *mp;
 	int off = *offp, hbhlen;
 	struct ip6_hbh *hbh;
 
 	/* validation of the length of the header */
 #ifndef PULLDOWN_TEST
 	/* first make the fixed part reachable so ip6h_len can be read ... */
 	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
 	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
 	hbhlen = (hbh->ip6h_len + 1) << 3;	/* length in 8-octet units */
 
 	/* ... then the whole header; reload hbh after the second check */
 	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
 	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
 		sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
 	if (hbh == NULL) {
 		ip6stat.ip6s_tooshort++;
 		return -1;
 	}
 	hbhlen = (hbh->ip6h_len + 1) << 3;
 	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
 		hbhlen);
 	if (hbh == NULL) {
 		ip6stat.ip6s_tooshort++;
 		return -1;
 	}
 #endif
 	off += hbhlen;
 	/* only the option area (after the fixed ip6_hbh part) is parsed */
 	hbhlen -= sizeof(struct ip6_hbh);
 
 	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
 				hbhlen, rtalertp, plenp) < 0)
 		return (-1);
 
 	*offp = off;
 	*mp = m;
 	return (0);
 }
 
 /*
  * Search header for all Hop-by-hop options and process each option.
  * This function is separate from ip6_hopopts_input() in order to
  * handle a case where the sending node itself process its hop-by-hop
  * options header. In such a case, the function is called from ip6_output().
  *
  * The function assumes that hbh header is located right after the IPv6 header
  * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
  * opthead + hbhlen is located in continuous memory region.
  *
  * m:        packet containing the options (used for error reporting).
  * opthead:  first option byte; hbhlen: number of option bytes to parse.
  * rtalertp: receives the router alert value in host byte order.
  * plenp:    receives the jumbo payload length; must be 0 on entry for a
  *           jumbo payload option to be accepted.
  *
  * Returns 0 when all options were processed, -1 otherwise.  On every -1
  * path the mbuf has been passed to m_freem() or icmp6_error(), or
  * ip6_unknown_opt() has disposed of it.
  */
 int
 ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 	struct mbuf *m;
 	u_int8_t *opthead;
 	int hbhlen;
 	u_int32_t *rtalertp;
 	u_int32_t *plenp;
 {
 	struct ip6_hdr *ip6;
 	int optlen = 0;
 	u_int8_t *opt = opthead;
 	u_int16_t rtalert_val;
 	u_int32_t jumboplen;
 	/* offset of the first option byte from the start of the IPv6 header */
 	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
 
 	/* each iteration consumes one option of optlen bytes */
 	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
 		switch (*opt) {
 		case IP6OPT_PAD1:
 			/* single-byte padding, no length field */
 			optlen = 1;
 			break;
 		case IP6OPT_PADN:
 			if (hbhlen < IP6OPT_MINLEN) {
 				ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			/* data length byte plus type and length bytes */
 			optlen = *(opt + 1) + 2;
 			break;
 		case IP6OPT_ROUTER_ALERT:
 			/* XXX may need check for alignment */
 			if (hbhlen < IP6OPT_RTALERT_LEN) {
 				ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
 				/* XXX stat */
 				/* error pointer refers to the length byte */
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 1 - opthead);
 				return (-1);
 			}
 			optlen = IP6OPT_RTALERT_LEN;
 			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
 			*rtalertp = ntohs(rtalert_val);
 			break;
 		case IP6OPT_JUMBO:
 			/* XXX may need check for alignment */
 			if (hbhlen < IP6OPT_JUMBO_LEN) {
 				ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
 				/* XXX stat */
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 1 - opthead);
 				return (-1);
 			}
 			optlen = IP6OPT_JUMBO_LEN;
 
 			/*
 			 * IPv6 packets that have non 0 payload length
 			 * must not contain a jumbo payload option.
 			 */
 			ip6 = mtod(m, struct ip6_hdr *);
 			if (ip6->ip6_plen) {
 				ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt - opthead);
 				return (-1);
 			}
 
 			/*
 			 * We may see jumbolen in unaligned location, so
 			 * we'd need to perform bcopy().
 			 */
 			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
 			jumboplen = (u_int32_t)htonl(jumboplen);
 
 #if 1
 			/*
 			 * if there are multiple jumbo payload options,
 			 * *plenp will be non-zero and the packet will be
 			 * rejected.
 			 * the behavior may need some debate in ipngwg -
 			 * multiple options does not make sense, however,
 			 * there's no explicit mention in specification.
 			 */
 			if (*plenp != 0) {
 				ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 2 - opthead);
 				return (-1);
 			}
 #endif
 
 			/*
 			 * jumbo payload length must be larger than 65535.
 			 */
 			if (jumboplen <= IPV6_MAXPACKET) {
 				ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 2 - opthead);
 				return (-1);
 			}
 			*plenp = jumboplen;
 
 			break;
 		default:		/* unknown option */
 			if (hbhlen < IP6OPT_MINLEN) {
 				ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			/* returns the option's data length, or -1 on rejection */
 			optlen = ip6_unknown_opt(opt, m,
 			    erroff + opt - opthead);
 			if (optlen == -1)
 				return (-1);
 			/* account for the type and length bytes */
 			optlen += 2;
 			break;
 		}
 	}
 
 	return (0);
 
   bad:
 	/* truncated option: drop the packet without sending an ICMPv6 error */
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * Unknown option processing.
  *
  * The action is selected by IP6OPT_TYPE() of the option type byte at optp.
  * The third argument `off' is the offset from the IPv6 header to the option;
  * it is needed to return an ICMPv6 error when the IPv6 header and the option
  * header are not contiguous.
  *
  * Returns the option's data length (the byte following the type) when the
  * option is to be skipped.  Returns -1 in every other case, after the mbuf
  * has been freed or handed to icmp6_error().
  */
 int
 ip6_unknown_opt(optp, m, off)
 	u_int8_t *optp;
 	struct mbuf *m;
 	int off;
 {
 	const int action = IP6OPT_TYPE(*optp);
 
 	if (action == IP6OPT_TYPE_SKIP) {
 		/* ignore the option */
 		return ((int)optp[1]);
 	}
 
 	if (action == IP6OPT_TYPE_DISCARD) {
 		/* silently discard */
 		m_freem(m);
 		return (-1);
 	}
 
 	if (action == IP6OPT_TYPE_FORCEICMP) {
 		/* send ICMP even if multicasted */
 		ip6stat.ip6s_badoptions++;
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
 		return (-1);
 	}
 
 	if (action == IP6OPT_TYPE_ICMP) {
 		struct ip6_hdr *hdr;
 
 		/* send ICMP if not multicasted */
 		ip6stat.ip6s_badoptions++;
 		hdr = mtod(m, struct ip6_hdr *);
 		if (!IN6_IS_ADDR_MULTICAST(&hdr->ip6_dst) &&
 		    !(m->m_flags & (M_BCAST|M_MCAST)))
 			icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_OPTION, off);
 		else
 			m_freem(m);
 		return (-1);
 	}
 
 	m_freem(m);		/* XXX: NOTREACHED */
 	return (-1);
 }
 
 /*
  * Create the "control" list for this pcb.
  * The function will not modify mbuf chain at all.
  *
  * For every receive option enabled in in6p->in6p_flags (and, where
  * SO_TIMESTAMP is defined, on the pcb's socket) a control mbuf is built
  * with sbcreatecontrol() and stored at *mp; mp is then advanced to the new
  * mbuf's m_next so that later items are appended in order.  If
  * sbcreatecontrol() returns NULL that item is simply left out.
  *
  *	in6p	pcb whose flags select which ancillary data to produce
  *	m	received packet; its data starts with the IPv6 header
  *	mp	location that receives the first control mbuf
  *
  * with KAME mbuf chain restriction:
  * The routine will be called from upper layer handlers like tcp6_input().
  * Thus the routine assumes that the caller (tcp6_input) have already
  * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
  * very first mbuf on the mbuf chain.
  */
 void
 ip6_savecontrol(in6p, m, mp)
 	struct inpcb *in6p;
 	struct mbuf *m, **mp;
 {
 /* Pick the RFC 2292 option name (x) or the newer one (y) for this pcb. */
 #define IS2292(x, y)	((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 
 #ifdef SO_TIMESTAMP
 	if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) {
 		struct timeval tv;
 
 		microtime(&tv);
 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 		    SCM_TIMESTAMP, SOL_SOCKET);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 
 	/* Everything below is IPv6 specific; stop if this is not IPv6. */
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
 		return;
 
 	/* RFC 2292 sec. 5 */
 	if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) {
 		struct in6_pktinfo pi6;
 
 		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
 		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
 		pi6.ipi6_ifindex =
 		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
 
 		*mp = sbcreatecontrol((caddr_t) &pi6,
 		    sizeof(struct in6_pktinfo),
 		    IS2292(IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) {
 		int hlim = ip6->ip6_hlim & 0xff;
 
 		*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
 		    IS2292(IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
 		u_int32_t flowinfo;
 		int tclass;
 
 		/* The traffic class is the 8 bits starting at bit 20. */
 		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
 		flowinfo >>= 20;
 
 		tclass = flowinfo & 0xff;
 		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
 		    IPV6_TCLASS, IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	/*
 	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
 	 * privilege for the option (see ip6_ctloutput), but it might be too
 	 * strict, since there might be some hop-by-hop options which can be
 	 * returned to normal user.
 	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
 	 */
 	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
 		/*
 		 * Check if a hop-by-hop options header is contained in the
 		 * received packet, and if so, store the options as ancillary
 		 * data. Note that a hop-by-hop options header must be
 		 * just after the IPv6 header, which is assured through the
 		 * IPv6 input processing.
 		 */
 		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
 			struct ip6_hbh *hbh;
 			int hbhlen = 0;
 #ifdef PULLDOWN_TEST
 			struct mbuf *ext;
 #endif
 
 #ifndef PULLDOWN_TEST
 			hbh = (struct ip6_hbh *)(ip6 + 1);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 #else
 			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
 			    ip6->ip6_nxt);
 			if (ext == NULL) {
 				ip6stat.ip6s_tooshort++;
 				return;
 			}
 			hbh = mtod(ext, struct ip6_hbh *);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 			if (hbhlen != ext->m_len) {
 				m_freem(ext);
 				ip6stat.ip6s_tooshort++;
 				return;
 			}
 #endif
 
 			/*
 			 * XXX: We copy the whole header even if a
 			 * jumbo payload option is included, the option which
 			 * is to be removed before returning according to
 			 * RFC2292.
 			 * Note: this constraint is removed in RFC3542
 			 */
 			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
 			    IS2292(IPV6_2292HOPOPTS, IPV6_HOPOPTS),
 			    IPPROTO_IPV6);
 			if (*mp)
 				mp = &(*mp)->m_next;
 #ifdef PULLDOWN_TEST
 			m_freem(ext);
 #endif
 		}
 	}
 
 	if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
 		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
 
 		/*
 		 * Search for destination options headers or routing
 		 * header(s) through the header chain, and stores each
 		 * header as ancillary data.
 		 * Note that the order of the headers remains in
 		 * the chain of ancillary data.
 		 */
 		while (1) {	/* is explicit loop prevention necessary? */
 			struct ip6_ext *ip6e = NULL;
 			int elen;
 #ifdef PULLDOWN_TEST
 			struct mbuf *ext = NULL;
 #endif
 
 			/*
 			 * if it is not an extension header, don't try to
 			 * pull it from the chain.
 			 */
 			switch (nxt) {
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_ROUTING:
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_AH: /* is it possible? */
 				break;
 			default:
 				goto loopend;
 			}
 
 #ifndef PULLDOWN_TEST
 			/* Stop as soon as the header leaves the first mbuf. */
 			if (off + sizeof(*ip6e) > m->m_len)
 				goto loopend;
 			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
 			if (nxt == IPPROTO_AH)
 				elen = (ip6e->ip6e_len + 2) << 2;
 			else
 				elen = (ip6e->ip6e_len + 1) << 3;
 			if (off + elen > m->m_len)
 				goto loopend;
 #else
 			ext = ip6_pullexthdr(m, off, nxt);
 			if (ext == NULL) {
 				ip6stat.ip6s_tooshort++;
 				return;
 			}
 			ip6e = mtod(ext, struct ip6_ext *);
 			if (nxt == IPPROTO_AH)
 				elen = (ip6e->ip6e_len + 2) << 2;
 			else
 				elen = (ip6e->ip6e_len + 1) << 3;
 			if (elen != ext->m_len) {
 				m_freem(ext);
 				ip6stat.ip6s_tooshort++;
 				return;
 			}
 #endif
 
 			switch (nxt) {
 			case IPPROTO_DSTOPTS:
 				if (!(in6p->in6p_flags & IN6P_DSTOPTS))
 					break;
 
 				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
 				    IS2292(IPV6_2292DSTOPTS, IPV6_DSTOPTS),
 				    IPPROTO_IPV6);
 				if (*mp)
 					mp = &(*mp)->m_next;
 				break;
 			case IPPROTO_ROUTING:
 				/*
 				 * The parentheses matter: "!flags & IN6P_RTHDR"
 				 * negates the flags word first, so the routing
 				 * header was never skipped for sockets that
 				 * asked for destination options only.
 				 */
 				if (!(in6p->in6p_flags & IN6P_RTHDR))
 					break;
 
 				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
 				    IS2292(IPV6_2292RTHDR, IPV6_RTHDR),
 				    IPPROTO_IPV6);
 				if (*mp)
 					mp = &(*mp)->m_next;
 				break;
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_AH: /* is it possible? */
 				break;
 
 			default:
 				/*
 				 * other cases have been filtered in the above.
 				 * none will visit this case.  here we supply
 				 * the code just in case (nxt overwritten or
 				 * other cases).
 				 */
 #ifdef PULLDOWN_TEST
 				m_freem(ext);
 #endif
 				goto loopend;
 
 			}
 
 			/* proceed with the next header. */
 			off += elen;
 			nxt = ip6e->ip6e_nxt;
 			ip6e = NULL;
 #ifdef PULLDOWN_TEST
 			m_freem(ext);
 			ext = NULL;
 #endif
 		}
 	  loopend:
 		;
 	}
 
 #undef IS2292
 }
 
 void
 ip6_notify_pmtu(in6p, dst, mtu)
 	struct inpcb *in6p;
 	struct sockaddr_in6 *dst;
 	u_int32_t *mtu;
 {
 	struct socket *so;
 	struct mbuf *m_mtu;
 	struct ip6_mtuinfo mtuctl;
 
 	so =  in6p->inp_socket;
 
 	if (mtu == NULL)
 		return;
 
 #ifdef DIAGNOSTIC
 	if (so == NULL)		/* I believe this is impossible */
 		panic("ip6_notify_pmtu: socket is NULL");
 #endif
 
 	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
 	mtuctl.ip6m_mtu = *mtu;
 	mtuctl.ip6m_addr = *dst;
 	if (sa6_recoverscope(&mtuctl.ip6m_addr))
 		return;
 
 	if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
 	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
 		return;
 
 	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
 	    == 0) {
 		m_freem(m_mtu);
 		/* XXX: should count statistics */
 	} else
 		sorwakeup(so);
 
 	return;
 }
 
 #ifdef PULLDOWN_TEST
 /*
  * Copy the single extension header found at offset off of chain m into a
  * freshly allocated mbuf and return it; the caller frees the result.
  * nxt is the protocol number of that header and selects how its length
  * field is interpreted (AH counts 4-byte units, the others 8-byte units).
  * Returns NULL when no mbuf (or cluster) can be obtained or when the header
  * does not fit into the allocated buffer.
  */
 static struct mbuf *
 ip6_pullexthdr(m, off, nxt)
 	struct mbuf *m;
 	size_t off;
 	int nxt;
 {
 	struct ip6_ext hdr;
 	struct mbuf *copy;
 	size_t hdrlen;
 
 #ifdef DIAGNOSTIC
 	if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
 	    nxt != IPPROTO_HOPOPTS &&
 	    nxt != IPPROTO_AH /* is it possible? */)
 		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
 #endif
 
 	/* Peek at the generic header to learn the real length. */
 	m_copydata(m, off, sizeof(hdr), (caddr_t)&hdr);
 	hdrlen = (nxt == IPPROTO_AH) ?
 	    (hdr.ip6e_len + 2) << 2 : (hdr.ip6e_len + 1) << 3;
 
 	MGET(copy, M_DONTWAIT, MT_DATA);
 	if (copy == NULL)
 		return (NULL);
 	if (hdrlen >= MLEN) {
 		/* Too big for a plain mbuf: attach a cluster. */
 		MCLGET(copy, M_DONTWAIT);
 		if ((copy->m_flags & M_EXT) == 0) {
 			m_free(copy);
 			return (NULL);
 		}
 	}
 
 	copy->m_len = 0;
 	if (hdrlen >= M_TRAILINGSPACE(copy)) {
 		m_free(copy);
 		return (NULL);
 	}
 
 	m_copydata(m, off, hdrlen, mtod(copy, caddr_t));
 	copy->m_len = hdrlen;
 	return (copy);
 }
 #endif
 
 /*
  * Return a pointer to the "next header" field of the header that precedes
  * the one located at offset OFF, i.e. the field that names the header
  * currently being processed.  Returns NULL when OFF lies before the end of
  * the IPv6 header.
  * XXX: This function supposes that
  *	M includes all headers,
  *	the next header field and the header length field of each header
  *	are valid, and
  *	the sum of each header length equals to OFF.
  * Because of these assumptions, this function must be called very
  * carefully. Moreover, it will not be used in the near future when
  * we develop `neater' mechanism to process extension headers.
  */
 char *
 ip6_get_prevhdr(m, off)
 	struct mbuf *m;
 	int off;
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct ip6_ext *ext = NULL;
 	int pos, proto;
 
 	/* No extension header in between: the IPv6 header is the answer. */
 	if (off == sizeof(struct ip6_hdr))
 		return (&ip6->ip6_nxt);
 
 	/* Walk the chain, remembering the last header stepped over. */
 	proto = ip6->ip6_nxt;
 	for (pos = sizeof(struct ip6_hdr); pos < off; ) {
 		ext = (struct ip6_ext *)(mtod(m, caddr_t) + pos);
 
 		if (proto == IPPROTO_FRAGMENT)
 			pos += sizeof(struct ip6_frag);
 		else if (proto == IPPROTO_AH)
 			pos += (ext->ip6e_len + 2) << 2;
 		else
 			pos += (ext->ip6e_len + 1) << 3;
 		proto = ext->ip6e_nxt;
 	}
 
 	if (ext == NULL)
 		return NULL;
 	return (&ext->ip6e_nxt);
 }
 
 /*
  * get next header offset.  m will be retained.
  *
  * Given that a header of type proto starts at offset off in packet m,
  * return the offset just past it and, when nxtp is not NULL, store the
  * protocol number of the following header there.  Returns -1 when the
  * packet is too short, when the header is a non-first fragment, or when
  * proto is not a header this routine steps over.
  */
 int
 ip6_nexthdr(m, off, proto, nxtp)
 	struct mbuf *m;
 	int off;
 	int proto;
 	int *nxtp;
 {
 	struct ip6_hdr ip6;
 	struct ip6_ext ip6e;
 	struct ip6_frag fh;
 	int following;
 
 	/* just in case */
 	if (m == NULL)
 		panic("ip6_nexthdr: m == NULL");
 	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
 		return (-1);
 
 	switch (proto) {
 	case IPPROTO_IPV6:
 		if (m->m_pkthdr.len < off + sizeof(ip6))
 			return (-1);
 		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
 		following = ip6.ip6_nxt;
 		off += sizeof(ip6);
 		break;
 
 	case IPPROTO_FRAGMENT:
 		if (m->m_pkthdr.len < off + sizeof(fh))
 			return (-1);
 		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
 		/*
 		 * terminate parsing if it is not the first fragment,
 		 * it does not make sense to parse through it.
 		 * IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian)
 		 */
 		if (fh.ip6f_offlg & IP6F_OFF_MASK)
 			return (-1);
 		following = fh.ip6f_nxt;
 		off += sizeof(struct ip6_frag);
 		break;
 
 	case IPPROTO_AH:
 	case IPPROTO_HOPOPTS:
 	case IPPROTO_ROUTING:
 	case IPPROTO_DSTOPTS:
 		if (m->m_pkthdr.len < off + sizeof(ip6e))
 			return (-1);
 		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
 		following = ip6e.ip6e_nxt;
 		/* AH counts its length in 4-byte units, the rest in 8. */
 		if (proto == IPPROTO_AH)
 			off += (ip6e.ip6e_len + 2) << 2;
 		else
 			off += (ip6e.ip6e_len + 1) << 3;
 		break;
 
 	case IPPROTO_NONE:
 	case IPPROTO_ESP:
 	case IPPROTO_IPCOMP:
 	default:
 		/* give up */
 		return (-1);
 	}
 
 	if (nxtp != NULL)
 		*nxtp = following;
 	return (off);
 }
 
 /*
  * get offset for the last header in the chain.  m will be kept untainted.
  */
 int
 ip6_lasthdr(m, off, proto, nxtp)
 	struct mbuf *m;
 	int off;
 	int proto;
 	int *nxtp;
 {
 	int newoff;
 	int nxt;
 
 	if (!nxtp) {
 		nxt = -1;
 		nxtp = &nxt;
 	}
 	while (1) {
 		newoff = ip6_nexthdr(m, off, proto, nxtp);
 		if (newoff < 0)
 			return off;
 		else if (newoff < off)
 			return -1;	/* invalid */
 		else if (newoff == off)
 			return newoff;
 
 		off = newoff;
 		proto = *nxtp;
 	}
 }
 
 /*
  * Return the ip6aux area stored in m's PACKET_TAG_IPV6_INPUT tag, creating
  * and zero-filling the tag first if the packet has none.  Returns NULL when
  * a new tag cannot be allocated.
  */
 struct ip6aux *
 ip6_addaux(m)
 	struct mbuf *m;
 {
 	struct m_tag *tag;
 
 	tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 	if (tag != NULL)
 		return ((struct ip6aux *)(tag + 1));
 
 	tag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
 	    M_NOWAIT);
 	if (tag == NULL)
 		return (NULL);
 
 	m_tag_prepend(m, tag);
 	/* The payload sits right behind the tag header. */
 	bzero(tag + 1, sizeof(struct ip6aux));
 	return ((struct ip6aux *)(tag + 1));
 }
 
 /*
  * Look up the ip6aux area stored in m's PACKET_TAG_IPV6_INPUT tag.
  * Returns NULL when the packet carries no such tag.
  */
 struct ip6aux *
 ip6_findaux(m)
 	struct mbuf *m;
 {
 	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 
 	if (tag == NULL)
 		return (NULL);
 	/* The payload sits right behind the tag header. */
 	return ((struct ip6aux *)(tag + 1));
 }
 
 /*
  * Remove the PACKET_TAG_IPV6_INPUT tag from m, if it has one.
  */
 void
 ip6_delaux(m)
 	struct mbuf *m;
 {
 	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 
 	if (tag == NULL)
 		return;
 	m_tag_delete(m, tag);
 }
 
 /*
  * System control for IP6
  */
 
 /*
  * Table of errno values with PRC_NCMDS slots; 0 means no error is mapped.
  * NOTE(review): presumably indexed by PRC_* control command code -- confirm
  * against the callers.  Slots beyond the last initializer, if PRC_NCMDS
  * provides any, are zero.
  */
 u_char	inet6ctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,		/* 0-3 */
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,	/* 4-7 */
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,	/* 8-11 */
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,		/* 12-15 */
 	0,		0,		0,		0,		/* 16-19 */
 	ENOPROTOOPT							/* 20 */
 };
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index 4eb7a6ed368c..57999f86d1ef 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -1,3500 +1,3500 @@
 /*	$FreeBSD$	*/
 /*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include "opt_ip6fw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/nd6.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #ifdef INET6
 #include <netinet6/ipsec6.h>
 #endif
 #include <netkey/key.h>
 #endif /* IPSEC */
 
 #ifdef FAST_IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #endif /* FAST_IPSEC */
 
 #include <netinet6/ip6_fw.h>
 
 #include <net/net_osdep.h>
 
 #include <netinet6/ip6protosw.h>
 #include <netinet6/scope6_var.h>
 
 /* Malloc type M_IP6MOPTS, labelled "ip6_moptions" (multicast options). */
 static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
 
 /*
  * Mbufs holding the pieces of an outgoing packet's header chain while
  * ip6_output() assembles it.  A NULL member means that header is absent.
  */
 struct ip6_exthdrs {
 	struct mbuf *ip6e_ip6;		/* mbuf with the IPv6 header itself */
 	struct mbuf *ip6e_hbh;		/* hop-by-hop options header */
 	struct mbuf *ip6e_dest1;	/* destination options, 1st part */
 	struct mbuf *ip6e_rthdr;	/* routing header */
 	struct mbuf *ip6e_dest2;	/* destination options, 2nd part */
 };
 
 /*
  * Prototypes for the file-local (static) helpers of this file, written
  * with the __P() prototype wrapper.
  */
 /* Packet-option (ip6_pktopts) handling. */
 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
 			   int, int));
 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
 	struct socket *, struct sockopt *));
 static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, int,
 	int, int, int));
 
 /* Multicast options, extension header construction, path MTU. */
 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
 	struct ip6_frag **));
 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
 	struct ifnet *, struct in6_addr *, u_long *, int *));
 static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
 
 
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
  * This function may modify ver and hlim only.
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  *
  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
  * which is rt_rmx.rmx_mtu.
  */
 int
 ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
 	struct mbuf *m0;
 	struct ip6_pktopts *opt;
 	struct route_in6 *ro;
 	int flags;
 	struct ip6_moptions *im6o;
 	struct ifnet **ifpp;		/* XXX: just for statistics */
 	struct inpcb *inp;
 {
 	struct ip6_hdr *ip6, *mhip6;
 	struct ifnet *ifp, *origifp;
 	struct mbuf *m = m0;
 	int hlen, tlen, len, off;
 	struct route_in6 ip6route;
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 *dst, src_sa, dst_sa;
 	struct in6_addr odst;
 	int error = 0;
 	struct in6_ifaddr *ia = NULL;
 	u_long mtu;
 	int alwaysfrag, dontfrag;
 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
 	struct ip6_exthdrs exthdrs;
 	struct in6_addr finaldst, src0, dst0;
 	u_int32_t zone;
 	struct route_in6 *ro_pmtu = NULL;
 	int hdrsplit = 0;
 	int needipsec = 0;
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	int needipsectun = 0;
 	struct secpolicy *sp = NULL;
 #endif /*IPSEC || FAST_IPSEC*/
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	finaldst = ip6->ip6_dst;
 
 #define MAKE_EXTHDR(hp, mp)						\
     do {								\
 	if (hp) {							\
 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
 		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
 		    ((eh)->ip6e_len + 1) << 3);				\
 		if (error)						\
 			goto freehdrs;					\
 	}								\
     } while (/*CONSTCOND*/ 0)
 
 	bzero(&exthdrs, sizeof(exthdrs));
 
 	if (opt) {
 		/* Hop-by-Hop options header */
 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
 		/* Destination options header(1st part) */
 		if (opt->ip6po_rthdr) {
 			/*
 			 * Destination options header(1st part)
 			 * This only makes sense with a routing header.
 			 * See Section 9.2 of RFC 3542.
 			 * Disabling this part just for MIP6 convenience is
 			 * a bad idea.  We need to think carefully about a
 			 * way to make the advanced API coexist with MIP6
 			 * options, which might automatically be inserted in
 			 * the kernel.
 			 */
 			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
 		}
 		/* Routing header */
 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
 		/* Destination options header(2nd part) */
 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
 	}
 
 #ifdef IPSEC
 	/* get a security policy for this packet */
 	if (inp == NULL)
 		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
 	else
 		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
 
 	if (sp == NULL) {
 		ipsec6stat.out_inval++;
 		goto freehdrs;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		ipsec6stat.out_polvio++;
 		goto freehdrs;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		needipsec = 0;
 		break;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* acquire a policy */
 			error = key_spdacquire(sp);
 			goto freehdrs;
 		}
 		needipsec = 1;
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
 	}
 #endif /* IPSEC */
 #ifdef FAST_IPSEC
 	/* get a security policy for this packet */
 	if (inp == NULL)
 		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
 	else
 		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
 
 	if (sp == NULL) {
 		newipsecstat.ips_out_inval++;
 		goto freehdrs;
 	}
 
 	error = 0;
 
 	/* check policy */
 	switch (sp->policy) {
 	case IPSEC_POLICY_DISCARD:
 		/*
 		 * This packet is just discarded.
 		 */
 		newipsecstat.ips_out_polvio++;
 		goto freehdrs;
 
 	case IPSEC_POLICY_BYPASS:
 	case IPSEC_POLICY_NONE:
 		/* no need to do IPsec. */
 		needipsec = 0;
 		break;
 
 	case IPSEC_POLICY_IPSEC:
 		if (sp->req == NULL) {
 			/* acquire a policy */
 			error = key_spdacquire(sp);
 			goto freehdrs;
 		}
 		needipsec = 1;
 		break;
 
 	case IPSEC_POLICY_ENTRUST:
 	default:
 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
 	}
 #endif /* FAST_IPSEC */
 
 	/*
 	 * Calculate the total length of the extension header chain.
 	 * Keep the length of the unfragmentable part for fragmentation.
 	 */
 	optlen = 0;
 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
 	/* NOTE: we don't add AH/ESP length here. do that later. */
 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
 
 	/*
 	 * If we need IPsec, or there is at least one extension header,
 	 * separate IP6 header from the payload.
 	 */
 	if ((needipsec || optlen) && !hdrsplit) {
 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 			m = NULL;
 			goto freehdrs;
 		}
 		m = exthdrs.ip6e_ip6;
 		hdrsplit++;
 	}
 
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* adjust mbuf packet header length */
 	m->m_pkthdr.len += optlen;
 	plen = m->m_pkthdr.len - sizeof(*ip6);
 
 	/* If this is a jumbo payload, insert a jumbo payload option. */
 	if (plen > IPV6_MAXPACKET) {
 		if (!hdrsplit) {
 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 				m = NULL;
 				goto freehdrs;
 			}
 			m = exthdrs.ip6e_ip6;
 			hdrsplit++;
 		}
 		/* adjust pointer */
 		ip6 = mtod(m, struct ip6_hdr *);
 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
 			goto freehdrs;
 		ip6->ip6_plen = 0;
 	} else
 		ip6->ip6_plen = htons(plen);
 
 	/*
 	 * Concatenate headers and fill in next header fields.
 	 * Here we have, on "m"
 	 *	IPv6 payload
 	 * and we insert headers accordingly.  Finally, we should be getting:
 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
 	 *
 	 * during the header composing process, "m" points to IPv6 header.
 	 * "mprev" points to an extension header prior to esp.
 	 */
 	{
 		u_char *nexthdrp = &ip6->ip6_nxt;
 		struct mbuf *mprev = m;
 
 		/*
 		 * we treat dest2 specially.  this makes IPsec processing
 		 * much easier.  the goal here is to make mprev point the
 		 * mbuf prior to dest2.
 		 *
 		 * result: IPv6 dest2 payload
 		 * m and mprev will point to IPv6 header.
 		 */
 		if (exthdrs.ip6e_dest2) {
 			if (!hdrsplit)
 				panic("assumption failed: hdr not split");
 			exthdrs.ip6e_dest2->m_next = m->m_next;
 			m->m_next = exthdrs.ip6e_dest2;
 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
 		}
 
 #define MAKE_CHAIN(m, mp, p, i)\
     do {\
 	if (m) {\
 		if (!hdrsplit) \
 			panic("assumption failed: hdr not split"); \
 		*mtod((m), u_char *) = *(p);\
 		*(p) = (i);\
 		p = mtod((m), u_char *);\
 		(m)->m_next = (mp)->m_next;\
 		(mp)->m_next = (m);\
 		(mp) = (m);\
 	}\
     } while (/*CONSTCOND*/ 0)
 		/*
 		 * result: IPv6 hbh dest1 rthdr dest2 payload
 		 * m will point to IPv6 header.  mprev will point to the
 		 * extension header prior to dest2 (rthdr in the above case).
 		 */
 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
 		    IPPROTO_DSTOPTS);
 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
 		    IPPROTO_ROUTING);
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 		if (!needipsec)
 			goto skip_ipsec2;
 
 		/*
 		 * pointers after IPsec headers are not valid any more.
 		 * other pointers need a great care too.
 		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
 		 */
 		exthdrs.ip6e_dest2 = NULL;
 
 	    {
 		struct ip6_rthdr *rh = NULL;
 		int segleft_org = 0;
 		struct ipsec_output_state state;
 
 		if (exthdrs.ip6e_rthdr) {
 			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
 			segleft_org = rh->ip6r_segleft;
 			rh->ip6r_segleft = 0;
 		}
 
 		bzero(&state, sizeof(state));
 		state.m = m;
 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
 		    &needipsectun);
 		m = state.m;
 		if (error) {
 			/* mbuf is already reclaimed in ipsec6_output_trans. */
 			m = NULL;
 			switch (error) {
 			case EHOSTUNREACH:
 			case ENETUNREACH:
 			case EMSGSIZE:
 			case ENOBUFS:
 			case ENOMEM:
 				break;
 			default:
 				printf("ip6_output (ipsec): error code %d\n", error);
 				/* FALLTHROUGH */
 			case ENOENT:
 				/* don't show these error codes to the user */
 				error = 0;
 				break;
 			}
 			goto bad;
 		}
 		if (exthdrs.ip6e_rthdr) {
 			/* ah6_output doesn't modify mbuf chain */
 			rh->ip6r_segleft = segleft_org;
 		}
 	    }
 skip_ipsec2:;
 #endif
 	}
 
 	/*
 	 * If there is a routing header, replace the destination address field
 	 * with the first hop of the routing header.
 	 */
 	if (exthdrs.ip6e_rthdr) {
 		struct ip6_rthdr *rh =
 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
 						  struct ip6_rthdr *));
 		struct ip6_rthdr0 *rh0;
 		struct in6_addr *addr;
 		struct sockaddr_in6 sa;
 
 		switch (rh->ip6r_type) {
 		case IPV6_RTHDR_TYPE_0:
 			 rh0 = (struct ip6_rthdr0 *)rh;
 			 addr = (struct in6_addr *)(rh0 + 1);
 
 			 /*
 			  * construct a sockaddr_in6 form of
 			  * the first hop.
 			  *
 			  * XXX: we may not have enough
 			  * information about its scope zone;
 			  * there is no standard API to pass
 			  * the information from the
 			  * application.
 			  */
 			 bzero(&sa, sizeof(sa));
 			 sa.sin6_family = AF_INET6;
 			 sa.sin6_len = sizeof(sa);
 			 sa.sin6_addr = addr[0];
 			 if ((error = sa6_embedscope(&sa,
 			     ip6_use_defzone)) != 0) {
 				 goto bad;
 			 }
 			 ip6->ip6_dst = sa.sin6_addr;
 			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
 			     * (rh0->ip6r0_segleft - 1));
 			 addr[rh0->ip6r0_segleft - 1] = finaldst;
 			 /* XXX */
 			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
 			 break;
 		default:	/* is it possible? */
 			 error = EINVAL;
 			 goto bad;
 		}
 	}
 
 	/* Source address validation */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
 	    (flags & IPV6_UNSPECSRC) == 0) {
 		error = EOPNOTSUPP;
 		ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 		error = EOPNOTSUPP;
 		ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 
 	ip6stat.ip6s_localout++;
 
 	/*
 	 * Route packet.
 	 */
 	if (ro == 0) {
 		ro = &ip6route;
 		bzero((caddr_t)ro, sizeof(*ro));
 	}
 	ro_pmtu = ro;
 	if (opt && opt->ip6po_rthdr)
 		ro = &opt->ip6po_route;
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
 
 again:
  	/*
 	 * if specified, try to fill in the traffic class field.
 	 * do not override if a non-zero value is already set.
 	 * we check the diffserv field and the ecn field separately.
 	 */
 	if (opt && opt->ip6po_tclass >= 0) {
 		int mask = 0;
 
 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
 			mask |= 0xfc;
 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
 			mask |= 0x03;
 		if (mask != 0)
 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
 	}
 
 	/* fill in or override the hop limit field, if necessary. */
 	if (opt && opt->ip6po_hlim != -1)
 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (im6o != NULL)
 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
 		else
 			ip6->ip6_hlim = ip6_defmcasthlim;
 	}
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 	if (needipsec && needipsectun) {
 		struct ipsec_output_state state;
 
 		/*
 		 * All the extension headers will become inaccessible
 		 * (since they can be encrypted).
 		 * Don't panic, we need no more updates to extension headers
 		 * on inner IPv6 packet (since they are now encapsulated).
 		 *
 		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 		 */
 		bzero(&exthdrs, sizeof(exthdrs));
 		exthdrs.ip6e_ip6 = m;
 
 		bzero(&state, sizeof(state));
 		state.m = m;
 		state.ro = (struct route *)ro;
 		state.dst = (struct sockaddr *)dst;
 
 		error = ipsec6_output_tunnel(&state, sp, flags);
 
 		m = state.m;
 		ro = (struct route_in6 *)state.ro;
 		dst = (struct sockaddr_in6 *)state.dst;
 		if (error) {
 			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
 			m0 = m = NULL;
 			m = NULL;
 			switch (error) {
 			case EHOSTUNREACH:
 			case ENETUNREACH:
 			case EMSGSIZE:
 			case ENOBUFS:
 			case ENOMEM:
 				break;
 			default:
 				printf("ip6_output (ipsec): error code %d\n", error);
 				/* FALLTHROUGH */
 			case ENOENT:
 				/* don't show these error codes to the user */
 				error = 0;
 				break;
 			}
 			goto bad;
 		}
 
 		exthdrs.ip6e_ip6 = m;
 	}
 #endif /* IPSEC */
 
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	bzero(&dst_sa, sizeof(dst_sa));
 	dst_sa.sin6_family = AF_INET6;
 	dst_sa.sin6_len = sizeof(dst_sa);
 	dst_sa.sin6_addr = ip6->ip6_dst;
 	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
 	    &ifp, &rt, 0)) != 0) {
 		switch (error) {
 		case EHOSTUNREACH:
 			ip6stat.ip6s_noroute++;
 			break;
 		case EADDRNOTAVAIL:
 		default:
 			break; /* XXX statistics? */
 		}
 		if (ifp != NULL)
 			in6_ifstat_inc(ifp, ifs6_out_discard);
 		goto bad;
 	}
 	if (rt == NULL) {
 		/*
 		 * If in6_selectroute() does not return a route entry,
 		 * dst may not have been updated.
 		 */
 		*dst = dst_sa;	/* XXX */
 	}
 
 	/*
 	 * then rt (for unicast) and ifp must be non-NULL valid values.
 	 */
 	if ((flags & IPV6_FORWARDING) == 0) {
 		/* XXX: the FORWARDING flag can be set for mrouting. */
 		in6_ifstat_inc(ifp, ifs6_out_request);
 	}
 	if (rt != NULL) {
 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
 		rt->rt_use++;
 	}
 
 	/*
 	 * The outgoing interface must be in the zone of source and
 	 * destination addresses.  We should use ia_ifp to support the
 	 * case of sending packets to an address of our own.
 	 */
 	if (ia != NULL && ia->ia_ifp)
 		origifp = ia->ia_ifp;
 	else
 		origifp = ifp;
 
 	src0 = ip6->ip6_src;
 	if (in6_setscope(&src0, origifp, &zone))
 		goto badscope;
 	bzero(&src_sa, sizeof(src_sa));
 	src_sa.sin6_family = AF_INET6;
 	src_sa.sin6_len = sizeof(src_sa);
 	src_sa.sin6_addr = ip6->ip6_src;
 	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
 		goto badscope;
 
 	dst0 = ip6->ip6_dst;
 	if (in6_setscope(&dst0, origifp, &zone))
 		goto badscope;
 	/* re-initialize to be sure */
 	bzero(&dst_sa, sizeof(dst_sa));
 	dst_sa.sin6_family = AF_INET6;
 	dst_sa.sin6_len = sizeof(dst_sa);
 	dst_sa.sin6_addr = ip6->ip6_dst;
 	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
 		goto badscope;
 	}
 
 	/* scope check is done. */
 	goto routefound;
 
   badscope:
 	ip6stat.ip6s_badscope++;
 	in6_ifstat_inc(origifp, ifs6_out_discard);
 	if (error == 0)
 		error = EHOSTUNREACH; /* XXX */
 	goto bad;
 
   routefound:
 	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (opt && opt->ip6po_nextroute.ro_rt) {
 			/*
 			 * The nexthop is explicitly specified by the
 			 * application.  We assume the next hop is an IPv6
 			 * address.
 			 */
 			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
 		}
 		else if ((rt->rt_flags & RTF_GATEWAY))
 			dst = (struct sockaddr_in6 *)rt->rt_gateway;
 	}
 
 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
 	} else {
 		struct	in6_multi *in6m;
 
 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
 
 		in6_ifstat_inc(ifp, ifs6_out_mcast);
 
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if (!(ifp->if_flags & IFF_MULTICAST)) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(ifp, ifs6_out_discard);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
 		if (in6m != NULL &&
 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
 			 * forbid loopback, loop back a copy.
 			 */
 			ip6_mloopback(ifp, m, dst);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IPV6_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip6_mloopback(),
 			 * above, will be forwarded by the ip6_input() routine,
 			 * if necessary.
 			 */
 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
 				/*
 				 * XXX: ip6_mforward expects that rcvif is NULL
 				 * when it is called from the originating path.
 				 * However, it is not always the case, since
 				 * some versions of MGETHDR() does not
 				 * initialize the field.
 				 */
 				m->m_pkthdr.rcvif = NULL;
 				if (ip6_mforward(ip6, ifp, m) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 		/*
 		 * Multicasts with a hoplimit of zero may be looped back,
 		 * above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip6_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
 			m_freem(m);
 			goto done;
 		}
 	}
 
 	/*
 	 * Fill the outgoing interface to tell the upper layer
 	 * to increment per-interface statistics.
 	 */
 	if (ifpp)
 		*ifpp = ifp;
 
 	/* Determine path MTU. */
 	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
 	    &alwaysfrag)) != 0)
 		goto bad;
 
 	/*
 	 * The caller of this function may specify to use the minimum MTU
 	 * in some cases.
 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
 	 * setting.  The logic is a bit complicated; by default, unicast
 	 * packets will follow path MTU while multicast packets will be sent at
 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
 	 * including unicast ones will be sent at the minimum MTU.  Multicast
 	 * packets will always be sent at the minimum MTU unless
 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
 	 * See RFC 3542 for more details.
 	 */
 	if (mtu > IPV6_MMTU) {
 		if ((flags & IPV6_MINMTU))
 			mtu = IPV6_MMTU;
 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
 			mtu = IPV6_MMTU;
 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
 			 (opt == NULL ||
 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
 			mtu = IPV6_MMTU;
 		}
 	}
 
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
 
 	/*
 	 * Check with the firewall...
 	 */
 	if (ip6_fw_enable && ip6_fw_chk_ptr) {
 		u_short port = 0;
 		m->m_pkthdr.rcvif = NULL;	/* XXX */
 		/* If ipfw says divert, we have to just drop packet */
 		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
 			m_freem(m);
 			goto done;
 		}
 		if (!m) {
 			error = EACCES;
 			goto done;
 		}
 	}
 
 	/*
 	 * If the outgoing packet contains a hop-by-hop options header,
 	 * it must be examined and processed even by the source node.
 	 * (RFC 2460, section 4.)
 	 */
 	if (exthdrs.ip6e_hbh) {
 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
 		u_int32_t dummy; /* XXX unused */
 		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
 
 #ifdef DIAGNOSTIC
 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
 			panic("ip6e_hbh is not continuous");
 #endif
 		/*
 		 *  XXX: if we have to send an ICMPv6 error to the sender,
 		 *       we need the M_LOOP flag since icmp6_error() expects
 		 *       the IPv6 and the hop-by-hop options header are
 		 *       continuous unless the flag is set.
 		 */
 		m->m_flags |= M_LOOP;
 		m->m_pkthdr.rcvif = ifp;
 		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
 		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
 		    &dummy, &plen) < 0) {
 			/* m was already freed at this point */
 			error = EINVAL;/* better error? */
 			goto done;
 		}
 		m->m_flags &= ~M_LOOP; /* XXX */
 		m->m_pkthdr.rcvif = NULL;
 	}
 
 	/* Jump over all PFIL processing if hooks are not active. */
-	if (inet6_pfil_hook.ph_busy_count == -1)
+	if (!PFIL_HOOKED(&inet6_pfil_hook))
 		goto passout;
 
 	odst = ip6->ip6_dst;
 	/* Run through list of hooks for output packets. */
 	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip6_input(). */
 		if (in6_localaddr(&ip6->ip6_dst)) {
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 			error = netisr_queue(NETISR_IPV6, m);
 			goto done;
 		} else
 			goto again;	/* Redo the routing table lookup. */
 	}
 
 	/* XXX: IPFIREWALL_FORWARD */
 
 passout:
 	/*
 	 * Send the packet to the outgoing interface.
 	 * If necessary, do IPv6 fragmentation before sending.
 	 *
 	 * the logic here is rather complex:
 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
 	 * 1-a:	send as is if tlen <= path mtu
 	 * 1-b:	fragment if tlen > path mtu
 	 *
 	 * 2: if user asks us not to fragment (dontfrag == 1)
 	 * 2-a:	send as is if tlen <= interface mtu
 	 * 2-b:	error if tlen > interface mtu
 	 *
 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
 	 *	always fragment
 	 *
 	 * 4: if dontfrag == 1 && alwaysfrag == 1
 	 *	error, as we cannot handle this conflicting request
 	 */
 	tlen = m->m_pkthdr.len;
 
 	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
 		dontfrag = 1;
 	else
 		dontfrag = 0;
 	if (dontfrag && alwaysfrag) {	/* case 4 */
 		/* conflicting request - can't transmit */
 		error = EMSGSIZE;
 		goto bad;
 	}
 	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
 		/*
 		 * Even if the DONTFRAG option is specified, we cannot send the
 		 * packet when the data length is larger than the MTU of the
 		 * outgoing interface.
 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
 		 * well as returning an error code (the latter is not described
 		 * in the API spec.)
 		 */
 		u_int32_t mtu32;
 		struct ip6ctlparam ip6cp;
 
 		mtu32 = (u_int32_t)mtu;
 		bzero(&ip6cp, sizeof(ip6cp));
 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
 		    (void *)&ip6cp);
 
 		error = EMSGSIZE;
 		goto bad;
 	}
 
 	/*
 	 * transmit packet without fragmentation
 	 */
 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
 		struct in6_ifaddr *ia6;
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
 		if (ia6) {
 			/* Record statistics for this interface address. */
 			ia6->ia_ifa.if_opackets++;
 			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
 		}
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		goto done;
 	}
 
 	/*
 	 * try to fragment the packet.  case 1-b and 3
 	 */
 	if (mtu < IPV6_MMTU) {
 		/* path MTU cannot be less than IPV6_MMTU */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else if (ip6->ip6_plen == 0) {
 		/* jumbo payload cannot be fragmented */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else {
 		struct mbuf **mnext, *m_frgpart;
 		struct ip6_frag *ip6f;
 		u_int32_t id = htonl(ip6_randomid());
 		u_char nextproto;
 #if 0
 		struct ip6ctlparam ip6cp;
 		u_int32_t mtu32;
 #endif
 		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
 
 		/*
 		 * Too large for the destination or interface;
 		 * fragment if possible.
 		 * Must be able to put at least 8 bytes per fragment.
 		 */
 		hlen = unfragpartlen;
 		if (mtu > IPV6_MAXPACKET)
 			mtu = IPV6_MAXPACKET;
 
 #if 0
 		/*
 		 * It is believed this code is a leftover from the
 		 * development of the IPV6_RECVPATHMTU sockopt and 
 		 * associated work to implement RFC3542.
 		 * It's not entirely clear what the intent of the API
 		 * is at this point, so disable this code for now.
 		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
 		 * will send notifications if the application requests.
 		 */
 
 		/* Notify a proper path MTU to applications. */
 		mtu32 = (u_int32_t)mtu;
 		bzero(&ip6cp, sizeof(ip6cp));
 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
 		    (void *)&ip6cp);
 #endif
 
 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
 		if (len < 8) {
 			error = EMSGSIZE;
 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
 			goto bad;
 		}
 
 		/*
 		 * Verify that we have any chance at all of being able to queue
 		 *      the packet or packet fragments
 		 */
 		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
 		    < tlen  /* - hlen */)) {
 			error = ENOBUFS;
 			ip6stat.ip6s_odropped++;
 			goto bad;
 		}
 
 		mnext = &m->m_nextpkt;
 
 		/*
 		 * Change the next header field of the last header in the
 		 * unfragmentable part.
 		 */
 		if (exthdrs.ip6e_rthdr) {
 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_dest1) {
 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_hbh) {
 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
 		} else {
 			nextproto = ip6->ip6_nxt;
 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
 		}
 
 		/*
 		 * Loop through length of segment after first fragment,
 		 * make new header and copy data of each part and link onto
 		 * chain.
 		 */
 		m0 = m;
 		for (off = hlen; off < tlen; off += len) {
 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
 			if (!m) {
 				error = ENOBUFS;
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			m->m_pkthdr.rcvif = NULL;
 			m->m_flags = m0->m_flags & M_COPYFLAGS;
 			*mnext = m;
 			mnext = &m->m_nextpkt;
 			m->m_data += max_linkhdr;
 			mhip6 = mtod(m, struct ip6_hdr *);
 			*mhip6 = *ip6;
 			m->m_len = sizeof(*mhip6);
 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
 			if (error) {
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
 			if (off + len >= tlen)
 				len = tlen - off;
 			else
 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
 			mhip6->ip6_plen = htons((u_short)(len + hlen +
 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
 				error = ENOBUFS;
 				ip6stat.ip6s_odropped++;
 				goto sendorfree;
 			}
 			m_cat(m, m_frgpart);
 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
 			m->m_pkthdr.rcvif = NULL;
 			ip6f->ip6f_reserved = 0;
 			ip6f->ip6f_ident = id;
 			ip6f->ip6f_nxt = nextproto;
 			ip6stat.ip6s_ofragments++;
 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
 		}
 
 		in6_ifstat_inc(ifp, ifs6_out_fragok);
 	}
 
 	/*
 	 * Remove leading garbages.
 	 */
 sendorfree:
 	m = m0->m_nextpkt;
 	m0->m_nextpkt = 0;
 	m_freem(m0);
 	for (m0 = m; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
  			/* Record statistics for this interface address. */
  			if (ia) {
  				ia->ia_ifa.if_opackets++;
  				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
  			}
 #ifdef IPSEC
 			/* clean ipsec history once it goes out of the node */
 			ipsec_delaux(m);
 #endif
 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		ip6stat.ip6s_fragmented++;
 
 done:
 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
 		RTFREE(ro->ro_rt);
 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
 		RTFREE(ro_pmtu->ro_rt);
 	}
 
 #ifdef IPSEC
 	if (sp != NULL)
 		key_freesp(sp);
 #endif /* IPSEC */
 #ifdef FAST_IPSEC
 	if (sp != NULL)
 		KEY_FREESP(&sp);
 #endif /* FAST_IPSEC */
 
 	return (error);
 
 freehdrs:
 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
 	m_freem(exthdrs.ip6e_dest1);
 	m_freem(exthdrs.ip6e_rthdr);
 	m_freem(exthdrs.ip6e_dest2);
 	/* FALLTHROUGH */
 bad:
 	m_freem(m);
 	goto done;
 }
 
 static int
 ip6_copyexthdr(mp, hdr, hlen)
 	struct mbuf **mp;
 	caddr_t hdr;
 	int hlen;
 {
 	struct mbuf *m;
 
 	if (hlen > MCLBYTES)
 		return (ENOBUFS); /* XXX */
 
 	MGET(m, M_DONTWAIT, MT_DATA);
 	if (!m)
 		return (ENOBUFS);
 
 	if (hlen > MLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			return (ENOBUFS);
 		}
 	}
 	m->m_len = hlen;
 	if (hdr)
 		bcopy(hdr, mtod(m, caddr_t), hlen);
 
 	*mp = m;
 	return (0);
 }
 
 /*
  * Insert a jumbo payload option into the hop-by-hop options header.
  *
  * exthdrs	extension header set of the outgoing packet.  ip6e_hbh is
  *		created when absent, or replaced by a cluster-backed copy
  *		when it has no room left; the packet header length of
  *		ip6e_ip6 grows by JUMBOOPTLEN.
  * plen	payload length; the value written into the option is
  *		plen + JUMBOOPTLEN.
  *
  * Returns 0 on success, ENOBUFS when an mbuf or cluster cannot be obtained
  * or when the enlarged header would not fit in a cluster.
  */
 static int
 ip6_insert_jumboopt(exthdrs, plen)
 	struct ip6_exthdrs *exthdrs;
 	u_int32_t plen;
 {
 	struct mbuf *mopt;
 	u_char *optbuf;
 	u_int32_t v;
 
 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
 
 	/*
 	 * If there is no hop-by-hop options header, allocate new one.
 	 * If there is one but it doesn't have enough space to store the
 	 * jumbo payload option, allocate a cluster to store the whole options.
 	 * Otherwise, use it to store the options.
 	 */
 	if (exthdrs->ip6e_hbh == 0) {
 		MGET(mopt, M_DONTWAIT, MT_DATA);
 		if (mopt == 0)
 			return (ENOBUFS);
 		mopt->m_len = JUMBOOPTLEN;
 		optbuf = mtod(mopt, u_char *);
 		/*
 		 * Byte 0 is not written here (presumably the next-header
 		 * field, filled in by the caller -- TODO confirm).
 		 */
 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
 		exthdrs->ip6e_hbh = mopt;
 	} else {
 		struct ip6_hbh *hbh;
 
 		mopt = exthdrs->ip6e_hbh;
 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
 			/*
 			 * XXX assumption:
 			 * - exthdrs->ip6e_hbh is not referenced from places
 			 *   other than exthdrs.
 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
 			 */
 			int oldoptlen = mopt->m_len;
 			struct mbuf *n;
 
 			/*
 			 * XXX: give up if the whole (new) hbh header does
 			 * not fit even in an mbuf cluster.
 			 */
 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
 				return (ENOBUFS);
 
 			/*
 			 * As a consequence, we must always prepare a cluster
 			 * at this point.
 			 */
 			MGET(n, M_DONTWAIT, MT_DATA);
 			if (n) {
 				MCLGET(n, M_DONTWAIT);
 				if ((n->m_flags & M_EXT) == 0) {
 					m_freem(n);
 					n = NULL;
 				}
 			}
 			if (!n)
 				return (ENOBUFS);
 			n->m_len = oldoptlen + JUMBOOPTLEN;
 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
 			    oldoptlen);
 			/* New option bytes start right after the old ones. */
 			optbuf = mtod(n, u_char *) + oldoptlen;
 			m_freem(mopt);
 			mopt = exthdrs->ip6e_hbh = n;
 		} else {
 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
 			mopt->m_len += JUMBOOPTLEN;
 		}
 		/*
 		 * Two bytes of padding precede the jumbo option.  A PadN
 		 * option covering N bytes carries an option data length of
 		 * N - 2, so the length byte must be 0 here; any larger value
 		 * would make the pad swallow the jumbo option type below.
 		 */
 		optbuf[0] = IP6OPT_PADN;
 		optbuf[1] = 0;
 
 		/*
 		 * Adjust the header length according to the pad and
 		 * the jumbo payload option.
 		 */
 		hbh = mtod(mopt, struct ip6_hbh *);
 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
 	}
 
 	/* fill in the option: type, data length, 32-bit length value. */
 	optbuf[2] = IP6OPT_JUMBO;
 	optbuf[3] = 4;
 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
 	/* bcopy rather than a store: optbuf + 4 is not known to be aligned. */
 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
 
 	/* finally, adjust the packet header length */
 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
 
 	return (0);
 #undef JUMBOOPTLEN
 }
 
 /*
  * Insert fragment header and copy unfragmentable header portions.
  */
 static int
 ip6_insertfraghdr(m0, m, hlen, frghdrp)
 	struct mbuf *m0, *m;
 	int hlen;
 	struct ip6_frag **frghdrp;
 {
 	struct mbuf *n, *mlast;
 
 	if (hlen > sizeof(struct ip6_hdr)) {
 		n = m_copym(m0, sizeof(struct ip6_hdr),
 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
 		if (n == 0)
 			return (ENOBUFS);
 		m->m_next = n;
 	} else
 		n = m;
 
 	/* Search for the last mbuf of unfragmentable part. */
 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
 		;
 
 	if ((mlast->m_flags & M_EXT) == 0 &&
 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
 		/* use the trailing space of the last mbuf for the fragment hdr */
 		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
 		    mlast->m_len);
 		mlast->m_len += sizeof(struct ip6_frag);
 		m->m_pkthdr.len += sizeof(struct ip6_frag);
 	} else {
 		/* allocate a new mbuf for the fragment header */
 		struct mbuf *mfrg;
 
 		MGET(mfrg, M_DONTWAIT, MT_DATA);
 		if (mfrg == 0)
 			return (ENOBUFS);
 		mfrg->m_len = sizeof(struct ip6_frag);
 		*frghdrp = mtod(mfrg, struct ip6_frag *);
 		mlast->m_next = mfrg;
 	}
 
 	return (0);
 }
 
 static int
 ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
 	struct route_in6 *ro_pmtu, *ro;
 	struct ifnet *ifp;
 	struct in6_addr *dst;
 	u_long *mtup;
 	int *alwaysfragp;
 {
 	u_int32_t mtu = 0;
 	int alwaysfrag = 0;
 	int error = 0;
 
 	if (ro_pmtu != ro) {
 		/* The first hop and the final destination may differ. */
 		struct sockaddr_in6 *sa6_dst =
 		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
 		if (ro_pmtu->ro_rt &&
 		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
 		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
 			RTFREE(ro_pmtu->ro_rt);
 			ro_pmtu->ro_rt = (struct rtentry *)NULL;
 		}
 		if (ro_pmtu->ro_rt == NULL) {
 			bzero(sa6_dst, sizeof(*sa6_dst));
 			sa6_dst->sin6_family = AF_INET6;
 			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
 			sa6_dst->sin6_addr = *dst;
 
 			rtalloc((struct route *)ro_pmtu);
 		}
 	}
 	if (ro_pmtu->ro_rt) {
 		u_int32_t ifmtu;
 		struct in_conninfo inc;
 
 		bzero(&inc, sizeof(inc));
 		inc.inc_flags = 1; /* IPv6 */
 		inc.inc6_faddr = *dst;
 
 		if (ifp == NULL)
 			ifp = ro_pmtu->ro_rt->rt_ifp;
 		ifmtu = IN6_LINKMTU(ifp);
 		mtu = tcp_hc_getmtu(&inc);
 		if (mtu)
 			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
 		else
 			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
 		if (mtu == 0)
 			mtu = ifmtu;
 		else if (mtu < IPV6_MMTU) {
 			/*
 			 * RFC2460 section 5, last paragraph:
 			 * if we record ICMPv6 too big message with
 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
 			 * or smaller, with framgent header attached.
 			 * (fragment header is needed regardless from the
 			 * packet size, for translators to identify packets)
 			 */
 			alwaysfrag = 1;
 			mtu = IPV6_MMTU;
 		} else if (mtu > ifmtu) {
 			/*
 			 * The MTU on the route is larger than the MTU on
 			 * the interface!  This shouldn't happen, unless the
 			 * MTU of the interface has been changed after the
 			 * interface was brought up.  Change the MTU in the
 			 * route to match the interface MTU (as long as the
 			 * field isn't locked).
 			 */
 			mtu = ifmtu;
 			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
 		}
 	} else if (ifp) {
 		mtu = IN6_LINKMTU(ifp);
 	} else
 		error = EHOSTUNREACH; /* XXX */
 
 	*mtup = mtu;
 	if (alwaysfragp)
 		*alwaysfragp = alwaysfrag;
 	return (error);
 }
 
 /*
  * IP6 socket option processing.
  */
 int
 ip6_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	int privileged, optdatalen, uproto;
 	void *optdata;
 	struct inpcb *in6p = sotoinpcb(so);
 	int error, optval;
 	int level, op, optname;
 	int optlen;
 	struct thread *td;
 
 	if (sopt) {
 		level = sopt->sopt_level;
 		op = sopt->sopt_dir;
 		optname = sopt->sopt_name;
 		optlen = sopt->sopt_valsize;
 		td = sopt->sopt_td;
 	} else {
 		panic("ip6_ctloutput: arg soopt is NULL");
 	}
 	error = optval = 0;
 
 	privileged = (td == 0 || suser(td)) ? 0 : 1;
 	uproto = (int)so->so_proto->pr_protocol;
 
 	if (level == IPPROTO_IPV6) {
 		switch (op) {
 
 		case SOPT_SET:
 			switch (optname) {
 			case IPV6_2292PKTOPTIONS:
 #ifdef IPV6_PKTOPTIONS
 			case IPV6_PKTOPTIONS:
 #endif
 			{
 				struct mbuf *m;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				error = ip6_pcbopts(&in6p->in6p_outputopts,
 						    m, so, sopt);
 				m_freem(m); /* XXX */
 				break;
 			}
 
 			/*
 			 * Use of some Hop-by-Hop options or some
 			 * Destination options, might require special
 			 * privilege.  That is, normal applications
 			 * (without special privilege) might be forbidden
 			 * from setting certain options in outgoing packets,
 			 * and might never see certain options in received
 			 * packets. [RFC 2292 Section 6]
 			 * KAME specific note:
 			 *  KAME prevents non-privileged users from sending or
 			 *  receiving ANY hbh/dst options in order to avoid
 			 *  overhead of parsing options in the kernel.
 			 */
 			case IPV6_RECVHOPOPTS:
 			case IPV6_RECVDSTOPTS:
 			case IPV6_RECVRTHDRDSTOPTS:
 				if (!privileged) {
 					error = EPERM;
 					break;
 				}
 				/* FALLTHROUGH */
 			case IPV6_UNICAST_HOPS:
 			case IPV6_HOPLIMIT:
 			case IPV6_FAITH:
 
 			case IPV6_RECVPKTINFO:
 			case IPV6_RECVHOPLIMIT:
 			case IPV6_RECVRTHDR:
 			case IPV6_RECVPATHMTU:
 			case IPV6_RECVTCLASS:
 			case IPV6_V6ONLY:
 			case IPV6_AUTOFLOWLABEL:
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 
 				case IPV6_UNICAST_HOPS:
 					if (optval < -1 || optval >= 256)
 						error = EINVAL;
 					else {
 						/* -1 = kernel default */
 						in6p->in6p_hops = optval;
 						if ((in6p->in6p_vflag &
 						     INP_IPV4) != 0)
 							in6p->inp_ip_ttl = optval;
 					}
 					break;
 #define OPTSET(bit) \
 do { \
 	if (optval) \
 		in6p->in6p_flags |= (bit); \
 	else \
 		in6p->in6p_flags &= ~(bit); \
 } while (/*CONSTCOND*/ 0)
 #define OPTSET2292(bit) \
 do { \
 	in6p->in6p_flags |= IN6P_RFC2292; \
 	if (optval) \
 		in6p->in6p_flags |= (bit); \
 	else \
 		in6p->in6p_flags &= ~(bit); \
 } while (/*CONSTCOND*/ 0)
 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
 
 				case IPV6_RECVPKTINFO:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_PKTINFO);
 					break;
 
 				case IPV6_HOPLIMIT:
 				{
 					struct ip6_pktopts **optp;
 
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					optp = &in6p->in6p_outputopts;
 					error = ip6_pcbopt(IPV6_HOPLIMIT,
 							   (u_char *)&optval,
 							   sizeof(optval),
 							   optp,
 							   privileged, uproto);
 					break;
 				}
 
 				case IPV6_RECVHOPLIMIT:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_HOPLIMIT);
 					break;
 
 				case IPV6_RECVHOPOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_HOPOPTS);
 					break;
 
 				case IPV6_RECVDSTOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_DSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDRDSTOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_RTHDRDSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDR:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_RTHDR);
 					break;
 
 				case IPV6_FAITH:
 					OPTSET(IN6P_FAITH);
 					break;
 
 				case IPV6_RECVPATHMTU:
 					/*
 					 * We ignore this option for TCP
 					 * sockets.
 					 * (RFC3542 leaves this case
 					 * unspecified.)
 					 */
 					if (uproto != IPPROTO_TCP)
 						OPTSET(IN6P_MTU);
 					break;
 
 				case IPV6_V6ONLY:
 					/*
 					 * make setsockopt(IPV6_V6ONLY)
 					 * available only prior to bind(2).
 					 * see ipng mailing list, Jun 22 2001.
 					 */
 					if (in6p->in6p_lport ||
 					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_IPV6_V6ONLY);
 					if (optval)
 						in6p->in6p_vflag &= ~INP_IPV4;
 					else
 						in6p->in6p_vflag |= INP_IPV4;
 					break;
 				case IPV6_RECVTCLASS:
 					/* cannot mix with RFC2292 XXX */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_TCLASS);
 					break;
 				case IPV6_AUTOFLOWLABEL:
 					OPTSET(IN6P_AUTOFLOWLABEL);
 					break;
 
 				}
 				break;
 
 			case IPV6_TCLASS:
 			case IPV6_DONTFRAG:
 			case IPV6_USE_MIN_MTU:
 			case IPV6_PREFER_TEMPADDR:
 				if (optlen != sizeof(optval)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				{
 					struct ip6_pktopts **optp;
 					optp = &in6p->in6p_outputopts;
 					error = ip6_pcbopt(optname,
 							   (u_char *)&optval,
 							   sizeof(optval),
 							   optp,
 							   privileged, uproto);
 					break;
 				}
 
 			case IPV6_2292PKTINFO:
 			case IPV6_2292HOPLIMIT:
 			case IPV6_2292HOPOPTS:
 			case IPV6_2292DSTOPTS:
 			case IPV6_2292RTHDR:
 				/* RFC 2292 */
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 				case IPV6_2292PKTINFO:
 					OPTSET2292(IN6P_PKTINFO);
 					break;
 				case IPV6_2292HOPLIMIT:
 					OPTSET2292(IN6P_HOPLIMIT);
 					break;
 				case IPV6_2292HOPOPTS:
 					/*
 					 * Check super-user privilege.
 					 * See comments for IPV6_RECVHOPOPTS.
 					 */
 					if (!privileged)
 						return (EPERM);
 					OPTSET2292(IN6P_HOPOPTS);
 					break;
 				case IPV6_2292DSTOPTS:
 					if (!privileged)
 						return (EPERM);
 					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
 					break;
 				case IPV6_2292RTHDR:
 					OPTSET2292(IN6P_RTHDR);
 					break;
 				}
 				break;
 			case IPV6_PKTINFO:
 			case IPV6_HOPOPTS:
 			case IPV6_RTHDR:
 			case IPV6_DSTOPTS:
 			case IPV6_RTHDRDSTOPTS:
 			case IPV6_NEXTHOP:
 			{
 				/* new advanced API (RFC3542) */
 				u_char *optbuf;
 				u_char optbuf_storage[MCLBYTES];
 				int optlen;
 				struct ip6_pktopts **optp;
 
 				/* cannot mix with RFC2292 */
 				if (OPTBIT(IN6P_RFC2292)) {
 					error = EINVAL;
 					break;
 				}
 
 				/*
 				 * We only ensure valsize is not too large
 				 * here.  Further validation will be done
 				 * later.
 				 */
 				error = sooptcopyin(sopt, optbuf_storage,
 				    sizeof(optbuf_storage), 0);
 				if (error)
 					break;
 				optlen = sopt->sopt_valsize;
 				optbuf = optbuf_storage;
 				optp = &in6p->in6p_outputopts;
 				error = ip6_pcbopt(optname,
 						   optbuf, optlen,
 						   optp, privileged, uproto);
 				break;
 			}
 #undef OPTSET
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
 			    {
 				if (sopt->sopt_valsize > MLEN) {
 					error = EMSGSIZE;
 					break;
 				}
 				/* XXX */
 			    }
 			    /* FALLTHROUGH */
 			    {
 				struct mbuf *m;
 
 				if (sopt->sopt_valsize > MCLBYTES) {
 					error = EMSGSIZE;
 					break;
 				}
 				/* XXX */
 				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
 				if (m == 0) {
 					error = ENOBUFS;
 					break;
 				}
 				if (sopt->sopt_valsize > MLEN) {
 					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
 					if ((m->m_flags & M_EXT) == 0) {
 						m_free(m);
 						error = ENOBUFS;
 						break;
 					}
 				}
 				m->m_len = sopt->sopt_valsize;
 				error = sooptcopyin(sopt, mtod(m, char *),
 						    m->m_len, m->m_len);
 				if (error) {
 					(void)m_free(m);
 					break;
 				}
 				error =	ip6_setmoptions(sopt->sopt_name,
 							&in6p->in6p_moptions,
 							m);
 				(void)m_free(m);
 			    }
 				break;
 
 			case IPV6_PORTRANGE:
 				error = sooptcopyin(sopt, &optval,
 				    sizeof optval, sizeof optval);
 				if (error)
 					break;
 
 				switch (optval) {
 				case IPV6_PORTRANGE_DEFAULT:
 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 					break;
 
 				case IPV6_PORTRANGE_HIGH:
 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
 					in6p->in6p_flags |= IN6P_HIGHPORT;
 					break;
 
 				case IPV6_PORTRANGE_LOW:
 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
 					in6p->in6p_flags |= IN6P_LOWPORT;
 					break;
 
 				default:
 					error = EINVAL;
 					break;
 				}
 				break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 			case IPV6_IPSEC_POLICY:
 			    {
 				caddr_t req = NULL;
 				size_t len = 0;
 				struct mbuf *m;
 
 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 					break;
 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 					break;
 				if (m) {
 					req = mtod(m, caddr_t);
 					len = m->m_len;
 				}
 				error = ipsec6_set_policy(in6p, optname, req,
 							  len, privileged);
 				m_freem(m);
 			    }
 				break;
 #endif /* KAME IPSEC */
 
 			case IPV6_FW_ADD:
 			case IPV6_FW_DEL:
 			case IPV6_FW_FLUSH:
 			case IPV6_FW_ZERO:
 			    {
 				struct mbuf *m;
 				struct mbuf **mp = &m;
 
 				if (ip6_fw_ctl_ptr == NULL)
 					return EINVAL;
 				/* XXX */
 				if ((error = soopt_getm(sopt, &m)) != 0)
 					break;
 				/* XXX */
 				if ((error = soopt_mcopyin(sopt, m)) != 0)
 					break;
 				error = (*ip6_fw_ctl_ptr)(optname, mp);
 				m = *mp;
 			    }
 				break;
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 
 		case SOPT_GET:
 			switch (optname) {
 
 			case IPV6_2292PKTOPTIONS:
 #ifdef IPV6_PKTOPTIONS
 			case IPV6_PKTOPTIONS:
 #endif
 				/*
 				 * RFC3542 (effectively) deprecated the
 				 * semantics of the 2292-style pktoptions.
 				 * Since it was not reliable in nature (i.e.,
 				 * applications had to expect the lack of some
 				 * information after all), it would make sense
 				 * to simplify this part by always returning
 				 * empty data.
 				 */
 				sopt->sopt_valsize = 0;
 				break;
 
 			case IPV6_RECVHOPOPTS:
 			case IPV6_RECVDSTOPTS:
 			case IPV6_RECVRTHDRDSTOPTS:
 			case IPV6_UNICAST_HOPS:
 			case IPV6_RECVPKTINFO:
 			case IPV6_RECVHOPLIMIT:
 			case IPV6_RECVRTHDR:
 			case IPV6_RECVPATHMTU:
 
 			case IPV6_FAITH:
 			case IPV6_V6ONLY:
 			case IPV6_PORTRANGE:
 			case IPV6_RECVTCLASS:
 			case IPV6_AUTOFLOWLABEL:
 				switch (optname) {
 
 				case IPV6_RECVHOPOPTS:
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
 
 				case IPV6_RECVDSTOPTS:
 					optval = OPTBIT(IN6P_DSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDRDSTOPTS:
 					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
 					break;
 
 				case IPV6_UNICAST_HOPS:
 					optval = in6p->in6p_hops;
 					break;
 
 				case IPV6_RECVPKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
 
 				case IPV6_RECVHOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
 
 				case IPV6_RECVRTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
 
 				case IPV6_RECVPATHMTU:
 					optval = OPTBIT(IN6P_MTU);
 					break;
 
 				case IPV6_FAITH:
 					optval = OPTBIT(IN6P_FAITH);
 					break;
 
 				case IPV6_V6ONLY:
 					optval = OPTBIT(IN6P_IPV6_V6ONLY);
 					break;
 
 				case IPV6_PORTRANGE:
 				    {
 					int flags;
 					flags = in6p->in6p_flags;
 					if (flags & IN6P_HIGHPORT)
 						optval = IPV6_PORTRANGE_HIGH;
 					else if (flags & IN6P_LOWPORT)
 						optval = IPV6_PORTRANGE_LOW;
 					else
 						optval = 0;
 					break;
 				    }
 				case IPV6_RECVTCLASS:
 					optval = OPTBIT(IN6P_TCLASS);
 					break;
 
 				case IPV6_AUTOFLOWLABEL:
 					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
 					break;
 				}
 				if (error)
 					break;
 				error = sooptcopyout(sopt, &optval,
 					sizeof optval);
 				break;
 
 			case IPV6_PATHMTU:
 			{
 				u_long pmtu = 0;
 				struct ip6_mtuinfo mtuinfo;
 				struct route_in6 sro;
 
 				bzero(&sro, sizeof(sro));
 
 				if (!(so->so_state & SS_ISCONNECTED))
 					return (ENOTCONN);
 				/*
 				 * XXX: we do not consider the case of source
 				 * routing, or optional information to specify
 				 * the outgoing interface.
 				 */
 				error = ip6_getpmtu(&sro, NULL, NULL,
 				    &in6p->in6p_faddr, &pmtu, NULL);
 				if (sro.ro_rt)
 					RTFREE(sro.ro_rt);
 				if (error)
 					break;
 				if (pmtu > IPV6_MAXPACKET)
 					pmtu = IPV6_MAXPACKET;
 
 				bzero(&mtuinfo, sizeof(mtuinfo));
 				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
 				optdata = (void *)&mtuinfo;
 				optdatalen = sizeof(mtuinfo);
 				error = sooptcopyout(sopt, optdata,
 				    optdatalen);
 				break;
 			}
 
 			case IPV6_2292PKTINFO:
 			case IPV6_2292HOPLIMIT:
 			case IPV6_2292HOPOPTS:
 			case IPV6_2292RTHDR:
 			case IPV6_2292DSTOPTS:
 				switch (optname) {
 				case IPV6_2292PKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
 				case IPV6_2292HOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
 				case IPV6_2292HOPOPTS:
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
 				case IPV6_2292RTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
 				case IPV6_2292DSTOPTS:
 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
 					break;
 				}
 				error = sooptcopyout(sopt, &optval,
 				    sizeof optval);
 				break;
 			case IPV6_PKTINFO:
 			case IPV6_HOPOPTS:
 			case IPV6_RTHDR:
 			case IPV6_DSTOPTS:
 			case IPV6_RTHDRDSTOPTS:
 			case IPV6_NEXTHOP:
 			case IPV6_TCLASS:
 			case IPV6_DONTFRAG:
 			case IPV6_USE_MIN_MTU:
 			case IPV6_PREFER_TEMPADDR:
 				error = ip6_getpcbopt(in6p->in6p_outputopts,
 				    optname, sopt);
 				break;
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
 			    {
 				struct mbuf *m;
 				error = ip6_getmoptions(sopt->sopt_name,
 				    in6p->in6p_moptions, &m);
 				if (error == 0)
 					error = sooptcopyout(sopt,
 					    mtod(m, char *), m->m_len);
 				m_freem(m);
 			    }
 				break;
 
 #if defined(IPSEC) || defined(FAST_IPSEC)
 			case IPV6_IPSEC_POLICY:
 			  {
 				caddr_t req = NULL;
 				size_t len = 0;
 				struct mbuf *m = NULL;
 				struct mbuf **mp = &m;
 				size_t ovalsize = sopt->sopt_valsize;
 				caddr_t oval = (caddr_t)sopt->sopt_val;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				sopt->sopt_valsize = ovalsize;
 				sopt->sopt_val = oval;
 				if (m) {
 					req = mtod(m, caddr_t);
 					len = m->m_len;
 				}
 				error = ipsec6_get_policy(in6p, req, len, mp);
 				if (error == 0)
 					error = soopt_mcopyout(sopt, m); /* XXX */
 				if (error == 0 && m)
 					m_freem(m);
 				break;
 			  }
 #endif /* KAME IPSEC */
 
 			case IPV6_FW_GET:
 			  {
 				struct mbuf *m;
 				struct mbuf **mp = &m;
 
 				if (ip6_fw_ctl_ptr == NULL)
 			        {
 					return EINVAL;
 				}
 				error = (*ip6_fw_ctl_ptr)(optname, mp);
 				if (error == 0)
 					error = soopt_mcopyout(sopt, m); /* XXX */
 				if (error == 0 && m)
 					m_freem(m);
 			  }
 				break;
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 		}
 	} else {		/* level != IPPROTO_IPV6 */
 		error = EINVAL;
 	}
 	return (error);
 }
 
 int
 ip6_raw_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	int error = 0, optval, optlen;
 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
 	struct in6pcb *in6p = sotoin6pcb(so);
 	int level, op, optname;
 
 	if (sopt) {
 		level = sopt->sopt_level;
 		op = sopt->sopt_dir;
 		optname = sopt->sopt_name;
 		optlen = sopt->sopt_valsize;
 	} else
 		panic("ip6_raw_ctloutput: arg soopt is NULL");
 
 	if (level != IPPROTO_IPV6) {
 		return (EINVAL);
 	}
 
 	switch (optname) {
 	case IPV6_CHECKSUM:
 		/*
 		 * For ICMPv6 sockets, no modification allowed for checksum
 		 * offset, permit "no change" values to help existing apps.
 		 *
 		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
 		 * for an ICMPv6 socket will fail."
 		 * The current behavior does not meet RFC3542.
 		 */
 		switch (op) {
 		case SOPT_SET:
 			if (optlen != sizeof(int)) {
 				error = EINVAL;
 				break;
 			}
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 					    sizeof(optval));
 			if (error)
 				break;
 			if ((optval % 2) != 0) {
 				/* the API assumes even offset values */
 				error = EINVAL;
 			} else if (so->so_proto->pr_protocol ==
 			    IPPROTO_ICMPV6) {
 				if (optval != icmp6off)
 					error = EINVAL;
 			} else
 				in6p->in6p_cksum = optval;
 			break;
 
 		case SOPT_GET:
 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
 				optval = icmp6off;
 			else
 				optval = in6p->in6p_cksum;
 
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 
 	default:
 		error = ENOPROTOOPT;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Set up IP6 options in pcb for insertion in output packets or
  * specifying behavior of outgoing packets.
  */
 static int
 ip6_pcbopts(pktopt, m, so, sopt)
 	struct ip6_pktopts **pktopt;
 	struct mbuf *m;
 	struct socket *so;
 	struct sockopt *sopt;
 {
 	struct ip6_pktopts *opt = *pktopt;
 	int error = 0;
 	struct thread *td = sopt->sopt_td;
 	int priv = 0;
 
 	/* turn off any old options. */
 	if (opt) {
 #ifdef DIAGNOSTIC
 		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
 		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
 		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
 			printf("ip6_pcbopts: all specified options are cleared.\n");
 #endif
 		ip6_clearpktopts(opt, -1);
 	} else
 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
 	*pktopt = NULL;
 
 	if (!m || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options, regardless of
 		 * whether the opt is just created or given.
 		 */
 		free(opt, M_IP6OPT);
 		return (0);
 	}
 
 	/*  set options specified by user. */
 	if (td && !suser(td))
 		priv = 1;
 	if ((error = ip6_setpktopts(m, opt, NULL, priv,
 	    so->so_proto->pr_protocol)) != 0) {
 		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
 		free(opt, M_IP6OPT);
 		return (error);
 	}
 	*pktopt = opt;
 	return (0);
 }
 
 /*
  * Reset an ip6_pktopts structure to its defaults.  Zeroing alone is not
  * sufficient: several members use a non-zero value as their default.
  */
 void
 ip6_initpktopts(opt)
 	struct ip6_pktopts *opt;
 {
 
 	bzero(opt, sizeof(*opt));
 
 	/* Members whose default is something other than zero. */
 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
 	opt->ip6po_tclass = -1;		/* -1: use the default traffic class */
 	opt->ip6po_hlim = -1;		/* -1: use the default hop limit */
 }
 
 /*
  * Set one sticky option on a pcb.  The option container is allocated and
  * initialized on first use, then the work is handed to ip6_setpktopt() with
  * sticky=1 and cmsg=0.  Returns whatever ip6_setpktopt() returns.
  */
 static int
 ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
 	int optname, len, priv;
 	u_char *buf;
 	struct ip6_pktopts **pktopt;
 	int uproto;
 {
 	const int sticky = 1, cmsg = 0;
 
 	if (*pktopt == NULL) {
 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
 		    M_WAITOK);
 		ip6_initpktopts(*pktopt);
 	}
 
 	return (ip6_setpktopt(optname, buf, len, *pktopt, priv, sticky, cmsg,
 	    uproto));
 }
 
 static int
 ip6_getpcbopt(pktopt, optname, sopt)
 	struct ip6_pktopts *pktopt;
 	struct sockopt *sopt;
 	int optname;
 {
 	void *optdata = NULL;
 	int optdatalen = 0;
 	struct ip6_ext *ip6e;
 	int error = 0;
 	struct in6_pktinfo null_pktinfo;
 	int deftclass = 0, on;
 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
 
 	switch (optname) {
 	case IPV6_PKTINFO:
 		if (pktopt && pktopt->ip6po_pktinfo)
 			optdata = (void *)pktopt->ip6po_pktinfo;
 		else {
 			/* XXX: we don't have to do this every time... */
 			bzero(&null_pktinfo, sizeof(null_pktinfo));
 			optdata = (void *)&null_pktinfo;
 		}
 		optdatalen = sizeof(struct in6_pktinfo);
 		break;
 	case IPV6_TCLASS:
 		if (pktopt && pktopt->ip6po_tclass >= 0)
 			optdata = (void *)&pktopt->ip6po_tclass;
 		else
 			optdata = (void *)&deftclass;
 		optdatalen = sizeof(int);
 		break;
 	case IPV6_HOPOPTS:
 		if (pktopt && pktopt->ip6po_hbh) {
 			optdata = (void *)pktopt->ip6po_hbh;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_RTHDR:
 		if (pktopt && pktopt->ip6po_rthdr) {
 			optdata = (void *)pktopt->ip6po_rthdr;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_RTHDRDSTOPTS:
 		if (pktopt && pktopt->ip6po_dest1) {
 			optdata = (void *)pktopt->ip6po_dest1;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_DSTOPTS:
 		if (pktopt && pktopt->ip6po_dest2) {
 			optdata = (void *)pktopt->ip6po_dest2;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_NEXTHOP:
 		if (pktopt && pktopt->ip6po_nexthop) {
 			optdata = (void *)pktopt->ip6po_nexthop;
 			optdatalen = pktopt->ip6po_nexthop->sa_len;
 		}
 		break;
 	case IPV6_USE_MIN_MTU:
 		if (pktopt)
 			optdata = (void *)&pktopt->ip6po_minmtu;
 		else
 			optdata = (void *)&defminmtu;
 		optdatalen = sizeof(int);
 		break;
 	case IPV6_DONTFRAG:
 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
 			on = 1;
 		else
 			on = 0;
 		optdata = (void *)&on;
 		optdatalen = sizeof(on);
 		break;
 	case IPV6_PREFER_TEMPADDR:
 		if (pktopt)
 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
 		else
 			optdata = (void *)&defpreftemp;
 		optdatalen = sizeof(int);
 		break;
 	default:		/* should not happen */
 #ifdef DIAGNOSTIC
 		panic("ip6_getpcbopt: unexpected option\n");
 #endif
 		return (ENOPROTOOPT);
 	}
 
 	error = sooptcopyout(sopt, optdata, optdatalen);
 
 	return (error);
 }
 
 /* Free the allocated member "field" of pktopt (if set) and NULL it. */
 #define	PKTOPT_RELEASE(field)						\
 do {									\
 	if (pktopt->field)						\
 		free(pktopt->field, M_IP6OPT);				\
 	pktopt->field = NULL;						\
 } while (/*CONSTCOND*/ 0)
 
 /*
  * Reset packet options to their "unset" state, releasing any memory and
  * cached routes they hold.  "optname" selects a single option; -1 selects
  * all of them.  A NULL "pktopt" is ignored.  The structure itself is not
  * freed.
  */
 void
 ip6_clearpktopts(pktopt, optname)
 	struct ip6_pktopts *pktopt;
 	int optname;
 {
 	int every;
 
 	if (pktopt == NULL)
 		return;
 	every = (optname == -1);
 
 	if (every || optname == IPV6_PKTINFO)
 		PKTOPT_RELEASE(ip6po_pktinfo);
 
 	/* Scalar options go back to -1. */
 	if (every || optname == IPV6_HOPLIMIT)
 		pktopt->ip6po_hlim = -1;
 	if (every || optname == IPV6_TCLASS)
 		pktopt->ip6po_tclass = -1;
 
 	if (every || optname == IPV6_NEXTHOP) {
 		/* Drop the route cached for the next hop before the address. */
 		if (pktopt->ip6po_nextroute.ro_rt) {
 			RTFREE(pktopt->ip6po_nextroute.ro_rt);
 			pktopt->ip6po_nextroute.ro_rt = NULL;
 		}
 		PKTOPT_RELEASE(ip6po_nexthop);
 	}
 
 	if (every || optname == IPV6_HOPOPTS)
 		PKTOPT_RELEASE(ip6po_hbh);
 
 	if (every || optname == IPV6_RTHDRDSTOPTS)
 		PKTOPT_RELEASE(ip6po_dest1);
 
 	if (every || optname == IPV6_RTHDR) {
 		PKTOPT_RELEASE(ip6po_rhinfo.ip6po_rhi_rthdr);
 		/* The route cached for the routing header goes with it. */
 		if (pktopt->ip6po_route.ro_rt) {
 			RTFREE(pktopt->ip6po_route.ro_rt);
 			pktopt->ip6po_route.ro_rt = NULL;
 		}
 	}
 
 	if (every || optname == IPV6_DSTOPTS)
 		PKTOPT_RELEASE(ip6po_dest2);
 }
 #undef PKTOPT_RELEASE
 
 #define PKTOPT_EXTHDRCPY(type) \
 do {\
 	if (src->type) {\
 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
 		if (dst->type == NULL && canwait == M_NOWAIT)\
 			goto bad;\
 		bcopy(src->type, dst->type, hlen);\
 	}\
 } while (/*CONSTCOND*/ 0)
 
 static int
 copypktopts(dst, src, canwait)
 	struct ip6_pktopts *dst, *src;
 	int canwait;
 {
 	if (dst == NULL || src == NULL)  {
 		printf("ip6_clearpktopts: invalid argument\n");
 		return (EINVAL);
 	}
 
 	dst->ip6po_hlim = src->ip6po_hlim;
 	dst->ip6po_tclass = src->ip6po_tclass;
 	dst->ip6po_flags = src->ip6po_flags;
 	if (src->ip6po_pktinfo) {
 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
 		    M_IP6OPT, canwait);
 		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
 			goto bad;
 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
 	}
 	if (src->ip6po_nexthop) {
 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
 		    M_IP6OPT, canwait);
 		if (dst->ip6po_nexthop == NULL)
 			goto bad;
 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
 		    src->ip6po_nexthop->sa_len);
 	}
 	PKTOPT_EXTHDRCPY(ip6po_hbh);
 	PKTOPT_EXTHDRCPY(ip6po_dest1);
 	PKTOPT_EXTHDRCPY(ip6po_dest2);
 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
 	return (0);
 
   bad:
 	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
 	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
 	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
 	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
 	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
 	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
 	return (ENOBUFS);
 }
 #undef PKTOPT_EXTHDRCPY
 
 /*
  * Allocate a new ip6_pktopts and fill it with a deep copy of "src".
  * "canwait" is the malloc flag used for every allocation.  Returns the new
  * structure, or NULL when an allocation fails or copypktopts() rejects the
  * arguments.
  */
 struct ip6_pktopts *
 ip6_copypktopts(src, canwait)
 	struct ip6_pktopts *src;
 	int canwait;
 {
 	struct ip6_pktopts *copy;
 
 	copy = malloc(sizeof(*copy), M_IP6OPT, canwait);
 	if (copy == NULL && canwait == M_NOWAIT)
 		return (NULL);
 	ip6_initpktopts(copy);
 
 	if (copypktopts(copy, src, canwait) == 0)
 		return (copy);
 
 	/* The copy failed; give the container back. */
 	free(copy, M_IP6OPT);
 	return (NULL);
 }
 
 /*
  * Release a packet option structure together with everything it holds.
  * A NULL argument is ignored.
  */
 void
 ip6_freepcbopts(pktopt)
 	struct ip6_pktopts *pktopt;
 {
 	if (pktopt != NULL) {
 		ip6_clearpktopts(pktopt, -1);
 		free(pktopt, M_IP6OPT);
 	}
 }
 
 /*
  * Set the IP6 multicast options in response to user setsockopt().
  *
  * optname selects the option (IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS,
  * IPV6_MULTICAST_LOOP, IPV6_JOIN_GROUP or IPV6_LEAVE_GROUP), m carries the
  * option value, and *im6op is the multicast option block being modified.
  *
  * If *im6op is NULL a block is allocated and set to default values before
  * the option is processed.  After processing, if the block holds nothing but
  * default values and no memberships, it is freed again and *im6op is reset to
  * NULL.  This also happens when the request failed.
  *
  * Returns 0 on success or an errno value; EOPNOTSUPP for an unknown optname.
  */
 static int
 ip6_setmoptions(optname, im6op, m)
 	int optname;
 	struct ip6_moptions **im6op;
 	struct mbuf *m;
 {
 	int error = 0;
 	u_int loop, ifindex;
 	struct ipv6_mreq *mreq;
 	struct ifnet *ifp;
 	struct ip6_moptions *im6o = *im6op;
 	struct route_in6 ro;
 	struct in6_multi_mship *imm;
 	struct thread *td = curthread;
 
 	if (im6o == NULL) {
 		/*
 		 * No multicast option buffer attached to the pcb;
 		 * allocate one and initialize to default values.
 		 */
 		im6o = (struct ip6_moptions *)
 			malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
 
 		if (im6o == NULL)
 			return (ENOBUFS);
 		*im6op = im6o;
 		im6o->im6o_multicast_ifp = NULL;
 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
 		LIST_INIT(&im6o->im6o_memberships);
 	}
 
 	switch (optname) {
 
 	case IPV6_MULTICAST_IF:
 		/*
 		 * Select the interface for outgoing multicast packets.
 		 */
 		if (m == NULL || m->m_len != sizeof(u_int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
 		/*
 		 * ifindex is a u_int, so the "< 0" half of this test can
 		 * never be true; only the upper bound is effective.
 		 */
 		if (ifindex < 0 || if_index < ifindex) {
 			error = ENXIO;	/* XXX EINVAL? */
 			break;
 		}
 		ifp = ifnet_byindex(ifindex);
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		im6o->im6o_multicast_ifp = ifp;
 		break;
 
 	case IPV6_MULTICAST_HOPS:
 	    {
 		/*
 		 * Set the IP6 hoplimit for outgoing multicast packets.
 		 * Accepted values are 0..255, or -1 to select the system
 		 * default (ip6_defmcasthlim).
 		 */
 		int optval;
 		if (m == NULL || m->m_len != sizeof(int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
 		if (optval < -1 || optval >= 256)
 			error = EINVAL;
 		else if (optval == -1)
 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
 		else
 			im6o->im6o_multicast_hlim = optval;
 		break;
 	    }
 
 	case IPV6_MULTICAST_LOOP:
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.
 		 */
 		if (m == NULL || m->m_len != sizeof(u_int)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
 		if (loop > 1) {
 			error = EINVAL;
 			break;
 		}
 		im6o->im6o_multicast_loop = loop;
 		break;
 
 	case IPV6_JOIN_GROUP:
 		/*
 		 * Add a multicast group membership.
 		 * Group must be a valid IP6 multicast address.
 		 */
 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
 			error = EINVAL;
 			break;
 		}
 		mreq = mtod(m, struct ipv6_mreq *);
 
 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
 			/*
 			 * We use the unspecified address to specify to accept
 			 * all multicast addresses. Only super user is allowed
 			 * to do this.
 			 */
 			if (suser(td)) {
 				error = EACCES;
 				break;
 			}
 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
 			error = EINVAL;
 			break;
 		}
 
 		/*
 		 * If no interface was explicitly specified, choose an
 		 * appropriate one according to the given multicast address.
 		 */
 		if (mreq->ipv6mr_interface == 0) {
 			struct sockaddr_in6 *dst;
 
 			/*
 			 * Look up the routing table for the
 			 * address, and choose the outgoing interface.
 			 *   XXX: is it a good approach?
 			 */
 			ro.ro_rt = NULL;
 			dst = (struct sockaddr_in6 *)&ro.ro_dst;
 			bzero(dst, sizeof(*dst));
 			dst->sin6_family = AF_INET6;
 			dst->sin6_len = sizeof(*dst);
 			dst->sin6_addr = mreq->ipv6mr_multiaddr;
 			rtalloc((struct route *)&ro);
 			if (ro.ro_rt == NULL) {
 				error = EADDRNOTAVAIL;
 				break;
 			}
 			/* Only the interface is needed; drop the route. */
 			ifp = ro.ro_rt->rt_ifp;
 			RTFREE(ro.ro_rt);
 		} else {
 			/*
 			 * If the interface is specified, validate it.
 			 */
 			if (mreq->ipv6mr_interface < 0 ||
 			    if_index < mreq->ipv6mr_interface) {
 				error = ENXIO;	/* XXX EINVAL? */
 				break;
 			}
 			ifp = ifnet_byindex(mreq->ipv6mr_interface);
 			if (!ifp) {
 				error = ENXIO;	/* XXX EINVAL? */
 				break;
 			}
 		}
 
 		/*
 		 * See if we found an interface, and confirm that it
 		 * supports multicast
 		 */
 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 
 		/* Note: this rewrites the address inside the caller's mbuf. */
 		if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
 			error = EADDRNOTAVAIL; /* XXX: should not happen */
 			break;
 		}
 
 		/*
 		 * See if the membership already exists.
 		 */
 		for (imm = im6o->im6o_memberships.lh_first;
 		     imm != NULL; imm = imm->i6mm_chain.le_next)
 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 					       &mreq->ipv6mr_multiaddr))
 				break;
 		if (imm != NULL) {
 			error = EADDRINUSE;
 			break;
 		}
 		/*
 		 * Everything looks good; add a new record to the multicast
 		 * address list for the given interface.
 		 * On failure in6_joingroup() returns NULL; error is passed
 		 * by reference, presumably so it can report the reason.
 		 */
 		imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr,  &error, 0);
 		if (imm == NULL)
 			break;
 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
 		break;
 
 	case IPV6_LEAVE_GROUP:
 		/*
 		 * Drop a multicast group membership.
 		 * Group must be a valid IP6 multicast address.
 		 */
 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
 			error = EINVAL;
 			break;
 		}
 		mreq = mtod(m, struct ipv6_mreq *);
 
 		/*
 		 * If an interface address was specified, get a pointer
 		 * to its ifnet structure.
 		 */
 		if (mreq->ipv6mr_interface < 0 ||
 		    if_index < mreq->ipv6mr_interface) {
 			error = ENXIO;	/* XXX EINVAL? */
 			break;
 		}
 		if (mreq->ipv6mr_interface == 0)
 			ifp = NULL;
 		else
 			ifp = ifnet_byindex(mreq->ipv6mr_interface);
 
 		/* Fill in the scope zone ID */
 		if (ifp) {
 			if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
 				/* XXX: should not happen */
 				error = EADDRNOTAVAIL;
 				break;
 			}
 		} else if (mreq->ipv6mr_interface != 0) {
 			/*
 			 * This case happens when the (positive) index is in
 			 * the valid range, but the corresponding interface has
 			 * been detached dynamically (XXX).
 			 */
 			error = EADDRNOTAVAIL;
 			break;
 		} else {	/* ipv6mr_interface == 0 */
 			struct sockaddr_in6 sa6_mc;
 
 			/*
 			 * The API spec says as follows:
 			 *  If the interface index is specified as 0, the
 			 *  system may choose a multicast group membership to
 			 *  drop by matching the multicast address only.
 			 * On the other hand, we cannot disambiguate the scope
 			 * zone unless an interface is provided.  Thus, we
 			 * check if there's ambiguity with the default scope
 			 * zone as the last resort.
 			 */
 			bzero(&sa6_mc, sizeof(sa6_mc));
 			sa6_mc.sin6_family = AF_INET6;
 			sa6_mc.sin6_len = sizeof(sa6_mc);
 			sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
 			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
 			if (error != 0)
 				break;
 			mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
 		}
 
 		/*
 		 * Find the membership in the membership list.
 		 * With no interface given (ifp == NULL) the first entry whose
 		 * address matches is taken, whatever interface it is on.
 		 */
 		for (imm = im6o->im6o_memberships.lh_first;
 		     imm != NULL; imm = imm->i6mm_chain.le_next) {
 			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
 			    &mreq->ipv6mr_multiaddr))
 				break;
 		}
 		if (imm == NULL) {
 			/* Unable to resolve interface */
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		/*
 		 * Give up the multicast address record to which the
 		 * membership points.
 		 */
 		LIST_REMOVE(imm, i6mm_chain);
 		in6_delmulti(imm->i6mm_maddr);
 		free(imm, M_IP6MADDR);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	/*
 	 * If all options have default values, no need to keep the option
 	 * block.  This runs on the error paths too, so a block allocated
 	 * above for a request that then failed is released here.
 	 */
 	if (im6o->im6o_multicast_ifp == NULL &&
 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
 	    im6o->im6o_memberships.lh_first == NULL) {
 		free(*im6op, M_IP6MOPTS);
 		*im6op = NULL;
 	}
 
 	return (error);
 }
 
 /*
  * Return the IP6 multicast options in response to user getsockopt().
  */
 static int
 ip6_getmoptions(optname, im6o, mp)
 	int optname;
 	struct ip6_moptions *im6o;
 	struct mbuf **mp;
 {
 	u_int *hlim, *loop, *ifindex;
 
 	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
 
 	switch (optname) {
 
 	case IPV6_MULTICAST_IF:
 		ifindex = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
 			*ifindex = 0;
 		else
 			*ifindex = im6o->im6o_multicast_ifp->if_index;
 		return (0);
 
 	case IPV6_MULTICAST_HOPS:
 		hlim = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL)
 			*hlim = ip6_defmcasthlim;
 		else
 			*hlim = im6o->im6o_multicast_hlim;
 		return (0);
 
 	case IPV6_MULTICAST_LOOP:
 		loop = mtod(*mp, u_int *);
 		(*mp)->m_len = sizeof(u_int);
 		if (im6o == NULL)
 			*loop = ip6_defmcasthlim;
 		else
 			*loop = im6o->im6o_multicast_loop;
 		return (0);
 
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Discard the IP6 multicast options: leave every group still recorded in
  * the membership list, then free the option block itself.  A NULL argument
  * is ignored.
  */
 void
 ip6_freemoptions(im6o)
 	struct ip6_moptions *im6o;
 {
 	struct in6_multi_mship *imm;
 
 	if (im6o == NULL)
 		return;
 
 	/* Pop memberships off the head of the list until it is empty. */
 	for (;;) {
 		imm = im6o->im6o_memberships.lh_first;
 		if (imm == NULL)
 			break;
 		LIST_REMOVE(imm, i6mm_chain);
 		if (imm->i6mm_maddr != NULL)
 			in6_delmulti(imm->i6mm_maddr);
 		free(imm, M_IP6MADDR);
 	}
 
 	free(im6o, M_IP6MOPTS);
 }
 
 /*
  * Set IPv6 outgoing packet options based on advanced API.
  *
  * "control" is a single mbuf holding a sequence of cmsghdr objects; it is
  * consumed in place (m_data and m_len are advanced past each object).
  * "opt" is (re)initialized and then filled: first with a copy of
  * "stickyopt" when that is non-NULL, then with every IPPROTO_IPV6 ancillary
  * object found in "control".  Objects of other levels are skipped.
  *
  * Returns 0 on success or an errno value: EINVAL for NULL arguments, a
  * chained control mbuf or a malformed cmsghdr, otherwise the error reported
  * by copypktopts() or ip6_setpktopt().  On error "opt" may hold options set
  * before the failure; releasing them is left to the caller.
  */
 int
 ip6_setpktopts(control, opt, stickyopt, priv, uproto)
 	struct mbuf *control;
 	struct ip6_pktopts *opt, *stickyopt;
 	int priv, uproto;
 {
 	struct cmsghdr *cm = NULL;
 
 	if (control == NULL || opt == NULL)
 		return (EINVAL);
 
 	ip6_initpktopts(opt);
 	if (stickyopt) {
 		int error;
 
 		/*
 		 * If stickyopt is provided, make a local copy of the options
 		 * for this particular packet, then override them by ancillary
 		 * objects.
 		 * XXX: copypktopts() does not copy the cached route to a next
 		 * hop (if any).  This is not very good in terms of efficiency,
 		 * but we can allow this since this option should be rarely
 		 * used.
 		 */
 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
 			return (error);
 	}
 
 	/*
 	 * XXX: Currently, we assume all the optional information is stored
 	 * in a single mbuf.
 	 */
 	if (control->m_next)
 		return (EINVAL);
 
 	/*
 	 * Stop as soon as nothing is left.  The step subtracts the ALIGNED
 	 * object length, which can exceed the bytes remaining when the last
 	 * object is not padded; m_len then goes negative and must end the
 	 * loop rather than be treated as more data.
 	 */
 	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 		int error;
 
 		if (control->m_len < CMSG_LEN(0))
 			return (EINVAL);
 
 		cm = mtod(control, struct cmsghdr *);
 		/*
 		 * An object must at least contain its own header and must
 		 * fit in what is left of the buffer.  Rejecting lengths
 		 * shorter than the header also keeps the data length
 		 * computed below from going negative.
 		 */
 		if (cm->cmsg_len < CMSG_LEN(0) ||
 		    cm->cmsg_len > control->m_len)
 			return (EINVAL);
 		if (cm->cmsg_level != IPPROTO_IPV6)
 			continue;
 
 		/* Ancillary data item: sticky=0, cmsg=1. */
 		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
 		    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
 		if (error)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Set a particular packet option, as a sticky option or an ancillary data
  * item.  "len" can be 0 only when it's a sticky option.
  * We have 4 cases of combination of "sticky" and "cmsg":
  * "sticky=0, cmsg=0": impossible
  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
  * "sticky=1, cmsg=0": RFC3542 socket option
  * "sticky=1, cmsg=1": RFC2292 socket option
  */
 static int
 ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
 	int optname, len, priv, sticky, cmsg, uproto;
 	u_char *buf;
 	struct ip6_pktopts *opt;
 {
 	int minmtupolicy, preftemp;
 
 	if (!sticky && !cmsg) {
 #ifdef DIAGNOSTIC
 		printf("ip6_setpktopt: impossible case\n");
 #endif
 		return (EINVAL);
 	}
 
 	/*
 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
 	 * not be specified in the context of RFC3542.  Conversely,
 	 * RFC3542 types should not be specified in the context of RFC2292.
 	 */
 	if (!cmsg) {
 		switch (optname) {
 		case IPV6_2292PKTINFO:
 		case IPV6_2292HOPLIMIT:
 		case IPV6_2292NEXTHOP:
 		case IPV6_2292HOPOPTS:
 		case IPV6_2292DSTOPTS:
 		case IPV6_2292RTHDR:
 		case IPV6_2292PKTOPTIONS:
 			return (ENOPROTOOPT);
 		}
 	}
 	if (sticky && cmsg) {
 		switch (optname) {
 		case IPV6_PKTINFO:
 		case IPV6_HOPLIMIT:
 		case IPV6_NEXTHOP:
 		case IPV6_HOPOPTS:
 		case IPV6_DSTOPTS:
 		case IPV6_RTHDRDSTOPTS:
 		case IPV6_RTHDR:
 		case IPV6_USE_MIN_MTU:
 		case IPV6_DONTFRAG:
 		case IPV6_TCLASS:
 		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
 			return (ENOPROTOOPT);
 		}
 	}
 
 	switch (optname) {
 	case IPV6_2292PKTINFO:
 	case IPV6_PKTINFO:
 	{
 		struct ifnet *ifp = NULL;
 		struct in6_pktinfo *pktinfo;
 
 		if (len != sizeof(struct in6_pktinfo))
 			return (EINVAL);
 
 		pktinfo = (struct in6_pktinfo *)buf;
 
 		/*
 		 * An application can clear any sticky IPV6_PKTINFO option by
 		 * doing a "regular" setsockopt with ipi6_addr being
 		 * in6addr_any and ipi6_ifindex being zero.
 		 * [RFC 3542, Section 6]
 		 */
 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
 		    pktinfo->ipi6_ifindex == 0 &&
 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 			ip6_clearpktopts(opt, optname);
 			break;
 		}
 
 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 			return (EINVAL);
 		}
 
 		/* validate the interface index if specified. */
 		if (pktinfo->ipi6_ifindex > if_index ||
 		    pktinfo->ipi6_ifindex < 0) {
 			 return (ENXIO);
 		}
 		if (pktinfo->ipi6_ifindex) {
 			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
 			if (ifp == NULL)
 				return (ENXIO);
 		}
 
 		/*
 		 * We store the address anyway, and let in6_selectsrc()
 		 * validate the specified address.  This is because ipi6_addr
 		 * may not have enough information about its scope zone, and
 		 * we may need additional information (such as outgoing
 		 * interface or the scope zone of a destination address) to
 		 * disambiguate the scope.
 		 * XXX: the delay of the validation may confuse the
 		 * application when it is used as a sticky option.
 		 */
 		if (opt->ip6po_pktinfo == NULL) {
 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
 			    M_IP6OPT, M_NOWAIT);
 			if (opt->ip6po_pktinfo == NULL)
 				return (ENOBUFS);
 		}
 		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
 		break;
 	}
 
 	case IPV6_2292HOPLIMIT:
 	case IPV6_HOPLIMIT:
 	{
 		int *hlimp;
 
 		/*
 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
 		 * to simplify the ordering among hoplimit options.
 		 */
 		if (optname == IPV6_HOPLIMIT && sticky)
 			return (ENOPROTOOPT);
 
 		if (len != sizeof(int))
 			return (EINVAL);
 		hlimp = (int *)buf;
 		if (*hlimp < -1 || *hlimp > 255)
 			return (EINVAL);
 
 		opt->ip6po_hlim = *hlimp;
 		break;
 	}
 
 	case IPV6_TCLASS:
 	{
 		int tclass;
 
 		if (len != sizeof(int))
 			return (EINVAL);
 		tclass = *(int *)buf;
 		if (tclass < -1 || tclass > 255)
 			return (EINVAL);
 
 		opt->ip6po_tclass = tclass;
 		break;
 	}
 
 	case IPV6_2292NEXTHOP:
 	case IPV6_NEXTHOP:
 		if (!priv)
 			return (EPERM);
 
 		if (len == 0) {	/* just remove the option */
 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
 			break;
 		}
 
 		/* check if cmsg_len is large enough for sa_len */
 		if (len < sizeof(struct sockaddr) || len < *buf)
 			return (EINVAL);
 
 		switch (((struct sockaddr *)buf)->sa_family) {
 		case AF_INET6:
 		{
 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
 			int error;
 
 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
 
 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
 				return (EINVAL);
 			}
 			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
 			    != 0) {
 				return (error);
 			}
 			break;
 		}
 		case AF_LINK:	/* should eventually be supported */
 		default:
 			return (EAFNOSUPPORT);
 		}
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_nexthop == NULL)
 			return (ENOBUFS);
 		bcopy(buf, opt->ip6po_nexthop, *buf);
 		break;
 
 	case IPV6_2292HOPOPTS:
 	case IPV6_HOPOPTS:
 	{
 		struct ip6_hbh *hbh;
 		int hbhlen;
 
 		/*
 		 * XXX: We don't allow a non-privileged user to set ANY HbH
 		 * options, since per-option restriction has too much
 		 * overhead.
 		 */
 		if (!priv)
 			return (EPERM);
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
 			break;	/* just remove the option */
 		}
 
 		/* message length validation */
 		if (len < sizeof(struct ip6_hbh))
 			return (EINVAL);
 		hbh = (struct ip6_hbh *)buf;
 		hbhlen = (hbh->ip6h_len + 1) << 3;
 		if (len != hbhlen)
 			return (EINVAL);
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_hbh == NULL)
 			return (ENOBUFS);
 		bcopy(hbh, opt->ip6po_hbh, hbhlen);
 
 		break;
 	}
 
 	case IPV6_2292DSTOPTS:
 	case IPV6_DSTOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	{
 		struct ip6_dest *dest, **newdest = NULL;
 		int destlen;
 
 		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
 			return (EPERM);
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, optname);
 			break;	/* just remove the option */
 		}
 
 		/* message length validation */
 		if (len < sizeof(struct ip6_dest))
 			return (EINVAL);
 		dest = (struct ip6_dest *)buf;
 		destlen = (dest->ip6d_len + 1) << 3;
 		if (len != destlen)
 			return (EINVAL);
 
 		/*
 		 * Determine the position that the destination options header
 		 * should be inserted; before or after the routing header.
 		 */
 		switch (optname) {
 		case IPV6_2292DSTOPTS:
 			/*
 			 * The old advanced API is ambiguous on this point.
 			 * Our approach is to determine the position
 			 * according to the existence of a routing header.
 			 * Note, however, that this depends on the order of the
 			 * extension headers in the ancillary data; the 1st
 			 * part of the destination options header must appear
 			 * before the routing header in the ancillary data,
 			 * too.
 			 * RFC3542 solved the ambiguity by introducing
 			 * separate ancillary data or option types.
 			 */
 			if (opt->ip6po_rthdr == NULL)
 				newdest = &opt->ip6po_dest1;
 			else
 				newdest = &opt->ip6po_dest2;
 			break;
 		case IPV6_RTHDRDSTOPTS:
 			newdest = &opt->ip6po_dest1;
 			break;
 		case IPV6_DSTOPTS:
 			newdest = &opt->ip6po_dest2;
 			break;
 		}
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, optname);
 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
 		if (*newdest == NULL)
 			return (ENOBUFS);
 		bcopy(dest, *newdest, destlen);
 
 		break;
 	}
 
 	case IPV6_2292RTHDR:
 	case IPV6_RTHDR:
 	{
 		struct ip6_rthdr *rth;
 		int rthlen;
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, IPV6_RTHDR);
 			break;	/* just remove the option */
 		}
 
 		/* message length validation */
 		if (len < sizeof(struct ip6_rthdr))
 			return (EINVAL);
 		rth = (struct ip6_rthdr *)buf;
 		rthlen = (rth->ip6r_len + 1) << 3;
 		if (len != rthlen)
 			return (EINVAL);
 
 		switch (rth->ip6r_type) {
 		case IPV6_RTHDR_TYPE_0:
 			if (rth->ip6r_len == 0)	/* must contain one addr */
 				return (EINVAL);
 			if (rth->ip6r_len % 2) /* length must be even */
 				return (EINVAL);
 			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
 				return (EINVAL);
 			break;
 		default:
 			return (EINVAL);	/* not supported */
 		}
 
 		/* turn off the previous option */
 		ip6_clearpktopts(opt, IPV6_RTHDR);
 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_rthdr == NULL)
 			return (ENOBUFS);
 		bcopy(rth, opt->ip6po_rthdr, rthlen);
 
 		break;
 	}
 
 	case IPV6_USE_MIN_MTU:
 		if (len != sizeof(int))
 			return (EINVAL);
 		minmtupolicy = *(int *)buf;
 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
 		    minmtupolicy != IP6PO_MINMTU_ALL) {
 			return (EINVAL);
 		}
 		opt->ip6po_minmtu = minmtupolicy;
 		break;
 
 	case IPV6_DONTFRAG:
 		if (len != sizeof(int))
 			return (EINVAL);
 
 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
 			/*
 			 * we ignore this option for TCP sockets.
 			 * (RFC3542 leaves this case unspecified.)
 			 */
 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
 		} else
 			opt->ip6po_flags |= IP6PO_DONTFRAG;
 		break;
 
 	case IPV6_PREFER_TEMPADDR:
 		if (len != sizeof(int))
 			return (EINVAL);
 		preftemp = *(int *)buf;
 		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
 		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
 		    preftemp != IP6PO_TEMPADDR_PREFER) {
 			return (EINVAL);
 		}
 		opt->ip6po_prefer_tempaddr = preftemp;
 		break;
 
 	default:
 		return (ENOPROTOOPT);
 	} /* end of switch */
 
 	return (0);
 }
 
 /*
  * Loop a copy of an IP6 multicast packet back to the input side of the
  * given interface.  Called from ip6_output().  The copy is delivered via
  * if_simloop(), i.e. the output routine of the loopback "driver", but the
  * interface pointer handed to it might NOT be &loif -- that is simpler
  * than duplicating the loopback code here.
  *
  * ifp: interface the copy is looped back on.
  * m:   the outgoing packet; only a copy made by m_copy() is modified.
  * dst: destination; only its address family is passed on.
  *
  * Nothing is looped back if the copy or the header pull-up fails.
  */
 void
 ip6_mloopback(ifp, m, dst)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr_in6 *dst;
 {
 	struct mbuf *loopm;
 	struct ip6_hdr *hdr;
 	int need_pullup;
 
 	if ((loopm = m_copy(m, 0, M_COPYALL)) == NULL)
 		return;
 
 	/*
 	 * The IPv6 header is overwritten below, so it must be deep-copied
 	 * when the data lives in an mbuf cluster, and it must be contiguous
 	 * in the first mbuf.
 	 */
 	need_pullup = (loopm->m_flags & M_EXT) != 0 ||
 	    loopm->m_len < sizeof(struct ip6_hdr);
 	if (need_pullup) {
 		loopm = m_pullup(loopm, sizeof(struct ip6_hdr));
 		/* NOTE(review): presumably m_pullup() freed the chain. */
 		if (loopm == NULL)
 			return;
 	}
 
 #ifdef DIAGNOSTIC
 	if (loopm->m_len < sizeof(*hdr)) {
 		m_freem(loopm);
 		return;
 	}
 #endif
 
 	/*
 	 * Strip embedded scope identifiers from both addresses;
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	hdr = mtod(loopm, struct ip6_hdr *);
 	in6_clearscope(&hdr->ip6_src);
 	in6_clearscope(&hdr->ip6_dst);
 
 	(void)if_simloop(ifp, loopm, dst->sin6_family, 0);
 }
 
 /*
  * Chop IPv6 header off from the payload.
  */
 static int
 ip6_splithdr(m, exthdrs)
 	struct mbuf *m;
 	struct ip6_exthdrs *exthdrs;
 {
 	struct mbuf *mh;
 	struct ip6_hdr *ip6;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (m->m_len > sizeof(*ip6)) {
 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
 		if (mh == 0) {
 			m_freem(m);
 			return ENOBUFS;
 		}
 		M_MOVE_PKTHDR(mh, m);
 		MH_ALIGN(mh, sizeof(*ip6));
 		m->m_len -= sizeof(*ip6);
 		m->m_data += sizeof(*ip6);
 		mh->m_next = m;
 		m = mh;
 		m->m_len = sizeof(*ip6);
 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
 	}
 	exthdrs->ip6e_ip6 = m;
 	return 0;
 }
 
 /*
  * Compute IPv6 extension header length.
  */
 int
 ip6_optlen(in6p)
 	struct in6pcb *in6p;
 {
 	int len;
 
 	if (!in6p->in6p_outputopts)
 		return 0;
 
 	len = 0;
 #define elen(x) \
     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
 
 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
 	if (in6p->in6p_outputopts->ip6po_rthdr)
 		/* dest1 is valid with rthdr only */
 		len += elen(in6p->in6p_outputopts->ip6po_dest1);
 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
 	return len;
 #undef elen
 }