Index: head/sys/net/pfil.c
===================================================================
--- head/sys/net/pfil.c	(revision 343618)
+++ head/sys/net/pfil.c	(revision 343619)
@@ -1,474 +1,475 @@
 /*	$FreeBSD$ */
 /*	$NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1996 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/pfil.h>
 
 /*
  * Mutex behind PFIL_HEADLIST_LOCK()/PFIL_HEADLIST_UNLOCK().  It is held
  * while the list of registered pfil heads is searched or modified.
  */
 static struct mtx pfil_global_lock;
 
 MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
   MTX_DEF);
 
 /* Forward declarations of the file-local helpers defined further down. */
 static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
 static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
 static int pfil_chain_remove(pfil_chain_t *, void *, void *);
 static int pfil_add_hook_priv(void *, void *, int, struct pfil_head *, bool);
 
 /* List of all registered pfil heads, protected by pfil_global_lock. */
 LIST_HEAD(pfilheadhead, pfil_head);
 VNET_DEFINE(struct pfilheadhead, pfil_head_list);
 #define	V_pfil_head_list	VNET(pfil_head_list)
 /* rmlock shared by every head that does not set PFIL_FLAG_PRIVATE_LOCK. */
 VNET_DEFINE(struct rmlock, pfil_lock);
+#define	V_pfil_lock	VNET(pfil_lock)
 
 /*
  * Initialize rmlock 'l' as a recursable lock.  't' must be a string literal;
  * it is pasted into the lock name.
  */
 #define	PFIL_LOCK_INIT_REAL(l, t)	\
 	rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
 /* Destroy an rmlock set up with PFIL_LOCK_INIT_REAL(). */
 #define	PFIL_LOCK_DESTROY_REAL(l)	\
 	rm_destroy(l)
 /*
  * Select the lock a head will use: its own ph_lock (initialized here) when
  * PFIL_FLAG_PRIVATE_LOCK is set, otherwise the shared V_pfil_lock.  All
  * later lock operations go through ph_plock.
  */
 #define	PFIL_LOCK_INIT(p)	do {			\
 	if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) {	\
 		PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private");	\
 		(p)->ph_plock = &(p)->ph_lock;		\
 	} else						\
 		(p)->ph_plock = &V_pfil_lock;		\
 } while (0)
 /* Destroy the head's lock only if it is private; the shared one stays. */
 #define	PFIL_LOCK_DESTROY(p)	do {			\
 	if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK)	\
 		PFIL_LOCK_DESTROY_REAL((p)->ph_plock);	\
 } while (0)
 
 /* Reader/writer operations on whichever lock ph_plock points at. */
 #define	PFIL_TRY_RLOCK(p, t)	rm_try_rlock((p)->ph_plock, (t))
 #define	PFIL_RLOCK(p, t)	rm_rlock((p)->ph_plock, (t))
 #define	PFIL_WLOCK(p)		rm_wlock((p)->ph_plock)
 #define	PFIL_RUNLOCK(p, t)	rm_runlock((p)->ph_plock, (t))
 #define	PFIL_WUNLOCK(p)		rm_wunlock((p)->ph_plock)
 #define	PFIL_WOWNED(p)		rm_wowned((p)->ph_plock)
 
 /* Serialize access to the list of registered heads. */
 #define	PFIL_HEADLIST_LOCK()	mtx_lock(&pfil_global_lock)
 #define	PFIL_HEADLIST_UNLOCK()	mtx_unlock(&pfil_global_lock)
 
 /*
  * pfil_run_hooks() runs the specified packet filter hook chain.
  */
 int
 pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
     int dir, int flags, struct inpcb *inp)
 {
 	struct rm_priotracker rmpt;
 	struct packet_filter_hook *pfh;
 	struct mbuf *m = *mp;
 	int rv = 0;
 
 	PFIL_RLOCK(ph, &rmpt);
 	KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
 	for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
 	     pfh = TAILQ_NEXT(pfh, pfil_chain)) {
 		if (pfh->pfil_func_flags != NULL) {
 			rv = (*pfh->pfil_func_flags)(pfh->pfil_arg, &m, ifp,
 			    dir, flags, inp);
 			if (rv != 0 || m == NULL)
 				break;
 		}
 		if (pfh->pfil_func != NULL) {
 			rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
 			    inp);
 			if (rv != 0 || m == NULL)
 				break;
 		}
 	}
 	PFIL_RUNLOCK(ph, &rmpt);
 	*mp = m;
 	return (rv);
 }
 
 /*
  * Return the first entry of the chain that serves direction 'dir':
  * ph_in for PFIL_IN, ph_out for PFIL_OUT.  Any other value yields NULL.
  */
 static struct packet_filter_hook *
 pfil_chain_get(int dir, struct pfil_head *ph)
 {
 	struct packet_filter_hook *first;
 
 	switch (dir) {
 	case PFIL_IN:
 		first = TAILQ_FIRST(&ph->ph_in);
 		break;
 	case PFIL_OUT:
 		first = TAILQ_FIRST(&ph->ph_out);
 		break;
 	default:
 		first = NULL;
 		break;
 	}
 	return (first);
 }
 
 /*
  * pfil_try_rlock() acquires rm reader lock for specified head
  * if this is immediately possible.
  *
  * The lock used is ph->ph_plock and 'tracker' is handed to rm_try_rlock();
  * the result of rm_try_rlock() is returned unchanged.
  */
 int
 pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
 {
 
 	return (PFIL_TRY_RLOCK(ph, tracker));
 }
 
 /*
  * pfil_rlock() acquires rm reader lock for specified head.
  *
  * 'tracker' is passed to rm_rlock() together with ph->ph_plock; the same
  * tracker has to be given to pfil_runlock() when releasing.
  */
 void
 pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
 {
 
 	PFIL_RLOCK(ph, tracker);
 }
 
 /*
  * pfil_runlock() releases reader lock for specified head.
  *
  * Calls rm_runlock() on ph->ph_plock with the supplied 'tracker'.
  */
 void
 pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
 {
 
 	PFIL_RUNLOCK(ph, tracker);
 }
 
 /*
  * pfil_wlock() acquires writer lock for specified head.
  *
  * Calls rm_wlock() on ph->ph_plock, i.e. on the head's private lock or on
  * the shared pfil lock, depending on how the head was registered.
  */
 void
 pfil_wlock(struct pfil_head *ph)
 {
 
 	PFIL_WLOCK(ph);
 }
 
 /*
  * pfil_wunlock() releases writer lock for specified head.
  *
  * Calls rm_wunlock() on ph->ph_plock.
  */
 void
 pfil_wunlock(struct pfil_head *ph)
 {
 
 	PFIL_WUNLOCK(ph);
 }
 
 /*
  * pfil_wowned() returns a non-zero value if the current thread owns
  * an exclusive lock.
  *
  * The answer comes straight from rm_wowned() applied to ph->ph_plock.
  */
 int
 pfil_wowned(struct pfil_head *ph)
 {
 
 	return (PFIL_WOWNED(ph));
 }
 
 /*
  * pfil_head_register() adds 'ph' to the list of known pfil heads.
  *
  * A head is identified by the pair (ph_type, ph_un.phu_val).  If a head with
  * the same pair is already on the list, EEXIST is returned and 'ph' is left
  * untouched.  Otherwise the head's lock, hook counter and both chains are
  * initialized, the head is linked in and 0 is returned.
  */
 int
 pfil_head_register(struct pfil_head *ph)
 {
 	struct pfil_head *cur;
 	int error;
 
 	error = 0;
 
 	PFIL_HEADLIST_LOCK();
 	LIST_FOREACH(cur, &V_pfil_head_list, ph_list) {
 		if (cur->ph_type == ph->ph_type &&
 		    cur->ph_un.phu_val == ph->ph_un.phu_val) {
 			error = EEXIST;
 			break;
 		}
 	}
 	if (error == 0) {
 		/* No duplicate found: set the head up and publish it. */
 		PFIL_LOCK_INIT(ph);
 		ph->ph_nhooks = 0;
 		TAILQ_INIT(&ph->ph_in);
 		TAILQ_INIT(&ph->ph_out);
 		LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
 	}
 	PFIL_HEADLIST_UNLOCK();
 
 	return (error);
 }
 
 /*
  * pfil_head_unregister() removes a pfil_head from the packet filter hook
  * mechanism.  The producer of the hook promises that all outstanding
  * invocations of the hook have completed before it unregisters the hook.
  */
 int
 pfil_head_unregister(struct pfil_head *ph)
 {
 	struct packet_filter_hook *pfh, *pfnext;
 		
 	PFIL_HEADLIST_LOCK();
 	LIST_REMOVE(ph, ph_list);
 	PFIL_HEADLIST_UNLOCK();
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
 		free(pfh, M_IFADDR);
 	TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
 		free(pfh, M_IFADDR);
 	PFIL_LOCK_DESTROY(ph);
 	return (0);
 }
 
 /*
  * pfil_head_get() looks up the registered pfil head whose ph_type equals
  * 'type' and whose ph_un.phu_val equals 'val'.  Returns the head, or NULL
  * when no such head is registered.
  */
 struct pfil_head *
 pfil_head_get(int type, u_long val)
 {
 	struct pfil_head *cur, *found;
 
 	found = NULL;
 
 	PFIL_HEADLIST_LOCK();
 	LIST_FOREACH(cur, &V_pfil_head_list, ph_list) {
 		if (cur->ph_type == type && cur->ph_un.phu_val == val) {
 			found = cur;
 			break;
 		}
 	}
 	PFIL_HEADLIST_UNLOCK();
 
 	return (found);
 }
 
 /*
  * pfil_add_hook_flags() adds a function to the packet filter hook.  the
  * flags are:
  *	PFIL_IN		call me on incoming packets
  *	PFIL_OUT	call me on outgoing packets
  *	PFIL_ALL	call me on all of the above
  *	PFIL_WAITOK	OK to call malloc with M_WAITOK.
  *
  * 'func' is a flag-aware callback (pfil_func_flags_t); 'arg' is handed back
  * to it as its first argument on every call.  The work is done by
  * pfil_add_hook_priv(), whose result is returned: 0 on success, ENOMEM when
  * an entry cannot be allocated, EEXIST when the same func/arg pair is
  * already on the chain.
  */
 int
 pfil_add_hook_flags(pfil_func_flags_t func, void *arg, int flags,
     struct pfil_head *ph)
 {
 	return (pfil_add_hook_priv(func, arg, flags, ph, true));
 }
 
 /*
  * pfil_add_hook() adds a function to the packet filter hook.  the
  * flags are:
  *	PFIL_IN		call me on incoming packets
  *	PFIL_OUT	call me on outgoing packets
  *	PFIL_ALL	call me on all of the above
  *	PFIL_WAITOK	OK to call malloc with M_WAITOK.
  *
  * 'func' is a callback without the flags argument (pfil_func_t); 'arg' is
  * handed back to it as its first argument on every call.  The work is done
  * by pfil_add_hook_priv(), whose result is returned: 0 on success, ENOMEM
  * when an entry cannot be allocated, EEXIST when the same func/arg pair is
  * already on the chain.
  */
 int
 pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
 	return (pfil_add_hook_priv(func, arg, flags, ph, false));
 }
 
 /*
  * Common implementation of pfil_add_hook() and pfil_add_hook_flags().
  *
  *	func		callback; stored in pfil_func_flags when 'hasflags'
  *			is true, in pfil_func otherwise
  *	arg		opaque pointer stored next to the callback
  *	flags		PFIL_IN and/or PFIL_OUT select the chains to join,
  *			PFIL_WAITOK lets the allocations sleep
  *	ph		head to attach to
  *	hasflags	selects which callback slot 'func' belongs in
  *
  * Returns 0 on success, ENOMEM when an entry cannot be allocated, or the
  * error reported by pfil_chain_add() (EEXIST for a duplicate).  On failure
  * nothing stays registered and ph_nhooks is back at its previous value.
  *
  * Both entries are allocated before the write lock is taken so that a
  * sleeping allocation never happens with the lock held.
  */
 static int
 pfil_add_hook_priv(void *func, void *arg, int flags,
     struct pfil_head *ph, bool hasflags)
 {
 	struct packet_filter_hook *pfh1 = NULL;
 	struct packet_filter_hook *pfh2 = NULL;
 	int err;
 
 	if (flags & PFIL_IN) {
 		pfh1 = malloc(sizeof(*pfh1), M_IFADDR,
 		    (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
 		if (pfh1 == NULL) {
 			err = ENOMEM;
 			goto error;
 		}
 	}
 	if (flags & PFIL_OUT) {
 		pfh2 = malloc(sizeof(*pfh2), M_IFADDR,
 		    (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
 		if (pfh2 == NULL) {
 			err = ENOMEM;
 			goto error;
 		}
 	}
 	PFIL_WLOCK(ph);
 	if (flags & PFIL_IN) {
 		pfh1->pfil_func_flags = hasflags ? func : NULL;
 		pfh1->pfil_func = hasflags ? NULL : func;
 		pfh1->pfil_arg = arg;
 		err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
 		if (err)
 			goto locked_error;
 		ph->ph_nhooks++;
 	}
 	if (flags & PFIL_OUT) {
 		pfh2->pfil_func_flags = hasflags ? func : NULL;
 		pfh2->pfil_func = hasflags ? NULL : func;
 		pfh2->pfil_arg = arg;
 		err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
 		if (err) {
 			if (flags & PFIL_IN) {
 				/*
 				 * Undo the inbound insertion made above.
 				 * Only unlink the entry here: it is freed
 				 * exactly once on the error path below.
 				 * (pfil_chain_remove() would free it as
 				 * well, leading to a double free.)
 				 */
 				TAILQ_REMOVE(&ph->ph_in, pfh1, pfil_chain);
 				ph->ph_nhooks--;
 			}
 			goto locked_error;
 		}
 		ph->ph_nhooks++;
 	}
 	PFIL_WUNLOCK(ph);
 	return (0);
 locked_error:
 	PFIL_WUNLOCK(ph);
 error:
 	/* Neither entry is linked on a chain at this point. */
 	if (pfh1 != NULL)
 		free(pfh1, M_IFADDR);
 	if (pfh2 != NULL)
 		free(pfh2, M_IFADDR);
 	return (err);
 }
 
 /*
  * pfil_remove_hook_flags removes a specific function from the packet filter hook
  * chain.
  *
  * This is pfil_remove_hook() for callbacks registered through
  * pfil_add_hook_flags(): 'func' is cast to pfil_func_t and the call is
  * forwarded.  The chain search compares the pointer against both callback
  * slots of each entry, so the cast does not affect the lookup.
  */
 int
 pfil_remove_hook_flags(pfil_func_flags_t func, void *arg, int flags,
     struct pfil_head *ph)
 {
 	return (pfil_remove_hook((pfil_func_t)func, arg, flags, ph));
 }
 
 /*
  * pfil_remove_hook() detaches the func/arg pair from the chains selected by
  * 'flags' (PFIL_IN and/or PFIL_OUT) and lowers ph_nhooks once for every
  * entry that was removed.
  *
  * The inbound chain is handled first.  If that removal fails, the outbound
  * chain is left untouched and the error (ENOENT) is returned.  Returns 0
  * when every requested removal succeeded.
  */
 int
 pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
 	int error;
 
 	error = 0;
 
 	PFIL_WLOCK(ph);
 	if ((flags & PFIL_IN) != 0) {
 		error = pfil_chain_remove(&ph->ph_in, func, arg);
 		if (error != 0)
 			goto done;
 		ph->ph_nhooks--;
 	}
 	if ((flags & PFIL_OUT) != 0) {
 		error = pfil_chain_remove(&ph->ph_out, func, arg);
 		if (error != 0)
 			goto done;
 		ph->ph_nhooks--;
 	}
 done:
 	PFIL_WUNLOCK(ph);
 	return (error);
 }
 
 /*
  * Internal: link the prepared entry 'pfh1' onto 'chain'.
  *
  * Returns EEXIST, without touching the chain, when an entry with the same
  * argument and the same non-NULL callback (in either callback slot) is
  * already present.  Otherwise the entry is inserted and 0 is returned.
  */
 static int
 pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
 {
 	struct packet_filter_hook *cur;
 
 	/* Reject duplicates before modifying anything. */
 	TAILQ_FOREACH(cur, chain, pfil_chain) {
 		if (cur->pfil_arg != pfh1->pfil_arg)
 			continue;
 		if (cur->pfil_func != NULL &&
 		    cur->pfil_func == pfh1->pfil_func)
 			return (EEXIST);
 		if (cur->pfil_func_flags != NULL &&
 		    cur->pfil_func_flags == pfh1->pfil_func_flags)
 			return (EEXIST);
 	}
 
 	/*
 	 * Inbound hooks go to the front and outbound hooks to the back, so
 	 * the inbound list ends up in the reverse order of the outbound one
 	 * and a packet follows the same path in or out of the kernel.
 	 */
 	if ((flags & PFIL_IN) == 0) {
 		TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
 	} else {
 		TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
 	}
 	return (0);
 }
 
 /*
  * Internal: unlink and free the first entry on 'chain' whose argument is
  * 'arg' and whose callback, in either slot, equals 'func'.  Returns 0 when
  * an entry was removed and ENOENT when none matched.
  */
 static int
 pfil_chain_remove(pfil_chain_t *chain, void *func, void *arg)
 {
 	struct packet_filter_hook *cur;
 
 	TAILQ_FOREACH(cur, chain, pfil_chain) {
 		if (cur->pfil_arg != arg)
 			continue;
 		if (cur->pfil_func != func && cur->pfil_func_flags != func)
 			continue;
 
 		TAILQ_REMOVE(chain, cur, pfil_chain);
 		free(cur, M_IFADDR);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Stuff that must be initialized for every instance (including the first of
  * course).
  *
  * Empties the instance's list of pfil heads and initializes the rmlock that
  * heads without a private lock share.
  */
 static void
 vnet_pfil_init(const void *unused __unused)
 {
 
 	LIST_INIT(&V_pfil_head_list);
 	PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
 }
 
 /*
  * Called for the removal of each instance.
  *
  * Asserts that every pfil head has been unregistered by now, then destroys
  * the shared rmlock set up by vnet_pfil_init().
  */
 static void
 vnet_pfil_uninit(const void *unused __unused)
 {
 
 	KASSERT(LIST_EMPTY(&V_pfil_head_list),
 	    ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
 	PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
 }
 
 /*
  * Starting up.
  *
  * VNET_SYSINIT is called for each existing vnet and each new vnet.
  * Make sure the pfil bits are first before any possible subsystem which
  * might piggyback on the SI_SUB_PROTO_PFIL; that is why the registration
  * uses SI_ORDER_FIRST.
  */
 VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
     vnet_pfil_init, NULL);
  
 /*
  * Closing up shop.  These are done in REVERSE ORDER.  Not called on reboot.
  *
  * VNET_SYSUNINIT is called for each exiting vnet as it exits.  It is
  * registered with the same subsystem and order as the init hook above.
  */
 VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
     vnet_pfil_uninit, NULL);
Index: head/sys/net/pfil.h
===================================================================
--- head/sys/net/pfil.h	(revision 343618)
+++ head/sys/net/pfil.h	(revision 343619)
@@ -1,131 +1,127 @@
 /*	$FreeBSD$ */
 /*	$NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1996 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _NET_PFIL_H_
 #define _NET_PFIL_H_
 
 #include <sys/systm.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
-#include <net/vnet.h>
 
 struct mbuf;
 struct ifnet;
 struct inpcb;
 
 /*
  * Hook callback signatures.  pfil_run_hooks() calls them as
  *	pfil_func_t(arg, &mbuf, ifp, dir, inp)
  *	pfil_func_flags_t(arg, &mbuf, ifp, dir, flags, inp)
  * where 'arg' is the pointer supplied at registration.  A non-zero return
  * value, or an mbuf pointer left NULL, stops the walk of the chain.
  */
 typedef	int	(*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
 		    struct inpcb *);
 typedef	int	(*pfil_func_flags_t)(void *, struct mbuf **, struct ifnet *,
 		    int, int, struct inpcb *);
 
 /*
  * The packet filter hooks are designed for anything to call them to
  * possibly intercept the packet.  Multiple filter hooks are chained
  * together and after each other in the specified order.
  *
  * One entry is allocated per chain a callback is attached to.  The
  * registration code fills exactly one of the two callback members and sets
  * the other to NULL.
  */
 struct packet_filter_hook {
 	TAILQ_ENTRY(packet_filter_hook) pfil_chain;	/* chain linkage */
 	pfil_func_t		 pfil_func;		/* callback without flags */
 	pfil_func_flags_t	 pfil_func_flags;	/* callback with flags */
 	void			*pfil_arg;		/* first callback argument */
 };
 
 /*
  * Direction and behavior flags for hook registration and removal.  PFIL_IN
  * and PFIL_OUT are also the 'dir' values that select a chain when hooks
  * are run.
  */
 #define PFIL_IN		0x00000001	/* incoming packets */
 #define PFIL_OUT	0x00000002	/* outgoing packets */
 #define PFIL_WAITOK	0x00000004	/* registration may sleep in malloc */
 #define PFIL_FWD	0x00000008	/* NOTE(review): presumably marks forwarded packets; not used by pfil.c */
 #define PFIL_ALL	(PFIL_IN|PFIL_OUT)
 
 /* A hook chain: tail queue of packet_filter_hook entries. */
 typedef	TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
 
 /* Values for pfil_head.ph_type; they say how ph_un is to be read. */
 #define	PFIL_TYPE_AF		1	/* key is AF_* type */
 #define	PFIL_TYPE_IFNET		2	/* key is ifnet pointer */
 
 /* Values for pfil_head.flags. */
 #define	PFIL_FLAG_PRIVATE_LOCK	0x01	/* Personal lock instead of global */
 
 /*
  * A pfil head is created by each protocol or packet intercept point.
  * The packet is then run through the hook chain for inspection.
  *
  * A head is identified by the pair (ph_type, ph_un.phu_val); registering a
  * second head with the same pair fails.
  */
 struct pfil_head {
 	pfil_chain_t	 ph_in;		/* hooks run for PFIL_IN */
 	pfil_chain_t	 ph_out;	/* hooks run for PFIL_OUT */
 	int		 ph_type;	/* PFIL_TYPE_* */
 	int		 ph_nhooks;	/* entries on ph_in plus ph_out */
 #if defined( __linux__ ) || defined( _WIN32 )
 	rwlock_t	 ph_mtx;
 #else
 	struct rmlock	*ph_plock;	/* Pointer to the used lock */
 	struct rmlock	 ph_lock;	/* Private lock storage */
 	int		 flags;		/* PFIL_FLAG_*, set before registering */
 #endif
 	union {
 		u_long	 phu_val;
 		void	*phu_ptr;
 	} ph_un;			/* lookup key, see ph_type */
 #define	ph_af		 ph_un.phu_val
 #define	ph_ifnet	 ph_un.phu_ptr
 	LIST_ENTRY(pfil_head) ph_list;	/* linkage on the list of heads */
 };
-
-VNET_DECLARE(struct rmlock, pfil_lock);
-#define	V_pfil_lock	VNET(pfil_lock)
 
 /*
  * Public functions for pfil hook management by packet filters.
  * The add functions return 0, ENOMEM or EEXIST; the remove functions
  * return 0 or ENOENT.
  */
 struct pfil_head *pfil_head_get(int, u_long);
 int	pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
 int	pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
 int	pfil_remove_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
 int	pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
 /* True when at least one hook is attached to the head; reads ph_nhooks unlocked. */
 #define	PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
 
 /* Public functions to run the packet inspection by protocols. */
 int	pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int,
     int, struct inpcb *inp);
 
 /* Public functions for pfil head management by protocols. */
 int	pfil_head_register(struct pfil_head *);
 int	pfil_head_unregister(struct pfil_head *);
 
 /* Public pfil locking functions for self managed locks by packet filters. */
 int	pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
 void	pfil_rlock(struct pfil_head *, struct rm_priotracker *);
 void	pfil_runlock(struct pfil_head *, struct rm_priotracker *);
 void	pfil_wlock(struct pfil_head *);
 void	pfil_wunlock(struct pfil_head *);
 int	pfil_wowned(struct pfil_head *ph);
 
 #endif /* _NET_PFIL_H_ */
Index: head/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw2.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw2.c	(revision 343619)
@@ -1,3452 +1,3453 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * The FreeBSD IP packet firewall, main file
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
 #ifndef INET
 #error "IPFIREWALL requires INET"
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/counter.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_carp.h>
 #include <netinet/pim.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #include <netinet/sctp_header.h>
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/in_fib.h>
 #ifdef INET6
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <net/if_gre.h> /* for struct grehdr */
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 /*
  * static variables followed by global ones.
  * All ipfw global variables are here.
  */
 
 /* net.inet6.ip6.fw.deny_unknown_exthdrs */
 VNET_DEFINE_STATIC(int, fw_deny_unknown_exthdrs);
 #define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)
 
 /* net.inet6.ip6.fw.permit_single_frag6, enabled by default */
 VNET_DEFINE_STATIC(int, fw_permit_single_frag6) = 1;
 #define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)
 
 /*
  * net.inet.ip.fw.default_to_accept: non-zero makes the default rule accept
  * all packets.  The kernel option below only changes the initial value.
  */
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 static int default_to_accept = 1;
 #else
 static int default_to_accept;
 #endif
 
 VNET_DEFINE(int, autoinc_step);		/* rule number auto-increment step */
 VNET_DEFINE(int, fw_one_pass) = 1;	/* net.inet.ip.fw.one_pass */
 
 VNET_DEFINE(unsigned int, fw_tables_max);
 VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
 /* Use 128 tables by default */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
 
 /*
  * JUMP() resolves to jump_fast() unless the kernel is built with
  * LINEAR_SKIPTO, in which case jump_linear() is used.  Both take the same
  * arguments.
  */
 #ifndef LINEAR_SKIPTO
 static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
 #else
 static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
 #endif
 
 /*
  * Each rule belongs to one of 32 different sets (0..31).
  * The variable set_disable contains one bit per set.
  * If the bit is set, all rules in the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted individually.
  */
 VNET_DEFINE(u_int32_t, set_disable);
 #define	V_set_disable			VNET(set_disable)
 
 VNET_DEFINE(int, fw_verbose);		/* net.inet.ip.fw.verbose */
 /* counter for ipfw_log(NULL...) */
 VNET_DEFINE(u_int64_t, norule_counter);
 VNET_DEFINE(int, verbose_limit);	/* net.inet.ip.fw.verbose_limit */
 
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
 /* ipfw_vnet_ready controls when we are open for business */
 VNET_DEFINE(int, ipfw_vnet_ready) = 0;
 
 VNET_DEFINE(int, ipfw_nat_ready) = 0;
 
 /*
  * NAT entry points.  NOTE(review): presumably filled in by the NAT module
  * when it loads -- confirm against ip_fw_nat.c.
  */
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 /*
  * sysctl interface: the net.inet.ip.fw tree and, with INET6, the
  * net.inet6.ip6.fw tree.  Entries flagged CTLFLAG_VNET are backed by the
  * VNET variables defined above.
  */
 #ifdef SYSCTL_NODE
 /* Read-only copy of IPFW_DEFAULT_RULE exported as default_rule. */
 uint32_t dummy_def = IPFW_DEFAULT_RULE;
 static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
 static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);
 
 SYSBEGIN(f3)
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
     "Rule number auto-increment step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
     "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
     "Set upper limit of matches of ipfw rules logged");
 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
     &dummy_def, 0,
     "The default/max possible rule number.");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
     "Maximum number of concurrently used tables");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     0, 0, sysctl_ipfw_tables_sets, "IU",
     "Use per-set namespace for tables");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
     &default_to_accept, 0,
     "Make the default rule accept all packets.");
 /* Loader tunable that seeds default_fw_tables. */
 TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_deny_unknown_exthdrs), 0,
     "Deny packets with unknown IPv6 Extension Headers");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_permit_single_frag6), 0,
     "Permit single packet IPv6 fragments");
 #endif /* INET6 */
 
 SYSEND
 
 #endif /* SYSCTL_NODE */
 
 
 /*
  * Some macros used in the various matching options.
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
  * Other macros just cast void * into the appropriate type
  *
  * L3HDR steps over the IPv4 header by advancing 'ip' by ip_hl 32-bit
  * words, so the result points at whatever follows the header and options.
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Return 1 when the ICMP type of 'icmp' is selected in the 32-bit bitmap
  * cmd->d[0] (bit N set means "match type N"), 0 otherwise.
  *
  * The bitmap only has room for types 0..31.  The explicit range test keeps
  * the shift count below the operand width: shifting by 32 or more is
  * undefined in C and on common CPUs wraps around, which would let a high
  * type number alias a low one.  The shift is done on an unsigned 32-bit
  * value so that bit 31 can be tested without overflowing a signed int.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	if (type < 0 || type >= 32 || type > ICMP_MAXTYPE)
 		return (0);
 	return ((cmd->d[0] & ((uint32_t)1 << type)) != 0);
 }
 
 /* Bitmap of the ICMP types that are queries; bit N stands for type N. */
 #define TT	( (1U << ICMP_ECHO) | (1U << ICMP_ROUTERSOLICIT) | \
     (1U << ICMP_TSTAMP) | (1U << ICMP_IREQ) | (1U << ICMP_MASKREQ) )
 
 /*
  * Return 1 when 'icmp' carries one of the query types collected in TT
  * (echo, router solicitation, timestamp, information and address mask
  * request), 0 otherwise.
  *
  * Types outside 0..31 are rejected before shifting: a shift count of 32 or
  * more is undefined in C and on common CPUs wraps around, which would make
  * a high type number look like one of the low query types.
  */
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	if (type < 0 || type >= 32 || type > ICMP_MAXTYPE)
 		return (0);
 	return ((TT & (1U << type)) != 0);
 }
 #undef TT
 
 /*
  * Option and flag matching works on two small bit sets packed into
  * cmd->arg1 (or cmd->d[0] for the wider variants): the low half lists the
  * bits that must be set, the high half the bits that must be clear.
  *
  * The caller collects the bits it found in 'bits'.  The match succeeds if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * flags_match() implements the 8-bit form of that test and returns 1 on a
  * match, 0 otherwise.
  */
 
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	u_int8_t absent;	/* bits that were NOT seen */
 	u_char want_set, want_clear;
 
 	absent = ~bits;
 	want_set = cmd->arg1 & 0xff;
 	want_clear = (cmd->arg1 >> 8) & 0xff;
 
 	/* Every required bit must be present ... */
 	if ((want_set & absent) != 0)
 		return 0;
 	/* ... and every forbidden bit must be among the absent ones. */
 	if ((want_clear & absent) != want_clear)
 		return 0;
 	return 1;
 }
 
 /*
  * Scan the IPv4 options of 'ip', record which of LSRR, SSRR, RR and TS are
  * present and compare the result with the set/clear masks in 'cmd' via
  * flags_match().
  *
  * Returns 0 immediately when an option is malformed or truncated.  A
  * multi-byte option needs at least its type and length octets, so the
  * length octet is only read when two bytes remain, and a length below 2
  * or beyond the remaining option space is rejected.
  */
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(ip + 1);
 	int x = (ip->ip_hl << 2) - sizeof (struct ip);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[IPOPT_OPTVAL];
 
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			/* Do not read the length octet past the header. */
 			if (x < 2)
 				return 0; /* truncated */
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < 2 || optlen > x)
 				return 0; /* invalid or truncated */
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 			bits |= IP_FW_IPOPT_LSRR;
 			break;
 
 		case IPOPT_SSRR:
 			bits |= IP_FW_IPOPT_SSRR;
 			break;
 
 		case IPOPT_RR:
 			bits |= IP_FW_IPOPT_RR;
 			break;
 
 		case IPOPT_TS:
 			bits |= IP_FW_IPOPT_TS;
 			break;
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Scan the TCP options of 'tcp', record which of MSS, window scale, SACK
  * (permitted or block) and timestamp are present and compare the result
  * with the set/clear masks in 'cmd' via flags_match().
  *
  * Scanning stops at end-of-list or at the first malformed option; the bits
  * collected up to that point are still matched.  A multi-byte option needs
  * at least its kind and length octets, so the length octet is only read
  * when two bytes remain, and a length below 2 or beyond the remaining
  * option space ends the scan instead of being trusted.
  */
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(tcp + 1);
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* Do not read the length octet past the header. */
 			if (x < 2)
 				break;
 			optlen = cp[1];
 			if (optlen < 2 || optlen > x)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Decide whether interface 'ifp' satisfies the interface instruction 'cmd'.
  *
  * - No interface: never matches.
  * - Empty name: match when one of the interface's AF_INET addresses equals
  *   cmd->p.ip (only compiled for the FreeBSD kernel; otherwise no match).
  * - Name starting with '\1': the answer comes from ipfw_lookup_table() on
  *   the interface index, which may also fill in *tablearg.
  * - Any other name: glob match via fnmatch() when cmd->p.glob is set,
  *   otherwise a comparison of up to IFNAMSIZ characters.
  *
  * Returns non-zero on a match, 0 otherwise.
  */
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain,
     uint32_t *tablearg)
 {
 
 	if (ifp == NULL)	/* no iface with this packet, match fails */
 		return (0);
 
 	if (cmd->name[0] == '\0') {
 		/* Match by IPv4 address. */
 		int found = 0;
 #if !defined(USERSPACE) && defined(__FreeBSD__)	/* and OSX too ? */
 		struct ifaddr *ifa;
 		struct sockaddr_in *sin;
 
 		if_addr_rlock(ifp);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)(ifa->ifa_addr);
 			if (cmd->p.ip.s_addr == sin->sin_addr.s_addr) {
 				found = 1;
 				break;
 			}
 		}
 		if_addr_runlock(ifp);
 #endif /* __FreeBSD__ */
 		return (found);
 	}
 
 	/* Match by name: table lookup, glob or plain comparison. */
 	if (cmd->name[0] == '\1')
 		return ipfw_lookup_table(chain, cmd->p.kidx, 0,
 		    &ifp->if_index, tablearg);
 	if (cmd->p.glob)
 		return (fnmatch(cmd->name, ifp->if_xname, 0) == 0);
 	return (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0);
 }
 
 /*
  * verify_path() checks whether a route back to 'src' exists in FIB 'fib'
  * and, when 'ifp' is given, whether that route leaves through 'ifp'.
  *
  * It backs two rule options:
  *  - 'verrevpath' passes the receive interface and so requires that
  *    traffic towards the packet's source would be routed out of the
  *    interface the packet arrived on;
  *  - 'versrcreach' passes no interface and only requires that the source
  *    is reachable through some route other than the default route.
  * Both are anti-spoofing measures, commonly known as Unicast Reverse Path
  * Forwarding (unicast RPF).  The option names deliberately resemble the
  * Cisco IOS commands
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implement the same functionality.  Note that the syntax is
  * misleading: the check may be performed on all IP packets, whether
  * unicast, multicast, or broadcast.
  *
  * Returns 1 when the path is acceptable and 0 otherwise.  Outside the
  * FreeBSD kernel the function always returns 0.
  */
 static int
 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
 {
 #if defined(USERSPACE) || !defined(__FreeBSD__)
 	return 0;
 #else
 	struct nhop4_basic nh;
 
 	/* No route to the source at all. */
 	if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh) != 0)
 		return (0);
 
 	/*
 	 * With an interface given, the route must point at that interface.
 	 * The lookup is made with NHR_IFAIF so that packets injected back
 	 * by if_simloop() pass: a routing entry (via lo0) for our own
 	 * address may exist, so routing asymmetry has to be handled.
 	 */
 	if (ifp != NULL)
 		return (ifp == nh.nh_ifp);
 
 	/*
 	 * Without an interface, the default route does not count as
 	 * reachability, and neither does a blackhole/reject route.
 	 */
 	if ((nh.nh_flags & (NHF_DEFAULT | NHF_REJECT | NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return (1);
 #endif /* __FreeBSD__ */
 }
 
 /*
  * Generate an SCTP packet containing an ABORT chunk. The verification tag
  * is given by vtag. The T-bit is set in the ABORT chunk if and only if
  * reflected is not 0.
  */
 
 static struct mbuf *
 ipfw_send_abort(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t vtag,
     int reflected)
 {
 	struct mbuf *m;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct sctphdr *sctp;
 	struct sctp_chunkhdr *chunk;
 	u_int16_t hlen, plen, tlen;
 
 	/* Non-blocking allocation; the caller gets NULL if no mbuf is available. */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	/* The reply uses the same FIB as the flow it answers. */
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	/* Label the new packet, as a reply when the triggering mbuf is known. */
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* Network header length depends on the address family of the flow. */
 	switch (id->addr_type) {
 	case 4:
 		hlen = sizeof(struct ip);
 		break;
 #ifdef INET6
 	case 6:
 		hlen = sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	/* Payload: SCTP common header plus a single bare chunk header. */
 	plen = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	tlen = hlen + plen;
 	/* Skip max_linkhdr bytes at the front of the mbuf data area. */
 	m->m_data += max_linkhdr;
 	/* ipfw_chk() passes packets carrying M_SKIP_FIREWALL without inspection. */
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 	/* Start from all-zero headers; only non-zero fields are filled below. */
 	bzero(m->m_data, tlen);
 
 	/*
 	 * Build the network header.  Source and destination are swapped
 	 * relative to the flow id, i.e. the packet is addressed back to the
 	 * flow's source.
 	 */
 	switch (id->addr_type) {
 	case 4:
 		ip = mtod(m, struct ip *);
 
 		ip->ip_v = 4;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = htons(0);
 		ip->ip_off = htons(0);
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_p = IPPROTO_SCTP;
 		ip->ip_sum = 0;
 		/* The flow id keeps IPv4 addresses in host order. */
 		ip->ip_src.s_addr = htonl(id->dst_ip);
 		ip->ip_dst.s_addr = htonl(id->src_ip);
 
 		sctp = (struct sctphdr *)(ip + 1);
 		break;
 #ifdef INET6
 	case 6:
 		ip6 = mtod(m, struct ip6_hdr *);
 
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_plen = htons(plen);
 		ip6->ip6_nxt = IPPROTO_SCTP;
 		ip6->ip6_hlim = IPV6_DEFHLIM;
 		ip6->ip6_src = id->dst_ip6;
 		ip6->ip6_dst = id->src_ip6;
 
 		sctp = (struct sctphdr *)(ip6 + 1);
 		break;
 #endif
 	}
 
 	/* SCTP common header, ports swapped like the addresses above. */
 	sctp->src_port = htons(id->dst_port);
 	sctp->dest_port = htons(id->src_port);
 	sctp->v_tag = htonl(vtag);
 	/* Checksum field is zero while the checksum is computed below. */
 	sctp->checksum = htonl(0);
 
 	/* The one and only chunk: an ABORT consisting of just its header. */
 	chunk = (struct sctp_chunkhdr *)(sctp + 1);
 	chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
 	chunk->chunk_flags = 0;
 	/* Set the T-bit when the verification tag is a reflected one. */
 	if (reflected != 0) {
 		chunk->chunk_flags |= SCTP_HAD_NO_TCB;
 	}
 	chunk->chunk_length = htons(sizeof(struct sctp_chunkhdr));
 
 	/* hlen is passed as the offset argument -- presumably where SCTP starts. */
 	sctp->checksum = sctp_calculate_cksum(m, hlen);
 
 	return (m);
 }
 
 /*
  * Generate a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
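  * Otherwise we are sending a keepalive, and flags & TH_SYN selects the
  * direction: when set the packet keeps the src/dst order of the flow id,
  * otherwise addresses and ports are swapped.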
  * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
  * so that MAC can label the reply appropriately.
  */
 struct mbuf *
 ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
     u_int32_t ack, int flags)
 {
 	struct mbuf *m = NULL;		/* stupid compiler */
 	struct ip *h = NULL;		/* stupid compiler */
 #ifdef INET6
 	struct ip6_hdr *h6 = NULL;
 #endif
 	struct tcphdr *th = NULL;
 	int len, dir;
 
 	/* Non-blocking allocation; the caller gets NULL if no mbuf is available. */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	/* The generated packet uses the same FIB as the flow. */
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* Total length: network header plus an option-less TCP header. */
 	switch (id->addr_type) {
 	case 4:
 		len = sizeof(struct ip) + sizeof(struct tcphdr);
 		break;
 #ifdef INET6
 	case 6:
 		len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	/*
 	 * dir is 1 only when TH_SYN is set and TH_RST is clear: the packet
 	 * then keeps the src/dst order of the flow id.  In every other case
 	 * (all RSTs included) addresses and ports are swapped.
 	 */
 	dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
 
 	/* Skip max_linkhdr bytes at the front of the mbuf data area. */
 	m->m_data += max_linkhdr;
 	/* ipfw_chk() passes packets carrying M_SKIP_FIREWALL without inspection. */
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	bzero(m->m_data, len);
 
 	switch (id->addr_type) {
 	case 4:
 		h = mtod(m, struct ip *);
 
 		/*
 		 * prepare for checksum: the header is all zero except for
 		 * protocol, TCP length and addresses, so it can be summed
 		 * together with the TCP header below; the real IP header
 		 * fields are filled in only after th_sum is computed.
 		 */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h->ip_src.s_addr = htonl(id->src_ip);
 			h->ip_dst.s_addr = htonl(id->dst_ip);
 		} else {
 			h->ip_src.s_addr = htonl(id->dst_ip);
 			h->ip_dst.s_addr = htonl(id->src_ip);
 		}
 
 		th = (struct tcphdr *)(h + 1);
 		break;
 #ifdef INET6
 	case 6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* prepare for checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h6->ip6_src = id->src_ip6;
 			h6->ip6_dst = id->dst_ip6;
 		} else {
 			h6->ip6_src = id->dst_ip6;
 			h6->ip6_dst = id->src_ip6;
 		}
 
 		th = (struct tcphdr *)(h6 + 1);
 		break;
 #endif
 	}
 
 	/* Ports follow the same orientation as the addresses. */
 	if (dir) {
 		th->th_sport = htons(id->src_port);
 		th->th_dport = htons(id->dst_port);
 	} else {
 		th->th_sport = htons(id->dst_port);
 		th->th_dport = htons(id->src_port);
 	}
 	/* Data offset in 32-bit words: no TCP options. */
 	th->th_off = sizeof(struct tcphdr) >> 2;
 
 	if (flags & TH_RST) {
 		if (flags & TH_ACK) {
 			/*
 			 * TH_ACK set in flags: the RST takes its sequence
 			 * number from 'ack' and carries no ACK itself.
 			 */
 			th->th_seq = htonl(ack);
 			th->th_flags = TH_RST;
 		} else {
 			/*
 			 * No ACK to build on: send RST|ACK acknowledging
 			 * 'seq', plus one when TH_SYN is set.
 			 */
 			if (flags & TH_SYN)
 				seq++;
 			th->th_ack = htonl(seq);
 			th->th_flags = TH_RST | TH_ACK;
 		}
 	} else {
 		/*
 		 * Keepalive - use caller provided sequence numbers
 		 */
 		th->th_seq = htonl(seq);
 		th->th_ack = htonl(ack);
 		th->th_flags = TH_ACK;
 	}
 
 	switch (id->addr_type) {
 	case 4:
 		/* Sum over the whole mbuf while the IP header is still minimal. */
 		th->th_sum = in_cksum(m, len);
 
 		/* finish the ip header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_off = htons(0);
 		h->ip_len = htons(len);
 		h->ip_ttl = V_ip_defttl;
 		h->ip_sum = 0;
 		break;
 #ifdef INET6
 	case 6:
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
 		    sizeof(struct tcphdr));
 
 		/* finish the ip6 header */
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 		break;
 #endif
 	}
 
 	return (m);
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 /*
  * Test whether ICMPv6 'type' is selected in the bitmap stored in cmd->d[]:
  * bit (type % 32) of 32-bit word (type / 32).
  * Returns 1 on match, 0 otherwise (including out-of-range types).
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	/* Reject values that would index outside the bitmap. */
 	if (type < 0 || type > ICMP6_MAXTYPE)
 		return (0);
 
 	/*
 	 * Use an unsigned constant: with a signed 1 the shift by 31 would
 	 * overflow into the sign bit, which is undefined behaviour.
 	 */
 	return ((cmd->d[type / 32] & (1U << (type % 32))) != 0);
 }
 
 /*
  * Return 1 if curr_flow equals one of the values stored in cmd->d[],
  * 0 otherwise.
  * NOTE(review): the scan is inclusive of index cmd->o.arg1, i.e. it looks
  * at arg1 + 1 entries -- confirm against how userland fills the opcode.
  */
 static int
 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
 {
 	int idx = 0;
 
 	while (idx <= cmd->o.arg1) {
 		if (cmd->d[idx] == curr_flow)
 			return (1);
 		idx++;
 	}
 	return (0);
 }
 
 /* support for IP6_*_ME opcodes */
 /*
  * Comparison mask for link-local addresses: every bit is significant
  * except bytes 2 and 3, which are ignored.
  * NOTE(review): presumably those bytes hold the kernel's embedded scope
  * zone id -- confirm.
  */
 static const struct in6_addr lla_mask = {{{
 	0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 }}};
 
 /*
  * Return non-zero if in6 is one of this host's IPv6 addresses.
  * Multicast addresses never match.  Non-link-local addresses are delegated
  * to in6_localip(); link-local ones are compared against every configured
  * link-local address under lla_mask.
  */
 static int
 ipfw_localip6(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ifa6;
 	int found;
 
 	if (IN6_IS_ADDR_MULTICAST(in6))
 		return (0);
 
 	if (!IN6_IS_ADDR_LINKLOCAL(in6))
 		return (in6_localip(in6));
 
 	/* Walk the global address list with the read lock held. */
 	found = 0;
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_STAILQ_FOREACH(ifa6, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr) &&
 		    IN6_ARE_MASKED_ADDR_EQUAL(&ifa6->ia_addr.sin6_addr,
 		    in6, &lla_mask)) {
 			found = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (found);
 }
 
 /*
  * IPv6 counterpart of verify_path(): check that a route to src exists in
  * the given fib and, when ifp is provided, that it goes through ifp.
  * Link-local scoped sources are always accepted.
  * Returns 1 when the path is considered valid, 0 otherwise.
  */
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
 {
 	struct nhop6_basic nh6;
 
 	if (IN6_IS_SCOPE_LINKLOCAL(src))
 		return (1);
 
 	/* No route to the source: fail. */
 	if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0)
 		return (0);
 
 	/* With an ifp, the route must point at that very interface. */
 	if (ifp != NULL)
 		return (ifp == nh6.nh_ifp);
 
 	/*
 	 * Without an ifp, accept any route that is neither the default
 	 * route nor a blackhole/reject route.
 	 */
 	if ((nh6.nh_flags & (NHF_DEFAULT | NHF_REJECT | NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return (1);
 }
 
 /*
  * Return 1 if icmp6_type is one of the ICMPv6 query types listed below,
  * 0 otherwise (types above ICMP6_MAXTYPE never match).
  */
 static int
 is_icmp6_query(int icmp6_type)
 {
 	if (icmp6_type > ICMP6_MAXTYPE)
 		return (0);
 
 	return (icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY);
 }
 
 /*
  * Translate an ICMPv4 destination-unreachable code into the ICMPv6
  * destination-unreachable code to use instead (RFC 7915 p4.2).
  */
 static int
 map_icmp_unreach(int code)
 {
 	int code6;
 
 	switch (code) {
 	case ICMP_UNREACH_NET:
 	case ICMP_UNREACH_HOST:
 	case ICMP_UNREACH_SRCFAIL:
 	case ICMP_UNREACH_NET_UNKNOWN:
 	case ICMP_UNREACH_HOST_UNKNOWN:
 	case ICMP_UNREACH_TOSNET:
 	case ICMP_UNREACH_TOSHOST:
 		/* Network/host level failures become "no route". */
 		code6 = ICMP6_DST_UNREACH_NOROUTE;
 		break;
 	case ICMP_UNREACH_PORT:
 		code6 = ICMP6_DST_UNREACH_NOPORT;
 		break;
 	default:
 		/*
 		 * Everything else is reported as administratively
 		 * prohibited.
 		 * XXX: unreach proto should be mapped into ICMPv6
 		 * parameter problem, but we use only unreach type.
 		 */
 		code6 = ICMP6_DST_UNREACH_ADMIN;
 		break;
 	}
 
 	return (code6);
 }
 
 /*
  * Reject an IPv6 packet according to 'code', consuming args->m:
  *  - ICMP6_UNREACH_RST on a TCP packet: answer with a TCP RST (unless the
  *    packet itself carries RST) and free the packet;
  *  - ICMP6_UNREACH_ABORT on an SCTP packet: answer with an SCTP ABORT when
  *    a usable verification tag is found, and free the packet;
  *  - any other code: hand the packet to icmp6_error() as a destination
  *    unreachable error with that code;
  *  - RST/ABORT codes on a non-matching protocol: just free the packet.
  * 'hlen' is the byte offset from ip6 to the upper-layer header.
  * args->m is set to NULL on return.
  */
 static void
 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
 {
 	struct mbuf *m;
 
 	m = args->m;
 	if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *tcp;
 		tcp = (struct tcphdr *)((char *)ip6 + hlen);
 
 		/* Never answer a RST with another RST. */
 		if ((tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m0;
 			m0 = ipfw_send_pkt(args->m, &(args->f_id),
 			    ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 			    tcp->th_flags | TH_RST);
 			if (m0 != NULL)
 				ip6_output(m0, NULL, NULL, 0, NULL, NULL,
 				    NULL);
 		}
 		FREE_PKT(m);
 	} else if (code == ICMP6_UNREACH_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m0;
 		struct sctphdr *sctp;
 		u_int32_t v_tag;
 		int reflected;
 
 		sctp = (struct sctphdr *)((char *)ip6 + hlen);
 		/* Default: reflect the packet's own verification tag (T-bit set). */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		/* Investigate the first chunk header if available */
 		if (m->m_len >= hlen + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			struct sctp_chunkhdr *chunk;
 
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					/* Malformed INIT: v_tag 0 means no reply below. */
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.  The "+ 3"
 				 * presumably allows for chunk padding; here
 				 * v_tag is still 0, so no reply is sent.
 				 */
 				if (m->m_pkthdr.len >
 				    hlen + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((m->m_len >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					struct sctp_init *init;
 
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					/* Tag comes from the peer's INIT: T-bit clear. */
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 *      but don't do to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		/* A zero tag at this point means "do not send an ABORT". */
 		if (v_tag == 0) {
 			m0 = NULL;
 		} else {
 			m0 = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m0 != NULL)
 			ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		FREE_PKT(m);
 	} else if (code != ICMP6_UNREACH_RST && code != ICMP6_UNREACH_ABORT) {
 		/* Send an ICMPv6 unreach. */
 #if 0
 		/*
 		 * Unlike above, the mbufs need to line up with the ip6 hdr,
 		 * as the contents are read. We need to m_adj() the
 		 * needed amount.
 		 * The mbuf will however be thrown away so we can adjust it.
 		 * Remember we did an m_pullup on it already so we
 		 * can make some assumptions about contiguousness.
 		 */
 		if (args->L3offset)
 			m_adj(m, args->L3offset);
 #endif
 		/* m is not freed here; icmp6_error() is assumed to take ownership. */
 		icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
 	} else
 		FREE_PKT(m);
 
 	/* In every path the original packet is gone from the caller's view. */
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 
 /*
  * sends a reject message, consuming the mbuf passed as an argument.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
 {
 
 	/* NOTE(review): the disabled block below is kept verbatim; its comment is malformed. */
 #if 0
 	/* XXX When ip is not guaranteed to be at mtod() we will
 	 * need to account for this */
 	 * The mbuf will however be thrown away so we can adjust it.
 	 * Remember we did an m_pullup on it already so we
 	 * can make some assumptions about contiguousness.
 	 */
 	if (args->L3offset)
 		m_adj(m, args->L3offset);
 #endif
 	if (code != ICMP_REJECT_RST && code != ICMP_REJECT_ABORT) {
 		/*
 		 * Send an ICMP unreach.  args->m is not freed here;
 		 * icmp_error() is assumed to take ownership of it.
 		 */
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else if (code == ICMP_REJECT_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const tcp =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 		/* Never answer a RST with another RST. */
 		if ( (tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m;
 			m = ipfw_send_pkt(args->m, &(args->f_id),
 				ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 				tcp->th_flags | TH_RST);
 			if (m != NULL)
 				ip_output(m, NULL, NULL, 0, NULL, NULL);
 		}
 		FREE_PKT(args->m);
 	} else if (code == ICMP_REJECT_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m;
 		struct sctphdr *sctp;
 		struct sctp_chunkhdr *chunk;
 		struct sctp_init *init;
 		u_int32_t v_tag;
 		int reflected;
 
 		sctp = L3HDR(struct sctphdr, mtod(args->m, struct ip *));
 		/* Default: reflect the packet's own verification tag (T-bit set). */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		if (iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			/* Look at the first chunk header if available */
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					/* Malformed INIT: v_tag 0 means no reply below. */
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.  The "+ 3"
 				 * presumably allows for chunk padding; here
 				 * v_tag is still 0, so no reply is sent.
 				 */
 				if (iplen >
 				    (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((iplen >= (ip->ip_hl << 2) +
 				    sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					/* Tag comes from the peer's INIT: T-bit clear. */
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 * but don't do to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		/* A zero tag at this point means "do not send an ABORT". */
 		if (v_tag == 0) {
 			m = NULL;
 		} else {
 			m = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m != NULL)
 			ip_output(m, NULL, NULL, 0, NULL, NULL);
 		FREE_PKT(args->m);
 	} else
 		/* RST/ABORT requested for a non-matching protocol: drop silently. */
 		FREE_PKT(args->m);
 	/* In every path the original packet is gone from the caller's view. */
 	args->m = NULL;
 }
 
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
  * so we cache the results. ugid_lookupp is 0 if we have not
  * yet done a lookup, 1 if we succeeded, and -1 if we tried
  * and failed. The function always returns the match value.
  * We could actually spare the variable and use *uc, setting
  * it to (void *)check_uidgid if we have no info, NULL if
  * we tried and failed, or any other value if successful.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
     struct ucred **uc)
 {
 #if defined(USERSPACE)
 	return 0;	// not supported in userspace
 #else
 #ifndef __FreeBSD__
 	/* XXX */
 	/* NOTE(review): this branch uses names not declared in this function. */
 	return cred_check(insn, proto, oif,
 	    dst_ip, dst_port, src_ip, src_port,
 	    (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
 #else  /* FreeBSD */
 	struct in_addr src_ip, dst_ip;
 	struct inpcbinfo *pi;
 	struct ipfw_flow_id *id;
 	struct inpcb *pcb, *inp;
 	struct ifnet *oif;
 	int lookupflags;
 	int match;
 
 	id = &args->f_id;
 	inp = args->inp;
 	oif = args->oif;
 
 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather than holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *ugid_lookupp == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			/* Take a reference on the credential stored in *uc. */
 			*uc = crhold(inp->inp_cred);
 			*ugid_lookupp = 1;
 		} else
 			*ugid_lookupp = -1;
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*ugid_lookupp == -1)
 		return (0);
 	/* Choose the pcb table by protocol; other protocols never match. */
 	if (id->proto == IPPROTO_TCP) {
 		lookupflags = 0;
 		pi = &V_tcbinfo;
 	} else if (id->proto == IPPROTO_UDP) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_udbinfo;
 	} else if (id->proto == IPPROTO_UDPLITE) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_ulitecbinfo;
 	} else
 		return 0;
 	/* A found pcb must come back read-locked (asserted and released below). */
 	lookupflags |= INPLOOKUP_RLOCKPCB;
 	match = 0;
 	if (*ugid_lookupp == 0) {
 		/*
 		 * No cached result yet: look the pcb up.  The two address/port
 		 * pairs are passed in src,dst order when oif is NULL and in
 		 * dst,src order otherwise -- presumably the lookup expects
 		 * (foreign, local); confirm against in_pcblookup_mbuf().
 		 */
 		if (id->addr_type == 6) {
 #ifdef INET6
 			if (oif == NULL)
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->src_ip6, htons(id->src_port),
 				    &id->dst_ip6, htons(id->dst_port),
 				    lookupflags, oif, args->m);
 			else
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->dst_ip6, htons(id->dst_port),
 				    &id->src_ip6, htons(id->src_port),
 				    lookupflags, oif, args->m);
 #else
 			*ugid_lookupp = -1;
 			return (0);
 #endif
 		} else {
 			/* The flow id keeps IPv4 addresses in host order. */
 			src_ip.s_addr = htonl(id->src_ip);
 			dst_ip.s_addr = htonl(id->dst_ip);
 			if (oif == NULL)
 				pcb = in_pcblookup_mbuf(pi,
 				    src_ip, htons(id->src_port),
 				    dst_ip, htons(id->dst_port),
 				    lookupflags, oif, args->m);
 			else
 				pcb = in_pcblookup_mbuf(pi,
 				    dst_ip, htons(id->dst_port),
 				    src_ip, htons(id->src_port),
 				    lookupflags, oif, args->m);
 		}
 		if (pcb != NULL) {
 			INP_RLOCK_ASSERT(pcb);
 			/* Keep a credential reference, then drop the pcb lock. */
 			*uc = crhold(pcb->inp_cred);
 			*ugid_lookupp = 1;
 			INP_RUNLOCK(pcb);
 		}
 		if (*ugid_lookupp == 0) {
 			/*
 			 * We tried and failed, set the variable to -1
 			 * so we will not try again on this packet.
 			 */
 			*ugid_lookupp = -1;
 			return (0);
 		}
 	}
 	/* *uc is valid here; compare it against the value in insn->d[0]. */
 	if (insn->o.opcode == O_UID)
 		match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID)
 		match = groupmember((gid_t)insn->d[0], *uc);
 	else if (insn->o.opcode == O_JAIL)
 		match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
 	return (match);
 #endif /* __FreeBSD__ */
 #endif /* not supported in userspace */
 }
 
 /*
  * Helper function to set args with info on the rule after the matching
  * one. slot is precise, whereas we guess rule_id as they are
  * assigned sequentially.
  */
 static inline void
 set_match(struct ip_fw_args *args, int slot,
 	struct ip_fw_chain *chain)
 {
 	args->rule.chain_id = chain->id;
 	args->rule.slot = slot + 1; /* we use 0 as a marker */
 	args->rule.rule_id = 1 + chain->map[slot]->id;
 	args->rule.rulenum = chain->map[slot]->rulenum;
 	args->flags |= IPFW_ARGS_REF;
 }
 
 #ifndef LINEAR_SKIPTO
 /*
  * Helper function to enable cached rule lookups using
  * cached_id and cached_pos fields in ipfw rule.
  */
 static int
 jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int target, pos;
 
 	/*
 	 * Fast path: the position cached in the rule (f->cached_pos) is
 	 * usable when it was stored for the current chain version
 	 * (f->cached_id) and the target does not come from a table
 	 * argument.  (Horrible hacks to avoid changing the ABI.)
 	 */
 	if (num != IP_FW_TARG && f->cached_id == chain->id)
 		return (f->cached_pos);
 
 	/* Slow path: resolve the target rule number ... */
 	target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 	/* ... never going backward unless explicitly allowed ... */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 	/* ... and turn it into an index, via idxmap when it exists. */
 	if (chain->idxmap != NULL)
 		pos = chain->idxmap[target];
 	else
 		pos = ipfw_find_rule(chain, target, 0);
 
 	/* Table-argument targets vary per packet and are never cached. */
 	if (num != IP_FW_TARG) {
 		f->cached_id = chain->id;
 		f->cached_pos = pos;
 	}
 
 	return (pos);
 }
 #else
 /*
  * Helper function to enable real fast rule lookups.
  */
 static int
 jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int target;
 
 	/* Resolve the target rule number (possibly from a table argument). */
 	target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 
 	/* Clamp to the rule after f unless backward jumps are allowed. */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 
 	/* idxmap translates a rule number directly into a chain position. */
 	return (chain->idxmap[target]);
 }
 #endif
 
 /*
  * Shorthand for IP_FW_ARG_TABLEARG() that implicitly uses a variable named
  * 'chain' taken from the scope in which the macro is expanded.
  */
 #define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, NULL for layer3 packet.
  *	args->L3offset	Number of bytes bypassed if we came from L2.
  *			e.g. often sizeof(eh)  ** NOTYET **
  *	args->oif	Outgoing interface, NULL if packet is incoming.
  *		The incoming interface is in the mbuf. (in)
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->next_hop6	IPv6 next hop we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->rule.info	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *		args->rule contains the matching rule,
  *		args->rule.info has additional information.
  *
  */
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 
 	/*
 	 * Local variables holding state while processing a packet:
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 * **notyet**
 	 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is the beginning of the ip(4 or 6) header.
 	 *	Calculated by adding the L3offset to the start of data.
 	 *	(Until we start using L3offset, the packet is
 	 *	supposed to start with the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 #ifndef __FreeBSD__
 	struct bsd_ucred ucred_cache;
 #else
 	struct ucred *ucred_cache = NULL;
 #endif
 	int ucred_lookup = 0;
 
 	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
 	 *	(ether_output, ip_output).
 	 */
 	struct ifnet *oif = args->oif;
 
 	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
 	 *	or there is a single packet fragment (fragment header added
 	 *	without needed).  We will treat a single packet fragment as if
 	 *	there was no fragment header (or log/block depending on the
 	 *	V_fw_permit_single_frag6 sysctl setting).
 	 */
 	u_short offset = 0;
 	u_short ip6f_mf = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	uint8_t proto;
 	uint16_t src_port, dst_port;		/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	int iplen = 0;
 	int pktlen;
 	uint16_t etype;			/* Host order stored ether type */
 
 	struct ipfw_dyn_info dyn_info;
 	struct ip_fw *q = NULL;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	uint8_t	icmp6_type = 0;
 	uint16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 
 	int is_ipv4 = 0;
 
 	int done = 0;		/* flag to exit the outer loop */
+	IPFW_RLOCK_TRACKER;
 
 	if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
 		return (IP_FW_PASS);	/* accept */
 
 	dst_ip.s_addr = 0;		/* make sure it is initialized */
 	src_ip.s_addr = 0;		/* make sure it is initialized */
 	src_port = dst_port = 0;
 	pktlen = m->m_pkthdr.len;
 
 	DYN_INFO_INIT(&dyn_info);
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define PULLUP_TO(_len, p, T)	PULLUP_LEN(_len, p, sizeof(T))
 #define PULLUP_LEN(_len, p, T)					\
 do {								\
 	int x = (_len) + T;					\
 	if ((m)->m_len < x) {					\
 		args->m = m = m_pullup(m, x);			\
 		if (m == NULL)					\
 			goto pullup_failed;			\
 	}							\
 	p = (mtod(m, char *) + (_len));				\
 } while (0)
 
 	/*
 	 * if we have an ether header,
 	 */
 	if (args->flags & IPFW_ARGS_ETHER)
 		etype = ntohs(args->eh->ether_type);
 	else
 		etype = 0;
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 
 		is_ipv6 = 1;
 		hlen = sizeof(struct ip6_hdr);
 		proto = ip6->ip6_nxt;
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL && offset == 0) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				icmp6_type = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
 				case 0:
 					ext_hd |= EXT_RTHDR0;
 					break;
 				case 2:
 					ext_hd |= EXT_RTHDR2;
 					break;
 				default:
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Unknown "
 						    "Routing Header type(%d)\n",
 						    ((struct ip6_rthdr *)
 						    ulp)->ip6r_type);
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (V_fw_permit_single_frag6 == 0 &&
 				    offset == 0 && ip6f_mf == 0) {
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Invalid "
 						    "Fragment Header\n");
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.extra =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				/*
 				 * Packet ends here, and IPv6 header has
 				 * already been pulled up. If ip6e_len!=0
 				 * then octets must be ignored.
 				 */
 				ulp = ip; /* non-NULL to get out of loop. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			case IPPROTO_PIM:
 				/* XXX PIM header check? */
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
 			case IPPROTO_GRE:	/* RFC 1701 */
 				/* XXX GRE header check? */
 				PULLUP_TO(hlen, ulp, struct grehdr);
 				break;
 
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, offsetof(
 				    struct carp_header, carp_counter));
 				if (CARP_ADVERTISEMENT !=
 				    ((struct carp_header *)ulp)->carp_type)
 					return (IP_FW_DENY);
 				break;
 
 			case IPPROTO_IPV6:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip6_hdr);
 				break;
 
 			case IPPROTO_IPV4:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip);
 				break;
 
 			default:
 				if (V_fw_verbose)
 					printf("IPFW2: IPV6 - Unknown "
 					    "Extension Header(%d), ext_hd=%x\n",
 					     proto, ext_hd);
 				if (V_fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 			} /*switch */
 		}
 		ip = mtod(m, struct ip *);
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.addr_type = 6;
 		args->f_id.src_ip6 = ip6->ip6_src;
 		args->f_id.dst_ip6 = ip6->ip6_dst;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 		iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
 		is_ipv4 = 1;
 		hlen = ip->ip_hl << 2;
 		/*
 		 * Collect parameters into local variables for faster
 		 * matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		iplen = ntohs(ip->ip_len);
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				//args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		}
 
 		ip = mtod(m, struct ip *);
 		args->f_id.addr_type = 4;
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	} else {
 		proto = 0;
 		dst_ip.s_addr = src_ip.s_addr = 0;
 
 		args->f_id.addr_type = 1; /* XXX */
 	}
 #undef PULLUP_TO
 	pktlen = iplen < pktlen ? iplen: pktlen;
 
 	/* Properly initialize the rest of f_id */
 	args->f_id.proto = proto;
 	args->f_id.src_port = src_port = ntohs(src_port);
 	args->f_id.dst_port = dst_port = ntohs(dst_port);
 	args->f_id.fib = M_GETFIB(m);
 
 	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
 		IPFW_PF_RUNLOCK(chain);
 		return (IP_FW_PASS);	/* accept */
 	}
 	if (args->flags & IPFW_ARGS_REF) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH, DIVERT/TEE...)
 		 * Validate the slot and continue from the next one
 		 * if still present, otherwise do a lookup.
 		 */
 		f_pos = (args->rule.chain_id == chain->id) ?
 		    args->rule.slot :
 		    ipfw_find_rule(chain, args->rule.rulenum,
 			args->rule.rule_id);
 	} else {
 		f_pos = 0;
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 * We have two nested loops and an inner switch. Sometimes we
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
 	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
 	 *		cmd points to the current microinstruction.
 	 *
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
 	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
 	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 		struct ip_fw *f;
 
 		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 /* check_body: */
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset != 0)
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 #else
 						    (void *)&ucred_cache);
 #endif
 				break;
 
 			case O_RECV:
 				match = iface_match(m->m_pkthdr.rcvif,
 				    (ipfw_insn_if *)cmd, chain, &tablearg);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_VIA:
 				match = iface_match(oif ? oif :
 				    m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_MACADDR2:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (etype >= p[0] &&
 						    etype <= p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->flags & IPFW_ARGS_ETHER);
 				break;
 
 			case O_DIVERTED:
 				if ((args->flags & IPFW_ARGS_REF) == 0)
 					break;
 				/*
 				 * For diverted packets, args->rule.info
 				 * contains the divert port (in host format)
 				 * reason and direction.
 				 */
 				match = ((args->rule.info & IPFW_IS_MASK) ==
 				    IPFW_IS_DIVERT) && (
 				    ((args->rule.info & IPFW_INFO_IN) ?
 					1: 2) & cmd->arg1);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_DST_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx, key;
 				uint16_t keylen;
 
 				if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
 					/* Determine lookup key type */
 					vidx = ((ipfw_insn_u32 *)cmd)->d[1];
 					if (vidx != 4 /* uid */ &&
 					    vidx != 5 /* jail */ &&
 					    is_ipv6 == 0 && is_ipv4 == 0)
 						break;
 					/* Determine key length */
 					if (vidx == 0 /* dst-ip */ ||
 					    vidx == 1 /* src-ip */)
 						keylen = is_ipv6 ?
 						    sizeof(struct in6_addr):
 						    sizeof(in_addr_t);
 					else {
 						keylen = sizeof(key);
 						pkey = &key;
 					}
 					if (vidx == 0 /* dst-ip */)
 						pkey = is_ipv4 ? (void *)&dst_ip:
 						    (void *)&args->f_id.dst_ip6;
 					else if (vidx == 1 /* src-ip */)
 						pkey = is_ipv4 ? (void *)&src_ip:
 						    (void *)&args->f_id.src_ip6;
 					else if (vidx == 6 /* dscp */) {
 						if (is_ipv4)
 							key = ip->ip_tos >> 2;
 						else {
 							key = args->f_id.flow_id6;
 							key = (key & 0x0f) << 2 |
 							    (key & 0xf000) >> 14;
 						}
 						key &= 0x3f;
 					} else if (vidx == 2 /* dst-port */ ||
 					    vidx == 3 /* src-port */) {
 						/* Skip fragments */
 						if (offset != 0)
 							break;
 						/* Skip proto without ports */
 						if (proto != IPPROTO_TCP &&
 						    proto != IPPROTO_UDP &&
 						    proto != IPPROTO_UDPLITE &&
 						    proto != IPPROTO_SCTP)
 							break;
 						if (vidx == 2 /* dst-port */)
 							key = dst_port;
 						else
 							key = src_port;
 					}
 #ifndef USERSPACE
 					else if (vidx == 4 /* uid */ ||
 					    vidx == 5 /* jail */) {
 						check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache->cr_uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache->cr_prison->pr_id;
 #else /* !__FreeBSD__ */
 						    (void *)&ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache.uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache.xid;
 #endif /* !__FreeBSD__ */
 					}
 #endif /* !USERSPACE */
 					else
 						break;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, keylen, pkey, &vidx);
 					if (!match)
 						break;
 					tablearg = vidx;
 					break;
 				}
 				/* cmdlen <= F_INSN_SIZE(ipfw_insn_u32) */
 				/* FALLTHROUGH */
 			}
 			case O_IP_SRC_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx;
 				uint16_t keylen;
 
 				if (is_ipv4) {
 					keylen = sizeof(in_addr_t);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &dst_ip;
 					else
 						pkey = &src_ip;
 				} else if (is_ipv6) {
 					keylen = sizeof(struct in6_addr);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &args->f_id.dst_ip6;
 					else
 						pkey = &args->f_id.src_ip6;
 				} else
 					break;
 				match = ipfw_lookup_table(chain, cmd->arg1,
 				    keylen, pkey, &vidx);
 				if (!match)
 					break;
 				if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) {
 					match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 					    TARG_VAL(chain, vidx, tag);
 					if (!match)
 						break;
 				}
 				tablearg = vidx;
 				break;
 			}
 
 			case O_IP_FLOW_LOOKUP:
 				{
 					uint32_t v = 0;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, 0, &args->f_id, &v);
 					if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 						match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 						    TARG_VAL(chain, v, tag);
 					if (match)
 						tablearg = v;
 				}
 				break;
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					match = in_localip(src_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_SRC_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.src_ip6);
 #endif
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					match = in_localip(dst_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_DST_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.dst_ip6);
 #endif
 				break;
 
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE ||
 				    proto == IPPROTO_TCP ||
 				    proto == IPPROTO_SCTP) && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = (is_ipv4 &&
 				    cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (is_ipv4) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = iplen;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_DSCP:
 			    {
 				uint32_t *p;
 				uint16_t x;
 
 				p = ((ipfw_insn_u32 *)cmd)->d;
 
 				if (is_ipv4)
 					x = ip->ip_tos >> 2;
 				else if (is_ipv6) {
 					uint8_t *v;
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					x = (*v & 0x0F) << 2;
 					v++;
 					x |= *v >> 6;
 				} else
 					break;
 
 				/* DSCP bitmask is stored as low_u32 high_u32 */
 				if (x >= 32)
 					match = *(p + 1) & (1 << (x - 32));
 				else
 					match = *p & (1 << x);
 			    }
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 #ifdef INET6
 				    if (is_ipv6) {
 					    struct ip6_hdr *ip6;
 
 					    ip6 = (struct ip6_hdr *)ip;
 					    if (ip6->ip6_plen == 0) {
 						    /*
 						     * Jumbo payload is not
 						     * supported by this
 						     * opcode.
 						     */
 						    break;
 					    }
 					    x = iplen - hlen;
 				    } else
 #endif /* INET6 */
 					    x = iplen - (ip->ip_hl << 2);
 				    tcp = TCP(ulp);
 				    x -= tcp->th_off << 2;
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				if (proto == IPPROTO_TCP && offset == 0 && ulp){
 					PULLUP_LEN(hlen, ulp,
 					    (TCP(ulp)->th_off << 2));
 					match = tcpopts_match(TCP(ulp), cmd);
 				}
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    x = ntohs(TCP(ulp)->th_win);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* Otherwise we have ranges. */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i > 0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
 				struct m_tag *mtag;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				/*
 				 * ALTQ uses mbuf tags from another
 				 * packet filtering system - pf(4).
 				 * We allocate a tag in its format
 				 * and fill it in, pretending to be pf(4).
 				 */
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
 				mtag = m_tag_get(PACKET_TAG_PF,
 				    sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
 				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				m_tag_prepend(m, mtag);
 				at = (struct pf_mtag *)(mtag + 1);
 				at->qid = altq->qid;
 				at->hdr = ip;
 				break;
 			}
 
 			case O_LOG:
 				ipfw_log(chain, f, hlen, args, m,
 				    oif, offset | ip6f_mf, tablearg, ip);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = ((oif != NULL) ||
 				    (m->m_pkthdr.rcvif == NULL) ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    m->m_pkthdr.rcvif, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, m->m_pkthdr.rcvif,
 				        args->f_id.fib)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) || (
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, NULL, args->f_id.fib))));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6),
 					        m->m_pkthdr.rcvif,
 						args->f_id.fib) :
 #endif
 					    verify_path(src_ip,
 					    	m->m_pkthdr.rcvif,
 					        args->f_id.fib);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					int i = cmdlen - 1;
 					struct in6_addr p;
 					struct in6_addr *d =
 					    &((ipfw_insn_ip6 *)cmd)->addr6;
 
 					for (; !match && i > 0; d += 2,
 					    i -= F_INSN_SIZE(struct in6_addr)
 					    * 2) {
 						p = (cmd->opcode ==
 						    O_IP6_SRC_MASK) ?
 						    args->f_id.src_ip6:
 						    args->f_id.dst_ip6;
 						APPLY_MASK(&p, &d[1]);
 						match =
 						    IN6_ARE_ADDR_EQUAL(&d[0],
 						    &p);
 					}
 				}
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			case O_TAG: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				/* Packet is already tagged with this tag? */
 				mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
 
 				/* We have `untag' action when F_NOT flag is
 				 * present. And we must remove this mtag from
 				 * mbuf and reset `match' to zero (`match' will
 				 * be inverted later).
 				 * Otherwise we should allocate new mtag and
 				 * push it into mbuf.
 				 */
 				if (cmd->len & F_NOT) { /* `untag' action */
 					if (mtag != NULL)
 						m_tag_delete(m, mtag);
 					match = 0;
 				} else {
 					if (mtag == NULL) {
 						mtag = m_tag_alloc( MTAG_IPFW,
 						    tag, 0, M_NOWAIT);
 						if (mtag != NULL)
 							m_tag_prepend(m, mtag);
 					}
 					match = 1;
 				}
 				break;
 			}
 
 			case O_FIB: /* try match the specified fib */
 				if (args->f_id.fib == cmd->arg1)
 					match = 1;
 				break;
 
 			case O_SOCKARG:	{
 #ifndef USERSPACE	/* not supported in userspace */
 				struct inpcb *inp = args->inp;
 				struct inpcbinfo *pi;
 				
 				if (is_ipv6) /* XXX can we remove this ? */
 					break;
 
 				if (proto == IPPROTO_TCP)
 					pi = &V_tcbinfo;
 				else if (proto == IPPROTO_UDP)
 					pi = &V_udbinfo;
 				else if (proto == IPPROTO_UDPLITE)
 					pi = &V_ulitecbinfo;
 				else
 					break;
 
 				/*
 				 * XXXRW: so_user_cookie should almost
 				 * certainly be inp_user_cookie?
 				 */
 
 				/* For incoming packets, look up the
 				 * inpcb using the src/dest ip/port tuple. */
 				if (inp == NULL) {
 					inp = in_pcblookup(pi, 
 						src_ip, htons(src_port),
 						dst_ip, htons(dst_port),
 						INPLOOKUP_RLOCKPCB, NULL);
 					if (inp != NULL) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 						INP_RUNLOCK(inp);
 					}
 				} else {
 					if (inp->inp_socket) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 					}
 				}
 #endif /* !USERSPACE */
 				break;
 			}
 
 			case O_TAGGED: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				if (cmdlen == 1) {
 					match = m_tag_locate(m, MTAG_IPFW,
 					    tag, NULL) != NULL;
 					break;
 				}
 
 				/* we have ranges */
 				for (mtag = m_tag_first(m);
 				    mtag != NULL && !match;
 				    mtag = m_tag_next(m, mtag)) {
 					uint16_t *p;
 					int i;
 
 					if (mtag->m_tag_cookie != MTAG_IPFW)
 						continue;
 
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for(; !match && i > 0; i--, p += 2)
 						match =
 						    mtag->m_tag_id >= p[0] &&
 						    mtag->m_tag_id <= p[1];
 				}
 				break;
 			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to set l=0, done=1)
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   (setting l=0), or to the SKIPTO target (setting
 			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule (one
 			 *   exception is O_SKIP_ACTION which could be
 			 *   between these opcodes and 'action' one).
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped (set retval,
 			 *   break loops with l=0, done=1)
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found.
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes become NOPs.
 			 *   The jump to the next rule is done by setting
 			 *   l=0, cmdlen=0.
 			 *
 			 * O_SKIP_ACTION: this opcode is not a real 'action'
 			 *  either, and is stored right before the 'action'
 			 *  part of the rule, right after the O_KEEP_STATE
 			 *  opcode. It causes match failure so the real
 			 *  'action' could be executed only if the rule
 			 *  is checked via dynamic rule from the state
 			 *  table, as in such case execution starts
 			 *  from the true 'action' opcode directly.
 			 *   
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (ipfw_dyn_install_state(chain, f,
 				    (ipfw_insn_limit *)cmd, args, ulp,
 				    pktlen, &dyn_info, tablearg)) {
 					/* error or limit violation */
 					retval = IP_FW_DENY;
 					l = 0;	/* exit inner loop */
 					done = 1; /* exit outer loop */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_info.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (DYN_LOOKUP_NEEDED(&dyn_info, cmd) &&
 				    (q = ipfw_dyn_lookup_state(args, ulp,
 				    pktlen, cmd, &dyn_info)) != NULL) {
 					/*
 					 * Found dynamic entry, jump to the
 					 * 'action' part of the parent rule
 					 * by setting f, cmd, l and clearing
 					 * cmdlen.
 					 */
 					f = q;
 					f_pos = dyn_info.f_pos;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					cmdlen = 0;
 					match = 1;
 					break;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					l = 0;	/* exit inner loop */
 				match = 1;
 				break;
 
 			case O_SKIP_ACTION:
 				match = 0;	/* skip to the next rule */
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_PIPE:
 			case O_QUEUE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, pipe);
 				if (cmd->opcode == O_PIPE)
 					args->rule.info |= IPFW_IS_PIPE;
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_DIVERT:
 			case O_TEE:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not on layer 2 */
 				/* otherwise this is terminal */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				retval = (cmd->opcode == O_DIVERT) ?
 					IP_FW_DIVERT : IP_FW_TEE;
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, divert);
 				break;
 
 			case O_COUNT:
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_SKIPTO:
 			    IPFW_INC_RULE_COUNTER(f, pktlen);
 			    f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0);
 			    /*
 			     * Skip disabled rules, and re-enter
 			     * the inner loop with the correct
 			     * f_pos, f, l and cmd.
 			     * Also clear cmdlen and skip_or
 			     */
 			    for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				     (1 << chain->map[f_pos]->set));
 				    f_pos++)
 				;
 			    /* Re-enter the inner loop at the skipto rule. */
 			    f = chain->map[f_pos];
 			    l = f->cmd_len;
 			    cmd = f->cmd;
 			    match = 1;
 			    cmdlen = 0;
 			    skip_or = 0;
 			    continue;
 			    break;	/* not reached */
 
 			case O_CALLRETURN: {
 				/*
 				 * Implementation of `subroutine' call/return,
 				 * in the stack carried in an mbuf tag. This
 				 * is different from `skipto' in that any call
 				 * address is possible (`skipto' must prevent
 				 * backward jumps to avoid endless loops).
 				 * We have `return' action when F_NOT flag is
 				 * present. The `m_tag_id' field is used as
 				 * stack pointer.
 				 */
 				struct m_tag *mtag;
 				uint16_t jmpto, *stack;
 
 #define	IS_CALL		((cmd->len & F_NOT) == 0)
 #define	IS_RETURN	((cmd->len & F_NOT) != 0)
 				/*
 				 * Hand-rolled version of m_tag_locate() with
 				 * wildcard `type'.
 				 * If not already tagged, allocate new tag.
 				 */
 				mtag = m_tag_first(m);
 				while (mtag != NULL) {
 					if (mtag->m_tag_cookie ==
 					    MTAG_IPFW_CALL)
 						break;
 					mtag = m_tag_next(m, mtag);
 				}
 				if (mtag == NULL && IS_CALL) {
 					mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
 					    IPFW_CALLSTACK_SIZE *
 					    sizeof(uint16_t), M_NOWAIT);
 					if (mtag != NULL)
 						m_tag_prepend(m, mtag);
 				}
 
 				/*
 				 * On error both `call' and `return' just
 				 * continue with next rule.
 				 */
 				if (IS_RETURN && (mtag == NULL ||
 				    mtag->m_tag_id == 0)) {
 					l = 0;		/* exit inner loop */
 					break;
 				}
 				if (IS_CALL && (mtag == NULL ||
 				    mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
 					printf("ipfw: call stack error, "
 					    "go to next rule\n");
 					l = 0;		/* exit inner loop */
 					break;
 				}
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				stack = (uint16_t *)(mtag + 1);
 
 				/*
 				 * The `call' action may use cached f_pos
 				 * (in f->next_rule), whose version is written
 				 * in f->next_rule.
 				 * The `return' action, however, doesn't have
 				 * fixed jump address in cmd->arg1 and can't use
 				 * cache.
 				 */
 				if (IS_CALL) {
 					stack[mtag->m_tag_id] = f->rulenum;
 					mtag->m_tag_id++;
 			    		f_pos = JUMP(chain, f, cmd->arg1,
 					    tablearg, 1);
 				} else {	/* `return' action */
 					mtag->m_tag_id--;
 					jmpto = stack[mtag->m_tag_id] + 1;
 					f_pos = ipfw_find_rule(chain, jmpto, 0);
 				}
 
 				/*
 				 * Skip disabled rules, and re-enter
 				 * the inner loop with the correct
 				 * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
 				for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				    (1 << chain->map[f_pos]->set)); f_pos++)
 					;
 				/* Re-enter the inner loop at the dest rule. */
 				f = chain->map[f_pos];
 				l = f->cmd_len;
 				cmd = f->cmd;
 				cmdlen = 0;
 				skip_or = 0;
 				continue;
 				break;	/* NOTREACHED */
 			}
 #undef IS_CALL
 #undef IS_RETURN
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1, iplen, ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(icmp6_type) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(
 					&args->f_id.dst_ip6)) {
 					send_reject6(args,
 					    cmd->opcode == O_REJECT ?
 					    map_icmp_unreach(cmd->arg1):
 					    cmd->arg1, hlen,
 					    (struct ip6_hdr *)ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_FORWARD_IP:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 				    struct sockaddr_in *sa;
 
 				    sa = &(((ipfw_insn_sa *)cmd)->sa);
 				    if (sa->sin_addr.s_addr == INADDR_ANY) {
 #ifdef INET6
 					/*
 					 * We use O_FORWARD_IP opcode for
 					 * fwd rule with tablearg, but tables
 					 * now support IPv6 addresses. And
 					 * when we are inspecting IPv6 packet,
 					 * we can use nh6 field from
 					 * table_value as next_hop6 address.
 					 */
 					if (is_ipv6) {
 						struct ip_fw_nh6 *nh6;
 
 						args->flags |= IPFW_ARGS_NH6;
 						nh6 = &args->hopstore6;
 						nh6->sin6_addr = TARG_VAL(
 						    chain, tablearg, nh6);
 						nh6->sin6_port = sa->sin_port;
 						nh6->sin6_scope_id = TARG_VAL(
 						    chain, tablearg, zoneid);
 					} else
 #endif
 					{
 						args->flags |= IPFW_ARGS_NH4;
 						args->hopstore.sin_port =
 						    sa->sin_port;
 						sa = &args->hopstore;
 						sa->sin_family = AF_INET;
 						sa->sin_len = sizeof(*sa);
 						sa->sin_addr.s_addr = htonl(
 						    TARG_VAL(chain, tablearg,
 						    nh4));
 					}
 				    } else {
 					    args->flags |= IPFW_ARGS_NH4PTR;
 					    args->next_hop = sa;
 				    }
 				}
 				retval = IP_FW_PASS;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 #ifdef INET6
 			case O_FORWARD_IP6:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
 					args->flags |= IPFW_ARGS_NH6PTR;
 					args->next_hop6 = sin6;
 				}
 				retval = IP_FW_PASS;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 #endif
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, netgraph);
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_SETFIB: {
 				uint32_t fib;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				fib = TARG(cmd->arg1, fib) & 0x7FFF;
 				if (fib >= rt_numfibs)
 					fib = 0;
 				M_SETFIB(m, fib);
 				args->f_id.fib = fib; /* XXX */
 				l = 0;		/* exit inner loop */
 				break;
 		        }
 
 			case O_SETDSCP: {
 				uint16_t code;
 
 				code = TARG(cmd->arg1, dscp) & 0x3F;
 				l = 0;		/* exit inner loop */
 				if (is_ipv4) {
 					uint16_t old;
 
 					old = *(uint16_t *)ip;
 					ip->ip_tos = (code << 2) |
 					    (ip->ip_tos & 0x03);
 					ip->ip_sum = cksum_adjust(ip->ip_sum,
 					    old, *(uint16_t *)ip);
 				} else if (is_ipv6) {
 					uint8_t *v;
 
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					*v = (*v & 0xF0) | (code >> 2);
 					v++;
 					*v = (*v & 0x3F) | ((code & 0x03) << 6);
 				} else
 					break;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				break;
 			}
 
 			case O_NAT:
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				/*
 				 * Ensure that we do not invoke NAT handler for
 				 * non IPv4 packets. Libalias expects only IPv4.
 				 */
 				if (!is_ipv4 || !IPFW_NAT_LOADED) {
 				    retval = IP_FW_DENY;
 				    break;
 				}
 
 				struct cfg_nat *t;
 				int nat_id;
 
 				args->rule.info = 0;
 				set_match(args, f_pos, chain);
 				/* Check if this is 'global' nat rule */
 				if (cmd->arg1 == IP_FW_NAT44_GLOBAL) {
 					retval = ipfw_nat_ptr(args, NULL, m);
 					break;
 				}
 				t = ((ipfw_insn_nat *)cmd)->nat;
 				if (t == NULL) {
 					nat_id = TARG(cmd->arg1, nat);
 					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
 					    break;
 					}
 					if (cmd->arg1 != IP_FW_TARG)
 					    ((ipfw_insn_nat *)cmd)->nat = t;
 				}
 				retval = ipfw_nat_ptr(args, t, m);
 				break;
 
 			case O_REASS: {
 				int ip_off;
 
 				l = 0;	/* in any case exit inner loop */
 				if (is_ipv6) /* IPv6 is not supported yet */
 					break;
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				ip_off = ntohs(ip->ip_off);
 
 				/* if not fragmented, go to next rule */
 				if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
 				    break;
 
 				args->m = m = ip_reass(m);
 
 				/*
 				 * do IP header checksum fixup.
 				 */
 				if (m == NULL) { /* fragment got swallowed */
 				    retval = IP_FW_DENY;
 				} else { /* good, packet complete */
 				    int hlen;
 
 				    ip = mtod(m, struct ip *);
 				    hlen = ip->ip_hl << 2;
 				    ip->ip_sum = 0;
 				    if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
 				    args->rule.info = 0;
 				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
 			}
 			case O_EXTERNAL_ACTION:
 				l = 0; /* in any case exit inner loop */
 				retval = ipfw_run_eaction(chain, args,
 				    cmd, &done);
 				/*
 				 * If both @retval and @done are zero,
 				 * consider this as rule matching and
 				 * update counters.
 				 */
 				if (retval == 0 && done == 0) {
 					IPFW_INC_RULE_COUNTER(f, pktlen);
 					/*
 					 * Reset the result of the last
 					 * dynamic state lookup.
 					 * External action can change
 					 * @args content, and it may be
 					 * used for new state lookup later.
 					 */
 					DYN_INFO_INIT(&dyn_info);
 				}
 				break;
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 			/*
 			 * if we get here with l=0, then match is irrelevant.
 			 */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner loop, scan opcodes */
 #undef PULLUP_LEN
 
 		if (done)
 			break;
 
 /* next_rule:; */	/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 
 	if (done) {
 		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
 		IPFW_INC_RULE_COUNTER(rule, pktlen);
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	}
 	IPFW_PF_RUNLOCK(chain);
 #ifdef __FreeBSD__
 	if (ucred_cache != NULL)
 		crfree(ucred_cache);
 #endif
 	return (retval);
 
 pullup_failed:
 	if (V_fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Set maximum number of tables that can be used in given VNET ipfw instance.
  */
 #ifdef SYSCTL_NODE
 static int
 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int requested;
 	int rc;
 
 	/* Let the generic handler operate on a private copy of the limit. */
 	requested = V_fw_tables_max;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc != 0)
 		return (rc);	/* the handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read-only request, nothing to apply */
 
 	/* A new value was supplied: hand it to ipfw_resize_tables(). */
 	return (ipfw_resize_tables(&V_layer3_chain, requested));
 }
 
 /*
  * Switches table namespace between global and per-set.
  */
 static int
 sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int requested;
 	int rc;
 
 	/* Let the generic handler operate on a private copy of the flag. */
 	requested = V_fw_tables_sets;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc != 0)
 		return (rc);	/* the handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read-only request, nothing to apply */
 
 	/* A new value was supplied: switch the tables namespace. */
 	return (ipfw_switch_tables_namespace(&V_layer3_chain, requested));
 }
 #endif
 
 /*
  * Module and VNET glue
  */
 
 /*
  * Stuff that must be initialised only on boot or module load
  */
 static int
 ipfw_init(void)
 {
 	const char *divert_mode, *nat_mode;
 
 	/* Compile-time options decide what the banner reports. */
 #ifdef IPDIVERT
 	divert_mode = "enabled";
 #else
 	divert_mode = "loadable";
 #endif
 #ifdef IPFIREWALL_NAT
 	nat_mode = "enabled";
 #else
 	nat_mode = "loadable";
 #endif
 
 	/*
 	 * The banner is printed only once, when this function is run
 	 * from the sysinit code.
 	 */
 	printf("ipfw2 "
 #ifdef INET6
 	    "(+ipv6) "
 #endif
 	    "initialized, divert %s, nat %s, "
 	    "default to %s, logging ",
 	    divert_mode, nat_mode,
 	    default_to_accept ? "accept" : "deny");
 
 	/*
 	 * V_xxx variables can be read here, but in the VIMAGE case the
 	 * vnet specific initializer may not have run yet. Tunables have
 	 * already been processed, so the values printed are those of
 	 * the default vnet.
 	 * XXX This should all be rationalized AFTER 8.0
 	 */
 	if (V_fw_verbose != 0) {
 		if (V_verbose_limit != 0)
 			printf("limited to %d packets/entry by default\n",
 			    V_verbose_limit);
 		else
 			printf("unlimited\n");
 	} else
 		printf("disabled\n");
 
 	/* Clamp the user-supplied table count to the supported maximum. */
 	if (default_fw_tables > IPFW_TABLES_MAX)
 		default_fw_tables = IPFW_TABLES_MAX;
 
 	ipfw_init_sopt_handler();
 	ipfw_init_obj_rewriter();
 	ipfw_iface_init();
 	return (0);
 }
 
 /*
  * Called for the removal of the last instance only on module unload.
  */
 static void
 ipfw_destroy(void)
 {
 
 	/*
 	 * Tear down the global services that ipfw_init() set up
 	 * (interface tracker, sockopt handlers, object rewriters),
 	 * then announce the unload on the console.
 	 */
 	ipfw_iface_destroy();
 	ipfw_destroy_sopt_handler();
 	ipfw_destroy_obj_rewriter();
 	printf("IP firewall unloaded\n");
 }
 
 /*
  * Stuff that must be initialized for every instance
  * (including the first of course).
  */
 static int
 vnet_ipfw_init(const void *unused)
 {
 	int error, first;
 	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	chain = &V_layer3_chain;
 
 	/* Non-zero only when initializing the default vnet. */
 	first = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	/* First set up some values that are compile time options */
 	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
 	V_fw_deny_unknown_exthdrs = 1;
 #ifdef IPFIREWALL_VERBOSE
 	V_fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 #ifdef IPFIREWALL_NAT
 	LIST_INIT(&chain->nat);
 #endif
 
 	/* Init shared services hash table */
 	ipfw_init_srv(chain);
 
 	ipfw_init_counters();
 	/* Set initial number of tables */
 	V_fw_tables_max = default_fw_tables;
 	error = ipfw_init_tables(chain, first);
 	if (error) {
 		/*
 		 * NOTE(review): rule is still NULL at this point, so
 		 * free(rule) is a no-op; the specific error code is
 		 * replaced by ENOSPC, and ipfw_init_srv() and
 		 * ipfw_init_counters() are not undone here.
 		 */
 		printf("ipfw2: setting up tables failed\n");
 		free(chain->map, M_IPFW);
 		free(rule, M_IPFW);
 		return (ENOSPC);
 	}
 
 	IPFW_LOCK_INIT(chain);
 
 	/* fill and insert the default rule */
 	rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw));
 	rule->cmd_len = 1;
 	rule->cmd[0].len = 1;
 	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
 	chain->default_rule = rule;
 	ipfw_add_protected_rule(chain, rule, 0);
 
 	ipfw_dyn_init(chain);
 	ipfw_eaction_init(chain, first);
 #ifdef LINEAR_SKIPTO
 	ipfw_init_skipto_cache(chain);
 #endif
 	ipfw_bpf_init(first);
 
 	/* All per-vnet state is set up; mark this instance as ready. */
 	V_ipfw_vnet_ready = 1;		/* Open for business */
 
 	/*
 	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
 	 * Even if the latter two fail we still keep the module alive
 	 * because the sockopt and layer2 paths are still useful.
 	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
 	 * so we can ignore the exact return value and just set a flag.
 	 *
 	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
 	 * changes in the underlying (per-vnet) variables trigger
 	 * immediate hook()/unhook() calls.
 	 * In layer2 we have the same behaviour, except that V_ether_ipfw
 	 * is checked on each packet because there are no pfil hooks.
 	 */
 	V_ip_fw_ctl_ptr = ipfw_ctl3;
 	error = ipfw_attach_hooks(1);
 	return (error);
 }
 
 /*
  * Called for the removal of each instance.
  */
 static int
 vnet_ipfw_uninit(const void *unused)
 {
 	struct ip_fw *reap;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	int i, last;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
 	 * disconnect from ipv4, ipv6, layer2 and sockopt.
 	 * Then grab, release and grab again the WLOCK so we make
 	 * sure the update is propagated and nobody will be in.
 	 */
 	(void)ipfw_attach_hooks(0 /* detach */);
 	V_ip_fw_ctl_ptr = NULL;
 
 	/* Non-zero only when tearing down the default vnet. */
 	last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 
 	IPFW_UH_WLOCK(chain);
 
 	/*
 	 * With both locks held, move every rule onto the reap list and
 	 * release the rule map; the rules themselves are freed below,
 	 * after the locks have been dropped.
 	 */
 	reap = NULL;
 	IPFW_WLOCK(chain);
 	for (i = 0; i < chain->n_rules; i++)
 		ipfw_reap_add(chain, &reap, chain->map[i]);
 	free(chain->map, M_IPFW);
 #ifdef LINEAR_SKIPTO
 	ipfw_destroy_skipto_cache(chain);
 #endif
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	ipfw_destroy_tables(chain, last);
 	ipfw_eaction_uninit(chain, last);
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	vnet_ipfw_iface_destroy(chain);
 	ipfw_destroy_srv(chain);
 	IPFW_LOCK_DESTROY(chain);
 	ipfw_dyn_uninit(1);	/* free the remaining parts */
 	ipfw_destroy_counters();
 	ipfw_bpf_uninit(last);
 	return (0);
 }
 
 /*
  * Module event handler.
  * In general we have the choice of handling most of these events by the
  * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
  * use the SYSINIT handlers as they are more capable of expressing the
  * flow of control during module and vnet operations, so this is just
  * a skeleton. Note there is no SYSINIT equivalent of the module
  * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
  */
 static int
 ipfw_modevent(module_t mod, int type, void *unused)
 {
 
 	/*
 	 * Every known event is accepted without doing any work here:
 	 * the real setup and teardown live in the SYSINIT/SYSUNINIT
 	 * handlers. Anything else is reported as unsupported.
 	 */
 	switch (type) {
 	case MOD_LOAD:		/* module load, or boot if compiled in */
 	case MOD_QUIESCE:	/* before unload; could veto unloading */
 	case MOD_UNLOAD:	/* during unload */
 	case MOD_SHUTDOWN:	/* during system shutdown */
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /* Module description handed to DECLARE_MODULE() below. */
 static moduledata_t ipfwmod = {
 	"ipfw",			/* module name */
 	ipfw_modevent,		/* event handler */
 	0			/* no extra data */
 };
 
 /* Define startup order. */
 #define	IPFW_SI_SUB_FIREWALL	SI_SUB_PROTO_FIREWALL
 #define	IPFW_MODEVENT_ORDER	(SI_ORDER_ANY - 255) /* On boot slot in here. */
 #define	IPFW_MODULE_ORDER	(IPFW_MODEVENT_ORDER + 1) /* A little later. */
 #define	IPFW_VNET_ORDER		(IPFW_MODEVENT_ORDER + 2) /* Later still. */
 
 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
 FEATURE(ipfw_ctl3, "ipfw new sockopt calls");
 MODULE_VERSION(ipfw, 3);
 /* should declare some dependencies here */
 
 /*
  * Starting up. Done in order after ipfwmod() has been called:
  * ipfw_init() runs at IPFW_MODULE_ORDER, vnet_ipfw_init() at
  * IPFW_VNET_ORDER.
  * VNET_SYSINIT is also called for each existing vnet and each new vnet.
  */
 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_init, NULL);
 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_init, NULL);
  
 /*
  * Closing up shop. These are done in REVERSE ORDER, but still
  * after ipfwmod() has been called. Not called on reboot.
  * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
  * or when the module is unloaded.
  */
 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_destroy, NULL);
 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_uninit, NULL);
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_dynamic.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_dynamic.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_dynamic.c	(revision 343619)
@@ -1,3260 +1,3259 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2017-2018 Yandex LLC
  * Copyright (c) 2017-2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hash.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/pcpu.h>
 #include <sys/queue.h>
 #include <sys/rmlock.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
-#include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 
 #include <netinet/ip6.h>	/* IN6_ARE_ADDR_EQUAL */
 #ifdef INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 /*
  * Description of dynamic states.
  *
  * Dynamic states are stored in lists accessed through a hash tables
  * whose size is curr_dyn_buckets. This value can be modified through
  * the sysctl variable dyn_buckets.
  *
  * Currently there are four tables: dyn_ipv4, dyn_ipv6, dyn_ipv4_parent,
  * and dyn_ipv6_parent.
  *
  * When a packet is received, its address fields hashed, then matched
  * against the entries in the corresponding list by addr_type.
  * Dynamic states can be used for different purposes:
  *  + stateful rules;
  *  + enforcing limits on the number of sessions;
  *  + in-kernel NAT (not implemented yet)
  *
  * The lifetime of dynamic states is regulated by dyn_*_lifetime,
  * measured in seconds and depending on the flags.
  *
  * The total number of dynamic states is equal to UMA zone items count.
  * The max number of dynamic states is dyn_max. When we reach
  * the maximum number of rules we do not create anymore. This is
  * done to avoid consuming too much memory, but also too much
  * time when searching on each packet (ideally, we should try instead
  * to put a limit on the length of the list on each bucket...).
  *
  * Each state holds a pointer to the parent ipfw rule so we know what
  * action to perform. Dynamic rules are removed when the parent rule is
  * deleted.
  *
  * There are some limitations with dynamic rules -- we do not
  * obey the 'randomized match', and we do not do multiple
  * passes through the firewall. XXX check the latter!!!
  */
 
 /* Select the Jenkins hash function by default. */
 #define	IPFIREWALL_JENKINSHASH
 
 /*
  * Account one packet of pktlen bytes on the counters of d.
  * dir is the counter-name suffix (fwd or rev): pcnt_<dir> counts
  * packets, bcnt_<dir> accumulates bytes.
  */
 #define	DYN_COUNTER_INC(d, dir, pktlen)	do {	\
 	(d)->pcnt_ ## dir += 1;			\
 	(d)->bcnt_ ## dir += (pktlen);		\
 } while (0)
 
 /*
  * The DYN_REFERENCED flag marks a state that holds a reference to a
  * named object; that reference should be released when the state
  * expires.
  */
 #define	DYN_REFERENCED		0x01
 
 /*
  * Per-state data: cached location of the parent rule, TCP tracking
  * fields, timers and traffic counters.
  */
 struct dyn_data {
 	void		*parent;	/* pointer to parent rule */
 	uint32_t	chain_id;	/* cached ruleset id */
 	uint32_t	f_pos;		/* cached rule index */
 
 	uint32_t	hashval;	/* hash value used for hash resize */
 	uint16_t	fibnum;		/* fib used to send keepalives */
 	uint8_t		_pad[2];
 	uint8_t		flags;		/* internal flags */
 	uint8_t		set;		/* parent rule set number */
 	uint16_t	rulenum;	/* parent rule number */
 	uint32_t	ruleid;		/* parent rule id */
 
 	uint32_t	state;		/* TCP session state and flags */
 	uint32_t	ack_fwd;	/* most recent ACKs in forward */
 	uint32_t	ack_rev;	/* and reverse direction (used */
 					/* to generate keepalives) */
 	uint32_t	sync;		/* synchronization time */
 	uint32_t	expire;		/* expire time */
 
 	/* Updated by DYN_COUNTER_INC(): pcnt += 1, bcnt += pktlen. */
 	uint64_t	pcnt_fwd;	/* packets counter in forward */
 	uint64_t	bcnt_fwd;	/* bytes counter in forward */
 	uint64_t	pcnt_rev;	/* packets counter in reverse */
 	uint64_t	bcnt_rev;	/* bytes counter in reverse */
 };
 
 /*
  * Atomic accessors for dyn_parent.count. The argument p is a pointer to
  * struct dyn_parent and is parenthesized at every use, so any pointer
  * expression (e.g. &array[i]) can be passed safely.
  */
 #define	DPARENT_COUNT_DEC(p)	do {			\
 	MPASS((p)->count > 0);				\
 	ck_pr_dec_32(&(p)->count);			\
 } while (0)
 #define	DPARENT_COUNT_INC(p)	ck_pr_inc_32(&(p)->count)
 #define	DPARENT_COUNT(p)	ck_pr_load_32(&(p)->count)
 /* Parent ("limit") state data: tracks how many states are linked to it. */
 struct dyn_parent {
 	void		*parent;	/* pointer to parent rule */
 	uint32_t	count;		/* number of linked states */
 	uint8_t		_pad;
 	uint8_t		set;		/* parent rule set number */
 	uint16_t	rulenum;	/* parent rule number */
 	uint32_t	ruleid;		/* parent rule id */
 	uint32_t	hashval;	/* hash value used for hash resize */
 	uint32_t	expire;		/* expire time */
 };
 
 /*
  * IPv4 state entry: the lookup key (type, protocol, ports, addresses)
  * followed by a pointer to the type-specific payload and list linkage.
  */
 struct dyn_ipv4_state {
 	uint8_t		type;		/* State type */
 	uint8_t		proto;		/* UL Protocol */
 	uint16_t	kidx;		/* named object index */
 	uint16_t	sport, dport;	/* ULP source and destination ports */
 	in_addr_t	src, dst;	/* IPv4 source and destination */
 
 	/* Payload; presumably selected by 'type' -- TODO confirm. */
 	union {
 		struct dyn_data	*data;
 		struct dyn_parent *limit;
 	};
 	CK_SLIST_ENTRY(dyn_ipv4_state)	entry;	/* CK list linkage */
 	SLIST_ENTRY(dyn_ipv4_state)	expired; /* expired list linkage */
 };
 CK_SLIST_HEAD(dyn_ipv4ck_slist, dyn_ipv4_state);
 /* Per-VNET pointers to arrays of CK list heads (states and parents). */
 VNET_DEFINE_STATIC(struct dyn_ipv4ck_slist *, dyn_ipv4);
 VNET_DEFINE_STATIC(struct dyn_ipv4ck_slist *, dyn_ipv4_parent);
 
 /* Per-VNET list of expired IPv4 entries, linked via 'expired'. */
 SLIST_HEAD(dyn_ipv4_slist, dyn_ipv4_state);
 VNET_DEFINE_STATIC(struct dyn_ipv4_slist, dyn_expired_ipv4);
 #define	V_dyn_ipv4			VNET(dyn_ipv4)
 #define	V_dyn_ipv4_parent		VNET(dyn_ipv4_parent)
 #define	V_dyn_expired_ipv4		VNET(dyn_expired_ipv4)
 
 #ifdef INET6
 /*
  * IPv6 state entry: same layout idea as struct dyn_ipv4_state, with
  * IPv6 addresses and an additional scope zone id in the key.
  */
 struct dyn_ipv6_state {
 	uint8_t		type;		/* State type */
 	uint8_t		proto;		/* UL Protocol */
 	uint16_t	kidx;		/* named object index */
 	uint16_t	sport, dport;	/* ULP source and destination ports */
 	struct in6_addr	src, dst;	/* IPv6 source and destination */
 	uint32_t	zoneid;		/* IPv6 scope zone id */
 	/* Payload; presumably selected by 'type' -- TODO confirm. */
 	union {
 		struct dyn_data	*data;
 		struct dyn_parent *limit;
 	};
 	CK_SLIST_ENTRY(dyn_ipv6_state)	entry;	/* CK list linkage */
 	SLIST_ENTRY(dyn_ipv6_state)	expired; /* expired list linkage */
 };
 CK_SLIST_HEAD(dyn_ipv6ck_slist, dyn_ipv6_state);
 /* Per-VNET pointers to arrays of CK list heads (states and parents). */
 VNET_DEFINE_STATIC(struct dyn_ipv6ck_slist *, dyn_ipv6);
 VNET_DEFINE_STATIC(struct dyn_ipv6ck_slist *, dyn_ipv6_parent);
 
 /* Per-VNET list of expired IPv6 entries, linked via 'expired'. */
 SLIST_HEAD(dyn_ipv6_slist, dyn_ipv6_state);
 VNET_DEFINE_STATIC(struct dyn_ipv6_slist, dyn_expired_ipv6);
 #define	V_dyn_ipv6			VNET(dyn_ipv6)
 #define	V_dyn_ipv6_parent		VNET(dyn_ipv6_parent)
 #define	V_dyn_expired_ipv6		VNET(dyn_expired_ipv6)
 #endif /* INET6 */
 
 /*
  * Per-CPU hazard pointer: while it points at a state, that state is
  * in use and must not be reclaimed by the expiration callout.
  *
  * DYNSTATE_GET(cpu)	 - load the hazard pointer of the given CPU.
  * DYNSTATE_PROTECT(v)	 - publish v as the current CPU's hazard pointer.
  * DYNSTATE_RELEASE()	 - clear the current CPU's hazard pointer.
  * DYNSTATE_CRITICAL_ENTER()/EXIT() - bracket a critical section; EXIT
  *	also clears the hazard pointer before leaving the section.
  *
  * DYNSTATE_CRITICAL_EXIT() deliberately has no trailing semicolon in
  * its definition, so that "DYNSTATE_CRITICAL_EXIT();" expands to a
  * single statement and can be used in unbraced if/else bodies.
  */
 static void **dyn_hp_cache;
 DPCPU_DEFINE_STATIC(void *, dyn_hp);
 #define	DYNSTATE_GET(cpu)	ck_pr_load_ptr(DPCPU_ID_PTR((cpu), dyn_hp))
 #define	DYNSTATE_PROTECT(v)	ck_pr_store_ptr(DPCPU_PTR(dyn_hp), (v))
 #define	DYNSTATE_RELEASE()	DYNSTATE_PROTECT(NULL)
 #define	DYNSTATE_CRITICAL_ENTER()	critical_enter()
 #define	DYNSTATE_CRITICAL_EXIT()	do {	\
 	DYNSTATE_RELEASE();			\
 	critical_exit();			\
 } while (0)
 
 /*
  * We keep two version numbers per bucket: one is updated when a new entry
  * is added to the list, the other when an entry is deleted from the list.
  * Versions are updated under the bucket lock.
  *
  * The bucket "add" version number is used to detect whether, in the time
  * between a state lookup (i.e. ipfw_dyn_lookup_state()) and the following
  * state creation (i.e. ipfw_dyn_install_state()), another concurrent
  * thread installed some state in this bucket. If it did not, we can avoid
  * an additional state lookup, because we are sure that we will not
  * install the state twice.
  *
  * Likewise, by tracking the bucket "del" version during lookup we can
  * be sure that a state entry was not unlinked and freed in the time
  * between reading the state pointer and protecting it with a hazard
  * pointer.
  *
  * An entry unlinked from a CK list is kept unchanged until it is freed.
  * Unlinked entries are linked into expired lists using the "expired" field.
  */
 
 /*
  * dyn_expire_lock is used to protect access to dyn_expired_xxx lists.
  * dyn_bucket_lock is used to get write access to lists in specific bucket.
  * Currently one dyn_bucket_lock is used for all ipv4, ipv4_parent, ipv6,
  * and ipv6_parent lists.
  *
  * Both are per-VNET; dyn_bucket_lock points to an array of mutexes
  * indexed by bucket number (see the DYN_BUCKET_LOCK() macro).
  */
 VNET_DEFINE_STATIC(struct mtx, dyn_expire_lock);
 VNET_DEFINE_STATIC(struct mtx *, dyn_bucket_lock);
 #define	V_dyn_expire_lock		VNET(dyn_expire_lock)
 #define	V_dyn_bucket_lock		VNET(dyn_bucket_lock)
 
 /*
  * Bucket's add/delete generation versions.
  * Each variable points to an array of 32-bit counters indexed by bucket
  * number; they are read and bumped through DYN_BUCKET_VERSION() and
  * DYN_BUCKET_VERSION_BUMP().
  */
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_add);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_del);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_parent_add);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv4_parent_del);
 #define	V_dyn_ipv4_add			VNET(dyn_ipv4_add)
 #define	V_dyn_ipv4_del			VNET(dyn_ipv4_del)
 #define	V_dyn_ipv4_parent_add		VNET(dyn_ipv4_parent_add)
 #define	V_dyn_ipv4_parent_del		VNET(dyn_ipv4_parent_del)
 
 #ifdef INET6
 /* IPv6 counterparts of the version arrays above. */
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_add);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_del);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_parent_add);
 VNET_DEFINE_STATIC(uint32_t *, dyn_ipv6_parent_del);
 #define	V_dyn_ipv6_add			VNET(dyn_ipv6_add)
 #define	V_dyn_ipv6_del			VNET(dyn_ipv6_del)
 #define	V_dyn_ipv6_parent_add		VNET(dyn_ipv6_parent_add)
 #define	V_dyn_ipv6_parent_del		VNET(dyn_ipv6_parent_del)
 #endif /* INET6 */
 
 /*
  * Map hash value h to a bucket index for a table of b buckets by
  * masking with (b - 1); this is a plain modulo only when b is a power
  * of two. Both arguments are parenthesized so that expressions such
  * as "1 << n" can be passed for b.
  */
 #define	DYN_BUCKET(h, b)		((h) & ((b) - 1))
 /* Load / bump the version counter named v (e.g. ipv4_add) of bucket b. */
 #define	DYN_BUCKET_VERSION(b, v)	ck_pr_load_32(&V_dyn_ ## v[(b)])
 #define	DYN_BUCKET_VERSION_BUMP(b, v)	ck_pr_inc_32(&V_dyn_ ## v[(b)])
 
 /* Per-bucket mutex helpers; b is the bucket index. */
 #define	DYN_BUCKET_LOCK_INIT(lock, b)		\
     mtx_init(&lock[(b)], "IPFW dynamic bucket", NULL, MTX_DEF)
 #define	DYN_BUCKET_LOCK_DESTROY(lock, b)	mtx_destroy(&lock[(b)])
 #define	DYN_BUCKET_LOCK(b)	mtx_lock(&V_dyn_bucket_lock[(b)])
 #define	DYN_BUCKET_UNLOCK(b)	mtx_unlock(&V_dyn_bucket_lock[(b)])
 #define	DYN_BUCKET_ASSERT(b)	mtx_assert(&V_dyn_bucket_lock[(b)], MA_OWNED)
 
 /* Helpers for the mutex protecting the expired states lists. */
 #define	DYN_EXPIRED_LOCK_INIT()		\
     mtx_init(&V_dyn_expire_lock, "IPFW expired states list", NULL, MTX_DEF)
 #define	DYN_EXPIRED_LOCK_DESTROY()	mtx_destroy(&V_dyn_expire_lock)
 #define	DYN_EXPIRED_LOCK()		mtx_lock(&V_dyn_expire_lock)
 #define	DYN_EXPIRED_UNLOCK()		mtx_unlock(&V_dyn_expire_lock)
 
 /*
  * Hash table sizing (per VNET): the configured maximum number of
  * buckets, the current number of buckets, and a callout handle
  * (presumably the periodic expiration timer -- its use is not visible
  * in this part of the file).
  */
 VNET_DEFINE_STATIC(uint32_t, dyn_buckets_max);
 VNET_DEFINE_STATIC(uint32_t, curr_dyn_buckets);
 VNET_DEFINE_STATIC(struct callout, dyn_timeout);
 #define	V_dyn_buckets_max		VNET(dyn_buckets_max)
 #define	V_curr_dyn_buckets		VNET(curr_dyn_buckets)
 #define	V_dyn_timeout			VNET(dyn_timeout)
 
 /* Maximum length of states chain in a bucket */
 VNET_DEFINE_STATIC(uint32_t, curr_max_length);
 #define	V_curr_max_length		VNET(curr_max_length)
 
 /* NOTE(review): semantics of dyn_keep_states are not visible here. */
 VNET_DEFINE_STATIC(uint32_t, dyn_keep_states);
 #define	V_dyn_keep_states		VNET(dyn_keep_states)
 
 /* UMA zones the state data, parent data and state entries come from. */
 VNET_DEFINE_STATIC(uma_zone_t, dyn_data_zone);
 VNET_DEFINE_STATIC(uma_zone_t, dyn_parent_zone);
 VNET_DEFINE_STATIC(uma_zone_t, dyn_ipv4_zone);
 #ifdef INET6
 VNET_DEFINE_STATIC(uma_zone_t, dyn_ipv6_zone);
 #define	V_dyn_ipv6_zone			VNET(dyn_ipv6_zone)
 #endif /* INET6 */
 #define	V_dyn_data_zone			VNET(dyn_data_zone)
 #define	V_dyn_parent_zone		VNET(dyn_parent_zone)
 #define	V_dyn_ipv4_zone			VNET(dyn_ipv4_zone)
 
 /*
  * Timeouts for various events in handling dynamic rules.
  * Each value is per-VNET and exported read-write through a sysctl of
  * the same name under net.inet.ip.fw.
  */
 VNET_DEFINE_STATIC(uint32_t, dyn_ack_lifetime);
 VNET_DEFINE_STATIC(uint32_t, dyn_syn_lifetime);
 VNET_DEFINE_STATIC(uint32_t, dyn_fin_lifetime);
 VNET_DEFINE_STATIC(uint32_t, dyn_rst_lifetime);
 VNET_DEFINE_STATIC(uint32_t, dyn_udp_lifetime);
 VNET_DEFINE_STATIC(uint32_t, dyn_short_lifetime);
 
 #define	V_dyn_ack_lifetime		VNET(dyn_ack_lifetime)
 #define	V_dyn_syn_lifetime		VNET(dyn_syn_lifetime)
 #define	V_dyn_fin_lifetime		VNET(dyn_fin_lifetime)
 #define	V_dyn_rst_lifetime		VNET(dyn_rst_lifetime)
 #define	V_dyn_udp_lifetime		VNET(dyn_udp_lifetime)
 #define	V_dyn_short_lifetime		VNET(dyn_short_lifetime)
 
 /*
  * Keepalives are sent if dyn_keepalive is set. They are sent every
  * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
  * seconds of lifetime of a rule.
  * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
  * than dyn_keepalive_period.
  * dyn_keepalive_last is a timestamp; presumably the time keepalives
  * were last sent -- TODO confirm against its users.
  */
 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive_interval);
 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive_period);
 VNET_DEFINE_STATIC(uint32_t, dyn_keepalive);
 VNET_DEFINE_STATIC(time_t, dyn_keepalive_last);
 
 #define	V_dyn_keepalive_interval	VNET(dyn_keepalive_interval)
 #define	V_dyn_keepalive_period		VNET(dyn_keepalive_period)
 #define	V_dyn_keepalive			VNET(dyn_keepalive)
 #define	V_dyn_keepalive_last		VNET(dyn_keepalive_last)
 
 /* Per-VNET limits and current totals for states and parent states. */
 VNET_DEFINE_STATIC(uint32_t, dyn_max);		/* max # of dynamic states */
 VNET_DEFINE_STATIC(uint32_t, dyn_count);	/* number of states */
 VNET_DEFINE_STATIC(uint32_t, dyn_parent_max);	/* max # of parent states */
 VNET_DEFINE_STATIC(uint32_t, dyn_parent_count);	/* number of parent states */
 
 #define	V_dyn_max			VNET(dyn_max)
 #define	V_dyn_count			VNET(dyn_count)
 #define	V_dyn_parent_max		VNET(dyn_parent_max)
 #define	V_dyn_parent_count		VNET(dyn_parent_count)
 
 /*
  * Atomic helpers for the counters above; 'name' is the variable name
  * without the V_ prefix (e.g. dyn_count). DYN_COUNT_DEC() asserts the
  * counter is non-zero before decrementing it.
  */
 #define	DYN_COUNT_DEC(name)	do {			\
 	MPASS((V_ ## name) > 0);			\
 	ck_pr_dec_32(&(V_ ## name));			\
 } while (0)
 #define	DYN_COUNT_INC(name)	ck_pr_inc_32(&(V_ ## name))
 #define	DYN_COUNT(name)		ck_pr_load_32(&(V_ ## name))
 
 static time_t last_log;	/* Log ratelimiting */
 
 /*
  * Get/set maximum number of dynamic states in given VNET instance.
  */
 static int
 sysctl_dyn_max(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t limit;
 	int rc;
 
 	/* Let the generic handler operate on a private copy of the limit. */
 	limit = V_dyn_max;
 	rc = sysctl_handle_32(oidp, &limit, 0, req);
 	if (rc != 0)
 		return (rc);	/* the handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read-only request, nothing to apply */
 
 	/* Store the new limit and apply it to the state data zone. */
 	V_dyn_max = limit;
 	uma_zone_set_max(V_dyn_data_zone, V_dyn_max);
 	return (0);
 }
 
 static int
 sysctl_dyn_parent_max(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t limit;
 	int rc;
 
 	/* Let the generic handler operate on a private copy of the limit. */
 	limit = V_dyn_parent_max;
 	rc = sysctl_handle_32(oidp, &limit, 0, req);
 	if (rc != 0)
 		return (rc);	/* the handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read-only request, nothing to apply */
 
 	/* Store the new limit and apply it to the parent data zone. */
 	V_dyn_parent_max = limit;
 	uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max);
 	return (0);
 }
 
 /*
  * Get/set the maximum number of hash buckets for dynamic states.
  * A written value must satisfy 256 < value <= 2^31; it is rounded up
  * to the next power of two before being stored. Any other value is
  * rejected with EINVAL.
  */
 static int
 sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t nbuckets;
 	int error;
 
 	nbuckets = V_dyn_buckets_max;
 	error = sysctl_handle_32(oidp, &nbuckets, 0, req);
 	/* Read operation or some error */
 	if ((error != 0) || (req->newptr == NULL))
 		return (error);
 
 	/*
 	 * The upper bound keeps fls(nbuckets - 1) <= 31, so that the
 	 * shift below is always defined and the result fits in 32 bits.
 	 * Without it, values above 2^31 would shift by the full width
 	 * of the type, which is undefined behaviour.
 	 */
 	if (nbuckets <= 256 || nbuckets > (1U << 31))
 		return (EINVAL);
 	V_dyn_buckets_max = 1U << fls(nbuckets - 1);
 	return (0);
 }
 
 /*
  * sysctl variables published under the _net_inet_ip_fw node.
  * All of them carry CTLFLAG_VNET. Counters and current table sizes are
  * read-only (CTLFLAG_RD); limits are handled by the sysctl_dyn_*()
  * procedures above; lifetimes and the keepalive/keep_states switches are
  * plain read-write 32-bit values.
  */
 SYSCTL_DECL(_net_inet_ip_fw);
 
 /* Read-only counters and current hash table parameters. */
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_count,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_count), 0,
     "Current number of dynamic states.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_parent_count,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_parent_count), 0,
     "Current number of parent states. ");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
     "Current number of buckets for states hash table.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_max_length,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_max_length), 0,
     "Current maximum length of states chains in hash buckets.");
 /* Limits whose updates need extra processing in a handler. */
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
     CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_buckets,
     "IU", "Max number of buckets for dynamic states hash table.");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
     CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_max,
     "IU", "Max number of dynamic states.");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_parent_max,
     CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_parent_max,
     "IU", "Max number of parent dynamic states.");
 /* State lifetimes; they are added to time_uptime when a state is updated. */
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
     "Lifetime of dynamic states for TCP ACK.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
     "Lifetime of dynamic states for TCP SYN.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
     "Lifetime of dynamic states for TCP FIN.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
     "Lifetime of dynamic states for TCP RST.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
     "Lifetime of dynamic states for UDP.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
     "Lifetime of dynamic states for other situations.");
 /* Behaviour switches. */
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
     "Enable keepalives for dynamic states.");
 SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keep_states,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0,
     "Do not flush dynamic states on rule deletion");
 
 
 /*
  * Debug tracing. With IPFIREWALL_DYNDEBUG defined, DYN_DEBUG() prints the
  * calling function name followed by the formatted message and a newline.
  * Otherwise it expands to nothing. As written, the enabled variant needs
  * at least one argument after the format string.
  */
 #ifdef IPFIREWALL_DYNDEBUG
 #define	DYN_DEBUG(fmt, ...)	do {			\
 	printf("%s: " fmt "\n", __func__, __VA_ARGS__);	\
 } while (0)
 #else
 #define	DYN_DEBUG(fmt, ...)
 #endif /* !IPFIREWALL_DYNDEBUG */
 
 /*
  * Forward declarations.
  * The "_locked" lookup variants assert ownership of the bucket
  * (DYN_BUCKET_ASSERT) instead of using the versioned lockless traversal
  * of their counterparts.
  */
 #ifdef INET6
 /* Functions to work with IPv6 states */
 static struct dyn_ipv6_state *dyn_lookup_ipv6_state(
     const struct ipfw_flow_id *, uint32_t, const void *,
     struct ipfw_dyn_info *, int);
 static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *,
     uint32_t, const void *, int, uint32_t, uint16_t);
 static struct dyn_ipv6_state *dyn_alloc_ipv6_state(
     const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t);
 static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t,
     const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t,
     struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
 static void dyn_export_ipv6_state(const struct dyn_ipv6_state *,
     ipfw_dyn_rule *);
 
 static uint32_t dyn_getscopeid(const struct ip_fw_args *);
 static void dyn_make_keepalive_ipv6(struct mbuf *, const struct in6_addr *,
     const struct in6_addr *, uint32_t, uint32_t, uint32_t, uint16_t,
     uint16_t);
 static void dyn_enqueue_keepalive_ipv6(struct mbufq *,
     const struct dyn_ipv6_state *);
 static void dyn_send_keepalive_ipv6(struct ip_fw_chain *);
 
 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent(
     const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
     uint32_t);
 static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked(
     const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
     uint32_t);
 static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t,
     uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t,
     uint16_t);
 #endif /* INET6 */
 
 /* Functions to work with limit states */
 static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t,
     struct ip_fw *, uint32_t, uint32_t, uint16_t);
 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent(
     const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
 static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked(
     const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
 static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t,
     uint8_t, uint32_t);
 static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t,
     uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t);
 
 /* Timer, expiration, export and protocol state helpers */
 static void dyn_tick(void *);
 static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *);
 static void dyn_free_states(struct ip_fw_chain *);
 static void dyn_export_parent(const struct dyn_parent *, uint16_t,
     ipfw_dyn_rule *);
 static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t,
     ipfw_dyn_rule *);
 static uint32_t dyn_update_tcp_state(struct dyn_data *,
     const struct ipfw_flow_id *, const struct tcphdr *, int);
 static void dyn_update_proto_state(struct dyn_data *,
     const struct ipfw_flow_id *, const void *, int, int);
 
 /* Functions to work with IPv4 states */
 /*
  * NOTE(review): unlike its IPv6 counterpart, dyn_lookup_ipv4_state() is
  * declared without "static" - confirm whether external linkage is intended.
  */
 struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *,
     const void *, struct ipfw_dyn_info *, int);
 static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *,
     const void *, int, uint32_t, uint16_t);
 static struct dyn_ipv4_state *dyn_alloc_ipv4_state(
     const struct ipfw_flow_id *, uint16_t, uint8_t);
 static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t,
     const struct ipfw_flow_id *, const void *, int, uint32_t,
     struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
 static void dyn_export_ipv4_state(const struct dyn_ipv4_state *,
     ipfw_dyn_rule *);
 
 /*
  * Named states support.
  */
 /* Name used when a rule does not reference a state name (uidx == 0). */
 static char *default_state_name = "default";
 /*
  * Named object for a state name. The "no" header comes first and its
  * name pointer is set to the embedded "name" buffer by dyn_create().
  */
 struct dyn_state_obj {
 	struct named_object	no;
 	char			name[64];
 };
 
 /* Fetch the state name object referenced by the opcode's arg1 index. */
 #define	DYN_STATE_OBJ(ch, cmd)	\
     ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1))
 /*
  * Classifier callback.
  * When the opcode carries a state name index that should be referenced
  * or rewritten, store that index in *puidx, zero *ptype and return 0.
  * Return 1 (leaving the outputs untouched) otherwise.
  */
 static int
 dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 
 	DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
 	/* "check-state any" (O_CHECK_STATE with arg1 == 0) is not rewritten. */
 	if (cmd->opcode != O_CHECK_STATE || cmd->arg1 != 0) {
 		*puidx = cmd->arg1;
 		*ptype = 0;
 		return (0);
 	}
 	return (1);
 }
 
 /*
  * Update callback: store the given index into the opcode's arg1 field.
  */
 static void
 dyn_update(ipfw_insn *cmd, uint16_t idx)
 {
 
 	cmd->arg1 = idx;
 	/* Logged after the store, so the message shows the new arg1. */
 	DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
 }
 
 /*
  * Find the state name object referenced by ti.
  * The name is taken from the user supplied TLV buffer when ti->uidx is
  * non-zero, otherwise the default state name is used.
  * Returns EINVAL when the TLV buffer is missing or has no matching entry.
  * Otherwise returns 0 and stores the lookup result (possibly NULL)
  * in *pno.
  */
 static int
 dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
     struct named_object **pno)
 {
 	ipfw_obj_ntlv *tlv;
 	const char *state_name;
 
 	DYN_DEBUG("uidx %d", ti->uidx);
 	state_name = default_state_name;
 	if (ti->uidx != 0) {
 		if (ti->tlvs == NULL)
 			return (EINVAL);
 		/* Look the name up in the buffer provided by user. */
 		tlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 		    IPFW_TLV_STATE_NAME);
 		if (tlv == NULL)
 			return (EINVAL);
 		state_name = tlv->name;
 	}
 	/*
 	 * State objects are global, so the set value is ignored and
 	 * zero is passed to the lookup instead.
 	 */
 	*pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0,
 	    IPFW_TLV_STATE_NAME, state_name);
 	/*
 	 * Success is reported even when nothing was found.
 	 * The caller will check *pno and mark object as unresolved,
 	 * then it will automatically create "default" object.
 	 */
 	return (0);
 }
 
 /*
  * Find a named object by its kernel index in the chain's CHAIN_TO_SRV()
  * object hash. Returns whatever ipfw_objhash_lookup_kidx() returns.
  */
 static struct named_object *
 dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
 {
 
 	DYN_DEBUG("kidx %d", idx);
 	return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
 }
 
 /*
  * Create (or reference an already existing) state name object.
  * The name comes from the user supplied TLV buffer when ti->uidx is
  * non-zero, otherwise the default state name is used.
  * On success the object's kernel index is stored in *pkidx, its refcnt
  * is incremented and 0 is returned.
  * Returns EINVAL when the TLV buffer is missing or has no matching entry
  * and ENOSPC when no kernel index can be allocated.
  */
 static int
 dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
     uint16_t *pkidx)
 {
 	struct namedobj_instance *srv;
 	struct dyn_state_obj *fresh;
 	struct named_object *existing;
 	ipfw_obj_ntlv *tlv;
 	char *name;
 
 	DYN_DEBUG("uidx %d", ti->uidx);
 	name = default_state_name;
 	if (ti->uidx != 0) {
 		if (ti->tlvs == NULL)
 			return (EINVAL);
 		tlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 		    IPFW_TLV_STATE_NAME);
 		if (tlv == NULL)
 			return (EINVAL);
 		name = tlv->name;
 	}
 
 	/* The candidate object is prepared before the lock is taken. */
 	srv = CHAIN_TO_SRV(ch);
 	fresh = malloc(sizeof(*fresh), M_IPFW, M_WAITOK | M_ZERO);
 	fresh->no.name = fresh->name;
 	fresh->no.etlv = IPFW_TLV_STATE_NAME;
 	strlcpy(fresh->name, name, sizeof(fresh->name));
 
 	IPFW_UH_WLOCK(ch);
 	existing = ipfw_objhash_lookup_name_type(srv, 0,
 	    IPFW_TLV_STATE_NAME, name);
 	if (existing == NULL) {
 		/* No such object yet: publish the one prepared above. */
 		if (ipfw_objhash_alloc_idx(srv, &fresh->no.kidx) != 0) {
 			DYN_DEBUG("\talloc_idx failed for %s", name);
 			IPFW_UH_WUNLOCK(ch);
 			free(fresh, M_IPFW);
 			return (ENOSPC);
 		}
 		ipfw_objhash_add(srv, &fresh->no);
 		SRV_OBJECT(ch, fresh->no.kidx) = fresh;
 		fresh->no.refcnt++;
 		*pkidx = fresh->no.kidx;
 		IPFW_UH_WUNLOCK(ch);
 		DYN_DEBUG("\tcreated kidx %d", *pkidx);
 		return (0);
 	}
 	/*
 	 * The object already exists: hand out its kidx, take a
 	 * reference and discard the unused candidate.
 	 */
 	*pkidx = existing->kidx;
 	existing->refcnt++;
 	IPFW_UH_WUNLOCK(ch);
 	free(fresh, M_IPFW);
 	DYN_DEBUG("\tfound kidx %d", *pkidx);
 	return (0);
 }
 
 /*
  * Destroy a state name object.
  * The caller must hold the UH write lock. The object must be of type
  * IPFW_TLV_STATE_NAME and hold exactly one reference (both are checked
  * with KASSERT). The object is removed from the SRV_OBJECT() slot and
  * the object hash, its kernel index is released and its memory freed.
  */
 static void
 dyn_destroy(struct ip_fw_chain *ch, struct named_object *no)
 {
 	struct namedobj_instance *srv;
 	struct dyn_state_obj *victim;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	KASSERT(no->etlv == IPFW_TLV_STATE_NAME,
 	    ("%s: wrong object type %u", __func__, no->etlv));
 	KASSERT(no->refcnt == 1,
 	    ("Destroying object '%s' (type %u, idx %u) with refcnt %u",
 	    no->name, no->etlv, no->kidx, no->refcnt));
 	DYN_DEBUG("kidx %d", no->kidx);
 
 	srv = CHAIN_TO_SRV(ch);
 	/* Unhook the object from its index slot first. */
 	victim = SRV_OBJECT(ch, no->kidx);
 	SRV_OBJECT(ch, no->kidx) = NULL;
 	ipfw_objhash_del(srv, no);
 	ipfw_objhash_free_idx(srv, no->kidx);
 
 	free(victim, M_IPFW);
 }
 
 /*
  * Rewrite handlers for the opcodes that reference a state name:
  * O_KEEP_STATE, O_CHECK_STATE, O_PROBE_STATE and O_LIMIT. All four
  * entries use IPFW_TLV_STATE_NAME and the same set of callbacks.
  * The initializers are positional; presumably the order is opcode,
  * TLV type, classify, update, find-by-name, find-by-kidx, create and
  * destroy - confirm against the definition of struct opcode_obj_rewrite.
  */
 static struct opcode_obj_rewrite dyn_opcodes[] = {
 	{
 		O_KEEP_STATE, IPFW_TLV_STATE_NAME,
 		dyn_classify, dyn_update,
 		dyn_findbyname, dyn_findbykidx,
 		dyn_create, dyn_destroy
 	},
 	{
 		O_CHECK_STATE, IPFW_TLV_STATE_NAME,
 		dyn_classify, dyn_update,
 		dyn_findbyname, dyn_findbykidx,
 		dyn_create, dyn_destroy
 	},
 	{
 		O_PROBE_STATE, IPFW_TLV_STATE_NAME,
 		dyn_classify, dyn_update,
 		dyn_findbyname, dyn_findbykidx,
 		dyn_create, dyn_destroy
 	},
 	{
 		O_LIMIT, IPFW_TLV_STATE_NAME,
 		dyn_classify, dyn_update,
 		dyn_findbyname, dyn_findbykidx,
 		dyn_create, dyn_destroy
 	},
 };
 
 /*
  * IMPORTANT: the hash function for dynamic rules must be commutative
  * in source and destination (ip,port), because rules are bidirectional
  * and we want to find both in the same bucket.
  */
 #ifndef IPFIREWALL_JENKINSHASH
 /*
  * XOR based flow hash.
  * For IPv6 flows the two low 32-bit words of both addresses are XORed
  * and passed through ntohl(); for other flows the IPv4 addresses are
  * XORed. Both port numbers are then XORed in. Swapping source and
  * destination leaves the result unchanged.
  */
 static __inline uint32_t
 hash_packet(const struct ipfw_flow_id *id)
 {
 	uint32_t addrs;
 
 #ifdef INET6
 	if (IS_IP6_FLOW_ID(id))
 		addrs = ntohl((id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
 		    (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
 		    (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
 		    (id->src_ip6.__u6_addr.__u6_addr32[3]));
 	else
 #endif /* INET6 */
 		addrs = (id->dst_ip) ^ (id->src_ip);
 	return (addrs ^ (id->dst_port) ^ (id->src_port));
 }
 
 /*
  * Hash for parent states: the flow hash XORed with the rule pointer
  * value. The XOR is computed on uintptr_t and converted to the uint32_t
  * return type.
  */
 static __inline uint32_t
 hash_parent(const struct ipfw_flow_id *id, const void *rule)
 {
 
 	return (hash_packet(id) ^ ((uintptr_t)rule));
 }
 
 #else /* IPFIREWALL_JENKINSHASH */
 
 /* Per-VNET seed passed to jenkins_hash32() by the hash functions below. */
 VNET_DEFINE_STATIC(uint32_t, dyn_hashseed);
 #define	V_dyn_hashseed		VNET(dyn_hashseed)
 
 /*
  * Order the two IPv4 endpoints of a flow.
  * Returns 0 when (src_ip, src_port) sorts before or equal to
  * (dst_ip, dst_port) and 1 otherwise. Addresses are compared first,
  * ports only when the addresses are equal.
  */
 static __inline int
 addrcmp4(const struct ipfw_flow_id *id)
 {
 
 	if (id->src_ip != id->dst_ip)
 		return (id->src_ip > id->dst_ip);
 	return (id->src_port > id->dst_port);
 }
 
 #ifdef INET6
 /*
  * Order the two IPv6 endpoints of a flow.
  * Returns 0 when (src_ip6, src_port) sorts before or equal to
  * (dst_ip6, dst_port) and 1 otherwise. Addresses are compared with
  * memcmp() first, ports only when the addresses are equal.
  */
 static __inline int
 addrcmp6(const struct ipfw_flow_id *id)
 {
 	int order;
 
 	order = memcmp(&id->src_ip6, &id->dst_ip6, sizeof(struct in6_addr));
 	if (order != 0)
 		return (order > 0);
 	return (id->src_port > id->dst_port);
 }
 
 /*
  * Jenkins hash of an IPv6 flow.
  * The endpoints are stored in the order given by addrcmp6() before
  * hashing, so both directions of a flow produce the same value.
  */
 static __inline uint32_t
 hash_packet6(const struct ipfw_flow_id *id)
 {
 	struct tuple6 {
 		struct in6_addr	addr[2];
 		uint16_t	port[2];
 	} t6;
 	int swap;
 
 	/* addrcmp6() returns 1 when the destination has to go first. */
 	swap = addrcmp6(id);
 	t6.addr[0] = swap ? id->dst_ip6 : id->src_ip6;
 	t6.addr[1] = swap ? id->src_ip6 : id->dst_ip6;
 	t6.port[0] = swap ? id->dst_port : id->src_port;
 	t6.port[1] = swap ? id->src_port : id->dst_port;
 	return (jenkins_hash32((const uint32_t *)&t6,
 	    sizeof(t6) / sizeof(uint32_t), V_dyn_hashseed));
 }
 #endif
 
 /*
  * Jenkins hash of a flow.
  * IPv4 flows are hashed here with the endpoints stored in the order
  * given by addrcmp4(), so both directions of a flow produce the same
  * value. IPv6 flows are handed to hash_packet6() when INET6 is
  * configured. Any other flow hashes to 0.
  */
 static __inline uint32_t
 hash_packet(const struct ipfw_flow_id *id)
 {
 	struct tuple4 {
 		in_addr_t	addr[2];
 		uint16_t	port[2];
 	} t4;
 	int swap;
 
 	if (!IS_IP4_FLOW_ID(id)) {
 #ifdef INET6
 		if (IS_IP6_FLOW_ID(id))
 			return (hash_packet6(id));
 #endif
 		return (0);
 	}
 
 	/* All fields are in host byte order */
 	swap = addrcmp4(id);
 	t4.addr[0] = swap ? id->dst_ip : id->src_ip;
 	t4.addr[1] = swap ? id->src_ip : id->dst_ip;
 	t4.port[0] = swap ? id->dst_port : id->src_port;
 	t4.port[1] = swap ? id->src_port : id->dst_port;
 	return (jenkins_hash32((const uint32_t *)&t4,
 	    sizeof(t4) / sizeof(uint32_t), V_dyn_hashseed));
 }
 
 /*
  * Hash for parent states: Jenkins hash over the bytes of the rule
  * pointer itself (not the memory it points to), using the flow hash
  * as the initial value.
  */
 static __inline uint32_t
 hash_parent(const struct ipfw_flow_id *id, const void *rule)
 {
 
 	return (jenkins_hash32((const uint32_t *)&rule,
 	    sizeof(rule) / sizeof(uint32_t), hash_packet(id)));
 }
 #endif /* IPFIREWALL_JENKINSHASH */
 
 /*
  * Print customizable flow id description via log(9) facility.
  */
 static void
 print_dyn_rule_flags(const struct ipfw_flow_id *id, int dyn_type,
     int log_flags, char *prefix, char *postfix)
 {
 	struct in_addr da;
 #ifdef INET6
 	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
 #else
 	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
 #endif
 
 #ifdef INET6
 	if (IS_IP6_FLOW_ID(id)) {
 		ip6_sprintf(src, &id->src_ip6);
 		ip6_sprintf(dst, &id->dst_ip6);
 	} else
 #endif
 	{
 		da.s_addr = htonl(id->src_ip);
 		inet_ntop(AF_INET, &da, src, sizeof(src));
 		da.s_addr = htonl(id->dst_ip);
 		inet_ntop(AF_INET, &da, dst, sizeof(dst));
 	}
 	log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n",
 	    prefix, dyn_type, src, id->src_port, dst,
 	    id->dst_port, V_dyn_count, postfix);
 }
 
 /* Same as print_dyn_rule_flags() with the LOG_DEBUG priority. */
 #define	print_dyn_rule(id, dtype, prefix, postfix)	\
 	print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix)
 
 /*
  * Time and sequence number comparisons. The difference is converted to
  * int before it is compared with zero.
  */
 #define	TIME_LEQ(a,b)	((int)((a)-(b)) <= 0)
 #define	TIME_LE(a,b)	((int)((a)-(b)) < 0)
 #define	_SEQ_GE(a,b)	((int)((a)-(b)) >= 0)
 /*
  * TCP state bits. Flags seen in the forward direction are kept in the
  * low byte, flags seen in the reverse direction are shifted left by 8
  * (see dyn_update_tcp_state()). The ACK_* bits are stored above them.
  */
 #define	BOTH_SYN	(TH_SYN | (TH_SYN << 8))
 #define	BOTH_FIN	(TH_FIN | (TH_FIN << 8))
 #define	TCP_FLAGS	(TH_FLAGS | (TH_FLAGS << 8))
 #define	ACK_FWD		0x00010000	/* fwd ack seen */
 #define	ACK_REV		0x00020000	/* rev ack seen */
 #define	ACK_BOTH	(ACK_FWD | ACK_REV)
 
 /*
  * Update the TCP state kept in data for a packet seen in direction dir
  * and return the new expiration time.
  *  data - state to update;
  *  pkt - flow id of the packet, its _flags field supplies the TCP flags;
  *  tcp - TCP header, may be NULL, in that case ACK tracking is skipped;
  *  dir - MATCH_FORWARD or the reverse direction.
  * The expiration time is only returned here, storing it into
  * data->expire is left to the caller.
  */
 static uint32_t
 dyn_update_tcp_state(struct dyn_data *data, const struct ipfw_flow_id *pkt,
     const struct tcphdr *tcp, int dir)
 {
 	uint32_t ack, expire;
 	uint32_t state, old;
 	uint8_t th_flags;
 
 	/* Start from the current expire value; some paths keep it as is. */
 	expire = data->expire;
 	old = state = data->state;
 	th_flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST);
 	/* Forward flags go to the low byte, reverse flags to the next one. */
 	state |= (dir == MATCH_FORWARD) ? th_flags: (th_flags << 8);
 	switch (state & TCP_FLAGS) {
 	case TH_SYN:			/* opening */
 		expire = time_uptime + V_dyn_syn_lifetime;
 		break;
 
 	case BOTH_SYN:			/* move to established */
 	case BOTH_SYN | TH_FIN:		/* one side tries to close */
 	case BOTH_SYN | (TH_FIN << 8):
 		if (tcp == NULL)
 			break;
 		ack = ntohl(tcp->th_ack);
 		/*
 		 * Remember the ACK number for this direction when none was
 		 * recorded yet or when it does not go backwards.
 		 */
 		if (dir == MATCH_FORWARD) {
 			if (data->ack_fwd == 0 ||
 			    _SEQ_GE(ack, data->ack_fwd)) {
 				state |= ACK_FWD;
 				if (data->ack_fwd != ack)
 					ck_pr_store_32(&data->ack_fwd, ack);
 			}
 		} else {
 			if (data->ack_rev == 0 ||
 			    _SEQ_GE(ack, data->ack_rev)) {
 				state |= ACK_REV;
 				if (data->ack_rev != ack)
 					ck_pr_store_32(&data->ack_rev, ack);
 			}
 		}
 		if ((state & ACK_BOTH) == ACK_BOTH) {
 			/*
 			 * Set expire time to V_dyn_ack_lifetime only if
 			 * we got ACKs for both directions.
 			 * We use XOR here to avoid possible state
 			 * overwriting in concurrent thread.
 			 */
 			expire = time_uptime + V_dyn_ack_lifetime;
 			ck_pr_xor_32(&data->state, ACK_BOTH);
 		} else if ((data->state & ACK_BOTH) != (state & ACK_BOTH))
 			ck_pr_or_32(&data->state, state & ACK_BOTH);
 		break;
 
 	case BOTH_SYN | BOTH_FIN:	/* both sides closed */
 		/*
 		 * Note that the sysctl variable itself is clamped here.
 		 * NOTE(review): unlike the default case below, this clamp
 		 * does not depend on V_dyn_keepalive - confirm it is intended.
 		 */
 		if (V_dyn_fin_lifetime >= V_dyn_keepalive_period)
 			V_dyn_fin_lifetime = V_dyn_keepalive_period - 1;
 		expire = time_uptime + V_dyn_fin_lifetime;
 		break;
 
 	default:
 		/* Any other flags combination uses the RST lifetime. */
 		if (V_dyn_keepalive != 0 &&
 		    V_dyn_rst_lifetime >= V_dyn_keepalive_period)
 			V_dyn_rst_lifetime = V_dyn_keepalive_period - 1;
 		expire = time_uptime + V_dyn_rst_lifetime;
 	}
 	/* Save TCP state if it was changed */
 	if ((state & TCP_FLAGS) != (old & TCP_FLAGS))
 		ck_pr_or_32(&data->state, state & TCP_FLAGS);
 	return (expire);
 }
 
 /*
  * Update ULP specific state.
  * TCP states are handled by dyn_update_tcp_state(), which tracks flags
  * and ACK numbers and computes the expiration time. For the other
  * protocols only the expiration time is recalculated: UDP and UDP-Lite
  * use V_dyn_udp_lifetime, everything else V_dyn_short_lifetime.
  * The packets and bytes counters for the given direction are updated
  * here too.
  */
 static void
 dyn_update_proto_state(struct dyn_data *data, const struct ipfw_flow_id *pkt,
     const void *ulp, int pktlen, int dir)
 {
 	uint32_t deadline;
 
 	/* NOTE: we are in critical section here. */
 	if (pkt->proto == IPPROTO_TCP)
 		deadline = dyn_update_tcp_state(data, pkt, ulp, dir);
 	else if (pkt->proto == IPPROTO_UDP || pkt->proto == IPPROTO_UDPLITE)
 		deadline = time_uptime + V_dyn_udp_lifetime;
 	else
 		deadline = time_uptime + V_dyn_short_lifetime;
 
 	/*
 	 * The timer has per-second granularity, so the store is skipped
 	 * when the value would not change.
 	 */
 	if (data->expire != deadline)
 		ck_pr_store_32(&data->expire, deadline);
 
 	if (dir == MATCH_FORWARD)
 		DYN_COUNTER_INC(data, fwd, pktlen);
 	else
 		DYN_COUNTER_INC(data, rev, pktlen);
 }
 
 /*
  * Lookup IPv4 state.
  * Must be called in critical section.
  *  pkt - flow id of the packet;
  *  ulp, pktlen - passed to dyn_update_proto_state() on match;
  *  info - supplies hashval and kidx; version and, on match, direction
  *         are filled in.
  * Returns the matched state (protocol state and counters already
  * updated) or NULL.
  */
 struct dyn_ipv4_state *
 dyn_lookup_ipv4_state(const struct ipfw_flow_id *pkt, const void *ulp,
     struct ipfw_dyn_info *info, int pktlen)
 {
 	struct dyn_ipv4_state *s;
 	uint32_t version, bucket;
 
 	bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets);
 	/* Remember the "add" version of the bucket for the caller. */
 	info->version = DYN_BUCKET_VERSION(bucket, ipv4_add);
 restart:
 	version = DYN_BUCKET_VERSION(bucket, ipv4_del);
 	CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) {
 		/*
 		 * Presumably publishes s as the protected (hazard) pointer -
 		 * confirm against the DYNSTATE_PROTECT() definition.
 		 */
 		DYNSTATE_PROTECT(s);
 		/* The "del" version has changed: start the walk again. */
 		if (version != DYN_BUCKET_VERSION(bucket, ipv4_del))
 			goto restart;
 		if (s->proto != pkt->proto)
 			continue;
 		/* A zero info->kidx matches states with any kidx. */
 		if (info->kidx != 0 && s->kidx != info->kidx)
 			continue;
 		if (s->sport == pkt->src_port && s->dport == pkt->dst_port &&
 		    s->src == pkt->src_ip && s->dst == pkt->dst_ip) {
 			info->direction = MATCH_FORWARD;
 			break;
 		}
 		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
 		    s->src == pkt->dst_ip && s->dst == pkt->src_ip) {
 			info->direction = MATCH_REVERSE;
 			break;
 		}
 	}
 
 	/* s is NULL here when the whole chain was walked without a match. */
 	if (s != NULL)
 		dyn_update_proto_state(s->data, pkt, ulp, pktlen,
 		    info->direction);
 	return (s);
 }
 
 /*
  * Lookup IPv4 state in the given bucket.
  * Simplified version that is used to check that a matching state does
  * not exist yet. Unlike the lockless lookup, the kidx has to match
  * exactly.
  * On match the protocol state and counters are updated and 1 is
  * returned, otherwise 0 is returned.
  */
 static int
 dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt,
     const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx)
 {
 	struct dyn_ipv4_state *st;
 	int direction;
 
 	direction = MATCH_NONE;
 	DYN_BUCKET_ASSERT(bucket);
 	CK_SLIST_FOREACH(st, &V_dyn_ipv4[bucket], entry) {
 		if (st->proto != pkt->proto || st->kidx != kidx)
 			continue;
 		/* Same endpoints in the same order. */
 		if (st->sport == pkt->src_port &&
 		    st->dport == pkt->dst_port &&
 		    st->src == pkt->src_ip && st->dst == pkt->dst_ip) {
 			direction = MATCH_FORWARD;
 			break;
 		}
 		/* Same endpoints in the swapped order. */
 		if (st->sport == pkt->dst_port &&
 		    st->dport == pkt->src_port &&
 		    st->src == pkt->dst_ip && st->dst == pkt->src_ip) {
 			direction = MATCH_REVERSE;
 			break;
 		}
 	}
 	if (st == NULL)
 		return (0);
 	dyn_update_proto_state(st->data, pkt, ulp, pktlen, direction);
 	return (1);
 }
 
 /*
  * Lookup IPv4 parent state using the versioned lockless traversal.
  * A parent matches when its rule pointer, ruleid, rulenum, protocol and
  * both endpoints (forward direction only) are equal to the given ones.
  * On match the parent's expire time is set to
  * time_uptime + V_dyn_short_lifetime and the state is returned,
  * otherwise NULL is returned.
  * NOTE(review): the forward declaration of this function is static,
  * the definition omits the keyword.
  */
 struct dyn_ipv4_state *
 dyn_lookup_ipv4_parent(const struct ipfw_flow_id *pkt, const void *rule,
     uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
 {
 	struct dyn_ipv4_state *s;
 	uint32_t version, bucket;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 restart:
 	version = DYN_BUCKET_VERSION(bucket, ipv4_parent_del);
 	CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) {
 		DYNSTATE_PROTECT(s);
 		/* The "del" version has changed: start the walk again. */
 		if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_del))
 			goto restart;
 		/*
 		 * NOTE: we do not need to check kidx, because parent rule
 		 * can not create states with different kidx.
 		 * And parent rule always created for forward direction.
 		 */
 		if (s->limit->parent == rule &&
 		    s->limit->ruleid == ruleid &&
 		    s->limit->rulenum == rulenum &&
 		    s->proto == pkt->proto &&
 		    s->sport == pkt->src_port &&
 		    s->dport == pkt->dst_port &&
 		    s->src == pkt->src_ip && s->dst == pkt->dst_ip) {
 			/* Refresh the lifetime; skip the store if unchanged. */
 			if (s->limit->expire != time_uptime +
 			    V_dyn_short_lifetime)
 				ck_pr_store_32(&s->limit->expire,
 				    time_uptime + V_dyn_short_lifetime);
 			break;
 		}
 	}
 	return (s);
 }
 
 /*
  * Lookup IPv4 parent state in the given bucket.
  * A parent matches when its rule pointer, ruleid, rulenum, protocol and
  * both endpoints (forward direction only) are equal to the given ones.
  * The expire time is not touched. Returns the parent state or NULL.
  */
 static struct dyn_ipv4_state *
 dyn_lookup_ipv4_parent_locked(const struct ipfw_flow_id *pkt,
     const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
 {
 	struct dyn_ipv4_state *st;
 
 	DYN_BUCKET_ASSERT(bucket);
 	CK_SLIST_FOREACH(st, &V_dyn_ipv4_parent[bucket], entry) {
 		if (st->limit->parent != rule ||
 		    st->limit->ruleid != ruleid ||
 		    st->limit->rulenum != rulenum ||
 		    st->proto != pkt->proto ||
 		    st->sport != pkt->src_port ||
 		    st->dport != pkt->dst_port ||
 		    st->src != pkt->src_ip || st->dst != pkt->dst_ip)
 			continue;
 		return (st);
 	}
 	return (NULL);
 }
 
 
 #ifdef INET6
 /*
  * Determine the scope zone id for the flow described by args.
  * When neither address of the flow is link-local, 0 is returned.
  * Otherwise the link-local zone of the outgoing interface is returned,
  * or that of the receiving interface of the mbuf when there is no
  * outgoing one.
  */
 static uint32_t
 dyn_getscopeid(const struct ip_fw_args *args)
 {
 
 	/* Only link-local addresses need a zone id to be unambiguous. */
 	if (!IN6_IS_ADDR_LINKLOCAL(&args->f_id.src_ip6) &&
 	    !IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6))
 		return (0);
 
 	MPASS(args->oif != NULL ||
 	    args->m->m_pkthdr.rcvif != NULL);
 	return (in6_getscopezone(args->oif != NULL ? args->oif:
 	    args->m->m_pkthdr.rcvif, IPV6_ADDR_SCOPE_LINKLOCAL));
 }
 
 /*
  * Lookup IPv6 state.
  * Must be called in critical section.
  *  pkt - flow id of the packet;
  *  zoneid - scope zone id, must be equal to the one stored in the state;
  *  ulp, pktlen - passed to dyn_update_proto_state() on match;
  *  info - supplies hashval and kidx; version and, on match, direction
  *         are filled in.
  * Returns the matched state (protocol state and counters already
  * updated) or NULL.
  */
 static struct dyn_ipv6_state *
 dyn_lookup_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     const void *ulp, struct ipfw_dyn_info *info, int pktlen)
 {
 	struct dyn_ipv6_state *s;
 	uint32_t version, bucket;
 
 	bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets);
 	/* Remember the "add" version of the bucket for the caller. */
 	info->version = DYN_BUCKET_VERSION(bucket, ipv6_add);
 restart:
 	version = DYN_BUCKET_VERSION(bucket, ipv6_del);
 	CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) {
 		/*
 		 * Presumably publishes s as the protected (hazard) pointer -
 		 * confirm against the DYNSTATE_PROTECT() definition.
 		 */
 		DYNSTATE_PROTECT(s);
 		/* The "del" version has changed: start the walk again. */
 		if (version != DYN_BUCKET_VERSION(bucket, ipv6_del))
 			goto restart;
 		if (s->proto != pkt->proto || s->zoneid != zoneid)
 			continue;
 		/* A zero info->kidx matches states with any kidx. */
 		if (info->kidx != 0 && s->kidx != info->kidx)
 			continue;
 		if (s->sport == pkt->src_port && s->dport == pkt->dst_port &&
 		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) {
 			info->direction = MATCH_FORWARD;
 			break;
 		}
 		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
 		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) {
 			info->direction = MATCH_REVERSE;
 			break;
 		}
 	}
 	/* s is NULL here when the whole chain was walked without a match. */
 	if (s != NULL)
 		dyn_update_proto_state(s->data, pkt, ulp, pktlen,
 		    info->direction);
 	return (s);
 }
 
 /*
  * Lookup IPv6 state in the given bucket.
  * Simplified version that is used to check that a matching state does
  * not exist yet. Protocol, kidx and zone id have to match exactly.
  * On match the protocol state and counters are updated and 1 is
  * returned, otherwise 0 is returned.
  */
 static int
 dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     const void *ulp, int pktlen, uint32_t bucket, uint16_t kidx)
 {
 	struct dyn_ipv6_state *st;
 	int direction;
 
 	direction = MATCH_NONE;
 	DYN_BUCKET_ASSERT(bucket);
 	CK_SLIST_FOREACH(st, &V_dyn_ipv6[bucket], entry) {
 		if (st->proto != pkt->proto || st->kidx != kidx ||
 		    st->zoneid != zoneid)
 			continue;
 		/* Same endpoints in the same order. */
 		if (st->sport == pkt->src_port &&
 		    st->dport == pkt->dst_port &&
 		    IN6_ARE_ADDR_EQUAL(&st->src, &pkt->src_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&st->dst, &pkt->dst_ip6)) {
 			direction = MATCH_FORWARD;
 			break;
 		}
 		/* Same endpoints in the swapped order. */
 		if (st->sport == pkt->dst_port &&
 		    st->dport == pkt->src_port &&
 		    IN6_ARE_ADDR_EQUAL(&st->src, &pkt->dst_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&st->dst, &pkt->src_ip6)) {
 			direction = MATCH_REVERSE;
 			break;
 		}
 	}
 	if (st == NULL)
 		return (0);
 	dyn_update_proto_state(st->data, pkt, ulp, pktlen, direction);
 	return (1);
 }
 
 /*
  * Lookup IPv6 parent state using the versioned lockless traversal.
  * A parent matches when its rule pointer, ruleid, rulenum, protocol,
  * zone id and both endpoints (forward direction only) are equal to the
  * given ones.
  * On match the parent's expire time is set to
  * time_uptime + V_dyn_short_lifetime and the state is returned,
  * otherwise NULL is returned.
  */
 static struct dyn_ipv6_state *
 dyn_lookup_ipv6_parent(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
 {
 	struct dyn_ipv6_state *s;
 	uint32_t version, bucket;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 restart:
 	version = DYN_BUCKET_VERSION(bucket, ipv6_parent_del);
 	CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) {
 		DYNSTATE_PROTECT(s);
 		/* The "del" version has changed: start the walk again. */
 		if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_del))
 			goto restart;
 		/*
 		 * NOTE: we do not need to check kidx, because parent rule
 		 * can not create states with different kidx.
 		 * Also parent rule always created for forward direction.
 		 */
 		if (s->limit->parent == rule &&
 		    s->limit->ruleid == ruleid &&
 		    s->limit->rulenum == rulenum &&
 		    s->proto == pkt->proto &&
 		    s->sport == pkt->src_port &&
 		    s->dport == pkt->dst_port && s->zoneid == zoneid &&
 		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) {
 			/* Refresh the lifetime; skip the store if unchanged. */
 			if (s->limit->expire != time_uptime +
 			    V_dyn_short_lifetime)
 				ck_pr_store_32(&s->limit->expire,
 				    time_uptime + V_dyn_short_lifetime);
 			break;
 		}
 	}
 	return (s);
 }
 
 /*
  * Lookup IPv6 parent state in the given bucket.
  * A parent matches when its rule pointer, ruleid, rulenum, protocol,
  * zone id and both endpoints (forward direction only) are equal to the
  * given ones. The expire time is not touched.
  * Returns the parent state or NULL.
  */
 static struct dyn_ipv6_state *
 dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
 {
 	struct dyn_ipv6_state *st;
 
 	DYN_BUCKET_ASSERT(bucket);
 	CK_SLIST_FOREACH(st, &V_dyn_ipv6_parent[bucket], entry) {
 		if (st->limit->parent != rule ||
 		    st->limit->ruleid != ruleid ||
 		    st->limit->rulenum != rulenum ||
 		    st->proto != pkt->proto ||
 		    st->sport != pkt->src_port ||
 		    st->dport != pkt->dst_port || st->zoneid != zoneid ||
 		    !IN6_ARE_ADDR_EQUAL(&st->src, &pkt->src_ip6) ||
 		    !IN6_ARE_ADDR_EQUAL(&st->dst, &pkt->dst_ip6))
 			continue;
 		return (st);
 	}
 	return (NULL);
 }
 
 #endif /* INET6 */
 
 /*
  * Lookup dynamic state.
  *  args - ipfw_chk() arguments, args->f_id is the filled ipfw_flow_id;
  *  ulp - determined by ipfw_chk() upper level protocol header;
  *  pktlen - packet length, used to update the bytes counters;
  *  cmd - opcode, its arg1 is the state name index to match
  *        (zero matches any);
  *  info - info about matched state to return back (kidx, hashval,
  *         direction, version and f_pos).
  * Returns pointer to state's parent rule and fills info. If there is
  * no state, or the matched state is considered orphaned, NULL is
  * returned and info->direction is MATCH_NONE.
  * On match the state's protocol data and counters are updated.
  * The caller must hold the IPFW read lock on V_layer3_chain.
  */
 struct ip_fw *
 ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
     int pktlen, const ipfw_insn *cmd, struct ipfw_dyn_info *info)
 {
 	struct dyn_data *data;
 	struct ip_fw *rule;
 
 	IPFW_RLOCK_ASSERT(&V_layer3_chain);
 
 	data = NULL;
 	rule = NULL;
 	info->kidx = cmd->arg1;
 	info->direction = MATCH_NONE;
 	info->hashval = hash_packet(&args->f_id);
 
 	DYNSTATE_CRITICAL_ENTER();
 	if (IS_IP4_FLOW_ID(&args->f_id)) {
 		struct dyn_ipv4_state *s;
 
 		s = dyn_lookup_ipv4_state(&args->f_id, ulp, info, pktlen);
 		if (s != NULL) {
 			/*
 			 * Dynamic states are created using the same 5-tuple,
 			 * so it is assumed, that parent rule for O_LIMIT
 			 * state has the same address family.
 			 */
 			data = s->data;
 			if (s->type == O_LIMIT) {
 				/* data->parent is the parent state here. */
 				s = data->parent;
 				rule = s->limit->parent;
 			} else
 				rule = data->parent;
 		}
 	}
 #ifdef INET6
 	else if (IS_IP6_FLOW_ID(&args->f_id)) {
 		struct dyn_ipv6_state *s;
 
 		s = dyn_lookup_ipv6_state(&args->f_id, dyn_getscopeid(args),
 		    ulp, info, pktlen);
 		if (s != NULL) {
 			data = s->data;
 			if (s->type == O_LIMIT) {
 				/* data->parent is the parent state here. */
 				s = data->parent;
 				rule = s->limit->parent;
 			} else
 				rule = data->parent;
 		}
 	}
 #endif
 	if (data != NULL) {
 		/*
 		 * If cached chain id is the same, we can avoid rule index
 		 * lookup. Otherwise do lookup and update chain_id and f_pos.
 		 * It is safe even if there is concurrent thread that want
 		 * update the same state, because chain->id can be changed
 		 * only under IPFW_WLOCK().
 		 */
 		if (data->chain_id != V_layer3_chain.id) {
 			data->f_pos = ipfw_find_rule(&V_layer3_chain,
 			    data->rulenum, data->ruleid);
 			/*
 			 * Check that found state has not been orphaned.
 			 * When chain->id is being changed the parent
 			 * rule can be deleted. If found rule doesn't
 			 * match the parent pointer, consider this
 			 * result as MATCH_NONE and return NULL.
 			 *
 			 * This will lead to creation of new similar state
 			 * that will be added into head of this bucket.
 			 * And the state that we currently have matched
 			 * should be deleted by dyn_expire_states().
 			 *
 			 * In case when dyn_keep_states is enabled, return
 			 * pointer to deleted rule and f_pos value
 			 * corresponding to penultimate rule.
 			 * When we have enabled V_dyn_keep_states, states
 			 * that become orphaned will get the DYN_REFERENCED
 			 * flag and the rule will be kept around. So we can
 			 * return it. But since it is not in the rules map,
 			 * we need to return such an f_pos value that, if the
 			 * search continues after the state handling, the
 			 * next rule will be the last one - the default rule.
 			 */
 			if (V_layer3_chain.map[data->f_pos] == rule) {
 				data->chain_id = V_layer3_chain.id;
 				info->f_pos = data->f_pos;
 			} else if (V_dyn_keep_states != 0) {
 				/*
 				 * The original rule pointer is still usable.
 				 * So, we return it, but f_pos need to be
 				 * changed to point to the penultimate rule.
 				 */
 				MPASS(V_layer3_chain.n_rules > 1);
 				data->chain_id = V_layer3_chain.id;
 				data->f_pos = V_layer3_chain.n_rules - 2;
 				info->f_pos = data->f_pos;
 			} else {
 				/*
 				 * Log before the pointer is cleared, so the
 				 * message names the stale rule instead of
 				 * always printing a NULL pointer.
 				 */
 				DYN_DEBUG("rule %p  [%u, %u] is considered "
 				    "invalid in data %p", rule, data->ruleid,
 				    data->rulenum, data);
 				rule = NULL;
 				info->direction = MATCH_NONE;
 				/* info->f_pos doesn't matter here. */
 			}
 		} else
 			info->f_pos = data->f_pos;
 	}
 	DYNSTATE_CRITICAL_EXIT();
 #if 0
 	/*
 	 * Return MATCH_NONE if parent rule is in disabled set.
 	 * This will lead to creation of new similar state that
 	 * will be added into head of this bucket.
 	 *
 	 * XXXAE: we need to be able update state's set when parent
 	 *	  rule set is changed.
 	 */
 	if (rule != NULL && (V_set_disable & (1 << rule->set))) {
 		rule = NULL;
 		info->direction = MATCH_NONE;
 	}
 #endif
 	return (rule);
 }
 
 /*
  * Allocate and fill the parent (limit) part of a parent state.
  * The memory comes zeroed from V_dyn_parent_zone without sleeping
  * (M_NOWAIT). The expire time is set to
  * time_uptime + V_dyn_short_lifetime.
  * Returns NULL when the allocation fails; the failure is logged at
  * most once per time_uptime value.
  */
 static struct dyn_parent *
 dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, uint32_t hashval)
 {
 	struct dyn_parent *p;
 
 	p = uma_zalloc(V_dyn_parent_zone, M_NOWAIT | M_ZERO);
 	if (p != NULL) {
 		p->parent = parent;
 		p->ruleid = ruleid;
 		p->rulenum = rulenum;
 		p->set = set;
 		p->hashval = hashval;
 		p->expire = time_uptime + V_dyn_short_lifetime;
 		return (p);
 	}
 
 	/* Allocation failed: complain, but not more often than needed. */
 	if (last_log != time_uptime) {
 		last_log = time_uptime;
 		log(LOG_DEBUG,
 		    "ipfw: Cannot allocate parent dynamic state, "
 		    "consider increasing "
 		    "net.inet.ip.fw.dyn_parent_max\n");
 	}
 	return (NULL);
 }
 
 /*
  * Allocate a zeroed dyn_data from V_dyn_data_zone without sleeping,
  * store the rule identity, FIB and hash value in it and let
  * dyn_update_proto_state() account the first packet as MATCH_FORWARD.
  *
  * Returns the new object, or NULL when the zone allocation fails.  The
  * failure message is emitted only when time_uptime differs from last_log.
  */
 static struct dyn_data *
 dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
     uint32_t hashval, uint16_t fibnum)
 {
 	struct dyn_data *dd;
 
 	dd = uma_zalloc(V_dyn_data_zone, M_NOWAIT | M_ZERO);
 	if (dd != NULL) {
 		/* Initial expiration is V_dyn_syn_lifetime from now. */
 		dd->expire = time_uptime + V_dyn_syn_lifetime;
 		dd->hashval = hashval;
 		dd->fibnum = fibnum;
 		dd->set = set;
 		dd->rulenum = rulenum;
 		dd->ruleid = ruleid;
 		dd->parent = parent;
 		/* Must run after the fields above have been filled in. */
 		dyn_update_proto_state(dd, pkt, ulp, pktlen, MATCH_FORWARD);
 		return (dd);
 	}
 
 	/* Out of memory: rate-limit the diagnostic via last_log. */
 	if (last_log != time_uptime) {
 		last_log = time_uptime;
 		log(LOG_DEBUG,
 		    "ipfw: Cannot allocate dynamic state, "
 		    "consider increasing net.inet.ip.fw.dyn_max\n");
 	}
 	return (NULL);
 }
 
 /*
  * Allocate a zeroed IPv4 state from V_dyn_ipv4_zone without sleeping and
  * copy the flow key (protocol, ports, addresses) from @pkt into it.
  * The data/limit pointer is left for the caller to set.
  *
  * Returns the new state or NULL when the zone allocation fails.
  */
 static struct dyn_ipv4_state *
 dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx,
     uint8_t type)
 {
 	struct dyn_ipv4_state *st;
 
 	st = uma_zalloc(V_dyn_ipv4_zone, M_NOWAIT | M_ZERO);
 	if (st != NULL) {
 		/* Flow key. */
 		st->src = pkt->src_ip;
 		st->dst = pkt->dst_ip;
 		st->sport = pkt->src_port;
 		st->dport = pkt->dst_port;
 		st->proto = pkt->proto;
 		/* State classification. */
 		st->kidx = kidx;
 		st->type = type;
 	}
 	return (st);
 }
 
 /*
  * Add IPv4 parent state.
  *
  * @version is the value of the bucket's ipv4_parent_add version that the
  * caller observed during its lockless lookup; when it no longer matches,
  * the lookup is repeated under the bucket lock before anything is
  * allocated.
  *
  * Returns a pointer to the parent state.  When it is not NULL, the caller
  * is inside the critical section and the pointer is protected by the
  * hazard pointer.  When an allocation fails, NULL is returned and no exit
  * from the critical section is needed.
  */
 static struct dyn_ipv4_state *
 dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, const struct ipfw_flow_id *pkt, uint32_t hashval,
     uint32_t version, uint16_t kidx)
 {
 	struct dyn_ipv4_state *st;
 	struct dyn_parent *dp;
 	uint32_t bucket;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 	DYN_BUCKET_LOCK(bucket);
 	if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_add)) {
 		/*
 		 * The bucket has been modified since the caller's lookup.
 		 * A simultaneous thread may have already created this
 		 * state; if so, just hand it out.
 		 */
 		st = dyn_lookup_ipv4_parent_locked(pkt, rule, ruleid,
 		    rulenum, bucket);
 		if (st != NULL)
 			goto protect;
 	}
 
 	dp = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval);
 	if (dp == NULL) {
 		DYN_BUCKET_UNLOCK(bucket);
 		return (NULL);
 	}
 
 	st = dyn_alloc_ipv4_state(pkt, kidx, O_LIMIT_PARENT);
 	if (st == NULL) {
 		DYN_BUCKET_UNLOCK(bucket);
 		uma_zfree(V_dyn_parent_zone, dp);
 		return (NULL);
 	}
 
 	/* Publish the new state and account for it. */
 	st->limit = dp;
 	CK_SLIST_INSERT_HEAD(&V_dyn_ipv4_parent[bucket], st, entry);
 	DYN_COUNT_INC(dyn_parent_count);
 	DYN_BUCKET_VERSION_BUMP(bucket, ipv4_parent_add);
 protect:
 	/* Enter the critical section before the bucket lock is dropped. */
 	DYNSTATE_CRITICAL_ENTER();
 	DYNSTATE_PROTECT(st);
 	DYN_BUCKET_UNLOCK(bucket);
 	return (st);
 }
 
 /*
  * Create an IPv4 dynamic state of the given @type for flow @pkt and link
  * it into the head of its hash bucket.
  *
  * @info carries the result of the caller's earlier lookup.  When it does
  * not describe this very flow (unknown direction, different kidx or hash
  * value) or the bucket's ipv4_add version has changed since, the lookup
  * is repeated under the bucket lock.
  *
  * Returns 0 on success, EEXIST when the locked lookup finds the state,
  * and ENOMEM when either allocation fails.
  */
 static int
 dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
     uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum,
     uint16_t kidx, uint8_t type)
 {
 	struct dyn_ipv4_state *st;
 	struct dyn_data *dd;
 	uint32_t bucket;
 	int error;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 	DYN_BUCKET_LOCK(bucket);
 	/*
 	 * If the cached lookup result can not be trusted, do the lookup
 	 * again to be sure that the state does not exist.
 	 */
 	if ((info->direction == MATCH_UNKNOWN ||
 	    info->kidx != kidx ||
 	    info->hashval != hashval ||
 	    info->version != DYN_BUCKET_VERSION(bucket, ipv4_add)) &&
 	    dyn_lookup_ipv4_state_locked(pkt, ulp, pktlen,
 	    bucket, kidx) != 0) {
 		error = EEXIST;
 		goto unlock;
 	}
 
 	dd = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp,
 	    pktlen, hashval, fibnum);
 	if (dd == NULL) {
 		error = ENOMEM;
 		goto unlock;
 	}
 
 	st = dyn_alloc_ipv4_state(pkt, kidx, type);
 	if (st == NULL) {
 		/* Drop the lock first, then give the data back. */
 		DYN_BUCKET_UNLOCK(bucket);
 		uma_zfree(V_dyn_data_zone, dd);
 		return (ENOMEM);
 	}
 
 	st->data = dd;
 	CK_SLIST_INSERT_HEAD(&V_dyn_ipv4[bucket], st, entry);
 	DYN_COUNT_INC(dyn_count);
 	DYN_BUCKET_VERSION_BUMP(bucket, ipv4_add);
 	error = 0;
 unlock:
 	DYN_BUCKET_UNLOCK(bucket);
 	return (error);
 }
 
 #ifdef INET6
 /*
  * Allocate a zeroed IPv6 state from V_dyn_ipv6_zone without sleeping and
  * copy the flow key (protocol, ports, addresses, zone id) into it.
  * The data/limit pointer is left for the caller to set.
  *
  * Returns the new state or NULL when the zone allocation fails.
  */
 static struct dyn_ipv6_state *
 dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     uint16_t kidx, uint8_t type)
 {
 	struct dyn_ipv6_state *st;
 
 	st = uma_zalloc(V_dyn_ipv6_zone, M_NOWAIT | M_ZERO);
 	if (st != NULL) {
 		/* Flow key. */
 		st->src = pkt->src_ip6;
 		st->dst = pkt->dst_ip6;
 		st->sport = pkt->src_port;
 		st->dport = pkt->dst_port;
 		st->proto = pkt->proto;
 		st->zoneid = zoneid;
 		/* State classification. */
 		st->kidx = kidx;
 		st->type = type;
 	}
 	return (st);
 }
 
 /*
  * Add IPv6 parent state.
  *
  * @version is the value of the bucket's ipv6_parent_add version that the
  * caller observed during its lockless lookup; when it no longer matches,
  * the lookup is repeated under the bucket lock before anything is
  * allocated.
  *
  * Returns a pointer to the parent state.  When it is not NULL, the caller
  * is inside the critical section and the pointer is protected by the
  * hazard pointer.  When an allocation fails, NULL is returned and no exit
  * from the critical section is needed.
  */
 static struct dyn_ipv6_state *
 dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid,
     uint32_t hashval, uint32_t version, uint16_t kidx)
 {
 	struct dyn_ipv6_state *st;
 	struct dyn_parent *dp;
 	uint32_t bucket;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 	DYN_BUCKET_LOCK(bucket);
 	if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_add)) {
 		/*
 		 * The bucket has been modified since the caller's lookup.
 		 * A simultaneous thread may have already created this
 		 * state; if so, just hand it out.
 		 */
 		st = dyn_lookup_ipv6_parent_locked(pkt, zoneid, rule, ruleid,
 		    rulenum, bucket);
 		if (st != NULL)
 			goto protect;
 	}
 
 	dp = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval);
 	if (dp == NULL) {
 		DYN_BUCKET_UNLOCK(bucket);
 		return (NULL);
 	}
 
 	st = dyn_alloc_ipv6_state(pkt, zoneid, kidx, O_LIMIT_PARENT);
 	if (st == NULL) {
 		DYN_BUCKET_UNLOCK(bucket);
 		uma_zfree(V_dyn_parent_zone, dp);
 		return (NULL);
 	}
 
 	/* Publish the new state and account for it. */
 	st->limit = dp;
 	CK_SLIST_INSERT_HEAD(&V_dyn_ipv6_parent[bucket], st, entry);
 	DYN_COUNT_INC(dyn_parent_count);
 	DYN_BUCKET_VERSION_BUMP(bucket, ipv6_parent_add);
 protect:
 	/* Enter the critical section before the bucket lock is dropped. */
 	DYNSTATE_CRITICAL_ENTER();
 	DYNSTATE_PROTECT(st);
 	DYN_BUCKET_UNLOCK(bucket);
 	return (st);
 }
 
 /*
  * Create an IPv6 dynamic state of the given @type for flow @pkt and link
  * it into the head of its hash bucket.
  *
  * @info carries the result of the caller's earlier lookup.  When it does
  * not describe this very flow (unknown direction, different kidx or hash
  * value) or the bucket's ipv6_add version has changed since, the lookup
  * is repeated under the bucket lock.
  *
  * Returns 0 on success, EEXIST when the locked lookup finds the state,
  * and ENOMEM when either allocation fails.
  */
 static int
 dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum,
     uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid,
     const void *ulp, int pktlen, uint32_t hashval, struct ipfw_dyn_info *info,
     uint16_t fibnum, uint16_t kidx, uint8_t type)
 {
 	struct dyn_ipv6_state *st;
 	struct dyn_data *dd;
 	uint32_t bucket;
 	int error;
 
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 	DYN_BUCKET_LOCK(bucket);
 	/*
 	 * If the cached lookup result can not be trusted, do the lookup
 	 * again to be sure that the state does not exist.
 	 */
 	if ((info->direction == MATCH_UNKNOWN ||
 	    info->kidx != kidx ||
 	    info->hashval != hashval ||
 	    info->version != DYN_BUCKET_VERSION(bucket, ipv6_add)) &&
 	    dyn_lookup_ipv6_state_locked(pkt, zoneid, ulp, pktlen,
 	    bucket, kidx) != 0) {
 		error = EEXIST;
 		goto unlock;
 	}
 
 	dd = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp,
 	    pktlen, hashval, fibnum);
 	if (dd == NULL) {
 		error = ENOMEM;
 		goto unlock;
 	}
 
 	st = dyn_alloc_ipv6_state(pkt, zoneid, kidx, type);
 	if (st == NULL) {
 		/* Drop the lock first, then give the data back. */
 		DYN_BUCKET_UNLOCK(bucket);
 		uma_zfree(V_dyn_data_zone, dd);
 		return (ENOMEM);
 	}
 
 	st->data = dd;
 	CK_SLIST_INSERT_HEAD(&V_dyn_ipv6[bucket], st, entry);
 	DYN_COUNT_INC(dyn_count);
 	DYN_BUCKET_VERSION_BUMP(bucket, ipv6_add);
 	error = 0;
 unlock:
 	DYN_BUCKET_UNLOCK(bucket);
 	return (error);
 }
 #endif /* INET6 */
 
 /*
  * Find or create the O_LIMIT parent state for flow @pkt that belongs to
  * @rule, and charge one more session to it.
  *
  *  pkt - flow id used as the parent key (the caller in this file passes
  *        an id already masked with the rule's limit mask);
  *  zoneid - IPv6 zone id, used only for IPv6 flows;
  *  rule - the rule that owns the limit;
  *  hashval - hash value of the parent key, selects the bucket;
  *  limit - maximum value of the parent's session counter;
  *  kidx - passed through to the newly created parent state.
  *
  * Returns the parent state (struct dyn_ipv4_state or dyn_ipv6_state,
  * depending on the flow type) with its counter already incremented, or
  * NULL when the flow is neither IPv4 nor IPv6, when a new parent state
  * could not be added, or when the counter has already reached @limit.
  * The critical section is always left before returning.
  */
 static void *
 dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint16_t kidx)
 {
 	char sbuf[24];
 	struct dyn_parent *p;
 	void *ret;
 	uint32_t bucket, version;
 
 	p = NULL;
 	ret = NULL;
 	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
 	DYNSTATE_CRITICAL_ENTER();
 	if (IS_IP4_FLOW_ID(pkt)) {
 		struct dyn_ipv4_state *s;
 
 		/*
 		 * Remember the "add" version before the lockless lookup, so
 		 * dyn_add_ipv4_parent() can tell whether the bucket has
 		 * changed in between.
 		 */
 		version = DYN_BUCKET_VERSION(bucket, ipv4_parent_add);
 		s = dyn_lookup_ipv4_parent(pkt, rule, rule->id,
 		    rule->rulenum, bucket);
 		if (s == NULL) {
 			/*
 			 * Exit from critical section because
 			 * dyn_add_ipv4_parent() will acquire bucket lock.
 			 */
 			DYNSTATE_CRITICAL_EXIT();
 
 			s = dyn_add_ipv4_parent(rule, rule->id,
 			    rule->rulenum, rule->set, pkt, hashval,
 			    version, kidx);
 			/* On failure we are not in critical section. */
 			if (s == NULL)
 				return (NULL);
 			/* Now we are in critical section again. */
 		}
 		ret = s;
 		p = s->limit;
 	}
 #ifdef INET6
 	else if (IS_IP6_FLOW_ID(pkt)) {
 		struct dyn_ipv6_state *s;
 
 		/* Same protocol as for IPv4 above. */
 		version = DYN_BUCKET_VERSION(bucket, ipv6_parent_add);
 		s = dyn_lookup_ipv6_parent(pkt, zoneid, rule, rule->id,
 		    rule->rulenum, bucket);
 		if (s == NULL) {
 			/*
 			 * Exit from critical section because
 			 * dyn_add_ipv6_parent() will acquire bucket lock.
 			 */
 			DYNSTATE_CRITICAL_EXIT();
 
 			s = dyn_add_ipv6_parent(rule, rule->id,
 			    rule->rulenum, rule->set, pkt, zoneid, hashval,
 			    version, kidx);
 			/* On failure we are not in critical section. */
 			if (s == NULL)
 				return (NULL);
 			/* Now we are in critical section again. */
 		}
 		ret = s;
 		p = s->limit;
 	}
 #endif
 	else {
 		/* Unsupported address family. */
 		DYNSTATE_CRITICAL_EXIT();
 		return (NULL);
 	}
 
 	/* Check the limit */
 	if (DPARENT_COUNT(p) >= limit) {
 		DYNSTATE_CRITICAL_EXIT();
 		/* Log only if verbose and last_log != time_uptime. */
 		if (V_fw_verbose && last_log != time_uptime) {
 			last_log = time_uptime;
 			snprintf(sbuf, sizeof(sbuf), "%u drop session",
 			    rule->rulenum);
 			print_dyn_rule_flags(pkt, O_LIMIT,
 			    LOG_SECURITY | LOG_DEBUG, sbuf,
 			    "too many entries");
 		}
 		return (NULL);
 	}
 
 	/* Take new session into account. */
 	DPARENT_COUNT_INC(p);
 	/*
 	 * We must exit from critical section because the following code
 	 * can acquire bucket mutex.
 	 * We rely on the 'count' field. The state will not expire
 	 * until it has some child states, i.e. 'count' field is not zero.
 	 * Return state pointer, it will be used by child states as parent.
 	 */
 	DYNSTATE_CRITICAL_EXIT();
 	return (ret);
 }
 
 /*
  * Create a dynamic state of the given @type (O_LIMIT or O_KEEP_STATE)
  * for flow @pkt on behalf of @rule.
  *
  * For O_LIMIT a parent state keyed by the flow id masked with
  * @limit_mask is obtained first via dyn_get_parent_state(), which also
  * charges the new session to the parent's counter.  The child state then
  * refers to that parent state instead of the rule.  If the child state
  * is not created afterwards, the counter is reverted.
  *
  * Returns:
  *  0 on success, including the case when a simultaneous thread has
  *    already created the same state (EEXIST from the add routine);
  *  EAFNOSUPPORT when the flow is neither IPv4 nor (with INET6) IPv6;
  *  EACCES when dyn_get_parent_state() did not return a parent state;
  *  otherwise the error returned by dyn_add_ipv4_state() or
  *    dyn_add_ipv6_state().
  */
 static int
 dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
     uint16_t fibnum, const void *ulp, int pktlen, void *rule,
     uint32_t ruleid, uint16_t rulenum, uint8_t set,
     struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask,
     uint16_t kidx, uint8_t type)
 {
 	struct ipfw_flow_id id;
 	uint32_t hashval, parent_hashval;
 	int ret;
 
 	MPASS(type == O_LIMIT || type == O_KEEP_STATE);
 
 	if (type == O_LIMIT) {
 		/* Create masked flow id and calculate bucket */
 		id.addr_type = pkt->addr_type;
 		id.proto = pkt->proto;
 		id.fib = fibnum; /* unused */
 		id.src_port = (limit_mask & DYN_SRC_PORT) ?
 		    pkt->src_port: 0;
 		id.dst_port = (limit_mask & DYN_DST_PORT) ?
 		    pkt->dst_port: 0;
 		if (IS_IP4_FLOW_ID(pkt)) {
 			id.src_ip = (limit_mask & DYN_SRC_ADDR) ?
 			    pkt->src_ip: 0;
 			id.dst_ip = (limit_mask & DYN_DST_ADDR) ?
 			    pkt->dst_ip: 0;
 		}
 #ifdef INET6
 		else if (IS_IP6_FLOW_ID(pkt)) {
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip6 = pkt->src_ip6;
 			else
 				memset(&id.src_ip6, 0, sizeof(id.src_ip6));
 			if (limit_mask & DYN_DST_ADDR)
 				id.dst_ip6 = pkt->dst_ip6;
 			else
 				memset(&id.dst_ip6, 0, sizeof(id.dst_ip6));
 		}
 #endif
 		else
 			return (EAFNOSUPPORT);
 
 		parent_hashval = hash_parent(&id, rule);
 		rule = dyn_get_parent_state(&id, zoneid, rule, parent_hashval,
 		    limit, kidx);
 		if (rule == NULL) {
 			/*
 			 * No parent state: limit is reached or allocation
 			 * has failed.  The "too many entries" diagnostic
 			 * is emitted by dyn_get_parent_state() itself.
 			 */
 			return (EACCES);
 		}
 		/*
 		 * Limit is not reached, create new state.
 		 * Now rule points to parent state.
 		 */
 	}
 
 	hashval = hash_packet(pkt);
 	if (IS_IP4_FLOW_ID(pkt))
 		ret = dyn_add_ipv4_state(rule, ruleid, rulenum, set, pkt,
 		    ulp, pktlen, hashval, info, fibnum, kidx, type);
 #ifdef INET6
 	else if (IS_IP6_FLOW_ID(pkt))
 		ret = dyn_add_ipv6_state(rule, ruleid, rulenum, set, pkt,
 		    zoneid, ulp, pktlen, hashval, info, fibnum, kidx, type);
 #endif /* INET6 */
 	else
 		ret = EAFNOSUPPORT;
 
 	if (type == O_LIMIT) {
 		if (ret != 0) {
 			/*
 			 * We failed to create child state for O_LIMIT
 			 * opcode. Since we already counted it in the parent,
 			 * we must revert counter back. The 'rule' points to
 			 * parent state, use it to get dyn_parent.
 			 * Note that this includes the EEXIST case below.
 			 *
 			 * XXXAE: it should be safe to use 'rule' pointer
 			 * without extra lookup, parent state is referenced
 			 * and should not be freed.
 			 */
 			if (IS_IP4_FLOW_ID(&id))
 				DPARENT_COUNT_DEC(
 				    ((struct dyn_ipv4_state *)rule)->limit);
 #ifdef INET6
 			else if (IS_IP6_FLOW_ID(&id))
 				DPARENT_COUNT_DEC(
 				    ((struct dyn_ipv6_state *)rule)->limit);
 #endif
 		}
 	}
 	/*
 	 * EEXIST means that simultaneous thread has created this
 	 * state. Consider this as success.
 	 *
 	 * XXXAE: should we invalidate 'info' content here?
 	 */
 	if (ret == EEXIST)
 		return (0);
 	return (ret);
 }
 
 /*
  * Install dynamic state.
  *  chain - ipfw's instance;
  *  rule - the parent rule that installs the state;
  *  cmd - opcode that installs the state;
  *  args - ipfw arguments;
  *  ulp - upper level protocol header;
  *  pktlen - packet length;
  *  info - dynamic state lookup info;
  *  tablearg - tablearg id.
  *
  * The connection limit and limit mask are taken from @cmd only for the
  * O_LIMIT opcode; for any other opcode both are passed as zero.
  *
  * Returns non-zero value (failure) if state is not installed because
  * of errors or because session limitations are enforced.
  */
 int
 ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
     const ipfw_insn_limit *cmd, const struct ip_fw_args *args,
     const void *ulp, int pktlen, struct ipfw_dyn_info *info,
     uint32_t tablearg)
 {
 	uint32_t limit, zoneid;
 	uint16_t limit_mask;
 
 	limit = 0;
 	limit_mask = 0;
 	if (cmd->o.opcode == O_LIMIT) {
 		/* The macro also consults the 'tablearg' argument. */
 		limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit);
 		limit_mask = cmd->limit_mask;
 	}
 
 	/* Zone id is resolved only for IPv6 flows. */
 	zoneid = 0;
 #ifdef INET6
 	if (IS_IP6_FLOW_ID(&args->f_id))
 		zoneid = dyn_getscopeid(args);
 #endif
 	return (dyn_install_state(&args->f_id, zoneid, M_GETFIB(args->m),
 	    ulp, pktlen, rule, rule->id, rule->rulenum, rule->set, info,
 	    limit, limit_mask, cmd->o.arg1, cmd->o.opcode));
 }
 
 /*
  * Free safe to remove state entries from expired lists.
  *
  * An entry of the V_dyn_expired_ipv4/ipv6 lists is freed only when no
  * per-CPU hazard pointer snapshot refers to it and, for O_LIMIT_PARENT
  * entries, only when its child counter is zero.  Entries that do not
  * qualify stay on the list for a later pass.
  *
  * NOTE(review): @chain is not referenced directly in this body.
  */
 static void
 dyn_free_states(struct ip_fw_chain *chain)
 {
 	struct dyn_ipv4_state *s4, *s4n;
 #ifdef INET6
 	struct dyn_ipv6_state *s6, *s6n;
 #endif
 	int cached_count, i;
 
 	/*
 	 * We keep pointers to objects that are in use on each CPU
 	 * in the per-cpu dyn_hp pointer. When an object is going to be
 	 * removed, it is first unlinked from the corresponding list.
 	 * This changes the dyn_bucket_xxx_delver version.
 	 * Unlinked objects are placed into the corresponding
 	 * dyn_expired_xxx list. A reader that is going to dereference
 	 * an object pointer checks the dyn_bucket_xxx_delver version
 	 * before and after storing the pointer into dyn_hp. If the
 	 * version is the same, the object is protected from freeing and
 	 * it is safe to dereference. Otherwise the reader iterates the
 	 * list again from the beginning, but this object is now unlinked
 	 * and thus will not be accessible.
 	 *
 	 * Copy dyn_hp pointers for each CPU into dyn_hp_cache array.
 	 * It does not matter that some pointer can be changed while we
 	 * are copying. We need to check that objects removed in the
 	 * previous pass are not in use. If a dyn_hp pointer does not
 	 * contain such an object at the time we are copying, it will
 	 * not appear there later, because the object is already unlinked.
 	 * And for new pointers, we will not free objects that will be
 	 * unlinked in this pass.
 	 */
 	cached_count = 0;
 	CPU_FOREACH(i) {
 		/* Only non-NULL pointers are kept in the cache. */
 		dyn_hp_cache[cached_count] = DYNSTATE_GET(i);
 		if (dyn_hp_cache[cached_count] != NULL)
 			cached_count++;
 	}
 
 	/*
 	 * Free expired states that are safe to free.
 	 * Check each entry from previous pass in the dyn_expired_xxx
 	 * list, if pointer to the object is in the dyn_hp_cache array,
 	 * keep it until next pass. Otherwise it is safe to free the
 	 * object.
 	 *
 	 * Macro arguments: 's' and 'next' are iteration variables,
 	 * 'name' is the address family suffix (ipv4 or ipv6) pasted into
 	 * the list, state type and zone names. An O_LIMIT_PARENT entry
 	 * with non-zero 'count' is skipped even when it is not cached.
 	 *
 	 * XXXAE: optimize this to use SLIST_REMOVE_AFTER.
 	 */
 #define	DYN_FREE_STATES(s, next, name)		do {			\
 	s = SLIST_FIRST(&V_dyn_expired_ ## name);			\
 	while (s != NULL) {						\
 		next = SLIST_NEXT(s, expired);				\
 		for (i = 0; i < cached_count; i++)			\
 			if (dyn_hp_cache[i] == s)			\
 				break;					\
 		if (i == cached_count) {				\
 			if (s->type == O_LIMIT_PARENT &&		\
 			    s->limit->count != 0) {			\
 				s = next;				\
 				continue;				\
 			}						\
 			SLIST_REMOVE(&V_dyn_expired_ ## name,		\
 			    s, dyn_ ## name ## _state, expired);	\
 			if (s->type == O_LIMIT_PARENT)			\
 				uma_zfree(V_dyn_parent_zone, s->limit);	\
 			else						\
 				uma_zfree(V_dyn_data_zone, s->data);	\
 			uma_zfree(V_dyn_ ## name ## _zone, s);		\
 		}							\
 		s = next;						\
 	}								\
 } while (0)
 
 	/*
 	 * Protect access to expired lists with DYN_EXPIRED_LOCK.
 	 * Userland can invoke ipfw_expire_dyn_states() to delete
 	 * specific states, this will lead to modification of expired
 	 * lists.
 	 *
 	 * XXXAE: do we need DYN_EXPIRED_LOCK? We can just use
 	 *	  IPFW_UH_WLOCK to protect access to these lists.
 	 */
 	DYN_EXPIRED_LOCK();
 	DYN_FREE_STATES(s4, s4n, ipv4);
 #ifdef INET6
 	DYN_FREE_STATES(s6, s6n, ipv6);
 #endif
 	DYN_EXPIRED_UNLOCK();
 #undef DYN_FREE_STATES
 }
 
 /*
  * Check whether a state that belongs to rule @rulenum in set @set is
  * covered by the range @rt.
  *
  * Returns:
  * 0 when state is not matched by specified range;
  * 1 when state is matched by specified range;
  * 2 when state is matched by specified range and requested deletion of
  *   dynamic states (IPFW_RCFLAG_DYNAMIC is set).
  */
 static int
 dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt)
 {
 	int matched;
 
 	MPASS(rt != NULL);
 	/* The value reported for any match: 2 means forced deletion. */
 	matched = (rt->flags & IPFW_RCFLAG_DYNAMIC) ? 2 : 1;
 
 	/* flush all states */
 	if (rt->flags & IPFW_RCFLAG_ALL)
 		return (matched);
 	/* Set filter is requested and the set differs. */
 	if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rt->set != set)
 		return (0);
 	/* Range filter is requested and the rule number is outside. */
 	if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 &&
 	    (rulenum < rt->start_rule || rulenum > rt->end_rule))
 		return (0);
 	return (matched);
 }
 
 /*
  * Mark @data as DYN_REFERENCED and take one reference on the named
  * state object with index @kidx and one on the parent @rule.
  * Does nothing when @data is already marked.
  */
 static void
 dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data,
     struct ip_fw *rule, uint16_t kidx)
 {
 	struct dyn_state_obj *sobj;
 
 	/*
 	 * Do not acquire reference twice.
 	 * This can happen when rule deletion executed for
 	 * the same range, but different ruleset id.
 	 */
 	if ((data->flags & DYN_REFERENCED) != 0)
 		return;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	MPASS(kidx != 0);
 
 	data->flags |= DYN_REFERENCED;
 
 	/* Reference the named object */
 	sobj = SRV_OBJECT(ch, kidx);
 	sobj->no.refcnt += 1;
 	MPASS(sobj->no.etlv == IPFW_TLV_STATE_NAME);
 
 	/* Reference the parent rule */
 	rule->refcnt += 1;
 }
 
 /*
  * Drop the references taken by dyn_acquire_rule(): one on the named
  * state object with index @kidx and one on the parent @rule.
  * The named object is destroyed when its last reference goes away;
  * ipfw_free_rule() is called when the rule's counter drops to 1.
  */
 static void
 dyn_release_rule(struct ip_fw_chain *ch, struct dyn_data *data,
     struct ip_fw *rule, uint16_t kidx)
 {
 	struct dyn_state_obj *sobj;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	MPASS(kidx != 0);
 
 	/* Release the named object. */
 	sobj = SRV_OBJECT(ch, kidx);
 	if (sobj->no.refcnt != 1)
 		sobj->no.refcnt--;
 	else
 		dyn_destroy(ch, &sobj->no);
 
 	/* Release the parent rule. */
 	rule->refcnt--;
 	if (rule->refcnt == 1)
 		ipfw_free_rule(rule);
 }
 
 /*
  * Decide whether the IPv4 state @s must be unlinked for the range @rt.
  *
  * Returns the dyn_match_range() result, except that a plain match (1) is
  * turned into "keep" (0) when V_dyn_keep_states is enabled; in that case
  * the state takes a reference on its rule via dyn_acquire_rule().
  *
  * We do not keep O_LIMIT_PARENT states when V_dyn_keep_states is enabled.
  * An O_LIMIT state is created when a new connection is going to be
  * established and there is no matching state. Since the old parent rule
  * was deleted, we can't create new states with the old parent, and thus
  * we can not account new connections together with already established
  * connections, and can not do proper limiting.
  */
 static int
 dyn_match_ipv4_state(struct ip_fw_chain *ch, struct dyn_ipv4_state *s,
     const ipfw_range_tlv *rt)
 {
 	struct dyn_ipv4_state *pstate;
 	struct ip_fw *rule;
 	int ret;
 
 	if (s->type == O_LIMIT_PARENT)
 		return (dyn_match_range(s->limit->rulenum,
 		    s->limit->set, rt));
 
 	ret = dyn_match_range(s->data->rulenum, s->data->set, rt);
 	/* Not matched, keeping is disabled, or deletion is forced. */
 	if (V_dyn_keep_states == 0 || ret == 0 || ret > 1)
 		return (ret);
 
 	/* For O_LIMIT the rule is reached through the parent state. */
 	if (s->type == O_LIMIT) {
 		pstate = s->data->parent;
 		rule = pstate->limit->parent;
 	} else
 		rule = s->data->parent;
 	dyn_acquire_rule(ch, s->data, rule, s->kidx);
 	return (0);
 }
 
 #ifdef INET6
 /*
  * Decide whether the IPv6 state @s must be unlinked for the range @rt.
  *
  * Returns the dyn_match_range() result, except that a plain match (1) is
  * turned into "keep" (0) when V_dyn_keep_states is enabled; in that case
  * the state takes a reference on its rule via dyn_acquire_rule().
  * O_LIMIT_PARENT states are never kept this way.
  */
 static int
 dyn_match_ipv6_state(struct ip_fw_chain *ch, struct dyn_ipv6_state *s,
     const ipfw_range_tlv *rt)
 {
 	struct dyn_ipv6_state *pstate;
 	struct ip_fw *rule;
 	int ret;
 
 	if (s->type == O_LIMIT_PARENT)
 		return (dyn_match_range(s->limit->rulenum,
 		    s->limit->set, rt));
 
 	ret = dyn_match_range(s->data->rulenum, s->data->set, rt);
 	/* Not matched, keeping is disabled, or deletion is forced. */
 	if (V_dyn_keep_states == 0 || ret == 0 || ret > 1)
 		return (ret);
 
 	/* For O_LIMIT the rule is reached through the parent state. */
 	if (s->type == O_LIMIT) {
 		pstate = s->data->parent;
 		rule = pstate->limit->parent;
 	} else
 		rule = s->data->parent;
 	dyn_acquire_rule(ch, s->data, rule, s->kidx);
 	return (0);
 }
 #endif
 
 /*
  * Unlink expired entries from states lists.
  * @rt can be used to specify the range of states for deletion.
  *
  *  ch - ipfw's instance, must be write-locked (IPFW_UH_WLOCK);
  *  rt - optional range; when not NULL, states matched by
  *       dyn_match_ipv4_state()/dyn_match_ipv6_state() are unlinked
  *       regardless of their expiration time.
  *
  * Unlinked entries are collected on local lists and then appended to
  * the global V_dyn_expired_xxx lists; they are not freed here.
  * As a side effect V_curr_max_length is set to the longest remaining
  * bucket list seen during this pass.
  */
 static void
 dyn_expire_states(struct ip_fw_chain *ch, ipfw_range_tlv *rt)
 {
 	struct dyn_ipv4_slist expired_ipv4;
 #ifdef INET6
 	struct dyn_ipv6_slist expired_ipv6;
 	struct dyn_ipv6_state *s6, *s6n, *s6p;
 #endif
 	struct dyn_ipv4_state *s4, *s4n, *s4p;
 	void *rule;
 	int bucket, removed, length, max_length;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	/*
 	 * Unlink expired states from each bucket.
 	 * With acquired bucket lock iterate entries of each lists:
 	 * ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time
 	 * and unlink entry from the list, link entry into temporary
 	 * expired_xxx lists then bump "del" bucket version.
 	 *
 	 * When an entry is removed, corresponding states counter is
 	 * decremented. If entry has O_LIMIT type, parent's reference
 	 * counter is decremented.
 	 *
 	 * NOTE: this function can be called from userspace context
 	 * when user deletes rules. In this case all matched states
 	 * will be forcedly unlinked. O_LIMIT_PARENT states will be kept
 	 * in the expired lists until reference counter become zero.
 	 *
 	 * Macro arguments:
 	 *  s, prev, next - iteration variables of the state type;
 	 *  exp - expiration field relative to 's' (data->expire or
 	 *        limit->expire);
 	 *  af - address family suffix, selects dyn_match_<af>_state()
 	 *       and the local expired_<af> list;
 	 *  name - bucket list suffix, selects V_dyn_<name>[] and the
 	 *         <name>_del bucket version;
 	 *  extra - additional condition that must hold for a timed out
 	 *          entry to be unlinked (parents require zero count).
 	 * The macro also uses 'bucket', 'rt', 'ch', 'rule', 'removed',
 	 * 'length' and 'max_length' from the enclosing scope. Note that
 	 * 's' is reused to point to the parent state inside the O_LIMIT
 	 * branch; this is safe only because 'next' was fetched earlier.
 	 */
 #define	DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra)	do {	\
 	length = 0;							\
 	removed = 0;							\
 	prev = NULL;							\
 	s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]);			\
 	while (s != NULL) {						\
 		next = CK_SLIST_NEXT(s, entry);				\
 		if ((TIME_LEQ((s)->exp, time_uptime) && extra) ||	\
 		    (rt != NULL &&					\
 		     dyn_match_ ## af ## _state(ch, s, rt))) {		\
 			if (prev != NULL)				\
 				CK_SLIST_REMOVE_AFTER(prev, entry);	\
 			else						\
 				CK_SLIST_REMOVE_HEAD(			\
 				    &V_dyn_ ## name [bucket], entry);	\
 			removed++;					\
 			SLIST_INSERT_HEAD(&expired_ ## af, s, expired);	\
 			if (s->type == O_LIMIT_PARENT)			\
 				DYN_COUNT_DEC(dyn_parent_count);	\
 			else {						\
 				DYN_COUNT_DEC(dyn_count);		\
 				if (s->data->flags & DYN_REFERENCED) {	\
 					rule = s->data->parent;		\
 					if (s->type == O_LIMIT)		\
 						rule = ((__typeof(s))	\
 						    rule)->limit->parent;\
 					dyn_release_rule(ch, s->data,	\
 					    rule, s->kidx);		\
 				}					\
 				if (s->type == O_LIMIT)	{		\
 					s = s->data->parent;		\
 					DPARENT_COUNT_DEC(s->limit);	\
 				}					\
 			}						\
 		} else {						\
 			prev = s;					\
 			length++;					\
 		}							\
 		s = next;						\
 	}								\
 	if (removed != 0)						\
 		DYN_BUCKET_VERSION_BUMP(bucket, name ## _del);		\
 	if (length > max_length)				\
 		max_length = length;				\
 } while (0)
 
 	SLIST_INIT(&expired_ipv4);
 #ifdef INET6
 	SLIST_INIT(&expired_ipv6);
 #endif
 	max_length = 0;
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		/* All four lists of a bucket share one bucket lock. */
 		DYN_BUCKET_LOCK(bucket);
 		DYN_UNLINK_STATES(s4, s4p, s4n, data->expire, ipv4, ipv4, 1);
 		DYN_UNLINK_STATES(s4, s4p, s4n, limit->expire, ipv4,
 		    ipv4_parent, (s4->limit->count == 0));
 #ifdef INET6
 		DYN_UNLINK_STATES(s6, s6p, s6n, data->expire, ipv6, ipv6, 1);
 		DYN_UNLINK_STATES(s6, s6p, s6n, limit->expire, ipv6,
 		    ipv6_parent, (s6->limit->count == 0));
 #endif
 		DYN_BUCKET_UNLOCK(bucket);
 	}
 	/* Update curr_max_length for statistics. */
 	V_curr_max_length = max_length;
 	/*
 	 * Concatenate temporary lists with global expired lists.
 	 */
 	DYN_EXPIRED_LOCK();
 	SLIST_CONCAT(&V_dyn_expired_ipv4, &expired_ipv4,
 	    dyn_ipv4_state, expired);
 #ifdef INET6
 	SLIST_CONCAT(&V_dyn_expired_ipv6, &expired_ipv6,
 	    dyn_ipv6_state, expired);
 #endif
 	DYN_EXPIRED_UNLOCK();
 #undef DYN_UNLINK_STATES
 	/*
 	 * NOTE(review): no definition of DYN_UNREF_STATES is visible in
 	 * this function; the #undef below looks like a harmless leftover.
 	 */
 #undef DYN_UNREF_STATES
 }
 
 /*
  * Get a packet header mbuf without sleeping and prepare it to hold @len
  * bytes of zeroed data placed max_linkhdr bytes into the buffer.
  * The mbuf is assigned to FIB @fibnum and flagged M_SKIP_FIREWALL.
  *
  * Returns the mbuf or NULL when m_gethdr() fails.
  */
 static struct mbuf *
 dyn_mgethdr(int len, uint16_t fibnum)
 {
 	struct mbuf *mb;
 
 	mb = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mb != NULL) {
 #ifdef MAC
 		mac_netinet_firewall_send(mb);
 #endif
 		M_SETFIB(mb, fibnum);
 		mb->m_flags |= M_SKIP_FIREWALL;
 		/* Leave room for the link layer header in front. */
 		mb->m_data += max_linkhdr;
 		mb->m_pkthdr.len = len;
 		mb->m_len = len;
 		bzero(mb->m_data, len);
 	}
 	return (mb);
 }
 
 /*
  * Fill @m with an IPv4 header followed by a TCP header carrying only
  * the ACK flag.
  *
  * Addresses, ports, @seq and @ack are converted with htonl()/htons()
  * here. ip_off is only OR-ed and several fields are not written at all,
  * so the buffer is expected to be zeroed beforehand (dyn_mgethdr() does
  * that). Only the pseudo header sum is stored in th_sum; CSUM_TCP is
  * requested in the packet header for the rest.
  */
 static void
 dyn_make_keepalive_ipv4(struct mbuf *m, in_addr_t src, in_addr_t dst,
     uint32_t seq, uint32_t ack, uint16_t sport, uint16_t dport)
 {
 	struct ip *iph;
 	struct tcphdr *th;
 
 	/* IPv4 header. */
 	iph = mtod(m, struct ip *);
 	iph->ip_v = 4;
 	iph->ip_hl = sizeof(struct ip) >> 2;
 	iph->ip_tos = IPTOS_LOWDELAY;
 	iph->ip_len = htons(m->m_len);
 	iph->ip_off |= htons(IP_DF);
 	iph->ip_ttl = V_ip_defttl;
 	iph->ip_p = IPPROTO_TCP;
 	iph->ip_dst.s_addr = htonl(dst);
 	iph->ip_src.s_addr = htonl(src);
 
 	/* TCP header right behind it. */
 	th = mtodo(m, sizeof(*iph));
 	th->th_flags = TH_ACK;
 	th->th_off = sizeof(*th) >> 2;
 	th->th_ack = htonl(ack);
 	th->th_seq = htonl(seq);
 	th->th_dport = htons(dport);
 	th->th_sport = htons(sport);
 	th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr,
 	    htons(sizeof(struct tcphdr) + IPPROTO_TCP));
 
 	/* Checksum offload request. */
 	m->m_pkthdr.csum_flags = CSUM_TCP;
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 }
 
 static void
 dyn_enqueue_keepalive_ipv4(struct mbufq *q, const struct dyn_ipv4_state *s)
 {
 	struct mbuf *m;
 
 	if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) {
 		m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr),
 		    s->data->fibnum);
 		if (m != NULL) {
 			dyn_make_keepalive_ipv4(m, s->dst, s->src,
 			    s->data->ack_fwd - 1, s->data->ack_rev,
 			    s->dport, s->sport);
 			if (mbufq_enqueue(q, m)) {
 				m_freem(m);
 				log(LOG_DEBUG, "ipfw: limit for IPv4 "
 				    "keepalive queue is reached.\n");
 				return;
 			}
 		}
 	}
 
 	if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) {
 		m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr),
 		    s->data->fibnum);
 		if (m != NULL) {
 			dyn_make_keepalive_ipv4(m, s->src, s->dst,
 			    s->data->ack_rev - 1, s->data->ack_fwd,
 			    s->sport, s->dport);
 			if (mbufq_enqueue(q, m)) {
 				m_freem(m);
 				log(LOG_DEBUG, "ipfw: limit for IPv4 "
 				    "keepalive queue is reached.\n");
 				return;
 			}
 		}
 	}
 }
 
 /*
  * Prepare and send keep-alive packets.
  */
 static void
 dyn_send_keepalive_ipv4(struct ip_fw_chain *chain)
 {
 	struct mbufq q;
 	struct mbuf *m;
 	struct dyn_ipv4_state *s;
 	uint32_t bucket;
 
 	mbufq_init(&q, INT_MAX);
 	IPFW_UH_RLOCK(chain);
 	/*
 	 * It is safe to not use hazard pointer and just do lockless
 	 * access to the lists, because states entries can not be deleted
 	 * while we hold IPFW_UH_RLOCK.
 	 */
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) {
 			/*
 			 * Only established TCP connections that will
 			 * become expired withing dyn_keepalive_interval.
 			 */
 			if (s->proto != IPPROTO_TCP ||
 			    (s->data->state & BOTH_SYN) != BOTH_SYN ||
 			    TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
 				s->data->expire))
 				continue;
 			dyn_enqueue_keepalive_ipv4(&q, s);
 		}
 	}
 	IPFW_UH_RUNLOCK(chain);
 	while ((m = mbufq_dequeue(&q)) != NULL)
 		ip_output(m, NULL, NULL, 0, NULL, NULL);
 }
 
 #ifdef INET6
 /*
  * Fill @m with an IPv6 header followed by a TCP header carrying only
  * the ACK flag.
  *
  * For link-local source or destination addresses the low 16 bits of
  * @zoneid are stored into the second 16-bit word of the address.
  * Ports, @seq and @ack are converted with htons()/htonl() here.
  * ip6_vfc is only OR-ed and several fields are not written at all, so
  * the buffer is expected to be zeroed beforehand (dyn_mgethdr() does
  * that). Only the pseudo header sum is stored in th_sum; CSUM_TCP_IPV6
  * is requested in the packet header for the rest.
  */
 static void
 dyn_make_keepalive_ipv6(struct mbuf *m, const struct in6_addr *src,
     const struct in6_addr *dst, uint32_t zoneid, uint32_t seq, uint32_t ack,
     uint16_t sport, uint16_t dport)
 {
 	struct ip6_hdr *ip6h;
 	struct tcphdr *th;
 
 	/* IPv6 header. */
 	ip6h = mtod(m, struct ip6_hdr *);
 	ip6h->ip6_vfc |= IPV6_VERSION;
 	ip6h->ip6_plen = htons(sizeof(struct tcphdr));
 	ip6h->ip6_nxt = IPPROTO_TCP;
 	ip6h->ip6_hlim = IPV6_DEFHLIM;
 
 	/* Addresses, with the zone embedded for link-local ones. */
 	ip6h->ip6_src = *src;
 	if (IN6_IS_ADDR_LINKLOCAL(src))
 		ip6h->ip6_src.s6_addr16[1] = htons(zoneid & 0xffff);
 	ip6h->ip6_dst = *dst;
 	if (IN6_IS_ADDR_LINKLOCAL(dst))
 		ip6h->ip6_dst.s6_addr16[1] = htons(zoneid & 0xffff);
 
 	/* TCP header right behind it. */
 	th = mtodo(m, sizeof(*ip6h));
 	th->th_flags = TH_ACK;
 	th->th_off = sizeof(*th) >> 2;
 	th->th_ack = htonl(ack);
 	th->th_seq = htonl(seq);
 	th->th_dport = htons(dport);
 	th->th_sport = htons(sport);
 	th->th_sum = in6_cksum_pseudo(ip6h, sizeof(struct tcphdr),
 	    IPPROTO_TCP, 0);
 
 	/* Checksum offload request. */
 	m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 }
 
 static void
 dyn_enqueue_keepalive_ipv6(struct mbufq *q, const struct dyn_ipv6_state *s)
 {
 	struct mbuf *m;
 
 	if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) {
 		m = dyn_mgethdr(sizeof(struct ip6_hdr) +
 		    sizeof(struct tcphdr), s->data->fibnum);
 		if (m != NULL) {
 			dyn_make_keepalive_ipv6(m, &s->dst, &s->src,
 			    s->zoneid, s->data->ack_fwd - 1, s->data->ack_rev,
 			    s->dport, s->sport);
 			if (mbufq_enqueue(q, m)) {
 				m_freem(m);
 				log(LOG_DEBUG, "ipfw: limit for IPv6 "
 				    "keepalive queue is reached.\n");
 				return;
 			}
 		}
 	}
 
 	if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) {
 		m = dyn_mgethdr(sizeof(struct ip6_hdr) +
 		    sizeof(struct tcphdr), s->data->fibnum);
 		if (m != NULL) {
 			dyn_make_keepalive_ipv6(m, &s->src, &s->dst,
 			    s->zoneid, s->data->ack_rev - 1, s->data->ack_fwd,
 			    s->sport, s->dport);
 			if (mbufq_enqueue(q, m)) {
 				m_freem(m);
 				log(LOG_DEBUG, "ipfw: limit for IPv6 "
 				    "keepalive queue is reached.\n");
 				return;
 			}
 		}
 	}
 }
 
 static void
 dyn_send_keepalive_ipv6(struct ip_fw_chain *chain)
 {
 	struct mbufq q;
 	struct mbuf *m;
 	struct dyn_ipv6_state *s;
 	uint32_t bucket;
 
 	mbufq_init(&q, INT_MAX);
 	IPFW_UH_RLOCK(chain);
 	/*
 	 * It is safe to not use hazard pointer and just do lockless
 	 * access to the lists, because states entries can not be deleted
 	 * while we hold IPFW_UH_RLOCK.
 	 */
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) {
 			/*
 			 * Only established TCP connections that will
 			 * become expired withing dyn_keepalive_interval.
 			 */
 			if (s->proto != IPPROTO_TCP ||
 			    (s->data->state & BOTH_SYN) != BOTH_SYN ||
 			    TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
 				s->data->expire))
 				continue;
 			dyn_enqueue_keepalive_ipv6(&q, s);
 		}
 	}
 	IPFW_UH_RUNLOCK(chain);
 	while ((m = mbufq_dequeue(&q)) != NULL)
 		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 }
 #endif /* INET6 */
 
 static void
 dyn_grow_hashtable(struct ip_fw_chain *chain, uint32_t new)
 {
 #ifdef INET6
 	struct dyn_ipv6ck_slist *ipv6, *ipv6_parent;
 	uint32_t *ipv6_add, *ipv6_del, *ipv6_parent_add, *ipv6_parent_del;
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4ck_slist *ipv4, *ipv4_parent;
 	uint32_t *ipv4_add, *ipv4_del, *ipv4_parent_add, *ipv4_parent_del;
 	struct dyn_ipv4_state *s4;
 	struct mtx *bucket_lock;
 	void *tmp;
 	uint32_t bucket;
 
 	MPASS(powerof2(new));
 	DYN_DEBUG("grow hash size %u -> %u", V_curr_dyn_buckets, new);
 	/*
 	 * Allocate and initialize new lists.
 	 * XXXAE: on memory pressure this can disable callout timer.
 	 */
 	bucket_lock = malloc(new * sizeof(struct mtx), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv4 = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv4_parent = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv4_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
 	ipv4_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
 	ipv4_parent_add = malloc(new * sizeof(uint32_t), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv4_parent_del = malloc(new * sizeof(uint32_t), M_IPFW,
 	    M_WAITOK | M_ZERO);
 #ifdef INET6
 	ipv6 = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv6_parent = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv6_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
 	ipv6_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
 	ipv6_parent_add = malloc(new * sizeof(uint32_t), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipv6_parent_del = malloc(new * sizeof(uint32_t), M_IPFW,
 	    M_WAITOK | M_ZERO);
 #endif
 	for (bucket = 0; bucket < new; bucket++) {
 		DYN_BUCKET_LOCK_INIT(bucket_lock, bucket);
 		CK_SLIST_INIT(&ipv4[bucket]);
 		CK_SLIST_INIT(&ipv4_parent[bucket]);
 #ifdef INET6
 		CK_SLIST_INIT(&ipv6[bucket]);
 		CK_SLIST_INIT(&ipv6_parent[bucket]);
 #endif
 	}
 
 #define DYN_RELINK_STATES(s, hval, i, head, ohead)	do {		\
 	while ((s = CK_SLIST_FIRST(&V_dyn_ ## ohead[i])) != NULL) {	\
 		CK_SLIST_REMOVE_HEAD(&V_dyn_ ## ohead[i], entry);	\
 		CK_SLIST_INSERT_HEAD(&head[DYN_BUCKET(s->hval, new)],	\
 		    s, entry);						\
 	}								\
 } while (0)
 	/*
 	 * Prevent rules changing from userland.
 	 */
 	IPFW_UH_WLOCK(chain);
 	/*
 	 * Hold traffic processing until we finish resize to
 	 * prevent access to states lists.
 	 */
 	IPFW_WLOCK(chain);
 	/* Re-link all dynamic states */
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_RELINK_STATES(s4, data->hashval, bucket, ipv4, ipv4);
 		DYN_RELINK_STATES(s4, limit->hashval, bucket, ipv4_parent,
 		    ipv4_parent);
 #ifdef INET6
 		DYN_RELINK_STATES(s6, data->hashval, bucket, ipv6, ipv6);
 		DYN_RELINK_STATES(s6, limit->hashval, bucket, ipv6_parent,
 		    ipv6_parent);
 #endif
 	}
 
 #define	DYN_SWAP_PTR(old, new, tmp)	do {		\
 	tmp = old;					\
 	old = new;					\
 	new = tmp;					\
 } while (0)
 	/* Swap pointers */
 	DYN_SWAP_PTR(V_dyn_bucket_lock, bucket_lock, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4, ipv4, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4_parent, ipv4_parent, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4_add, ipv4_add, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4_parent_add, ipv4_parent_add, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4_del, ipv4_del, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv4_parent_del, ipv4_parent_del, tmp);
 
 #ifdef INET6
 	DYN_SWAP_PTR(V_dyn_ipv6, ipv6, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv6_parent, ipv6_parent, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv6_add, ipv6_add, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv6_parent_add, ipv6_parent_add, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv6_del, ipv6_del, tmp);
 	DYN_SWAP_PTR(V_dyn_ipv6_parent_del, ipv6_parent_del, tmp);
 #endif
 	bucket = V_curr_dyn_buckets;
 	V_curr_dyn_buckets = new;
 
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	/* Release old resources */
 	while (bucket-- != 0)
 		DYN_BUCKET_LOCK_DESTROY(bucket_lock, bucket);
 	free(bucket_lock, M_IPFW);
 	free(ipv4, M_IPFW);
 	free(ipv4_parent, M_IPFW);
 	free(ipv4_add, M_IPFW);
 	free(ipv4_parent_add, M_IPFW);
 	free(ipv4_del, M_IPFW);
 	free(ipv4_parent_del, M_IPFW);
 #ifdef INET6
 	free(ipv6, M_IPFW);
 	free(ipv6_parent, M_IPFW);
 	free(ipv6_add, M_IPFW);
 	free(ipv6_parent_add, M_IPFW);
 	free(ipv6_del, M_IPFW);
 	free(ipv6_parent_del, M_IPFW);
 #endif
 }
 
 /*
  * This function is used to perform various maintenance
  * on dynamic hash lists. Currently it is called every second.
  */
 static void
 dyn_tick(void *vnetx)
 {
 	uint32_t buckets;
 
 	CURVNET_SET((struct vnet *)vnetx);
 	/*
 	 * First free states unlinked in previous passes.
 	 */
 	dyn_free_states(&V_layer3_chain);
 	/*
 	 * Now unlink others expired states.
 	 * We use IPFW_UH_WLOCK to avoid concurrent call of
 	 * dyn_expire_states(). It is the only function that does
 	 * deletion of state entries from states lists.
 	 */
 	IPFW_UH_WLOCK(&V_layer3_chain);
 	dyn_expire_states(&V_layer3_chain, NULL);
 	IPFW_UH_WUNLOCK(&V_layer3_chain);
 	/*
 	 * Send keepalives if they are enabled and the time has come.
 	 */
 	if (V_dyn_keepalive != 0 &&
 	    V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) {
 		V_dyn_keepalive_last = time_uptime;
 		dyn_send_keepalive_ipv4(&V_layer3_chain);
 #ifdef INET6
 		dyn_send_keepalive_ipv6(&V_layer3_chain);
 #endif
 	}
 	/*
 	 * Check if we need to resize the hash:
 	 * if current number of states exceeds number of buckets in hash,
 	 * and dyn_buckets_max permits to grow the number of buckets, then
 	 * do it. Grow hash size to the minimum power of 2 which is bigger
 	 * than current states count.
 	 */
 	if (V_curr_dyn_buckets < V_dyn_buckets_max &&
 	    (V_curr_dyn_buckets < V_dyn_count / 2 || (
 	    V_curr_dyn_buckets < V_dyn_count && V_curr_max_length > 8))) {
 		buckets = 1 << fls(V_dyn_count);
 		if (buckets > V_dyn_buckets_max)
 			buckets = V_dyn_buckets_max;
 		dyn_grow_hashtable(&V_layer3_chain, buckets);
 	}
 
 	callout_reset_on(&V_dyn_timeout, hz, dyn_tick, vnetx, 0);
 	CURVNET_RESTORE();
 }
 
 void
 ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
 {
 	/*
 	 * Do not perform any checks if we currently have no dynamic states
 	 */
 	if (V_dyn_count == 0)
 		return;
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 	dyn_expire_states(chain, rt);
 }
 
 /*
  * Pass through all states and reset eaction for orphaned rules.
  */
 void
 ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
     uint16_t default_id, uint16_t instance_id)
 {
 #ifdef INET6
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4_state *s4;
 	struct ip_fw *rule;
 	uint32_t bucket;
 
 #define	DYN_RESET_EACTION(s, h, b)					\
 	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
 		if ((s->data->flags & DYN_REFERENCED) == 0)		\
 			continue;					\
 		rule = s->data->parent;					\
 		if (s->type == O_LIMIT)					\
 			rule = ((__typeof(s))rule)->limit->parent;	\
 		ipfw_reset_eaction(ch, rule, eaction_id,		\
 		    default_id, instance_id);				\
 	}
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	if (V_dyn_count == 0)
 		return;
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_RESET_EACTION(s4, ipv4, bucket);
 #ifdef INET6
 		DYN_RESET_EACTION(s6, ipv6, bucket);
 #endif
 	}
 }
 
 /*
  * Returns size of dynamic states in legacy format
  */
 int
 ipfw_dyn_len(void)
 {
 
 	return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule));
 }
 
 /*
  * Returns number of dynamic states.
  * Marks every named object index used by dynamic states with bit in @bmask.
  * Returns number of named objects accounted in bmask via @nocnt.
  * Used by dump format v1 (current).
  */
 uint32_t
 ipfw_dyn_get_count(uint32_t *bmask, int *nocnt)
 {
 #ifdef INET6
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4_state *s4;
 	uint32_t bucket;
 
 #define	DYN_COUNT_OBJECTS(s, h, b)					\
 	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
 		MPASS(s->kidx != 0);					\
 		if (ipfw_mark_object_kidx(bmask, IPFW_TLV_STATE_NAME,	\
 		    s->kidx) != 0)					\
 			(*nocnt)++;					\
 	}
 
 	IPFW_UH_RLOCK_ASSERT(&V_layer3_chain);
 
 	/* No need to pass through all the buckets. */
 	*nocnt = 0;
 	if (V_dyn_count + V_dyn_parent_count == 0)
 		return (0);
 
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_COUNT_OBJECTS(s4, ipv4, bucket);
 #ifdef INET6
 		DYN_COUNT_OBJECTS(s6, ipv6, bucket);
 #endif
 	}
 
 	return (V_dyn_count + V_dyn_parent_count);
 }
 
 /*
  * Check if rule contains at least one dynamic opcode.
  *
  * Returns 1 if such opcode is found, 0 otherwise.
  */
 int
 ipfw_is_dyn_rule(struct ip_fw *rule)
 {
 	int cmdlen, l;
 	ipfw_insn *cmd;
 
 	l = rule->cmd_len;
 	cmd = rule->cmd;
 	cmdlen = 0;
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		switch (cmd->opcode) {
 		case O_LIMIT:
 		case O_KEEP_STATE:
 		case O_PROBE_STATE:
 		case O_CHECK_STATE:
 			return (1);
 		}
 	}
 
 	return (0);
 }
 
 static void
 dyn_export_parent(const struct dyn_parent *p, uint16_t kidx,
     ipfw_dyn_rule *dst)
 {
 
 	dst->dyn_type = O_LIMIT_PARENT;
 	dst->kidx = kidx;
 	dst->count = (uint16_t)DPARENT_COUNT(p);
 	dst->expire = TIME_LEQ(p->expire, time_uptime) ?  0:
 	    p->expire - time_uptime;
 
 	/* 'rule' is used to pass up the rule number and set */
 	memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum));
 	/* store set number into high word of dst->rule pointer. */
 	memcpy((char *)&dst->rule + sizeof(p->rulenum), &p->set,
 	    sizeof(p->set));
 
 	/* unused fields */
 	dst->pcnt = 0;
 	dst->bcnt = 0;
 	dst->parent = NULL;
 	dst->state = 0;
 	dst->ack_fwd = 0;
 	dst->ack_rev = 0;
 	dst->bucket = p->hashval;
 	/*
 	 * The legacy userland code will interpret a NULL here as a marker
 	 * for the last dynamic rule.
 	 */
 	dst->next = (ipfw_dyn_rule *)1;
 }
 
 static void
 dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type,
     ipfw_dyn_rule *dst)
 {
 
 	dst->dyn_type = type;
 	dst->kidx = kidx;
 	dst->pcnt = data->pcnt_fwd + data->pcnt_rev;
 	dst->bcnt = data->bcnt_fwd + data->bcnt_rev;
 	dst->expire = TIME_LEQ(data->expire, time_uptime) ?  0:
 	    data->expire - time_uptime;
 
 	/* 'rule' is used to pass up the rule number and set */
 	memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum));
 	/* store set number into high word of dst->rule pointer. */
 	memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set,
 	    sizeof(data->set));
 
 	dst->state = data->state;
 	if (data->flags & DYN_REFERENCED)
 		dst->state |= IPFW_DYN_ORPHANED;
 
 	/* unused fields */
 	dst->parent = NULL;
 	dst->ack_fwd = data->ack_fwd;
 	dst->ack_rev = data->ack_rev;
 	dst->count = 0;
 	dst->bucket = data->hashval;
 	/*
 	 * The legacy userland code will interpret a NULL here as a marker
 	 * for the last dynamic rule.
 	 */
 	dst->next = (ipfw_dyn_rule *)1;
 }
 
 static void
 dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst)
 {
 
 	switch (s->type) {
 	case O_LIMIT_PARENT:
 		dyn_export_parent(s->limit, s->kidx, dst);
 		break;
 	default:
 		dyn_export_data(s->data, s->kidx, s->type, dst);
 	}
 
 	dst->id.dst_ip = s->dst;
 	dst->id.src_ip = s->src;
 	dst->id.dst_port = s->dport;
 	dst->id.src_port = s->sport;
 	dst->id.fib = s->data->fibnum;
 	dst->id.proto = s->proto;
 	dst->id._flags = 0;
 	dst->id.addr_type = 4;
 
 	memset(&dst->id.dst_ip6, 0, sizeof(dst->id.dst_ip6));
 	memset(&dst->id.src_ip6, 0, sizeof(dst->id.src_ip6));
 	dst->id.flow_id6 = dst->id.extra = 0;
 }
 
 #ifdef INET6
 static void
 dyn_export_ipv6_state(const struct dyn_ipv6_state *s, ipfw_dyn_rule *dst)
 {
 
 	switch (s->type) {
 	case O_LIMIT_PARENT:
 		dyn_export_parent(s->limit, s->kidx, dst);
 		break;
 	default:
 		dyn_export_data(s->data, s->kidx, s->type, dst);
 	}
 
 	dst->id.src_ip6 = s->src;
 	dst->id.dst_ip6 = s->dst;
 	dst->id.dst_port = s->dport;
 	dst->id.src_port = s->sport;
 	dst->id.fib = s->data->fibnum;
 	dst->id.proto = s->proto;
 	dst->id._flags = 0;
 	dst->id.addr_type = 6;
 
 	dst->id.dst_ip = dst->id.src_ip = 0;
 	dst->id.flow_id6 = dst->id.extra = 0;
 }
 #endif /* INET6 */
 
 /*
  * Fills the buffer given by @sd with dynamic states.
  * Used by dump format v1 (current).
  *
  * Returns 0 on success.
  */
 int
 ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd)
 {
 #ifdef INET6
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4_state *s4;
 	ipfw_obj_dyntlv *dst, *last;
 	ipfw_obj_ctlv *ctlv;
 	uint32_t bucket;
 
 	if (V_dyn_count == 0)
 		return (0);
 
 	/*
 	 * IPFW_UH_RLOCK garantees that another userland request
 	 * and callout thread will not delete entries from states
 	 * lists.
 	 */
 	IPFW_UH_RLOCK_ASSERT(chain);
 
 	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
 	if (ctlv == NULL)
 		return (ENOMEM);
 	ctlv->head.type = IPFW_TLV_DYNSTATE_LIST;
 	ctlv->objsize = sizeof(ipfw_obj_dyntlv);
 	last = NULL;
 
 #define	DYN_EXPORT_STATES(s, af, h, b)				\
 	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
 		dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd,	\
 		    sizeof(ipfw_obj_dyntlv));				\
 		if (dst == NULL)					\
 			return (ENOMEM);				\
 		dyn_export_ ## af ## _state(s, &dst->state);		\
 		dst->head.length = sizeof(ipfw_obj_dyntlv);		\
 		dst->head.type = IPFW_TLV_DYN_ENT;			\
 		last = dst;						\
 	}
 
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket);
 		DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket);
 #ifdef INET6
 		DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket);
 		DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket);
 #endif /* INET6 */
 	}
 
 	/* mark last dynamic rule */
 	if (last != NULL)
 		last->head.flags = IPFW_DF_LAST; /* XXX: unused */
 	return (0);
 #undef DYN_EXPORT_STATES
 }
 
 /*
  * Fill given buffer with dynamic states (legacy format).
  * IPFW_UH_RLOCK has to be held while calling.
  */
 void
 ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep)
 {
 #ifdef INET6
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4_state *s4;
 	ipfw_dyn_rule *p, *last = NULL;
 	char *bp;
 	uint32_t bucket;
 
 	if (V_dyn_count == 0)
 		return;
 	bp = *pbp;
 
 	IPFW_UH_RLOCK_ASSERT(chain);
 
 #define	DYN_EXPORT_STATES(s, af, head, b)				\
 	CK_SLIST_FOREACH(s, &V_dyn_ ## head[b], entry) {		\
 		if (bp + sizeof(*p) > ep)				\
 			break;						\
 		p = (ipfw_dyn_rule *)bp;				\
 		dyn_export_ ## af ## _state(s, p);			\
 		last = p;						\
 		bp += sizeof(*p);					\
 	}
 
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket);
 		DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket);
 #ifdef INET6
 		DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket);
 		DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket);
 #endif /* INET6 */
 	}
 
 	if (last != NULL) /* mark last dynamic rule */
 		last->next = NULL;
 	*pbp = bp;
 #undef DYN_EXPORT_STATES
 }
 
 void
 ipfw_dyn_init(struct ip_fw_chain *chain)
 {
 
 #ifdef IPFIREWALL_JENKINSHASH
 	V_dyn_hashseed = arc4random();
 #endif
 	V_dyn_max = 16384;		/* max # of states */
 	V_dyn_parent_max = 4096;	/* max # of parent states */
 	V_dyn_buckets_max = 8192;	/* must be power of 2 */
 
 	V_dyn_ack_lifetime = 300;
 	V_dyn_syn_lifetime = 20;
 	V_dyn_fin_lifetime = 1;
 	V_dyn_rst_lifetime = 1;
 	V_dyn_udp_lifetime = 10;
 	V_dyn_short_lifetime = 5;
 
 	V_dyn_keepalive_interval = 20;
 	V_dyn_keepalive_period = 5;
 	V_dyn_keepalive = 1;		/* send keepalives */
 	V_dyn_keepalive_last = time_uptime;
 
 	V_dyn_data_zone = uma_zcreate("IPFW dynamic states data",
 	    sizeof(struct dyn_data), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_dyn_data_zone, V_dyn_max);
 
 	V_dyn_parent_zone = uma_zcreate("IPFW parent dynamic states",
 	    sizeof(struct dyn_parent), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max);
 
 	SLIST_INIT(&V_dyn_expired_ipv4);
 	V_dyn_ipv4 = NULL;
 	V_dyn_ipv4_parent = NULL;
 	V_dyn_ipv4_zone = uma_zcreate("IPFW IPv4 dynamic states",
 	    sizeof(struct dyn_ipv4_state), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 #ifdef INET6
 	SLIST_INIT(&V_dyn_expired_ipv6);
 	V_dyn_ipv6 = NULL;
 	V_dyn_ipv6_parent = NULL;
 	V_dyn_ipv6_zone = uma_zcreate("IPFW IPv6 dynamic states",
 	    sizeof(struct dyn_ipv6_state), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 #endif
 
 	/* Initialize buckets. */
 	V_curr_dyn_buckets = 0;
 	V_dyn_bucket_lock = NULL;
 	dyn_grow_hashtable(chain, 256);
 
 	if (IS_DEFAULT_VNET(curvnet))
 		dyn_hp_cache = malloc(mp_ncpus * sizeof(void *), M_IPFW,
 		    M_WAITOK | M_ZERO);
 
 	DYN_EXPIRED_LOCK_INIT();
 	callout_init(&V_dyn_timeout, 1);
 	callout_reset(&V_dyn_timeout, hz, dyn_tick, curvnet);
 	IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
 }
 
 void
 ipfw_dyn_uninit(int pass)
 {
 #ifdef INET6
 	struct dyn_ipv6_state *s6;
 #endif
 	struct dyn_ipv4_state *s4;
 	int bucket;
 
 	if (pass == 0) {
 		callout_drain(&V_dyn_timeout);
 		return;
 	}
 	IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
 	DYN_EXPIRED_LOCK_DESTROY();
 
 #define	DYN_FREE_STATES_FORCED(CK, s, af, name, en)	do {		\
 	while ((s = CK ## SLIST_FIRST(&V_dyn_ ## name)) != NULL) {	\
 		CK ## SLIST_REMOVE_HEAD(&V_dyn_ ## name, en);	\
 		if (s->type == O_LIMIT_PARENT)				\
 			uma_zfree(V_dyn_parent_zone, s->limit);		\
 		else							\
 			uma_zfree(V_dyn_data_zone, s->data);		\
 		uma_zfree(V_dyn_ ## af ## _zone, s);			\
 	}								\
 } while (0)
 	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
 		DYN_BUCKET_LOCK_DESTROY(V_dyn_bucket_lock, bucket);
 
 		DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4[bucket], entry);
 		DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4_parent[bucket],
 		    entry);
 #ifdef INET6
 		DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6[bucket], entry);
 		DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6_parent[bucket],
 		    entry);
 #endif /* INET6 */
 	}
 	DYN_FREE_STATES_FORCED(, s4, ipv4, expired_ipv4, expired);
 #ifdef INET6
 	DYN_FREE_STATES_FORCED(, s6, ipv6, expired_ipv6, expired);
 #endif
 #undef DYN_FREE_STATES_FORCED
 
 	uma_zdestroy(V_dyn_ipv4_zone);
 	uma_zdestroy(V_dyn_data_zone);
 	uma_zdestroy(V_dyn_parent_zone);
 #ifdef INET6
 	uma_zdestroy(V_dyn_ipv6_zone);
 	free(V_dyn_ipv6, M_IPFW);
 	free(V_dyn_ipv6_parent, M_IPFW);
 	free(V_dyn_ipv6_add, M_IPFW);
 	free(V_dyn_ipv6_parent_add, M_IPFW);
 	free(V_dyn_ipv6_del, M_IPFW);
 	free(V_dyn_ipv6_parent_del, M_IPFW);
 #endif
 	free(V_dyn_bucket_lock, M_IPFW);
 	free(V_dyn_ipv4, M_IPFW);
 	free(V_dyn_ipv4_parent, M_IPFW);
 	free(V_dyn_ipv4_add, M_IPFW);
 	free(V_dyn_ipv4_parent_add, M_IPFW);
 	free(V_dyn_ipv4_del, M_IPFW);
 	free(V_dyn_ipv4_parent_del, M_IPFW);
 	if (IS_DEFAULT_VNET(curvnet))
 		free(dyn_hp_cache, M_IPFW);
 }
 
 
Index: head/sys/netpfil/ipfw/ip_fw_iface.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_iface.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_iface.c	(revision 343619)
@@ -1,540 +1,539 @@
 /*-
  * Copyright (c) 2014 Yandex LLC.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Kernel interface tracking API.
  *
  */
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <sys/eventhandler.h>
 #include <net/if.h>
 #include <net/if_var.h>
-#include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #define	CHAIN_TO_II(ch)		((struct namedobj_instance *)ch->ifcfg)
 
 #define	DEFAULT_IFACES	128
 
 static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
     uint16_t ifindex);
 static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
     uint16_t ifindex);
 static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_XIFLIST,	0,	HDIR_GET,	list_ifaces },
 };
 
 /*
  * FreeBSD Kernel interface.
  */
 static void ipfw_kifhandler(void *arg, struct ifnet *ifp);
 static int ipfw_kiflookup(char *name);
 static void iface_khandler_register(void);
 static void iface_khandler_deregister(void);
 
 static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event;
 static int num_vnets = 0;
 static struct mtx vnet_mtx;
 
 /*
  * Checks if kernel interface is contained in our tracked
  * interface list and calls attach/detach handler.
  */
 static void
 ipfw_kifhandler(void *arg, struct ifnet *ifp)
 {
 	struct ip_fw_chain *ch;
 	struct ipfw_iface *iif;
 	struct namedobj_instance *ii;
 	uintptr_t htype;
 
 	if (V_ipfw_vnet_ready == 0)
 		return;
 
 	ch = &V_layer3_chain;
 	htype = (uintptr_t)arg;
 
 	IPFW_UH_WLOCK(ch);
 	ii = CHAIN_TO_II(ch);
 	if (ii == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return;
 	}
 	iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0,
 	    if_name(ifp));
 	if (iif != NULL) {
 		if (htype == 1)
 			handle_ifattach(ch, iif, ifp->if_index);
 		else
 			handle_ifdetach(ch, iif, ifp->if_index);
 	}
 	IPFW_UH_WUNLOCK(ch);
 }
 
 /*
  * Reference current VNET as iface tracking API user.
  * Registers interface tracking handlers for first VNET.
  */
 static void
 iface_khandler_register()
 {
 	int create;
 
 	create = 0;
 
 	mtx_lock(&vnet_mtx);
 	if (num_vnets == 0)
 		create = 1;
 	num_vnets++;
 	mtx_unlock(&vnet_mtx);
 
 	if (create == 0)
 		return;
 
 	printf("IPFW: starting up interface tracker\n");
 
 	ipfw_ifdetach_event = EVENTHANDLER_REGISTER(
 	    ifnet_departure_event, ipfw_kifhandler, NULL,
 	    EVENTHANDLER_PRI_ANY);
 	ipfw_ifattach_event = EVENTHANDLER_REGISTER(
 	    ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1),
 	    EVENTHANDLER_PRI_ANY);
 }
 
 /*
  *
  * Detach interface event handlers on last VNET instance
  * detach.
  */
 static void
 iface_khandler_deregister()
 {
 	int destroy;
 
 	destroy = 0;
 	mtx_lock(&vnet_mtx);
 	if (num_vnets == 1)
 		destroy = 1;
 	num_vnets--;
 	mtx_unlock(&vnet_mtx);
 
 	if (destroy == 0)
 		return;
 
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
 	    ipfw_ifattach_event);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 	    ipfw_ifdetach_event);
 }
 
 /*
  * Retrieves ifindex for given @name.
  *
  * Returns ifindex or 0.
  */
 static int
 ipfw_kiflookup(char *name)
 {
 	struct ifnet *ifp;
 	int ifindex;
 
 	ifindex = 0;
 
 	if ((ifp = ifunit_ref(name)) != NULL) {
 		ifindex = ifp->if_index;
 		if_rele(ifp);
 	}
 
 	return (ifindex);
 }
 
 /*
  * Global ipfw startup hook.
  * Since we perform lazy initialization, do nothing except
  * mutex init.
  */
 int
 ipfw_iface_init()
 {
 
 	mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF);
 	IPFW_ADD_SOPT_HANDLER(1, scodes);
 	return (0);
 }
 
 /*
  * Global ipfw destroy hook.
  * Unregister khandlers iff init has been done.
  */
 void
 ipfw_iface_destroy()
 {
 
 	IPFW_DEL_SOPT_HANDLER(1, scodes);
 	mtx_destroy(&vnet_mtx);
 }
 
 /*
  * Perform actual init on internal request.
  * Inits both namehash and global khandler.
  */
 static void
 vnet_ipfw_iface_init(struct ip_fw_chain *ch)
 {
 	struct namedobj_instance *ii;
 
 	ii = ipfw_objhash_create(DEFAULT_IFACES);
 	IPFW_UH_WLOCK(ch);
 	if (ch->ifcfg == NULL) {
 		ch->ifcfg = ii;
 		ii = NULL;
 	}
 	IPFW_UH_WUNLOCK(ch);
 
 	if (ii != NULL) {
 		/* Already initialized. Free namehash. */
 		ipfw_objhash_destroy(ii);
 	} else {
 		/* We're the first ones. Init kernel hooks. */
 		iface_khandler_register();
 	}
 }
 
 static int
 destroy_iface(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 
 	/* Assume all consumers have been already detached */
 	free(no, M_IPFW);
 	return (0);
 }
 
 /*
  * Per-VNET ipfw detach hook.
  *
  */
 void
 vnet_ipfw_iface_destroy(struct ip_fw_chain *ch)
 {
 	struct namedobj_instance *ii;
 
 	IPFW_UH_WLOCK(ch);
 	ii = CHAIN_TO_II(ch);
 	ch->ifcfg = NULL;
 	IPFW_UH_WUNLOCK(ch);
 
 	if (ii != NULL) {
 		ipfw_objhash_foreach(ii, destroy_iface, ch);
 		ipfw_objhash_destroy(ii);
 		iface_khandler_deregister();
 	}
 }
 
 /*
  * Notify the subsystem that we are interested in tracking
  * interface @name. This function has to be called without
  * holding any locks to permit allocating the necessary states
  * for proper interface tracking.
  *
  * Returns 0 on success.
  */
 int
 ipfw_iface_ref(struct ip_fw_chain *ch, char *name,
     struct ipfw_ifc *ic)
 {
 	struct namedobj_instance *ii;
 	struct ipfw_iface *iif, *tmp;
 
 	if (strlen(name) >= sizeof(iif->ifname))
 		return (EINVAL);
 
 	IPFW_UH_WLOCK(ch);
 
 	ii = CHAIN_TO_II(ch);
 	if (ii == NULL) {
 
 		/*
 		 * First request to subsystem.
 		 * Let's perform init.
 		 */
 		IPFW_UH_WUNLOCK(ch);
 		vnet_ipfw_iface_init(ch);
 		IPFW_UH_WLOCK(ch);
 		ii = CHAIN_TO_II(ch);
 	}
 
 	iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name);
 
 	if (iif != NULL) {
 		iif->no.refcnt++;
 		ic->iface = iif;
 		IPFW_UH_WUNLOCK(ch);
 		return (0);
 	}
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Not found. Let's create one */
 	iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO);
 	TAILQ_INIT(&iif->consumers);
 	iif->no.name = iif->ifname;
 	strlcpy(iif->ifname, name, sizeof(iif->ifname));
 
 	/*
 	 * Ref & link to the list.
 	 *
 	 * We assume  ifnet_arrival_event / ifnet_departure_event
 	 * are not holding any locks.
 	 */
 	iif->no.refcnt = 1;
 	IPFW_UH_WLOCK(ch);
 
 	tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name);
 	if (tmp != NULL) {
 		/* Interface has been created since unlock. Ref and return */
 		tmp->no.refcnt++;
 		ic->iface = tmp;
 		IPFW_UH_WUNLOCK(ch);
 		free(iif, M_IPFW);
 		return (0);
 	}
 
 	iif->ifindex = ipfw_kiflookup(name);
 	if (iif->ifindex != 0)
 		iif->resolved = 1;
 
 	ipfw_objhash_add(ii, &iif->no);
 	ic->iface = iif;
 
 	IPFW_UH_WUNLOCK(ch);
 
 	return (0);
 }
 
 /*
  * Adds @ic to the list of iif interface consumers.
  * Must be called with holding both UH+WLOCK.
  * Callback may be immediately called (if interface exists).
  */
 void
 ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
 {
 	struct ipfw_iface *iif;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	IPFW_WLOCK_ASSERT(ch);
 
 	iif = ic->iface;
 	
 	TAILQ_INSERT_TAIL(&iif->consumers, ic, next);
 	if (iif->resolved != 0)
 		ic->cb(ch, ic->cbdata, iif->ifindex);
 }
 
 /*
  * Unlinks interface tracker object @ic from interface.
  * Must be called while holding UH lock.
  */
 void
 ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
 {
 	struct ipfw_iface *iif;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	iif = ic->iface;
 	TAILQ_REMOVE(&iif->consumers, ic, next);
 }
 
 /*
  * Unreference interface specified by @ic.
  * Must be called while holding UH lock.
  */
 void
 ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
 {
 	struct ipfw_iface *iif;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	iif = ic->iface;
 	ic->iface = NULL;
 
 	iif->no.refcnt--;
 	/* TODO: check for references & delete */
 }
 
 /*
  * Interface arrival handler.
  */
 static void
 handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
     uint16_t ifindex)
 {
 	struct ipfw_ifc *ic;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	iif->gencnt++;
 	iif->resolved = 1;
 	iif->ifindex = ifindex;
 
 	IPFW_WLOCK(ch);
 	TAILQ_FOREACH(ic, &iif->consumers, next)
 		ic->cb(ch, ic->cbdata, iif->ifindex);
 	IPFW_WUNLOCK(ch);
 }
 
 /*
  * Interface departure handler.
  */
 static void
 handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
     uint16_t ifindex)
 {
 	struct ipfw_ifc *ic;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	IPFW_WLOCK(ch);
 	TAILQ_FOREACH(ic, &iif->consumers, next)
 		ic->cb(ch, ic->cbdata, 0);
 	IPFW_WUNLOCK(ch);
 
 	iif->gencnt++;
 	iif->resolved = 0;
 	iif->ifindex = 0;
 }
 
 struct dump_iface_args {
 	struct ip_fw_chain *ch;
 	struct sockopt_data *sd;
 };
 
 static int
 export_iface_internal(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 	ipfw_iface_info *i;
 	struct dump_iface_args *da;
 	struct ipfw_iface *iif;
 
 	da = (struct dump_iface_args *)arg;
 
 	i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i));
 	KASSERT(i != NULL, ("previously checked buffer is not enough"));
 
 	iif = (struct ipfw_iface *)no;
 
 	strlcpy(i->ifname, iif->ifname, sizeof(i->ifname));
 	if (iif->resolved)
 		i->flags |= IPFW_IFFLAG_RESOLVED;
 	i->ifindex = iif->ifindex;
 	i->refcnt = iif->no.refcnt;
 	i->gencnt = iif->gencnt;
 	return (0);
 }
 
 /*
  * Lists all interface currently tracked by ipfw.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ]
  *
  * Returns 0 on success
  */
 static int
 list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct namedobj_instance *ii;
 	struct _ipfw_obj_lheader *olh;
 	struct dump_iface_args da;
 	uint32_t count, size;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL)
 		return (EINVAL);
 	if (sd->valsize < olh->size)
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(ch);
 	ii = CHAIN_TO_II(ch);
 	if (ii != NULL)
 		count = ipfw_objhash_count(ii);
 	else
 		count = 0;
 	size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader);
 
 	/* Fill in header regadless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_iface_info);
 
 	if (size > olh->size) {
 		olh->size = size;
 		IPFW_UH_RUNLOCK(ch);
 		return (ENOMEM);
 	}
 	olh->size = size;
 
 	da.ch = ch;
 	da.sd = sd;
 
 	if (ii != NULL)
 		ipfw_objhash_foreach(ii, export_iface_internal, &da);
 	IPFW_UH_RUNLOCK(ch);
 
 	return (0);
 }
 
Index: head/sys/netpfil/ipfw/ip_fw_nat.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_nat.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_nat.c	(revision 343619)
@@ -1,1243 +1,1242 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008 Paolo Pisati
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 
 #include <netinet/libalias/alias.h>
 #include <netinet/libalias/alias_local.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
-#include <net/pfil.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 /*
  * One LSNAT server-pool entry attached to a redirect (cfg_redir).
  * Entries are handed to LibAliasAddServer() when the redirect is created.
  */
 struct cfg_spool {
 	LIST_ENTRY(cfg_spool)   _next;          /* chain of spool instances */
 	struct in_addr          addr;		/* server address */
 	uint16_t		port;		/* server port; htons() is applied when passed to libalias */
 };
 
 /*
  * Nat redirect configuration.
  * Kernel-side copy of one redirect rule of a nat instance, together with
  * the libalias links created for it and its optional LSNAT spool.
  */
 struct cfg_redir {
 	LIST_ENTRY(cfg_redir)	_next;	/* chain of redir instances */
 	uint16_t		mode;	/* type of redirect mode */
 	uint16_t		proto;	/* protocol: tcp/udp */
 	struct in_addr		laddr;	/* local ip address */
 	struct in_addr		paddr;	/* public ip address */
 	struct in_addr		raddr;	/* remote ip address */
 	uint16_t		lport;	/* local port */
 	uint16_t		pport;	/* public port */
 	uint16_t		rport;	/* remote port	*/
 	uint16_t		pport_cnt;	/* number of public ports */
 	uint16_t		rport_cnt;	/* number of remote ports */
 	/*
 	 * Array of libalias links backing this redirect: one per public
 	 * port for port redirects, only element 0 for the other modes.
 	 */
 	struct alias_link	**alink;	
 	u_int16_t		spool_cnt; /* num of entry in spool chain */
 	/* chain of spool instances */
 	LIST_HEAD(spool_chain, cfg_spool) spool_chain;
 };
 
 /*
  * Nat configuration data struct.
  * One instance per configured nat; instances are linked on chain->nat.
  */
 struct cfg_nat {
 	/* chain of nat instances */
 	LIST_ENTRY(cfg_nat)	_next;
 	int			id;		/* nat id  */
 	struct in_addr		ip;		/* nat ip address */
 	struct libalias		*lib;		/* libalias instance */
 	int			mode;		/* aliasing mode */
 	int			redir_cnt; /* number of entries in redir chain */
 	/* chain of redir instances */
 	LIST_HEAD(redir_chain, cfg_redir) redir_chain;  
 	char			if_name[IF_NAMESIZE];	/* interface name */
 };
 
 /* Tag of the ifaddr_event handler; set on registration, used to deregister. */
 static eventhandler_tag ifaddr_event_tag;
 
 /*
  * ifaddr_event handler: when the addresses of 'ifp' change, refresh the
  * alias address of every nat instance bound to that interface name.
  * Each AF_INET address found on the interface is applied in turn.
  */
 static void
 ifaddr_change(void *arg __unused, struct ifnet *ifp)
 {
 	struct ip_fw_chain *chain;
 	struct cfg_nat *nat;
 	struct ifaddr *ifa;
 	struct sockaddr_in *sin;
 
 	KASSERT(curvnet == ifp->if_vnet,
 	    ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
 
 	/* Nothing to do until both ipfw and ipfw_nat are up in this vnet. */
 	if (V_ipfw_vnet_ready == 0 || V_ipfw_nat_ready == 0)
 		return;
 
 	chain = &V_layer3_chain;
 	IPFW_UH_WLOCK(chain);
 	LIST_FOREACH(nat, &chain->nat, _next) {
 		/* Only instances using 'ifp->if_xname' as dynamic alias. */
 		if (strncmp(nat->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
 			continue;
 		if_addr_rlock(ifp);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr == NULL ||
 			    ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)(ifa->ifa_addr);
 			IPFW_WLOCK(chain);
 			nat->ip = sin->sin_addr;
 			LibAliasSetAddress(nat->lib, nat->ip);
 			IPFW_WUNLOCK(chain);
 		}
 		if_addr_runlock(ifp);
 	}
 	IPFW_UH_WUNLOCK(chain);
 }
 
 /*
  * Clear the nat pointer cached in O_NAT rule actions: only those that
  * refer to nat instance 'ix', or every one of them when ix < 0.
  * The chain must be write-locked by the caller.
  */
 static void
 flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
 {
 	ipfw_insn_nat *cmd;
 	int rule;
 
 	IPFW_WLOCK_ASSERT(chain);
 	for (rule = 0; rule < chain->n_rules; rule++) {
 		cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[rule]);
 		/* XXX skip log and the like ? */
 		if (cmd->o.opcode != O_NAT || cmd->nat == NULL)
 			continue;
 		if (ix < 0 || cmd->nat->id == ix)
 			cmd->nat = NULL;
 	}
 }
 
 /*
  * Tear down every redirect on 'head': delete its libalias links, free its
  * spool entries and link array, then unhook and free the redirect itself.
  * Redirects with an unrecognised mode are reported and left in place.
  */
 static void
 del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
 {
 	struct cfg_redir *redir, *next_redir;
 	struct cfg_spool *spool, *next_spool;
 	int idx, nlinks;
 
 	LIST_FOREACH_SAFE(redir, head, _next, next_redir) {
 		/* Work out how many alias_link entries have to go. */
 		switch (redir->mode) {
 		case NAT44_REDIR_PORT:
 			nlinks = redir->pport_cnt;
 			break;
 		case NAT44_REDIR_ADDR:
 		case NAT44_REDIR_PROTO:
 			nlinks = 1;
 			break;
 		default:
 			printf("unknown redirect mode: %u\n", redir->mode);
 			/* XXX - panic?!?!? */
 			continue;
 		}
 
 		/* Delete all libalias redirect entry. */
 		for (idx = 0; idx < nlinks; idx++)
 			LibAliasRedirectDelete(n->lib, redir->alink[idx]);
 
 		/* Del spool cfg if any. */
 		LIST_FOREACH_SAFE(spool, &redir->spool_chain, _next,
 		    next_spool) {
 			LIST_REMOVE(spool, _next);
 			free(spool, M_IPFW);
 		}
 
 		free(redir->alink, M_IPFW);
 		LIST_REMOVE(redir, _next);
 		free(redir, M_IPFW);
 	}
 }
 
 static int
 add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
 {
 	struct cfg_redir *r;
 	struct cfg_spool *s;
 	struct nat44_cfg_redir *ser_r;
 	struct nat44_cfg_spool *ser_s;
 
 	int cnt, off, i;
 
 	for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
 		ser_r = (struct nat44_cfg_redir *)&buf[off];
 		r = malloc(sizeof(*r), M_IPFW, M_WAITOK | M_ZERO);
 		r->mode = ser_r->mode;
 		r->laddr = ser_r->laddr;
 		r->paddr = ser_r->paddr;
 		r->raddr = ser_r->raddr;
 		r->lport = ser_r->lport;
 		r->pport = ser_r->pport;
 		r->rport = ser_r->rport;
 		r->pport_cnt = ser_r->pport_cnt;
 		r->rport_cnt = ser_r->rport_cnt;
 		r->proto = ser_r->proto;
 		r->spool_cnt = ser_r->spool_cnt;
 		//memcpy(r, ser_r, SOF_REDIR);
 		LIST_INIT(&r->spool_chain);
 		off += sizeof(struct nat44_cfg_redir);
 		r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
 		    M_IPFW, M_WAITOK | M_ZERO);
 		switch (r->mode) {
 		case NAT44_REDIR_ADDR:
 			r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
 			    r->paddr);
 			break;
 		case NAT44_REDIR_PORT:
 			for (i = 0 ; i < r->pport_cnt; i++) {
 				/* If remotePort is all ports, set it to 0. */
 				u_short remotePortCopy = r->rport + i;
 				if (r->rport_cnt == 1 && r->rport == 0)
 					remotePortCopy = 0;
 				r->alink[i] = LibAliasRedirectPort(ptr->lib,
 				    r->laddr, htons(r->lport + i), r->raddr,
 				    htons(remotePortCopy), r->paddr,
 				    htons(r->pport + i), r->proto);
 				if (r->alink[i] == NULL) {
 					r->alink[0] = NULL;
 					break;
 				}
 			}
 			break;
 		case NAT44_REDIR_PROTO:
 			r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
 			    r->raddr, r->paddr, r->proto);
 			break;
 		default:
 			printf("unknown redirect mode: %u\n", r->mode);
 			break;
 		}
 		if (r->alink[0] == NULL) {
 			printf("LibAliasRedirect* returned NULL\n");
 			free(r->alink, M_IPFW);
 			free(r, M_IPFW);
 			return (EINVAL);
 		}
 		/* LSNAT handling. */
 		for (i = 0; i < r->spool_cnt; i++) {
 			ser_s = (struct nat44_cfg_spool *)&buf[off];
 			s = malloc(sizeof(*s), M_IPFW, M_WAITOK | M_ZERO);
 			s->addr = ser_s->addr;
 			s->port = ser_s->port;
 			LibAliasAddServer(ptr->lib, r->alink[0],
 			    s->addr, htons(s->port));
 			off += sizeof(struct nat44_cfg_spool);
 			/* Hook spool entry. */
 			LIST_INSERT_HEAD(&r->spool_chain, s, _next);
 		}
 		/* And finally hook this redir entry. */
 		LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
 	}
 
 	return (0);
 }
 
 /*
  * Release a nat instance: drop its redirect/spool configuration, shut
  * down its libalias instance and free the structure.  The function does
  * not unlink 'ptr' from any list; callers in this file do that first.
  */
 static void
 free_nat_instance(struct cfg_nat *ptr)
 {
 
 	del_redir_spool_cfg(ptr, &ptr->redir_chain);
 	LibAliasUninit(ptr->lib);
 	free(ptr, M_IPFW);
 }
 
 
 /*
  * ipfw_nat - perform mbuf header translation.
  *
  * args: firewall arguments; args->oif == NULL selects the inbound path
  *       (LibAliasIn), anything else the outbound path.  args->m is set
  *       to the resulting mbuf, or to NULL when the packet is dropped.
  * t:    nat instance to use, or NULL for 'global' mode, in which every
  *       instance without PKT_ALIAS_SKIP_GLOBAL is tried for outbound
  *       packets.
  * m:    packet to translate.
  *
  * Returns IP_FW_NAT when processing should continue with args->m, or
  * IP_FW_DENY when the packet was dropped.
  *
  * Note V_layer3_chain has to be locked while calling ipfw_nat() in
  * 'global' operation mode (t == NULL).
  *
  */
 static int
 ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
 {
 	struct mbuf *mcl;
 	struct ip *ip;
 	/* XXX - libalias duct tape */
 	int ldt, retval, found;
 	struct ip_fw_chain *chain;
 	char *c;
 
 	ldt = 0;
 	retval = 0;
 	/* libalias works on a flat buffer, so pull up the whole packet. */
 	mcl = m_megapullup(m, m->m_pkthdr.len);
 	if (mcl == NULL) {
 		args->m = NULL;
 		return (IP_FW_DENY);
 	}
 	ip = mtod(mcl, struct ip *);
 
 	/*
 	 * XXX - Libalias checksum offload 'duct tape':
 	 *
 	 * locally generated packets have only pseudo-header checksum
 	 * calculated and libalias will break it[1], so mark them for
 	 * later fix.  Moreover there are cases when libalias modifies
 	 * tcp packet data[2], mark them for later fix too.
 	 *
 	 * [1] libalias was never meant to run in kernel, so it does
 	 * not have any knowledge about checksum offloading, and
 	 * expects a packet with a full internet checksum.
 	 * Unfortunately, packets generated locally will have just the
 	 * pseudo header calculated, and when libalias tries to adjust
 	 * the checksum it will actually compute a wrong value.
 	 *
 	 * [2] when libalias modifies tcp's data content, full TCP
 	 * checksum has to be recomputed: the problem is that
 	 * libalias does not have any idea about checksum offloading.
 	 * To work around this, we do not do checksumming in LibAlias,
 	 * but only mark the packets in th_x2 field. If we receive a
 	 * marked packet, we calculate correct checksum for it
 	 * aware of offloading.  Why such a terrible hack instead of
 	 * recalculating checksum for each packet?
 	 * Because the previous checksum was not checked!
 	 * Recalculating checksums for EVERY packet will hide ALL
 	 * transmission errors. Yes, marked packets still suffer from
 	 * this problem. But, sigh, natd(8) has this problem, too.
 	 *
 	 * TODO: -make libalias mbuf aware (so
 	 * it can handle delayed checksum and tso)
 	 */
 
 	if (mcl->m_pkthdr.rcvif == NULL &&
 	    mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
 		ldt = 1;
 
 	c = mtod(mcl, char *);
 
 	/* Check if this is 'global' instance */
 	if (t == NULL) {
 		if (args->oif == NULL) {
 			/* Wrong direction, skip processing */
 			args->m = mcl;
 			return (IP_FW_NAT);
 		}
 
 		found = 0;
 		chain = &V_layer3_chain;
 		IPFW_RLOCK_ASSERT(chain);
 		/* Check every nat entry... */
 		LIST_FOREACH(t, &chain->nat, _next) {
 			if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0)
 				continue;
 			retval = LibAliasOutTry(t->lib, c,
 			    mcl->m_len + M_TRAILINGSPACE(mcl), 0);
 			if (retval == PKT_ALIAS_OK) {
 				/* Nat instance recognises state */
 				found = 1;
 				break;
 			}
 		}
 		if (found != 1) {
 			/* No instance found, return ignore */
 			args->m = mcl;
 			return (IP_FW_NAT);
 		}
 	} else {
 		if (args->oif == NULL)
 			retval = LibAliasIn(t->lib, c,
 				mcl->m_len + M_TRAILINGSPACE(mcl));
 		else
 			retval = LibAliasOut(t->lib, c,
 				mcl->m_len + M_TRAILINGSPACE(mcl));
 	}
 
 	/*
 	 * We drop packet when:
 	 * 1. libalias returns PKT_ALIAS_ERROR;
 	 * 2. For incoming packets:
 	 *	a) for unresolved fragments;
 	 *	b) libalias returns PKT_ALIAS_IGNORED and
 	 *		PKT_ALIAS_DENY_INCOMING flag is set.
 	 */
 	if (retval == PKT_ALIAS_ERROR ||
 	    (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
 	    (retval == PKT_ALIAS_IGNORED &&
 	    (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
 		/* XXX - should i add some logging? */
 		m_free(mcl);
 		args->m = NULL;
 		return (IP_FW_DENY);
 	}
 
 	if (retval == PKT_ALIAS_RESPOND)
 		mcl->m_flags |= M_SKIP_FIREWALL;
 	/* Resync the mbuf lengths with the (possibly changed) IP length. */
 	mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
 
 	/*
 	 * XXX - libalias checksum offload
 	 * 'duct tape' (see above)
 	 */
 
 	/* NOTE(review): 'ip + 1' assumes an IP header without options. */
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
 	    ip->ip_p == IPPROTO_TCP) {
 		struct tcphdr 	*th;
 
 		th = (struct tcphdr *)(ip + 1);
 		if (th->th_x2)
 			ldt = 1;
 	}
 
 	if (ldt) {
 		struct tcphdr 	*th;
 		struct udphdr 	*uh;
 		uint16_t ip_len, cksum;
 
 		ip_len = ntohs(ip->ip_len);
 		/* Pseudo-header checksum over addresses, protocol and length. */
 		cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(ip->ip_p + ip_len - (ip->ip_hl << 2)));
 
 		switch (ip->ip_p) {
 		case IPPROTO_TCP:
 			th = (struct tcphdr *)(ip + 1);
 			/*
 			 * Maybe it was set in
 			 * libalias...
 			 */
 			th->th_x2 = 0;
 			th->th_sum = cksum;
 			mcl->m_pkthdr.csum_data =
 			    offsetof(struct tcphdr, th_sum);
 			break;
 		case IPPROTO_UDP:
 			uh = (struct udphdr *)(ip + 1);
 			uh->uh_sum = cksum;
 			mcl->m_pkthdr.csum_data =
 			    offsetof(struct udphdr, uh_sum);
 			break;
 		}
 		/* No hw checksum offloading: do it ourselves */
 		if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
 			in_delayed_cksum(mcl);
 			mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		}
 	}
 	args->m = mcl;
 	return (IP_FW_NAT);
 }
 
 static struct cfg_nat *
 lookup_nat(struct nat_list *l, int nat_id)
 {
 	struct cfg_nat *res;
 
 	LIST_FOREACH(res, l, _next) {
 		if (res->id == nat_id)
 			break;
 	}
 	return res;
 }
 
 /*
  * Find a nat instance by its textual name, which must be a decimal
  * number other than 0 with no trailing characters.
  * Returns the instance, or NULL for a malformed name or no match.
  */
 static struct cfg_nat *
 lookup_nat_name(struct nat_list *l, char *name)
 {
 	char *end;
 	int wanted;
 
 	wanted = strtol(name, &end, 10);
 	if (*end != '\0' || wanted == 0)
 		return (NULL);
 
 	return (lookup_nat(l, wanted));
 }
 
 /* IP_FW3 configuration routines */
 
 /*
  * Create or reconfigure the nat instance described by 'ucfg'.
  *
  * The instance named ucfg->name is looked up; an existing one is
  * temporarily unhooked from the chain, otherwise a new one is allocated.
  * The instance is then (re)configured without any chain lock held, since
  * the allocations may sleep, and finally hooked back onto chain->nat.
  * The serialized redirect/spool records are expected right after 'ucfg'.
  *
  * If another thread installed an instance with the same name while the
  * locks were dropped, that instance is unhooked and destroyed so that
  * this configuration wins.
  *
  * NOTE(review): the result of add_redir_spool_cfg() is not propagated;
  * a failing redirect leaves the instance partially configured.
  */
 static void
 nat44_config(struct ip_fw_chain *chain, struct nat44_cfg_nat *ucfg)
 {
 	struct cfg_nat *ptr, *tcfg;
 	int gencnt;
 
 	/*
 	 * Find/create nat rule.
 	 */
 	IPFW_UH_WLOCK(chain);
 	gencnt = chain->gencnt;
 	ptr = lookup_nat_name(&chain->nat, ucfg->name);
 	if (ptr == NULL) {
 		IPFW_UH_WUNLOCK(chain);
 		/* New rule: allocate and init new instance. */
 		ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
 		ptr->lib = LibAliasInit(NULL);
 		LIST_INIT(&ptr->redir_chain);
 	} else {
 		/* Entry already present: temporarily unhook it. */
 		IPFW_WLOCK(chain);
 		LIST_REMOVE(ptr, _next);
 		flush_nat_ptrs(chain, ptr->id);
 		IPFW_WUNLOCK(chain);
 		IPFW_UH_WUNLOCK(chain);
 	}
 
 	/*
 	 * Basic nat (re)configuration.
 	 */
 	ptr->id = strtol(ucfg->name, NULL, 10);
 	/*
 	 * XXX - what if this rule doesn't nat any ip and just
 	 * redirect?
 	 * do we set aliasaddress to 0.0.0.0?
 	 */
 	ptr->ip = ucfg->ip;
 	ptr->redir_cnt = ucfg->redir_cnt;
 	ptr->mode = ucfg->mode;
 	strlcpy(ptr->if_name, ucfg->if_name, sizeof(ptr->if_name));
 	LibAliasSetMode(ptr->lib, ptr->mode, ~0);
 	LibAliasSetAddress(ptr->lib, ptr->ip);
 
 	/*
 	 * Redir and LSNAT configuration.
 	 */
 	/* Delete old cfgs. */
 	del_redir_spool_cfg(ptr, &ptr->redir_chain);
 	/* Add new entries. */
 	add_redir_spool_cfg((char *)(ucfg + 1), ptr);
 	IPFW_UH_WLOCK(chain);
 
 	/* Extra check to avoid race with another ipfw_nat_cfg() */
 	tcfg = NULL;
 	if (gencnt != chain->gencnt)
 	    tcfg = lookup_nat_name(&chain->nat, ucfg->name);
 	IPFW_WLOCK(chain);
 	if (tcfg != NULL) {
 		/*
 		 * A competing instance was installed meanwhile: unhook it
 		 * and drop any rule pointers cached to it before it is
 		 * freed below.
 		 */
 		LIST_REMOVE(tcfg, _next);
 		flush_nat_ptrs(chain, tcfg->id);
 	}
 	LIST_INSERT_HEAD(&chain->nat, ptr, _next);
 	IPFW_WUNLOCK(chain);
 	chain->gencnt++;
 
 	IPFW_UH_WUNLOCK(chain);
 
 	/*
 	 * Destroy the displaced instance, not 'ptr': 'ptr' is now linked
 	 * on chain->nat and must stay alive.
 	 */
 	if (tcfg != NULL)
 		free_nat_instance(tcfg);
 }
 
 /*
  * Creates or reconfigures a nat44 instance.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header nat44_cfg_nat .. ]
  *
  * The request is validated (minimum size, name TLV length, a terminated
  * non-zero decimal name, room for redir_cnt redirect records) and then
  * handed to nat44_config().
  *
  * Returns 0 on success, EINVAL on a malformed request.
  */
 static int
 nat44_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct nat44_cfg_nat *ucfg;
 	ipfw_obj_header *oh;
 	char *endp;
 	size_t fixed_len;
 	int id;
 
 	/* Both fixed-size headers must be present. */
 	fixed_len = sizeof(*oh) + sizeof(*ucfg);
 	if (sd->valsize < fixed_len)
 		return (EINVAL);
 
 	oh = (ipfw_obj_header *)sd->kbuf;
 	ucfg = (struct nat44_cfg_nat *)(oh + 1);
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	/* The name has to be NUL-terminated within its field... */
 	if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
 		return (EINVAL);
 	/* ...and has to look like a non-zero number. */
 	id = strtol(ucfg->name, &endp, 10);
 	if (id == 0 || *endp != '\0')
 		return (EINVAL);
 
 	/* Check number of redirs */
 	if (sd->valsize <
 	    fixed_len + ucfg->redir_cnt*sizeof(struct nat44_cfg_redir))
 		return (EINVAL);
 
 	nat44_config(chain, ucfg);
 	return (0);
 }
 
 /*
  * Destroys the nat instance named in the request.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ]
  *
  * Returns 0 on success, EINVAL on a malformed request and ESRCH when no
  * instance with that name exists.
  */
 static int
 nat44_destroy(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct cfg_nat *nat;
 	ipfw_obj_header *oh;
 	ipfw_obj_ntlv *ntlv;
 
 	/* Check minimum header size */
 	if (sd->valsize < sizeof(*oh))
 		return (EINVAL);
 
 	oh = (ipfw_obj_header *)sd->kbuf;
 	ntlv = &oh->ntlv;
 
 	/* Basic length checks for TLVs */
 	if (ntlv->head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	/* Check if name is properly terminated */
 	if (strnlen(ntlv->name, sizeof(ntlv->name)) == sizeof(ntlv->name))
 		return (EINVAL);
 
 	IPFW_UH_WLOCK(chain);
 	nat = lookup_nat_name(&chain->nat, ntlv->name);
 	if (nat == NULL) {
 		IPFW_UH_WUNLOCK(chain);
 		return (ESRCH);
 	}
 	/* Unhook the instance and forget rule pointers cached to it. */
 	IPFW_WLOCK(chain);
 	LIST_REMOVE(nat, _next);
 	flush_nat_ptrs(chain, nat->id);
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	/* No longer reachable through the chain: release it unlocked. */
 	free_nat_instance(nat);
 
 	return (0);
 }
 
 /*
  * Fill the userland view 'ucfg' from nat instance 'ptr': the id rendered
  * as a decimal name, the alias address, mode, redirect count and the
  * interface name.  Other fields of 'ucfg' are left untouched.
  */
 static void
 export_nat_cfg(struct cfg_nat *ptr, struct nat44_cfg_nat *ucfg)
 {
 
 	ucfg->ip = ptr->ip;
 	ucfg->mode = ptr->mode;
 	ucfg->redir_cnt = ptr->redir_cnt;
 	snprintf(ucfg->name, sizeof(ucfg->name), "%d", ptr->id);
 	strlcpy(ucfg->if_name, ptr->if_name, sizeof(ucfg->if_name));
 }
 
 /*
  * Gets the configuration of the given nat instance.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header nat44_cfg_nat .. ]
  *
  * The reply reuses the request headers and appends one nat44_cfg_redir
  * per redirect, each followed by its nat44_cfg_spool entries.
  *
  * Returns 0 on success, EINVAL on a malformed request, ESRCH when the
  * instance does not exist and ENOMEM when the buffer is too small (the
  * required size is then reported in nat44_cfg_nat.size).
  */
 static int
 nat44_get_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_header *oh;
 	struct nat44_cfg_nat *ucfg;
 	struct cfg_nat *nat;
 	struct cfg_redir *redir;
 	struct cfg_spool *spool;
 	struct nat44_cfg_redir *out_r;
 	struct nat44_cfg_spool *out_s;
 	size_t hdr_len, total;
 
 	/* Check minimum header size */
 	hdr_len = sizeof(*oh) + sizeof(*ucfg);
 	if (sd->valsize < hdr_len)
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, hdr_len);
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	ucfg = (struct nat44_cfg_nat *)(oh + 1);
 
 	/* Check if name is properly terminated */
 	if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(chain);
 	nat = lookup_nat_name(&chain->nat, ucfg->name);
 	if (nat == NULL) {
 		IPFW_UH_RUNLOCK(chain);
 		return (ESRCH);
 	}
 
 	export_nat_cfg(nat, ucfg);
 
 	/* Estimate memory amount: headers plus every redir and spool. */
 	total = hdr_len;
 	LIST_FOREACH(redir, &nat->redir_chain, _next) {
 		total += sizeof(struct nat44_cfg_redir);
 		LIST_FOREACH(spool, &redir->spool_chain, _next)
 			total += sizeof(struct nat44_cfg_spool);
 	}
 
 	ucfg->size = total;
 	if (sd->valsize < total) {
 		/*
 		 * The supplied buffer is too small.  @ucfg already holds
 		 * the relevant info including the required size, so just
 		 * return; the buffer will be flushed automatically.
 		 */
 		IPFW_UH_RUNLOCK(chain);
 		return (ENOMEM);
 	}
 
 	/* Size OK, let's copy data */
 	LIST_FOREACH(redir, &nat->redir_chain, _next) {
 		out_r = (struct nat44_cfg_redir *)ipfw_get_sopt_space(sd,
 		    sizeof(*out_r));
 		out_r->mode = redir->mode;
 		out_r->proto = redir->proto;
 		out_r->laddr = redir->laddr;
 		out_r->paddr = redir->paddr;
 		out_r->raddr = redir->raddr;
 		out_r->lport = redir->lport;
 		out_r->pport = redir->pport;
 		out_r->rport = redir->rport;
 		out_r->pport_cnt = redir->pport_cnt;
 		out_r->rport_cnt = redir->rport_cnt;
 		out_r->spool_cnt = redir->spool_cnt;
 
 		LIST_FOREACH(spool, &redir->spool_chain, _next) {
 			out_s = (struct nat44_cfg_spool *)ipfw_get_sopt_space(
 			    sd, sizeof(*out_s));
 
 			out_s->addr = spool->addr;
 			out_s->port = spool->port;
 		}
 	}
 
 	IPFW_UH_RUNLOCK(chain);
 
 	return (0);
 }
 
 /*
  * Lists all nat44 instances currently available in kernel.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ]
  * Reply: [ ipfw_obj_lheader nat44_cfg_nat x N ]
  *
  * Returns 0 on success, EINVAL when the request is shorter than the list
  * header and ENOMEM when the reply does not fit; in the latter case the
  * header already carries the count, object size and total size needed.
  */
 static int
 nat44_list_nat(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct nat44_cfg_nat *ucfg;
 	struct cfg_nat *nat;
 	ipfw_obj_lheader *olh;
 	int total;
 
 	/* Check minimum header size */
 	if (sd->valsize < sizeof(ipfw_obj_lheader))
 		return (EINVAL);
 
 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
 
 	IPFW_UH_RLOCK(chain);
 
 	/* First pass: count the instances and describe the reply. */
 	total = 0;
 	LIST_FOREACH(nat, &chain->nat, _next)
 		total++;
 	olh->count = total;
 	olh->objsize = sizeof(struct nat44_cfg_nat);
 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
 
 	if (sd->valsize < olh->size) {
 		IPFW_UH_RUNLOCK(chain);
 		return (ENOMEM);
 	}
 
 	/* Second pass: export one record per instance. */
 	LIST_FOREACH(nat, &chain->nat, _next) {
 		ucfg = (struct nat44_cfg_nat *)ipfw_get_sopt_space(sd,
 		    sizeof(*ucfg));
 		export_nat_cfg(nat, ucfg);
 	}
 
 	IPFW_UH_RUNLOCK(chain);
 
 	return (0);
 }
 
 /*
  * Gets log for given nat instance
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header nat44_cfg_nat ]
  * Reply: [ ipfw_obj_header nat44_cfg_nat LOGBUFFER ]
  *
  * LOGBUFFER is LIBALIAS_BUF_SIZE bytes copied from the libalias log
  * descriptor of the instance.
  *
  * Returns 0 on success, EINVAL on a malformed request, ESRCH when the
  * instance does not exist, ENOENT when it has no log buffer and ENOMEM
  * when the supplied buffer cannot hold the reply.
  */
 static int
 nat44_get_log(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_header *oh;
 	struct nat44_cfg_nat *ucfg;
 	struct cfg_nat *ptr;
 	void *pbuf;
 	size_t sz;
 
 	sz = sizeof(*oh) + sizeof(*ucfg);
 	/* Check minimum header size */
 	if (sd->valsize < sz)
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	ucfg = (struct nat44_cfg_nat *)(oh + 1);
 
 	/* Check if name is properly terminated */
 	if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(chain);
 	ptr = lookup_nat_name(&chain->nat, ucfg->name);
 	if (ptr == NULL) {
 		IPFW_UH_RUNLOCK(chain);
 		return (ESRCH);
 	}
 
 	if (ptr->lib->logDesc == NULL) {
 		IPFW_UH_RUNLOCK(chain);
 		return (ENOENT);
 	}
 
 	export_nat_cfg(ptr, ucfg);
 
 	/*
 	 * Estimate memory amount.
 	 * NOTE(review): the reported size does not include the leading
 	 * ipfw_obj_header; kept as is for compatibility with existing
 	 * callers.
 	 */
 	ucfg->size = sizeof(struct nat44_cfg_nat) + LIBALIAS_BUF_SIZE;
 	/* The reply is both headers followed by the whole log buffer. */
 	if (sd->valsize < sz + LIBALIAS_BUF_SIZE) {
 
 		/*
 		 * Submitted buffer size is not enough.
 		 * We've already filled in @ucfg structure with
 		 * relevant info including size, so we
 		 * can return. Buffer will be flushed automatically.
 		 */
 		IPFW_UH_RUNLOCK(chain);
 		return (ENOMEM);
 	}
 
 	pbuf = (void *)ipfw_get_sopt_space(sd, LIBALIAS_BUF_SIZE);
 	if (pbuf == NULL) {
 		/* Never copy through a NULL window pointer. */
 		IPFW_UH_RUNLOCK(chain);
 		return (ENOMEM);
 	}
 	memcpy(pbuf, ptr->lib->logDesc, LIBALIAS_BUF_SIZE);
 
 	IPFW_UH_RUNLOCK(chain);
 
 	return (0);
 }
 
 /*
  * IP_FW3 sockopt handlers provided by this module; registered with
  * IPFW_ADD_SOPT_HANDLER() in ipfw_nat_init() and removed again in
  * ipfw_nat_destroy().  Each entry gives the opcode, a second field that
  * is 0 for all entries (presumably the handler version - TODO confirm),
  * the transfer direction and the handler function.
  */
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_NAT44_XCONFIG,	0,	HDIR_SET,	nat44_cfg },
 	{ IP_FW_NAT44_DESTROY,	0,	HDIR_SET,	nat44_destroy },
 	{ IP_FW_NAT44_XGETCONFIG,	0,	HDIR_GET,	nat44_get_cfg },
 	{ IP_FW_NAT44_LIST_NAT,	0,	HDIR_GET,	nat44_list_nat },
 	{ IP_FW_NAT44_XGETLOG,	0,	HDIR_GET,	nat44_get_log },
 };
 
 
 /*
  * Legacy configuration routines
  */
 
 /*
  * Spool entry as laid out in the legacy sockopt interface; written
  * field by field in ipfw_nat_get_cfg().
  */
 struct cfg_spool_legacy {
 	LIST_ENTRY(cfg_spool_legacy)	_next;	/* not filled in on export */
 	struct in_addr			addr;	/* server address */
 	u_short				port;	/* server port */
 };
 
 /*
  * Redirect entry as laid out in the legacy sockopt interface.  It is
  * converted to/from struct nat44_cfg_redir / struct cfg_redir field by
  * field in ipfw_nat_cfg() and ipfw_nat_get_cfg(); the list linkage and
  * alink members are not copied by either function.
  */
 struct cfg_redir_legacy {
 	/* NOTE(review): linkage is typed as cfg_redir, not cfg_redir_legacy. */
 	LIST_ENTRY(cfg_redir)   _next;
 	u_int16_t               mode;		/* type of redirect mode */
 	struct in_addr	        laddr;		/* local ip address */
 	struct in_addr	        paddr;		/* public ip address */
 	struct in_addr	        raddr;		/* remote ip address */
 	u_short                 lport;		/* local port */
 	u_short                 pport;		/* public port */
 	u_short                 rport;		/* remote port */
 	u_short                 pport_cnt;	/* number of public ports */
 	u_short                 rport_cnt;	/* number of remote ports */
 	int                     proto;		/* protocol */
 	struct alias_link       **alink;
 	u_int16_t               spool_cnt;	/* number of spool entries */
 	LIST_HEAD(, cfg_spool_legacy) spool_chain;
 };
 
 /*
  * Nat instance as laid out in the legacy sockopt interface; this is the
  * header of the buffer consumed by ipfw_nat_cfg() and of each record
  * produced by ipfw_nat_get_cfg().  The linkage, lib and redir_chain
  * members are not copied by either function.
  */
 struct cfg_nat_legacy {
 	LIST_ENTRY(cfg_nat_legacy)	_next;
 	int				id;		/* nat id */
 	struct in_addr			ip;		/* nat ip address */
 	char				if_name[IF_NAMESIZE];	/* interface name */
 	int				mode;		/* aliasing mode */
 	struct libalias			*lib;
 	int				redir_cnt;	/* number of redirect records that follow */
 	LIST_HEAD(, cfg_redir_legacy)	redir_chain;
 };
 
 static int
 ipfw_nat_cfg(struct sockopt *sopt)
 {
 	struct cfg_nat_legacy *cfg;
 	struct nat44_cfg_nat *ucfg;
 	struct cfg_redir_legacy *rdir;
 	struct nat44_cfg_redir *urdir;
 	char *buf;
 	size_t len, len2;
 	int error, i;
 
 	len = sopt->sopt_valsize;
 	len2 = len + 128;
 
 	/*
 	 * Allocate 2x buffer to store converted structures.
 	 * new redir_cfg has shrunk, so we're sure that
 	 * new buffer size is enough.
 	 */
 	buf = malloc(roundup2(len, 8) + len2, M_TEMP, M_WAITOK | M_ZERO);
 	error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat_legacy));
 	if (error != 0)
 		goto out;
 
 	cfg = (struct cfg_nat_legacy *)buf;
 	if (cfg->id < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	ucfg = (struct nat44_cfg_nat *)&buf[roundup2(len, 8)];
 	snprintf(ucfg->name, sizeof(ucfg->name), "%d", cfg->id);
 	strlcpy(ucfg->if_name, cfg->if_name, sizeof(ucfg->if_name));
 	ucfg->ip = cfg->ip;
 	ucfg->mode = cfg->mode;
 	ucfg->redir_cnt = cfg->redir_cnt;
 
 	if (len < sizeof(*cfg) + cfg->redir_cnt * sizeof(*rdir)) {
 		error = EINVAL;
 		goto out;
 	}
 
 	urdir = (struct nat44_cfg_redir *)(ucfg + 1);
 	rdir = (struct cfg_redir_legacy *)(cfg + 1);
 	for (i = 0; i < cfg->redir_cnt; i++) {
 		urdir->mode = rdir->mode;
 		urdir->laddr = rdir->laddr;
 		urdir->paddr = rdir->paddr;
 		urdir->raddr = rdir->raddr;
 		urdir->lport = rdir->lport;
 		urdir->pport = rdir->pport;
 		urdir->rport = rdir->rport;
 		urdir->pport_cnt = rdir->pport_cnt;
 		urdir->rport_cnt = rdir->rport_cnt;
 		urdir->proto = rdir->proto;
 		urdir->spool_cnt = rdir->spool_cnt;
 
 		urdir++;
 		rdir++;
 	}
 
 	nat44_config(&V_layer3_chain, ucfg);
 
 out:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 static int
 ipfw_nat_del(struct sockopt *sopt)
 {
 	struct cfg_nat *ptr;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	int i;
 
 	sooptcopyin(sopt, &i, sizeof i, sizeof i);
 	/* XXX validate i */
 	IPFW_UH_WLOCK(chain);
 	ptr = lookup_nat(&chain->nat, i);
 	if (ptr == NULL) {
 		IPFW_UH_WUNLOCK(chain);
 		return (EINVAL);
 	}
 	IPFW_WLOCK(chain);
 	LIST_REMOVE(ptr, _next);
 	flush_nat_ptrs(chain, i);
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	free_nat_instance(ptr);
 	return (0);
 }
 
 /*
  * Legacy sockopt entry point that dumps the configuration of all nat
  * instances of the layer3 chain.
  *
  * Reply layout: an int with the number of instances, then for each
  * instance a struct cfg_nat_legacy followed by its cfg_redir_legacy
  * records, each of those followed by its cfg_spool_legacy entries.
  *
  * The required size is computed under the UH read lock, the lock is
  * dropped for the sleeping allocation, and chain->gencnt is compared
  * afterwards; if it changed, the buffer is freed and sizing restarts.
  *
  * Returns the result of sooptcopyout().
  */
 static int
 ipfw_nat_get_cfg(struct sockopt *sopt)
 {
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	struct cfg_nat *n;
 	struct cfg_nat_legacy *ucfg;
 	struct cfg_redir *r;
 	struct cfg_spool *s;
 	struct cfg_redir_legacy *ser_r;
 	struct cfg_spool_legacy *ser_s;
 	char *data;
 	int gencnt, nat_cnt, len, error;
 
 	nat_cnt = 0;
 	len = sizeof(nat_cnt);
 
 	IPFW_UH_RLOCK(chain);
 retry:
 	/* Entered with the UH read lock held and nat_cnt/len reset. */
 	gencnt = chain->gencnt;
 	/* Estimate memory amount */
 	LIST_FOREACH(n, &chain->nat, _next) {
 		nat_cnt++;
 		len += sizeof(struct cfg_nat_legacy);
 		LIST_FOREACH(r, &n->redir_chain, _next) {
 			len += sizeof(struct cfg_redir_legacy);
 			LIST_FOREACH(s, &r->spool_chain, _next)
 				len += sizeof(struct cfg_spool_legacy);
 		}
 	}
 	IPFW_UH_RUNLOCK(chain);
 
 	/* M_WAITOK may sleep, hence the allocation outside the lock. */
 	data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 	/* The reply starts with the instance count. */
 	bcopy(&nat_cnt, data, sizeof(nat_cnt));
 
 	/* Reset: 'len' now serves as the write offset into 'data'. */
 	nat_cnt = 0;
 	len = sizeof(nat_cnt);
 
 	IPFW_UH_RLOCK(chain);
 	if (gencnt != chain->gencnt) {
 		/* Configuration changed while unlocked: size it again. */
 		free(data, M_TEMP);
 		goto retry;
 	}
 	/* Serialize all the data. */
 	LIST_FOREACH(n, &chain->nat, _next) {
 		ucfg = (struct cfg_nat_legacy *)&data[len];
 		ucfg->id = n->id;
 		ucfg->ip = n->ip;
 		ucfg->redir_cnt = n->redir_cnt;
 		ucfg->mode = n->mode;
 		strlcpy(ucfg->if_name, n->if_name, sizeof(ucfg->if_name));
 		len += sizeof(struct cfg_nat_legacy);
 		LIST_FOREACH(r, &n->redir_chain, _next) {
 			ser_r = (struct cfg_redir_legacy *)&data[len];
 			ser_r->mode = r->mode;
 			ser_r->laddr = r->laddr;
 			ser_r->paddr = r->paddr;
 			ser_r->raddr = r->raddr;
 			ser_r->lport = r->lport;
 			ser_r->pport = r->pport;
 			ser_r->rport = r->rport;
 			ser_r->pport_cnt = r->pport_cnt;
 			ser_r->rport_cnt = r->rport_cnt;
 			ser_r->proto = r->proto;
 			ser_r->spool_cnt = r->spool_cnt;
 			len += sizeof(struct cfg_redir_legacy);
 			LIST_FOREACH(s, &r->spool_chain, _next) {
 				ser_s = (struct cfg_spool_legacy *)&data[len];
 				ser_s->addr = s->addr;
 				ser_s->port = s->port;
 				len += sizeof(struct cfg_spool_legacy);
 			}
 		}
 	}
 	IPFW_UH_RUNLOCK(chain);
 
 	error = sooptcopyout(sopt, data, len);
 	free(data, M_TEMP);
 
 	return (error);
 }
 
 static int
 ipfw_nat_get_log(struct sockopt *sopt)
 {
 	uint8_t *data;
 	struct cfg_nat *ptr;
 	int i, size;
 	struct ip_fw_chain *chain;
 	IPFW_RLOCK_TRACKER;
 
 	chain = &V_layer3_chain;
 
 	IPFW_RLOCK(chain);
 	/* one pass to count, one to copy the data */
 	i = 0;
 	LIST_FOREACH(ptr, &chain->nat, _next) {
 		if (ptr->lib->logDesc == NULL)
 			continue;
 		i++;
 	}
 	size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
 	data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
 	if (data == NULL) {
 		IPFW_RUNLOCK(chain);
 		return (ENOSPC);
 	}
 	i = 0;
 	LIST_FOREACH(ptr, &chain->nat, _next) {
 		if (ptr->lib->logDesc == NULL)
 			continue;
 		bcopy(&ptr->id, &data[i], sizeof(int));
 		i += sizeof(int);
 		bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
 		i += LIBALIAS_BUF_SIZE;
 	}
 	IPFW_RUNLOCK(chain);
 	sooptcopyout(sopt, data, size);
 	free(data, M_IPFW);
 	return(0);
 }
 
 /*
  * Per-vnet initialization: mark ipfw_nat as ready in this vnet.
  * Always returns 0.
  */
 static int
 vnet_ipfw_nat_init(const void *arg __unused)
 {
 
 	V_ipfw_nat_ready = 1;
 	return (0);
 }
 
 /*
  * Per-vnet teardown: mark ipfw_nat as not ready, destroy every nat
  * instance on the layer3 chain and clear all nat pointers cached in
  * rules.  Everything happens under the chain write lock.
  * Always returns 0.
  */
 static int
 vnet_ipfw_nat_uninit(const void *arg __unused)
 {
 	struct ip_fw_chain *chain;
 	struct cfg_nat *nat;
 
 	chain = &V_layer3_chain;
 	IPFW_WLOCK(chain);
 	V_ipfw_nat_ready = 0;
 	/* Pop instances off the head of the list until it is empty. */
 	while ((nat = LIST_FIRST(&chain->nat)) != NULL) {
 		LIST_REMOVE(nat, _next);
 		free_nat_instance(nat);
 	}
 	flush_nat_ptrs(chain, -1 /* flush all */);
 	IPFW_WUNLOCK(chain);
 	return (0);
 }
 
 /*
  * Module-wide initialization: publish the nat entry points through the
  * ipfw hook pointers, register the IP_FW3 sockopt handlers and subscribe
  * to interface address changes.
  */
 static void
 ipfw_nat_init(void)
 {
 
 	/* init ipfw hooks */
 	ipfw_nat_ptr = ipfw_nat;
 	lookup_nat_ptr = lookup_nat;
 	ipfw_nat_cfg_ptr = ipfw_nat_cfg;
 	ipfw_nat_del_ptr = ipfw_nat_del;
 	ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
 	ipfw_nat_get_log_ptr = ipfw_nat_get_log;
 	IPFW_ADD_SOPT_HANDLER(1, scodes);
 
 	/* Keep the tag so that ipfw_nat_destroy() can deregister. */
 	ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Module-wide teardown, the reverse of ipfw_nat_init(): stop listening
  * for address changes, remove the sockopt handlers and clear the ipfw
  * hook pointers.
  */
 static void
 ipfw_nat_destroy(void)
 {
 
 	EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
 	/* deregister ipfw_nat */
 	IPFW_DEL_SOPT_HANDLER(1, scodes);
 	ipfw_nat_ptr = NULL;
 	lookup_nat_ptr = NULL;
 	ipfw_nat_cfg_ptr = NULL;
 	ipfw_nat_del_ptr = NULL;
 	ipfw_nat_get_cfg_ptr = NULL;
 	ipfw_nat_get_log_ptr = NULL;
 }
 
 /*
  * Module event handler.  Load and unload need no work here and simply
  * succeed; every other event type is rejected with EOPNOTSUPP.
  */
 static int
 ipfw_nat_modevent(module_t mod, int type, void *unused)
 {
 
 	if (type == MOD_LOAD || type == MOD_UNLOAD)
 		return (0);
 	return (EOPNOTSUPP);
 }
 
 /* Module description handed to DECLARE_MODULE(): name, event handler, no extra data. */
 static moduledata_t ipfw_nat_mod = {
 	"ipfw_nat",
 	ipfw_nat_modevent,
 	0
 };
 
 /*
  * Define startup order.
  * All hooks run in the SI_SUB_PROTO_FIREWALL subsystem: the module-wide
  * init is ordered one step after IPFW_NAT_MODEVENT_ORDER and the per-vnet
  * init one step after that.  The SYSUNINIT/VNET_SYSUNINIT entries reuse
  * the same order values for teardown.
  */
 #define	IPFW_NAT_SI_SUB_FIREWALL	SI_SUB_PROTO_FIREWALL
 #define	IPFW_NAT_MODEVENT_ORDER		(SI_ORDER_ANY - 128) /* after ipfw */
 #define	IPFW_NAT_MODULE_ORDER		(IPFW_NAT_MODEVENT_ORDER + 1)
 #define	IPFW_NAT_VNET_ORDER		(IPFW_NAT_MODEVENT_ORDER + 2)
 
 /*
  * NOTE(review): the module itself is declared with SI_ORDER_ANY rather
  * than IPFW_NAT_MODEVENT_ORDER - confirm this is intended.
  */
 DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
 MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
 MODULE_DEPEND(ipfw_nat, ipfw, 3, 3, 3);
 MODULE_VERSION(ipfw_nat, 1);
 
 /* Module-wide and per-vnet initialization hooks. */
 SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
     ipfw_nat_init, NULL);
 VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
     vnet_ipfw_nat_init, NULL);
 
 /* Matching teardown hooks. */
 SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
     ipfw_nat_destroy, NULL);
 VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
     IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);
 
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_private.h	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_private.h	(revision 343619)
@@ -1,833 +1,837 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _IPFW2_PRIVATE_H
 #define _IPFW2_PRIVATE_H
 
 /*
  * Internal constants and data structures used by ipfw components
  * and not meant to be exported outside the kernel.
  */
 
 #ifdef _KERNEL
 
 /*
  * For platforms that do not have SYSCTL support, we wrap the
  * SYSCTL_* into a function (one per file) to collect the values
  * into an array at module initialization. The wrapping macros,
  * SYSBEGIN() and SYSEND, are empty in the default case.
  */
 #ifndef SYSBEGIN
 #define SYSBEGIN(x)
 #endif
 #ifndef SYSEND
 #define SYSEND
 #endif
 
 /* Return values from ipfw_chk() */
 enum {
 	IP_FW_PASS = 0,
 	IP_FW_DENY,
 	IP_FW_DIVERT,
 	IP_FW_TEE,
 	IP_FW_DUMMYNET,
 	IP_FW_NETGRAPH,
 	IP_FW_NGTEE,
 	IP_FW_NAT,
 	IP_FW_REASS,
 };
 
 /*
  * Structure for collecting parameters to dummynet for ip6_output forwarding
  */
 struct _ip6dn_args {
        struct ip6_pktopts *opt_or;
        int flags_or;
        struct ip6_moptions *im6o_or;
        struct ifnet *origifp_or;
        struct ifnet *ifp_or;
        struct sockaddr_in6 dst_or;
        u_long mtu_or;
 };
 
 
 /*
  * Arguments for calling ipfw_chk() and dummynet_io(). We put them
  * all into a structure because this way it is easier and more
  * efficient to pass variables around and extend the interface.
  */
 struct ip_fw_args {
 	uint32_t		flags;
 #define	IPFW_ARGS_ETHER		0x0001	/* has valid ethernet header	*/
 #define	IPFW_ARGS_NH4		0x0002	/* has IPv4 next hop in hopstore */
 #define	IPFW_ARGS_NH6		0x0004	/* has IPv6 next hop in hopstore */
 #define	IPFW_ARGS_NH4PTR	0x0008	/* has IPv4 next hop in next_hop */
 #define	IPFW_ARGS_NH6PTR	0x0010	/* has IPv6 next hop in next_hop6 */
 #define	IPFW_ARGS_REF		0x0020	/* has valid ipfw_rule_ref	*/
 	/*
 	 * On return, it points to the matching rule.
 	 * On entry, rule.slot > 0 means the info is valid and
 	 * contains the starting rule for an ipfw search.
 	 * If chain_id == chain->id && slot >0 then jump to that slot.
 	 * Otherwise, we locate the first rule >= rulenum:rule_id
 	 */
 	struct ipfw_rule_ref	rule;	/* match/restart info		*/
 
 	struct ifnet		*oif;	/* output interface		*/
 	struct inpcb		*inp;
 	union {
 		/*
 		 * We don't support forwarding on layer2, thus we can
 		 * keep eh pointer in this union.
 		 * next_hop[6] pointers can be used to point to next hop
 		 * stored in rule's opcode to avoid copying into hopstore.
 		 * Also, it is expected that all 0x1-0x10 flags are mutually
 		 * exclusive.
 		 */
 		struct ether_header	*eh;	/* for bridged packets	*/
 		struct sockaddr_in	*next_hop;
 		struct sockaddr_in6	*next_hop6;
 		/* ipfw next hop storage */
 		struct sockaddr_in	hopstore;
 		struct ip_fw_nh6 {
 			struct in6_addr sin6_addr;
 			uint32_t	sin6_scope_id;
 			uint16_t	sin6_port;
 		} hopstore6;
 	};
 
 	struct mbuf		*m;	/* the mbuf chain		*/
 	struct ipfw_flow_id	f_id;	/* grabbed from IP header	*/
 };
 
 MALLOC_DECLARE(M_IPFW);
 
 /*
  * Hooks sometimes need to know the direction of the packet
  * (divert, dummynet, netgraph, ...).
  * We use a generic definition here, with bits 0-1 indicating the
  * direction, bit 2 indicating layer 2 or 3, and bits 3-4 indicating
  * the specific protocol (if necessary).
  */
 enum {
 	DIR_MASK =	0x3,
 	DIR_OUT =	0,
 	DIR_IN =	1,
 	DIR_FWD =	2,
 	DIR_DROP =	3,
 	PROTO_LAYER2 =	0x4, /* set for layer 2 */
 	/* PROTO_DEFAULT = 0, */
 	PROTO_IPV4 =	0x08,
 	PROTO_IPV6 =	0x10,
 	PROTO_IFB =	0x0c, /* layer2 + ifbridge */
    /*	PROTO_OLDBDG =	0x14, unused, old bridge */
 };
 
 /* wrapper for freeing a packet, in case we need to do more work */
 #ifndef FREE_PKT
 #if defined(__linux__) || defined(_WIN32)
 #define FREE_PKT(m)	netisr_dispatch(-1, m)
 #else
 #define FREE_PKT(m)	m_freem(m)
 #endif
 #endif /* !FREE_PKT */
 
 /*
  * Function definitions.
  */
 int ipfw_chk(struct ip_fw_args *args);
 struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
     u_int32_t, u_int32_t, int);
 
 /* attach (arg = 1) or detach (arg = 0) hooks */
 int ipfw_attach_hooks(int);
 #ifdef NOTYET
 void ipfw_nat_destroy(void);
 #endif
 
 /* In ip_fw_log.c */
 struct ip;
 struct ip_fw_chain;
 
 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
 void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
 void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
     struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
     u_short offset, uint32_t tablearg, struct ip *ip);
 VNET_DECLARE(u_int64_t, norule_counter);
 #define	V_norule_counter	VNET(norule_counter)
 VNET_DECLARE(int, verbose_limit);
 #define	V_verbose_limit		VNET(verbose_limit)
 
 /* In ip_fw_dynamic.c */
 struct sockopt_data;
 
 enum { /* result for matching dynamic rules */
 	MATCH_REVERSE = 0,
 	MATCH_FORWARD,
 	MATCH_NONE,
 	MATCH_UNKNOWN,
 };
 
 /*
  * Macro to determine that we need to do or redo dynamic state lookup.
  * direction == MATCH_UNKNOWN means that this is first lookup, then we need
  * to do lookup.
  * Otherwise check the state name, if previous lookup was for "any" name,
  * this means there is no state with specific name. Thus no need to do
  * lookup. If previous name was not "any", redo lookup for specific name.
  */
 #define	DYN_LOOKUP_NEEDED(p, cmd)	\
     ((p)->direction == MATCH_UNKNOWN ||	\
 	((p)->kidx != 0 && (p)->kidx != (cmd)->arg1))
 #define	DYN_INFO_INIT(p)	do {	\
 	(p)->direction = MATCH_UNKNOWN;	\
 	(p)->kidx = 0;			\
 } while (0)
 struct ipfw_dyn_info {
 	uint16_t	direction;	/* match direction */
 	uint16_t	kidx;		/* state name kidx */
 	uint32_t	hashval;	/* hash value */
 	uint32_t	version;	/* bucket version */
 	uint32_t	f_pos;
 };
 int ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
     const ipfw_insn_limit *cmd, const struct ip_fw_args *args,
     const void *ulp, int pktlen, struct ipfw_dyn_info *info,
     uint32_t tablearg);
 struct ip_fw *ipfw_dyn_lookup_state(const struct ip_fw_args *args,
     const void *ulp, int pktlen, const ipfw_insn *cmd,
     struct ipfw_dyn_info *info);
 
 int ipfw_is_dyn_rule(struct ip_fw *rule);
 void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *);
 void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep);
 int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd);
 
 void ipfw_dyn_init(struct ip_fw_chain *);	/* per-vnet initialization */
 void ipfw_dyn_uninit(int);	/* per-vnet deinitialization */
 int ipfw_dyn_len(void);
 uint32_t ipfw_dyn_get_count(uint32_t *, int *);
 void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
     uint16_t default_id, uint16_t instance_id);
 
 /* common variables */
 VNET_DECLARE(int, fw_one_pass);
 #define	V_fw_one_pass		VNET(fw_one_pass)
 
 VNET_DECLARE(int, fw_verbose);
 #define	V_fw_verbose		VNET(fw_verbose)
 
 VNET_DECLARE(struct ip_fw_chain, layer3_chain);
 #define	V_layer3_chain		VNET(layer3_chain)
 
 VNET_DECLARE(int, ipfw_vnet_ready);
 #define	V_ipfw_vnet_ready	VNET(ipfw_vnet_ready)
 
 VNET_DECLARE(u_int32_t, set_disable);
 #define	V_set_disable		VNET(set_disable)
 
 VNET_DECLARE(int, autoinc_step);
 #define V_autoinc_step		VNET(autoinc_step)
 
 VNET_DECLARE(unsigned int, fw_tables_max);
 #define V_fw_tables_max		VNET(fw_tables_max)
 
 VNET_DECLARE(unsigned int, fw_tables_sets);
 #define V_fw_tables_sets	VNET(fw_tables_sets)
 
 struct tables_config;
 
 #ifdef _KERNEL
 /*
  * Here we have the structure representing an ipfw rule.
  *
  * It starts with a general area 
  * followed by an array of one or more instructions, which the code
  * accesses as an array of 32-bit values.
  *
  * Given a rule pointer  r:
  *
  *  r->cmd		is the start of the first instruction.
  *  ACTION_PTR(r)	is the start of the first action (things to do
  *			once a rule matched).
  */
 
 struct ip_fw {
 	uint16_t	act_ofs;	/* offset of action in 32-bit units */
 	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
 	uint16_t	rulenum;	/* rule number			*/
 	uint8_t		set;		/* rule set (0..31)		*/
 	uint8_t		flags;		/* currently unused		*/
 	counter_u64_t	cntr;		/* Pointer to rule counters	*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 	uint32_t	id;		/* rule id			*/
 	uint32_t	cached_id;	/* used by jump_fast		*/
 	uint32_t	cached_pos;	/* used by jump_fast		*/
 	uint32_t	refcnt;		/* number of references		*/
 
 	struct ip_fw	*next;		/* linked list of deleted rules */
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
 #define	IPFW_RULE_CNTR_SIZE	(2 * sizeof(uint64_t))
 
 #endif
 
 struct ip_fw_chain {
 	struct ip_fw	**map;		/* array of rule ptrs to ease lookup */
 	uint32_t	id;		/* ruleset id */
 	int		n_rules;	/* number of static rules */
 	void		*tablestate;	/* runtime table info */
 	void		*valuestate;	/* runtime table value info */
 	int		*idxmap;	/* skipto array of rules */
 	void		**srvstate;	/* runtime service mappings */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t rwmtx;
+#else
+	struct rmlock	rwmtx;
 #endif
 	int		static_len;	/* total len of static rules (v0) */
 	uint32_t	gencnt;		/* NAT generation count */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct ip_fw	*default_rule;
 	struct tables_config *tblcfg;	/* tables module data */
 	void		*ifcfg;		/* interface module data */
 	int		*idxmap_back;	/* standby skipto array of rules */
 	struct namedobj_instance	*srvmap; /* cfg name->number mappings */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t uh_lock;
 #else
 	struct rwlock	uh_lock;	/* lock for upper half */
 #endif
 };
 
 /* 64-byte structure representing multi-field table value */
 struct table_value {
 	uint32_t	tag;		/* O_TAG/O_TAGGED */
 	uint32_t	pipe;		/* O_PIPE/O_QUEUE */
 	uint16_t	divert;		/* O_DIVERT/O_TEE */
 	uint16_t	skipto;		/* skipto, CALLRET */
 	uint32_t	netgraph;	/* O_NETGRAPH/O_NGTEE */
 	uint32_t	fib;		/* O_SETFIB */
 	uint32_t	nat;		/* O_NAT */
 	uint32_t	nh4;
 	uint8_t		dscp;
 	uint8_t		spare0;
 	uint16_t	spare1;
 	/* -- 32 bytes -- */
 	struct in6_addr	nh6;
 	uint32_t	limit;		/* O_LIMIT */
 	uint32_t	zoneid;		/* scope zone id for nh6 */
 	uint64_t	refcnt;		/* Number of references */
 };
 
 
 struct named_object {
 	TAILQ_ENTRY(named_object)	nn_next;	/* namehash */
 	TAILQ_ENTRY(named_object)	nv_next;	/* valuehash */
 	char			*name;	/* object name */
 	uint16_t		etlv;	/* Export TLV id */
 	uint8_t			subtype;/* object subtype within class */
 	uint8_t			set;	/* set object belongs to */
 	uint16_t		kidx;	/* object kernel index */
 	uint16_t		spare;
 	uint32_t		ocnt;	/* object counter for internal use */
 	uint32_t		refcnt;	/* number of references */
 };
 TAILQ_HEAD(namedobjects_head, named_object);
 
 struct sockopt;	/* used by tcp_var.h */
 struct sockopt_data {
 	caddr_t		kbuf;		/* allocated buffer */
 	size_t		ksize;		/* given buffer size */
 	size_t		koff;		/* data already used */
 	size_t		kavail;		/* number of bytes available */
 	size_t		ktotal;		/* total bytes pushed */
 	struct sockopt	*sopt;		/* socket data */
 	caddr_t		sopt_val;	/* sopt user buffer */
 	size_t		valsize;	/* original data size */
 };
 
 struct ipfw_ifc;
 
 typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata,
     uint16_t ifindex);
 
 struct ipfw_iface {
 	struct named_object	no;
 	char ifname[64];
 	int resolved;
 	uint16_t ifindex;
 	uint16_t spare;
 	uint64_t gencnt;
 	TAILQ_HEAD(, ipfw_ifc)	consumers;
 };
 
 struct ipfw_ifc {
 	TAILQ_ENTRY(ipfw_ifc)	next;
 	struct ipfw_iface	*iface;
 	ipfw_ifc_cb		*cb;
 	void			*cbdata;
 };
 
 /*
  * Macros for working with various counters.
  *
  * The *_RULE_* variants operate on an object whose `cntr' member points
  * at two consecutive counters: the first receives the packet count and
  * the second (cntr + 1) the byte count.  The *_DYN_* variants operate on
  * an object with plain `pcnt' and `bcnt' members.
  *
  * _cntr is expanded several times in each macro, so pass an expression
  * without side effects.
  */

 /*
  * Count one packet of _bytes bytes and refresh the timestamp.  The
  * timestamp is stored only when it differs from time_uptime.
  */
 #define	IPFW_INC_RULE_COUNTER(_cntr, _bytes)	do {	\
 	counter_u64_add((_cntr)->cntr, 1);		\
 	counter_u64_add((_cntr)->cntr + 1, _bytes);	\
 	if ((_cntr)->timestamp != time_uptime)		\
 		(_cntr)->timestamp = time_uptime;	\
 	} while (0)
 
 /* Count one packet of _bytes bytes; no timestamp is kept here. */
 #define	IPFW_INC_DYN_COUNTER(_cntr, _bytes)	do {		\
 	(_cntr)->pcnt++;				\
 	(_cntr)->bcnt += _bytes;			\
 	} while (0)
 
 /* Reset both rule counters and the timestamp to zero. */
 #define	IPFW_ZERO_RULE_COUNTER(_cntr) do {		\
 	counter_u64_zero((_cntr)->cntr);		\
 	counter_u64_zero((_cntr)->cntr + 1);		\
 	(_cntr)->timestamp = 0;				\
 	} while (0)
 
 /* Reset the packet and byte counts to zero. */
 #define	IPFW_ZERO_DYN_COUNTER(_cntr) do {		\
 	(_cntr)->pcnt = 0;				\
 	(_cntr)->bcnt = 0;				\
 	} while (0)
 
 /*
  * TARG_VAL: field f of entry k in the chain's value array (valuestate
  * is cast to struct table_value *).
  * IP_FW_ARG_TABLEARG: yields a unless a equals IP_FW_TARG, in which case
  * it yields field f of the entry selected by `tablearg'.  Note that
  * `tablearg' is not a macro parameter: a variable of that name must be
  * in scope wherever the macro is used.
  */
 #define	TARG_VAL(ch, k, f)	((struct table_value *)((ch)->valuestate))[k].f
 #define	IP_FW_ARG_TABLEARG(ch, a, f)	\
 	(((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a))
 /*
  * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
  * so the variable and the macros must be here.
  */
 
 #if defined( __linux__ ) || defined( _WIN32 )
 #define	IPFW_LOCK_INIT(_chain) do {			\
 	rw_init(&(_chain)->rwmtx, "IPFW static rules");	\
 	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
 	} while (0)
 
 #define	IPFW_LOCK_DESTROY(_chain) do {			\
 	rw_destroy(&(_chain)->rwmtx);			\
 	rw_destroy(&(_chain)->uh_lock);			\
 	} while (0)
 
 #define	IPFW_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define	IPFW_RLOCK_TRACKER
 #define	IPFW_RLOCK(p)			rw_rlock(&(p)->rwmtx)
 #define	IPFW_RUNLOCK(p)			rw_runlock(&(p)->rwmtx)
 #define	IPFW_WLOCK(p)			rw_wlock(&(p)->rwmtx)
 #define	IPFW_WUNLOCK(p)			rw_wunlock(&(p)->rwmtx)
 #define	IPFW_PF_RLOCK(p)		IPFW_RLOCK(p)
 #define	IPFW_PF_RUNLOCK(p)		IPFW_RUNLOCK(p)
 #else /* FreeBSD */
 #define	IPFW_LOCK_INIT(_chain) do {			\
+	rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \
 	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
 	} while (0)
 
 #define	IPFW_LOCK_DESTROY(_chain) do {			\
+	rm_destroy(&(_chain)->rwmtx);			\
 	rw_destroy(&(_chain)->uh_lock);			\
 	} while (0)
 
-#define	IPFW_RLOCK_ASSERT(_chain)	rm_assert(&V_pfil_lock, RA_RLOCKED)
-#define	IPFW_WLOCK_ASSERT(_chain)	rm_assert(&V_pfil_lock, RA_WLOCKED)
+#define	IPFW_RLOCK_ASSERT(_chain)	rm_assert(&(_chain)->rwmtx, RA_RLOCKED)
+#define	IPFW_WLOCK_ASSERT(_chain)	rm_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define	IPFW_RLOCK_TRACKER		struct rm_priotracker _tracker
-#define	IPFW_RLOCK(p)			rm_rlock(&V_pfil_lock, &_tracker)
-#define	IPFW_RUNLOCK(p)			rm_runlock(&V_pfil_lock, &_tracker)
-#define	IPFW_WLOCK(p)			rm_wlock(&V_pfil_lock)
-#define	IPFW_WUNLOCK(p)			rm_wunlock(&V_pfil_lock)
-#define	IPFW_PF_RLOCK(p)
-#define	IPFW_PF_RUNLOCK(p)
+#define	IPFW_RLOCK(p)			rm_rlock(&(p)->rwmtx, &_tracker)
+#define	IPFW_RUNLOCK(p)			rm_runlock(&(p)->rwmtx, &_tracker)
+#define	IPFW_WLOCK(p)			rm_wlock(&(p)->rwmtx)
+#define	IPFW_WUNLOCK(p)			rm_wunlock(&(p)->rwmtx)
+#define	IPFW_PF_RLOCK(p)		IPFW_RLOCK(p)
+#define	IPFW_PF_RUNLOCK(p)		IPFW_RUNLOCK(p)
 #endif
 
 #define	IPFW_UH_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
 #define	IPFW_UH_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
 #define	IPFW_UH_UNLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_UNLOCKED)
 
 #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
 #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
 #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
 #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
 
 struct obj_idx {
 	uint16_t	uidx;	/* internal index supplied by userland */
 	uint16_t	kidx;	/* kernel object index */
 	uint16_t	off;	/* tlv offset from rule end in 4-byte words */
 	uint8_t		spare;
 	uint8_t		type;	/* object type within its category */
 };
 
 struct rule_check_info {
 	uint16_t	flags;		/* rule-specific check flags */
 	uint16_t	object_opcodes;	/* num of opcodes referencing objects */
 	uint16_t	urule_numoff;	/* offset of rulenum in bytes */
 	uint8_t		version;	/* rule version */
 	uint8_t		spare;
 	ipfw_obj_ctlv	*ctlv;		/* name TLV container */
 	struct ip_fw	*krule;		/* resulting rule pointer */
 	caddr_t		urule;		/* original rule pointer */
 	struct obj_idx	obuf[8];	/* table references storage */
 };
 
 /* Legacy interface support */
 /*
  * FreeBSD 8 export rule format
  */
 struct ip_fw_rule0 {
 	struct ip_fw	*x_next;	/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 
 	uint16_t	act_ofs;	/* offset of action in 32-bit units */
 	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
 	uint16_t	rulenum;	/* rule number			*/
 	uint8_t		set;		/* rule set (0..31)		*/
 	uint8_t		_pad;		/* padding			*/
 	uint32_t	id;		/* rule id */
 
 	/* These fields are present in all rules.			*/
 	uint64_t	pcnt;		/* Packet counter		*/
 	uint64_t	bcnt;		/* Byte counter			*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
 struct ip_fw_bcounter0 {
 	uint64_t	pcnt;		/* Packet counter		*/
 	uint64_t	bcnt;		/* Byte counter			*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 };
 
 /* Kernel rule length */
 /*
  * RULE _K_ SIZE _V_ ->
  * get kernel size from userland rule version _V_.
  * RULE _U_ SIZE _V_ ->
  * get user size version _V_ from kernel rule
  * RULESIZE _V_ ->
  * get user size rule length 
  */
 /*
  * In every macro below cmd_len counts 32-bit words, hence the "* 4".
  * The "- 4" drops the single cmd[1] element already included in the
  * sizeof (assumes sizeof(ipfw_insn) == 4 -- confirm).
  * Both kernel-size macros expand identically and round up to 8 bytes;
  * of the user-size macros only the version 1 form is rounded.
  */
 /* FreeBSD8 <> current kernel format */
 #define	RULEUSIZE0(r)	(sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4)
 #define	RULEKSIZE0(r)	roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
 /* FreeBSD11 <> current kernel format */
 #define	RULEUSIZE1(r)	(roundup2(sizeof(struct ip_fw_rule) + \
     (r)->cmd_len * 4 - 4, 8))
 #define	RULEKSIZE1(r)	roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
 
 /*
  * Tables/Objects index rewriting code
  */
 
 /* Default and maximum number of ipfw tables/objects. */
 #define	IPFW_TABLES_MAX		65536
 #define	IPFW_TABLES_DEFAULT	128
 #define	IPFW_OBJECTS_MAX	65536
 #define	IPFW_OBJECTS_DEFAULT	1024
 
 #define	CHAIN_TO_SRV(ch)	((ch)->srvmap)
 #define	SRV_OBJECT(ch, idx)	((ch)->srvstate[(idx)])
 
 struct tid_info {
 	uint32_t	set;	/* table set */
 	uint16_t	uidx;	/* table index */
 	uint8_t		type;	/* table type */
 	uint8_t		atype;
 	uint8_t		spare;
 	int		tlen;	/* Total TLV size block */
 	void		*tlvs;	/* Pointer to first TLV */
 };
 
 /*
  * Classifier callback. Checks if @cmd opcode contains kernel object reference.
  * If true, returns its index and type.
  * Returns 0 if match is found, 1 otherwise.
  */
 typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
 /*
  * Updater callback. Sets kernel object reference index to @puidx
  */
 typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx);
 /*
  * Finder callback. Tries to find named object by name (specified via @ti).
  * Stores found named object pointer in @pno.
  * If object was not found, NULL is stored.
  *
  * Return 0 if input data was valid.
  */
 typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch,
     struct tid_info *ti, struct named_object **pno);
 /*
  * Another finder callback. Tries to find named object by kernel index.
  *
  * Returns pointer to named object or NULL.
  */
 typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
     uint16_t kidx);
 /*
  * Object creator callback. Tries to create object specified by @ti.
  * Stores newly-allocated object index in @pkidx.
  *
  * Returns 0 on success.
  */
 typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti,
     uint16_t *pkidx);
 /*
  * Object destroy callback. Intended to free resources allocated by
  * create_object callback.
  */
 typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch,
     struct named_object *no);
 /*
  * Sets handler callback. Handles moving and swapping sets of named objects.
  *  SWAP_ALL moves all named objects from set `set' to `new_set' and vice versa;
  *  TEST_ALL checks that there aren't any named object with conflicting names;
  *  MOVE_ALL moves all named objects from set `set' to `new_set';
  *  COUNT_ONE used to count number of references used by object with kidx `set';
  *  TEST_ONE checks that named object with kidx `set' can be moved to `new_set`;
  *  MOVE_ONE moves named object with kidx `set' to set `new_set'.
  */
 enum ipfw_sets_cmd {
 	SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE
 };
 typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch,
     uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
 
 
 struct opcode_obj_rewrite {
 	uint32_t		opcode;		/* Opcode to act upon */
 	uint32_t		etlv;		/* Relevant export TLV id  */
 	ipfw_obj_rw_cl		*classifier;	/* Check if rewrite is needed */
 	ipfw_obj_rw_upd		*update;	/* update cmd with new value */
 	ipfw_obj_fname_cb	*find_byname;	/* Find named object by name */
 	ipfw_obj_fidx_cb	*find_bykidx;	/* Find named object by kidx */
 	ipfw_obj_create_cb	*create_object;	/* Create named object */
 	ipfw_obj_destroy_cb	*destroy_object;/* Destroy named object */
 	ipfw_obj_sets_cb	*manage_sets;	/* Swap or move sets */
 };
 
 /*
  * Register (ADD) or unregister (DEL) the opcode rewriter array c, but
  * only when the first argument is non-zero.  The element count is
  * derived with sizeof, so c must be a real array and not a pointer.
  * The int result of ipfw_del_obj_rewriter() is discarded.
  */
 #define	IPFW_ADD_OBJ_REWRITER(f, c)	do {	\
 	if ((f) != 0) 				\
 		ipfw_add_obj_rewriter(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 #define	IPFW_DEL_OBJ_REWRITER(l, c)	do {	\
 	if ((l) != 0) 				\
 		ipfw_del_obj_rewriter(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 
 /* In ip_fw_iface.c */
 int ipfw_iface_init(void);
 void ipfw_iface_destroy(void);
 void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch);
 int ipfw_iface_ref(struct ip_fw_chain *ch, char *name,
     struct ipfw_ifc *ic);
 void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 
 /* In ip_fw_sockopt.c */
 void ipfw_init_skipto_cache(struct ip_fw_chain *chain);
 void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
 int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_ctl3(struct sockopt *sopt);
 int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
     int locked);
 void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
     struct ip_fw *rule);
 void ipfw_reap_rules(struct ip_fw *head);
 void ipfw_init_counters(void);
 void ipfw_destroy_counters(void);
 struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
 void ipfw_free_rule(struct ip_fw *rule);
 int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
 int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx);
 
 typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
     ip_fw3_opheader *op3, struct sockopt_data *sd);
 struct ipfw_sopt_handler {
 	uint16_t	opcode;
 	uint8_t		version;
 	uint8_t		dir;
 	sopt_handler_f	*handler;
 	uint64_t	refcnt;
 };
 /*
  * Handler direction flags.  HDIR_BOTH is parenthesized so that it can be
  * used safely inside larger expressions such as (dir & HDIR_BOTH); without
  * the parentheses that would parse as ((dir & HDIR_GET) | HDIR_SET).
  */
 #define	HDIR_SET	0x01	/* Handler is used to set some data */
 #define	HDIR_GET	0x02	/* Handler is used to retrieve data */
 #define	HDIR_BOTH	(HDIR_GET|HDIR_SET)	/* Handler does both */
 
 void ipfw_init_sopt_handler(void);
 void ipfw_destroy_sopt_handler(void);
 void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
 int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
 caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed);
 caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed);
 /*
  * Register (ADD) or unregister (DEL) the sockopt handler array c, but
  * only when the first argument is non-zero.  The element count is
  * derived with sizeof, so c must be a real array and not a pointer.
  * The int result of ipfw_del_sopt_handler() is discarded.
  */
 #define	IPFW_ADD_SOPT_HANDLER(f, c)	do {	\
 	if ((f) != 0) 				\
 		ipfw_add_sopt_handler(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 #define	IPFW_DEL_SOPT_HANDLER(l, c)	do {	\
 	if ((l) != 0) 				\
 		ipfw_del_sopt_handler(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 
 struct namedobj_instance;
 typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *,
     void *arg);
 typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key,
     uint32_t kopt);
 typedef int (objhash_cmp_f)(struct named_object *no, const void *key,
     uint32_t kopt);
 struct namedobj_instance *ipfw_objhash_create(uint32_t items);
 void ipfw_objhash_destroy(struct namedobj_instance *);
 void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks);
 void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni,
     void **idx, int *blocks);
 void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni,
     void **idx, int *blocks);
 void ipfw_objhash_bitmap_free(void *idx, int blocks);
 void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f);
 struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni,
     uint32_t set, char *name);
 struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni,
     uint32_t set, uint32_t type, const char *name);
 struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni,
     uint16_t idx);
 int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
     struct named_object *b);
 void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no);
 void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no);
 uint32_t ipfw_objhash_count(struct namedobj_instance *ni);
 uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type);
 int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f,
     void *arg);
 int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
     void *arg, uint16_t type);
 int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx);
 int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx);
 void ipfw_objhash_set_funcs(struct namedobj_instance *ni,
     objhash_hash_f *hash_f, objhash_cmp_f *cmp_f);
 int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
     uint32_t etlv, struct named_object **pno);
 void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv);
 ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx,
     uint32_t etlv);
 void ipfw_init_obj_rewriter(void);
 void ipfw_destroy_obj_rewriter(void);
 void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
 int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
 
 int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
     struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti);
 void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx);
 int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx);
 void ipfw_init_srv(struct ip_fw_chain *ch);
 void ipfw_destroy_srv(struct ip_fw_chain *ch);
 int ipfw_check_object_name_generic(const char *name);
 int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
     uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
 
 /* In ip_fw_eaction.c */
 typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args,
     ipfw_insn *cmd, int *done);
 int ipfw_eaction_init(struct ip_fw_chain *ch, int first);
 void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last);
 
 uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
     const char *name);
 int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id);
 int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
     ipfw_insn *cmd, int *done);
 int ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
     uint16_t eaction_id, uint16_t default_id, uint16_t instance_id);
 int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
     uint16_t instance_id);
 
 /* In ip_fw_table.c */
 struct table_info;
 
 typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 
 int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
     void *paddr, uint32_t *val);
 struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
     uint16_t kidx);
 int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
 void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
 int ipfw_init_tables(struct ip_fw_chain *ch, int first);
 int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
 int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
 void ipfw_destroy_tables(struct ip_fw_chain *ch, int last);
 
 /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
 
 extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 
 typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
 typedef int ipfw_nat_cfg_t(struct sockopt *);
 
 VNET_DECLARE(int, ipfw_nat_ready);
 #define	V_ipfw_nat_ready	VNET(ipfw_nat_ready)
 #define	IPFW_NAT_LOADED	(V_ipfw_nat_ready)
 
 extern ipfw_nat_t *ipfw_nat_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 /* Helper functions for IP checksum adjustment */
 /*
  * Add two 16-bit values, feeding any carry out of bit 15 back into
  * the low end of the result (end-around carry).
  */
 static __inline uint16_t
 cksum_add(uint16_t sum, uint16_t a)
 {
 	uint32_t wide;
 
 	/* Do the addition in 32 bits so the carry is kept in bit 16. */
 	wide = (uint32_t)sum + a;
 	return ((uint16_t)((wide & 0xffff) + (wide >> 16)));
 }
 
 /*
  * Update checksum oldsum for a 16-bit word that changed from old to
  * new.  Computes ~(~oldsum + ~old + new) using cksum_add() for both
  * additions.
  */
 static __inline uint16_t
 cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new)
 {
 	uint16_t acc;
 
 	acc = cksum_add(~oldsum, ~old);
 	acc = cksum_add(acc, new);
 	return (~acc);
 }
 
 #endif /* _KERNEL */
 #endif /* _IPFW2_PRIVATE_H */
Index: head/sys/netpfil/ipfw/ip_fw_sockopt.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_sockopt.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_sockopt.c	(revision 343619)
@@ -1,4684 +1,4683 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Supported by: Valeria Paoli
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Control socket and rule management routines for ipfw.
  * Control is currently implemented via IP_FW3 setsockopt() code.
  */
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>	/* struct m_tag used by nested headers */
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/fnv_hash.h>
 #include <net/if.h>
-#include <net/pfil.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h> /* hooks */
 #include <netinet/ip_fw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 static int ipfw_ctl(struct sockopt *sopt);
 static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len,
     struct rule_check_info *ci);
 static int check_ipfw_rule1(struct ip_fw_rule *rule, int size,
     struct rule_check_info *ci);
 static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size,
     struct rule_check_info *ci);
 static int rewrite_rule_uidx(struct ip_fw_chain *chain,
     struct rule_check_info *ci);
 
 #define	NAMEDOBJ_HASH_SIZE	32
 
 /*
  * State of one named-object hash instance: two bucket arrays with their
  * sizes, a bitmask of used indices, an item counter and the hash/compare
  * callbacks.
  * NOTE(review): judging by the field comments, @names is keyed by object
  * name and @values by numeric index -- confirm against the objhash code.
  */
 struct namedobj_instance {
 	struct namedobjects_head	*names;
 	struct namedobjects_head	*values;
 	uint32_t nn_size;		/* names hash size */
 	uint32_t nv_size;		/* number hash size */
 	u_long *idx_mask;		/* used items bitmask */
 	uint32_t max_blocks;		/* number of "long" blocks in bitmask */
 	uint32_t count;			/* number of items */
 	uint16_t free_off[IPFW_MAX_SETS];	/* first possible free offset */
 	objhash_hash_f	*hash_f;
 	objhash_cmp_f	*cmp_f;
 };
 /* Number of bits in one u_long, i.e. items covered by one bitmask block */
 #define	BLOCK_ITEMS	(8 * sizeof(u_long))	/* Number of items for ffsl() */
 
 static uint32_t objhash_hash_name(struct namedobj_instance *ni,
     const void *key, uint32_t kopt);
 static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val);
 static int objhash_cmp_name(struct named_object *no, const void *name,
     uint32_t set);
 
 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
 
 static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 
 /* ctl3 handler data */
 /*
  * Default (MTX_DEF) mutex plus init/destroy/lock/unlock wrappers.
  * NOTE(review): presumably serializes access to the ctl3 handler
  * registry -- confirm against the users of CTL3_LOCK().
  */
 struct mtx ctl3_lock;
 #define	CTL3_LOCK_INIT()	mtx_init(&ctl3_lock, "ctl3_lock", NULL, MTX_DEF)
 #define	CTL3_LOCK_DESTROY()	mtx_destroy(&ctl3_lock)
 #define	CTL3_LOCK()		mtx_lock(&ctl3_lock)
 #define	CTL3_UNLOCK()		mtx_unlock(&ctl3_lock)
 
 static struct ipfw_sopt_handler *ctl3_handlers;
 static size_t ctl3_hsize;
 static uint64_t ctl3_refct, ctl3_gencnt;
 #define	CTL3_SMALLBUF	4096			/* small page-size write buffer */
 /*
  * Parenthesized so that the product stays a single operand wherever the
  * macro is expanded (e.g. next to '%', '/' or a cast).
  */
 #define	CTL3_LARGEBUF	(16 * 1024 * 1024)	/* handle large rulesets */
 
 static int ipfw_flush_sopt_data(struct sockopt_data *sd);
 
 /*
  * Sockopt handlers implemented in this file.  Each entry names an
  * IP_FW_* opcode, a direction (HDIR_GET, HDIR_SET or HDIR_BOTH) and the
  * handler function; several opcodes share one handler (clear_rules,
  * manage_sets).
  * NOTE(review): the second field is 0 in every entry -- presumably the
  * handler version; confirm against struct ipfw_sopt_handler.
  */
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_XGET,		0,	HDIR_GET,	dump_config },
 	{ IP_FW_XADD,		0,	HDIR_BOTH,	add_rules },
 	{ IP_FW_XDEL,		0,	HDIR_BOTH,	del_rules },
 	{ IP_FW_XZERO,		0,	HDIR_SET,	clear_rules },
 	{ IP_FW_XRESETLOG,	0,	HDIR_SET,	clear_rules },
 	{ IP_FW_XMOVE,		0,	HDIR_SET,	move_rules },
 	{ IP_FW_SET_SWAP,	0,	HDIR_SET,	manage_sets },
 	{ IP_FW_SET_MOVE,	0,	HDIR_SET,	manage_sets },
 	{ IP_FW_SET_ENABLE,	0,	HDIR_SET,	manage_sets },
 	{ IP_FW_DUMP_SOPTCODES,	0,	HDIR_GET,	dump_soptcodes },
 	{ IP_FW_DUMP_SRVOBJECTS,0,	HDIR_GET,	dump_srvobjects },
 };
 
 static int
 set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule);
 static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd,
     uint16_t *puidx, uint8_t *ptype);
 static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
     struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti);
 static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd,
     struct tid_info *ti, struct obj_idx *pidx, int *unresolved);
 static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule);
 static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd,
     struct obj_idx *oib, struct obj_idx *end);
 static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
     struct sockopt_data *sd);
 
 /*
  * Opcode object rewriter variables
  */
 struct opcode_obj_rewrite *ctl3_rewriters;
 static size_t ctl3_rsize;
 
 /*
  * static variables followed by global ones
  */
 
 VNET_DEFINE_STATIC(uma_zone_t, ipfw_cntr_zone);
 #define	V_ipfw_cntr_zone		VNET(ipfw_cntr_zone)
 
 /*
  * Create the UMA zone that backs per-rule counters and store it in
  * V_ipfw_cntr_zone.  Items are IPFW_RULE_CNTR_SIZE bytes and the zone is
  * created with UMA_ZONE_PCPU.
  */
 void
 ipfw_init_counters(void)
 {
 
 	V_ipfw_cntr_zone = uma_zcreate("IPFW counters",
 	    IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 }
 
 /*
  * Destroy the per-rule counter zone held in V_ipfw_cntr_zone.
  */
 void
 ipfw_destroy_counters(void)
 {
 
 	uma_zdestroy(V_ipfw_cntr_zone);
 }
 
 /*
  * Allocate a zero-filled rule of @rulesize bytes together with its
  * counter storage from V_ipfw_cntr_zone.  The new rule starts with a
  * reference count of 1.  Both allocations use M_WAITOK and the results
  * are not NULL-checked.  @chain is not used by the visible code.
  */
 struct ip_fw *
 ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize)
 {
 	struct ip_fw *krule;
 
 	krule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO);
 	krule->refcnt = 1;
 	krule->cntr = uma_zalloc_pcpu(V_ipfw_cntr_zone, M_WAITOK | M_ZERO);
 
 	return (krule);
 }
 
 /*
  * Release the memory of @rule (counters first, then the rule itself)
  * unless somebody else still holds a reference to it.
  */
 void
 ipfw_free_rule(struct ip_fw *rule)
 {
 
 	/*
 	 * The reference count is deliberately left untouched: this
 	 * function may run with no locks held.  Callers drop their
 	 * reference under IPFW_UH_WLOCK and call us once the count is
 	 * down to 1; with more references outstanding nothing is freed.
 	 */
 	if (rule->refcnt <= 1) {
 		uma_zfree_pcpu(V_ipfw_cntr_zone, rule->cntr);
 		free(rule, M_IPFW);
 	}
 }
 
 
 /*
  * Binary search of chain->map for the first rule that is not below
  * (@key, @id), comparing by rulenum first and by rule id second.
  * Returns the index of that rule; if every rule is below the key the
  * search ends on the last index (chain->n_rules - 1).
  */
 int
 ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
 {
 	struct ip_fw *cur;
 	int low, high, mid;
 
 	low = 0;
 	high = chain->n_rules - 1;
 	while (low < high) {
 		mid = (low + high) / 2;
 		cur = chain->map[mid];
 		if (cur->rulenum < key ||
 		    (cur->rulenum == key && cur->id < id))
 			low = mid + 1;	/* answer is strictly above mid */
 		else
 			high = mid;	/* mid itself may be the answer */
 	}
 	return (high);
 }
 
 /*
  * Builds skipto cache on rule set @map.
  *
  * Fills the backup array chain->idxmap_back so that, for every rule
  * number i in 0..65535, entry i holds the index in @map of the first
  * rule whose rulenum is >= i.  Nothing is done when the backup array
  * has not been allocated yet.  The caller must hold IPFW_UH_WLOCK and
  * is expected to make the result visible with swap_skipto_cache().
  */
 static void
 update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map)
 {
 	int *smap, rulenum;
 	int i, mi;
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 
 	/* mi walks @map; rulenum is the number of the rule at map[mi] */
 	mi = 0;
 	rulenum = map[mi]->rulenum;
 	smap = chain->idxmap_back;
 
 	if (smap == NULL)
 		return;
 
 	for (i = 0; i < 65536; i++) {
 		smap[i] = mi;
 		/* Use the same rule index until i < rulenum */
 		/*
 		 * Never advance past i == 65535.
 		 * NOTE(review): presumably because the default rule
 		 * (number 65535) is the last entry of @map -- confirm.
 		 */
 		if (i != rulenum || i == 65535)
 			continue;
 		/* Find next rule with num > i */
 		rulenum = map[++mi]->rulenum;
 		/* Skip the remaining rules that share number i */
 		while (rulenum == i)
 			rulenum = map[++mi]->rulenum;
 	}
 }
 
 /*
  * Exchange the active skipto index (chain->idxmap) with the prepared
  * backup one (chain->idxmap_back).  Both IPFW_UH_WLOCK and IPFW_WLOCK
  * must be held.
  */
 static void
 swap_skipto_cache(struct ip_fw_chain *chain)
 {
 	int *prev_active;
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 	IPFW_WLOCK_ASSERT(chain);
 
 	prev_active = chain->idxmap;
 	chain->idxmap = chain->idxmap_back;
 	chain->idxmap_back = prev_active;
 }
 
 /*
  * Allocate and initialize skipto cache.
  *
  * Allocates both index arrays (65536 ints each) before taking any lock,
  * then, under IPFW_UH_WLOCK, builds the cache for the current rule map
  * and installs it.  If chain->idxmap is already set (the cache was
  * initialized earlier or by a concurrent caller) the fresh arrays are
  * freed and nothing else changes.
  */
 void
 ipfw_init_skipto_cache(struct ip_fw_chain *chain)
 {
 	int *idxmap, *idxmap_back;
 
 	/* M_WAITOK allocations are done up front, outside of the locks */
 	idxmap = malloc(65536 * sizeof(int), M_IPFW, M_WAITOK | M_ZERO);
 	idxmap_back = malloc(65536 * sizeof(int), M_IPFW, M_WAITOK);
 
 	/*
 	 * Note we may be called at any time after initialization,
 	 * for example, on first skipto rule, so we need to
 	 * provide valid chain->idxmap on return
 	 */
 
 	IPFW_UH_WLOCK(chain);
 	if (chain->idxmap != NULL) {
 		/* Already initialized: drop our arrays and leave */
 		IPFW_UH_WUNLOCK(chain);
 		free(idxmap, M_IPFW);
 		free(idxmap_back, M_IPFW);
 		return;
 	}
 
 	/* Set backup pointer first to permit building cache */
 	chain->idxmap_back = idxmap_back;
 	update_skipto_cache(chain, chain->map);
 	IPFW_WLOCK(chain);
 	/* It is now safe to set chain->idxmap ptr */
 	chain->idxmap = idxmap;
 	/*
 	 * After the swap the freshly built array is the active index and
 	 * the zeroed one becomes the backup.
 	 */
 	swap_skipto_cache(chain);
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 }
 
 /*
  * Destroys skipto cache.
  *
  * Frees the active and the backup index arrays of @chain, each one
  * guarded by a check of its own pointer.  The pointers themselves are
  * left untouched, so the chain must not use the cache afterwards.
  */
 void
 ipfw_destroy_skipto_cache(struct ip_fw_chain *chain)
 {
 
 	if (chain->idxmap != NULL)
 		free(chain->idxmap, M_IPFW);
 	if (chain->idxmap_back != NULL)
 		free(chain->idxmap_back, M_IPFW);
 }
 
 
 /*
  * allocate a new map, returns the chain locked. extra is the number
  * of entries to add or delete.
  *
  * @locked tells whether the caller already holds IPFW_UH_WLOCK:
  *  - locked != 0: the allocation uses M_NOWAIT and the lock state is
  *    not changed here;
  *  - locked == 0: the allocation uses M_WAITOK, IPFW_UH_WLOCK is taken
  *    afterwards and is still held when the map is returned.
  * The returned map is zero-filled and has room for
  * chain->n_rules + extra pointers.  Returns NULL if the allocation
  * fails; on that path this function has not taken the lock.
  */
 static struct ip_fw **
 get_map(struct ip_fw_chain *chain, int extra, int locked)
 {
 
 	for (;;) {
 		struct ip_fw **map;
 		u_int i, mflags;
 
 		mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK);
 
 		/* In the unlocked case n_rules is sampled without the lock */
 		i = chain->n_rules + extra;
 		map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags);
 		if (map == NULL) {
 			printf("%s: cannot allocate map\n", __FUNCTION__);
 			return NULL;
 		}
 		if (!locked)
 			IPFW_UH_WLOCK(chain);
 		/* Re-check the size now that the lock is held */
 		if (i >= chain->n_rules + extra) /* good */
 			return map;
 		/* otherwise we lost the race, free and retry */
 		if (!locked)
 			IPFW_UH_WUNLOCK(chain);
 		free(map, M_IPFW);
 	}
 }
 
 /*
  * swap the maps. It is supposed to be called with IPFW_UH_WLOCK
  *
  * Under IPFW_WLOCK: bumps chain->id, installs @new_map with @new_len
  * rules and switches to the prepared skipto index.  Returns the
  * previous map; the callers in this file free it.
  */
 static struct ip_fw **
 swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
 {
 	struct ip_fw **old_map;
 
 	IPFW_WLOCK(chain);
 	chain->id++;
 	chain->n_rules = new_len;
 	old_map = chain->map;
 	chain->map = new_map;
 	/* The skipto index built for @new_map becomes active with it */
 	swap_skipto_cache(chain);
 	IPFW_WUNLOCK(chain);
 	return old_map;
 }
 
 
 static void
 export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr)
 {
 	struct timeval boottime;
 
 	cntr->size = sizeof(*cntr);
 
 	if (krule->cntr != NULL) {
 		cntr->pcnt = counter_u64_fetch(krule->cntr);
 		cntr->bcnt = counter_u64_fetch(krule->cntr + 1);
 		cntr->timestamp = krule->timestamp;
 	}
 	if (cntr->timestamp > 0) {
 		getboottime(&boottime);
 		cntr->timestamp += boottime.tv_sec;
 	}
 }
 
 static void
 export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr)
 {
 	struct timeval boottime;
 
 	if (krule->cntr != NULL) {
 		cntr->pcnt = counter_u64_fetch(krule->cntr);
 		cntr->bcnt = counter_u64_fetch(krule->cntr + 1);
 		cntr->timestamp = krule->timestamp;
 	}
 	if (cntr->timestamp > 0) {
 		getboottime(&boottime);
 		cntr->timestamp += boottime.tv_sec;
 	}
 }
 
 /*
  * Import a rule given in the current (v1) userland layout.
  * Source is ci->urule, destination is ci->krule, which is expected to
  * be zeroed by the caller.  Also records in ci->urule_numoff where the
  * rule number is located inside the userland rule.
  */
 static void
 import_rule1(struct rule_check_info *ci)
 {
 	struct ip_fw_rule *src;
 	struct ip_fw *dst;
 
 	src = (struct ip_fw_rule *)ci->urule;
 	dst = (struct ip_fw *)ci->krule;
 
 	/* Remember the offset of rulenum in the userland rule */
 	ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum);
 
 	/* Header fields */
 	dst->flags = src->flags;
 	dst->set = src->set;
 	dst->rulenum = src->rulenum;
 	dst->cmd_len = src->cmd_len;
 	dst->act_ofs = src->act_ofs;
 
 	/* Opcodes; cmd_len is counted in 32-bit words */
 	memcpy(dst->cmd, src->cmd, dst->cmd_len * sizeof(uint32_t));
 }
 
 /*
  * Write kernel rule @krule to @data in the current (v1) layout:
  *
  *   ipfw_obj_tlv (IPFW_TLV_RULE_ENT, length @len)
  *   [ ip_fw_bcounter ]   only when @rcntrs is non-zero
  *   ip_fw_rule           header followed by the opcodes
  *
  * The caller provides @data zeroed.
  */
 static void
 export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs)
 {
 	struct ip_fw_bcounter *cntr;
 	struct ip_fw_rule *urule;
 	ipfw_obj_tlv *tlv;
 
 	/* TLV header comes first */
 	tlv = (ipfw_obj_tlv *)data;
 	tlv->type = IPFW_TLV_RULE_ENT;
 	tlv->length = len;
 
 	/* The rule follows the TLV directly unless counters are wanted */
 	urule = (struct ip_fw_rule *)(tlv + 1);
 	if (rcntrs != 0) {
 		cntr = (struct ip_fw_bcounter *)(tlv + 1);
 		export_cntr1_base(krule, cntr);
 		urule = (struct ip_fw_rule *)(cntr + 1);
 	}
 
 	/* Rule header */
 	urule->id = krule->id;
 	urule->flags = krule->flags;
 	urule->set = krule->set;
 	urule->rulenum = krule->rulenum;
 	urule->cmd_len = krule->cmd_len;
 	urule->act_ofs = krule->act_ofs;
 
 	/* Opcodes; cmd_len is counted in 32-bit words */
 	memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));
 }
 
 
 /*
  * Copies rule @urule from FreeBSD8 userland format (v0)
  * to kernel @krule.
  * Assume @krule is zeroed.
  *
  * Source and destination are taken from ci->urule and ci->krule.
  * Besides the plain copy, the offset of rulenum inside the v0 rule is
  * saved in ci->urule_numoff and the copied opcodes are rewritten in
  * place to the kernel conventions (see "Alter opcodes" below).
  */
 static void
 import_rule0(struct rule_check_info *ci)
 {
 	struct ip_fw_rule0 *urule;
 	struct ip_fw *krule;
 	int cmdlen, l;
 	ipfw_insn *cmd;
 	ipfw_insn_limit *lcmd;
 	ipfw_insn_if *cmdif;
 
 	urule = (struct ip_fw_rule0 *)ci->urule;
 	krule = (struct ip_fw *)ci->krule;
 
 	/* copy header */
 	krule->act_ofs = urule->act_ofs;
 	krule->cmd_len = urule->cmd_len;
 	krule->rulenum = urule->rulenum;
 	krule->set = urule->set;
 	/* Bit 0 of the v0 _pad field carries the NOOPT flag */
 	if ((urule->_pad & 1) != 0)
 		krule->flags |= IPFW_RULE_NOOPT;
 
 	/* Save rulenum offset */
 	ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum);
 
 	/* Copy opcodes */
 	memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));
 
 	/*
 	 * Alter opcodes:
 	 * 1) convert tablearg value from 65535 to 0
 	 * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room
 	 *    for targ).
 	 * 3) convert table number in iface opcodes to u16
 	 * 4) convert old `nat global` into new 65535
 	 */
 	l = krule->cmd_len;
 	cmd = krule->cmd;
 	cmdlen = 0;
 
 	/* Walk the kernel copy one instruction (F_LEN words) at a time */
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		switch (cmd->opcode) {
 		/* Opcodes supporting tablearg */
 		case O_TAG:
 		case O_TAGGED:
 		case O_PIPE:
 		case O_QUEUE:
 		case O_DIVERT:
 		case O_TEE:
 		case O_SKIPTO:
 		case O_CALLRETURN:
 		case O_NETGRAPH:
 		case O_NGTEE:
 		case O_NAT:
 			/*
 			 * NOTE(review): the arg1 == 0 conversion below is
 			 * described above for `nat global` only, but this
 			 * branch is shared by every opcode of the group --
 			 * confirm that this is intended.
 			 */
 			if (cmd->arg1 == IP_FW_TABLEARG)
 				cmd->arg1 = IP_FW_TARG;
 			else if (cmd->arg1 == 0)
 				cmd->arg1 = IP_FW_NAT44_GLOBAL;
 			break;
 		case O_SETFIB:
 		case O_SETDSCP:
 			if (cmd->arg1 == IP_FW_TABLEARG)
 				cmd->arg1 = IP_FW_TARG;
 			else
 				cmd->arg1 |= 0x8000;
 			break;
 		case O_LIMIT:
 			lcmd = (ipfw_insn_limit *)cmd;
 			if (lcmd->conn_limit == IP_FW_TABLEARG)
 				lcmd->conn_limit = IP_FW_TARG;
 			break;
 		/* Interface tables */
 		case O_XMIT:
 		case O_RECV:
 		case O_VIA:
 			/* Interface table, possibly */
 			cmdif = (ipfw_insn_if *)cmd;
 			/* Only names starting with '\1' are converted */
 			if (cmdif->name[0] != '\1')
 				break;
 
 			cmdif->p.kidx = (uint16_t)cmdif->p.glob;
 			break;
 		}
 	}
 }
 
 /*
  * Copies rule @krule from kernel to FreeBSD8 userland format (v0)
  *
  * @urule is the output buffer of @len bytes; it is zeroed first.  The
  * header, opcodes and counters are copied and then the opcodes in the
  * userland copy are rewritten back to the v0 conventions, mirroring
  * import_rule0().  The kernel rule itself is not modified.
  */
 static void
 export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len)
 {
 	int cmdlen, l;
 	ipfw_insn *cmd;
 	ipfw_insn_limit *lcmd;
 	ipfw_insn_if *cmdif;
 
 	/* copy header */
 	memset(urule, 0, len);
 	urule->act_ofs = krule->act_ofs;
 	urule->cmd_len = krule->cmd_len;
 	urule->rulenum = krule->rulenum;
 	urule->set = krule->set;
 	/* The NOOPT flag travels in bit 0 of the v0 _pad field */
 	if ((krule->flags & IPFW_RULE_NOOPT) != 0)
 		urule->_pad |= 1;
 
 	/* Copy opcodes */
 	memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));
 
 	/* Export counters */
 	/* The v0 counter fields are filled in place, starting at pcnt */
 	export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt);
 
 	/*
 	 * Alter opcodes:
 	 * 1) convert tablearg value from 0 to 65535
 	 * 2) Remove highest bit from O_SETFIB/O_SETDSCP values.
 	 * 3) convert table number in iface opcodes to int
 	 */
 	l = urule->cmd_len;
 	cmd = urule->cmd;
 	cmdlen = 0;
 
 	/* Walk the userland copy one instruction (F_LEN words) at a time */
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		switch (cmd->opcode) {
 		/* Opcodes supporting tablearg */
 		case O_TAG:
 		case O_TAGGED:
 		case O_PIPE:
 		case O_QUEUE:
 		case O_DIVERT:
 		case O_TEE:
 		case O_SKIPTO:
 		case O_CALLRETURN:
 		case O_NETGRAPH:
 		case O_NGTEE:
 		case O_NAT:
 			/*
 			 * NOTE(review): the IP_FW_NAT44_GLOBAL -> 0
 			 * conversion is applied to every opcode of this
 			 * group, not just O_NAT -- confirm that this is
 			 * intended.
 			 */
 			if (cmd->arg1 == IP_FW_TARG)
 				cmd->arg1 = IP_FW_TABLEARG;
 			else if (cmd->arg1 == IP_FW_NAT44_GLOBAL)
 				cmd->arg1 = 0;
 			break;
 		case O_SETFIB:
 		case O_SETDSCP:
 			if (cmd->arg1 == IP_FW_TARG)
 				cmd->arg1 = IP_FW_TABLEARG;
 			else
 				cmd->arg1 &= ~0x8000;
 			break;
 		case O_LIMIT:
 			lcmd = (ipfw_insn_limit *)cmd;
 			if (lcmd->conn_limit == IP_FW_TARG)
 				lcmd->conn_limit = IP_FW_TABLEARG;
 			break;
 		/* Interface tables */
 		case O_XMIT:
 		case O_RECV:
 		case O_VIA:
 			/* Interface table, possibly */
 			cmdif = (ipfw_insn_if *)cmd;
 			/* Only names starting with '\1' are converted */
 			if (cmdif->name[0] != '\1')
 				break;
 
 			cmdif->p.glob = cmdif->p.kidx;
 			break;
 		}
 	}
 }
 
 /*
  * Add new rule(s) to the list possibly creating rule number for each.
  * Update the rule_number in the input struct so the caller knows it as well.
  * Must be called without IPFW_UH held
  *
  * @rci is an array of @count checked rules.  First every rule with
  * object opcodes gets its objects resolved and referenced; if that
  * fails, references taken for the earlier rules are dropped and the
  * error is returned.  Then a new map is allocated and the rule is
  * inserted; as the FIXME below says, only the first entry of @rci is
  * actually inserted.
  *
  * Returns 0 on success, the error from rewrite_rule_uidx(), or ENOSPC
  * when the new map cannot be allocated.
  */
 static int
 commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
 {
 	int error, i, insert_before, tcount;
 	uint16_t rulenum, *pnum;
 	struct rule_check_info *ci;
 	struct ip_fw *krule;
 	struct ip_fw **map;	/* the new array of pointers */
 
 	/* Check if we need to do table/obj index remap */
 	/* tcount counts the rules whose objects have been referenced */
 	tcount = 0;
 	for (ci = rci, i = 0; i < count; ci++, i++) {
 		if (ci->object_opcodes == 0)
 			continue;
 
 		/*
 		 * Rule has some object opcodes.
 		 * We need to find (and create non-existing)
 		 * kernel objects, and reference existing ones.
 		 */
 		error = rewrite_rule_uidx(chain, ci);
 		if (error != 0) {
 
 			/*
 			 * rewrite failed, state for current rule
 			 * has been reverted. Check if we need to
 			 * revert more.
 			 */
 			if (tcount > 0) {
 
 				/*
 				 * We have some more table rules
 				 * we need to rollback.
 				 */
 
 				IPFW_UH_WLOCK(chain);
 				/* Walk back over the rules handled so far */
 				while (ci != rci) {
 					ci--;
 					if (ci->object_opcodes == 0)
 						continue;
 					unref_rule_objects(chain,ci->krule);
 
 				}
 				IPFW_UH_WUNLOCK(chain);
 
 			}
 
 			return (error);
 		}
 
 		tcount++;
 	}
 
 	/* get_map returns with IPFW_UH_WLOCK if successful */
 	map = get_map(chain, count, 0 /* not locked */);
 	if (map == NULL) {
 		if (tcount > 0) {
 			/* Unbind tables */
 			IPFW_UH_WLOCK(chain);
 			for (ci = rci, i = 0; i < count; ci++, i++) {
 				if (ci->object_opcodes == 0)
 					continue;
 
 				unref_rule_objects(chain, ci->krule);
 			}
 			IPFW_UH_WUNLOCK(chain);
 		}
 
 		return (ENOSPC);
 	}
 
 	/* Clamp the auto-increment step to [1, 1000] */
 	if (V_autoinc_step < 1)
 		V_autoinc_step = 1;
 	else if (V_autoinc_step > 1000)
 		V_autoinc_step = 1000;
 
 	/* FIXME: Handle count > 1 */
 	ci = rci;
 	krule = ci->krule;
 	rulenum = krule->rulenum;
 
 	/* find the insertion point, we will insert before */
 	/*
 	 * An explicit number goes after all rules with the same number;
 	 * number 0 (auto) goes right before the default rule.
 	 */
 	insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE;
 	i = ipfw_find_rule(chain, insert_before, 0);
 	/* duplicate first part */
 	if (i > 0)
 		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
 	map[i] = krule;
 	/* duplicate remaining part, we always have the default rule */
 	bcopy(chain->map + i, map + i + 1,
 		sizeof(struct ip_fw *) *(chain->n_rules - i));
 	if (rulenum == 0) {
 		/* Compute rule number and write it back */
 		/*
 		 * Start from the preceding rule's number (0 if none) and
 		 * add the step unless that would reach the default rule.
 		 */
 		rulenum = i > 0 ? map[i-1]->rulenum : 0;
 		if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
 			rulenum += V_autoinc_step;
 		krule->rulenum = rulenum;
 		/* Save number to userland rule */
 		pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff);
 		*pnum = rulenum;
 	}
 
 	/* swap_map() increments chain->id, so this is the id after the swap */
 	krule->id = chain->id + 1;
 	update_skipto_cache(chain, map);
 	/* From here on @map holds the old map returned by swap_map() */
 	map = swap_map(chain, map, chain->n_rules + 1);
 	chain->static_len += RULEUSIZE0(krule);
 	IPFW_UH_WUNLOCK(chain);
 	if (map)
 		free(map, M_IPFW);
 	return (0);
 }
 
 /*
  * Append @rule at the end of the chain as a protected rule: it is
  * given number IPFW_DEFAULT_RULE and set RESVD_SET.
  *
  * @locked is passed on to get_map(): non-zero means the caller already
  * holds IPFW_UH_WLOCK.  In both cases IPFW_UH_WLOCK is released before
  * a successful return.
  *
  * Returns 0 on success or ENOMEM if the new map cannot be allocated.
  */
 int
 ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
     int locked)
 {
 	struct ip_fw **map;
 
 	map = get_map(chain, 1, locked);
 	if (map == NULL)
 		return (ENOMEM);
 	if (chain->n_rules > 0)
 		bcopy(chain->map, map,
 		    chain->n_rules * sizeof(struct ip_fw *));
 	map[chain->n_rules] = rule;
 	rule->rulenum = IPFW_DEFAULT_RULE;
 	rule->set = RESVD_SET;
 	/* swap_map() increments chain->id, so this is the id after the swap */
 	rule->id = chain->id + 1;
 	/* We add rule in the end of chain, no need to update skipto cache */
 	/* From here on @map holds the old map returned by swap_map() */
 	map = swap_map(chain, map, chain->n_rules + 1);
 	chain->static_len += RULEUSIZE0(rule);
 	IPFW_UH_WUNLOCK(chain);
 	free(map, M_IPFW);
 	return (0);
 }
 
 /*
  * Queue @rule for later destruction: drop the references it holds on
  * kernel objects and push it onto the front of the reap list @head.
  * Requires IPFW_UH_WLOCK.
  */
 void
 ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
     struct ip_fw *rule)
 {
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 
 	/* Release everything the rule references */
 	unref_rule_objects(chain, rule);
 
 	/* Link it in as the new list head */
 	rule->next = *head;
 	*head = rule;
 }
 
 /*
  * Walk the reap list starting at @head and hand every rule to
  * ipfw_free_rule().  An empty list (NULL @head) is fine.
  */
 void
 ipfw_reap_rules(struct ip_fw *head)
 {
 	struct ip_fw *next;
 
 	for (; head != NULL; head = next) {
 		/* Fetch the link first, the rule may be freed below */
 		next = head->next;
 		ipfw_free_rule(head);
 	}
 }
 
 /*
  * Tell whether @rule is selected by range request @rt.
  *
  * A rule is NOT selected when any of the following holds:
  *  - it is the default rule (IPFW_DEFAULT_RULE) and the request lacks
  *    IPFW_RCFLAG_DEFAULT;
  *  - it belongs to RESVD_SET and the request has IPFW_RCFLAG_ALL;
  *  - the request has IPFW_RCFLAG_SET and the rule is in another set;
  *  - the request has IPFW_RCFLAG_RANGE and the rule number is outside
  *    [rt->start_rule, rt->end_rule].
  *
  * Returns 1 if the rule is selected, 0 otherwise.
  */
 int
 ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt)
 {
 
 	/* Default rule is matched only on explicit request */
 	if ((rt->flags & IPFW_RCFLAG_DEFAULT) == 0 &&
 	    rule->rulenum == IPFW_DEFAULT_RULE)
 		return (0);
 
 	/* Flush-like requests leave the reserved set alone */
 	if (rule->set == RESVD_SET && (rt->flags & IPFW_RCFLAG_ALL) != 0)
 		return (0);
 
 	/* Set filter */
 	if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set)
 		return (0);
 
 	/* Rule number filter, inclusive on both ends */
 	if ((rt->flags & IPFW_RCFLAG_RANGE) == 0)
 		return (1);
 	return (rule->rulenum >= rt->start_rule &&
 	    rule->rulenum <= rt->end_rule);
 }
 
 /*
  * Argument block handed to the *_sets_cb() callbacks through
  * ipfw_objhash_foreach_type().
  */
 struct manage_sets_args {
 	uint16_t	set;		/* source set; callbacks compare it as uint8_t */
 	uint8_t		new_set;	/* destination set */
 };
 
 /*
  * Callback: exchange set membership.  Objects in args->set go to
  * args->new_set and vice versa; other objects are left alone.
  * Always returns 0.
  */
 static int
 swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct manage_sets_args *msa;
 	uint8_t first, second;
 
 	msa = (struct manage_sets_args *)arg;
 	first = (uint8_t)msa->set;
 	second = msa->new_set;
 
 	if (no->set == first)
 		no->set = second;
 	else if (no->set == second)
 		no->set = first;
 	return (0);
 }
 
 /*
  * Callback: move an object that lives in args->set to args->new_set.
  * Always returns 0.
  */
 static int
 move_sets_cb(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct manage_sets_args *msa;
 	uint8_t from;
 
 	msa = (struct manage_sets_args *)arg;
 	from = (uint8_t)msa->set;
 
 	if (no->set == from)
 		no->set = msa->new_set;
 	return (0);
 }
 
 /*
  * Callback: check whether an object from args->set could be moved to
  * args->new_set.  Returns EEXIST when the destination set already has
  * an object with the same type (etlv) and name, 0 otherwise.
  */
 static int
 test_sets_cb(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct manage_sets_args *msa;
 
 	msa = (struct manage_sets_args *)arg;
 
 	/* Only objects of the source set can collide */
 	if (no->set == (uint8_t)msa->set &&
 	    ipfw_objhash_lookup_name_type(ni, msa->new_set,
 	    no->etlv, no->name) != NULL)
 		return (EEXIST);
 	return (0);
 }
 
 /*
  * Generic function to handle moving and swapping sets.
  *
  * @ni:      object hash instance to operate on.
  * @type:    object type passed to the lookups/iterators.
  * @set:     source set for the *_ALL commands; for the *_ONE commands
  *           it carries the kernel index (kidx) of a single object.
  * @new_set: destination set; for COUNT_ONE it selects between
  *           incrementing (non-zero) and resetting (zero) the counter.
  * @cmd:     operation to perform.
  *
  * Returns 0 on success, EEXIST or EBUSY from the TEST_* checks, the
  * value produced by the iterator for the *_ALL commands, or EINVAL for
  * an unknown @cmd.
  */
 int
 ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
     uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd)
 {
 	struct manage_sets_args args;
 	struct named_object *no;
 
 	args.set = set;
 	args.new_set = new_set;
 	switch (cmd) {
 	case SWAP_ALL:
 		return (ipfw_objhash_foreach_type(ni, swap_sets_cb,
 		    &args, type));
 	case TEST_ALL:
 		return (ipfw_objhash_foreach_type(ni, test_sets_cb,
 		    &args, type));
 	case MOVE_ALL:
 		return (ipfw_objhash_foreach_type(ni, move_sets_cb,
 		    &args, type));
 	case COUNT_ONE:
 		/*
 		 * @set used to pass kidx.
 		 * When @new_set is zero - reset object counter,
 		 * otherwise increment it.
 		 */
 		/*
 		 * NOTE(review): here and in the *_ONE cases below the
 		 * lookup result is dereferenced without a NULL check;
 		 * presumably callers only pass a valid kidx -- confirm.
 		 */
 		no = ipfw_objhash_lookup_kidx(ni, set);
 		if (new_set != 0)
 			no->ocnt++;
 		else
 			no->ocnt = 0;
 		return (0);
 	case TEST_ONE:
 		/* @set used to pass kidx */
 		no = ipfw_objhash_lookup_kidx(ni, set);
 		/*
 		 * First check number of references:
 		 * when it differs, this mean other rules are holding
 		 * reference to given object, so it is not possible to
 		 * change its set. Note that refcnt may account references
 		 * to some going-to-be-added rules. Since we don't know
 		 * their numbers (and even if they will be added) it is
 		 * perfectly OK to return error here.
 		 */
 		if (no->ocnt != no->refcnt)
 			return (EBUSY);
 		/* A same-named object must not exist in the target set */
 		if (ipfw_objhash_lookup_name_type(ni, new_set, type,
 		    no->name) != NULL)
 			return (EEXIST);
 		return (0);
 	case MOVE_ONE:
 		/* @set used to pass kidx */
 		no = ipfw_objhash_lookup_kidx(ni, set);
 		no->set = new_set;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Delete rules matching range @rt.
  * Saves number of deleted rules in @ndel.
  *
  * Takes IPFW_UH_WLOCK for the whole operation.  A new map without the
  * matching rules is built and swapped in; the removed rules are
  * collected on a reap list and freed after the lock is released.
  * When @rt has IPFW_RCFLAG_DYNAMIC set, only dynamic states are expired
  * and *ndel is set to 0.  Note that @rt->end_rule may be clamped to
  * IPFW_DEFAULT_RULE - 1 as a side effect.
  *
  * Returns 0 on success.
  * Returns ENOMEM if the new map cannot be allocated.
  */
 static int
 delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel)
 {
 	struct ip_fw *reap, *rule, **map;
 	int end, start;
 	int i, n, ndyn, ofs;
 
 	reap = NULL;
 	IPFW_UH_WLOCK(chain);	/* arbitrate writers */
 
 	/*
 	 * Stage 1: Determine range to inspect.
 	 * Range is half-inclusive, e.g [start, end).
 	 */
 	start = 0;
 	/* By default the last map entry is left out of the range */
 	end = chain->n_rules - 1;
 
 	if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) {
 		start = ipfw_find_rule(chain, rt->start_rule, 0);
 
 		if (rt->end_rule >= IPFW_DEFAULT_RULE)
 			rt->end_rule = IPFW_DEFAULT_RULE - 1;
 		end = ipfw_find_rule(chain, rt->end_rule, UINT32_MAX);
 	}
 
 	if (rt->flags & IPFW_RCFLAG_DYNAMIC) {
 		/*
 		 * Requested deleting only for dynamic states.
 		 */
 		*ndel = 0;
 		ipfw_expire_dyn_states(chain, rt);
 		IPFW_UH_WUNLOCK(chain);
 		return (0);
 	}
 
 	/* Allocate new map of the same size */
 	map = get_map(chain, 0, 1 /* locked */);
 	if (map == NULL) {
 		IPFW_UH_WUNLOCK(chain);
 		return (ENOMEM);
 	}
 
 	/* n: rules being deleted, ndyn: those with dynamic states */
 	n = 0;
 	ndyn = 0;
 	/* ofs: next free slot in the new map */
 	ofs = start;
 	/* 1. bcopy the initial part of the map */
 	if (start > 0)
 		bcopy(chain->map, map, start * sizeof(struct ip_fw *));
 	/* 2. copy active rules between start and end */
 	for (i = start; i < end; i++) {
 		rule = chain->map[i];
 		if (ipfw_match_range(rule, rt) == 0) {
 			map[ofs++] = rule;
 			continue;
 		}
 
 		n++;
 		if (ipfw_is_dyn_rule(rule) != 0)
 			ndyn++;
 	}
 	/* 3. copy the final part of the map */
 	bcopy(chain->map + end, map + ofs,
 		(chain->n_rules - end) * sizeof(struct ip_fw *));
 	/* 4. recalculate skipto cache */
 	update_skipto_cache(chain, map);
 	/* 5. swap the maps (under UH_WLOCK + WHLOCK) */
 	/* From here on @map holds the old map, with deleted rules in it */
 	map = swap_map(chain, map, chain->n_rules - n);
 	/* 6. Remove all dynamic states originated by deleted rules */
 	if (ndyn > 0)
 		ipfw_expire_dyn_states(chain, rt);
 	/* 7. now remove the rules deleted from the old map */
 	for (i = start; i < end; i++) {
 		rule = map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 		chain->static_len -= RULEUSIZE0(rule);
 		ipfw_reap_add(chain, &reap, rule);
 	}
 	IPFW_UH_WUNLOCK(chain);
 
 	/* Actual freeing happens outside of the lock */
 	ipfw_reap_rules(reap);
 	if (map != NULL)
 		free(map, M_IPFW);
 	*ndel = n;
 	return (0);
 }
 
 /*
  * Move the named objects referenced by the rules matching @rt to set
  * rt->new_set.  Works in three passes over all rules but the last map
  * entry:
  *  1) ask each object handler to count references coming from the
  *     matching rules (COUNT_ONE with a non-zero argument);
  *  2) verify with TEST_ONE that every such object can be moved;
  *  3) reset the per-object counters and, if pass 2 found no problem,
  *     move the objects (MOVE_ONE).
  * Rules already in rt->new_set are skipped.  Requires IPFW_UH_WLOCK.
  *
  * Returns 0 on success (or when no objects are involved), otherwise
  * the first non-zero value returned by a TEST_ONE check.
  */
 static int
 move_objects(struct ip_fw_chain *ch, ipfw_range_tlv *rt)
 {
 	struct opcode_obj_rewrite *rw;
 	struct ip_fw *rule;
 	ipfw_insn *cmd;
 	int cmdlen, i, l, c;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	/* Stage 1: count number of references by given rules */
 	for (c = 0, i = 0; i < ch->n_rules - 1; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 		if (rule->set == rt->new_set) /* nothing to do */
 			continue;
 		/* Search opcodes with named objects */
 		for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
 		    l > 0; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			rw = find_op_rw(cmd, &kidx, NULL);
 			if (rw == NULL || rw->manage_sets == NULL)
 				continue;
 			/*
 			 * When manage_sets() returns non-zero value to
 			 * COUNT_ONE command, consider this as an object
 			 * doesn't support sets (e.g. disabled with sysctl).
 			 * So, skip checks for this object.
 			 */
 			if (rw->manage_sets(ch, kidx, 1, COUNT_ONE) != 0)
 				continue;
 			c++;
 		}
 	}
 	if (c == 0) /* No objects found */
 		return (0);
 	/* Stage 2: verify "ownership" */
 	/* c is reused as the error code; both loops stop once it is set */
 	for (c = 0, i = 0; (i < ch->n_rules - 1) && c == 0; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 		if (rule->set == rt->new_set) /* nothing to do */
 			continue;
 		/* Search opcodes with named objects */
 		for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
 		    l > 0 && c == 0; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			rw = find_op_rw(cmd, &kidx, NULL);
 			if (rw == NULL || rw->manage_sets == NULL)
 				continue;
 			/* Test for ownership and conflicting names */
 			c = rw->manage_sets(ch, kidx,
 			    (uint8_t)rt->new_set, TEST_ONE);
 		}
 	}
 	/* Stage 3: change set and cleanup */
 	/* Runs even when stage 2 failed, to reset the counters */
 	for (i = 0; i < ch->n_rules - 1; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 		if (rule->set == rt->new_set) /* nothing to do */
 			continue;
 		/* Search opcodes with named objects */
 		for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
 		    l > 0; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			rw = find_op_rw(cmd, &kidx, NULL);
 			if (rw == NULL || rw->manage_sets == NULL)
 				continue;
 			/* cleanup object counter */
 			rw->manage_sets(ch, kidx,
 			    0 /* reset counter */, COUNT_ONE);
 			if (c != 0)
 				continue;
 			/* change set */
 			rw->manage_sets(ch, kidx,
 			    (uint8_t)rt->new_set, MOVE_ONE);
 		}
 	}
 	return (c);
 }
 
 /*
  * Changes the set of the rules matching range @rt
  * to rt->new_set.
  *
  * Returns 0 on success.
  */
 static int
 move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
 {
 	struct ip_fw *rule;
 	int error, idx;
 
 	IPFW_UH_WLOCK(chain);
 
 	/*
 	 * Moving rules with matching parameters to a new set is the
 	 * complex part: every referenced table (if any) has to be
 	 * referenced by the given rule subset only.  If that is not the
 	 * case the objects cannot follow the rules to the new set and
 	 * the error is passed back to the caller.
 	 */
 	error = move_objects(chain, rt);
 	if (error == 0) {
 		/* XXX: We have to do swap holding WLOCK */
 		for (idx = 0; idx < chain->n_rules; idx++) {
 			rule = chain->map[idx];
 			if (ipfw_match_range(rule, rt) != 0)
 				rule->set = rt->new_set;
 		}
 	}
 
 	IPFW_UH_WUNLOCK(chain);
 
 	return (error);
 }
 
 /*
  * Resets the accounting state of a single rule.
  *
  * Unless @log_only is set, the rule counters are reset through
  * IPFW_ZERO_RULE_COUNTER().  In either case, when the instruction found
  * at ACTION_PTR(rule) is an O_LOG one, its log_left value is reloaded
  * from max_log.
  *
  * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
  * so we only care that rules do not disappear.
  */
 static void
 clear_counters(struct ip_fw *rule, int log_only)
 {
 	ipfw_insn_log *logcmd;
 
 	if (!log_only)
 		IPFW_ZERO_RULE_COUNTER(rule);
 
 	logcmd = (ipfw_insn_log *)ACTION_PTR(rule);
 	if (logcmd->o.opcode != O_LOG)
 		return;
 	logcmd->log_left = logcmd->max_log;
 }
 
 /*
  * Clears rule counters and/or log values for every rule matching @rt.
  *
  * IPFW_RCFLAG_DEFAULT is always OR-ed into rt->flags before matching.
  * When @log_only is non-zero only the logging state is reset.
  *
  * Returns the number of rules that were processed.
  */
 static int
 clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only)
 {
 	int cleared, idx;
 
 	rt->flags |= IPFW_RCFLAG_DEFAULT;
 	cleared = 0;
 
 	IPFW_UH_WLOCK(chain);	/* arbitrate writers */
 	for (idx = 0; idx < chain->n_rules; idx++) {
 		struct ip_fw *cur = chain->map[idx];
 
 		if (ipfw_match_range(cur, rt) != 0) {
 			clear_counters(cur, log_only);
 			cleared++;
 		}
 	}
 	IPFW_UH_WUNLOCK(chain);
 
 	return (cleared);
 }
 
 /*
  * Sanity-checks a range TLV received from userland.
  *
  * The TLV is rejected when its length is not exactly sizeof(*rt), when
  * start_rule exceeds end_rule, when either set number is not below
  * IPFW_MAX_SETS, or when flags carries bits outside IPFW_RCFLAG_USER.
  *
  * Returns 0 if the TLV is acceptable, 1 otherwise.
  */
 static int
 check_range_tlv(ipfw_range_tlv *rt)
 {
 	int bad;
 
 	bad = rt->head.length != sizeof(*rt) ||
 	    rt->start_rule > rt->end_rule ||
 	    rt->set >= IPFW_MAX_SETS ||
 	    rt->new_set >= IPFW_MAX_SETS ||
 	    (rt->flags & IPFW_RCFLAG_USER) != rt->flags;
 
 	return (bad);
 }
 
 /*
  * Sockopt handler: deletes the rules selected by the request.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_range_tlv ]
  * Reply: [ ipfw_obj_header ipfw_range_tlv ]
  *
  * On success the count of removed rules is written back into
  * ipfw_range_tlv->new_set.
  *
  * Returns 0 on success, EINVAL for a malformed request, or whatever
  * error delete_range() reports.
  */
 static int
 del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_range_header *req;
 	int deleted, rc;
 
 	/* The request must be exactly one range header. */
 	if (sd->valsize != sizeof(*req))
 		return (EINVAL);
 
 	req = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
 	if (check_range_tlv(&req->range) != 0)
 		return (EINVAL);
 
 	deleted = 0;
 	rc = delete_range(chain, &req->range, &deleted);
 	if (rc == 0) {
 		/* Hand the number of deleted rules back to the caller */
 		req->range.new_set = deleted;
 	}
 
 	return (rc);
 }
 
 /*
  * Sockopt handler: moves the rules selected by the request to the
  * set named in ipfw_range_tlv->new_set (see move_range()).
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_range_tlv ]
  *
  * Returns 0 on success, EINVAL for a malformed request, or the error
  * produced by move_range().
  */
 static int
 move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_range_header *req;
 
 	if (sd->valsize == sizeof(*req)) {
 		req = (ipfw_range_header *)ipfw_get_sopt_space(sd,
 		    sd->valsize);
 		if (check_range_tlv(&req->range) == 0)
 			return (move_range(chain, &req->range));
 	}
 
 	/* Wrong size or invalid range TLV */
 	return (EINVAL);
 }
 
 /*
  * Sockopt handler: resets accounting data of the rules selected by
  * the request.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_range_tlv ]
  * Reply: [ ipfw_obj_header ipfw_range_tlv ]
  *
  * When the opcode is IP_FW_XRESETLOG only logging state is reset.
  * The count of processed rules is written back into
  * ipfw_range_tlv->new_set and, if V_fw_verbose is set, a notice is logged.
  *
  * Returns 0 on success, EINVAL for a malformed request.
  */
 static int
 clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_range_header *req;
 	char *text;
 	int cleared, only_log;
 
 	if (sd->valsize != sizeof(*req))
 		return (EINVAL);
 
 	req = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
 	if (check_range_tlv(&req->range) != 0)
 		return (EINVAL);
 
 	only_log = (op3->opcode == IP_FW_XRESETLOG);
 	cleared = clear_range(chain, &req->range, only_log);
 
 	/* Pick the log message matching the scope and kind of the reset */
 	if ((req->range.flags & IPFW_RCFLAG_ALL) != 0) {
 		if (only_log)
 			text = "All logging counts reset";
 		else
 			text = "Accounting cleared";
 	} else {
 		if (only_log)
 			text = "logging count reset";
 		else
 			text = "cleared";
 	}
 
 	if (V_fw_verbose)
 		log(LOG_SECURITY | LOG_NOTICE, "ipfw: %s.\n", text);
 
 	/* Save number of rules cleared */
 	req->range.new_set = cleared;
 
 	return (0);
 }
 
 /*
  * Updates the mask of disabled rule sets.
  *
  * Both rt->set and rt->new_set are treated as bitmasks here: bits in
  * rt->set are added to V_set_disable and bits in rt->new_set are removed
  * from it.  The bit for RESVD_SET is always cleared, so that set can
  * never be disabled.
  *
  * Must be called with the UH write lock held; the runtime write lock is
  * taken only around the store to V_set_disable.
  */
 static void
 enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
 {
 	uint32_t v_set;
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 
 	/* Change enabled/disabled sets mask */
 	v_set = (V_set_disable | rt->set) & ~rt->new_set;
 	/*
 	 * Set RESVD_SET always enabled.  The constant is unsigned so the
 	 * shift stays well defined when RESVD_SET is the top bit (31).
 	 */
 	v_set &= ~(1U << RESVD_SET);
 	IPFW_WLOCK(chain);
 	V_set_disable = v_set;
 	IPFW_WUNLOCK(chain);
 }
 
 /*
  * Swaps sets rt->set and rt->new_set, or (when @mv is non-zero) moves
  * everything from rt->set into rt->new_set.
  *
  * For a move, every registered rewriter providing manage_sets() is first
  * asked (TEST_ALL) whether the move is possible; any non-zero answer
  * aborts with EEXIST.  Afterwards the rules in the map, except its last
  * entry, are renumbered and the rewriters are told to MOVE_ALL or
  * SWAP_ALL their objects.
  *
  * Must be called with the UH write lock held.
  * Returns 0 on success, EEXIST on conflict.
  */
 static int
 swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv)
 {
 	struct opcode_obj_rewrite *rw;
 	struct ip_fw *rule;
 	uint8_t from, to;
 	int i;
 
 	IPFW_UH_WLOCK_ASSERT(chain);
 
 	/* Same source and destination: nothing to do */
 	if (rt->set == rt->new_set)
 		return (0);
 
 	from = (uint8_t)rt->set;
 	to = (uint8_t)rt->new_set;
 
 	/*
 	 * Before moving the rules we need to check that
 	 * there aren't any conflicting named objects.
 	 */
 	for (rw = ctl3_rewriters;
 	    mv != 0 && rw < ctl3_rewriters + ctl3_rsize; rw++) {
 		if (rw->manage_sets == NULL)
 			continue;
 		i = rw->manage_sets(chain, from, to, TEST_ALL);
 		if (i != 0)
 			return (EEXIST);
 	}
 
 	/* Swap or move two sets */
 	for (i = 0; i < chain->n_rules - 1; i++) {
 		rule = chain->map[i];
 		if (rule->set == from) {
 			rule->set = to;
 			continue;
 		}
 		/* The reverse direction is only taken for a swap */
 		if (mv == 0 && rule->set == to)
 			rule->set = from;
 	}
 
 	/* Let every rewriter update its named objects accordingly */
 	for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) {
 		if (rw->manage_sets != NULL)
 			rw->manage_sets(chain, from, to,
 			    mv != 0 ? MOVE_ALL: SWAP_ALL);
 	}
 
 	return (0);
 }
 
 /*
  * Sockopt handler: swaps or moves sets, or changes the mask of
  * enabled/disabled sets.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_range_tlv ]
  *
  * For IP_FW_SET_SWAP and IP_FW_SET_MOVE both range.set and range.new_set
  * are set numbers and must be below IPFW_MAX_SETS.  For IP_FW_SET_ENABLE
  * they are bitmasks and are passed to enable_sets() unchecked.
  *
  * Returns 0 on success, EINVAL for a malformed request, or the error
  * reported by swap_sets().
  */
 static int
 manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_range_header *rh;
 	int ret;
 
 	if (sd->valsize != sizeof(*rh))
 		return (EINVAL);
 
 	rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
 
 	/*
 	 * A TLV of unexpected length is a malformed request; report it
 	 * with a proper errno value like the other range handlers do,
 	 * instead of the bare constant 1.
 	 */
 	if (rh->range.head.length != sizeof(ipfw_range_tlv))
 		return (EINVAL);
 	/* enable_sets() expects bitmasks. */
 	if (op3->opcode != IP_FW_SET_ENABLE &&
 	    (rh->range.set >= IPFW_MAX_SETS ||
 	    rh->range.new_set >= IPFW_MAX_SETS))
 		return (EINVAL);
 
 	ret = 0;
 	IPFW_UH_WLOCK(chain);
 	switch (op3->opcode) {
 	case IP_FW_SET_SWAP:
 	case IP_FW_SET_MOVE:
 		ret = swap_sets(chain, &rh->range,
 		    op3->opcode == IP_FW_SET_MOVE);
 		break;
 	case IP_FW_SET_ENABLE:
 		enable_sets(chain, &rh->range);
 		break;
 	}
 	IPFW_UH_WUNLOCK(chain);
 
 	return (ret);
 }
 
 /**
  * Remove all rules with given number, or do set manipulation.
  * Assumes chain != NULL && *chain != NULL.
  *
  * The argument is an uint32_t. The low 16 bit are the rule or set number;
  * the next 8 bits are the new set; the top 8 bits indicate the command:
  *
  *	0	delete rules numbered "rulenum"
  *	1	delete rules in set "rulenum"
  *	2	move rules "rulenum" to set "new_set"
  *	3	move rules from set "rulenum" to set "new_set"
  *	4	swap sets "rulenum" and "new_set"
  *	5	delete rules "rulenum" and set "new_set"
  *
  * The legacy request is translated into an ipfw_range_tlv and handed to
  * delete_range(), move_range() or swap_sets().
  *
  * Returns 0 on success; EINVAL for an out-of-range command, rule or set
  * number, or when a delete request (other than "delete set" or "delete
  * all") removed nothing; otherwise the error of the called helper.
  */
 static int
 del_entry(struct ip_fw_chain *chain, uint32_t arg)
 {
 	uint32_t num;	/* rule number or old_set */
 	uint8_t cmd, new_set;
 	int do_del, ndel;
 	int error = 0;
 	ipfw_range_tlv rt;
 
 	num = arg & 0xffff;
 	cmd = (arg >> 24) & 0xff;
 	new_set = (arg >> 16) & 0xff;
 
 	if (cmd > 5 || new_set > RESVD_SET)
 		return (EINVAL);
 	if (cmd == 0 || cmd == 2 || cmd == 5) {
 		if (num >= IPFW_DEFAULT_RULE)
 			return (EINVAL);
 	} else {
 		if (num > RESVD_SET)	/* old_set */
 			return (EINVAL);
 	}
 
 	/* Convert old requests into new representation */
 	memset(&rt, 0, sizeof(rt));
 	rt.start_rule = num;
 	rt.end_rule = num;
 	rt.set = num;
 	rt.new_set = new_set;
 	do_del = 0;
 	/*
 	 * Start the deletion counter from zero, as del_rules() does, so
 	 * the "nothing deleted" check below never reads an indeterminate
 	 * value should delete_range() leave it untouched.
 	 */
 	ndel = 0;
 
 	switch (cmd) {
 	case 0: /* delete rules numbered "rulenum" */
 		if (num == 0)
 			rt.flags |= IPFW_RCFLAG_ALL;
 		else
 			rt.flags |= IPFW_RCFLAG_RANGE;
 		do_del = 1;
 		break;
 	case 1: /* delete rules in set "rulenum" */
 		rt.flags |= IPFW_RCFLAG_SET;
 		do_del = 1;
 		break;
 	case 5: /* delete rules "rulenum" and set "new_set" */
 		rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET;
 		rt.set = new_set;
 		rt.new_set = 0;
 		do_del = 1;
 		break;
 	case 2: /* move rules "rulenum" to set "new_set" */
 		rt.flags |= IPFW_RCFLAG_RANGE;
 		break;
 	case 3: /* move rules from set "rulenum" to set "new_set" */
 		IPFW_UH_WLOCK(chain);
 		error = swap_sets(chain, &rt, 1);
 		IPFW_UH_WUNLOCK(chain);
 		return (error);
 	case 4: /* swap sets "rulenum" and "new_set" */
 		IPFW_UH_WLOCK(chain);
 		error = swap_sets(chain, &rt, 0);
 		IPFW_UH_WUNLOCK(chain);
 		return (error);
 	default:
 		return (ENOTSUP);
 	}
 
 	if (do_del != 0) {
 		if ((error = delete_range(chain, &rt, &ndel)) != 0)
 			return (error);
 
 		/*
 		 * Deleting nothing is an error only when a specific rule
 		 * number was requested (not "set" or "all" deletions).
 		 */
 		if (ndel == 0 && (cmd != 1 && num != 0))
 			return (EINVAL);
 
 		return (0);
 	}
 
 	return (move_range(chain, &rt));
 }
 
 /**
  * Legacy handler resetting some or all counters on firewall rules.
  *
  * `arg' is a packed u_int32_t: bits 0-15 hold the rule number, bits
  * 16-23 the set number and bits 24-31 the command:
  *	0	work with rules from all sets;
  *	1	work with rules only from the specified set.
  * A rule number of zero means "every rule".
  * With log_only set to 1 just the logging state is reset.
  *
  * Returns 0 on success, EINVAL on a bad command/set or when no rule
  * carries the requested number.
  */
 static int
 zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 {
 	struct ip_fw *rule;
 	char *msg;
 	uint16_t rulenum;
 	uint8_t cmd, set;
 	int found, i;
 
 	rulenum = arg & 0xffff;
 	set = (arg >> 16) & 0xff;
 	cmd = (arg >> 24) & 0xff;
 
 	if (cmd > 1 || (cmd == 1 && set > RESVD_SET))
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(chain);
 	if (rulenum != 0) {
 		/* Single rule number: walk the map until we pass it */
 		found = 0;
 		for (i = 0; i < chain->n_rules; i++) {
 			rule = chain->map[i];
 			if (rule->rulenum > rulenum)
 				break;
 			if (rule->rulenum != rulenum)
 				continue;
 			if (cmd == 0 || rule->set == set)
 				clear_counters(rule, log_only);
 			found = 1;
 		}
 		if (found == 0) {
 			/* we did not find any matching rules */
 			IPFW_UH_RUNLOCK(chain);
 			return (EINVAL);
 		}
 		if (log_only)
 			msg = "logging count reset";
 		else
 			msg = "cleared";
 	} else {
 		/* Everything (optionally restricted to one set) */
 		V_norule_counter = 0;
 		for (i = 0; i < chain->n_rules; i++) {
 			rule = chain->map[i];
 			if (cmd != 1 || rule->set == set)
 				clear_counters(rule, log_only);
 		}
 		if (log_only)
 			msg = "All logging counts reset";
 		else
 			msg = "Accounting cleared";
 	}
 	IPFW_UH_RUNLOCK(chain);
 
 	if (V_fw_verbose) {
 		int lev = LOG_SECURITY | LOG_NOTICE;
 
 		if (rulenum != 0)
 			log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
 		else
 			log(lev, "ipfw: %s.\n", msg);
 	}
 
 	return (0);
 }
 
 
 /*
  * Validates the header of a rule supplied in FreeBSD11 format and then
  * hands its opcode stream to check_ipfw_rule_body().
  *
  * @size is the number of bytes the caller holds for the rule; it has to
  * equal RULESIZE(rule) rounded up to a multiple of sizeof(uint64_t).
  *
  * Returns 0 if the rule is acceptable, an errno value otherwise.
  */
 static int
 check_ipfw_rule1(struct ip_fw_rule *rule, int size,
     struct rule_check_info *ci)
 {
 	int want;
 
 	if (size < sizeof(*rule)) {
 		printf("ipfw: rule too short\n");
 		return (EINVAL);
 	}
 
 	/* The declared cmd_len has to account for the whole buffer */
 	want = roundup2(RULESIZE(rule), sizeof(uint64_t));
 	if (want != size) {
 		printf("ipfw: size mismatch (have %d want %d)\n", size, want);
 		return (EINVAL);
 	}
 
 	/* The action has to live inside the opcode array */
 	if (rule->act_ofs >= rule->cmd_len) {
 		printf("ipfw: bogus action offset (%u > %u)\n",
 		    rule->act_ofs, rule->cmd_len - 1);
 		return (EINVAL);
 	}
 
 	/* Numbers from IPFW_DEFAULT_RULE upwards are not accepted */
 	if (rule->rulenum >= IPFW_DEFAULT_RULE)
 		return (EINVAL);
 
 	return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
 }
 
 /*
  * Validates the header of a rule supplied in FreeBSD8 format and then
  * hands its opcode stream to check_ipfw_rule_body().
  *
  * @size is the number of bytes the caller holds for the rule; it has to
  * equal sizeof(*rule) + cmd_len * 4 - 4.
  *
  * Returns 0 if the rule is acceptable, an errno value otherwise.
  */
 static int
 check_ipfw_rule0(struct ip_fw_rule0 *rule, int size,
     struct rule_check_info *ci)
 {
 	int want;
 
 	if (size < sizeof(*rule)) {
 		printf("ipfw: rule too short\n");
 		return (EINVAL);
 	}
 
 	/* The declared cmd_len has to account for the whole buffer */
 	want = sizeof(*rule) + rule->cmd_len * 4 - 4;
 	if (want != size) {
 		printf("ipfw: size mismatch (have %d want %d)\n", size, want);
 		return (EINVAL);
 	}
 
 	/* The action has to live inside the opcode array */
 	if (rule->act_ofs >= rule->cmd_len) {
 		printf("ipfw: bogus action offset (%u > %u)\n",
 		    rule->act_ofs, rule->cmd_len - 1);
 		return (EINVAL);
 	}
 
 	/* Numbers from IPFW_DEFAULT_RULE upwards are not accepted */
 	if (rule->rulenum >= IPFW_DEFAULT_RULE)
 		return (EINVAL);
 
 	return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
 }
 
 /*
  * Validates the opcode stream of a rule.
  *
  * Walks the @cmd_len instruction words starting at @cmd and checks, per
  * opcode, that the instruction has an acceptable length and (for some
  * opcodes) acceptable arguments.  Exactly one action opcode must be
  * present and it must be the last instruction of the rule.
  *
  * Side effects: ci->object_opcodes is incremented for each opcode that
  * is counted below, and every O_LOG instruction gets its log_left
  * value reset to max_log.
  *
  * Returns 0 on success, EINVAL on a malformed rule, or EPROTONOSUPPORT
  * for IPv6 opcodes when the kernel is built without INET6.
  */
 static int
 check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
 {
 	int cmdlen, l;
 	int have_action;
 
 	have_action = 0;
 
 	/*
 	 * Now go for the individual checks. Very simple ones, basically only
 	 * instruction sizes.
 	 */
 	for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 		/* An instruction may not extend past the end of the rule */
 		if (cmdlen > l) {
 			printf("ipfw: opcode %d size truncated\n",
 			    cmd->opcode);
 			return EINVAL;
 		}
 		switch (cmd->opcode) {
 		case O_PROBE_STATE:
 		case O_KEEP_STATE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			ci->object_opcodes++;
 			break;
 		case O_PROTO:
 		case O_IP_SRC_ME:
 		case O_IP_DST_ME:
 		case O_LAYER2:
 		case O_IN:
 		case O_FRAG:
 		case O_DIVERTED:
 		case O_IPOPT:
 		case O_IPTOS:
 		case O_IPPRECEDENCE:
 		case O_IPVER:
 		case O_SOCKARG:
 		case O_TCPFLAGS:
 		case O_TCPOPTS:
 		case O_ESTAB:
 		case O_VERREVPATH:
 		case O_VERSRCREACH:
 		case O_ANTISPOOF:
 		case O_IPSEC:
 #ifdef INET6
 		case O_IP6_SRC_ME:
 		case O_IP6_DST_ME:
 		case O_EXT_HDR:
 		case O_IP6:
 #endif
 		case O_IP4:
 		case O_TAG:
 		case O_SKIP_ACTION:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		case O_EXTERNAL_ACTION:
 			if (cmd->arg1 == 0 ||
 			    cmdlen != F_INSN_SIZE(ipfw_insn)) {
 				printf("ipfw: invalid external "
 				    "action opcode\n");
 				return (EINVAL);
 			}
 			ci->object_opcodes++;
 			/*
 			 * Do we have O_EXTERNAL_INSTANCE or O_EXTERNAL_DATA
 			 * opcode?
 			 */
 			if (l != cmdlen) {
 				/*
 				 * Step onto the following instruction by hand;
 				 * the "action must be last" test at
 				 * check_action then applies to it.
 				 */
 				l -= cmdlen;
 				cmd += cmdlen;
 				cmdlen = F_LEN(cmd);
 				if (cmd->opcode == O_EXTERNAL_DATA)
 					goto check_action;
 				if (cmd->opcode != O_EXTERNAL_INSTANCE) {
 					printf("ipfw: invalid opcode "
 					    "next to external action %u\n",
 					    cmd->opcode);
 					return (EINVAL);
 				}
 				if (cmd->arg1 == 0 ||
 				    cmdlen != F_INSN_SIZE(ipfw_insn)) {
 					printf("ipfw: invalid external "
 					    "action instance opcode\n");
 					return (EINVAL);
 				}
 				ci->object_opcodes++;
 			}
 			goto check_action;
 
 		case O_FIB:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			if (cmd->arg1 >= rt_numfibs) {
 				printf("ipfw: invalid fib number %d\n",
 					cmd->arg1);
 				return EINVAL;
 			}
 			break;
 
 		case O_SETFIB:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			/* IP_FW_TARG is exempt from the fib range check */
 			if ((cmd->arg1 != IP_FW_TARG) &&
 			    ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) {
 				printf("ipfw: invalid fib number %d\n",
 					cmd->arg1 & 0x7FFF);
 				return EINVAL;
 			}
 			goto check_action;
 
 		case O_UID:
 		case O_GID:
 		case O_JAIL:
 		case O_IP_SRC:
 		case O_IP_DST:
 		case O_TCPSEQ:
 		case O_TCPACK:
 		case O_PROB:
 		case O_ICMPTYPE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_LIMIT:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
 				goto bad_size;
 			ci->object_opcodes++;
 			break;
 
 		case O_LOG:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
 				goto bad_size;
 
 			/* Start with the full logging budget */
 			((ipfw_insn_log *)cmd)->log_left =
 			    ((ipfw_insn_log *)cmd)->max_log;
 
 			break;
 
 		case O_IP_SRC_MASK:
 		case O_IP_DST_MASK:
 			/* only odd command lengths */
 			if ((cmdlen & 1) == 0)
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_SET:
 		case O_IP_DST_SET:
 			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
 				printf("ipfw: invalid set size %d\n",
 					cmd->arg1);
 				return EINVAL;
 			}
 			/* One 32-bit word of bitmap per 32 set members */
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    (cmd->arg1+31)/32 )
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_LOOKUP:
 			if (cmdlen > F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			/* FALLTHROUGH */
 		case O_IP_DST_LOOKUP:
 			if (cmd->arg1 >= V_fw_tables_max) {
 				printf("ipfw: invalid table number %d\n",
 				    cmd->arg1);
 				return (EINVAL);
 			}
 			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
 			    cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
 			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			ci->object_opcodes++;
 			break;
 		case O_IP_FLOW_LOOKUP:
 			if (cmd->arg1 >= V_fw_tables_max) {
 				printf("ipfw: invalid table number %d\n",
 				    cmd->arg1);
 				return (EINVAL);
 			}
 			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
 			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			ci->object_opcodes++;
 			break;
 		case O_MACADDR2:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
 				goto bad_size;
 			break;
 
 		case O_NOP:
 		case O_IPID:
 		case O_IPTTL:
 		case O_IPLEN:
 		case O_TCPDATALEN:
 		case O_TCPWIN:
 		case O_TAGGED:
 			if (cmdlen < 1 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_DSCP:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1)
 				goto bad_size;
 			break;
 
 		case O_MAC_TYPE:
 		case O_IP_SRCPORT:
 		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
 			if (cmdlen < 2 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_RECV:
 		case O_XMIT:
 		case O_VIA:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
 				goto bad_size;
 			ci->object_opcodes++;
 			break;
 
 		case O_ALTQ:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
 				goto bad_size;
 			break;
 
 		case O_PIPE:
 		case O_QUEUE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			goto check_action;
 
 		case O_FORWARD_IP:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
 				goto bad_size;
 			goto check_action;
 #ifdef INET6
 		case O_FORWARD_IP6:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6))
 				goto bad_size;
 			goto check_action;
 #endif /* INET6 */
 
 		case O_DIVERT:
 		case O_TEE:
 			/* Rejected while the divert hook pointer is unset */
 			if (ip_divert_ptr == NULL)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_NETGRAPH:
 		case O_NGTEE:
 			/* Rejected while the netgraph hook pointer is unset */
 			if (ng_ipfw_input_p == NULL)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_NAT:
 			if (!IPFW_NAT_LOADED)
 				return EINVAL;
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
  				goto bad_size;		
  			goto check_action;
 		case O_CHECK_STATE:
 			ci->object_opcodes++;
 			/* FALLTHROUGH */
 		case O_FORWARD_MAC: /* XXX not implemented yet */
 		case O_COUNT:
 		case O_ACCEPT:
 		case O_DENY:
 		case O_REJECT:
 		case O_SETDSCP:
 #ifdef INET6
 		case O_UNREACH6:
 #endif
 		case O_SKIPTO:
 		case O_REASS:
 		case O_CALLRETURN:
 check_size:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 check_action:
 			/* Common tail for every action opcode */
 			if (have_action) {
 				printf("ipfw: opcode %d, multiple actions"
 					" not allowed\n",
 					cmd->opcode);
 				return (EINVAL);
 			}
 			have_action = 1;
 			if (l != cmdlen) {
 				printf("ipfw: opcode %d, action must be"
 					" last opcode\n",
 					cmd->opcode);
 				return (EINVAL);
 			}
 			break;
 #ifdef INET6
 		case O_IP6_SRC:
 		case O_IP6_DST:
 			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
 			    F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		case O_FLOW6ID:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    ((ipfw_insn_u32 *)cmd)->o.arg1)
 				goto bad_size;
 			break;
 
 		case O_IP6_SRC_MASK:
 		case O_IP6_DST_MASK:
 			if ( !(cmdlen & 1) || cmdlen > 127)
 				goto bad_size;
 			break;
 		case O_ICMP6TYPE:
 			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
 				goto bad_size;
 			break;
 #endif
 
 		default:
 			/*
 			 * Without INET6 the IPv6 opcodes land here and get
 			 * a dedicated error instead of "unknown opcode".
 			 */
 			switch (cmd->opcode) {
 #ifndef INET6
 			case O_IP6_SRC_ME:
 			case O_IP6_DST_ME:
 			case O_EXT_HDR:
 			case O_IP6:
 			case O_UNREACH6:
 			case O_IP6_SRC:
 			case O_IP6_DST:
 			case O_FLOW6ID:
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 			case O_ICMP6TYPE:
 				printf("ipfw: no IPv6 support in kernel\n");
 				return (EPROTONOSUPPORT);
 #endif
 			default:
 				printf("ipfw: opcode %d, unknown opcode\n",
 					cmd->opcode);
 				return (EINVAL);
 			}
 		}
 	}
 	/* A rule without any action opcode is invalid */
 	if (have_action == 0) {
 		printf("ipfw: missing action\n");
 		return (EINVAL);
 	}
 	return 0;
 
 bad_size:
 	printf("ipfw: opcode %d size %d wrong\n",
 		cmd->opcode, cmdlen);
 	return (EINVAL);
 }
 
 
 /*
  * Translation of requests for compatibility with FreeBSD 7.2/8.
  * A static variable tells us if we have an old client from userland,
  * and if necessary we translate requests and responses between the
  * two formats.
  */
 /* Non-zero selects the FreeBSD 7.2 rule layout in ipfw_getrules() */
 static int is7 = 0;
 
 /*
  * Rule layout used when talking to FreeBSD 7.2 clients.
  * The field order and sizes presumably mirror the old userland ABI and
  * must not be changed -- TODO confirm against the 7.2 ip_fw.h.
  */
 struct ip_fw7 {
 	struct ip_fw7	*next;		/* linked list of rules     */
 	struct ip_fw7	*next_rule;	/* ptr to next [skipto] rule    */
 	/* 'next_rule' is used to pass up 'set_disable' status      */
 
 	uint16_t	act_ofs;	/* offset of action in 32-bit units */
 	uint16_t	cmd_len;	/* # of 32-bit words in cmd */
 	uint16_t	rulenum;	/* rule number          */
 	uint8_t		set;		/* rule set (0..31)     */
 	// #define RESVD_SET   31  /* set for default and persistent rules */
 	uint8_t		_pad;		/* padding          */
 	// uint32_t        id;             /* rule id, only in v.8 */
 	/* These fields are present in all rules.           */
 	uint64_t	pcnt;		/* Packet counter       */
 	uint64_t	bcnt;		/* Byte counter         */
 	uint32_t	timestamp;	/* tv_sec of last match     */
 
 	ipfw_insn	cmd[1];		/* storage for commands     */
 };
 
 /* Rule converters between the current layout and the 7.2/8 ones. */
 static int convert_rule_to_7(struct ip_fw_rule0 *rule);
 static int convert_rule_to_8(struct ip_fw_rule0 *rule);
 
 /*
  * Size in bytes of a rule in struct ip_fw7 layout: the fixed part plus
  * cmd_len 4-byte words, minus the 4 bytes of the cmd[1] placeholder
  * already counted in sizeof(struct ip_fw7).
  */
 #ifndef RULESIZE7
 #define RULESIZE7(rule)  (sizeof(struct ip_fw7) + \
 	((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
 #endif
 
 
 /*
  * Copy the static and dynamic rules to the supplied buffer
  * and return the amount of space actually used.
  * Must be run under IPFW_UH_RLOCK
  *
  * @buf/@space describe the destination buffer.  Rules are written in
  * FreeBSD 7.2 layout when is7 is set and in ip_fw_rule0 layout
  * otherwise; dynamic state is appended by ipfw_get_dynamic().
  * In the 7.2 path a conversion failure makes the function return 0.
  */
 static size_t
 ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
 {
 	char *bp = buf;
 	char *ep = bp + space;
 	struct ip_fw *rule;
 	struct ip_fw_rule0 *dst;
 	struct timeval boottime;
 	int error, i, l, warnflag;
 	time_t	boot_seconds;
 
 	warnflag = 0;
 
 	/* Exported timestamps are offset by the boot time below */
 	getboottime(&boottime);
         boot_seconds = boottime.tv_sec;
 	for (i = 0; i < chain->n_rules; i++) {
 		rule = chain->map[i];
 
 		if (is7) {
 		    /* Convert rule to FreeBSD 7.2 format */
 		    l = RULESIZE7(rule);
 		    /* A rule that does not fit is silently skipped */
 		    if (bp + l + sizeof(uint32_t) <= ep) {
 			bcopy(rule, bp, l + sizeof(uint32_t));
 			/*
 			 * NOTE(review): any non-zero result aborts the
 			 * whole dump here, including the value 2 that the
 			 * non-7.2 path below treats as non-fatal.
 			 */
 			error = set_legacy_obj_kidx(chain,
 			    (struct ip_fw_rule0 *)bp);
 			if (error != 0)
 				return (0);
 			error = convert_rule_to_7((struct ip_fw_rule0 *) bp);
 			if (error)
 				return 0; /*XXX correct? */
 			/*
 			 * XXX HACK. Store the disable mask in the "next"
 			 * pointer in a wild attempt to keep the ABI the same.
 			 * Why do we do this on EVERY rule?
 			 */
 			bcopy(&V_set_disable,
 				&(((struct ip_fw7 *)bp)->next_rule),
 				sizeof(V_set_disable));
 			if (((struct ip_fw7 *)bp)->timestamp)
 			    ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
 			bp += l;
 		    }
 		    continue; /* go to next rule */
 		}
 
 		l = RULEUSIZE0(rule);
 		if (bp + l > ep) { /* should not happen */
 			printf("overflow dumping static rules\n");
 			break;
 		}
 		dst = (struct ip_fw_rule0 *)bp;
 		export_rule0(rule, dst, l);
 		/* The result is examined only after the rule is stored */
 		error = set_legacy_obj_kidx(chain, dst);
 
 		/*
 		 * XXX HACK. Store the disable mask in the "next"
 		 * pointer in a wild attempt to keep the ABI the same.
 		 * Why do we do this on EVERY rule?
 		 *
 		 * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask
 		 * so we need to fail _after_ saving at least one mask.
 		 */
 		bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
 		if (dst->timestamp)
 			dst->timestamp += boot_seconds;
 		bp += l;
 
 		if (error != 0) {
 			if (error == 2) {
 				/* Non-fatal table rewrite error. */
 				warnflag = 1;
 				continue;
 			}
 			printf("Stop on rule %d. Fail to convert table\n",
 			    rule->rulenum);
 			break;
 		}
 	}
 	/* NOTE(review): the process name argument is an empty string */
 	if (warnflag != 0)
 		printf("ipfw: process %s is using legacy interfaces,"
 		    " consider rebuilding\n", "");
 	ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */
 	return (bp - (char *)buf);
 }
 
 
 /*
  * State shared between the sizing and dumping stages of dump_config().
  * b and e are used as indices into chain->map (half-open range [b, e)).
  */
 struct dump_args {
 	uint32_t	b;	/* start rule */
 	uint32_t	e;	/* end rule */
 	uint32_t	rcount;	/* number of rules */
 	uint32_t	rsize;	/* rules size */
 	uint32_t	tcount;	/* number of tables */
 	int		rcounters;	/* counters */
 	uint32_t	*bmask;	/* index bitmask of used named objects */
 };
 
 /*
  * Fills @ntlv from named object @no: the TLV type is the object's
  * etlv, the length is sizeof(*ntlv), and the kernel index and name
  * are copied over (the name is truncated to the size of ntlv->name
  * by strlcpy()).
  */
 void
 ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv)
 {
 
 	/* TLV header */
 	ntlv->head.length = sizeof(*ntlv);
 	ntlv->head.type = no->etlv;
 
 	/* Payload */
 	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
 	ntlv->idx = no->kidx;
 }
 
 /*
  * Looks up the named object with kernel index @kidx in instance @ni
  * and exports it as an ipfw_obj_ntlv allocated from the @sd buffer.
  *
  * The object is asserted to exist.
  *
  * Returns 0 on success, ENOMEM when @sd has no room for the TLV.
  */
 static int
 export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
     struct sockopt_data *sd)
 {
 	struct named_object *obj;
 	ipfw_obj_ntlv *out;
 
 	obj = ipfw_objhash_lookup_kidx(ni, kidx);
 	KASSERT(obj != NULL, ("invalid object kernel index passed"));
 
 	out = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*out));
 	if (out != NULL) {
 		ipfw_export_obj_ntlv(obj, out);
 		return (0);
 	}
 
 	return (ENOMEM);
 }
 
 /*
  * Exports every object of instance @ni whose index bit is set in
  * da->bmask as an ipfw_obj_ntlv in @sd.
  *
  * da->tcount is decremented for each exported object and the scan stops
  * early once it reaches zero.
  *
  * Returns 0 on success or the error from export_objhash_ntlv().
  */
 static int
 export_named_objects(struct namedobj_instance *ni, struct dump_args *da,
     struct sockopt_data *sd)
 {
 	int error, i;
 
 	for (i = 0; i < IPFW_TABLES_MAX && da->tcount > 0; i++) {
 		/*
 		 * Unsigned constant: bit 31 of a mask word must be tested
 		 * without shifting a signed 1 into the sign bit.
 		 */
 		if ((da->bmask[i / 32] & (1U << (i % 32))) == 0)
 			continue;
 		if ((error = export_objhash_ntlv(ni, i, sd)) != 0)
 			return (error);
 		da->tcount--;
 	}
 	return (0);
 }
 
 /*
  * Writes the IPFW_TLV_TBLNAME_LIST container into @sd: a ctlv header
  * sized for da->tcount entries, followed by the name TLVs of the marked
  * table objects and then of the marked objects from CHAIN_TO_SRV(ch).
  *
  * da->bmask is advanced past the table half of the bitmask before the
  * second pass, and da->tcount is consumed by export_named_objects().
  *
  * Returns 0 on success, ENOMEM when @sd runs out of space.
  */
 static int
 dump_named_objects(struct ip_fw_chain *ch, struct dump_args *da,
     struct sockopt_data *sd)
 {
 	ipfw_obj_ctlv *hdr;
 	int rc;
 
 	MPASS(da->tcount > 0);
 
 	/* Container header goes first */
 	hdr = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*hdr));
 	if (hdr == NULL)
 		return (ENOMEM);
 	hdr->head.type = IPFW_TLV_TBLNAME_LIST;
 	hdr->head.length = da->tcount * sizeof(ipfw_obj_ntlv) +
 	    sizeof(*hdr);
 	hdr->count = da->tcount;
 	hdr->objsize = sizeof(ipfw_obj_ntlv);
 
 	/* Table names (if any) come before the other named objects */
 	rc = export_named_objects(ipfw_get_table_objhash(ch), da, sd);
 	if (rc == 0) {
 		da->bmask += IPFW_TABLES_MAX / 32;
 		rc = export_named_objects(CHAIN_TO_SRV(ch), da, sd);
 	}
 
 	return (rc);
 }
 
 /*
  * Writes the IPFW_TLV_RULE_LIST container into @sd: a ctlv header
  * followed by the rules chain->map[da->b .. da->e - 1], each exported
  * through export_rule1() (with counters when da->rcounters is set).
  *
  * Returns 0 on success, ENOMEM when @sd runs out of space.
  */
 static int
 dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
     struct sockopt_data *sd)
 {
 	ipfw_obj_ctlv *hdr;
 	caddr_t out;
 	int i, len;
 
 	/* Container header */
 	hdr = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*hdr));
 	if (hdr == NULL)
 		return (ENOMEM);
 	hdr->head.type = IPFW_TLV_RULE_LIST;
 	hdr->head.length = da->rsize + sizeof(*hdr);
 	hdr->count = da->rcount;
 
 	/* One TLV per rule in the requested index range */
 	for (i = da->b; i < da->e; i++) {
 		struct ip_fw *cur = chain->map[i];
 
 		len = RULEUSIZE1(cur) + sizeof(ipfw_obj_tlv);
 		if (da->rcounters != 0)
 			len += sizeof(struct ip_fw_bcounter);
 
 		out = (caddr_t)ipfw_get_sopt_space(sd, len);
 		if (out == NULL)
 			return (ENOMEM);
 
 		export_rule1(cur, out, len, da->rcounters);
 	}
 
 	return (0);
 }
 
 /*
  * Marks kernel index @kidx as used in bitmask @bmask.
  *
  * @bmask holds two consecutive bitmaps of IPFW_TABLES_MAX bits each:
  * the first for objects whose @etlv is IPFW_TLV_TBL_NAME, the second
  * for everything else.
  *
  * Returns 1 if the bit was newly set, 0 if it had been set already.
  */
 int
 ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx)
 {
 	uint32_t bidx, bit;
 
 	/*
 	 * Maintain separate bitmasks for table and non-table objects.
 	 */
 	bidx = (etlv == IPFW_TLV_TBL_NAME) ? 0: IPFW_TABLES_MAX / 32;
 	bidx += kidx / 32;
 	/* Unsigned constant keeps the shift defined for bit 31 */
 	bit = 1U << (kidx % 32);
 	if ((bmask[bidx] & bit) != 0)
 		return (0);
 
 	bmask[bidx] |= bit;
 	return (1);
 }
 
 /*
  * Walks the opcodes of @rule and, for every opcode that find_op_rw()
  * recognizes, marks the referenced kernel index in da->bmask.
  * da->tcount is bumped each time a bit gets set for the first time.
  * Used to build the bitmask of tables/objects referenced by a ruleset
  * or a part of it.
  */
 static void
 mark_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
     struct dump_args *da)
 {
 	struct opcode_obj_rewrite *rw;
 	ipfw_insn *cmd;
 	int cmdlen, left;
 	uint16_t kidx;
 	uint8_t subtype;
 
 	cmdlen = 0;
 	for (left = rule->cmd_len, cmd = rule->cmd; left > 0;
 	    left -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		rw = find_op_rw(cmd, &kidx, &subtype);
 		if (rw != NULL &&
 		    ipfw_mark_object_kidx(da->bmask, rw->etlv, kidx))
 			da->tcount++;
 	}
 }
 
 /*
  * Dumps requested objects data
  * Data layout (version 0)(current):
  * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags
  *   size = ipfw_cfg_lheader.size
  * Reply: [ ipfw_cfg_lheader 
  *   [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional)
  *   [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST)
  *     ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ]
  *   ] (optional)
  *   [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional)
  * ]
  * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize.
  * The rest (size, count) are set to zero and needs to be ignored.
  *
  * The work is done in two stages under IPFW_UH_RLOCK: first the total
  * reply size is computed and stored in the header, then, if the
  * caller's buffer (sd->valsize) is large enough, the data is written.
  *
  * Returns 0 on success, EINVAL when the header cannot be obtained,
  * ENOMEM when the buffer is too small or @sd runs out of space.
  */
 static int
 dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct dump_args da;
 	ipfw_cfg_lheader *hdr;
 	struct ip_fw *rule;
 	size_t sz, rnum;
 	uint32_t hdr_flags, *bmask;
 	int error, i;
 
 	hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
 	if (hdr == NULL)
 		return (EINVAL);
 
 	error = 0;
 	bmask = NULL;
 	memset(&da, 0, sizeof(da));
 	/*
 	 * Allocate needed state.
 	 * Note we allocate 2xspace mask, for table & srv
 	 */
 	if (hdr->flags & (IPFW_CFG_GET_STATIC | IPFW_CFG_GET_STATES))
 		da.bmask = bmask = malloc(
 		    sizeof(uint32_t) * IPFW_TABLES_MAX * 2 / 32, M_TEMP,
 		    M_WAITOK | M_ZERO);
 	IPFW_UH_RLOCK(chain);
 
 	/*
 	 * STAGE 1: Determine size/count for objects in range.
 	 * Prepare used tables bitmask.
 	 */
 	sz = sizeof(ipfw_cfg_lheader);
 	/* Default range is the whole map; da.b is 0 from the memset */
 	da.e = chain->n_rules;
 
 	if (hdr->end_rule != 0) {
 		/* Handle custom range */
 		if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE)
 			rnum = IPFW_DEFAULT_RULE;
 		da.b = ipfw_find_rule(chain, rnum, 0);
 		rnum = (hdr->end_rule < IPFW_DEFAULT_RULE) ?
 		    hdr->end_rule + 1: IPFW_DEFAULT_RULE;
 		da.e = ipfw_find_rule(chain, rnum, UINT32_MAX) + 1;
 	}
 
 	if (hdr->flags & IPFW_CFG_GET_STATIC) {
 		for (i = da.b; i < da.e; i++) {
 			rule = chain->map[i];
 			da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv);
 			da.rcount++;
 			/* Update bitmask of used objects for given range */
 			mark_rule_objects(chain, rule, &da);
 		}
 		/* Add counters if requested */
 		if (hdr->flags & IPFW_CFG_GET_COUNTERS) {
 			da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount;
 			da.rcounters = 1;
 		}
 		sz += da.rsize + sizeof(ipfw_obj_ctlv);
 	}
 
 	if (hdr->flags & IPFW_CFG_GET_STATES) {
 		/* i receives a count that is added to da.tcount below */
 		sz += sizeof(ipfw_obj_ctlv) +
 		    ipfw_dyn_get_count(bmask, &i) * sizeof(ipfw_obj_dyntlv);
 		da.tcount += i;
 	}
 
 	/* Room for the named-object list, if anything was marked */
 	if (da.tcount > 0)
 		sz += da.tcount * sizeof(ipfw_obj_ntlv) +
 		    sizeof(ipfw_obj_ctlv);
 
 	/*
 	 * Fill header anyway.
 	 * Note we have to save header fields to stable storage
 	 * buffer inside @sd can be flushed after dumping rules
 	 */
 	hdr->size = sz;
 	hdr->set_mask = ~V_set_disable;
 	hdr_flags = hdr->flags;
 	/* hdr must not be dereferenced past this point */
 	hdr = NULL;
 
 	/*
 	 * The required size was stored in the header above, presumably so
 	 * userland can retry with a larger buffer -- TODO confirm.
 	 */
 	if (sd->valsize < sz) {
 		error = ENOMEM;
 		goto cleanup;
 	}
 
 	/* STAGE2: Store actual data */
 	if (da.tcount > 0) {
 		error = dump_named_objects(chain, &da, sd);
 		if (error != 0)
 			goto cleanup;
 	}
 
 	if (hdr_flags & IPFW_CFG_GET_STATIC) {
 		error = dump_static_rules(chain, &da, sd);
 		if (error != 0)
 			goto cleanup;
 	}
 
 	if (hdr_flags & IPFW_CFG_GET_STATES)
 		error = ipfw_dump_states(chain, sd);
 
 cleanup:
 	IPFW_UH_RUNLOCK(chain);
 
 	/*
 	 * Free through the saved pointer: da.bmask may have been advanced
 	 * by dump_named_objects().
 	 */
 	if (bmask != NULL)
 		free(bmask, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Generic validity check for a named-object name.
  *
  * A name is rejected when it is empty or when no terminating NUL is
  * found within the size of ipfw_obj_ntlv's name field.
  *
  * Returns 0 if the name is acceptable, EINVAL otherwise.
  */
 int
 ipfw_check_object_name_generic(const char *name)
 {
 	int limit;
 
 	limit = sizeof(((ipfw_obj_ntlv *)0)->name);
 	if (name[0] == '\0' || strnlen(name, limit) == limit)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Creates the objects that a rule references but that do not exist yet.
  *
  * Every entry in [@oib, @pidx) whose kidx is still zero is created via
  * the create_object() callback of the rewriter handling the opcode at
  * @cmd + entry->off; @ti is reused as the request descriptor.
  * On the first failure all references held through the index array are
  * dropped with unref_oib_objects() and the error is returned
  * (EOPNOTSUPP when the rewriter has no create_object callback).
  *
  * Return 0 on success.
  */
 int
 create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
     struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti)
 {
 	struct opcode_obj_rewrite *rw;
 	struct obj_idx *cur;
 	uint16_t kidx;
 	int error;
 
 	/*
 	 * Compatibility stuff: do actual creation for non-existing,
 	 * but referenced objects.
 	 */
 	for (cur = oib; cur < pidx; cur++) {
 		/* Already resolved to a kernel object */
 		if (cur->kidx != 0)
 			continue;
 
 		ti->uidx = cur->uidx;
 		ti->type = cur->type;
 		ti->atype = 0;
 
 		rw = find_op_rw(cmd + cur->off, NULL, NULL);
 		KASSERT(rw != NULL, ("Unable to find handler for op %d",
 		    (cmd + cur->off)->opcode));
 
 		error = (rw->create_object != NULL) ?
 		    rw->create_object(ch, ti, &kidx) : EOPNOTSUPP;
 		if (error != 0) {
 			/*
 			 * Error happened. We have to rollback everything.
 			 * Drop all already acquired references.
 			 */
 			IPFW_UH_WLOCK(ch);
 			unref_oib_objects(ch, cmd, oib, pidx);
 			IPFW_UH_WUNLOCK(ch);
 
 			return (error);
 		}
 
 		cur->kidx = kidx;
 	}
 
 	return (0);
 }
 
 /*
  * Compatibility function for old ipfw(8) binaries.
  * Rewrites table/nat kernel indices with userland ones.
  * Convert tables matching '/^\d+$/' to their atoi() value.
  * Use number 65535 for other tables.
  *
  * Every opcode of @rule that has a rewrite handler and whose kernel
  * object can be found gets its index replaced. Opcodes without a
  * handler or without a matching object are left untouched.
  *
  * Returns 0 on success. Returns 2 (presumably ENOENT - confirm) if at
  * least one object name could not be represented as a number; the
  * rewrite of all opcodes is still completed in that case.
  */
 static int
 set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule)
 {
 	struct opcode_obj_rewrite *rw;
 	struct named_object *no;
 	ipfw_insn *cmd;
 	char *end;
 	long val;
 	int cmdlen, error, l;
 	uint16_t kidx, uidx;
 	uint8_t subtype;
 
 	error = 0;
 
 	l = rule->cmd_len;
 	cmd = rule->cmd;
 	cmdlen = 0;
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		/* Check if is index in given opcode */
 		rw = find_op_rw(cmd, &kidx, &subtype);
 		if (rw == NULL)
 			continue;
 
 		/* Try to find referenced kernel object */
 		no = rw->find_bykidx(ch, kidx);
 		if (no == NULL)
 			continue;
 
 		/*
 		 * strtol() skips leading whitespace and accepts a sign, so
 		 * a name such as "-5" or " 7" would otherwise be taken as
 		 * numeric (and a negative value would be truncated into a
 		 * bogus 16-bit index). Insist on a leading digit so that
 		 * only names made purely of digits are converted.
 		 */
 		val = strtol(no->name, &end, 10);
 		if (no->name[0] >= '0' && no->name[0] <= '9' &&
 		    *end == '\0' && val < 65535) {
 			uidx = val;
 		} else {
 
 			/*
 			 * We are called via legacy opcode.
 			 * Save error and show table as fake number
 			 * not to make ipfw(8) hang.
 			 */
 			uidx = 65535;
 			error = 2;
 		}
 
 		rw->update(cmd, uidx);
 	}
 
 	return (error);
 }
 
 
 /*
  * Drops one reference from every object recorded in the index array
  * [@oib, @end) that has a non-zero kernel index. The opcode of each
  * entry is located at @cmd plus the entry offset.
  *
  * Used to roll back a partially converted rule on error.
  * The caller must hold the UH write lock.
  */
 static void
 unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib,
     struct obj_idx *end)
 {
 	struct opcode_obj_rewrite *handler;
 	struct named_object *obj;
 	struct obj_idx *cur;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	cur = oib;
 	while (cur < end) {
 		/* Entries with kidx 0 hold no reference. */
 		if (cur->kidx != 0) {
 			handler = find_op_rw(cmd + cur->off, NULL, NULL);
 			KASSERT(handler != NULL,
 			    ("Unable to find handler for op %d",
 			    (cmd + cur->off)->opcode));
 
 			/* Look the object up by kernel index and unref it. */
 			obj = handler->find_bykidx(ch, cur->kidx);
 			KASSERT(obj != NULL,
 			    ("Ref'd object %d disappeared", cur->kidx));
 			obj->refcnt--;
 		}
 		cur++;
 	}
 }
 
 /*
  * Releases the references held by every object-carrying opcode in @rule.
  * Used by the rule removal code.
  *
  * If the reference being dropped is the last one and the opcode's
  * handler provides destroy_object, that method is called instead of
  * decrementing the counter.
  *
  * The caller must hold the UH write lock.
  */
 static void
 unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule)
 {
 	struct opcode_obj_rewrite *rw;
 	struct named_object *no;
 	ipfw_insn *cmd;
 	int oplen, remaining;
 	uint16_t kidx;
 	uint8_t subtype;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	remaining = rule->cmd_len;
 	cmd = rule->cmd;
 	while (remaining > 0) {
 		oplen = F_LEN(cmd);
 
 		rw = find_op_rw(cmd, &kidx, &subtype);
 		if (rw != NULL) {
 			no = rw->find_bykidx(ch, kidx);
 
 			KASSERT(no != NULL, ("object id %d not found", kidx));
 			KASSERT(no->subtype == subtype,
 			    ("wrong type %d (%d) for object id %d",
 			    no->subtype, subtype, kidx));
 			KASSERT(no->refcnt > 0, ("refcount for object %d is %d",
 			    kidx, no->refcnt));
 
 			/* Last reference goes through the destructor. */
 			if (no->refcnt != 1 || rw->destroy_object == NULL)
 				no->refcnt--;
 			else
 				rw->destroy_object(ch, no);
 		}
 
 		remaining -= oplen;
 		cmd += oplen;
 	}
 }
 
 
 /*
  * Finds and references the object (if any) named by instruction @cmd.
  *
  * If @cmd has no rewrite handler nothing is done. Otherwise the
  * userland index and type are stored in both @ti and @pidx and the
  * object is looked up by name:
  *  - lookup error: the error is returned;
  *  - object not found: @unresolved is set to 1 so that the caller can
  *    schedule automatic creation;
  *  - object found with a different subtype: EINVAL is returned;
  *  - object found: its refcount is bumped and @cmd is rewritten with
  *    the kernel index.
  *
  * Returns non-zero value in case of error.
  */
 static int
 ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti,
     struct obj_idx *pidx, int *unresolved)
 {
 	struct opcode_obj_rewrite *handler;
 	struct named_object *obj;
 	int error;
 
 	/* Opcodes without a handler carry no object reference. */
 	handler = find_op_rw(cmd, &ti->uidx, &ti->type);
 	if (handler == NULL)
 		return (0);
 
 	/* Remember what userland asked for. */
 	pidx->uidx = ti->uidx;
 	pidx->type = ti->type;
 
 	error = handler->find_byname(ch, ti, &obj);
 	if (error == 0) {
 		if (obj == NULL) {
 			/* Let the caller create it later. */
 			*unresolved = 1;
 		} else if (obj->subtype != ti->type) {
 			/* Existing object has an unexpected subtype. */
 			error = EINVAL;
 		} else {
 			/* Take a reference and switch opcode to kidx. */
 			obj->refcnt++;
 			handler->update(cmd, obj->kidx);
 		}
 	}
 
 	return (error);
 }
 
 /*
  * Finds and bumps refcount for objects referenced by given @rule.
  * Auto-creates non-existing tables.
  * Fills in @oib array with userland/kernel indexes.
  *
  * Returns 0 on success.
  */
 static int
 ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
     struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti)
 {
 	struct obj_idx *pidx;
 	ipfw_insn *cmd;
 	int cmdlen, error, l, unresolved;
 
 	pidx = oib;
 	l = rule->cmd_len;
 	cmd = rule->cmd;
 	cmdlen = 0;
 	error = 0;
 
 	IPFW_UH_WLOCK(ch);
 
 	/* Increase refcount on each existing referenced table. */
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 		unresolved = 0;
 
 		error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved);
 		if (error != 0)
 			break;
 		/*
 		 * Compatibility stuff for old clients:
 		 * prepare to automaitcally create non-existing objects.
 		 */
 		if (unresolved != 0) {
 			pidx->off = rule->cmd_len - l;
 			pidx++;
 		}
 	}
 
 	if (error != 0) {
 		/* Unref everything we have already done */
 		unref_oib_objects(ch, rule->cmd, oib, pidx);
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Perform auto-creation for non-existing objects */
 	if (pidx != oib)
 		error = create_objects_compat(ch, rule->cmd, oib, pidx, ti);
 
 	/* Calculate real number of dynamic objects */
 	ci->object_opcodes = (uint16_t)(pidx - oib);
 
 	return (error);
 }
 
 /*
  * Checks is opcode is referencing table of appropriate type.
  * Adds reference count for found table if true.
  * Rewrites user-supplied opcode values with kernel ones.
  *
  * Returns 0 on success and appropriate error code otherwise.
  */
 static int
 rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci)
 {
 	int error;
 	ipfw_insn *cmd;
 	uint8_t type;
 	struct obj_idx *p, *pidx_first, *pidx_last;
 	struct tid_info ti;
 
 	/*
 	 * Prepare an array for storing opcode indices.
 	 * Use stack allocation by default.
 	 */
 	if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
 		/* Stack */
 		pidx_first = ci->obuf;
 	} else
 		pidx_first = malloc(
 		    ci->object_opcodes * sizeof(struct obj_idx),
 		    M_IPFW, M_WAITOK | M_ZERO);
 
 	error = 0;
 	type = 0;
 	memset(&ti, 0, sizeof(ti));
 
 	/* Use set rule is assigned to. */
 	ti.set = ci->krule->set;
 	if (ci->ctlv != NULL) {
 		ti.tlvs = (void *)(ci->ctlv + 1);
 		ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
 	}
 
 	/* Reference all used tables and other objects */
 	error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti);
 	if (error != 0)
 		goto free;
 	/*
 	 * Note that ref_rule_objects() might have updated ci->object_opcodes
 	 * to reflect actual number of object opcodes.
 	 */
 
 	/* Perform rewrite of remaining opcodes */
 	p = pidx_first;
 	pidx_last = pidx_first + ci->object_opcodes;
 	for (p = pidx_first; p < pidx_last; p++) {
 		cmd = ci->krule->cmd + p->off;
 		update_opcode_kidx(cmd, p->kidx);
 	}
 
 free:
 	if (pidx_first != ci->obuf)
 		free(pidx_first, M_IPFW);
 
 	return (error);
 }
 
 /*
  * Adds one or more rules to ipfw @chain.
  * Data layout (version 0)(current):
  * Request:
  * [
  *   ip_fw3_opheader
  *   [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1)
  *   [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3)
  * ]
  * Reply:
  * [
  *   ip_fw3_opheader
  *   [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional)
  *   [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ]
  * ]
  *
  * Rules in reply are modified to store their actual ruleset number.
  *
  * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending
  * according to their idx field and there has to be no duplicates.
  * (The code recognizes the name list by type IPFW_TLV_TBLNAME_LIST.)
  * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending.
  * (*3) Each ip_fw structure needs to be aligned to u64 boundary.
  *
  * The whole request is validated before anything is allocated or
  * committed: every container must fit inside the request buffer
  * (taking into account the bytes already consumed), be u64-aligned in
  * length, and the sum of all parts must match the request size exactly.
  * Currently exactly one rule per request is accepted (ENOTSUP otherwise).
  *
  * The @op3 argument is ignored; the header is re-fetched from @sd.
  *
  * Returns 0 on success.
  */
 static int
 add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_ctlv *ctlv, *rtlv, *tstate;
 	ipfw_obj_ntlv *ntlv;
 	int clen, error, idx;
 	uint32_t count, read;
 	struct ip_fw_rule *r;
 	struct rule_check_info rci, *ci, *cbuf;
 	int i, rsize;
 
 	/* Claim the whole request as one contiguous chunk. */
 	op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize);
 	if (op3 == NULL)
 		return (EINVAL);
 	ctlv = (ipfw_obj_ctlv *)(op3 + 1);
 
 	/* @read tracks how many request bytes have been accounted for. */
 	read = sizeof(ip_fw3_opheader);
 	rtlv = NULL;
 	tstate = NULL;
 	cbuf = NULL;
 	memset(&rci, 0, sizeof(struct rule_check_info));
 
 	if (read + sizeof(*ctlv) > sd->valsize)
 		return (EINVAL);
 
 	if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) {
 		clen = ctlv->head.length;
 		/*
 		 * Check size and alignment.
 		 * The container starts @read bytes into the request, so
 		 * the bound has to include the bytes already consumed;
 		 * otherwise the name walk below could run past the end
 		 * of the request buffer.
 		 */
 		if (clen + read > sd->valsize || clen < sizeof(*ctlv))
 			return (EINVAL);
 		if ((clen % sizeof(uint64_t)) != 0)
 			return (EINVAL);
 
 		/*
 		 * Some table names or other named objects.
 		 * Check for validness.
 		 */
 		count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv);
 		if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv))
 			return (EINVAL);
 
 		/*
 		 * Check each TLV.
 		 * Ensure TLVs are sorted ascending and
 		 * there are no duplicates.
 		 */
 		idx = -1;
 		ntlv = (ipfw_obj_ntlv *)(ctlv + 1);
 		while (count > 0) {
 			if (ntlv->head.length != sizeof(ipfw_obj_ntlv))
 				return (EINVAL);
 
 			error = ipfw_check_object_name_generic(ntlv->name);
 			if (error != 0)
 				return (error);
 
 			if (ntlv->idx <= idx)
 				return (EINVAL);
 
 			idx = ntlv->idx;
 			count--;
 			ntlv++;
 		}
 
 		tstate = ctlv;
 		read += ctlv->head.length;
 		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
 	}
 
 	if (read + sizeof(*ctlv) > sd->valsize)
 		return (EINVAL);
 
 	if (ctlv->head.type == IPFW_TLV_RULE_LIST) {
 		clen = ctlv->head.length;
 		if (clen + read > sd->valsize || clen < sizeof(*ctlv))
 			return (EINVAL);
 		if ((clen % sizeof(uint64_t)) != 0)
 			return (EINVAL);
 
 		/*
 		 * TODO: Permit adding multiple rules at once
 		 */
 		if (ctlv->count != 1)
 			return (ENOTSUP);
 
 		clen -= sizeof(*ctlv);
 
 		/* Each rule needs at least a full fixed-size header. */
 		if (ctlv->count > clen / sizeof(struct ip_fw_rule))
 			return (EINVAL);
 
 		/* Allocate state for each rule or use stack */
 		if (ctlv->count == 1) {
 			memset(&rci, 0, sizeof(struct rule_check_info));
 			cbuf = &rci;
 		} else
 			cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP,
 			    M_WAITOK | M_ZERO);
 		ci = cbuf;
 
 		/*
 		 * Check each rule for validness.
 		 * Ensure numbered rules are sorted ascending
 		 * and properly aligned
 		 */
 		idx = 0;
 		r = (struct ip_fw_rule *)(ctlv + 1);
 		count = 0;
 		error = 0;
 		while (clen > 0) {
 			rsize = roundup2(RULESIZE(r), sizeof(uint64_t));
 			if (rsize > clen || ctlv->count <= count) {
 				error = EINVAL;
 				break;
 			}
 
 			ci->ctlv = tstate;
 			error = check_ipfw_rule1(r, rsize, ci);
 			if (error != 0)
 				break;
 
 			/* Check sorting */
 			if (r->rulenum != 0 && r->rulenum < idx) {
 				printf("rulenum %d idx %d\n", r->rulenum, idx);
 				error = EINVAL;
 				break;
 			}
 			idx = r->rulenum;
 
 			ci->urule = (caddr_t)r;
 
 			rsize = roundup2(rsize, sizeof(uint64_t));
 			clen -= rsize;
 			r = (struct ip_fw_rule *)((caddr_t)r + rsize);
 			count++;
 			ci++;
 		}
 
 		/* Any validation failure is reported as EINVAL. */
 		if (ctlv->count != count || error != 0) {
 			if (cbuf != &rci)
 				free(cbuf, M_TEMP);
 			return (EINVAL);
 		}
 
 		rtlv = ctlv;
 		read += ctlv->head.length;
 		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
 	}
 
 	/* The request must be fully consumed and contain a rule list. */
 	if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) {
 		if (cbuf != NULL && cbuf != &rci)
 			free(cbuf, M_TEMP);
 		return (EINVAL);
 	}
 
 	/*
 	 * Passed rules seems to be valid.
 	 * Allocate storage and try to add them to chain.
 	 */
 	for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) {
 		clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule);
 		ci->krule = ipfw_alloc_rule(chain, clen);
 		import_rule1(ci);
 	}
 
 	if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) {
 		/* Free allocated krules */
 		for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++)
 			ipfw_free_rule(ci->krule);
 	}
 
 	if (cbuf != NULL && cbuf != &rci)
 		free(cbuf, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Lists all sopts currently registered.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ]
  *
  * The header (count, objsize, size) is always filled in, so that a
  * caller whose buffer is too small learns the required size from
  * olh->size when ENOMEM is returned.
  *
  * Returns 0 on success, EINVAL if the header cannot be obtained or the
  * size stored in it exceeds the request size, ENOMEM if the size stored
  * in the header is too small for the reply.
  */
 static int
 dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	ipfw_sopt_info *i;
 	struct ipfw_sopt_handler *sh;
 	uint32_t count, n, size;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL)
 		return (EINVAL);
 	if (sd->valsize < olh->size)
 		return (EINVAL);
 
 	CTL3_LOCK();
 	count = ctl3_hsize;
 	size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader);
 
 	/* Fill in header regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_sopt_info);
 
 	if (size > olh->size) {
 		olh->size = size;
 		CTL3_UNLOCK();
 		return (ENOMEM);
 	}
 	olh->size = size;
 
 	/*
 	 * ctl3_handlers holds ctl3_hsize entries indexed from 0, so walk
 	 * [0, count): starting at 1 would skip the first handler and
 	 * read one element past the end of the array.
 	 */
 	for (n = 0; n < count; n++) {
 		i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i));
 		KASSERT(i != NULL, ("previously checked buffer is not enough"));
 		sh = &ctl3_handlers[n];
 		i->opcode = sh->opcode;
 		i->version = sh->version;
 		i->refcnt = sh->refcnt;
 	}
 	CTL3_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Orders two opcode_obj_rewrite entries by their opcode field.
  * Used both in qsort() and bsearch().
  *
  * Returns -1, 0 or 1 when the opcode of @_a is less than, equal to or
  * greater than the opcode of @_b.
  */
 static int
 compare_opcodes(const void *_a, const void *_b)
 {
 	const struct opcode_obj_rewrite *lhs, *rhs;
 
 	lhs = (const struct opcode_obj_rewrite *)_a;
 	rhs = (const struct opcode_obj_rewrite *)_b;
 
 	if (lhs->opcode == rhs->opcode)
 		return (0);
 
 	return ((lhs->opcode < rhs->opcode) ? -1 : 1);
 }
 
 /*
  * XXX: Rewrite bsearch()
  */
 static int
 find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo,
     struct opcode_obj_rewrite **phi)
 {
 	struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw;
 
 	memset(&h, 0, sizeof(h));
 	h.opcode = op;
 
 	rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters,
 	    ctl3_rsize, sizeof(h), compare_opcodes);
 	if (rw == NULL)
 		return (1);
 
 	/* Find the first element matching the same opcode */
 	lo = rw;
 	for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--)
 		;
 
 	/* Find the last element matching the same opcode */
 	hi = rw;
 	ctl3_max = ctl3_rewriters + ctl3_rsize;
 	for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++)
 		;
 
 	*plo = lo;
 	*phi = hi;
 
 	return (0);
 }
 
 /*
  * Finds the object rewriter responsible for instruction @cmd.
  *
  * All rewriters registered for the opcode of @cmd are tried in order;
  * the first one whose classifier returns 0 wins. The index and subtype
  * reported by that classifier are stored in @puidx and @ptype when
  * those pointers are not NULL.
  *
  * Returns pointer to handler or NULL.
  */
 static struct opcode_obj_rewrite *
 find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 	struct opcode_obj_rewrite *cand, *first, *last;
 	uint16_t uidx;
 	uint8_t subtype;
 
 	if (find_op_rw_range(cmd->opcode, &first, &last) != 0)
 		return (NULL);
 
 	cand = first;
 	while (cand <= last) {
 		if (cand->classifier(cmd, &uidx, &subtype) != 0) {
 			/* Not handled by this rewriter, try the next. */
 			cand++;
 			continue;
 		}
 
 		if (puidx != NULL)
 			*puidx = uidx;
 		if (ptype != NULL)
 			*ptype = subtype;
 		return (cand);
 	}
 
 	return (NULL);
 }
 /*
  * Checks whether instruction @cmd is handled by a registered object
  * rewriter and, if so, stores the index it carries in @puidx.
  *
  * Returns 0 if a rewriter was found, 1 otherwise.
  */
 int
 classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx)
 {
 
 	return ((find_op_rw(cmd, puidx, NULL) == NULL) ? 1 : 0);
 }
 
 /*
  * Stores index @idx into instruction @cmd using the update method of
  * the rewriter that handles it. A rewriter is expected to exist.
  */
 void
 update_opcode_kidx(ipfw_insn *cmd, uint16_t idx)
 {
 	struct opcode_obj_rewrite *handler;
 
 	handler = find_op_rw(cmd, NULL, NULL);
 	KASSERT(handler != NULL,
 	    ("No handler to update opcode %d", cmd->opcode));
 	handler->update(cmd, idx);
 }
 
 /*
  * Resets the global array of opcode object rewriters to the empty state.
  */
 void
 ipfw_init_obj_rewriter()
 {
 
 	ctl3_rsize = 0;
 	ctl3_rewriters = NULL;
 }
 
 /*
  * Frees the global array of opcode object rewriters (if allocated)
  * and returns it to the empty state.
  */
 void
 ipfw_destroy_obj_rewriter()
 {
 
 	if (ctl3_rewriters != NULL) {
 		free(ctl3_rewriters, M_IPFW);
 		ctl3_rewriters = NULL;
 	}
 	ctl3_rsize = 0;
 }
 
 /*
  * Adds one or more opcode object rewrite handlers to the global array.
  * Function may sleep.
  *
  * @rw points to @count handlers which are copied into a newly
  * allocated array together with the existing ones; the new array is
  * sorted with compare_opcodes() and replaces the old one.
  */
 void
 ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
 {
 	size_t sz;
 	struct opcode_obj_rewrite *tmp;
 
 	CTL3_LOCK();
 
 	/*
 	 * The allocation is done with the lock dropped. After the lock
 	 * is retaken the required size is rechecked: if the array grew
 	 * in the meantime the buffer is too small and we start over.
 	 */
 	for (;;) {
 		sz = ctl3_rsize + count;
 		CTL3_UNLOCK();
 		tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO);
 		CTL3_LOCK();
 		if (ctl3_rsize + count <= sz)
 			break;
 
 		/* Retry */
 		free(tmp, M_IPFW);
 	}
 
 	/* Merge old & new arrays */
 	/* Recompute: the array may have shrunk while unlocked. */
 	sz = ctl3_rsize + count;
 	memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw));
 	memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw));
 	/* Keep the array sorted by opcode for the bsearch() lookups. */
 	qsort(tmp, sz, sizeof(*rw), compare_opcodes);
 	/* Switch new and free old */
 	if (ctl3_rewriters != NULL)
 		free(ctl3_rewriters, M_IPFW);
 	ctl3_rewriters = tmp;
 	ctl3_rsize = sz;
 
 	CTL3_UNLOCK();
 }
 
 /*
  * Removes one or more object rewrite handlers from the global array.
  *
  * For each of the @count entries in @rw the first registered handler
  * with the same opcode and the same classifier is removed; entries
  * with no match are silently skipped. The array is freed once it
  * becomes empty.
  *
  * Always returns 0.
  */
 int
 ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
 {
 	struct opcode_obj_rewrite *first, *last, *tail, *victim;
 	size_t nbytes;
 	int i;
 
 	CTL3_LOCK();
 
 	for (i = 0; i < count; i++) {
 		if (find_op_rw_range(rw[i].opcode, &first, &last) == 0) {
 			/* Find the entry registered with this classifier. */
 			victim = first;
 			while (victim <= last &&
 			    victim->classifier != rw[i].classifier)
 				victim++;
 
 			if (victim <= last) {
 				/* Close the gap by shifting the tail down. */
 				tail = ctl3_rewriters + ctl3_rsize;
 				nbytes = (tail - (victim + 1)) *
 				    sizeof(*victim);
 				memmove(victim, victim + 1, nbytes);
 				ctl3_rsize--;
 			}
 		}
 	}
 
 	if (ctl3_rsize == 0) {
 		if (ctl3_rewriters != NULL)
 			free(ctl3_rewriters, M_IPFW);
 		ctl3_rewriters = NULL;
 	}
 
 	CTL3_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Callback passed to ipfw_objhash_foreach(): exports named object @no
  * as an ipfw_obj_ntlv into the sockopt buffer passed via @arg.
  *
  * Returns 0 on success or ENOMEM if the buffer has no room left.
  */
 static int
 export_objhash_ntlv_internal(struct namedobj_instance *ni,
     struct named_object *no, void *arg)
 {
 	struct sockopt_data *sd;
 	ipfw_obj_ntlv *ntlv;
 
 	sd = (struct sockopt_data *)arg;
 	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
 	if (ntlv != NULL) {
 		ipfw_export_obj_ntlv(no, ntlv);
 		return (0);
 	}
 
 	return (ENOMEM);
 }
 
 /*
  * Lists all service objects.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ]
  *
  * The required size is always stored in the header, also when the
  * request buffer turns out to be too small.
  *
  * Returns 0 on success, EINVAL if the header cannot be obtained,
  * ENOMEM if the request buffer is too small for the reply.
  */
 static int
 dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_lheader *hdr;
 	int count, error;
 
 	hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
 	if (hdr == NULL)
 		return (EINVAL);
 
 	error = 0;
 
 	IPFW_UH_RLOCK(chain);
 	count = ipfw_objhash_count(CHAIN_TO_SRV(chain));
 	hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv);
 	if (sd->valsize >= hdr->size) {
 		hdr->count = count;
 		hdr->objsize = sizeof(ipfw_obj_ntlv);
 		/* Append one ntlv per object after the header. */
 		if (count > 0)
 			ipfw_objhash_foreach(CHAIN_TO_SRV(chain),
 			    export_objhash_ntlv_internal, sd);
 	} else
 		error = ENOMEM;
 	IPFW_UH_RUNLOCK(chain);
 
 	return (error);
 }
 
 /*
  * Orders two sopt handlers by opcode, then version, then handler
  * pointer. Used both as qsort() and bsearch() comparator.
  *
  * When the handler pointer of @_a is NULL the pointer comparison is
  * skipped, so a bsearch() key without a handler matches any entry with
  * the same opcode and version.
  *
  * Returns 0 if match is found.
  */
 static int
 compare_sh(const void *_a, const void *_b)
 {
 	const struct ipfw_sopt_handler *lhs, *rhs;
 	uintptr_t lfn, rfn;
 
 	lhs = (const struct ipfw_sopt_handler *)_a;
 	rhs = (const struct ipfw_sopt_handler *)_b;
 
 	if (lhs->opcode != rhs->opcode)
 		return ((lhs->opcode < rhs->opcode) ? -1 : 1);
 
 	if (lhs->version != rhs->version)
 		return ((lhs->version < rhs->version) ? -1 : 1);
 
 	/* bsearch helper */
 	if (lhs->handler == NULL)
 		return (0);
 
 	lfn = (uintptr_t)lhs->handler;
 	rfn = (uintptr_t)rhs->handler;
 	if (lfn != rfn)
 		return ((lfn < rfn) ? -1 : 1);
 
 	return (0);
 }
 
 /*
  * Finds sopt handler based on @code and @version.
  * If @handler is not NULL the handler pointer has to match as well
  * (see compare_sh()).
  *
  * Returns pointer to handler or NULL.
  */
 static struct ipfw_sopt_handler *
 find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler)
 {
 	struct ipfw_sopt_handler key;
 
 	/* Build a search key carrying only the compared fields. */
 	memset(&key, 0, sizeof(key));
 	key.handler = handler;
 	key.version = version;
 	key.opcode = code;
 
 	return ((struct ipfw_sopt_handler *)bsearch(&key, ctl3_handlers,
 	    ctl3_hsize, sizeof(key), compare_sh));
 }
 
 /*
  * Looks up the sopt handler for @opcode/@version, takes a reference on
  * it (per-handler and global counters) and copies the handler record
  * into @psh. Lookup, reference and copy are done under CTL3_LOCK.
  *
  * Returns 0 on success or EINVAL if no such handler is registered.
  */
 static int
 find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh)
 {
 	struct ipfw_sopt_handler *found;
 
 	CTL3_LOCK();
 	found = find_sh(opcode, version, NULL);
 	if (found != NULL) {
 		found->refcnt++;
 		ctl3_refct++;
 		/* Hand a private copy of the record to the caller. */
 		*psh = *found;
 		CTL3_UNLOCK();
 		return (0);
 	}
 	CTL3_UNLOCK();
 
 	printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n",
 	    opcode, version);
 	return (EINVAL);
 }
 
 /*
  * Drops the reference taken by find_ref_sh(): the handler matching
  * the opcode and version stored in @psh is looked up again and both
  * its refcount and the global counter are decremented.
  */
 static void
 find_unref_sh(struct ipfw_sopt_handler *psh)
 {
 	struct ipfw_sopt_handler *found;
 
 	CTL3_LOCK();
 	found = find_sh(psh->opcode, psh->version, NULL);
 	KASSERT(found != NULL, ("ctl3 handler disappeared"));
 	found->refcnt--;
 	ctl3_refct--;
 	CTL3_UNLOCK();
 }
 
 /*
  * Initializes the sockopt handler machinery: sets up the CTL3 lock
  * and registers the handlers listed in scodes.
  */
 void
 ipfw_init_sopt_handler()
 {
 
 	CTL3_LOCK_INIT();
 	IPFW_ADD_SOPT_HANDLER(1, scodes);
 }
 
 /*
  * Tears down the sockopt handler machinery: unregisters the handlers
  * listed in scodes and then destroys the CTL3 lock.
  */
 void
 ipfw_destroy_sopt_handler()
 {
 
 	IPFW_DEL_SOPT_HANDLER(1, scodes);
 	CTL3_LOCK_DESTROY();
 }
 
 /*
  * Adds one or more sockopt handlers to the global array.
  * Function may sleep.
  *
  * @sh points to @count handlers which are copied into a newly
  * allocated array together with the existing ones; the new array is
  * sorted with compare_sh() and replaces the old one. The generation
  * counter ctl3_gencnt is bumped.
  */
 void
 ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count)
 {
 	size_t sz;
 	struct ipfw_sopt_handler *tmp;
 
 	CTL3_LOCK();
 
 	/*
 	 * The allocation is done with the lock dropped. After the lock
 	 * is retaken the required size is rechecked: if the array grew
 	 * in the meantime the buffer is too small and we start over.
 	 */
 	for (;;) {
 		sz = ctl3_hsize + count;
 		CTL3_UNLOCK();
 		tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO);
 		CTL3_LOCK();
 		if (ctl3_hsize + count <= sz)
 			break;
 
 		/* Retry */
 		free(tmp, M_IPFW);
 	}
 
 	/* Merge old & new arrays */
 	/* Recompute: the array may have shrunk while unlocked. */
 	sz = ctl3_hsize + count;
 	memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh));
 	memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh));
 	/* Keep the array sorted for the bsearch() lookups in find_sh(). */
 	qsort(tmp, sz, sizeof(*sh), compare_sh);
 	/* Switch new and free old */
 	if (ctl3_handlers != NULL)
 		free(ctl3_handlers, M_IPFW);
 	ctl3_handlers = tmp;
 	ctl3_hsize = sz;
 	ctl3_gencnt++;
 
 	CTL3_UNLOCK();
 }
 
 /*
  * Removes one or more sockopt handlers from the global array.
  *
  * Each of the @count entries in @sh is matched by opcode, version and
  * handler pointer; entries that are not registered are skipped. The
  * array is freed once it becomes empty and the generation counter is
  * bumped unconditionally.
  *
  * Always returns 0.
  */
 int
 ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count)
 {
 	struct ipfw_sopt_handler *found;
 	size_t nbytes;
 	int i;
 
 	CTL3_LOCK();
 
 	for (i = 0; i < count; i++) {
 		found = find_sh(sh[i].opcode, sh[i].version, sh[i].handler);
 		if (found != NULL) {
 			/* Close the gap by shifting the tail down. */
 			nbytes = (ctl3_handlers + ctl3_hsize - (found + 1)) *
 			    sizeof(*found);
 			memmove(found, found + 1, nbytes);
 			ctl3_hsize--;
 		}
 	}
 
 	if (ctl3_hsize == 0) {
 		if (ctl3_handlers != NULL)
 			free(ctl3_handlers, M_IPFW);
 		ctl3_handlers = NULL;
 	}
 
 	ctl3_gencnt++;
 
 	CTL3_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Writes data accumulated in @sd to sockopt buffer.
  * Zeroes internal @sd buffer.
  */
 static int
 ipfw_flush_sopt_data(struct sockopt_data *sd)
 {
 	struct sockopt *sopt;
 	int error;
 	size_t sz;
 
 	sz = sd->koff;
 	if (sz == 0)
 		return (0);
 
 	sopt = sd->sopt;
 
 	if (sopt->sopt_dir == SOPT_GET) {
 		error = copyout(sd->kbuf, sopt->sopt_val, sz);
 		if (error != 0)
 			return (error);
 	}
 
 	memset(sd->kbuf, 0, sd->ksize);
 	sd->ktotal += sz;
 	sd->koff = 0;
 	if (sd->ktotal + sd->ksize < sd->valsize)
 		sd->kavail = sd->ksize;
 	else
 		sd->kavail = sd->valsize - sd->ktotal;
 
 	/* Update sopt buffer data */
 	sopt->sopt_valsize = sd->ktotal;
 	sopt->sopt_val = sd->sopt_val + sd->ktotal;
 
 	return (0);
 }
 
 /*
  * Ensures that @sd buffer has contiguous @needed number of
  * bytes.
  *
  * If the current window is too small the accumulated data is flushed
  * once and the check is repeated.
  *
  * Returns pointer to requested space or NULL (flush failed or the
  * space is still insufficient).
  */
 caddr_t
 ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed)
 {
 	caddr_t space;
 
 	if (sd->kavail < needed) {
 		/* Flush data and try another time. */
 		if (ipfw_flush_sopt_data(sd) != 0 || sd->kavail < needed)
 			return (NULL);
 	}
 
 	/* Carve the chunk out of the window. */
 	space = sd->kbuf + sd->koff;
 	sd->kavail -= needed;
 	sd->koff += needed;
 
 	return (space);
 }
 
 /*
  * Requests @needed contiguous bytes from @sd buffer.
  * Function is used to notify subsystem that we are
  * interested in first @needed bytes (request header)
  * and the rest of the buffer can be safely zeroed.
  *
  * Returns pointer to requested space or NULL.
  */
 caddr_t
 ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed)
 {
 	caddr_t hdr;
 
 	hdr = ipfw_get_sopt_space(sd, needed);
 
 	/* Wipe whatever follows the header in the current window. */
 	if (hdr != NULL && sd->kavail > 0)
 		memset(sd->kbuf + sd->koff, 0, sd->kavail);
 
 	return (hdr);
 }
 
 /*
  * New sockopt handler.
  */
 int
 ipfw_ctl3(struct sockopt *sopt)
 {
 	int error, locked;
 	size_t size, valsize;
 	struct ip_fw_chain *chain;
 	char xbuf[256];
 	struct sockopt_data sdata;
 	struct ipfw_sopt_handler h;
 	ip_fw3_opheader *op3 = NULL;
 
 	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
 	if (error != 0)
 		return (error);
 
 	if (sopt->sopt_name != IP_FW3)
 		return (ipfw_ctl(sopt));
 
 	chain = &V_layer3_chain;
 	error = 0;
 
 	/* Save original valsize before it is altered via sooptcopyin() */
 	valsize = sopt->sopt_valsize;
 	memset(&sdata, 0, sizeof(sdata));
 	/* Read op3 header first to determine actual operation */
 	op3 = (ip_fw3_opheader *)xbuf;
 	error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3));
 	if (error != 0)
 		return (error);
 	sopt->sopt_valsize = valsize;
 
 	/*
 	 * Find and reference command.
 	 */
 	error = find_ref_sh(op3->opcode, op3->version, &h);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
 	if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) {
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
 		if (error != 0) {
 			find_unref_sh(&h);
 			return (error);
 		}
 	}
 
 	/*
 	 * Fill in sockopt_data structure that may be useful for
 	 * IP_FW3 get requests.
 	 */
 	locked = 0;
 	if (valsize <= sizeof(xbuf)) {
 		/* use on-stack buffer */
 		sdata.kbuf = xbuf;
 		sdata.ksize = sizeof(xbuf);
 		sdata.kavail = valsize;
 	} else {
 
 		/*
 		 * Determine opcode type/buffer size:
 		 * allocate sliding-window buf for data export or
 		 * contiguous buffer for special ops.
 		 */
 		if ((h.dir & HDIR_SET) != 0) {
 			/* Set request. Allocate contigous buffer. */
 			if (valsize > CTL3_LARGEBUF) {
 				find_unref_sh(&h);
 				return (EFBIG);
 			}
 
 			size = valsize;
 		} else {
 			/* Get request. Allocate sliding window buffer */
 			size = (valsize<CTL3_SMALLBUF) ? valsize:CTL3_SMALLBUF;
 
 			if (size < valsize) {
 				/* We have to wire user buffer */
 				error = vslock(sopt->sopt_val, valsize);
 				if (error != 0)
 					return (error);
 				locked = 1;
 			}
 		}
 
 		sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 		sdata.ksize = size;
 		sdata.kavail = size;
 	}
 
 	sdata.sopt = sopt;
 	sdata.sopt_val = sopt->sopt_val;
 	sdata.valsize = valsize;
 
 	/*
 	 * Copy either all request (if valsize < bsize_max)
 	 * or first bsize_max bytes to guarantee most consumers
 	 * that all necessary data has been copied).
 	 * Anyway, copy not less than sizeof(ip_fw3_opheader).
 	 */
 	if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize,
 	    sizeof(ip_fw3_opheader))) != 0)
 		return (error);
 	op3 = (ip_fw3_opheader *)sdata.kbuf;
 
 	/* Finally, run handler */
 	error = h.handler(chain, op3, &sdata);
 	find_unref_sh(&h);
 
 	/* Flush state and free buffers */
 	if (error == 0)
 		error = ipfw_flush_sopt_data(&sdata);
 	else
 		ipfw_flush_sopt_data(&sdata);
 
 	if (locked != 0)
 		vsunlock(sdata.sopt_val, valsize);
 
 	/* Restore original pointer and set number of bytes written */
 	sopt->sopt_val = sdata.sopt_val;
 	sopt->sopt_valsize = sdata.ktotal;
 	if (sdata.kbuf != xbuf)
 		free(sdata.kbuf, M_TEMP);
 
 	return (error);
 }
 
 /**
  * {set|get}sockopt parser.
  */
 int
 ipfw_ctl(struct sockopt *sopt)
 {
 #define	RULE_MAXSIZE	(512*sizeof(u_int32_t))
 	int error;
 	size_t size, valsize;
 	struct ip_fw *buf;
 	struct ip_fw_rule0 *rule;
 	struct ip_fw_chain *chain;
 	u_int32_t rulenum[2];
 	uint32_t opt;
 	struct rule_check_info ci;
 	IPFW_RLOCK_TRACKER;
 
 	chain = &V_layer3_chain;
 	error = 0;
 
 	/* Save original valsize before it is altered via sooptcopyin() */
 	valsize = sopt->sopt_valsize;
 	opt = sopt->sopt_name;
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
 	if (opt == IP_FW_ADD ||
 	    (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) {
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
 		if (error != 0)
 			return (error);
 	}
 
 	switch (opt) {
 	case IP_FW_GET:
 		/*
 		 * pass up a copy of the current rules. Static rules
 		 * come first (the last of which has number IPFW_DEFAULT_RULE),
 		 * followed by a possibly empty list of dynamic rule.
 		 * The last dynamic rule has NULL in the "next" field.
 		 *
 		 * Note that the calculated size is used to bound the
 		 * amount of data returned to the user.  The rule set may
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
 		for (;;) {
 			int len = 0, want;
 
 			size = chain->static_len;
 			size += ipfw_dyn_len();
 			if (size >= sopt->sopt_valsize)
 				break;
 			buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 			IPFW_UH_RLOCK(chain);
 			/* check again how much space we need */
 			want = chain->static_len + ipfw_dyn_len();
 			if (size >= want)
 				len = ipfw_getrules(chain, buf, size);
 			IPFW_UH_RUNLOCK(chain);
 			if (size >= want)
 				error = sooptcopyout(sopt, buf, len);
 			free(buf, M_TEMP);
 			if (size >= want)
 				break;
 		}
 		break;
 
 	case IP_FW_FLUSH:
 		/* locking is done within del_entry() */
 		error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
 		break;
 
 	case IP_FW_ADD:
 		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
 		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
 			sizeof(struct ip_fw7) );
 
 		memset(&ci, 0, sizeof(struct rule_check_info));
 
 		/*
 		 * If the size of commands equals RULESIZE7 then we assume
 		 * a FreeBSD7.2 binary is talking to us (set is7=1).
 		 * is7 is persistent so the next 'ipfw list' command
 		 * will use this format.
 		 * NOTE: If wrong version is guessed (this can happen if
 		 *       the first ipfw command is 'ipfw [pipe] list')
 		 *       the ipfw binary may crash or loop infinitely...
 		 */
 		size = sopt->sopt_valsize;
 		if (size == RULESIZE7(rule)) {
 		    is7 = 1;
 		    error = convert_rule_to_8(rule);
 		    if (error) {
 			free(rule, M_TEMP);
 			return error;
 		    }
 		    size = RULESIZE(rule);
 		} else
 		    is7 = 0;
 		if (error == 0)
 			error = check_ipfw_rule0(rule, size, &ci);
 		if (error == 0) {
 			/* locking is done within add_rule() */
 			struct ip_fw *krule;
 			krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule));
 			ci.urule = (caddr_t)rule;
 			ci.krule = krule;
 			import_rule0(&ci);
 			error = commit_rules(chain, &ci, 1);
 			if (error != 0)
 				ipfw_free_rule(ci.krule);
 			else if (sopt->sopt_dir == SOPT_GET) {
 				if (is7) {
 					error = convert_rule_to_7(rule);
 					size = RULESIZE7(rule);
 					if (error) {
 						free(rule, M_TEMP);
 						return error;
 					}
 				}
 				error = sooptcopyout(sopt, rule, size);
 			}
 		}
 		free(rule, M_TEMP);
 		break;
 
 	case IP_FW_DEL:
 		/*
 		 * IP_FW_DEL is used for deleting single rules or sets,
 		 * and (ab)used to atomically manipulate sets. Argument size
 		 * is used to distinguish between the two:
 		 *    sizeof(u_int32_t)
 		 *	delete single rule or set of rules,
 		 *	or reassign rules (or sets) to a different set.
 		 *    2*sizeof(u_int32_t)
 		 *	atomic disable/enable sets.
 		 *	first u_int32_t contains sets to be disabled,
 		 *	second u_int32_t contains sets to be enabled.
 		 */
 		error = sooptcopyin(sopt, rulenum,
 			2*sizeof(u_int32_t), sizeof(u_int32_t));
 		if (error)
 			break;
 		size = sopt->sopt_valsize;
 		if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
 			/* delete or reassign, locking done in del_entry() */
 			error = del_entry(chain, rulenum[0]);
 		} else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
 			IPFW_UH_WLOCK(chain);
 			V_set_disable =
 			    (V_set_disable | rulenum[0]) & ~rulenum[1] &
 			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
 			IPFW_UH_WUNLOCK(chain);
 		} else
 			error = EINVAL;
 		break;
 
 	case IP_FW_ZERO:
 	case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
 		rulenum[0] = 0;
 		if (sopt->sopt_val != 0) {
 		    error = sooptcopyin(sopt, rulenum,
 			    sizeof(u_int32_t), sizeof(u_int32_t));
 		    if (error)
 			break;
 		}
 		error = zero_entry(chain, rulenum[0],
 			sopt->sopt_name == IP_FW_RESETLOG);
 		break;
 
 	/*--- TABLE opcodes ---*/
 	case IP_FW_TABLE_ADD:
 	case IP_FW_TABLE_DEL:
 		{
 			ipfw_table_entry ent;
 			struct tentry_info tei;
 			struct tid_info ti;
 			struct table_value v;
 
 			error = sooptcopyin(sopt, &ent,
 			    sizeof(ent), sizeof(ent));
 			if (error)
 				break;
 
 			memset(&tei, 0, sizeof(tei));
 			tei.paddr = &ent.addr;
 			tei.subtype = AF_INET;
 			tei.masklen = ent.masklen;
 			ipfw_import_table_value_legacy(ent.value, &v);
 			tei.pvalue = &v;
 			memset(&ti, 0, sizeof(ti));
 			ti.uidx = ent.tbl;
 			ti.type = IPFW_TABLE_CIDR;
 
 			error = (opt == IP_FW_TABLE_ADD) ?
 			    add_table_entry(chain, &ti, &tei, 0, 1) :
 			    del_table_entry(chain, &ti, &tei, 0, 1);
 		}
 		break;
 
 
 	case IP_FW_TABLE_FLUSH:
 		{
 			u_int16_t tbl;
 			struct tid_info ti;
 
 			error = sooptcopyin(sopt, &tbl,
 			    sizeof(tbl), sizeof(tbl));
 			if (error)
 				break;
 			memset(&ti, 0, sizeof(ti));
 			ti.uidx = tbl;
 			error = flush_table(chain, &ti);
 		}
 		break;
 
 	case IP_FW_TABLE_GETSIZE:
 		{
 			u_int32_t tbl, cnt;
 			struct tid_info ti;
 
 			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
 			    sizeof(tbl))))
 				break;
 			memset(&ti, 0, sizeof(ti));
 			ti.uidx = tbl;
 			IPFW_RLOCK(chain);
 			error = ipfw_count_table(chain, &ti, &cnt);
 			IPFW_RUNLOCK(chain);
 			if (error)
 				break;
 			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
 		}
 		break;
 
 	case IP_FW_TABLE_LIST:
 		{
 			ipfw_table *tbl;
 			struct tid_info ti;
 
 			if (sopt->sopt_valsize < sizeof(*tbl)) {
 				error = EINVAL;
 				break;
 			}
 			size = sopt->sopt_valsize;
 			tbl = malloc(size, M_TEMP, M_WAITOK);
 			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
 			if (error) {
 				free(tbl, M_TEMP);
 				break;
 			}
 			tbl->size = (size - sizeof(*tbl)) /
 			    sizeof(ipfw_table_entry);
 			memset(&ti, 0, sizeof(ti));
 			ti.uidx = tbl->tbl;
 			IPFW_RLOCK(chain);
 			error = ipfw_dump_table_legacy(chain, &ti, tbl);
 			IPFW_RUNLOCK(chain);
 			if (error) {
 				free(tbl, M_TEMP);
 				break;
 			}
 			error = sooptcopyout(sopt, tbl, size);
 			free(tbl, M_TEMP);
 		}
 		break;
 
 	/*--- NAT operations are protected by the IPFW_LOCK ---*/
 	case IP_FW_NAT_CFG:
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_cfg_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_CFG: %s\n",
 			    "ipfw_nat not present, please load it");
 			error = EINVAL;
 		}
 		break;
 
 	case IP_FW_NAT_DEL:
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_del_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_DEL: %s\n",
 			    "ipfw_nat not present, please load it");
 			error = EINVAL;
 		}
 		break;
 
 	case IP_FW_NAT_GET_CONFIG:
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_get_cfg_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_GET_CFG: %s\n",
 			    "ipfw_nat not present, please load it");
 			error = EINVAL;
 		}
 		break;
 
 	case IP_FW_NAT_GET_LOG:
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_get_log_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_GET_LOG: %s\n",
 			    "ipfw_nat not present, please load it");
 			error = EINVAL;
 		}
 		break;
 
 	default:
 		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
 		error = EINVAL;
 	}
 
 	return (error);
 #undef RULE_MAXSIZE
 }
 /*
  * Size in bytes of the scratch buffer used for a single legacy rule
  * (256 32-bit words).  The convert_rule_to_7()/convert_rule_to_8()
  * helpers below allocate and copy exactly this many bytes.
  */
 #define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
 
 /* Functions to convert rules 7.2 <==> 8.0 */
 /*
  * Convert a rule from the current (8.0) layout to the FreeBSD 7.2 layout,
  * in place.
  *
  * @rule must point to a buffer of at least RULE_MAXSIZE bytes: a scratch
  * copy of that size is taken, then the header fields and every opcode are
  * written back through the 7.2 view (struct ip_fw7) of the same buffer.
  *
  * Returns 0 on success, ENOMEM if the scratch copy cannot be allocated,
  * or EINVAL if an opcode has zero length or extends past cmd_len.
  * On error the contents of @rule are unspecified.
  */
 static int
 convert_rule_to_7(struct ip_fw_rule0 *rule)
 {
 	/* Used to modify original rule */
 	struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
 	/* copy of original rule, version 8 */
 	struct ip_fw_rule0 *tmp;
 	/* Used to copy commands */
 	ipfw_insn *ccmd, *dst;
 	int ll, ccmdlen, error;
 
 	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
 	if (tmp == NULL)
 		return (ENOMEM);
 	bcopy(rule, tmp, RULE_MAXSIZE);
 	error = 0;
 
 	/* Copy fields; rule7->_pad is left untouched */
 	rule7->set = tmp->set;
 	rule7->rulenum = tmp->rulenum;
 	rule7->cmd_len = tmp->cmd_len;
 	rule7->act_ofs = tmp->act_ofs;
 	rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
 	rule7->pcnt = tmp->pcnt;
 	rule7->bcnt = tmp->bcnt;
 	rule7->timestamp = tmp->timestamp;
 
 	/* Copy commands */
 	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd;
 	    ll > 0; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
 		ccmdlen = F_LEN(ccmd);
 
 		/* A zero-length opcode would never advance the loop */
 		if (ccmdlen == 0) {
 			printf("ipfw: opcode %d has zero length\n",
 			    ccmd->opcode);
 			error = EINVAL;
 			break;
 		}
 		/* Validate before copying so we never copy past cmd_len */
 		if (ccmdlen > ll) {
 			printf("ipfw: opcode %d size truncated\n",
 				ccmd->opcode);
 			error = EINVAL;
 			break;
 		}
 
 		bcopy(ccmd, dst, ccmdlen * sizeof(uint32_t));
 
 		if (dst->opcode > O_NAT)
 			/* O_REASS doesn't exists in 7.2 version, so
 			 * decrement opcode if it is after O_REASS
 			 */
 			dst->opcode--;
 	}
 
 	/* Single exit: the scratch copy is released on every path */
 	free(tmp, M_TEMP);
 
 	return (error);
 }
 
 static int
 convert_rule_to_8(struct ip_fw_rule0 *rule)
 {
 	/* Used to modify original rule */
 	struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
 
 	/* Used to copy commands */
 	ipfw_insn *ccmd, *dst;
 	int ll = 0, ccmdlen = 0;
 
 	/* Copy of original rule */
 	struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
 	if (tmp == NULL) {
 		return 1; //XXX error
 	}
 
 	bcopy(rule7, tmp, RULE_MAXSIZE);
 
 	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ;
 			ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
 		ccmdlen = F_LEN(ccmd);
 		
 		bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t));
 
 		if (dst->opcode > O_NAT)
 			/* O_REASS doesn't exists in 7.2 version, so
 			 * increment opcode if it is after O_REASS
 			 */
 			dst->opcode++;
 
 		if (ccmdlen > ll) {
 			printf("ipfw: opcode %d size truncated\n",
 			    ccmd->opcode);
 			return EINVAL;
 		}
 	}
 
 	rule->_pad = tmp->_pad;
 	rule->set = tmp->set;
 	rule->rulenum = tmp->rulenum;
 	rule->cmd_len = tmp->cmd_len;
 	rule->act_ofs = tmp->act_ofs;
 	rule->next_rule = (struct ip_fw *)tmp->next_rule;
 	rule->cmd_len = tmp->cmd_len;
 	rule->id = 0; /* XXX see if is ok = 0 */
 	rule->pcnt = tmp->pcnt;
 	rule->bcnt = tmp->bcnt;
 	rule->timestamp = tmp->timestamp;
 
 	free (tmp, M_TEMP);
 	return 0;
 }
 
 /*
  * Named object api
  *
  */
 
 /*
  * Set up the per-chain service storage: a named-object hash in
  * ch->srvmap and a zero-filled array of IPFW_OBJECTS_DEFAULT pointers
  * in ch->srvstate.  The array allocation uses M_WAITOK.
  */
 void
 ipfw_init_srv(struct ip_fw_chain *ch)
 {
 	size_t state_sz;
 
 	state_sz = sizeof(void *) * IPFW_OBJECTS_DEFAULT;
 
 	ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT);
 	ch->srvstate = malloc(state_sz, M_IPFW, M_WAITOK | M_ZERO);
 }
 
 /*
  * Release what ipfw_init_srv() set up: the ch->srvstate array and the
  * ch->srvmap named-object hash.
  */
 void
 ipfw_destroy_srv(struct ip_fw_chain *ch)
 {
 
 	free(ch->srvstate, M_IPFW);
 	ipfw_objhash_destroy(ch->srvmap);
 }
 
 /*
  * Allocate a new index bitmask which can be used to enlarge/shrink
  * a named instance index.
  *
  * The mask holds one bit per item for each of the IPFW_MAX_SETS sets.
  * All bits start out set (free) except bit 0 of the first word, which is
  * cleared so that index 0 is skipped.  The mask is returned through @idx
  * and its per-set length, in BLOCK_ITEMS-sized blocks, through @pblocks.
  */
 void
 ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks)
 {
 	u_long *mask;
 	size_t total;
 
 	KASSERT((items % BLOCK_ITEMS) == 0,
 	   ("bitmask size needs to power of 2 and greater or equal to %zu",
 	    BLOCK_ITEMS));
 
 	/* Bytes needed for one set, then for all sets */
 	total = items / 8;
 	total *= IPFW_MAX_SETS;
 
 	mask = malloc(total, M_IPFW, M_WAITOK);
 	/* Mark all as free */
 	memset(mask, 0xFF, total);
 	mask[0] &= ~(u_long)1; /* Skip index 0 */
 
 	*idx = mask;
 	*pblocks = items / BLOCK_ITEMS;
 }
 
 /*
  * Copy the current index bitmask of @ni into the new mask *@idx.
  *
  * Both masks are laid out as IPFW_MAX_SETS consecutive per-set regions;
  * the old regions are ni->max_blocks words long, the new ones *@blocks
  * words long.  For every set, the old region is copied to the start of
  * the corresponding new region.
  */
 void
 ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks)
 {
 	u_long *src, *dst;
 	int src_blocks, dst_blocks;
 	int set;
 
 	src = ni->idx_mask;
 	src_blocks = ni->max_blocks;
 	dst = *idx;
 	dst_blocks = *blocks;
 
 	for (set = 0; set < IPFW_MAX_SETS; set++) {
 		memcpy(dst, src, src_blocks * sizeof(u_long));
 		/* Step to the start of the next set in each mask */
 		src += src_blocks;
 		dst += dst_blocks;
 	}
 }
 
 /*
  * Swap the index bitmask currently installed in @ni with the one passed
  * in through @idx / @blocks.  On return @idx and @blocks describe the
  * previously installed mask.
  */
 void
 ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks)
 {
 	u_long *prev_idx = ni->idx_mask;
 	int prev_blocks = ni->max_blocks;
 
 	/* Install the caller's mask... */
 	ni->idx_mask = *idx;
 	ni->max_blocks = *blocks;
 
 	/* ...and hand the previous one back through the same pointers */
 	*idx = prev_idx;
 	*blocks = prev_blocks;
 }
 
 /*
  * Free an index bitmask allocated with M_IPFW.
  * @blocks is accepted but not used here.
  */
 void
 ipfw_objhash_bitmap_free(void *idx, int blocks)
 {
 
 	free(idx, M_IPFW);
 }
 
 /*
  * Creates named hash instance.
  * Must be called without holding any locks.
  * Return pointer to new instance.
  */
 struct namedobj_instance *
 ipfw_objhash_create(uint32_t items)
 {
 	struct namedobj_instance *ni;
 	int i;
 	size_t size;
 
 	size = sizeof(struct namedobj_instance) +
 	    sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE +
 	    sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE;
 
 	ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO);
 	ni->nn_size = NAMEDOBJ_HASH_SIZE;
 	ni->nv_size = NAMEDOBJ_HASH_SIZE;
 
 	ni->names = (struct namedobjects_head *)(ni +1);
 	ni->values = &ni->names[ni->nn_size];
 
 	for (i = 0; i < ni->nn_size; i++)
 		TAILQ_INIT(&ni->names[i]);
 
 	for (i = 0; i < ni->nv_size; i++)
 		TAILQ_INIT(&ni->values[i]);
 
 	/* Set default hashing/comparison functions */
 	ni->hash_f = objhash_hash_name;
 	ni->cmp_f = objhash_cmp_name;
 
 	/* Allocate bitmask separately due to possible resize */
 	ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks);
 
 	return (ni);
 }
 
 /*
  * Free a named hash instance: first its separately allocated index
  * bitmask, then the instance itself.
  */
 void
 ipfw_objhash_destroy(struct namedobj_instance *ni)
 {
 
 	free(ni->idx_mask, M_IPFW);
 	free(ni, M_IPFW);
 }
 
 /*
  * Replace the name hashing (@hash_f) and comparison (@cmp_f) callbacks
  * of @ni.
  */
 void
 ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f,
     objhash_cmp_f *cmp_f)
 {
 
 	ni->hash_f = hash_f;
 	ni->cmp_f = cmp_f;
 }
 
 /*
  * Default name hash: FNV-1 32-bit hash of the NUL-terminated string @name.
  * @ni and @set are not used by this implementation.
  */
 static uint32_t
 objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set)
 {
 
 	return (fnv_32_str((const char *)name, FNV1_32_INIT));
 }
 
 /*
  * Default comparison callback.
  * Returns 0 when @no has the string name @name and belongs to @set,
  * 1 otherwise.
  */
 static int
 objhash_cmp_name(struct named_object *no, const void *name, uint32_t set)
 {
 
 	/* A mismatch in either the name or the set is a non-match */
 	return (strcmp(no->name, (const char *)name) != 0 || no->set != set);
 }
 
 /*
  * Map kernel index @val to a bucket number in the value hash of @ni.
  *
  * NOTE(review): the modulus is nv_size - 1, so the last bucket is never
  * selected -- confirm this is intended.
  */
 static uint32_t
 objhash_hash_idx(struct namedobj_instance *ni, uint32_t val)
 {
 
 	return (val % (ni->nv_size - 1));
 }
 
 /*
  * Look up an object by (@set, @name) using the hash and comparison
  * callbacks installed in @ni.
  *
  * Returns the first matching object in the bucket or NULL.
  */
 struct named_object *
 ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name)
 {
 	struct namedobjects_head *bucket;
 	struct named_object *no;
 
 	bucket = &ni->names[ni->hash_f(ni, name, set) % ni->nn_size];
 
 	/* TAILQ_FOREACH leaves @no NULL when the list is exhausted */
 	TAILQ_FOREACH(no, bucket, nn_next) {
 		if (ni->cmp_f(no, name, set) == 0)
 			break;
 	}
 
 	return (no);
 }
 
 /*
  * Find the name TLV with index @uidx in the @len bytes at @tlvs.
  * Check @tlvs for valid data inside.
  *
  * The buffer is walked as a sequence of ipfw_obj_ntlv records.  The walk
  * stops with NULL as soon as a record does not fit into the remaining
  * bytes, has an unexpected length, or (for the matching record) carries
  * a name rejected by ipfw_check_object_name_generic().
  *
  * Returns pointer to found TLV or NULL.
  */
 ipfw_obj_ntlv *
 ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv)
 {
 	ipfw_obj_ntlv *ntlv;
 	uintptr_t pa, pe;
 	int l;
 
 	if (tlvs == NULL || len <= 0)
 		return (NULL);
 
 	pa = (uintptr_t)tlvs;
 	pe = pa + len;
 	l = 0;
 	for (; pa < pe; pa += l) {
 		/* Never read a record that is only partly inside the buffer */
 		if (pe - pa < sizeof(*ntlv))
 			return (NULL);
 
 		ntlv = (ipfw_obj_ntlv *)pa;
 		l = ntlv->head.length;
 
 		if (l != sizeof(*ntlv))
 			return (NULL);
 
 		if (ntlv->idx != uidx)
 			continue;
 		/*
 		 * When userland has specified zero TLV type, do
 		 * not compare it with etlv. In some cases userland
 		 * doesn't know what type should it have. Use only
 		 * uidx and name for search named_object.
 		 */
 		if (ntlv->head.type != 0 &&
 		    ntlv->head.type != (uint16_t)etlv)
 			continue;
 
 		if (ipfw_check_object_name_generic(ntlv->name) != 0)
 			return (NULL);
 
 		return (ntlv);
 	}
 
 	return (NULL);
 }
 
 /*
  * Finds object config based on the name TLV referenced by @ti.
  * Note @ti structure contains unchecked data from userland.
  *
  * Returns 0 on success and fills in @pno with the found object.
  * Returns EINVAL if @ti carries no TLVs or no suitable name TLV is
  * found, and ESRCH (with *@pno set to NULL) if no object has that name.
  */
 int
 ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
     uint32_t etlv, struct named_object **pno)
 {
 	ipfw_obj_ntlv *ntlv;
 	struct named_object *found;
 
 	if (ti->tlvs == NULL)
 		return (EINVAL);
 
 	ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv);
 	if (ntlv == NULL)
 		return (EINVAL);
 
 	/*
 	 * Use set provided by @ti instead of @ntlv one.
 	 * This is needed due to different sets behavior
 	 * controlled by V_fw_tables_sets.
 	 */
 	found = ipfw_objhash_lookup_name(ni, ti->set, ntlv->name);
 	*pno = found;
 
 	return (found != NULL ? 0 : ESRCH);
 }
 
 /*
  * Find named object by name, considering also its TLV type.
  *
  * An object matches when the instance's comparison callback accepts
  * (@name, @set) and its etlv equals the low 16 bits of @type.
  * Returns the object or NULL.
  */
 struct named_object *
 ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set,
     uint32_t type, const char *name)
 {
 	struct namedobjects_head *bucket;
 	struct named_object *no;
 
 	bucket = &ni->names[ni->hash_f(ni, name, set) % ni->nn_size];
 
 	TAILQ_FOREACH(no, bucket, nn_next) {
 		if (ni->cmp_f(no, name, set) != 0)
 			continue;
 		if (no->etlv == (uint16_t)type)
 			return (no);
 	}
 
 	return (NULL);
 }
 
 /*
  * Find a named object by its kernel index @kidx using the value hash.
  * Returns the object or NULL.
  */
 struct named_object *
 ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx)
 {
 	struct namedobjects_head *bucket;
 	struct named_object *no;
 
 	bucket = &ni->values[objhash_hash_idx(ni, kidx)];
 
 	/* TAILQ_FOREACH leaves @no NULL when the list is exhausted */
 	TAILQ_FOREACH(no, bucket, nv_next) {
 		if (no->kidx == kidx)
 			break;
 	}
 
 	return (no);
 }
 
 /*
  * Returns 1 if @a and @b have the same name and the same set,
  * 0 otherwise.  @ni is not consulted.
  */
 int
 ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
     struct named_object *b)
 {
 
 	return (strcmp(a->name, b->name) == 0 && a->set == b->set);
 }
 
 /*
  * Link @no into both hashes of @ni (by name and by kernel index)
  * and bump the object count.
  */
 void
 ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no)
 {
 	struct namedobjects_head *bucket;
 
 	/* Name hash */
 	bucket = &ni->names[ni->hash_f(ni, no->name, no->set) % ni->nn_size];
 	TAILQ_INSERT_HEAD(bucket, no, nn_next);
 
 	/* Value (kidx) hash */
 	bucket = &ni->values[objhash_hash_idx(ni, no->kidx)];
 	TAILQ_INSERT_HEAD(bucket, no, nv_next);
 
 	ni->count++;
 }
 
 /*
  * Unlink @no from both hashes of @ni (by name and by kernel index)
  * and decrement the object count.
  */
 void
 ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no)
 {
 	struct namedobjects_head *bucket;
 
 	/* Name hash */
 	bucket = &ni->names[ni->hash_f(ni, no->name, no->set) % ni->nn_size];
 	TAILQ_REMOVE(bucket, no, nn_next);
 
 	/* Value (kidx) hash */
 	bucket = &ni->values[objhash_hash_idx(ni, no->kidx)];
 	TAILQ_REMOVE(bucket, no, nv_next);
 
 	ni->count--;
 }
 
 /*
  * Returns the object count kept in @ni (maintained by
  * ipfw_objhash_add() / ipfw_objhash_del()).
  */
 uint32_t
 ipfw_objhash_count(struct namedobj_instance *ni)
 {
 
 	return (ni->count);
 }
 
 /*
  * Count the objects in @ni whose etlv equals @type by walking every
  * bucket of the name hash.
  */
 uint32_t
 ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type)
 {
 	struct named_object *no;
 	uint32_t matched;
 	int bucket;
 
 	matched = 0;
 	for (bucket = 0; bucket < ni->nn_size; bucket++) {
 		TAILQ_FOREACH(no, &ni->names[bucket], nn_next) {
 			if (no->etlv != type)
 				continue;
 			matched++;
 		}
 	}
 
 	return (matched);
 }
 
 /*
  * Runs @f for each named object in @ni, passing @arg through.
  * It is safe to delete objects from callback: the successor is fetched
  * before the callback runs.
  *
  * Stops at and returns the first non-zero callback result; returns 0
  * if every callback returned 0.
  */
 int
 ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg)
 {
 	struct named_object *no, *next;
 	int i, ret;
 
 	for (i = 0; i < ni->nn_size; i++) {
 		no = TAILQ_FIRST(&ni->names[i]);
 		while (no != NULL) {
 			next = TAILQ_NEXT(no, nn_next);
 			ret = f(ni, no, arg);
 			if (ret != 0)
 				return (ret);
 			no = next;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Runs @f for each named object in @ni whose etlv equals @type,
  * passing @arg through.
  * It is safe to delete objects from callback: the successor is fetched
  * before the callback runs.
  *
  * Stops at and returns the first non-zero callback result; returns 0
  * if every callback returned 0.
  */
 int
 ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
     void *arg, uint16_t type)
 {
 	struct named_object *no, *next;
 	int i, ret;
 
 	for (i = 0; i < ni->nn_size; i++) {
 		no = TAILQ_FIRST(&ni->names[i]);
 		for (; no != NULL; no = next) {
 			next = TAILQ_NEXT(no, nn_next);
 			if (no->etlv != type)
 				continue;
 			ret = f(ni, no, arg);
 			if (ret != 0)
 				return (ret);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Marks index @idx as free in the index bitmask of @ni.
  *
  * Returns 0 on success, 1 if @idx lies beyond the bitmask or is
  * already marked free.
  */
 int
 ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx)
 {
 	u_long bit, *word;
 	int blk;
 
 	blk = idx / BLOCK_ITEMS;
 	if (blk >= ni->max_blocks)
 		return (1);
 
 	word = &ni->idx_mask[blk];
 	bit = (u_long)1 << (idx % BLOCK_ITEMS);
 
 	/* A set bit means the index is not in use */
 	if ((*word & bit) != 0)
 		return (1);
 
 	/* Mark as free */
 	*word |= bit;
 
 	/* Update free offset */
 	if (ni->free_off[0] > blk)
 		ni->free_off[0] = blk;
 
 	return (0);
 }
 
 /*
  * Allocate new index in given instance and store it in @pidx.
  *
  * The search starts at block ni->free_off[0] and takes the lowest set
  * (free) bit of the first block that has one; free_off[0] is then moved
  * to that block.
  *
  * Returns 0 on success, 1 if no free index was found.
  */
 int
 ipfw_objhash_alloc_idx(void *n, uint16_t *pidx)
 {
 	struct namedobj_instance *ni;
 	u_long *word;
 	int blk, pos;
 
 	ni = (struct namedobj_instance *)n;
 
 	for (blk = ni->free_off[0]; blk < ni->max_blocks; blk++) {
 		word = &ni->idx_mask[blk];
 
 		/* ffsl() is 1-based; 0 means no bit is set in this block */
 		pos = ffsl(*word);
 		if (pos == 0)
 			continue;
 
 		/* Mark as busy */
 		*word &= ~((u_long)1 << (pos - 1));
 
 		ni->free_off[0] = blk;
 
 		*pidx = BLOCK_ITEMS * blk + pos - 1;
 		return (0);
 	}
 
 	return (1);
 }
 
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_table.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_table.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_table.c	(revision 343619)
@@ -1,3362 +1,3361 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Lookup table support for ipfw.
  *
  * This file contains handlers for all generic tables' operations:
  * add/del/flush entries, list/dump tables etc..
  *
  * Table data modification is protected by both UH and runtime lock
  * while reading configuration/data is protected by UH lock.
  *
  * Lookup algorithms for all table types are located in ip_fw_table_algo.c
  */
 
 #include "opt_ipfw.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
-#include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
 /*
  * Per-table configuration.
  *
  * Table has the following `type` concepts:
  *
  * `no.type` represents lookup key type (addr, ifp, uid, etc..)
  * vmask represents bitmask of table values which are present at the moment.
  * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
  * single-value-for-all approach.
  *
  * The embedded named_object comes first, and code in this file casts a
  * struct named_object pointer returned by the object hash to
  * struct table_config.  no.refcnt is used as the table reference count.
  */
 struct table_config {
 	struct named_object	no;
 	uint8_t		tflags;		/* type flags */
 	uint8_t		locked;		/* 1 if locked from changes */
 	uint8_t		linked;		/* 1 if already linked */
 	uint8_t		ochanged;	/* used by set swapping */
 	uint8_t		vshared;	/* 1 if using shared value array */
 	uint8_t		spare[3];
 	uint32_t	count;		/* Number of records */
 	uint32_t	limit;		/* Max number of records */
 	uint32_t	vmask;		/* bitmask with supported values */
 	uint32_t	ocount;		/* used by set swapping */
 	uint64_t	gencnt;		/* generation count */
 	char		tablename[64];	/* table name */
 	struct table_algo	*ta;	/* Callbacks for given algo */
 	void		*astate;	/* algorithm state */
 	struct table_info	ti_copy;	/* data to put to table_info */
 	struct namedobj_instance	*vi;	/* NOTE(review): purpose not visible here -- confirm */
 };
 
 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
     struct table_config **tc);
 static struct table_config *find_table(struct namedobj_instance *ni,
     struct tid_info *ti);
 static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
     struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
 static void free_table_config(struct namedobj_instance *ni,
     struct table_config *tc);
 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
 static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
 #define	OP_ADD	1
 #define	OP_DEL	0
 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
     struct sockopt_data *sd);
 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
     ipfw_xtable_info *i);
 static int dump_table_tentry(void *e, void *arg);
 static int dump_table_xentry(void *e, void *arg);
 
 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
     struct tid_info *b);
 
 static int check_table_name(const char *name);
 static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
     struct table_config *tc, struct table_info *ti, uint32_t count);
 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
 
 static struct table_algo *find_table_algo(struct tables_config *tableconf,
     struct tid_info *ti, char *name);
 
 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
 
 #define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
 #define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))
 
 #define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */
 
 void
 rollback_toperation_state(struct ip_fw_chain *ch, void *object)
 {
 	struct tables_config *tcfg;
 	struct op_state *os;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 	TAILQ_FOREACH(os, &tcfg->state_list, next)
 		os->func(object, os);
 }
 
 void
 add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
 {
 	struct tables_config *tcfg;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 	TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
 }
 
 void
 del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
 {
 	struct tables_config *tcfg;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 	TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
 }
 
 /*
  * Take a reference on table @tc (plain, non-atomic increment of
  * no.refcnt).
  */
 void
 tc_ref(struct table_config *tc)
 {
 
 	tc->no.refcnt++;
 }
 
 /*
  * Drop a reference on table @tc (plain, non-atomic decrement of
  * no.refcnt).  Nothing is freed here.
  */
 void
 tc_unref(struct table_config *tc)
 {
 
 	tc->no.refcnt--;
 }
 
 /*
  * Returns a pointer to slot @kidx of the chain's value array
  * (ch->valuestate).  @tc is not used.
  */
 static struct table_value *
 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
 {
 
 	return (&((struct table_value *)ch->valuestate)[kidx]);
 }
 
 
 /*
  * Checks if we're able to insert/update entry @tei into table
  * w.r.t @tc limits.
  * May alter @tei to indicate insertion error / insert
  * options.
  *
  * Returns 0 if operation can be performed, EFBIG if the table is
  * full and @tei does not request an update.
  */
 static int
 check_table_limit(struct table_config *tc, struct tentry_info *tei)
 {
 
 	if (tc->limit != 0 && tc->count >= tc->limit) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) != 0) {
 			/*
 			 * We have UPDATE flag set.
 			 * Permit updating record (if found),
 			 * but restrict adding new one since we've
 			 * already hit the limit.
 			 */
 			tei->flags |= TEI_FLAGS_DONTADD;
 			return (0);
 		}
 
 		/* Notify userland on error cause */
 		tei->flags |= TEI_FLAGS_LIMIT;
 		return (EFBIG);
 	}
 
 	/* No limit configured, or still below it */
 	return (0);
 }
 
 /*
  * Convert algorithm callback return code into
  * one of pre-defined states known by userland and OR it into
  * @tei->flags.
  *
  * On success, a delete is reported as DELETED and an add that
  * actually added something (@num != 0) as ADDED; ENOENT and EEXIST
  * map to NOTFOUND and EXISTS, any other error to ERROR.
  */
 static void
 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
 {
 	int flag = 0;
 
 	if (error == 0) {
 		if (op == OP_DEL)
 			flag = TEI_FLAGS_DELETED;
 		else if (op == OP_ADD && num != 0)
 			flag = TEI_FLAGS_ADDED;
 	} else if (error == ENOENT) {
 		flag = TEI_FLAGS_NOTFOUND;
 	} else if (error == EEXIST) {
 		flag = TEI_FLAGS_EXISTS;
 	} else {
 		flag = TEI_FLAGS_ERROR;
 	}
 
 	tei->flags |= flag;
 }
 
 /*
  * Creates and references table with default parameters.
  * The value mask is set to IPFW_VTYPE_LEGACY; everything else in the
  * table info passed to create_table_internal() is zero.
  * @pkidx is handed through to create_table_internal().
  * Used for table auto-creation to support old binaries.
  *
  * Returns 0 on success.
  */
 static int
 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
     uint16_t *pkidx)
 {
 	ipfw_xtable_info xi;
 
 	memset(&xi, 0, sizeof(xi));
 	/* Set default value mask for legacy clients */
 	xi.vmask = IPFW_VTYPE_LEGACY;
 
 	return (create_table_internal(ch, ti, NULL, &xi, pkidx, 1));
 }
 
 /*
  * Find and reference existing table optionally
  * creating new one.
  *
  * Must be called with the UH write lock held.
  *
  * If the table exists, it must have the subtype requested in @ti and
  * must not be locked; for a single-entry add the table limit is checked
  * as well.  If it does not exist, a new table is created only for adds
  * whose @tei has TEI_FLAGS_COMPAT set.
  *
  * Saves found table config into @ptc.
  * Note function may drop/acquire UH_WLOCK.
  * Returns 0 if table was found/created and referenced
  * or non-zero return code (EINVAL, EACCES, EFBIG, ESRCH or the error
  * from table creation).
  */
 static int
 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint32_t count, int op,
     struct table_config **ptc)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc;
 	uint16_t kidx;
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	tc = NULL;
 	if ((tc = find_table(ni, ti)) != NULL) {
 		/* check table type */
 		if (tc->no.subtype != ti->type)
 			return (EINVAL);
 
 		if (tc->locked != 0)
 			return (EACCES);
 
 		/* Try to exit early on limit hit */
 		if (op == OP_ADD && count == 1 &&
 		    check_table_limit(tc, tei) != 0)
 			return (EFBIG);
 
 		/* Reference and return */
 		tc->no.refcnt++;
 		*ptc = tc;
 		return (0);
 	}
 
 	/* Nothing to delete from a table that does not exist */
 	if (op == OP_DEL)
 		return (ESRCH);
 
 	/* Compatibility mode: create new table for old clients */
 	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
 		return (ESRCH);
 
 	/* Table creation runs without the UH lock; retake it afterwards */
 	IPFW_UH_WUNLOCK(ch);
 	error = create_table_compat(ch, ti, &kidx);
 	IPFW_UH_WLOCK(ch);
 	
 	if (error != 0)
 		return (error);
 
 	/* Look the new table up by the kidx reported by creation */
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
 	KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
 
 	/* OK, now we've got referenced table. */
 	*ptc = tc;
 	return (0);
 }
 
 /*
  * Rolls back already @added to @tc entries using state array @ta_buf_m.
  * Must be called with the UH write lock held.
  *
  * Assume the following layout:
  * 1) ADD state (ta_buf_m[0] ... ta_buf_m[added - 1]) for handling update cases
  * 2) DEL state (ta_buf_m[count] ... ta_buf_m[count + added - 1])
  *   for storing deleted state
  *
  * Entries flagged TEI_FLAGS_UPDATED are restored by adding them again;
  * all other entries are deleted and tc->count is reduced accordingly.
  */
 static void
 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
     struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
     uint32_t count, uint32_t added)
 {
 	struct table_algo *ta;
 	struct tentry_info *ptei;
 	caddr_t v, vv;
 	size_t ta_buf_sz;
 	int error, i;
 	uint32_t num;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ta = tc->ta;
 	ta_buf_sz = ta->ta_buf_size;
 	/* @v walks the ADD state, @vv the DEL state @count slots further on */
 	v = ta_buf_m;
 	vv = v + count * ta_buf_sz;
 	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
 		ptei = &tei[i];
 		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
 
 			/*
 			 * We have old value stored by previous
 			 * call in @ptei->value. Do add once again
 			 * to restore it.
 			 */
 			error = ta->add(tc->astate, tinfo, ptei, v, &num);
 			KASSERT(error == 0, ("rollback UPDATE fail"));
 			KASSERT(num == 0, ("rollback UPDATE fail2"));
 			continue;
 		}
 
 		/* Entry was newly inserted: prepare DEL state and remove it */
 		error = ta->prepare_del(ch, ptei, vv);
 		KASSERT(error == 0, ("pre-rollback INSERT failed"));
 		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
 		KASSERT(error == 0, ("rollback INSERT failed"));
 		tc->count -= num;
 	}
 }
 
 /*
  * Prepares add/del state for all @count entries in @tei.
  * Uses either stack buffer (@ta_buf) or allocates a new one.
  * Stores pointer to allocated buffer back to @ta_buf.
  *
  * For @count == 1 the caller's buffer in *@ta_buf is used and its first
  * TA_BUF_SZ bytes are zeroed.  Otherwise a zeroed M_TEMP buffer is
  * allocated with M_WAITOK.  *@ta_buf is updated even when an error is
  * returned.
  *
  * Returns 0 on success, or the first error reported by the algorithm's
  * prepare_add/prepare_del callback.
  */
 static int
 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
     struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
 {
 	caddr_t ta_buf_m, v;
 	size_t ta_buf_sz, sz;
 	struct tentry_info *ptei;
 	int error, i;
 
 	error = 0;
 	ta_buf_sz = ta->ta_buf_size;
 	if (count == 1) {
 		/* Single add/delete, use on-stack buffer */
 		memset(*ta_buf, 0, TA_BUF_SZ);
 		ta_buf_m = *ta_buf;
 	} else {
 
 		/*
 		 * Multiple adds/deletes, allocate larger buffer
 		 *
 		 * Note we need 2xcount buffer for add case:
 		 * we have hold both ADD state
 		 * and DELETE state (this may be needed
 		 * if we need to rollback all changes)
 		 */
 		sz = count * ta_buf_sz;
 		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
 		    M_WAITOK | M_ZERO);
 	}
 
 	/* One ta_buf_sz-sized state slot per entry */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta_buf_sz) {
 		ptei = &tei[i];
 		error = (op == OP_ADD) ?
 		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
 
 		/*
 		 * Some syntax error (incorrect mask, or address, or
 		 * anything). Return error regardless of atomicity
 		 * settings.
 		 */
 		if (error != 0)
 			break;
 	}
 
 	*ta_buf = ta_buf_m;
 	return (error);
 }
 
 /*
  * Flushes allocated state for each @count entries in @tei.
  * Frees @ta_buf_m if differs from stack buffer @ta_buf.
  *
  * Also frees and clears each entry's ptv pointer if it is set.
  * When @rollback is non-zero, the second run of @count state slots
  * (starting at ta_buf_m + count * ta_buf_size) is flushed as well.
  */
 static void
 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
     struct tentry_info *tei, uint32_t count, int rollback,
     caddr_t ta_buf_m, caddr_t ta_buf)
 {
 	caddr_t v;
 	struct tentry_info *ptei;
 	size_t ta_buf_sz;
 	int i;
 
 	ta_buf_sz = ta->ta_buf_size;
 
 	/* Run cleaning callback anyway */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta_buf_sz) {
 		ptei = &tei[i];
 		ta->flush_entry(ch, ptei, v);
 		if (ptei->ptv != NULL) {
 			free(ptei->ptv, M_IPFW);
 			ptei->ptv = NULL;
 		}
 	}
 
 	/* Clean up "deleted" state in case of rollback */
 	if (rollback != 0) {
 		v = ta_buf_m + count * ta_buf_sz;
 		for (i = 0; i < count; i++, v += ta_buf_sz)
 			ta->flush_entry(ch, &tei[i], v);
 	}
 
 	/* Only a buffer we did not get from the caller is ours to free */
 	if (ta_buf_m != ta_buf)
 		free(ta_buf_m, M_TEMP);
 }
 
 
 /*
  * Rollback callback for an in-progress add operation.
  *
  * @_state is the op_state embedded at the start of a tableop_state.
  * Nothing happens unless @object is that operation's table (ts->tc) or
  * chain (ts->ch).  Otherwise the table values are rolled back and the
  * state is flagged as modified.  Expects the UH write lock to be held.
  */
 static void
 rollback_add_entry(void *object, struct op_state *_state)
 {
 	struct tableop_state *ts;
 
 	ts = (struct tableop_state *)_state;
 
 	if (ts->tc == object || ts->ch == object) {
 		IPFW_UH_WLOCK_ASSERT(ts->ch);
 
 		/* Call specific unlockers */
 		rollback_table_values(ts);
 
 		/* Indicate we've called */
 		ts->modified = 1;
 	}
 }
 
 /*
  * Adds/updates one or more entries in table @ti.
  *
  * @ch:    chain owning the table.
  * @ti:    table identifier handed to find_ref_table().
  * @tei:   array of @count entries to add; per-entry results are stored
  *         back into it via store_tei_result().
  * @flags: if IPFW_CTF_ATOMIC is set, the first failing entry causes all
  *         entries added so far to be rolled back.
  * @count: number of entries in @tei.
  *
  * Function may drop/reacquire UH wlock multiple times due to
  * items alloc, algorithm callbacks (check_space), value linkage
  * (new values, value storage realloc), etc..
  * Other processes like other adds (which may involve storage resize),
  * table swaps (which changes table data and may change algo type),
  * table modify (which may change value mask) may be executed
  * simultaneously so we need to deal with it.
  *
  * The following approach was implemented:
  * we have per-chain linked list, protected with UH lock.
  * add_table_entry prepares special on-stack structure which is passed
  * to its descendants. Users add this structure to this list before unlock.
  * After performing needed operations and acquiring UH lock back, each user
  * checks if structure has changed. If true, it rolls local state back and
  * returns without error to the caller.
  * add_table_entry() on its own checks if structure has changed and restarts
  * its operation from the beginning (goto restart).
  *
  * Functions which are modifying fields of interest (currently
  *   resize_shared_value_storage() and swap_tables() )
  * traverse given list while holding UH lock immediately before
  * performing their operations, calling function provided by list entry
  * ( currently rollback_add_entry  ) which performs rollback for all necessary
  * state and sets appropriate values in structure indicating rollback
  * has happened.
  *
  * Algo interaction:
  * Function references @ti first to ensure table won't
  * disappear or change its type.
  * After that, prepare_add callback is called for each @tei entry.
  * Next, we try to add each entry under UH+WHLOCK
  * using add() callback.
  * Finally, we free all state by calling flush_entry callback
  * for each @tei.
  *
  * Returns 0 on success, otherwise the first error encountered.
  */
 int
 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint8_t flags, uint32_t count)
 {
 	struct table_config *tc;
 	struct table_algo *ta;
 	uint16_t kidx;
 	int error, first_error, i, rollback;
 	uint32_t num, numadd;
 	struct tentry_info *ptei;
 	struct tableop_state ts;
 	char ta_buf[TA_BUF_SZ];
 	caddr_t ta_buf_m, v;
 
 	memset(&ts, 0, sizeof(ts));
 	ta = NULL;
 	IPFW_UH_WLOCK(ch);
 
 	/*
 	 * Find and reference existing table.
 	 */
 restart:
 	/*
 	 * ts.modified is zero on first entry; it is non-zero only when we
 	 * got here via "goto restart", in which case the batch buffer
 	 * prepared by the previous attempt has to be released (without
 	 * holding UH lock) before starting over.
 	 */
 	if (ts.modified != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		flush_batch_buffer(ch, ta, tei, count, rollback,
 		    ta_buf_m, ta_buf);
 		memset(&ts, 0, sizeof(ts));
 		ta = NULL;
 		IPFW_UH_WLOCK(ch);
 	}
 
 	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	ta = tc->ta;
 
 	/* Fill in tablestate */
 	ts.ch = ch;
 	ts.opstate.func = rollback_add_entry;
 	ts.tc = tc;
 	ts.vshared = tc->vshared;
 	ts.vmask = tc->vmask;
 	ts.ta = ta;
 	ts.tei = tei;
 	ts.count = count;
 	rollback = 0;
 	/* Register our state so concurrent modifiers can notify us */
 	add_toperation_state(ch, &ts);
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Allocate memory and prepare record(s) */
 	/* Pass stack buffer by default */
 	ta_buf_m = ta_buf;
 	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
 
 	IPFW_UH_WLOCK(ch);
 	del_toperation_state(ch, &ts);
 	/* Drop reference we've used in first search */
 	tc->no.refcnt--;
 
 	/* Check prepare_batch_buffer() error */
 	if (error != 0)
 		goto cleanup;
 
 	/*
 	 * Check if table swap has happened.
 	 * (so table algo might be changed).
 	 * Restart operation to achieve consistent behavior.
 	 */
 	if (ts.modified != 0)
 		goto restart;
 
 	/*
 	 * Link all values to shared/per-table value array.
 	 *
 	 * May release/reacquire UH_WLOCK.
 	 */
 	error = ipfw_link_table_values(ch, &ts);
 	if (error != 0)
 		goto cleanup;
 	if (ts.modified != 0)
 		goto restart;
 
 	/*
 	 * Ensure we are able to add all entries without additional
 	 * memory allocations. May release/reacquire UH_WLOCK.
 	 */
 	kidx = tc->no.kidx;
 	error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
 	if (error != 0)
 		goto cleanup;
 	if (ts.modified != 0)
 		goto restart;
 
 	/* We've got valid table in @tc. Let's try to add data */
 	kidx = tc->no.kidx;
 	ta = tc->ta;
 	numadd = 0;
 	first_error = 0;
 
 	IPFW_WLOCK(ch);
 
 	/* @v walks the prepared buffer, one ta_buf_size slot per entry */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
 		ptei = &tei[i];
 		num = 0;
 		/* check limit before adding */
 		if ((error = check_table_limit(tc, ptei)) == 0) {
 			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
 			    ptei, v, &num);
 			/* Set status flag to inform userland */
 			store_tei_result(ptei, OP_ADD, error, num);
 		}
 		if (error == 0) {
 			/* Update number of records to ease limit checking */
 			tc->count += num;
 			numadd += num;
 			continue;
 		}
 
 		/* Only the first failure is reported to the caller */
 		if (first_error == 0)
 			first_error = error;
 
 		/*
 		 * Some error has happened. Check our atomicity
 		 * settings: continue if atomicity is not required,
 		 * rollback changes otherwise.
 		 */
 		if ((flags & IPFW_CTF_ATOMIC) == 0)
 			continue;
 
 		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
 		    tei, ta_buf_m, count, i);
 
 		rollback = 1;
 		break;
 	}
 
 	IPFW_WUNLOCK(ch);
 
 	ipfw_garbage_table_values(ch, tc, tei, count, rollback);
 
 	/* Permit post-add algorithm grow/rehash. */
 	if (numadd != 0)
 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
 
 	/* Return first error to user, if any */
 	error = first_error;
 
 cleanup:
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Release per-entry state; done without holding UH lock */
 	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
 	
 	return (error);
 }
 
 /*
  * Deletes one or more entries in table @ti.
  *
  * @ch:    chain owning the table.
  * @ti:    table identifier handed to find_ref_table().
  * @tei:   array of @count entries to delete; per-entry results are
  *         stored back into it via store_tei_result().
  * @flags: currently not examined by this function.
  * @count: number of entries in @tei.
  *
  * The table is referenced while the UH lock is dropped for
  * prepare_batch_buffer(); that reference is always released once the
  * lock is reacquired, regardless of whether preparation succeeded.
  *
  * Returns 0 on success, EINVAL if the table algorithm changed while
  * unlocked, otherwise the first error encountered.
  */
 int
 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint8_t flags, uint32_t count)
 {
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct tentry_info *ptei;
 	uint16_t kidx;
 	int error, first_error, i;
 	uint32_t num, numdel;
 	char ta_buf[TA_BUF_SZ];
 	caddr_t ta_buf_m, v;
 
 	/*
 	 * Find and reference existing table.
 	 */
 	IPFW_UH_WLOCK(ch);
 	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	ta = tc->ta;
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Allocate memory and prepare record(s) */
 	/* Pass stack buffer by default */
 	ta_buf_m = ta_buf;
 	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
 
 	IPFW_UH_WLOCK(ch);
 
 	/*
 	 * Drop reference we've used in first search.
 	 * This has to happen on the error path as well, otherwise
 	 * the table stays referenced forever.
 	 */
 	tc->no.refcnt--;
 
 	/* Check prepare_batch_buffer() error */
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		goto cleanup;
 	}
 
 	/*
 	 * Check if table algo is still the same.
 	 * (changed ta may be the result of table swap).
 	 */
 	if (ta != tc->ta) {
 		IPFW_UH_WUNLOCK(ch);
 		error = EINVAL;
 		goto cleanup;
 	}
 
 	kidx = tc->no.kidx;
 	numdel = 0;
 	first_error = 0;
 
 	IPFW_WLOCK(ch);
 	/* @v walks the prepared buffer, one ta_buf_size slot per entry */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
 		ptei = &tei[i];
 		num = 0;
 		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
 		    &num);
 		/* Save state for userland */
 		store_tei_result(ptei, OP_DEL, error, num);
 		if (error != 0 && first_error == 0)
 			first_error = error;
 		tc->count -= num;
 		numdel += num;
 	}
 	IPFW_WUNLOCK(ch);
 
 	/* Unlink non-used values */
 	ipfw_garbage_table_values(ch, tc, tei, count, 0);
 
 	if (numdel != 0) {
 		/* Run post-del hook to permit shrinking */
 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
 	}
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Return first error to user, if any */
 	error = first_error;
 
 cleanup:
 	/* Release per-entry state; done without holding UH lock */
 	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
 
 	return (error);
 }
 
 /*
  * Ensure that table @tc has enough space to add @count entries without
  * need for reallocation.
  *
  * @ts may be NULL; when non-NULL it is registered on the chain's
  * operation-state list while UH lock is dropped, and ts->modified is
  * checked afterwards to detect concurrent changes.
  *
  * Must be called with UH_WLOCK held. The lock may be released and
  * reacquired inside, but is always held again on return.
  *
  * Callbacks order:
  * 0) need_modify (UH_WLOCK) - checks if @count items can be added w/o resize.
  *
  * 1) prepare_mod (no locks) - alloc new state based on @pflags.
  * 2) fill_mod (UH_WLOCK) - copy old data into new storage
  * 3) modify (UH_WLOCK + WLOCK) - switch pointers
  * 4) flush_mod (UH_WLOCK) - free state, if needed
  *
  * Returns 0 on success.
  */
 static int
 check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
     struct table_config *tc, struct table_info *ti, uint32_t count)
 {
 	struct table_algo *ta;
 	uint64_t pflags;
 	char ta_buf[TA_BUF_SZ];
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	error = 0;
 	ta = tc->ta;
 	/* Algorithms without need_modify never require resizing */
 	if (ta->need_modify == NULL)
 		return (0);
 
 	/* Acquire reference not to lose @tc between locks/unlocks */
 	tc->no.refcnt++;
 
 	/*
 	 * TODO: think about avoiding race between large add/large delete
 	 * operation on algorithm which implements shrinking along with
 	 * growing.
 	 */
 	while (true) {
 		pflags = 0;
 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
 			error = 0;
 			break;
 		}
 
 		/* We have to shrink/grow table */
 		if (ts != NULL)
 			add_toperation_state(ch, ts);
 		IPFW_UH_WUNLOCK(ch);
 
 		memset(&ta_buf, 0, sizeof(ta_buf));
 		error = ta->prepare_mod(ta_buf, &pflags);
 
 		IPFW_UH_WLOCK(ch);
 		if (ts != NULL)
 			del_toperation_state(ch, ts);
 
 		if (error != 0)
 			break;
 
 		if (ts != NULL && ts->modified != 0) {
 
 			/*
 			 * Swap operation has happened
 			 * so we're currently operating on other
 			 * table data. Stop doing this.
 			 */
 			ta->flush_mod(ta_buf);
 			break;
 		}
 
 		/* Check if we still need to alter table */
 		ti = KIDX_TO_TI(ch, tc->no.kidx);
 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
 
 			/*
 			 * Other thread has already performed resize.
 			 * Flush our state and return.
 			 *
 			 * UH lock must stay held here: the reference
 			 * drop below and all callers rely on it.
 			 */
 			ta->flush_mod(ta_buf);
 			break;
 		}
 	
 		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
 		if (error == 0) {
 			/* Do actual modification */
 			IPFW_WLOCK(ch);
 			ta->modify(tc->astate, ti, ta_buf, pflags);
 			IPFW_WUNLOCK(ch);
 		}
 
 		/* Anyway, flush data and retry */
 		ta->flush_mod(ta_buf);
 	}
 
 	tc->no.refcnt--;
 	return (error);
 }
 
 /*
  * Legacy (v0) handler adding or deleting a single table record.
  * Data layout (v0):
  * Request: [ ip_fw3_opheader ipfw_table_xentry ]
  *
  * The record is added when the opcode is IP_FW_TABLE_XADD and
  * deleted otherwise.
  *
  * Returns 0 on success
  */
 static int
 manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct tentry_info tei;
 	struct tid_info ti;
 	struct table_value v;
 	ipfw_table_xentry *xent;
 	int hdrlen, read;
 
 	hdrlen = offsetof(ipfw_table_xentry, k);
 	read = sizeof(ip_fw3_opheader);
 
 	/* Request has to hold the opheader and the fixed xentry part */
 	if (sd->valsize < (sizeof(*op3) + hdrlen))
 		return (EINVAL);
 
 	/* The xentry length must cover its header and fit the request */
 	xent = (ipfw_table_xentry *)(op3 + 1);
 	if (xent->len < hdrlen)
 		return (EINVAL);
 	if (xent->len + read > sd->valsize)
 		return (EINVAL);
 
 	/* Build the kernel entry description */
 	memset(&tei, 0, sizeof(tei));
 	ipfw_import_table_value_legacy(xent->value, &v);
 	tei.pvalue = &v;
 	tei.paddr = &xent->k;
 	tei.masklen = xent->masklen;
 	/* Mark the request as coming from the old interface */
 	tei.flags = TEI_FLAGS_COMPAT;
 	if (xent->type == IPFW_TABLE_ADDR) {
 		/* Address family is deduced from the key length */
 		tei.subtype = (xent->len - hdrlen == sizeof(in_addr_t)) ?
 		    AF_INET : AF_INET6;
 	}
 
 	/* Build the table identifier */
 	memset(&ti, 0, sizeof(ti));
 	ti.type = xent->type;
 	ti.uidx = xent->tbl;
 
 	if (op3->opcode == IP_FW_TABLE_XADD)
 		return (add_table_entry(ch, &ti, &tei, 0, 1));
 
 	return (del_table_entry(ch, &ti, &tei, 0, 1));
 }
 
 /*
  * Current (v1) handler adding or deleting a batch of table records.
  * Data layout (v1)(current):
  * Request: [ ipfw_obj_header
  *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
  * ]
  *
  * All entries have to reference the same table index. Per-entry
  * results and values are written back into the request buffer.
  *
  * Returns 0 on success
  */
 static int
 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct tentry_info single, *cur, *entries;
 	struct tid_info ti;
 	ipfw_obj_header *oh;
 	ipfw_obj_ctlv *ctlv;
 	ipfw_obj_tentry *first, *tent;
 	int error, i, kidx, read;
 
 	/* Both headers must be present */
 	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
 		return (EINVAL);
 
 	/* The whole request has to be available in the kernel buffer */
 	if (sd->valsize != sd->kavail)
 		return (EINVAL);
 
 	oh = (ipfw_obj_header *)sd->kbuf;
 
 	/* Name TLV must have the expected size */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
 	first = (ipfw_obj_tentry *)(ctlv + 1);
 
 	/* Container TLV must span the rest of the request... */
 	read = sizeof(*oh);
 	if (ctlv->head.length + read != sd->valsize)
 		return (EINVAL);
 
 	/* ...and consist of exactly ctlv->count entries */
 	read += sizeof(*ctlv);
 	if (ctlv->count * sizeof(*first) + read != sd->valsize)
 		return (EINVAL);
 
 	/* An empty batch is trivially done */
 	if (ctlv->count == 0)
 		return (0);
 
 	/*
 	 * Mark entire buffer as "read".
 	 * This instructs sopt api write it back
 	 * after function return.
 	 */
 	ipfw_get_sopt_header(sd, sd->valsize);
 
 	/* Validate every entry: fixed length, same table index */
 	kidx = first->idx;
 	for (i = 0; i < ctlv->count; i++) {
 		tent = &first[i];
 		if (tent->head.length != sizeof(*tent))
 			return (EINVAL);
 		if (tent->idx != kidx)
 			return (ENOTSUP);
 	}
 
 	/* Build the table identifier */
 	objheader_to_ti(oh, &ti);
 	ti.type = oh->ntlv.type;
 	ti.uidx = kidx;
 
 	/* A single entry is served from the stack, batches from the heap */
 	if (ctlv->count != 1) {
 		entries = malloc(ctlv->count * sizeof(single), M_TEMP,
 		    M_WAITOK | M_ZERO);
 	} else {
 		memset(&single, 0, sizeof(single));
 		entries = &single;
 	}
 
 	/* Convert userland entries into kernel request objects */
 	for (i = 0; i < ctlv->count; i++) {
 		tent = &first[i];
 		cur = &entries[i];
 
 		cur->paddr = &tent->k;
 		cur->subtype = tent->subtype;
 		cur->masklen = tent->masklen;
 		if (tent->head.flags & IPFW_TF_UPDATE)
 			cur->flags |= TEI_FLAGS_UPDATE;
 
 		ipfw_import_table_value_v1(&tent->v.value);
 		cur->pvalue = (struct table_value *)&tent->v.value;
 	}
 
 	if (oh->opheader.opcode == IP_FW_TABLE_XADD)
 		error = add_table_entry(ch, &ti, entries, ctlv->flags,
 		    ctlv->count);
 	else
 		error = del_table_entry(ch, &ti, entries, ctlv->flags,
 		    ctlv->count);
 
 	/* Report per-entry outcome and values back to userland */
 	for (i = 0; i < ctlv->count; i++) {
 		tent = &first[i];
 		cur = &entries[i];
 
 		if (cur->flags & TEI_FLAGS_ADDED)
 			tent->result = IPFW_TR_ADDED;
 		else if (cur->flags & TEI_FLAGS_DELETED)
 			tent->result = IPFW_TR_DELETED;
 		else if (cur->flags & TEI_FLAGS_UPDATED)
 			tent->result = IPFW_TR_UPDATED;
 		else if (cur->flags & TEI_FLAGS_LIMIT)
 			tent->result = IPFW_TR_LIMIT;
 		else if (cur->flags & TEI_FLAGS_ERROR)
 			tent->result = IPFW_TR_ERROR;
 		else if (cur->flags & TEI_FLAGS_NOTFOUND)
 			tent->result = IPFW_TR_NOTFOUND;
 		else if (cur->flags & TEI_FLAGS_EXISTS)
 			tent->result = IPFW_TR_EXISTS;
 		ipfw_export_table_value_v1(cur->pvalue, &tent->v.value);
 	}
 
 	if (entries != &single)
 		free(entries, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Looks up an entry in given table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_obj_tentry ]
  * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
  *
  * On a successful lookup the entry's value is exported into the
  * reply's ipfw_obj_tentry.
  *
  * Returns 0 on success, EINVAL on malformed request or table type
  * mismatch, ESRCH if the table does not exist, ENOTSUP if the table
  * algorithm has no find_tentry callback, or the callback's error.
  */
 static int
 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_tentry *tent;
 	ipfw_obj_header *oh;
 	struct tid_info ti;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_info *kti;
 	struct table_value *pval;
 	struct namedobj_instance *ni;
 	int error;
 	size_t sz;
 
 	/* Request must be exactly header + one entry */
 	sz = sizeof(*oh) + sizeof(*tent);
 	if (sd->valsize != sz)
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 	if (oh == NULL)
 		return (EINVAL);
 	tent = (ipfw_obj_tentry *)(oh + 1);
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	objheader_to_ti(oh, &ti);
 	ti.type = oh->ntlv.type;
 	ti.uidx = tent->idx;
 
 	IPFW_UH_RLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 
 	/*
 	 * Find existing table and check its type.
 	 */
 	if ((tc = find_table(ni, &ti)) == NULL) {
 		IPFW_UH_RUNLOCK(ch);
 		return (ESRCH);
 	}
 
 	/* check table type */
 	if (tc->no.subtype != ti.type) {
 		IPFW_UH_RUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	kti = KIDX_TO_TI(ch, tc->no.kidx);
 	ta = tc->ta;
 
 	/* Lookup is optional for algorithms; do not leak the lock */
 	if (ta->find_tentry == NULL) {
 		IPFW_UH_RUNLOCK(ch);
 		return (ENOTSUP);
 	}
 
 	error = ta->find_tentry(tc->astate, kti, tent);
 	if (error == 0) {
 		/* Translate value index into the actual value */
 		pval = get_table_value(ch, tc, tent->v.kidx);
 		ipfw_export_table_value_v1(pval, &tent->v.value);
 	}
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Dispatches a flush or destroy request for the given table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ]
  *
  * IP_FW_TABLE_XDESTROY destroys the table, IP_FW_TABLE_XFLUSH flushes
  * it; any other opcode yields ENOTSUP.
  *
  * Returns 0 on success
  */
 static int
 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	struct tid_info ti;
 
 	/* Request consists of the object header only */
 	if (sd->valsize != sizeof(*oh))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)op3;
 	objheader_to_ti(oh, &ti);
 
 	switch (op3->opcode) {
 	case IP_FW_TABLE_XDESTROY:
 		return (destroy_table(ch, &ti));
 	case IP_FW_TABLE_XFLUSH:
 		return (flush_table(ch, &ti));
 	default:
 		return (ENOTSUP);
 	}
 }
 
 /*
  * Operation-state callback installed by flush_table().
  * Marks the state as modified when @object is the table being flushed.
  */
 static void
 restart_flush(void *object, struct op_state *_state)
 {
 	struct tableop_state *ts;
 
 	ts = (struct tableop_state *)_state;
 
 	/* Tell the owner that the callback has fired for its table */
 	if (ts->tc == object)
 		ts->modified = 1;
 }
 
 /*
  * Flushes given table.
  *
  * Function creates new table instance with the same
  * parameters, swaps it with old one and
  * flushes state without holding runtime WLOCK.
  *
  * While the UH lock is dropped for the allocation, a tableop_state with
  * restart_flush() as its callback is registered; if it gets marked as
  * modified the whole operation is restarted.
  *
  * Returns 0 on success, ESRCH if the table is not found, EACCES for
  * tables whose algorithm is flagged TA_FLAG_READONLY, or the error
  * returned by the algorithm's init callback.
  */
 int
 flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_info ti_old, ti_new, *tablestate;
 	void *astate_old, *astate_new;
 	char algostate[64], *pstate;
 	struct tableop_state ts;
 	int error, need_gc;
 	uint16_t kidx;
 	uint8_t tflags;
 
 	/*
 	 * Stage 1: save table algorithm.
 	 * Reference found table to ensure it won't disappear.
 	 */
 	IPFW_UH_WLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 	if ((tc = find_table(ni, ti)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 	need_gc = 0;
 	astate_new = NULL;
 	memset(&ti_new, 0, sizeof(ti_new));
 restart:
 	/* Set up swap handler */
 	memset(&ts, 0, sizeof(ts));
 	ts.opstate.func = restart_flush;
 	ts.tc = tc;
 
 	/*
 	 * NOTE(review): @ta is re-read on every restart, and stage 1.5
 	 * below destroys the state allocated by the previous attempt via
 	 * this (possibly different) algo - confirm this is intended.
 	 */
 	ta = tc->ta;
 	/* Do not flush readonly tables */
 	if ((ta->flags & TA_FLAG_READONLY) != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EACCES);
 	}
 	/* Save startup algo parameters */
 	if (ta->print_config != NULL) {
 		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
 		    algostate, sizeof(algostate));
 		pstate = algostate;
 	} else
 		pstate = NULL;
 	tflags = tc->tflags;
 	/* Keep @tc alive and get notified about changes while unlocked */
 	tc->no.refcnt++;
 	add_toperation_state(ch, &ts);
 	IPFW_UH_WUNLOCK(ch);
 
 	/*
 	 * Stage 1.5: if this is not the first attempt, destroy previous state
 	 */
 	if (need_gc != 0) {
 		ta->destroy(astate_new, &ti_new);
 		need_gc = 0;
 	}
 
 	/*
 	 * Stage 2: allocate new table instance using same algo.
 	 */
 	memset(&ti_new, 0, sizeof(struct table_info));
 	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
 
 	/*
 	 * Stage 3: swap old state pointers with newly-allocated ones.
 	 * Decrease refcount.
 	 */
 	IPFW_UH_WLOCK(ch);
 	tc->no.refcnt--;
 	del_toperation_state(ch, &ts);
 
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 
 	/*
 	 * Restart operation if table swap has happened:
 	 * even if algo may be the same, algo init parameters
 	 * may change. Restart operation instead of doing
 	 * complex checks.
 	 */
 	if (ts.modified != 0) {
 		/* Delay destroying data since we're holding UH lock */
 		need_gc = 1;
 		goto restart;
 	}
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 	tablestate = (struct table_info *)ch->tablestate;
 
 	/* Switch the runtime table info under the runtime lock */
 	IPFW_WLOCK(ch);
 	ti_old = tablestate[kidx];
 	tablestate[kidx] = ti_new;
 	IPFW_WUNLOCK(ch);
 
 	/* Update the config to describe the new, empty instance */
 	astate_old = tc->astate;
 	tc->astate = astate_new;
 	tc->ti_copy = ti_new;
 	tc->count = 0;
 
 	/* Notify algo on real @ti address */
 	if (ta->change_ti != NULL)
 		ta->change_ti(tc->astate, &tablestate[kidx]);
 
 	/*
 	 * Stage 4: unref values.
 	 */
 	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
 	IPFW_UH_WUNLOCK(ch);
 
 	/*
 	 * Stage 5: perform real flush/destroy.
 	 */
 	ta->destroy(astate_old, &ti_old);
 
 	return (0);
 }
 
 /*
  * Sockopt handler swapping two tables.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
  *
  * The first table is named by the header's ntlv, the second one by the
  * ntlv following the header.
  *
  * Returns 0 on success
  */
 static int
 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_obj_ntlv *second;
 	struct tid_info ti_a, ti_b;
 
 	/* Request is the object header plus exactly one extra ntlv */
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)op3;
 	second = (ipfw_obj_ntlv *)(oh + 1);
 
 	ntlv_to_ti(&oh->ntlv, &ti_a);
 	ntlv_to_ti(second, &ti_b);
 
 	return (swap_tables(ch, &ti_a, &ti_b));
 }
 
 /*
  * Swaps two tables of the same type/valtype.
  *
  * Checks if tables are compatible and limits
  * permits swap, than actually perform swap.
  *
  * Each table consists of 2 different parts:
  * config:
  *   @tc (with name, set, kidx) and rule bindings, which is "stable".
  *   number of items
  *   table algo
  * runtime:
  *   runtime data @ti (ch->tablestate)
  *   runtime cache in @tc
  *   algo-specific data (@tc->astate)
  *
  * So we switch:
  *  all runtime data
  *   number of items
  *   table algo
  *
  * After that we call @ti change handler for each table.
  *
  * Note that referencing @tc won't protect tc->ta from change.
  * XXX: Do we need to restrict swap between locked tables?
  * XXX: Do we need to exchange ftype?
  *
  * Returns 0 on success.
  */
 static int
 swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
     struct tid_info *b)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc_a, *tc_b;
 	struct table_algo *ta;
 	struct table_info ti, *tablestate;
 	void *astate;
 	uint32_t count;
 
 	/*
 	 * Stage 1: find both tables and ensure they are of
 	 * the same type.
 	 */
 	IPFW_UH_WLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 	if ((tc_a = find_table(ni, a)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 	if ((tc_b = find_table(ni, b)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 
 	/* It is very easy to swap between the same table */
 	if (tc_a == tc_b) {
 		IPFW_UH_WUNLOCK(ch);
 		return (0);
 	}
 
 	/* Check type and value are the same */
 	if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	/* Check limits before swap */
 	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
 	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EFBIG);
 	}
 
 	/* Check if one of the tables is readonly */
 	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EACCES);
 	}
 
 	/* Notify we're going to swap */
 	rollback_toperation_state(ch, tc_a);
 	rollback_toperation_state(ch, tc_b);
 
 	/* Everything is fine, prepare to swap */
 	tablestate = (struct table_info *)ch->tablestate;
 	ti = tablestate[tc_a->no.kidx];
 	ta = tc_a->ta;
 	astate = tc_a->astate;
 	count = tc_a->count;
 
 	IPFW_WLOCK(ch);
 	/* a <- b */
 	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
 	tc_a->ta = tc_b->ta;
 	tc_a->astate = tc_b->astate;
 	tc_a->count = tc_b->count;
 	/* b <- a */
 	tablestate[tc_b->no.kidx] = ti;
 	tc_b->ta = ta;
 	tc_b->astate = astate;
 	tc_b->count = count;
 	IPFW_WUNLOCK(ch);
 
 	/* Ensure tc.ti copies are in sync */
 	tc_a->ti_copy = tablestate[tc_a->no.kidx];
 	tc_b->ti_copy = tablestate[tc_b->no.kidx];
 
 	/* Notify both tables on @ti change */
 	if (tc_a->ta->change_ti != NULL)
 		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
 	if (tc_b->ta->change_ti != NULL)
 		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
 
 	IPFW_UH_WUNLOCK(ch);
 
 	return (0);
 }
 
 /*
  * Destroys table specified by @ti.
  *
  * The table is unlinked, its index is returned to the pool and the
  * values it used are unreferenced before the config is freed.
  *
  * Returns 0 on success, ESRCH if the table does not exist and
  * EBUSY if it is still referenced.
  */
 static int
 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
 {
 	struct table_config *tc;
 	struct namedobj_instance *ni;
 
 	IPFW_UH_WLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 
 	/* Missing and still-referenced tables cannot be destroyed */
 	tc = find_table(ni, ti);
 	if (tc == NULL || tc->no.refcnt > 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (tc == NULL ? ESRCH : EBUSY);
 	}
 
 	/* Detach the table under the runtime lock */
 	IPFW_WLOCK(ch);
 	unlink_table(ch, tc);
 	IPFW_WUNLOCK(ch);
 
 	/* Give the object index back */
 	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) {
 		printf("Error unlinking kidx %d from table %s\n",
 		    tc->no.kidx, tc->tablename);
 	}
 
 	/* Values have to be unreferenced while UH lock is still held */
 	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
 	IPFW_UH_WUNLOCK(ch);
 
 	free_table_config(ni, tc);
 
 	return (0);
 }
 
 /*
  * Rounds @v up to the nearest power of 2 by smearing the highest set
  * bit of (@v - 1) into all lower positions and adding one.
  * Values that are already a power of 2 are returned unchanged;
  * 0 and values above 2^31 wrap around to 0.
  */
 static uint32_t
 roundup2p(uint32_t v)
 {
 	unsigned int shift;
 
 	v -= 1;
 	for (shift = 1; shift <= 16; shift <<= 1)
 		v |= v >> shift;
 	v += 1;
 
 	return (v);
 }
 
 /*
  * Grow tables index.
  *
  * Returns 0 on success.
  */
 int
 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
 {
 	unsigned int ntables_old, tbl;
 	struct namedobj_instance *ni;
 	void *new_idx, *old_tablestate, *tablestate;
 	struct table_info *ti;
 	struct table_config *tc;
 	int i, new_blocks;
 
 	/* Check new value for validity */
 	if (ntables == 0)
 		return (EINVAL);
 	if (ntables > IPFW_TABLES_MAX)
 		ntables = IPFW_TABLES_MAX;
 	/* Alight to nearest power of 2 */
 	ntables = (unsigned int)roundup2p(ntables); 
 
 	/* Allocate new pointers */
 	tablestate = malloc(ntables * sizeof(struct table_info),
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
 
 	IPFW_UH_WLOCK(ch);
 
 	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
 	ni = CHAIN_TO_NI(ch);
 
 	/* Temporary restrict decreasing max_tables */
 	if (ntables < V_fw_tables_max) {
 
 		/*
 		 * FIXME: Check if we really can shrink
 		 */
 		IPFW_UH_WUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	/* Copy table info/indices */
 	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
 	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
 
 	IPFW_WLOCK(ch);
 
 	/* Change pointers */
 	old_tablestate = ch->tablestate;
 	ch->tablestate = tablestate;
 	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
 
 	ntables_old = V_fw_tables_max;
 	V_fw_tables_max = ntables;
 
 	IPFW_WUNLOCK(ch);
 
 	/* Notify all consumers that their @ti pointer has changed */
 	ti = (struct table_info *)ch->tablestate;
 	for (i = 0; i < tbl; i++, ti++) {
 		if (ti->lookup == NULL)
 			continue;
 		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
 		if (tc == NULL || tc->ta->change_ti == NULL)
 			continue;
 
 		tc->ta->change_ti(tc->astate, ti);
 	}
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Free old pointers */
 	free(old_tablestate, M_IPFW);
 	ipfw_objhash_bitmap_free(new_idx, new_blocks);
 
 	return (0);
 }
 
 /*
  * Returns the named object registered under @kidx in the table
  * namespace of chain @ch, as reported by ipfw_objhash_lookup_kidx().
  */
 struct named_object *
 ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
 {
 	struct namedobj_instance *ni;
 
 	ni = CHAIN_TO_NI(ch);
 
 	return (ipfw_objhash_lookup_kidx(ni, kidx));
 }
 
 /*
  * Takes a reference on the table named by @ntlv.
  * Must be called with UH_WLOCK held.
  *
  * On success stores the table's kernel index in @kidx and returns 0.
  * Returns the lookup error, or ESRCH if no such table exists.
  */
 int
 ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
 {
 	struct table_config *tc;
 	struct tid_info ti;
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ntlv_to_ti(ntlv, &ti);
 	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
 	if (error == 0) {
 		if (tc != NULL) {
 			/* Found: reference it and report its index */
 			tc_ref(tc);
 			*kidx = tc->no.kidx;
 		} else
 			error = ESRCH;
 	}
 
 	return (error);
 }
 
 /*
  * Drops one reference from the table registered under @kidx.
  * Must be called with UH_WLOCK held; the table has to exist.
  */
 void
 ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
 {
 	struct named_object *no;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	no = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
 	KASSERT(no != NULL, ("Table with index %d not found", kidx));
 	no->refcnt -= 1;
 }
 
 /*
  * Looks up key @paddr of length @plen in the table with index @tbl by
  * calling the lookup handler stored in the table's runtime info.
  * Found value is stored in @val.
  *
  * Returns 1 if key was found.
  */
 int
 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
     void *paddr, uint32_t *val)
 {
 	struct table_info *tinfo;
 	int found;
 
 	tinfo = KIDX_TO_TI(ch, tbl);
 	found = tinfo->lookup(tinfo, paddr, plen, val);
 
 	return (found);
 }
 
 /*
  * Info/List/dump support for tables.
  *
  */
 
 /*
  * High-level 'get' cmds sysctl handlers
  */
 
 /*
  * Exports the list of tables currently known to the kernel.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
  *
  * Returns 0 on success
  */
 static int
 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	int error;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,
 	    sizeof(*olh));
 
 	/* Header must be present and must not claim more than supplied */
 	if (olh == NULL || sd->valsize < olh->size)
 		return (EINVAL);
 
 	/* Export is performed under the UH read lock */
 	IPFW_UH_RLOCK(ch);
 	error = export_tables(ch, olh, sd);
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Exports info about a single table into the buffer provided by @sd.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
  * Reply: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * Returns 0 on success, EINVAL if the buffer cannot hold the reply
  * and ESRCH if the table does not exist.
  */
 static int
 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *info;
 	struct table_config *tc;
 	struct tid_info ti;
 	int error;
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd,
 	    sizeof(*oh) + sizeof(ipfw_xtable_info));
 	if (oh == NULL)
 		return (EINVAL);
 
 	/* Table info is placed right after the object header */
 	info = (ipfw_xtable_info *)(oh + 1);
 	objheader_to_ti(oh, &ti);
 
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc != NULL) {
 		export_table_info(ch, tc, info);
 		error = 0;
 	} else
 		error = ESRCH;
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Changes the limit and/or the locked state of an existing table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * Only the fields selected by ipfw_xtable_info.mflags are applied.
  *
  * Returns 0 on success, EINVAL on malformed request, ESRCH if the
  * table does not exist and EACCES if it is readonly.
  */
 static int
 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *i;
 	struct table_config *tc;
 	struct tid_info ti;
 	int error;
 
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)sd->kbuf;
 	i = (ipfw_xtable_info *)(oh + 1);
 
 	/*
 	 * The table name comes from userland:
 	 * make sure it is a valid string.
 	 */
 	if (check_table_name(oh->ntlv.name) != 0)
 		return (EINVAL);
 
 	objheader_to_ti(oh, &ti);
 	ti.type = i->type;
 
 	IPFW_UH_WLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc == NULL)
 		error = ESRCH;
 	else if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
 		/* Readonly tables do not accept any modification */
 		error = EACCES;
 	} else {
 		if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
 			tc->limit = i->limit;
 		if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
 			tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
 		error = 0;
 	}
 	IPFW_UH_WUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Sockopt handler creating a new table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * An empty algorithm name selects the default algorithm.
  *
  * Returns 0 on success, EINVAL on malformed request and EEXIST if
  * the table is already present.
  */
 static int
 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *i;
 	struct namedobj_instance *ni;
 	struct table_config *found;
 	struct tid_info ti;
 	char *aname;
 
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)sd->kbuf;
 	i = (ipfw_xtable_info *)(oh + 1);
 
 	/*
 	 * Both strings come from userland: the table name has to be
 	 * valid and the algorithm name has to be terminated.
 	 */
 	if (check_table_name(oh->ntlv.name) != 0)
 		return (EINVAL);
 	if (strnlen(i->algoname, sizeof(i->algoname)) == sizeof(i->algoname))
 		return (EINVAL);
 
 	/* Empty algorithm name means "use the default one" */
 	aname = (i->algoname[0] != '\0') ? i->algoname : NULL;
 
 	objheader_to_ti(oh, &ti);
 	ti.type = i->type;
 
 	ni = CHAIN_TO_NI(ch);
 
 	/* Refuse early if the table is already there */
 	IPFW_UH_RLOCK(ch);
 	found = find_table(ni, &ti);
 	IPFW_UH_RUNLOCK(ch);
 	if (found != NULL)
 		return (EEXIST);
 
 	return (create_table_internal(ch, &ti, aname, i, NULL, 0));
 }
 
 /*
  * Creates new table based on @ti and @aname.
  *
  * Assume @aname to be checked and valid.
  * Table value mask, limit and locked state are taken from @i; tables
  * using a readonly algorithm are always created locked.
  * Stores allocated table kidx inside @pkidx (if non-NULL).
  * Reference created table if @compat is non-zero. In that mode an
  * already existing table of the same subtype is reused instead of
  * failing.
  *
  * Returns 0 on success, ENOTSUP if no matching algorithm is found,
  * ENOMEM if the config cannot be allocated, EEXIST if the table
  * already exists and EBUSY if no table index is available.
  */
 static int
 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc, *tc_new, *tmp;
 	struct table_algo *ta;
 	uint16_t kidx;
 
 	ni = CHAIN_TO_NI(ch);
 
 	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
 	if (ta == NULL)
 		return (ENOTSUP);
 
 	/* The config is allocated before taking the UH lock */
 	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
 	if (tc == NULL)
 		return (ENOMEM);
 
 	tc->vmask = i->vmask;
 	tc->limit = i->limit;
 	if (ta->flags & TA_FLAG_READONLY)
 		tc->locked = 1;
 	else
 		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
 
 	IPFW_UH_WLOCK(ch);
 
 	/* Check if table has been already created */
 	tc_new = find_table(ni, ti);
 	if (tc_new != NULL) {
 
 		/*
 		 * Compat: do not fail if we're
 		 * requesting to create existing table
 		 * which has the same type
 		 */
 		if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
 			IPFW_UH_WUNLOCK(ch);
 			free_table_config(ni, tc);
 			return (EEXIST);
 		}
 
 		/* Exchange tc and tc_new for proper refcounting & freeing */
 		tmp = tc;
 		tc = tc_new;
 		tc_new = tmp;
 	} else {
 		/* New table */
 		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
 			IPFW_UH_WUNLOCK(ch);
 			printf("Unable to allocate table index."
 			    " Consider increasing net.inet.ip.fw.tables_max\n");
 			free_table_config(ni, tc);
 			return (EBUSY);
 		}
 		tc->no.kidx = kidx;
 		tc->no.etlv = IPFW_TLV_TBL_NAME;
 
 		link_table(ch, tc);
 	}
 
 	if (compat != 0)
 		tc->no.refcnt++;
 	if (pkidx != NULL)
 		*pkidx = tc->no.kidx;
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Non-NULL only if an existing table was reused: drop our copy */
 	if (tc_new != NULL)
 		free_table_config(ni, tc_new);
 
 	return (0);
 }
 
 /*
  * Fills in @ti from single name TLV @ntlv: everything is zeroed,
  * then set and index are copied and the TLV block is pointed at
  * @ntlv itself, with its own length.
  */
 static void
 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
 {
 
 	memset(ti, 0, sizeof(*ti));
 	ti->tlvs = ntlv;
 	ti->tlen = ntlv->head.length;
 	ti->uidx = ntlv->idx;
 	ti->set = ntlv->set;
 }
 
 /*
  * Fills in @ti using name TLV embedded into object header @oh.
  */
 static void
 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
 {
 
 	ntlv_to_ti(&oh->ntlv, ti);
 }
 
 /*
  * Returns named object instance holding tables of chain @ch.
  */
 struct namedobj_instance *
 ipfw_get_table_objhash(struct ip_fw_chain *ch)
 {
 
 	return (CHAIN_TO_NI(ch));
 }
 
 /*
  * Exports basic table info as name TLV.
  * Used inside dump_static_rules() to provide info
  * about all tables referenced by current ruleset.
  *
  * TLV is placed into space obtained from @sd and carries
  * table kernel index and name.
  *
  * Returns 0 on success, ENOMEM if @sd has no room for the TLV.
  */
 int
 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
     struct sockopt_data *sd)
 {
 	struct named_object *obj;
 	ipfw_obj_ntlv *out;
 
 	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
 	KASSERT(obj != NULL, ("invalid table kidx passed"));
 
 	out = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*out));
 	if (out == NULL)
 		return (ENOMEM);
 
 	out->head.type = IPFW_TLV_TBL_NAME;
 	out->head.length = sizeof(*out);
 	out->idx = obj->kidx;
 	strlcpy(out->name, obj->name, sizeof(out->name));
 
 	return (0);
 }
 
 /*
  * State shared between table dump/count routines of this file and
  * per-entry callbacks they pass to algorithm foreach() hook.
  * Each routine zeroes the structure and fills in only the fields
  * its callback needs.
  */
 struct dump_args {
 	struct ip_fw_chain *ch;		/* chain the table belongs to */
 	struct table_info *ti;		/* runtime table info given to algo */
 	struct table_config *tc;	/* table being walked */
 	struct sockopt_data *sd;	/* output buffer for sockopt dumps */
 	uint32_t cnt;			/* entries counted/stored so far */
 	uint16_t uidx;			/* table index stamped into entries */
 	int error;			/* error recorded by callback */
 	uint32_t size;			/* capacity of @ent (legacy dump) */
 	ipfw_table_entry *ent;		/* next free legacy output slot */
 	ta_foreach_f *f;		/* consumer callback (external walk) */
 	void *farg;			/* opaque argument passed to @f */
 	ipfw_obj_tentry tent;		/* scratch entry in current format */
 };
 
 /*
  * foreach() callback: accounts one more entry in dump_args
  * passed via @arg. Entry @e itself is not examined.
  *
  * Always returns 0.
  */
 static int
 count_ext_entries(void *e, void *arg)
 {
 
 	((struct dump_args *)arg)->cnt++;
 	return (0);
 }
 
 /*
  * Gets number of items from table either using
  * internal counter or calling algo callback for
  * externally-managed tables.
  *
  * Returns number of records.
  */
 static uint32_t
 table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct table_info *ti;
 	struct table_algo *ta;
 	struct dump_args da;
 
 	ti = KIDX_TO_TI(ch, tc->no.kidx);
 	ta = tc->ta;
 
 	/* Use internal counter for self-managed tables */
 	if ((ta->flags & TA_FLAG_READONLY) == 0)
 		return (tc->count);
 
 	/* Use callback to quickly get number of items */
 	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
 		return (ta->get_count(tc->astate, ti));
 
 	/* Count number of iterms ourselves */
 	memset(&da, 0, sizeof(da));
 	ta->foreach(tc->astate, ti, count_ext_entries, &da);
 
 	return (da.cnt);
 }
 
 /*
  * Exports table @tc info into standard ipfw_xtable_info format.
  *
  * Note that @i->flags and @i->ta_info.flags are only OR-ed into,
  * so bits already set in @i by the caller are preserved.
  */
 static void
 export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
     ipfw_xtable_info *i)
 {
 	struct table_algo *algo;
 	struct table_info *tinfo;
 
 	algo = tc->ta;
 	tinfo = KIDX_TO_TI(ch, tc->no.kidx);
 
 	/* Generic table properties */
 	i->type = tc->no.subtype;
 	i->tflags = tc->tflags;
 	i->vmask = tc->vmask;
 	i->set = tc->no.set;
 	i->kidx = tc->no.kidx;
 	i->refcnt = tc->no.refcnt;
 	i->count = table_get_count(ch, tc);
 	i->limit = tc->limit;
 	if (tc->locked != 0)
 		i->flags |= IPFW_TGFLAGS_LOCKED;
 
 	/* Buffer size needed to dump the whole table in v1 format */
 	i->size = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info) +
 	    i->count * sizeof(ipfw_obj_tentry);
 	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
 
 	/* Algo name: plain name unless algo can print its own config */
 	if (algo->print_config == NULL)
 		strlcpy(i->algoname, algo->name, sizeof(i->algoname));
 	else
 		algo->print_config(tc->astate, tinfo, i->algoname,
 		    sizeof(i->algoname));
 
 	/* Dump algo-specific data, if possible */
 	if (algo->dump_tinfo != NULL) {
 		algo->dump_tinfo(tc->astate, tinfo, &i->ta_info);
 		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
 	}
 }
 
 /*
  * Argument passed to export_table_internal() by export_tables().
  */
 struct dump_table_args {
 	struct ip_fw_chain *ch;		/* chain owning exported tables */
 	struct sockopt_data *sd;	/* buffer receiving table info */
 };
 
 /*
  * Object hash callback: reserves one ipfw_xtable_info in the buffer
  * supplied via @arg (struct dump_table_args) and fills it in with
  * info on table @no.
  *
  * Always returns 0.
  */
 static int
 export_table_internal(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct dump_table_args *args;
 	ipfw_xtable_info *info;
 
 	args = (struct dump_table_args *)arg;
 
 	info = (ipfw_xtable_info *)ipfw_get_sopt_space(args->sd,
 	    sizeof(*info));
 	KASSERT(info != NULL, ("previously checked buffer is not enough"));
 
 	export_table_info(args->ch, (struct table_config *)no, info);
 	return (0);
 }
 
 /*
  * Export all tables as ipfw_xtable_info structures to
  * storage provided by @sd.
  *
  * Header @olh is always updated with object count, object size and
  * total size required. If size announced in @olh is too small,
  * nothing else is written and ENOMEM is returned.
  * Returns 0 on success.
  */
 static int
 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
     struct sockopt_data *sd)
 {
 	struct namedobj_instance *ni;
 	struct dump_table_args dta;
 	uint32_t count, size;
 	int fits;
 
 	ni = CHAIN_TO_NI(ch);
 	count = ipfw_objhash_count(ni);
 	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
 
 	/* Fill in header regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_xtable_info);
 	fits = (size <= olh->size);
 	olh->size = size;
 
 	if (!fits)
 		return (ENOMEM);
 
 	dta.ch = ch;
 	dta.sd = sd;
 	ipfw_objhash_foreach(ni, export_table_internal, &dta);
 
 	return (0);
 }
 
 /*
  * Dumps all table data
  * Data layout (v1)(current):
  * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
  * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
  *
  * Returns 0 on success, EINVAL if the request header cannot be
  * obtained, ESRCH if the table does not exist, ENOMEM if supplied
  * buffer is smaller than the size reported in ipfw_xtable_info.
  */
 static int
 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *info;
 	struct table_config *tc;
 	struct tid_info ti;
 	struct dump_args da;
 	int error;
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd,
 	    sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info));
 	if (oh == NULL)
 		return (EINVAL);
 
 	info = (ipfw_xtable_info *)(oh + 1);
 	objheader_to_ti(oh, &ti);
 
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc == NULL) {
 		error = ESRCH;
 	} else {
 		export_table_info(ch, tc, info);
 
 		if (sd->valsize < info->size) {
 			/*
 			 * Submitted buffer size is not enough.
 			 * Table info including required size is
 			 * already filled in, so just report the
 			 * failure. Buffer will be flushed automatically.
 			 */
 			error = ENOMEM;
 		} else {
 			/* Do the actual dump in eXtended format */
 			memset(&da, 0, sizeof(da));
 			da.ch = ch;
 			da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 			da.tc = tc;
 			da.sd = sd;
 
 			tc->ta->foreach(tc->astate, da.ti,
 			    dump_table_tentry, &da);
 			error = da.error;
 		}
 	}
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Dumps all table data
  * Data layout (version 0)(legacy):
  * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
  * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
  *
  * Non-existing table is not an error: 0 is returned and
  * the request is left as is.
  *
  * Returns 0 on success, EINVAL if the request header cannot be
  * obtained, ENOMEM if supplied buffer is too small.
  */
 static int
 dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_xtable *xtbl;
 	struct table_config *tc;
 	struct tid_info ti;
 	struct dump_args da;
 	size_t count, sz;
 	int error;
 
 	xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
 	if (xtbl == NULL)
 		return (EINVAL);
 
 	memset(&ti, 0, sizeof(ti));
 	ti.uidx = xtbl->tbl;
 
 	error = 0;
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc != NULL) {
 		count = table_get_count(ch, tc);
 		sz = sizeof(ipfw_xtable) + count * sizeof(ipfw_table_xentry);
 
 		xtbl->cnt = count;
 		xtbl->size = sz;
 		xtbl->type = tc->no.subtype;
 		xtbl->tbl = ti.uidx;
 
 		if (sd->valsize < sz) {
 			/*
 			 * Submitted buffer size is not enough.
 			 * @xtbl header is already filled in with
 			 * relevant table info including size, so we
 			 * can return. Buffer will be flushed automatically.
 			 */
 			error = ENOMEM;
 		} else {
 			/* Do the actual dump in eXtended format */
 			memset(&da, 0, sizeof(da));
 			da.ch = ch;
 			da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 			da.tc = tc;
 			da.sd = sd;
 
 			tc->ta->foreach(tc->astate, da.ti,
 			    dump_table_xentry, &da);
 		}
 	}
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Legacy function to retrieve number of items in table.
  * Request: [ ip_fw3_opheader uint32_t ]. The 32-bit word carries
  * table index on input and is overwritten with the value computed
  * by ipfw_count_xtable() on output.
  *
  * Returns 0 on success, EINVAL if the request header cannot be
  * obtained.
  */
 static int
 get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct tid_info ti;
 	uint32_t *val;
 	int error;
 
 	op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd,
 	    sizeof(*op3) + sizeof(uint32_t));
 	if (op3 == NULL)
 		return (EINVAL);
 
 	/* Table index/result word follows the header */
 	val = (uint32_t *)(op3 + 1);
 
 	memset(&ti, 0, sizeof(ti));
 	ti.uidx = *val;
 
 	IPFW_UH_RLOCK(ch);
 	error = ipfw_count_xtable(ch, &ti, val);
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Legacy IP_FW_TABLE_GETSIZE handler.
  * Stores number of entries of table described by @ti in @cnt.
  *
  * Returns 0 on success, ESRCH if there is no such table.
  */
 int
 ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 {
 	struct table_config *tc;
 
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	if (tc == NULL)
 		return (ESRCH);
 
 	*cnt = table_get_count(ch, tc);
 	return (0);
 }
 
 /*
  * Legacy IP_FW_TABLE_XGETSIZE handler.
  * Stores in @cnt the number of bytes needed to dump the table in
  * legacy extended format: one ipfw_table_xentry per record plus
  * ipfw_xtable header. Empty or non-existing table yields 0.
  *
  * Always returns 0.
  */
 int
 ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 {
 	struct table_config *tc;
 	uint32_t nrec;
 
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	/* 'table all list' requires success even for missing table */
 	nrec = (tc != NULL) ? table_get_count(ch, tc) : 0;
 
 	if (nrec > 0)
 		*cnt = nrec * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
 	else
 		*cnt = 0;
 
 	return (0);
 }
 
 static int
 dump_table_entry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	ipfw_table_entry *ent;
 	struct table_value *pval;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	/* Out of memory, returning */
 	if (da->cnt == da->size)
 		return (1);
 	ent = da->ent++;
 	ent->tbl = da->uidx;
 	da->cnt++;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 	if (error != 0)
 		return (error);
 
 	ent->addr = da->tent.k.addr.s_addr;
 	ent->masklen = da->tent.masklen;
 	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 	ent->value = ipfw_export_table_value_legacy(pval);
 
 	return (0);
 }
 
 /*
  * Dumps table in pre-8.1 legacy format.
  * At most @tbl->size entries are stored in @tbl->ent and their number
  * is saved in @tbl->cnt. Missing tables and tables of type other
  * than IPFW_TABLE_ADDR produce empty dump.
  *
  * Always returns 0.
  */
 int
 ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
     ipfw_table *tbl)
 {
 	struct table_config *tc;
 	struct dump_args da;
 
 	tbl->cnt = 0;
 
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	if (tc == NULL)
 		return (0);	/* XXX: We should return ESRCH */
 
 	/* This dump format supports IPv4 only */
 	if (tc->no.subtype != IPFW_TABLE_ADDR)
 		return (0);
 
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.tc = tc;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.size = tbl->size;
 	da.ent = &tbl->ent[0];
 
 	tc->ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
 	tbl->cnt = da.cnt;
 
 	return (0);
 }
 
 /*
  * Dumps table entry in eXtended format (v1)(current).
  */
 static int
 dump_table_tentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_value *pval;
 	ipfw_obj_tentry *tent;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
 	/* Out of memory, returning */
 	if (tent == NULL) {
 		da->error = ENOMEM;
 		return (1);
 	}
 	tent->head.length = sizeof(ipfw_obj_tentry);
 	tent->idx = da->uidx;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 	if (error != 0)
 		return (error);
 
 	pval = get_table_value(da->ch, da->tc, tent->v.kidx);
 	ipfw_export_table_value_v1(pval, &tent->v.value);
 
 	return (0);
 }
 
 /*
  * Dumps table entry in eXtended format (v0).
  */
 static int
 dump_table_xentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	ipfw_table_xentry *xent;
 	ipfw_obj_tentry *tent;
 	struct table_value *pval;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
 	/* Out of memory, returning */
 	if (xent == NULL)
 		return (1);
 	xent->len = sizeof(ipfw_table_xentry);
 	xent->tbl = da->uidx;
 
 	memset(&da->tent, 0, sizeof(da->tent));
 	tent = &da->tent;
 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 	if (error != 0)
 		return (error);
 
 	/* Convert current format to previous one */
 	xent->masklen = tent->masklen;
 	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 	xent->value = ipfw_export_table_value_legacy(pval);
 	/* Apply some hacks */
 	if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
 		xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
 		xent->flags = IPFW_TCF_INET;
 	} else
 		memcpy(&xent->k, &tent->k, sizeof(xent->k));
 
 	return (0);
 }
 
 /*
  * Helper function to export table algo data
  * to tentry format before calling user function.
  *
  * Returns 0 on success.
  */
 static int
 prepare_table_tentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 	if (error != 0)
 		return (error);
 
 	da->f(&da->tent, da->farg);
 
 	return (0);
 }
 
 /*
  * Allow external consumers to read table entries in standard format.
  * Calls @f with every entry of table @kidx converted to tentry
  * and with @arg as the second argument.
  *
  * Returns 0 on success, ESRCH if there is no table with such index.
  */
 int
 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
     ta_foreach_f *f, void *arg)
 {
 	struct table_config *tc;
 	struct dump_args da;
 
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch),
 	    kidx);
 	if (tc == NULL)
 		return (ESRCH);
 
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.tc = tc;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.farg = arg;
 	da.f = f;
 
 	tc->ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
 
 	return (0);
 }
 
 /*
  * Table algorithms
  */ 
 
 /*
  * Finds algorithm by index, table type or supplied name.
  *
  * Lookup order: explicit algo index (@ti->atype), then default algo
  * for @ti->type if @name is NULL, then search by name. Name match
  * which has different type than requested (non-zero) @ti->type
  * is treated as failure.
  *
  * Returns pointer to algo or NULL.
  */
 static struct table_algo *
 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
 {
 	struct table_algo *cand;
 	int idx, nlen;
 
 	if (ti->type > IPFW_TABLE_MAXTYPE)
 		return (NULL);
 
 	/* Search by index */
 	if (ti->atype != 0) {
 		return ((ti->atype > tcfg->algo_count) ?
 		    NULL : tcfg->algo[ti->atype]);
 	}
 
 	/* Return default algorithm for given type if set */
 	if (name == NULL)
 		return (tcfg->def_algo[ti->type]);
 
 	/* Search by name */
 	/* TODO: better search */
 	for (idx = 1; idx <= tcfg->algo_count; idx++) {
 		cand = tcfg->algo[idx];
 		nlen = strlen(cand->name);
 
 		/*
 		 * One can supply additional algorithm
 		 * parameters so we compare only the first word
 		 * of supplied name:
 		 * 'addr:chash hsize=32'
 		 * '^^^^^^^^^'
 		 * The word has to end right after algo name.
 		 */
 		if (strncmp(name, cand->name, nlen) == 0 &&
 		    (name[nlen] == '\0' || name[nlen] == ' ')) {
 			/* Check if we're requesting proper table type */
 			if (ti->type != 0 && ti->type != cand->type)
 				return (NULL);
 			return (cand);
 		}
 	}
 
 	return (NULL);
 }
 
 /*
  * Register new table algo @ta.
  * First @size bytes of @ta are copied into freshly allocated,
  * zero-filled struct table_algo which is appended to algo array.
  * Algo flagged TA_FLAG_DEFAULT becomes default one for its type
  * unless such default is already set.
  * Stores algo id inside @idx.
  *
  * Returns 0 on success, EINVAL if @size is larger than
  * struct table_algo or algo add/del buffer does not fit TA_BUF_SZ.
  */
 int
 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
     int *idx)
 {
 	struct tables_config *tcfg;
 	struct table_algo *copy;
 
 	if (size > sizeof(*copy))
 		return (EINVAL);
 
 	/* Check for the required on-stack size for add/del */
 	if (roundup2(ta->ta_buf_size, sizeof(void *)) > TA_BUF_SZ)
 		return (EINVAL);
 
 	KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
 
 	/* Copy algorithm data to stable storage. */
 	copy = malloc(sizeof(*copy), M_IPFW, M_WAITOK | M_ZERO);
 	memcpy(copy, ta, size);
 
 	tcfg = CHAIN_TO_TCFG(ch);
 
 	KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
 
 	/* Algo indices start from 1 */
 	tcfg->algo_count++;
 	tcfg->algo[tcfg->algo_count] = copy;
 	copy->idx = tcfg->algo_count;
 
 	/* Set algorithm as default one for given type */
 	if ((copy->flags & TA_FLAG_DEFAULT) != 0 &&
 	    tcfg->def_algo[copy->type] == NULL)
 		tcfg->def_algo[copy->type] = copy;
 
 	*idx = copy->idx;
 
 	return (0);
 }
 
 /*
  * Unregisters table algo using @idx as id.
  * Drops algo from default algo slot of its type (if it is the
  * default one) and frees its storage. Neither algo array slot
  * nor algo counter are updated here.
  * XXX: It is NOT safe to call this function in any place
  * other than ipfw instance destroy handler.
  */
 void
 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
 {
 	struct tables_config *tcfg;
 	struct table_algo *ta;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 
 	KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
 	    idx, tcfg->algo_count));
 
 	ta = tcfg->algo[idx];
 	KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
 
 	/* Do not leave dangling default algo pointer */
 	if (tcfg->def_algo[ta->type] == ta)
 		tcfg->def_algo[ta->type] = NULL;
 
 	free(ta, M_IPFW);
 }
 
 /*
  * Lists all table algorithms currently available.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
  *
  * List header is always updated with algo count, record size and
  * total size required.
  *
  * Returns 0 on success, EINVAL on malformed request, ENOMEM if
  * size announced in the header is too small.
  */
 static int
 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	struct tables_config *tcfg;
 	struct table_algo *ta;
 	ipfw_ta_info *rec;
 	uint32_t count, n, size;
 	int error;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL)
 		return (EINVAL);
 	/* Announced size may not exceed actual buffer size */
 	if (sd->valsize < olh->size)
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(ch);
 	tcfg = CHAIN_TO_TCFG(ch);
 	count = tcfg->algo_count;
 	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
 
 	/* Fill in header regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_ta_info);
 	error = (size > olh->size) ? ENOMEM : 0;
 	olh->size = size;
 
 	if (error == 0) {
 		/* Algo indices start from 1 */
 		for (n = 1; n <= count; n++) {
 			rec = (ipfw_ta_info *)ipfw_get_sopt_space(sd,
 			    sizeof(*rec));
 			KASSERT(rec != NULL,
 			    ("previously checked buffer is not enough"));
 			ta = tcfg->algo[n];
 			strlcpy(rec->algoname, ta->name,
 			    sizeof(rec->algoname));
 			rec->type = ta->type;
 			rec->refcnt = ta->refcnt;
 		}
 	}
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Classifier for src/dst lookup opcodes.
  * Table index is taken from arg1. Table type is IPFW_TABLE_ADDR
  * unless the instruction is longer than ipfw_insn_u32 and its
  * second data word selects one of the numeric keys (2..6).
  *
  * Always returns 0.
  */
 static int
 classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 	int key;
 
 	/* Basic IPv4/IPv6 or u32 lookups */
 	*puidx = cmd->arg1;
 	/* Assume ADDR by default */
 	*ptype = IPFW_TABLE_ADDR;
 
 	if (F_LEN(cmd) <= F_INSN_SIZE(ipfw_insn_u32))
 		return (0);
 
 	/*
 	 * generic lookup. The key must be
 	 * in 32bit big-endian format.
 	 */
 	key = ((ipfw_insn_u32 *)cmd)->d[1];
 	switch (key) {
 	case 2:		/* src port */
 	case 3:		/* dst port */
 	case 4:		/* uid/gid */
 	case 5:		/* jid */
 	case 6:		/* dscp */
 		*ptype = IPFW_TABLE_NUMBER;
 		break;
 	default:
 		/* 0/1 are IPv4 src/dst; the rest stays ADDR as well */
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Classifier for interface opcodes.
  * Instruction refers to a table only if its interface name
  * starts with '\1'. In that case table type is
  * IPFW_TABLE_INTERFACE and index is kept in p.kidx.
  *
  * Returns 0 if table is referenced, 1 otherwise (outputs untouched).
  */
 static int
 classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 	ipfw_insn_if *cmdif;
 
 	cmdif = (ipfw_insn_if *)cmd;
 
 	/* Interface table, possibly */
 	if (cmdif->name[0] == '\1') {
 		*puidx = cmdif->p.kidx;
 		*ptype = IPFW_TABLE_INTERFACE;
 		return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Classifier for flow lookup opcode: table index is taken
  * from arg1, table type is always IPFW_TABLE_FLOW.
  *
  * Always returns 0.
  */
 static int
 classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 
 	*puidx = cmd->arg1;
 	*ptype = IPFW_TABLE_FLOW;
 
 	return (0);
 }
 
 /*
  * Stores table index @idx in arg1 of instruction @cmd.
  */
 static void
 update_arg1(ipfw_insn *cmd, uint16_t idx)
 {
 
 	cmd->arg1 = idx;
 }
 
 /*
  * Stores table index @idx in p.kidx of interface instruction @cmd.
  */
 static void
 update_via(ipfw_insn *cmd, uint16_t idx)
 {
 
 	((ipfw_insn_if *)cmd)->p.kidx = idx;
 }
 
 /*
  * Looks table up using name/index info from @ti and stores
  * its named object in @pno. Requires UH write lock.
  *
  * Returns 0 on success or error from find_table_err(),
  * in which case @pno is not touched.
  */
 static int
 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
     struct named_object **pno)
 {
 	struct table_config *tc;
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
 	if (error == 0)
 		*pno = &tc->no;
 
 	return (error);
 }
 
 /*
  * Returns named object of table with kernel index @idx.
  * Requires UH write lock. Table has to exist.
  */
 /* XXX: sets-sets! */
 static struct named_object *
 table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
 {
 	struct table_config *tc;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch),
 	    idx);
 	KASSERT(tc != NULL, ("Table with index %d not found", idx));
 
 	return (&tc->no);
 }
 
 /*
  * Sets handler used by most of table opcode rewriters.
  *
  * XXX_ALL commands are acknowledged without doing anything,
  * since table_manage_sets_all() handles them. For the remaining
  * commands the result depends on per-set tables sysctl
  * (V_fw_tables_sets): when it is disabled, TEST_ONE/MOVE_ONE
  * succeed immediately and COUNT_ONE returns EOPNOTSUPP;
  * otherwise request is passed to generic sets handler.
  */
 static int
 table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
     enum ipfw_sets_cmd cmd)
 {
 
 	switch (cmd) {
 	case SWAP_ALL:
 	case TEST_ALL:
 	case MOVE_ALL:
 		/*
 		 * Always return success, the real action and decision
 		 * should make table_manage_sets_all().
 		 */
 		return (0);
 	case TEST_ONE:
 	case MOVE_ONE:
 		/*
 		 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
 		 * if set number will be used in hash function. Currently
 		 * we can just use generic handler that replaces set value.
 		 */
 		if (V_fw_tables_sets == 0)
 			return (0);
 		break;
 	case COUNT_ONE:
 		/*
 		 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
 		 * disabled. This allow skip table's opcodes from additional
 		 * checks when specific rules moved to another set.
 		 */
 		if (V_fw_tables_sets == 0)
 			return (EOPNOTSUPP);
 		/* Per-set tables enabled: leave switch, use generic handler */
 	}
 	/* Use generic sets handler when per-set sysctl is enabled. */
 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
 	    set, new_set, cmd));
 }
 
 /*
  * We register several opcode rewriters for lookup tables.
  * All tables opcodes have the same ETLV type, but different subtype.
  * To avoid invoking sets handler several times for XXX_ALL commands,
  * we use separate manage_sets handler. O_RECV has the lowest value,
  * so it should be called first.
  *
  * XXX_ALL commands succeed immediately when per-set tables sysctl is
  * disabled and go to generic sets handler otherwise. Any other
  * command is forwarded to table_manage_sets().
  */
 static int
 table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
     enum ipfw_sets_cmd cmd)
 {
 
 	switch (cmd) {
 	case SWAP_ALL:
 	case TEST_ALL:
 		/*
 		 * Return success for TEST_ALL, since nothing prevents
 		 * move rules from one set to another. All tables are
 		 * accessible from all sets when per-set tables sysctl
 		 * is disabled.
 		 */
 		/* FALLTHROUGH */
 	case MOVE_ALL:
 		if (V_fw_tables_sets == 0)
 			return (0);
 		break;
 	default:
 		return (table_manage_sets(ch, set, new_set, cmd));
 	}
 	/* Use generic sets handler when per-set sysctl is enabled. */
 	return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
 	    set, new_set, cmd));
 }
 
 /*
  * Object rewriters for all opcodes which may reference a table.
  * Entries differ in classifier/update pair only (arg1-based for
  * lookup opcodes, p.kidx-based for interface opcodes), except for
  * O_RECV entry which carries table_manage_sets_all() handler.
  */
 static struct opcode_obj_rewrite opcodes[] = {
 	{
 		.opcode = O_IP_SRC_LOOKUP,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_srcdst,
 		.update = update_arg1,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets,
 	},
 	{
 		.opcode = O_IP_DST_LOOKUP,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_srcdst,
 		.update = update_arg1,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets,
 	},
 	{
 		.opcode = O_IP_FLOW_LOOKUP,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_flow,
 		.update = update_arg1,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets,
 	},
 	{
 		.opcode = O_XMIT,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_via,
 		.update = update_via,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets,
 	},
 	{
 		/* The only entry handling XXX_ALL sets commands */
 		.opcode = O_RECV,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_via,
 		.update = update_via,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets_all,
 	},
 	{
 		.opcode = O_VIA,
 		.etlv = IPFW_TLV_TBL_NAME,
 		.classifier = classify_via,
 		.update = update_via,
 		.find_byname = table_findbyname,
 		.find_bykidx = table_findbykidx,
 		.create_object = create_table_compat,
 		.manage_sets = table_manage_sets,
 	},
 };
 
 /*
  * Object hash callback used to check that there aren't
  * any tables in not default set.
  *
  * Returns EBUSY if @no belongs to non-zero set, 0 otherwise.
  */
 static int
 test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
     void *arg __unused)
 {
 
 	return ((no->set != 0) ? EBUSY : 0);
 }
 
 /*
  * Switches between "set 0" and "rule's set" table binding by
  * setting V_fw_tables_sets to @sets.
  * Checks all ruleset bindings and permits the change
  * IFF each binding has both rule AND table in default set (set 0).
  * Additionally, switching to @sets == 0 is refused while any table
  * lives in non-default set.
  *
  * Returns 0 on success (including the case when nothing has to be
  * changed), EBUSY if the change is not permitted.
  */
 int
 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
 {
 	struct opcode_obj_rewrite *rw;
 	struct namedobj_instance *ni;
 	struct named_object *no;
 	struct ip_fw *rule;
 	ipfw_insn *cmd;
 	int cmdlen, i, l;
 	uint16_t kidx;
 	uint8_t subtype;	/* written by classifiers, not used here */
 
 	IPFW_UH_WLOCK(ch);
 
 	/* Nothing to do if requested mode is already active */
 	if (V_fw_tables_sets == sets) {
 		IPFW_UH_WUNLOCK(ch);
 		return (0);
 	}
 	ni = CHAIN_TO_NI(ch);
 	if (sets == 0) {
 		/*
 		 * Prevent disabling sets support if we have some tables
 		 * in not default sets.
 		 */
 		if (ipfw_objhash_foreach_type(ni, test_sets_cb,
 		    NULL, IPFW_TLV_TBL_NAME) != 0) {
 			IPFW_UH_WUNLOCK(ch);
 			return (EBUSY);
 		}
 	}
 	/*
 	 * Scan all rules and examine tables opcodes.
 	 */
 	for (i = 0; i < ch->n_rules; i++) {
 		rule = ch->map[i];
 
 		l = rule->cmd_len;
 		cmd = rule->cmd;
 		cmdlen = 0;
 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			/*
 			 * Check only tables opcodes: kidx stays 0 unless
 			 * some rewriter's classifier recognizes @cmd.
 			 */
 			for (kidx = 0, rw = opcodes;
 			    rw < opcodes + nitems(opcodes); rw++) {
 				if (rw->opcode != cmd->opcode)
 					continue;
 				if (rw->classifier(cmd, &kidx, &subtype) == 0)
 					break;
 			}
 			if (kidx == 0)
 				continue;
 			/*
 			 * NOTE(review): lookup result is dereferenced without
 			 * NULL check; presumably every kidx stored in a rule
 			 * refers to existing table - confirm.
 			 */
 			no = ipfw_objhash_lookup_kidx(ni, kidx);
 			/* Check if both table object and rule has the set 0 */
 			if (no->set != 0 || rule->set != 0) {
 				IPFW_UH_WUNLOCK(ch);
 				return (EBUSY);
 			}
 
 		}
 	}
 	V_fw_tables_sets = sets;
 	IPFW_UH_WUNLOCK(ch);
 	return (0);
 }
 
 /*
  * Checks table name for validity.
  * Enforce basic length checks, the rest
  * should be done in userland.
  * Currently this is just generic object name check.
  *
  * Returns 0 if name is considered valid.
  */
 static int
 check_table_name(const char *name)
 {
 
 	/*
 	 * TODO: do some more complicated checks
 	 */
 	return (ipfw_check_object_name_generic(name));
 }
 
 /*
  * Finds table config based on either legacy index
  * or name in ntlv.
  * Note @ti structure contains unchecked data from userland.
  *
  * Returns EINVAL if @ti carries TLVs but none of them is table name
  * TLV with matching index. Otherwise returns 0 and stores lookup
  * result in @tc, which is NULL if there is no such table.
  */
 static int
 find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
     struct table_config **tc)
 {
 	char numname[16], *key;
 	ipfw_obj_ntlv *ntlv;
 	uint32_t set;
 
 	if (ti->tlvs == NULL) {
 		/* Legacy request: table name is its number */
 		snprintf(numname, sizeof(numname), "%d", ti->uidx);
 		key = numname;
 		set = 0;
 	} else {
 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 		    IPFW_TLV_TBL_NAME);
 		if (ntlv == NULL)
 			return (EINVAL);
 		key = ntlv->name;
 
 		/*
 		 * Use set provided by @ti instead of @ntlv one.
 		 * This is needed due to different sets behavior
 		 * controlled by V_fw_tables_sets.
 		 */
 		set = (V_fw_tables_sets != 0) ? ti->set : 0;
 	}
 
 	*tc = (struct table_config *)ipfw_objhash_lookup_name(ni, set, key);
 
 	return (0);
 }
 
 /*
  * Finds table config based on either legacy index
  * or name in ntlv.
  * Note @ti structure contains unchecked data from userland.
  * Lookup errors are reported the same way as missing table.
  *
  * Returns pointer to table_config or NULL.
  */
 static struct table_config *
 find_table(struct namedobj_instance *ni, struct tid_info *ti)
 {
 	struct table_config *found;
 
 	return ((find_table_err(ni, ti, &found) == 0) ? found : NULL);
 }
 
 /*
  * Allocate new table config structure using
  * specified @algo and @aname.
  * Table name and set come from name TLV referenced by @ti or,
  * for legacy requests without TLVs, from table number (set 0).
  * Algo state is preallocated by calling algo init().
  *
  * Returns pointer to config or NULL (no matching name TLV
  * or algo init() failure).
  */
 static struct table_config *
 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
     struct table_algo *ta, char *aname, uint8_t tflags)
 {
 	char numname[16], *tname;
 	struct table_config *cfg;
 	ipfw_obj_ntlv *ntlv;
 	uint32_t set;
 
 	if (ti->tlvs == NULL) {
 		/* Compat part: convert number to string representation */
 		snprintf(numname, sizeof(numname), "%d", ti->uidx);
 		tname = numname;
 		set = 0;
 	} else {
 		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
 		    IPFW_TLV_TBL_NAME);
 		if (ntlv == NULL)
 			return (NULL);
 		tname = ntlv->name;
 		set = (V_fw_tables_sets == 0) ? 0 : ntlv->set;
 	}
 
 	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
 	strlcpy(cfg->tablename, tname, sizeof(cfg->tablename));
 	cfg->no.name = cfg->tablename;
 	cfg->no.subtype = ta->type;
 	cfg->no.set = set;
 	cfg->tflags = tflags;
 	cfg->ta = ta;
 	/* Set "shared" value type by default */
 	cfg->vshared = 1;
 
 	/* Preallocate data structures for new tables */
 	if (ta->init(ch, &cfg->astate, &cfg->ti_copy, aname, tflags) != 0) {
 		free(cfg, M_IPFW);
 		return (NULL);
 	}
 
 	return (cfg);
 }
 
 /*
  * Destroys table state and config.
  * Algo state is released via algo destroy() callback, then
  * @tc itself is freed. @tc has to be unlinked already.
  * @ni is not used.
  */
 static void
 free_table_config(struct namedobj_instance *ni, struct table_config *tc)
 {
 
 	KASSERT(tc->linked == 0, ("free() on linked config"));
 	/* UH lock MUST NOT be held */
 
 	/*
 	 * We're using ta without any locking/referencing.
 	 * TODO: fix this if we're going to use unloadable algos.
 	 */
 	tc->ta->destroy(tc->astate, &tc->ti_copy);
 	free(tc, M_IPFW);
 }
 
 /*
  * Links @tc to @chain table named instance.
  * Sets appropriate type/states in @chain table info.
  * Also marks @tc as linked and takes reference on its algo.
  * Requires UH write lock; @tc->no.kidx has to be set by the caller.
  */
 static void
 link_table(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct namedobj_instance *ni;
 	struct table_info *ti;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 
 	ipfw_objhash_add(ni, &tc->no);
 
 	/* Publish table info prepared inside @tc in chain's slot */
 	ti = KIDX_TO_TI(ch, kidx);
 	*ti = tc->ti_copy;
 
 	/* Notify algo on real @ti address */
 	if (tc->ta->change_ti != NULL)
 		tc->ta->change_ti(tc->astate, ti);
 
 	tc->linked = 1;
 	tc->ta->refcnt++;
 }
 
 /*
  * Unlinks @tc from @chain table named instance.
  * Zeroes states in @chain and stores them in @tc.
  * Also marks @tc as not linked and drops reference on its algo.
  * Requires both UH write lock and runtime write lock.
  */
 static void
 unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct namedobj_instance *ni;
 	struct table_info *ti;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	IPFW_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 
 	/* Clear state. @ti copy is already saved inside @tc */
 	ipfw_objhash_del(ni, &tc->no);
 	ti = KIDX_TO_TI(ch, kidx);
 	memset(ti, 0, sizeof(struct table_info));
 	tc->linked = 0;
 	tc->ta->refcnt--;
 
 	/* Notify algo that there is no real @ti address anymore */
 	if (tc->ta->change_ti != NULL)
 		tc->ta->change_ti(tc->astate, NULL);
 }
 
 /*
  * Socket option handlers provided by tables module.
  * Columns: opcode, handler version, HDIR_* direction, handler.
  * Opcodes listed twice have separate v0 and v1 handlers.
  */
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_TABLE_XCREATE,	0,	HDIR_SET,	create_table },
 	{ IP_FW_TABLE_XDESTROY,	0,	HDIR_SET,	flush_table_v0 },
 	{ IP_FW_TABLE_XFLUSH,	0,	HDIR_SET,	flush_table_v0 },
 	{ IP_FW_TABLE_XMODIFY,	0,	HDIR_BOTH,	modify_table },
 	{ IP_FW_TABLE_XINFO,	0,	HDIR_GET,	describe_table },
 	{ IP_FW_TABLES_XLIST,	0,	HDIR_GET,	list_tables },
 	{ IP_FW_TABLE_XLIST,	0,	HDIR_GET,	dump_table_v0 },
 	{ IP_FW_TABLE_XLIST,	1,	HDIR_GET,	dump_table_v1 },
 	{ IP_FW_TABLE_XADD,	0,	HDIR_BOTH,	manage_table_ent_v0 },
 	{ IP_FW_TABLE_XADD,	1,	HDIR_BOTH,	manage_table_ent_v1 },
 	{ IP_FW_TABLE_XDEL,	0,	HDIR_BOTH,	manage_table_ent_v0 },
 	{ IP_FW_TABLE_XDEL,	1,	HDIR_BOTH,	manage_table_ent_v1 },
 	{ IP_FW_TABLE_XFIND,	0,	HDIR_GET,	find_table_entry },
 	{ IP_FW_TABLE_XSWAP,	0,	HDIR_SET,	swap_table },
 	{ IP_FW_TABLES_ALIST,	0,	HDIR_GET,	list_table_algo },
 	{ IP_FW_TABLE_XGETSIZE,	0,	HDIR_GET,	get_table_size },
 };
 
 /*
  * Object hash callback destroying single table: unlinks it from
  * chain passed via @arg, releases its kernel index and frees
  * its config. Failure to release the index is only logged.
  *
  * Always returns 0.
  */
 static int
 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct ip_fw_chain *ch;
 	struct table_config *tc;
 
 	ch = (struct ip_fw_chain *)arg;
 	tc = (struct table_config *)no;
 
 	unlink_table(ch, tc);
 	if (ipfw_objhash_free_idx(ni, no->kidx) != 0) {
 		printf("Error unlinking kidx %d from table %s\n",
 		    no->kidx, no->name);
 	}
 	free_table_config(ni, tc);
 
 	return (0);
 }
 
 /*
  * Shuts tables module down.
  * Unregisters sockopt handlers and opcode rewriters, destroys all
  * tables of @ch and releases module state in the order opposite
  * to ipfw_init_tables().
  * @last is passed as is to unregistration macros and
  * to table values shutdown.
  */
 void
 ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
 {
 
 	IPFW_DEL_SOPT_HANDLER(last, scodes);
 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
 
 	/* Remove all tables from working set */
 	IPFW_UH_WLOCK(ch);
 	IPFW_WLOCK(ch);
 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
 	IPFW_WUNLOCK(ch);
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Free pointers itself */
 	free(ch->tablestate, M_IPFW);
 
 	ipfw_table_value_destroy(ch, last);
 	ipfw_table_algo_destroy(ch);
 
 	/* Name hash goes first, it is stored inside tables config */
 	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
 	free(CHAIN_TO_TCFG(ch), M_IPFW);
 }
 
 /*
  * Starts tables module.
  * Allocates per-chain table info array and tables config sized by
  * V_fw_tables_max, initializes table values and algorithms and
  * registers opcode rewriters and sockopt handlers.
  * @first is passed as is to table values init and
  * to registration macros.
  *
  * Always returns 0.
  */
 int
 ipfw_init_tables(struct ip_fw_chain *ch, int first)
 {
 	struct tables_config *tcfg;
 
 	/* Allocate pointers */
 	ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
 	tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
 	ch->tblcfg = tcfg;
 
 	ipfw_table_value_init(ch, first);
 	ipfw_table_algo_init(ch);
 
 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
 	IPFW_ADD_SOPT_HANDLER(first, scodes);
 	return (0);
 }
 
 
 
Index: head/sys/netpfil/ipfw/ip_fw_table_value.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_table_value.c	(revision 343618)
+++ head/sys/netpfil/ipfw/ip_fw_table_value.c	(revision 343619)
@@ -1,809 +1,808 @@
 /*-
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Multi-field value support for ipfw tables.
  *
  * This file contains necessary functions to convert
  * large multi-field values into u32 indices suitable to be fed
  * to various table algorithms. Other machinery like proper refcounting,
  * internal structures resizing are also kept here.
  */
 
 #include "opt_ipfw.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/hash.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
-#include <net/pfil.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
 /* Hash and compare callbacks installed on the value hash. */
 static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key,
     uint32_t kopt);
 static int cmp_table_value(struct named_object *no, const void *key,
     uint32_t kopt);
 
 /* Sockopt handler dumping all table values. */
 static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd);
 
 /* Sockopt opcodes served by this file. */
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_TABLE_VLIST,	0,	HDIR_GET,	list_table_values },
 };
 
 /* Value hash (named object instance) of @chain. */
 #define	CHAIN_TO_VI(chain)	(CHAIN_TO_TCFG(chain)->valhash)
 
 /*
  * Hash entry describing one table value.
  * The named_object header is the first member: code in this file casts
  * struct named_object pointers obtained from the hash back to this type.
  */
 struct table_val_link
 {
 	struct named_object	no;
 	struct table_value	*pval;	/* Pointer to real table value */
 };
 #define	VALDATA_START_SIZE	64	/* Initial size of the value array, in items */
 
 /*
  * Argument block for the value hash iteration callbacks
  * (update_tvalue() and dump_tvalue()).
  */
 struct vdump_args {
 	struct ip_fw_chain *ch;		/* chain being processed */
 	struct sockopt_data *sd;	/* output buffer, used by dump_tvalue() */
 	struct table_value *pval;	/* value array, used by update_tvalue() */
 	int error;			/* set by dump_tvalue() on failure */
 };
 
 
 /*
  * Hash callback for the value hash: hashes the first 56 bytes of @key.
  * The length must stay in sync with the one used by cmp_table_value().
  * @ni and @kopt are not used.
  */
 static uint32_t
 hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt)
 {
 	uint32_t hval;
 
 	hval = hash32_buf(key, 56, 0);
 	return (hval);
 }
 
 /*
  * Compare callback for the value hash: memcmp() of the first 56 bytes of
  * the value linked to @no against @key.  Returns 0 when they are equal.
  * @kopt is not used.
  */
 static int
 cmp_table_value(struct named_object *no, const void *key, uint32_t kopt)
 {
 	struct table_val_link *ptvl;
 
 	ptvl = (struct table_val_link *)no;
 	return (memcmp(ptvl->pval, key, 56));
 }
 
 static void
 mask_table_value(struct table_value *src, struct table_value *dst,
     uint32_t mask)
 {
 #define	_MCPY(f, b)	if ((mask & (b)) != 0) { dst->f = src->f; }
 
 	memset(dst, 0, sizeof(*dst));
 	_MCPY(tag, IPFW_VTYPE_TAG);
 	_MCPY(pipe, IPFW_VTYPE_PIPE);
 	_MCPY(divert, IPFW_VTYPE_DIVERT);
 	_MCPY(skipto, IPFW_VTYPE_SKIPTO);
 	_MCPY(netgraph, IPFW_VTYPE_NETGRAPH);
 	_MCPY(fib, IPFW_VTYPE_FIB);
 	_MCPY(nat, IPFW_VTYPE_NAT);
 	_MCPY(dscp, IPFW_VTYPE_DSCP);
 	_MCPY(nh4, IPFW_VTYPE_NH4);
 	_MCPY(nh6, IPFW_VTYPE_NH6);
 	_MCPY(zoneid, IPFW_VTYPE_NH6);
 #undef	_MCPY
 }
 
 /*
  * Returns the value array and value hash to use for a table operation.
  * With @vshared set these are the chain-wide shared ones; otherwise both
  * are NULL, as no per-table storage is provided here.
  * Either output pointer (@ptv, @pvi) may be NULL if the caller does not
  * need it.  @tc is not used.
  */
 static void
 get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared,
     struct table_value **ptv, struct namedobj_instance **pvi)
 {
 	struct table_value *values;
 	struct namedobj_instance *vhash;
 
 	values = NULL;
 	vhash = NULL;
 	if (vshared != 0) {
 		values = (struct table_value *)ch->valuestate;
 		vhash = CHAIN_TO_VI(ch);
 	}
 
 	if (ptv != NULL)
 		*ptv = values;
 	if (pvi != NULL)
 		*pvi = vhash;
 }
 
 /*
  * Hash iteration callback: re-points one value link at its slot in the
  * value array passed via @arg (struct vdump_args, field pval).
  * Used after the array has been replaced.  Always returns 0.
  */
 static int
 update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
 {
 	struct table_val_link *link;
 	struct table_value *slot;
 
 	link = (struct table_val_link *)no;
 	slot = &((struct vdump_args *)arg)->pval[link->no.kidx];
 
 	/* Both the value pointer and the hash key live in the array slot */
 	link->pval = slot;
 	link->no.name = (char *)slot;
 	return (0);
 }
 
 /*
  * Grows value storage shared among all tables.
  * Drops/reacquires UH locks.
  * Notifies other running adds on @ch shared storage resize.
  * Note that the function does not guarantee that free space
  * will be available after invocation, so the caller needs
  * to retry in a loop himself.
  *
  * Returns 0 on success (including the case where somebody else has
  * already resized the storage), ENOSPC if the doubled size would
  * reach 2^30 items.
  */
 static int
 resize_shared_value_storage(struct ip_fw_chain *ch)
 {
 	struct tables_config *tcfg;
 	struct namedobj_instance *vi;
 	struct table_value *pval, *valuestate, *old_valuestate;
 	void *new_idx;
 	struct vdump_args da;
 	int new_blocks;
 	int val_size, val_size_old;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	valuestate = NULL;
 	new_idx = NULL;
 
 	pval = (struct table_value *)ch->valuestate;
 	vi = CHAIN_TO_VI(ch);
 	tcfg = CHAIN_TO_TCFG(ch);
 
 	/* Storage is always doubled */
 	val_size = tcfg->val_size * 2;
 
 	if (val_size == (1 << 30))
 		return (ENOSPC);
 
 	/* The allocations below may sleep (M_WAITOK): drop the UH lock */
 	IPFW_UH_WUNLOCK(ch);
 
 	valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx,
 	    &new_blocks);
 
 	IPFW_UH_WLOCK(ch);
 
 	/*
 	 * Check if we still need to resize: the size may have changed
 	 * while the lock was dropped.  If so, the fresh allocations are
 	 * simply released at "done".
 	 */
 	if (tcfg->val_size >= val_size)
 		goto done;
 
 	/* Update pointers and notify everyone we're changing @ch */
 	pval = (struct table_value *)ch->valuestate;
 	rollback_toperation_state(ch, ch);
 
 	/* Good. Let's merge: copy current values into the new array */
 	memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size);
 	ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
 
 	IPFW_WLOCK(ch);
 	/*
 	 * Change pointers.  After the exchange @valuestate refers to the
 	 * old array, which is what gets freed at "done".
 	 * NOTE(review): new_idx/new_blocks presumably receive the old
 	 * bitmap from ipfw_objhash_bitmap_swap() -- confirm.
 	 */
 	old_valuestate = ch->valuestate;
 	ch->valuestate = valuestate;
 	valuestate = old_valuestate;
 	ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
 
 	val_size_old = tcfg->val_size;
 	tcfg->val_size = val_size;
 	/* val_size is not read again in this function after this point */
 	val_size = val_size_old;
 	IPFW_WUNLOCK(ch);
 	/* Update pointers to reflect resize */
 	memset(&da, 0, sizeof(da));
 	da.pval = (struct table_value *)ch->valuestate;
 	ipfw_objhash_foreach(vi, update_tvalue, &da);
 
 done:
 	/* Common exit: release whichever array/bitmap is no longer in use */
 	free(valuestate, M_IPFW);
 	ipfw_objhash_bitmap_free(new_idx, new_blocks);
 
 	return (0);
 }
 
 /*
  * Releases one reference on the table value with index @kidx kept in the
  * array @pval and indexed by the hash @vi.  When the last reference goes
  * away the value is removed from the hash, its index is returned to the
  * pool and the link object is freed.
  */
 static void
 unref_table_value(struct namedobj_instance *vi, struct table_value *pval,
     uint32_t kidx)
 {
 	struct table_val_link *ptvl;
 
 	KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx));
 	pval[kidx].refcnt--;
 	if (pval[kidx].refcnt > 0) {
 		/* Still in use by somebody else */
 		return;
 	}
 
 	/* Last reference, delete item */
 	ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx);
 	KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx));
 	ipfw_objhash_del(vi, &ptvl->no);
 	ipfw_objhash_free_idx(vi, kidx);
 	free(ptvl, M_IPFW);
 }
 
 /*
  * Argument block for unref_table_value_cb(), filled in by
  * ipfw_unref_table_values().
  */
 struct flush_args {
 	struct ip_fw_chain *ch;		/* chain owning the value storage */
 	struct table_algo *ta;		/* algorithm providing dump_tentry() */
 	struct table_info *ti;		/* passed through to dump_tentry() */
 	void *astate;			/* algorithm state, passed through */
 	ipfw_obj_tentry tent;		/* scratch entry reused for every record */
 };
 
 static int
 unref_table_value_cb(void *e, void *arg)
 {
 	struct flush_args *fa;
 	struct ip_fw_chain *ch;
 	struct table_algo *ta;
 	ipfw_obj_tentry *tent;
 	int error;
 
 	fa = (struct flush_args *)arg;
 
 	ta = fa->ta;
 	memset(&fa->tent, 0, sizeof(fa->tent));
 	tent = &fa->tent;
 	error = ta->dump_tentry(fa->astate, fa->ti, e, tent);
 	if (error != 0)
 		return (error);
 
 	ch = fa->ch;
 
 	unref_table_value(CHAIN_TO_VI(ch),
 	    (struct table_value *)ch->valuestate, tent->v.kidx);
 
 	return (0);
 }
 
 /*
  * Drop references for each value used in @tc.
  */
 void
 ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,
     struct table_algo *ta, void *astate, struct table_info *ti)
 {
 	struct flush_args fa;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	memset(&fa, 0, sizeof(fa));
 	fa.ch = ch;
 	fa.ta = ta;
 	fa.astate = astate;
 	fa.ti = ti;
 
 	ta->foreach(astate, ti, unref_table_value_cb, &fa);
 }
 
 /*
  * Table operation state handler.
  * Called when something is about to change in the table referenced by
  * @ts in a way that may invalidate an on-going addition.
  *
  * Drops the value references already taken for the entries of @ts
  * (entries whose value index is still 0 hold no reference and are
  * skipped).  Must be called with the UH write lock held.
  */
 void
 rollback_table_values(struct tableop_state *ts)
 {
 	struct ip_fw_chain *ch;
 	struct table_value *pval;
 	struct namedobj_instance *vi;
 	struct tentry_info *ptei;
 	int i;
 
 	ch = ts->ch;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	/* Get current table value pointer */
 	get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
 
 	for (i = 0; i < ts->count; i++) {
 		ptei = &ts->tei[i];
 		if (ptei->value != 0)
 			unref_table_value(vi, pval, ptei->value);
 	}
 }
 
 /*
  * Allocate new value index in the value hash @vi.
  * Function may drop/reacquire UH lock.
  *
  * If no free index is available, the operation state handler of @ts is
  * invoked and the shared storage is resized; in that case the function
  * returns the resize result WITHOUT storing anything in @pvidx, and the
  * caller is expected to notice ts->modified and restart.
  *
  * If the table algorithm cannot store the allocated index (it is not
  * below ts->ta->vlimit), the index is given back and ENOSPC is returned.
  *
  * Returns 0 on success, with the new index stored in @pvidx.
  */
 static int
 alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts,
     struct namedobj_instance *vi, uint16_t *pvidx)
 {
 	int error, vlimit;
 	uint16_t vidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	error = ipfw_objhash_alloc_idx(vi, &vidx);
 	if (error != 0) {
 
 		/*
 		 * We need to resize array. This involves
 		 * lock/unlock, so we need to check "modified"
 		 * state.
 		 */
 		ts->opstate.func(ts->tc, &ts->opstate);
 		error = resize_shared_value_storage(ch);
 		return (error); /* ts->modified should be set, we will restart */
 	}
 
 	vlimit = ts->ta->vlimit;
 	if (vlimit != 0 && vidx >= vlimit) {
 
 		/*
 		 * Algorithm is not able to store given index.
 		 * We have to rollback state, start using
 		 * per-table value array or return error
 		 * if we're already using it.
 		 *
 		 * Neither the shared -> per-table switch nor the
 		 * per-table case is implemented, so fail for now.
 		 * Give the unused index back first: no object is
 		 * going to be attached to it, so nothing else would
 		 * ever release it.
 		 *
 		 * TODO: do not rollback state if
 		 * atomicity is not required.
 		 */
 		(void)ipfw_objhash_free_idx(vi, vidx);
 		return (ENOSPC); /* TODO: proper error */
 	}
 
 	*pvidx = vidx;
 	return (0);
 }
 
 /*
  * Drops value references that are no longer needed after an operation on
  * the @count entries in @tei (updates, deletes, partially successful
  * adds or rollbacks) and resets the value index of every processed entry.
  *
  * ADD, no rollback (@rollback == 0): entries flagged TEI_FLAGS_ADDED keep
  * their reference; every other entry (UPDATED ones, which carry the old
  * value, or failed/ignored ones) is unreferenced.
  *
  * ADD, atomic rollback (@rollback != 0): every entry is unreferenced.
  *
  * DELETE: no atomic support, every non-zero value is unreferenced.
  */
 void
 ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,
     struct tentry_info *tei, uint32_t count, int rollback)
 {
 	struct table_value *pval;
 	struct namedobj_instance *vi;
 	struct tentry_info *ptei;
 	int i, keep;
 
 	/*
 	 * Get current table value pointers.
 	 * XXX: Properly read vshared
 	 */
 	get_value_ptrs(ch, tc, 1, &pval, &vi);
 
 	for (i = 0; i < count; i++) {
 		ptei = &tei[i];
 
 		/* Nothing referenced, e.g. deleting a non-existing record */
 		if (ptei->value == 0)
 			continue;
 
 		/* Freshly added entries own their reference unless rolled back */
 		keep = (rollback == 0 &&
 		    (ptei->flags & TEI_FLAGS_ADDED) != 0);
 		if (!keep)
 			unref_table_value(vi, pval, ptei->value);
 		ptei->value = 0;
 	}
 }
 
 /*
  * Main function used to link values of entries going to be added,
  * to the index. Since we may perform many UH locks drops/acquires,
  * handle changes by checking tablestate "modified" field.
  *
  * Works in three stages:
  *  1) reference values that already exist in the value hash;
  *  2) with the UH lock dropped, allocate link objects for the rest;
  *  3) with the UH lock re-taken, allocate indices and insert new values.
  *
  * Must be called with the UH write lock held; the lock is held again
  * on return.
  *
  * Success: return 0.  Note that 0 is also returned when ts->modified
  * has been set, in which case the caller has to restart the operation.
  * Otherwise returns the error from alloc_table_vidx().
  */
 int
 ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts)
 {
 	int error, i, found;
 	struct namedobj_instance *vi;
 	struct table_config *tc;
 	struct tentry_info *tei, *ptei;
 	uint32_t count, vlimit;
 	uint16_t vidx;
 	struct table_val_link *ptv;
 	struct table_value tval, *pval;
 
 	/*
 	 * Stage 1: reference all existing values and
 	 * save their indices.
 	 */
 	IPFW_UH_WLOCK_ASSERT(ch);
 	get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
 
 	error = 0;
 	found = 0;
 	vlimit = ts->ta->vlimit;
 	vidx = 0;
 	tc = ts->tc;
 	tei = ts->tei;
 	count = ts->count;
 	for (i = 0; i < count; i++) {
 		ptei = &tei[i];
 		ptei->value = 0; /* Ensure value is always 0 in the beginning */
 		/* The masked value is the lookup key in the value hash */
 		mask_table_value(ptei->pvalue, &tval, ts->vmask);
 		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
 		    (char *)&tval);
 		if (ptv == NULL)
 			continue;
 		/* Deal with vlimit later */
 		if (vlimit > 0 && vlimit <= ptv->no.kidx)
 			continue;
 
 		/* Value found. Bump refcount */
 		ptv->pval->refcnt++;
 		ptei->value = ptv->no.kidx;
 		found++;
 	}
 
 	if (ts->count == found) {
 		/* We've found all values, no need to create new ones */
 		return (0);
 	}
 
 	/*
 	 * We have added some state here, let's attach operation
 	 * state to the list to be able to rollback if necessary.
 	 */
 	add_toperation_state(ch, ts);
 	/* Ensure table won't disappear */
 	tc_ref(tc);
 	IPFW_UH_WUNLOCK(ch);
 
 	/*
 	 * Stage 2: allocate objects for non-existing values.
 	 * Done without the UH lock since malloc(M_WAITOK) may sleep.
 	 * Objects left over from a previous attempt (ptv != NULL) are reused.
 	 */
 	for (i = 0; i < count; i++) {
 		ptei = &tei[i];
 		if (ptei->value != 0)
 			continue;
 		if (ptei->ptv != NULL)
 			continue;
 		ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,
 		    M_WAITOK | M_ZERO);
 	}
 
 	/*
 	 * Stage 3: allocate index numbers for new values
 	 * and link them to index.
 	 */
 	IPFW_UH_WLOCK(ch);
 	tc_unref(tc);
 	del_toperation_state(ch, ts);
 	if (ts->modified != 0) {
 
 		/*
 		 * In general, we should free all state/indexes here
 		 * and return. However, we keep allocated state instead
 		 * to ensure we achieve some progress on each restart.
 		 */
 		return (0);
 	}
 
 	KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));
 
 	/*
 	 * Let's try to link values.
 	 * NOTE(review): this loop does not skip entries that already got a
 	 * reference in stage 1 (ptei->value != 0), so such entries appear to
 	 * take a second reference below -- confirm whether that is intended.
 	 */
 	for (i = 0; i < count; i++) {
 		ptei = &tei[i];
 
 		/* Check if record has appeared */
 		mask_table_value(ptei->pvalue, &tval, ts->vmask);
 		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
 		    (char *)&tval);
 		if (ptv != NULL) {
 			ptv->pval->refcnt++;
 			ptei->value = ptv->no.kidx;
 			continue;
 		}
 
 		/* May perform UH unlock/lock */
 		error = alloc_table_vidx(ch, ts, vi, &vidx);
 		if (error != 0) {
 			/* Roll back via the operation state handler */
 			ts->opstate.func(ts->tc, &ts->opstate);
 			return (error);
 		}
 		/* value storage resize has happened, return */
 		if (ts->modified != 0)
 			return (0);
 
 		/* Finally, we have allocated valid index, let's add entry */
 		ptei->value = vidx;
 		/* Hand the preallocated link object over to the hash */
 		ptv = (struct table_val_link *)ptei->ptv;
 		ptei->ptv = NULL;
 
 		ptv->no.kidx = vidx;
 		/* The hash key ("name") is the value slot itself */
 		ptv->no.name = (char *)&pval[vidx];
 		ptv->pval = &pval[vidx];
 		memcpy(ptv->pval, &tval, sizeof(struct table_value));
 		pval[vidx].refcnt = 1;
 		ipfw_objhash_add(vi, &ptv->no);
 	}
 
 	return (0);
 }
 
 /*
  * Compatibility helper for the old IP_FW_TABLE_ADD / IP_FW_TABLE_XADD
  * opcodes, which carry a single 32-bit value.  @v is zeroed and the
  * legacy @value is then stored in each of the fields assigned below;
  * nh6 and zoneid stay zero.
  */
 void
 ipfw_import_table_value_legacy(uint32_t value, struct table_value *v)
 {
 
 	memset(v, 0, sizeof(*v));
 
 	/* Action arguments */
 	v->tag = value;
 	v->pipe = value;
 	v->divert = value;
 	v->skipto = value;
 	v->netgraph = value;
 	v->nat = value;
 	v->limit = value;
 
 	/* Routing / marking */
 	v->fib = value;
 	v->dscp = value;
 	v->nh4 = value; /* host format */
 }
 
 /*
  * Converts table value @v to the single 32-bit number used by the legacy
  * table dump opcodes.  Only the tag field is exported.
  */
 uint32_t
 ipfw_export_table_value_legacy(struct table_value *v)
 {
 	uint32_t legacy;
 
 	/*
 	 * TODO: provide more compatibility depending on
 	 * vmask value.
 	 */
 	legacy = v->tag;
 	return (legacy);
 }
 
 /*
  * Converts a table value from the current userland layout to the kernel
  * one, in place: the result overwrites @iv.
  */
 void
 ipfw_import_table_value_v1(ipfw_table_value *iv)
 {
 	struct table_value kv;
 
 	/* Build the kernel copy on the side, fields not listed stay zero */
 	memset(&kv, 0, sizeof(kv));
 	kv.tag = iv->tag;
 	kv.pipe = iv->pipe;
 	kv.divert = iv->divert;
 	kv.skipto = iv->skipto;
 	kv.netgraph = iv->netgraph;
 	kv.fib = iv->fib;
 	kv.nat = iv->nat;
 	kv.dscp = iv->dscp;
 	kv.limit = iv->limit;
 	kv.nh4 = iv->nh4;
 	kv.nh6 = iv->nh6;
 	kv.zoneid = iv->zoneid;
 
 	/* Store the converted value back over the userland one */
 	memcpy(iv, &kv, sizeof(*iv));
 }
 
 /*
  * Converts kernel table value @v to the current userland layout and
  * stores it in @piv.
  * @v and @piv may point to the same memory, hence the temporary copy.
  */
 void
 ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)
 {
 	ipfw_table_value uv;
 
 	/* Fields not listed below are exported as zero */
 	memset(&uv, 0, sizeof(uv));
 	uv.tag = v->tag;
 	uv.pipe = v->pipe;
 	uv.divert = v->divert;
 	uv.skipto = v->skipto;
 	uv.netgraph = v->netgraph;
 	uv.fib = v->fib;
 	uv.nat = v->nat;
 	uv.dscp = v->dscp;
 	uv.nh4 = v->nh4;
 	uv.nh6 = v->nh6;
 	uv.zoneid = v->zoneid;
 	uv.limit = v->limit;
 
 	memcpy(piv, &uv, sizeof(*piv));
 }
 
 /*
  * Hash iteration callback: copies one table value into the sockopt
  * buffer described by @arg (struct vdump_args).
  * Utilizes "spare1" field to store kernel index.
  *
  * Returns 0 on success.  If no buffer space could be obtained, ENOMEM is
  * returned and also recorded in the error field of @arg.
  */
 static int
 dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
 {
 	struct vdump_args *da;
 	struct table_val_link *ptv;
 	struct table_value *out;
 
 	da = (struct vdump_args *)arg;
 	ptv = (struct table_val_link *)no;
 
 	out = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*out));
 	if (out != NULL) {
 		memcpy(out, ptv->pval, sizeof(*out));
 		out->spare1 = ptv->no.kidx;
 		return (0);
 	}
 
 	/* Out of memory, returning */
 	da->error = ENOMEM;
 	return (ENOMEM);
 }
 
 /*
  * Dumps all shared/table value data
  * Data layout (v1)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_table_value x N ]
  *
  * Returns 0 on success, EINVAL if the request header is missing or
  * claims more space than was supplied, ENOMEM if the supplied buffer is
  * too small (the required size is reported back in the header) or if a
  * value could not be stored while dumping.
  */
 static int
 list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	struct namedobj_instance *vi;
 	struct vdump_args da;
 	uint32_t count, size;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL)
 		return (EINVAL);
 	if (sd->valsize < olh->size)
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(ch);
 	vi = CHAIN_TO_VI(ch);
 
 	count = ipfw_objhash_count(vi);
 	size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader);
 
 	/* Fill in header regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_table_value);
 
 	if (size > olh->size) {
 		/* Tell the caller how much space is needed */
 		olh->size = size;
 		IPFW_UH_RUNLOCK(ch);
 		return (ENOMEM);
 	}
 	olh->size = size;
 
 	/*
 	 * Do the actual value dump
 	 */
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.sd = sd;
 	ipfw_objhash_foreach(vi, dump_tvalue, &da);
 
 	IPFW_UH_RUNLOCK(ch);
 
 	/*
 	 * dump_tvalue() records a failure to obtain buffer space in
 	 * da.error (zero otherwise); do not report a truncated dump as
 	 * success.
 	 */
 	return (da.error);
 }
 
 /*
  * Sets up value storage for @ch: allocates the initial zeroed value
  * array (VALDATA_START_SIZE items), creates the value hash with the
  * value-specific hash/compare callbacks and registers this file's
  * sockopt handlers.  @first is handed to the registration macro.
  *
  * ch->tblcfg must already be allocated, it is dereferenced here.
  */
 void
 ipfw_table_value_init(struct ip_fw_chain *ch, int first)
 {
 	struct tables_config *tcfg;
 
 	ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value),
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	tcfg = ch->tblcfg;
 
 	/* The hash is sized to match the value array */
 	tcfg->val_size = VALDATA_START_SIZE;
 	tcfg->valhash = ipfw_objhash_create(tcfg->val_size);
 	ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,
 	    cmp_table_value);
 
 	IPFW_ADD_SOPT_HANDLER(first, scodes);
 }
 
 /*
  * Hash iteration callback used on shutdown: frees one value link object.
  * @ni and @arg are not used.  Always returns 0.
  */
 static int
 destroy_value(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct table_val_link *ptvl;
 
 	/* The named object is the first member of the link object */
 	ptvl = (struct table_val_link *)no;
 	free(ptvl, M_IPFW);
 	return (0);
 }
 
 /*
  * Tears down value storage for @ch: unregisters this file's sockopt
  * handlers, frees the value array, frees every link object still present
  * in the value hash and finally destroys the hash itself.
  * @last is handed to the unregistration macro.
  */
 void
 ipfw_table_value_destroy(struct ip_fw_chain *ch, int last)
 {
 
 	IPFW_DEL_SOPT_HANDLER(last, scodes);
 
 	free(ch->valuestate, M_IPFW);
 	/* destroy_value() only frees the link objects, not the array */
 	ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch);
 	ipfw_objhash_destroy(CHAIN_TO_VI(ch));
 }