Index: head/sys/netinet/ip_var.h
===================================================================
--- head/sys/netinet/ip_var.h	(revision 345164)
+++ head/sys/netinet/ip_var.h	(revision 345165)
@@ -1,302 +1,302 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IP_VAR_H_
 #define	_NETINET_IP_VAR_H_
 
 #include <sys/queue.h>
 #include <sys/epoch.h>
 
 /*
  * Overlay for ip header used by other protocols (tcp, udp).
  */
 struct ipovly {
 	u_char	ih_x1[9];		/* (unused) */
 	u_char	ih_pr;			/* protocol */
 	u_short	ih_len;			/* protocol length */
 	struct	in_addr ih_src;		/* source internet address */
 	struct	in_addr ih_dst;		/* destination internet address */
 };
 
 #ifdef _KERNEL
 /*
  * Ip reassembly queue structure.  Each fragment
  * being reassembled is attached to one of these structures.
  * They are timed out after ipq_ttl drops to 0, and may also
  * be reclaimed if memory becomes tight.
  */
 struct ipq {
 	TAILQ_ENTRY(ipq) ipq_list;	/* to other reass headers */
 	u_char	ipq_ttl;		/* time for reass q to live */
 	u_char	ipq_p;			/* protocol of this fragment */
 	u_short	ipq_id;			/* sequence id for reassembly */
 	int	ipq_maxoff;		/* total length of packet */
 	struct mbuf *ipq_frags;		/* to ip headers of fragments */
 	struct	in_addr ipq_src,ipq_dst;
 	u_char	ipq_nfrags;		/* # frags in this packet */
 	struct label *ipq_label;	/* MAC label */
 };
 #endif /* _KERNEL */
 
 /*
  * Structure stored in mbuf in inpcb.ip_options
  * and passed to ip_output when ip options are in use.
  * The actual length of the options (including ipopt_dst)
  * is in m_len.
  */
 #define MAX_IPOPTLEN	40
 
 struct ipoption {
 	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
 	char	ipopt_list[MAX_IPOPTLEN];	/* options proper */
 };
 
 /*
  * Structure attached to inpcb.ip_moptions and
  * passed to ip_output when IP multicast options are in use.
  * This structure is lazy-allocated.
  */
 struct ip_moptions {
 	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
 	struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
 	u_long	imo_multicast_vif;	/* vif num outgoing multicasts */
 	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
 	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
 	u_short	imo_num_memberships;	/* no. memberships this socket */
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
 	struct	epoch_context imo_epoch_ctx;
 };
 
 struct	ipstat {
 	uint64_t ips_total;		/* total packets received */
 	uint64_t ips_badsum;		/* checksum bad */
 	uint64_t ips_tooshort;		/* packet too short */
 	uint64_t ips_toosmall;		/* not enough data */
 	uint64_t ips_badhlen;		/* ip header length < data size */
 	uint64_t ips_badlen;		/* ip length < ip header length */
 	uint64_t ips_fragments;		/* fragments received */
 	uint64_t ips_fragdropped;	/* frags dropped (dups, out of space) */
 	uint64_t ips_fragtimeout;	/* fragments timed out */
 	uint64_t ips_forward;		/* packets forwarded */
 	uint64_t ips_fastforward;	/* packets fast forwarded */
 	uint64_t ips_cantforward;	/* packets rcvd for unreachable dest */
 	uint64_t ips_redirectsent;	/* packets forwarded on same net */
 	uint64_t ips_noproto;		/* unknown or unsupported protocol */
 	uint64_t ips_delivered;		/* datagrams delivered to upper level*/
 	uint64_t ips_localout;		/* total ip packets generated here */
 	uint64_t ips_odropped;		/* lost packets due to nobufs, etc. */
 	uint64_t ips_reassembled;	/* total packets reassembled ok */
 	uint64_t ips_fragmented;	/* datagrams successfully fragmented */
 	uint64_t ips_ofragments;	/* output fragments created */
 	uint64_t ips_cantfrag;		/* don't fragment flag was set, etc. */
 	uint64_t ips_badoptions;		/* error in option processing */
 	uint64_t ips_noroute;		/* packets discarded due to no route */
 	uint64_t ips_badvers;		/* ip version != 4 */
 	uint64_t ips_rawout;		/* total raw ip packets generated */
 	uint64_t ips_toolong;		/* ip length > max ip packet size */
 	uint64_t ips_notmember;		/* multicasts for unregistered grps */
 	uint64_t ips_nogif;		/* no match gif found */
 	uint64_t ips_badaddr;		/* invalid address on header */
 };
 
 #ifdef _KERNEL
 
 #include <sys/counter.h>
 #include <net/vnet.h>
 
 VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
 #define	IPSTAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
 #define	IPSTAT_SUB(name, val)	IPSTAT_ADD(name, -(val))
 #define	IPSTAT_INC(name)	IPSTAT_ADD(name, 1)
 #define	IPSTAT_DEC(name)	IPSTAT_SUB(name, 1)
 
 /*
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_ipstat_inc(int statnum);
 #define	KMOD_IPSTAT_INC(name)	\
     kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
 void	kmod_ipstat_dec(int statnum);
 #define	KMOD_IPSTAT_DEC(name)	\
     kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
 
 /* flags passed to ip_output as last parameter */
 #define	IP_FORWARDING		0x1		/* most of ip header exists */
 #define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
 #define	IP_SENDONES		0x4		/* send all-ones broadcast */
 #define	IP_SENDTOIF		0x8		/* send on specific ifnet */
 #define IP_ROUTETOIF		SO_DONTROUTE	/* 0x10 bypass routing tables */
 #define IP_ALLOWBROADCAST	SO_BROADCAST	/* 0x20 can send broadcast packets */
 #define	IP_NODEFAULTFLOWID	0x40		/* Don't set the flowid from inp */
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP_HDR_ALIGNED_P(ip)	1
 #else
 #define IP_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
 #endif
 
 struct ip;
 struct inpcb;
 struct route;
 struct sockopt;
 struct inpcbinfo;
 
 VNET_DECLARE(int, ip_defttl);			/* default IP ttl */
 VNET_DECLARE(int, ipforwarding);		/* ip forwarding */
 #ifdef IPSTEALTH
 VNET_DECLARE(int, ipstealth);			/* stealth forwarding */
 #endif
 extern u_char	ip_protox[];
 VNET_DECLARE(struct socket *, ip_rsvpd);	/* reservation protocol daemon*/
 VNET_DECLARE(struct socket *, ip_mrouter);	/* multicast routing daemon */
 extern int	(*legal_vif_num)(int);
 extern u_long	(*ip_mcast_src)(int);
 VNET_DECLARE(int, rsvp_on);
 VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
 #define	V_ip_id			VNET(ip_id)
 #define	V_ip_defttl		VNET(ip_defttl)
 #define	V_ipforwarding		VNET(ipforwarding)
 #ifdef IPSTEALTH
 #define	V_ipstealth		VNET(ipstealth)
 #endif
 #define	V_ip_rsvpd		VNET(ip_rsvpd)
 #define	V_ip_mrouter		VNET(ip_mrouter)
 #define	V_rsvp_on		VNET(rsvp_on)
 #define	V_drop_redirect		VNET(drop_redirect)
 
 void	inp_freemoptions(struct ip_moptions *);
 int	inp_getmoptions(struct inpcb *, struct sockopt *);
 int	inp_setmoptions(struct inpcb *, struct sockopt *);
 
 int	ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	ip_drain(void);
 int	ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags);
 void	ip_forward(struct mbuf *m, int srcrt);
 void	ip_init(void);
 extern int
 	(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 	    struct ip_moptions *);
 int	ip_output(struct mbuf *,
 	    struct mbuf *, struct route *, int, struct ip_moptions *,
 	    struct inpcb *);
 int	ipproto_register(short);
 int	ipproto_unregister(short);
 struct mbuf *
 	ip_reass(struct mbuf *);
 void	ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
 	    struct mbuf *);
 void	ip_slowtimo(void);
 void	ip_fillid(struct ip *);
 int	rip_ctloutput(struct socket *, struct sockopt *);
 void	rip_ctlinput(int, struct sockaddr *, void *);
 void	rip_init(void);
 int	rip_input(struct mbuf **, int *, int);
 int	rip_output(struct mbuf *, struct socket *, ...);
 int	ipip_input(struct mbuf **, int *, int);
 int	rsvp_input(struct mbuf **, int *, int);
 int	ip_rsvp_init(struct socket *);
 int	ip_rsvp_done(void);
 extern int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
 extern void	(*ip_rsvp_force_done)(struct socket *);
 extern int	(*rsvp_input_p)(struct mbuf **, int *, int);
 
 VNET_DECLARE(struct pfil_head *, inet_pfil_head);
 #define	V_inet_pfil_head	VNET(inet_pfil_head)
 #define	PFIL_INET_NAME		"inet"
 
 void	in_delayed_cksum(struct mbuf *m);
 
 /* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
 /*
  * Reference to an ipfw or packet filter rule that can be carried
  * outside critical sections.
  * A rule is identified by rulenum:rule_id which is ordered.
  * In version chain_id the rule can be found in slot 'slot', so
  * we don't need a lookup if chain_id == chain->id.
  *
  * On exit from the firewall this structure refers to the rule after
  * the matching one (slot points to the new rule; rulenum:rule_id-1
  * is the matching rule), and additional info (e.g. info often contains
  * the insn argument or tablearg in the low 16 bits, in host format).
  * On entry, the structure is valid if slot>0, and refers to the starting
  * rules. 'info' contains the reason for reinject, e.g. divert port,
  * divert direction, and so on.
  */
 struct ipfw_rule_ref {
 	uint32_t	slot;		/* slot for matching rule	*/
 	uint32_t	rulenum;	/* matching rule number		*/
 	uint32_t	rule_id;	/* matching rule id		*/
 	uint32_t	chain_id;	/* ruleset id			*/
 	uint32_t	info;		/* see below			*/
 };
 
 enum {
 	IPFW_INFO_MASK	= 0x0000ffff,
 	IPFW_INFO_OUT	= 0x00000000,	/* outgoing, just for convenience */
 	IPFW_INFO_IN	= 0x80000000,	/* incoming, overloads dir */
 	IPFW_ONEPASS	= 0x40000000,	/* One-pass, do not reinject */
 	IPFW_IS_MASK	= 0x30000000,	/* which source ? */
 	IPFW_IS_DIVERT	= 0x20000000,
 	IPFW_IS_DUMMYNET =0x10000000,
 	IPFW_IS_PIPE	= 0x08000000,	/* pipe=1, queue = 0 */
 };
 #define MTAG_IPFW	1148380143	/* IPFW-tagged cookie */
 #define MTAG_IPFW_RULE	1262273568	/* rule reference */
 #define	MTAG_IPFW_CALL	1308397630	/* call stack */
 
 struct ip_fw_args;
 typedef int	(*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
 typedef int	(*ip_fw_ctl_ptr_t)(struct sockopt *);
 VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
 #define	V_ip_fw_ctl_ptr		VNET(ip_fw_ctl_ptr)
 
 /* Divert hooks. */
 extern void	(*ip_divert_ptr)(struct mbuf *m, bool incoming);
 /* ng_ipfw hooks -- XXX make it the same as divert and dummynet */
 extern int	(*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
 extern int	(*ip_dn_ctl_ptr)(struct sockopt *);
-extern int	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
+extern int	(*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IP_VAR_H_ */
Index: head/sys/netinet/raw_ip.c
===================================================================
--- head/sys/netinet/raw_ip.c	(revision 345164)
+++ head/sys/netinet/raw_ip.c	(revision 345165)
@@ -1,1178 +1,1178 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_icmp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
     "Maximum TTL on IP packets");
 
 VNET_DEFINE(struct inpcbhead, ripcb);
 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
 
 #define	V_ripcb			VNET(ripcb)
 #define	V_ripcbinfo		VNET(ripcbinfo)
 
 /*
  * Control and data hooks for ipfw, dummynet, divert and so on.
  * The data hooks are not used here but it is convenient
  * to keep them all in one place.
  */
 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
 
 int	(*ip_dn_ctl_ptr)(struct sockopt *);
-int	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
+int	(*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
 void	(*ip_divert_ptr)(struct mbuf *, bool);
 int	(*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
 
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip_mrouter);
 
 /*
  * The various mrouter and rsvp functions.
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 #endif /* INET */
 
 extern	struct protosw inetsw[];
 
 u_long	rip_sendspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 
 u_long	rip_recvspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 /*
  * Hash functions
  */
 
 #define INP_PCBHASH_RAW_SIZE	256
 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
         (((proto) + (laddr) + (faddr)) % (mask) + 1)
 
 #ifdef INET
 static void
 rip_inshash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *pcbhash;
 	int hash;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 	
 	if (inp->inp_ip_p != 0 &&
 	    inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != INADDR_ANY) {
 		hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
 		    inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
 	} else
 		hash = 0;
 	pcbhash = &pcbinfo->ipi_hashbase[hash];
 	CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 }
 
 static void
 rip_delhash(struct inpcb *inp)
 {
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CK_LIST_REMOVE(inp, inp_hash);
 }
 #endif /* INET */
 
 /*
  * Raw interface to IP protocol.
  */
 
 /*
  * Initialize raw connection block q.
  */
 static void
 rip_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
 }
 
 static int
 rip_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "rawinp");
 	return (0);
 }
 
 void
 rip_init(void)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
 	    1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE);
 	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
 
 #ifdef VIMAGE
 static void
 rip_destroy(void *unused __unused)
 {
 
 	in_pcbinfo_destroy(&V_ripcbinfo);
 }
 VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
 #endif
 
 #ifdef INET
 static int
 rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
     struct sockaddr_in *ripsrc)
 {
 	int policyfail = 0;
 
 	INP_LOCK_ASSERT(last);
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* check AH/ESP integrity. */
 	if (IPSEC_ENABLED(ipv4)) {
 		if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0)
 			policyfail = 1;
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
 		policyfail = 1;
 #endif
 	/* Check the minimum TTL for socket. */
 	if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
 		policyfail = 1;
 	if (!policyfail) {
 		struct mbuf *opts = NULL;
 		struct socket *so;
 
 		so = last->inp_socket;
 		if ((last->inp_flags & INP_CONTROLOPTS) ||
 		    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
 			ip_savecontrol(last, &opts, ip, n);
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (sbappendaddr_locked(&so->so_rcv,
 		    (struct sockaddr *)ripsrc, n, opts) == 0) {
 			/* should notify about lost packet */
 			m_freem(n);
 			if (opts)
 				m_freem(opts);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 		} else
 			sorwakeup_locked(so);
 	} else
 		m_freem(n);
 	return (policyfail);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  */
 int
 rip_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct inpcb *inp, *last;
 	struct sockaddr_in ripsrc;
 	struct epoch_tracker et;
 	int hash;
 
 	*mp = NULL;
 
 	bzero(&ripsrc, sizeof(ripsrc));
 	ripsrc.sin_len = sizeof(ripsrc);
 	ripsrc.sin_family = AF_INET;
 	ripsrc.sin_addr = ip->ip_src;
 	last = NULL;
 
 	ifp = m->m_pkthdr.rcvif;
 
 	hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
 	    ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
 	INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
 	CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
 		if (inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 			continue;
 		if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 			continue;
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n != NULL)
 			    (void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 			last = NULL;
 		}
 		INP_RLOCK(inp);
 		if (__predict_false(inp->inp_flags2 & INP_FREED))
 			goto skip_1;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * XXX: If faddr was bound to multicast group,
 			 * jailed raw socket will drop datagram.
 			 */
 			if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				goto skip_1;
 		}
 		last = inp;
 		continue;
 	skip_1:
 		INP_RUNLOCK(inp);
 	}
 	CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
 		if (inp->inp_ip_p && inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (!in_nullhost(inp->inp_laddr) &&
 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
 			continue;
 		if (!in_nullhost(inp->inp_faddr) &&
 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
 			continue;
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n != NULL)
 				(void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 			last = NULL;
 		}
 		INP_RLOCK(inp);
 		if (__predict_false(inp->inp_flags2 & INP_FREED))
 			goto skip_2;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 			    prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				goto skip_2;
 		}
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->inp_moptions != NULL &&
 		    IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 			/*
 			 * If the incoming datagram is for IGMP, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * In the case of IGMPv2, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. imo_multi_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if (proto != IPPROTO_IGMP) {
 				struct sockaddr_in group;
 
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ip->ip_dst;
 
 				blocked = imo_multi_filter(inp->inp_moptions,
 				    ifp,
 				    (struct sockaddr *)&group,
 				    (struct sockaddr *)&ripsrc);
 			}
 
 			if (blocked != MCAST_PASS) {
 				IPSTAT_INC(ips_notmember);
 				goto skip_2;
 			}
 		}
 		last = inp;
 		continue;
 	skip_2:
 		INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
 	if (last != NULL) {
 		if (rip_append(last, ip, m, &ripsrc) != 0)
-			IPSTAT_INC(ips_delivered);
+			IPSTAT_DEC(ips_delivered);
 		INP_RUNLOCK(last);
 	} else {
 		if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
 			IPSTAT_INC(ips_noproto);
 			IPSTAT_DEC(ips_delivered);
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
 		} else {
 			m_freem(m);
 		}
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.  Tack on options user may
  * have setup with control call.
  */
 int
 rip_output(struct mbuf *m, struct socket *so, ...)
 {
 	struct ip *ip;
 	int error;
 	struct inpcb *inp = sotoinpcb(so);
 	va_list ap;
 	u_long dst;
 	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 	int cnt;
 	u_char opttype, optlen, *cp;
 
 	va_start(ap, so);
 	dst = va_arg(ap, u_long);
 	va_end(ap);
 
 	/*
 	 * If the user handed us a complete IP packet, use it.  Otherwise,
 	 * allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = htons(IP_DF);
 		else
 			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = dst;
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
 			 * want to see from jails.
 			 */
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
 				    inp->inp_cred);
 			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
 			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (error);
 			}
 		}
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
 		if (error != 0) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 
 		/*
 		 * Don't allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash.
 		 */
 		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
 		    || (ntohs(ip->ip_len) != m->m_pkthdr.len)
 		    || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (EINVAL);
 		}
 		/*
 		 * Don't allow IP options which do not have the required
 		 * structure as specified in section 3.1 of RFC 791 on
 		 * pages 15-23.
 		 */
 		cp = (u_char *)(ip + 1);
 		cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 		for (; cnt > 0; cnt -= optlen, cp += optlen) {
 			opttype = cp[IPOPT_OPTVAL];
 			if (opttype == IPOPT_EOL)
 				break;
 			if (opttype == IPOPT_NOP) {
 				optlen = 1;
 				continue;
 			}
 			if (cnt < IPOPT_OLEN + sizeof(u_char)) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(u_char) ||
 			    optlen > cnt) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 		}
 		/*
 		 * This doesn't allow application to specify ID of zero,
 		 * but we got this limitation from the beginning of history.
 		 */
 		if (ip->ip_id == 0)
 			ip_fillid(ip);
 
 		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
 		IPSTAT_INC(ips_rawout);
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	INP_RUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  *
  * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_GET:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * This function exists solely to receive the PRC_IFDOWN messages which are
  * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
  * in_ifadown() to remove all routes corresponding to that address.  It also
  * receives the PRC_IFUP messages from if_up() and reinstalls the interface
  * routes.
  */
 void
 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia;
 	struct ifnet *ifp;
 	int err;
 	int flags;
 
 	/* Only PRC_IFDOWN and PRC_IFUP are handled; 'vip' is not used. */
 	switch (cmd) {
 	case PRC_IFDOWN:
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			/* Match by pointer identity with 'sa', routed only. */
 			if (ia->ia_ifa.ifa_addr == sa
 			    && (ia->ia_flags & IFA_ROUTE)) {
 				/*
 				 * Take a reference before dropping the list
 				 * lock; it is released by ifa_free() below.
 				 */
 				ifa_ref(&ia->ia_ifa);
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				/*
 				 * in_scrubprefix() kills the interface route.
 				 */
 				in_scrubprefix(ia, 0);
 				/*
 				 * in_ifadown gets rid of all the rest of the
 				 * routes.  This is not quite the right thing
 				 * to do, but at least if we are running a
 				 * routing process they will come back.
 				 */
 				in_ifadown(&ia->ia_ifa, 0);
 				ifa_free(&ia->ia_ifa);
 				break;
 			}
 		}
 		if (ia == NULL)		/* If ia matched, already unlocked. */
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		break;
 
 	case PRC_IFUP:
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			if (ia->ia_ifa.ifa_addr == sa)
 				break;
 		}
 		/* Nothing to do: no such address, or IFA_ROUTE already set. */
 		if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return;
 		}
 		/* Keep 'ia' referenced while working without the list lock. */
 		ifa_ref(&ia->ia_ifa);
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		flags = RTF_UP;
 		ifp = ia->ia_ifa.ifa_ifp;
 
 		/* Loopback and point-to-point interfaces get a host route. */
 		if ((ifp->if_flags & IFF_LOOPBACK)
 		    || (ifp->if_flags & IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		/*
 		 * The results of the two loopback-route calls are stored in
 		 * 'err' but never examined; only the rtinit() result decides
 		 * whether IFA_ROUTE is set.
 		 */
 		err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
 
 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
 		if (err == 0)
 			ia->ia_flags |= IFA_ROUTE;
 
 		err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
 
 		ifa_free(&ia->ia_ifa);
 		break;
 	}
 }
 
 /*
  * Attach a raw IP pcb to a freshly created socket.  The caller must hold
  * PRIV_NETINET_RAW and 'proto' must lie in [0, IPPROTO_MAX).
  * Returns 0 on success or an errno value.
  */
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *pcb;
 	int rc;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb == NULL, ("rip_attach: inp != NULL"));
 
 	/* Privilege first, then protocol range, then buffer reservation. */
 	if ((rc = priv_check(td, PRIV_NETINET_RAW)) != 0)
 		return (rc);
 	if (proto < 0 || proto >= IPPROTO_MAX)
 		return (EPROTONOSUPPORT);
 	if ((rc = soreserve(so, rip_sendspace, rip_recvspace)) != 0)
 		return (rc);
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	rc = in_pcballoc(so, &V_ripcbinfo);
 	if (rc == 0) {
 		/* Initialise the new pcb and publish it in the raw hash. */
 		pcb = (struct inpcb *)so->so_pcb;
 		pcb->inp_vflag |= INP_IPV4;
 		pcb->inp_ip_p = proto;
 		pcb->inp_ip_ttl = V_ip_defttl;
 		rip_inshash(pcb);
 	}
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	if (rc != 0)
 		return (rc);
 
 	/* NOTE(review): in_pcballoc() presumably returns the pcb locked. */
 	INP_WUNLOCK(pcb);
 	return (0);
 }
 
 /*
  * Detach the raw IP pcb from 'so': remove it from the hash, run the
  * multicast-router and RSVP teardown hooks where they apply to this
  * socket, then detach and free the pcb.
  */
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 
 	    ("rip_detach: not closed"));
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(inp);
 	rip_delhash(inp);
 	/* Shut down multicast routing if this socket is the mrouter. */
 	if (so == V_ip_mrouter && ip_mrouter_done)
 		ip_mrouter_done();
 	/* Optional hook; called for every socket when it is installed. */
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	if (so == V_ip_rsvpd)
 		ip_rsvp_done();
 	in_pcbdetach(inp);
 	/*
 	 * No INP_WUNLOCK() follows: the pcb is still write-locked when it
 	 * is handed to in_pcbfree().
 	 * NOTE(review): presumably in_pcbfree() releases the lock - confirm.
 	 */
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 }
 
 /*
  * Common disconnect path: reset the foreign address to INADDR_ANY,
  * rehash the pcb and clear SS_ISCONNECTED on the socket.
  */
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = inp->inp_pcbinfo;
 	/* Lock order used here: pcbinfo, then pcb, then socket. */
 	INP_INFO_WLOCK(pcbinfo);
 	INP_WLOCK(inp);
 	/* Remove and re-insert around the address change. */
 	rip_delhash(inp);
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	rip_inshash(inp);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /* Abort handler: a raw socket abort is just a disconnect. */
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_abort: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /* Close handler: a raw socket close is just a disconnect. */
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_close: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /*
  * Disconnect a connected raw socket.
  * Returns ENOTCONN if SS_ISCONNECTED is not set, 0 otherwise.
  */
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	if (so->so_state & SS_ISCONNECTED) {
 		pcb = sotoinpcb(so);
 		KASSERT(pcb != NULL, ("rip_disconnect: inp == NULL"));
 
 		rip_dodisconnect(so, pcb);
 		return (0);
 	}
 	return (ENOTCONN);
 }
 
 /*
  * Bind a raw socket to the local address in 'nam'.
  * Returns EINVAL for a wrongly sized sockaddr, the prison_check_ip4()
  * error if that check fails, EADDRNOTAVAIL if the address is not
  * acceptable, and 0 on success.
  */
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin;
 	struct inpcb *pcb;
 	int rc;
 
 	sin = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 
 	rc = prison_check_ip4(td->td_ucred, &sin->sin_addr);
 	if (rc != 0)
 		return (rc);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_bind: inp == NULL"));
 
 	/* No interfaces configured at all. */
 	if (CK_STAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	/* Only AF_INET and AF_IMPLINK are accepted. */
 	if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
 		return (EADDRNOTAVAIL);
 	/*
 	 * A non-wildcard address must be configured locally unless the
 	 * pcb has INP_BINDANY set.
 	 */
 	if (sin->sin_addr.s_addr &&
 	    (pcb->inp_flags & INP_BINDANY) == 0 &&
 	    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
 		return (EADDRNOTAVAIL);
 
 	/* Rehash around the local address change. */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	rip_delhash(pcb);
 	pcb->inp_laddr = sin->sin_addr;
 	rip_inshash(pcb);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	return (0);
 }
 
 /*
  * Connect a raw socket to the foreign address in 'nam'.
  * Returns EINVAL for a wrongly sized sockaddr, EADDRNOTAVAIL when no
  * interface exists, EAFNOSUPPORT for an unsupported family, else 0.
  */
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin;
 	struct inpcb *pcb;
 
 	sin = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 	if (CK_STAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (!(sin->sin_family == AF_INET || sin->sin_family == AF_IMPLINK))
 		return (EAFNOSUPPORT);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_connect: inp == NULL"));
 
 	/* Rehash around the foreign address change, then mark connected. */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	rip_delhash(pcb);
 	pcb->inp_faddr = sin->sin_addr;
 	rip_inshash(pcb);
 	soisconnected(so);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	return (0);
 }
 
 /* Mark the socket as unable to send any more data.  Always returns 0. */
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_shutdown: inp == NULL"));
 
 	/* socantsendmore() is called with the pcb write-locked. */
 	INP_WLOCK(pcb);
 	socantsendmore(so);
 	INP_WUNLOCK(pcb);
 	return (0);
 }
 
 static int
 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	u_long dst;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_send: inp == NULL"));
 
 	/*
 	 * Note: 'dst' reads below are unlocked.
 	 */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			m_freem(m);
 			return (EISCONN);
 		}
 		dst = inp->inp_faddr.s_addr;	/* Unlocked read. */
 	} else {
 		if (nam == NULL) {
 			m_freem(m);
 			return (ENOTCONN);
 		}
 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 	}
 	return (rip_output(m, so, dst));
 }
 #endif /* INET */
 
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 	struct epoch_tracker et;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == 0) {
 		n = V_ripcbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	gencnt = V_ripcbinfo.ipi_gencnt;
 	n = V_ripcbinfo.ipi_count;
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 
 	INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
 	for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = CK_LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			in_pcbref(inp);
 			inp_list[i++] = inp;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 
 			in_pcbtoxinpcb(inp, &xi);
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	if (!error) {
 		struct epoch_tracker et;
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
 		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_ripcbinfo.ipi_count;
 		INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only opaque "pcblist" node under _net_inet_raw, served by
  * rip_pcblist().
  */
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 
 #ifdef INET
 /*
  * User-request dispatch table for raw IP sockets.  The rip_* entries
  * are the handlers defined in this file; the in_* entries are shared
  * helpers defined elsewhere.
  */
 struct pr_usrreqs rip_usrreqs = {
 	.pru_abort =		rip_abort,
 	.pru_attach =		rip_attach,
 	.pru_bind =		rip_bind,
 	.pru_connect =		rip_connect,
 	.pru_control =		in_control,
 	.pru_detach =		rip_detach,
 	.pru_disconnect =	rip_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		rip_send,
 	.pru_shutdown =		rip_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		rip_close,
 };
 #endif /* INET */
Index: head/sys/netpfil/ipfw/ip_dn_io.c
===================================================================
--- head/sys/netpfil/ipfw/ip_dn_io.c	(revision 345164)
+++ head/sys/netpfil/ipfw/ip_dn_io.c	(revision 345165)
@@ -1,970 +1,975 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Dummynet portions related to packet handling.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>		/* ip_len, ip_off */
 #include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/if_ether.h> /* various ether_* routines */
 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
 #include <netinet6/ip6_var.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
 #ifdef NEW_AQM
 #include <netpfil/ipfw/dn_aqm.h>
 #endif
 #include <netpfil/ipfw/dn_sched.h>
 
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
  * instead of dn_cfg.curr_time
  */
 
 struct dn_parms dn_cfg;
 //VNET_DEFINE(struct dn_parms, _base_dn_cfg);
 
 static long tick_last;		/* Last tick duration (usec). */
 static long tick_delta;		/* Last vs standard tick diff (usec). */
 static long tick_delta_sum;	/* Accumulated tick difference (usec).*/
 static long tick_adjustment;	/* Tick adjustments done. */
 static long tick_lost;		/* Lost(coalesced) ticks number. */
 /* Adjusted vs non-adjusted curr_time difference (ticks). */
 static long tick_diff;
 
 /* Statistics counters exported read-only through sysctl below. */
 static unsigned long	io_pkt;		/* Packets passed to dummynet. */
 static unsigned long	io_pkt_fast;	/* Packets that bypassed the scheduler. */
 
 /* Packets dropped by dummynet; non-static when NEW_AQM is defined. */
 #ifdef NEW_AQM
 unsigned long	io_pkt_drop;
 #else
 static unsigned long	io_pkt_drop;
 #endif
 /*
  * We use a heap to store entities for which we have pending timer events.
  * The heap is checked at every tick and all entities with expired events
  * are extracted.
  */
   
 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
 
 extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 #ifdef SYSCTL_NODE
 
 /*
  * Because of the way the SYSBEGIN/SYSEND macros work on other
  * platforms, there should not be functions between them.
  * So keep the handlers outside the block.
  */
 /*
  * Sysctl handler for dn_cfg.hash_size.  Reads return the current
  * value; writes are accepted only in the range [16, 65536], otherwise
  * EINVAL is returned.
  */
 static int
 sysctl_hash_size(SYSCTL_HANDLER_ARGS)
 {
 	int new_size, rc;
 
 	new_size = dn_cfg.hash_size;
 	rc = sysctl_handle_int(oidp, &new_size, 0, req);
 	if (rc == 0 && req->newptr != NULL) {
 		if (new_size >= 16 && new_size <= 65536)
 			dn_cfg.hash_size = new_size;
 		else
 			rc = EINVAL;
 	}
 	return (rc);
 }
 
 /*
  * Sysctl handler shared by the pipe slot and byte limits.  A non-zero
  * arg2 selects dn_cfg.slot_limit (minimum 1); a zero arg2 selects
  * dn_cfg.byte_limit (minimum 1500).  Values below the minimum are
  * rejected with EINVAL.
  */
 static int
 sysctl_limits(SYSCTL_HANDLER_ARGS)
 {
 	const int is_slot = (arg2 != 0);
 	long limit;
 	int rc;
 
 	if (is_slot)
 		limit = dn_cfg.slot_limit;
 	else
 		limit = dn_cfg.byte_limit;
 
 	rc = sysctl_handle_long(oidp, &limit, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (limit < (is_slot ? 1 : 1500))
 		return (EINVAL);
 
 	if (is_slot)
 		dn_cfg.slot_limit = limit;
 	else
 		dn_cfg.byte_limit = limit;
 	return (0);
 }
 
 SYSBEGIN(f4)
 
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 #ifdef NEW_AQM
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
 #else
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
 #endif
 
 /* wrapper to pass dn_cfg fields to SYSCTL_* */
 //#define DC(x)	(&(VNET_NAME(_base_dn_cfg).x))
 #define DC(x)	(&(dn_cfg.x))
 /* parameters */
 
 
 SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
     CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
     "I", "Default hash table size");
 
 
 SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
     CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
     "L", "Upper limit in slots for pipe queue.");
 SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
     CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits,
     "L", "Upper limit in bytes for pipe queue.");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
     CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io.");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
     CTLFLAG_RW, DC(debug), 0, "Dummynet debug level");
 
 /* RED parameters */
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
     CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
     CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
     CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size");
 
 /* time adjustment */
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
     CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
     CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
     CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
     CTLFLAG_RD, &tick_diff, 0,
     "Adjusted vs non-adjusted curr_time difference (ticks).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
     CTLFLAG_RD, &tick_lost, 0,
     "Number of ticks coalesced by dummynet taskqueue.");
 
 /* Drain parameters */
 SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire,
     CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes");
 SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
     CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes");
 
 /* statistics */
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
     CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
     CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
     CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
     CTLFLAG_RD, DC(queue_count), 0, "Number of queues");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
     CTLFLAG_RD, &io_pkt, 0,
     "Number of packets passed to dummynet.");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
     CTLFLAG_RD, &io_pkt_fast, 0,
     "Number of packets bypassed dummynet scheduler.");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
     CTLFLAG_RD, &io_pkt_drop, 0,
     "Number of packets dropped by dummynet.");
 #undef DC
 SYSEND
 
 #endif
 
 static void	dummynet_send(struct mbuf *);
 
 /*
  * Return the mbuf tag holding the dummynet state (it should
  * be the first one on the list).
  */
 struct dn_pkt_tag *
 dn_tag_get(struct mbuf *m)
 {
 	struct m_tag *mtag = m_tag_first(m);
 #ifdef NEW_AQM
 	/* XXX: to skip ts m_tag. For Debugging only*/
 	if (mtag != NULL && mtag->m_tag_id == DN_AQM_MTAG_TS) {
 		/* Note: this deletes the timestamp tag from the mbuf. */
 		m_tag_delete(m,mtag); 
 		mtag = m_tag_first(m);
 		D("skip TS tag");
 	}
 #endif
 	/* The first tag must exist and be the dummynet tag. */
 	KASSERT(mtag != NULL &&
 	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
 	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
 	    ("packet on dummynet queue w/o dummynet tag!"));
 	/* The returned pointer is the address just past the m_tag header. */
 	return (struct dn_pkt_tag *)(mtag+1);
 }
 
 #ifndef NEW_AQM
 /*
  * Append mbuf 'm' to the tail of packet queue 'q' and bump q->count.
  * Only compiled when NEW_AQM is not defined.  In USERSPACE builds an
  * mbuf flagged M_STACK is first copied into a newly allocated mbuf and
  * the copy is queued instead.
  */
 static inline void
 mq_append(struct mq *q, struct mbuf *m)
 {
 #ifdef USERSPACE
 	// buffers from netmap need to be copied
 	// XXX note that the routine is not expected to fail
 	ND("append %p to %p", m, q);
 	if (m->m_flags & M_STACK) {
 		struct mbuf *m_new;
 		void *p;
 		int l, ofs;
 
 		/* Offset of the data within the original external buffer. */
 		ofs = m->m_data - m->__m_extbuf;
 		// XXX allocate
 		/* NOTE(review): m_new is used below without a NULL check. */
 		MGETHDR(m_new, M_NOWAIT, MT_DATA);
 		ND("*** WARNING, volatile buf %p ext %p %d dofs %d m_new %p",
 			m, m->__m_extbuf, m->__m_extlen, ofs, m_new);
 		p = m_new->__m_extbuf;	/* new pointer */
 		l = m_new->__m_extlen;	/* new len */
 		/*
 		 * NOTE(review): this panics when the new buffer is not
 		 * larger than the old one, although the message reads
 		 * "extlen too large" - confirm the intended condition.
 		 */
 		if (l <= m->__m_extlen) {
 			panic("extlen too large");
 		}
 
 		*m_new = *m;	// copy
 		m_new->m_flags &= ~M_STACK;
 		m_new->__m_extbuf = p; // point to new buffer
 		_pkt_copy(m->__m_extbuf, p, m->__m_extlen);
 		m_new->m_data = p + ofs;
 		m = m_new;
 	}
 #endif /* USERSPACE */
 	/* Link at the tail; an empty queue also gets its head set. */
 	if (q->head == NULL)
 		q->head = m;
 	else
 		q->tail->m_nextpkt = m;
 	q->count++;
 	q->tail = m;
 	m->m_nextpkt = NULL;
 }
 #endif
 
 /*
  * Dispose of a list of packets. Use a function so that, if we need to
  * do more work, this is the central point to do it.
  */
 void
 dn_free_pkts(struct mbuf *mnext)
 {
 	struct mbuf *cur;
 
 	/* Walk the m_nextpkt chain, saving the successor before freeing. */
 	for (cur = mnext; cur != NULL; cur = mnext) {
 		mnext = cur->m_nextpkt;
 		FREE_PKT(cur);
 	}
 }
 
 /*
  * Run the RED decision for a packet of 'len' bytes arriving on 'q'.
  * Updates q->avg, q->count and q->random as a side effect.
  * Returns 0 to accept the packet and 1 otherwise; dn_enqueue() treats
  * 1 as "drop, or ECN-mark if possible".
  */
 static int
 red_drops (struct dn_queue *q, int len)
 {
 	/*
 	 * RED algorithm
 	 *
 	 * RED calculates the average queue size (avg) using a low-pass filter
 	 * with an exponential weighted (w_q) moving average:
 	 * 	avg  <-  (1-w_q) * avg + w_q * q_size
 	 * where q_size is the queue length (measured in bytes or * packets).
 	 *
 	 * If q_size == 0, we compute the idle time for the link, and set
 	 *	avg = (1 - w_q)^(idle/s)
 	 * where s is the time needed for transmitting a medium-sized packet.
 	 *
 	 * Now, if avg < min_th the packet is enqueued.
 	 * If avg > max_th the packet is dropped. Otherwise, the packet is
 	 * dropped with probability P function of avg.
 	 */
 
 	struct dn_fsk *fs = q->fs;
 	int64_t p_b = 0;
 
 	/* Queue in bytes or packets? */
 	uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
 	    q->ni.len_bytes : q->ni.length;
 
 	/* Average queue size estimation. */
 	if (q_size != 0) {
 		/* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
 		int diff = SCALE(q_size) - q->avg;
 		int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
 
 		q->avg += (int)v;
 	} else {
 		/*
 		 * Queue is empty, find for how long the queue has been
 		 * empty and use a lookup table for computing
 		 * (1 - * w_q)^(idle_time/s) where s is the time to send a
 		 * (small) packet.
 		 * XXX check wraps...
 		 */
 		if (q->avg) {
 			u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
 
 			/* Idle periods beyond the table depth reset avg to 0. */
 			q->avg = (t < fs->lookup_depth) ?
 			    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
 		}
 	}
 
 	/* Should i drop? */
 	if (q->avg < fs->min_th) {
 		q->count = -1;
 		return (0);	/* accept packet */
 	}
 	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
 		/* With ECN enabled the caller decides between mark and drop. */
 		if (fs->fs.flags & DN_IS_ECN)
 			return (1);
 		if (fs->fs.flags & DN_IS_GENTLE_RED) {
 			/*
 			 * According to Gentle-RED, if avg is greater than
 			 * max_th the packet is dropped with a probability
 			 *	 p_b = c_3 * avg - c_4
 			 * where c_3 = (1 - max_p) / max_th
 			 *       c_4 = 1 - 2 * max_p
 			 */
 			p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
 			    fs->c_4;
 		} else {
 			q->count = -1;
 			return (1);
 		}
 	} else if (q->avg > fs->min_th) {
 		if (fs->fs.flags & DN_IS_ECN)
 			return (1);
 		/*
 		 * We compute p_b using the linear dropping function
 		 *	 p_b = c_1 * avg - c_2
 		 * where c_1 = max_p / (max_th - min_th)
 		 * 	 c_2 = max_p * min_th / (max_th - min_th)
 		 */
 		p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
 	}
 	/* When avg == min_th neither branch runs and p_b stays 0. */
 
 	/* In byte mode, scale the probability by len / max_pkt_size. */
 	if (fs->fs.flags & DN_QSIZE_BYTES)
 		p_b = div64((p_b * len) , fs->max_pkt_size);
 	/* count was -1: first packet above min_th, just pick a threshold. */
 	if (++q->count == 0)
 		q->random = random() & 0xffff;
 	else {
 		/*
 		 * q->count counts packets arrived since last drop, so a greater
 		 * value of q->count means a greater packet drop probability.
 		 */
 		if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
 			q->count = 0;
 			/* After a drop we calculate a new random value. */
 			q->random = random() & 0xffff;
 			return (1);	/* drop */
 		}
 	}
 	/* End of RED algorithm. */
 
 	return (0);	/* accept */
 
 }
 
 /*
  * ECN/ECT Processing (partially adopted from altq)
  */
 /*
  * Try to set the ECN "congestion experienced" codepoint on the IP header
  * found at the offset recorded in the packet's dummynet tag.
  * Returns 1 if the packet is CE-marked on return (already or newly),
  * 0 if it is not ECN-capable or the IP version is not handled.
  * The function is static unless NEW_AQM is defined.
  */
 #ifndef NEW_AQM
 static
 #endif
 int
 ecn_mark(struct mbuf* m)
 {
 	struct ip *ip;
 	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
 
 	switch (ip->ip_v) {
 	case IPVERSION:
 	{
 		uint16_t old;
 
 		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
 			return (0);	/* not-ECT */
 		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
 			return (1);	/* already marked */
 
 		/*
 		 * ecn-capable but not marked,
 		 * mark CE and update checksum
 		 */
 		/* Snapshot the first 16 bits of the header before the change. */
 		old = *(uint16_t *)ip;
 		ip->ip_tos |= IPTOS_ECN_CE;
 		ip->ip_sum = cksum_adjust(ip->ip_sum, old, *(uint16_t *)ip);
 		return (1);
 	}
 #ifdef INET6
 	case (IPV6_VERSION >> 4):
 	{
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 		u_int32_t flowlabel;
 
 		/* Work on a host-order copy of the first 32-bit word. */
 		flowlabel = ntohl(ip6->ip6_flow);
 		if ((flowlabel >> 28) != 6)
 			return (0);	/* version mismatch! */
 		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 		    (IPTOS_ECN_NOTECT << 20))
 			return (0);	/* not-ECT */
 		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 		    (IPTOS_ECN_CE << 20))
 			return (1);	/* already marked */
 		/*
 		 * ecn-capable but not marked, mark CE
 		 */
 		flowlabel |= (IPTOS_ECN_CE << 20);
 		ip6->ip6_flow = htonl(flowlabel);
 		return (1);
 	}
 #endif
 	}
 	/* Any other IP version: leave the packet untouched. */
 	return (0);
 }
 
 /*
  * Enqueue a packet in q, subject to space and queue management policy
  * (whose parameters are in q->fs).
  * Update stats for the queue and the scheduler.
  * Return 0 on success, 1 on drop. The packet is consumed anyways.
  */
 int
 dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
 {   
 	struct dn_fs *f;
 	struct dn_flow *ni;	/* stats for scheduler instance */
 	uint64_t len;
 
 	/* A queue without flowset or scheduler instance cannot be used. */
 	if (q->fs == NULL || q->_si == NULL) {
 		printf("%s fs %p si %p, dropping\n",
 			__FUNCTION__, q->fs, q->_si);
 		FREE_PKT(m);
 		return 1;
 	}
 	f = &(q->fs->fs);
 	ni = &q->_si->ni;
 	len = m->m_pkthdr.len;
 	/* Update statistics, then check reasons to drop pkt. */
 	q->ni.tot_bytes += len;
 	q->ni.tot_pkts++;
 	ni->tot_bytes += len;
 	ni->tot_pkts++;
 	/* Caller-requested drop. */
 	if (drop)
 		goto drop;
 	/* Random loss: drop when random() falls below the configured plr. */
 	if (f->plr && random() < f->plr)
 		goto drop;
 #ifdef NEW_AQM
 	/* Call AQM enqueue function */
 	if (q->fs->aqmfp)
 		return q->fs->aqmfp->enqueue(q ,m);
 #endif
 	/* RED says drop: keep the packet only if ECN is on and marking works. */
 	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
 		if (!(f->flags & DN_IS_ECN) || !ecn_mark(m))
 			goto drop;
 	}
 	/*
 	 * Queue size limit.  Note the byte test uses '>' while the packet
 	 * test uses '>='.
 	 */
 	if (f->flags & DN_QSIZE_BYTES) {
 		if (q->ni.len_bytes > f->qsize)
 			goto drop;
 	} else if (q->ni.length >= f->qsize) {
 		goto drop;
 	}
 	/* Accepted: queue it and update both sets of length counters. */
 	mq_append(&q->mq, m);
 	q->ni.length++;
 	q->ni.len_bytes += len;
 	ni->length++;
 	ni->len_bytes += len;
 	return (0);
 
 drop:
 	/* Count the drop globally, per queue and per scheduler instance. */
 	io_pkt_drop++;
 	q->ni.drops++;
 	ni->drops++;
 	FREE_PKT(m);
 	return (1);
 }
 
 /*
  * Fetch packets from the delay line which are due now. If there are
  * leftover packets, reinsert the delay line in the heap.
  * Runs under scheduler lock.
  */
 static void
 transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
 {
 	struct mbuf *m;
 	struct dn_pkt_tag *pkt = NULL;
 
 	dline->oid.subtype = 0; /* not in heap */
 	/* Move every packet whose output_time is <= now onto 'q'. */
 	while ((m = dline->mq.head) != NULL) {
 		pkt = dn_tag_get(m);
 		if (!DN_KEY_LEQ(pkt->output_time, now))
 			break;
 		dline->mq.head = m->m_nextpkt;
 		dline->mq.count--;
 		mq_append(q, m);
 	}
 	/*
 	 * m != NULL means the loop stopped at a packet that is not yet
 	 * due; 'pkt' is that packet's tag, so re-arm for its output_time.
 	 */
 	if (m != NULL) {
 		dline->oid.subtype = 1; /* in heap */
 		heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
 	}
 }
 
 /*
  * Convert the additional MAC overheads/delays into an equivalent
  * number of bits for the given data rate. The samples are
  * in milliseconds so we need to divide by 1000.
  */
 static uint64_t
 extra_bits(struct mbuf *m, struct dn_schk *s)
 {
 	int index;
 	uint64_t bits;
 	struct dn_profile *pf = s->profile;
 
 	if (!pf || pf->samples_no == 0)
 		return 0;
 	index  = random() % pf->samples_no;
 	bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000);
 	if (index >= pf->loss_level) {
 		struct dn_pkt_tag *dt = dn_tag_get(m);
 		if (dt)
 			dt->dn_dir = DIR_DROP;
 	}
 	return bits;
 }
 
 /*
  * Send traffic from a scheduler instance due by 'now'.
  * Return a pointer to the head of the queue.
  */
 static struct mbuf *
 serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
 {
 	struct mq def_q;
 	struct dn_schk *s = si->sched;
 	struct mbuf *m = NULL;
 	/* Remember whether the delay line was empty before we add to it. */
 	int delay_line_idle = (si->dline.mq.head == NULL);
 	int done, bw;
 
 	/* Callers may pass NULL; use a local queue as the result list. */
 	if (q == NULL) {
 		q = &def_q;
 		q->head = NULL;
 	}
 
 	bw = s->link.bandwidth;
 	si->kflags &= ~DN_ACTIVE;
 
 	/* Accrue credit for the time elapsed since the last run. */
 	if (bw > 0)
 		si->credit += (now - si->sched_time) * bw;
 	else
 		si->credit = 0;
 	si->sched_time = now;
 	done = 0;
 	/* Dequeue while credit is non-negative; each packet costs credit. */
 	while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
 		uint64_t len_scaled;
 
 		done++;
 		/* With bw == 0 packets cost nothing, so credit stays at 0. */
 		len_scaled = (bw == 0) ? 0 : hz *
 			(m->m_pkthdr.len * 8 + extra_bits(m, s));
 		si->credit -= len_scaled;
 		/* Move packet in the delay line */
 		dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ;
 		mq_append(&si->dline.mq, m);
 	}
 
 	/*
 	 * If credit >= 0 the instance is idle, mark time.
 	 * Otherwise put back in the heap, and adjust the output
 	 * time of the last inserted packet, m, which was too early.
 	 */
 	if (si->credit >= 0) {
 		si->idle_time = now;
 	} else {
 		uint64_t t;
 		KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
 		/* Ticks until credit is non-negative again, rounded up. */
 		t = div64(bw - 1 - si->credit, bw);
 		if (m)
 			dn_tag_get(m)->output_time += t;
 		si->kflags |= DN_ACTIVE;
 		heap_insert(&dn_cfg.evheap, now + t, si);
 	}
 	/* Start the delay line only if it was idle and we queued something. */
 	if (delay_line_idle && done)
 		transmit_event(q, &si->dline, now);
 	return q->head;
 }
 
 /*
  * The timer handler for dummynet. Time is computed in ticks, but
  * the code is tolerant to the actual rate at which this is called.
  * Once complete, the function reschedules itself for the next tick.
  */
 void
 dummynet_task(void *context, int pending)
 {
 	struct timeval t;
 	struct mq q = { NULL, NULL }; /* queue to accumulate results */
 
 	/* 'context' carries the vnet to run in; restored before returning. */
 	CURVNET_SET((struct vnet *)context);
 
 	DN_BH_WLOCK();
 
 	/* Update number of lost(coalesced) ticks. */
 	tick_lost += pending - 1;
 
 	getmicrouptime(&t);
 	/* Last tick duration (usec). */
 	tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
 	(t.tv_usec - dn_cfg.prev_t.tv_usec);
 	/* Last tick vs standard tick difference (usec). */
 	tick_delta = (tick_last * hz - 1000000) / hz;
 	/* Accumulated tick difference (usec). */
 	tick_delta_sum += tick_delta;
 
 	dn_cfg.prev_t = t;
 
 	/*
 	* Adjust curr_time if the accumulated tick difference is
 	* greater than the 'standard' tick. Since curr_time should
 	* be monotonically increasing, we do positive adjustments
 	* as required, and throttle curr_time in case of negative
 	* adjustment.
 	*/
 	dn_cfg.curr_time++;
 	if (tick_delta_sum - tick >= 0) {
 		/* Running late: skip ahead by the whole ticks accumulated. */
 		int diff = tick_delta_sum / tick;
 
 		dn_cfg.curr_time += diff;
 		tick_diff += diff;
 		tick_delta_sum %= tick;
 		tick_adjustment++;
 	} else if (tick_delta_sum + tick <= 0) {
 		/* Running early: cancel the increment made just above. */
 		dn_cfg.curr_time--;
 		tick_diff--;
 		tick_delta_sum += tick;
 		tick_adjustment++;
 	}
 
 	/* serve pending events, accumulate in q */
 	for (;;) {
 		struct dn_id *p;    /* generic parameter to handler */
 
 		/* Stop when the heap is empty or its top key is in the future. */
 		if (dn_cfg.evheap.elements == 0 ||
 		    DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
 			break;
 		p = HEAP_TOP(&dn_cfg.evheap)->object;
 		heap_extract(&dn_cfg.evheap, NULL);
 
 		if (p->type == DN_SCH_I) {
 			serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
 		} else { /* extracted a delay line */
 			transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
 		}
 	}
 	/* Every dn_cfg.expire calls (when non-zero), run the drain routines. */
 	if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
 		dn_cfg.expire_cycle = 0;
 		dn_drain_scheduler();
 		dn_drain_queue();
 	}
 
 	dn_reschedule();
 	DN_BH_WUNLOCK();
 	/* Packets are handed to dummynet_send() after the lock is dropped. */
 	if (q.head != NULL)
 		dummynet_send(q.head);
 	CURVNET_RESTORE();
 }
 
 /*
  * Forward a chain of packets to the proper destination.
  * This runs outside the dummynet lock.
  *
  * m	head of a list of packets linked through m_nextpkt. Each packet
  *	is unlinked from the list before it is dispatched, and every
  *	packet is either handed to another layer or freed here.
  *
  * The destination is taken from the dummynet tag of each packet
  * (dn_dir and ifp). A packet without any tag, or an outgoing layer-2
  * packet without an interface, is dropped.
  */
 static void
 dummynet_send(struct mbuf *m)
 {
 	struct mbuf *n;

 	for (; m != NULL; m = n) {
 		struct ifnet *ifp = NULL;	/* gcc 3.4.6 complains */
 		struct m_tag *tag;
 		int dst;

 		/* Detach the packet from the chain before passing it on. */
 		n = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		tag = m_tag_first(m);
 		if (tag == NULL) { /* should not happen */
 			dst = DIR_DROP;
 		} else {
 			struct dn_pkt_tag *pkt = dn_tag_get(m);
 			/* extract the dummynet info, rename the tag
 			 * to carry reinject info.
 			 */
 			if (pkt->dn_dir == (DIR_OUT | PROTO_LAYER2) &&
 				pkt->ifp == NULL) {
 				dst = DIR_DROP;
 			} else {
 				dst = pkt->dn_dir;
 				ifp = pkt->ifp;
 				tag->m_tag_cookie = MTAG_IPFW_RULE;
 				tag->m_tag_id = 0;
 			}
 		}

 		switch (dst) {
 		case DIR_OUT:
 			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 			break;

 		case DIR_IN :
 			netisr_dispatch(NETISR_IP, m);
 			break;

 #ifdef INET6
 		case DIR_IN | PROTO_IPV6:
 			netisr_dispatch(NETISR_IPV6, m);
 			break;

 		case DIR_OUT | PROTO_IPV6:
 			ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
 			break;
 #endif

 		case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
 			if (bridge_dn_p != NULL) {
 				((*bridge_dn_p)(m, ifp));
 			} else {
 				printf("dummynet: if_bridge not loaded\n");
 				/*
 				 * Nobody else owns the packet at this point,
 				 * so free it instead of leaking it.
 				 */
 				FREE_PKT(m);
 			}
 			break;

 		case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
 			/*
 			 * The Ethernet code assumes the Ethernet header is
 			 * contiguous in the first mbuf header.
 			 * Ensure this is true.
 			 */
 			if (m->m_len < ETHER_HDR_LEN &&
 			    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
 				/* m is NULL here, so there is nothing to free. */
 				printf("dummynet/ether: pullup failed, "
 				    "dropping packet\n");
 				break;
 			}
 			ether_demux(m->m_pkthdr.rcvif, m);
 			break;

 		case DIR_OUT | PROTO_LAYER2: /* DN_TO_ETH_OUT: */
 			/* ifp is non-NULL here: the NULL case became DIR_DROP above. */
 			ether_output_frame(ifp, m);
 			break;

 		case DIR_DROP:
 			/* drop the packet after some time */
 			FREE_PKT(m);
 			break;

 		default:
 			printf("dummynet: bad switch %d!\n", dst);
 			FREE_PKT(m);
 			break;
 		}
 	}
 }
 
 /*
  * Allocate a zeroed PACKET_TAG_DUMMYNET tag, attach it to 'm' and fill it
  * with the rule reference from 'fwa', the direction 'dir', the output
  * interface (outgoing packets only), the current dummynet time and the
  * offset of the IP header.
  * Returns 0 on success, 1 if the tag cannot be allocated.
  */
 static inline int
 tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
 {
 	struct m_tag *mtag;
 	struct dn_pkt_tag *dt;

 	mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*dt), M_NOWAIT | M_ZERO);
 	if (mtag == NULL) {
 		/* Cannot allocate packet header. */
 		return 1;
 	}
 	/* Attach to mbuf chain. */
 	m_tag_prepend(m, mtag);

 	/* The dummynet payload lives right after the generic tag header. */
 	dt = (struct dn_pkt_tag *)(mtag + 1);
 	dt->rule = fwa->rule;
 	/* only keep this info */
 	dt->rule.info &= IPFW_ONEPASS;
 	dt->dn_dir = dir;
 	if (fwa->flags & IPFW_ARGS_OUT)
 		dt->ifp = fwa->ifp;
 	else
 		dt->ifp = NULL;
 	/* dt->output_time is updated as we move through */
 	dt->output_time = dn_cfg.curr_time;
 	if (dir & PROTO_LAYER2)
 		dt->iphdr_off = ETHER_HDR_LEN;
 	else
 		dt->iphdr_off = 0;
 	return 0;
 }
 
 
 /*
  * dummynet hook for packets.
  * We use the argument to locate the flowset fs and the sched_set sch
  * associated to it. Then we apply flow_mask and sched_mask to
  * determine the queue and scheduler instances.
- *
- * dir		where shall we send the packet after dummynet.
- * *m0		the mbuf with the packet
- * ifp		the 'ifp' parameter from the caller.
- *		NULL in ip_input, destination interface in ip_output,
  *
  * On return *m0 is NULL when the packet was queued, sent or dropped;
  * it is left untouched only when the scheduler hands back the very
  * packet that was just enqueued and it is not a layer-2 packet, so the
  * caller can continue processing it.
  *
  * Returns 0 unless the packet is dropped; on drop the result is ENOBUFS,
  * or 0 if the flowset was found and has DN_NOERROR set.
  */
 int
-dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
+dummynet_io(struct mbuf **m0, struct ip_fw_args *fwa)
 {
 	struct mbuf *m = *m0;
 	struct dn_fsk *fs = NULL;
 	struct dn_sch_inst *si;
 	struct dn_queue *q = NULL;	/* default */
+	int fs_id, dir;

 	/* Pipes are looked up with an offset of 2*DN_MAX_ID in the id. */
-	int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
+	fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
 		((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
+	/* XXXGL: convert args to dir */
+	if (fwa->flags & IPFW_ARGS_IN)
+		dir = DIR_IN;
+	else
+		dir = DIR_OUT;
+	if (fwa->flags & IPFW_ARGS_ETHER)
+		dir |= PROTO_LAYER2;
+	else if (fwa->flags & IPFW_ARGS_IP6)
+		dir |= PROTO_IPV6;
 	DN_BH_WLOCK();
 	io_pkt++;
 	/* we could actually tag outside the lock, but who cares... */
 	if (tag_mbuf(m, dir, fwa))
 		goto dropit;
 	if (dn_cfg.busy) {
 		/* if the upper half is busy doing something expensive,
 		 * let's queue the packet and move forward
 		 */
 		mq_append(&dn_cfg.pending, m);
 		m = *m0 = NULL; /* consumed */
 		goto done; /* already active, nothing to do */
 	}
 	/* XXX locate_flowset could be optimised with a direct ref. */
 	fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
 	if (fs == NULL)
 		goto dropit;	/* This queue/pipe does not exist! */
 	if (fs->sched == NULL)	/* should not happen */
 		goto dropit;
 	/* find scheduler instance, possibly applying sched_mask */
 	si = ipdn_si_find(fs->sched, &(fwa->f_id));
 	if (si == NULL)
 		goto dropit;
 	/*
 	 * If the scheduler supports multiple queues, find the right one
 	 * (otherwise it will be ignored by enqueue).
 	 */
 	if (fs->sched->fp->flags & DN_MULTIQUEUE) {
 		q = ipdn_q_find(fs, si, &(fwa->f_id));
 		if (q == NULL)
 			goto dropit;
 	}
 	if (fs->sched->fp->enqueue(si, q, m)) {
 		/* packet was dropped by enqueue() */
 		m = *m0 = NULL;

 		/* dn_enqueue already increases io_pkt_drop */
 		io_pkt_drop--;

 		goto dropit;
 	}

 	if (si->kflags & DN_ACTIVE) {
 		m = *m0 = NULL; /* consumed */
 		goto done; /* already active, nothing to do */
 	}

 	/* compute the initial allowance */
 	if (si->idle_time < dn_cfg.curr_time) {
 	    /* Do this only on the first packet on an idle pipe */
 	    struct dn_link *p = &fs->sched->link;

 	    si->sched_time = dn_cfg.curr_time;
 	    si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
 	    if (p->burst) {
 		/* Credit earned while idle, capped at the configured burst. */
 		uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
 		if (burst > p->burst)
 			burst = p->burst;
 		si->credit += burst;
 	    }
 	}
 	/* pass through scheduler and delay line */
 	m = serve_sched(NULL, si, dn_cfg.curr_time);

 	/* optimization -- pass it back to ipfw for immediate send */
 	/* XXX Don't call dummynet_send() if the scheduler returns the packet
 	 *     just enqueued. This avoids a lock order reversal.
 	 */
 	if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
 		/* fast io, rename the tag to carry reinject info. */
 		struct m_tag *tag = m_tag_first(m);

 		tag->m_tag_cookie = MTAG_IPFW_RULE;
 		tag->m_tag_id = 0;
 		io_pkt_fast++;
 		if (m->m_nextpkt != NULL) {
 			printf("dummynet: fast io: pkt chain detected!\n");
 			m->m_nextpkt = NULL;
 		}
 		/* *m0 still points to the packet: the caller keeps it. */
 		m = NULL;
 	} else {
 		*m0 = NULL;
 	}
 done:
 	DN_BH_WUNLOCK();
 	/* Anything the scheduler released is sent outside the lock. */
 	if (m)
 		dummynet_send(m);
 	return 0;

 dropit:
 	io_pkt_drop++;
 	DN_BH_WUNLOCK();
 	if (m)
 		FREE_PKT(m);
 	*m0 = NULL;
 	return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
 }
Index: head/sys/netpfil/ipfw/ip_dn_private.h
===================================================================
--- head/sys/netpfil/ipfw/ip_dn_private.h	(revision 345164)
+++ head/sys/netpfil/ipfw/ip_dn_private.h	(revision 345165)
@@ -1,484 +1,499 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * internal dummynet APIs.
  *
  * $FreeBSD$
  */
 
 #ifndef _IP_DN_PRIVATE_H
 #define _IP_DN_PRIVATE_H
 
 /* debugging support
  * use ND() to remove debugging, D() to print a line,
  * DX(level, ...) to print above a certain level
  * If you redefine D() you are expected to redefine all.
  */
 #ifndef D
 #define ND(fmt, ...) do {} while (0)
 #define D1(fmt, ...) do {} while (0)
 #define D(fmt, ...) printf("%-10s " fmt "\n",      \
         __FUNCTION__, ## __VA_ARGS__)
 #define DX(lev, fmt, ...) do {              \
         if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
 #endif
 
 MALLOC_DECLARE(M_DUMMYNET);
 
 #ifndef __linux__
 #define div64(a, b)  ((int64_t)(a) / (int64_t)(b))
 #endif
 
 /* Create / destroy both dummynet mutexes (uh_mtx and bh_mtx). */
 #define DN_LOCK_INIT() do {				\
 	mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF);	\
 	mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF);	\
 	} while (0)
 #define DN_LOCK_DESTROY() do {				\
 	mtx_destroy(&dn_cfg.uh_mtx);			\
 	mtx_destroy(&dn_cfg.bh_mtx);			\
 	} while (0)
 #if 0 /* not used yet */
 #define DN_UH_RLOCK()		mtx_lock(&dn_cfg.uh_mtx)
 #define DN_UH_RUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
 #define DN_UH_WLOCK()		mtx_lock(&dn_cfg.uh_mtx)
 #define DN_UH_WUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
 #define DN_UH_LOCK_ASSERT()	mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
 #endif

 /*
  * Lock used by the packet path and by the timer task.
  * Read and write variants are the same plain mutex operation.
  * NOTE(review): despite the BH name these macros operate on uh_mtx;
  * bh_mtx is initialized and destroyed above but is not taken by any
  * macro in this header.
  */
 #define DN_BH_RLOCK()		mtx_lock(&dn_cfg.uh_mtx)
 #define DN_BH_RUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
 #define DN_BH_WLOCK()		mtx_lock(&dn_cfg.uh_mtx)
 #define DN_BH_WUNLOCK()		mtx_unlock(&dn_cfg.uh_mtx)
 #define DN_BH_LOCK_ASSERT()	mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
 
 SLIST_HEAD(dn_schk_head, dn_schk);
 SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
 SLIST_HEAD(dn_fsk_head, dn_fsk);
 SLIST_HEAD(dn_queue_head, dn_queue);
 SLIST_HEAD(dn_alg_head, dn_alg);
 
 #ifdef NEW_AQM
 SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */
 #endif
 
 struct mq {	/* a basic queue of packets */
         struct mbuf *head, *tail; /* first and last packet, linked by m_nextpkt */
 	int count;		/* incremented by mq_append() */
 };
 
 /*
  * Initialize an object header: store the given type and length
  * and reset the subtype to 0.
  */
 static inline void
 set_oid(struct dn_id *o, int type, int len)
 {
 	o->subtype = 0;
 	o->len = len;
 	o->type = type;
 }
 
 /*
  * configuration and global data for a dummynet instance
  *
  * When a configuration is modified from userland, 'id' is incremented
  * so we can use the value to check for stale pointers.
  */
 struct dn_parms {
 	uint32_t	id;		/* configuration version */

 	/* defaults (sysctl-accessible) */
 	int	red_lookup_depth;
 	int	red_avg_pkt_size;
 	int	red_max_pkt_size;
 	int	hash_size;
 	int	max_hash_size;
 	long	byte_limit;		/* max queue sizes */
 	long	slot_limit;

 	int	io_fast;	/* if set, an idle pipe starts with credit */
 	int	debug;		/* threshold compared by DX() */

 	/* timekeeping */
 	struct timeval prev_t;		/* last time dummynet_task ran */
 	struct dn_heap	evheap;		/* scheduled events */

 	/* counters of objects -- used for reporting space */
 	int	schk_count;
 	int	si_count;
 	int	fsk_count;
 	int	queue_count;

 	/* ticks and other stuff */
 	uint64_t	curr_time;	/* advanced by dummynet_task() */
 	/* flowsets and schedulers are in hash tables, with 'hash_size'
 	 * buckets. fshash is looked up at every packet arrival
 	 * so better be generous if we expect many entries.
 	 */
 	struct dn_ht	*fshash;
 	struct dn_ht	*schedhash;
 	/* list of flowsets without a scheduler -- use sch_chain */
 	struct dn_fsk_head	fsu;	/* list of unlinked flowsets */
 	struct dn_alg_head	schedlist;	/* list of algorithms */
 #ifdef NEW_AQM
 	struct dn_aqm_head	aqmlist;	/* list of AQMs */
 #endif

 	/* Store the fs/sch to scan when draining. The value is the
 	 * bucket number of the hash table. Expire can be disabled
 	 * with net.inet.ip.dummynet.expire=0, or it happens every
 	 * expire ticks.
 	 **/
 	int drain_fs;
 	int drain_sch;
 	uint32_t expire;
 	uint32_t expire_cycle;	/* tick count */

 	int init_done;

 	/* if the upper half is busy doing something long,
 	 * can set the busy flag and we will enqueue packets in
 	 * a queue for later processing.
 	 */
 	int	busy;
 	struct	mq	pending;

 #ifdef _KERNEL
 	/*
 	 * This file is normally used in the kernel, unless we do
 	 * some userland tests, in which case we do not need a mtx.
 	 * uh_mtx arbitrates between system calls and also
 	 * protects fshash, schedhash and fsunlinked.
 	 * These structures are readonly for the lower half.
 	 * bh_mtx protects all other structures which may be
 	 * modified upon packet arrivals
 	 *
 	 * NOTE(review): the DN_BH_* macros in this header currently
 	 * lock uh_mtx, not bh_mtx, so the split described above does
 	 * not match what the macros do -- confirm before relying on it.
 	 */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t uh_mtx;
 	spinlock_t bh_mtx;
 #else
 	struct mtx uh_mtx;
 	struct mtx bh_mtx;
 #endif

 #endif /* _KERNEL */
 };
 
 /*
  * Delay line, contains all packets on output from a link.
  * Every scheduler instance has one.
  */
 struct delay_line {
 	/*
 	 * Object header. It is the first member: dummynet_task() casts
 	 * the generic 'struct dn_id *' taken from the event heap to
 	 * 'struct delay_line *'.
 	 */
 	struct dn_id oid;
 	struct dn_sch_inst *si;	/* presumably the owning instance -- verify */
 	struct mq mq;		/* the packets in the delay line */
 };
 
 /*
  * The kernel side of a flowset. It is linked in a hash table
  * of flowsets, and in a list of children of their parent scheduler.
  * qht is either the queue or (if HAVE_MASK) a hash table of queues.
  * Note that the mask to use is the (flow_mask|sched_mask), which
  * changes as we attach/detach schedulers. So we store it here.
  *
  * XXX If we want to add scheduler-specific parameters, we need to
  * put them in external storage because the scheduler may not be
  * available when the fsk is created.
  */
 struct dn_fsk { /* kernel side of a flowset */
 	struct dn_fs fs;
 	SLIST_ENTRY(dn_fsk) fsk_next;	/* hash chain for fshash */

 	struct ipfw_flow_id fsk_mask;

 	/* qht is a hash table of queues, or just a single queue
 	 * a bit in fs.flags tells us which one
 	 */
 	struct dn_ht	*qht;
 	struct dn_schk *sched;		/* Sched we are linked to */
 	SLIST_ENTRY(dn_fsk) sch_chain;	/* list of fsk attached to sched */

 	/* bucket index used by drain routine to drain queues for this
 	 * flowset
 	 */
 	int drain_bucket;
 	/* Parameters related to RED / GRED */
 	/* original values are in dn_fs */
 	int w_q ;		/* queue weight (scaled) */
 	int max_th ;		/* maximum threshold for queue (scaled) */
 	int min_th ;		/* minimum threshold for queue (scaled) */
 	int max_p ;		/* maximum value for p_b (scaled) */

 	u_int c_1 ;		/* max_p/(max_th-min_th) (scaled) */
 	u_int c_2 ;		/* max_p*min_th/(max_th-min_th) (scaled) */
 	u_int c_3 ;		/* for GRED, (1-max_p)/max_th (scaled) */
 	u_int c_4 ;		/* for GRED, 1 - 2*max_p (scaled) */
 	u_int * w_q_lookup ;	/* lookup table for computing (1-w_q)^t */
 	u_int lookup_depth ;	/* depth of lookup table */
 	int lookup_step ;	/* granularity inside the lookup table */
 	int lookup_weight ;	/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
 	int avg_pkt_size ;	/* medium packet size */
 	int max_pkt_size ;	/* max packet size */
 #ifdef NEW_AQM
 	struct dn_aqm *aqmfp;	/* Pointer to AQM functions */
 	void *aqmcfg;	/* configuration parameters for AQM */
 #endif
 };
 
 /*
  * A queue is created as a child of a flowset unless it belongs to
  * a !MULTIQUEUE scheduler. It is normally in a hash table in the
  * flowset. fs always points to the parent flowset.
  * _si normally points to the sch_inst, unless the flowset has been
  * detached from the scheduler -- in this case _si == NULL and we
  * should not enqueue.
  */
 struct dn_queue {
 	struct dn_flow ni;	/* oid, flow_id, stats */
 	struct mq mq;	/* packets queue */
 	struct dn_sch_inst *_si;	/* owner scheduler instance */
 	SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */
 	struct dn_fsk *fs;		/* parent flowset. */

 	/* RED parameters */
 	int avg;		/* average queue length est. (scaled) */
 	int count;		/* arrivals since last RED drop */
 	int random;		/* random value (scaled) */
 	uint64_t q_time;	/* start of queue idle time */
 #ifdef NEW_AQM
 	void *aqm_status;	/* per-queue status variables */
 #endif

 };
 
 /*
  * The kernel side of a scheduler. Contains the userland config,
  * a link, pointer to extra config arguments from command line,
  * kernel flags, and a pointer to the scheduler methods.
  * It is stored in a hash table, and holds a list of all
  * flowsets and scheduler instances.
  * XXX sch must be at the beginning, see schk_hash().
  */
 struct dn_schk {
 	struct dn_sch sch;	/* userland config; must stay first (see above) */
 	struct dn_alg *fp;	/* Pointer to scheduler functions */
 	struct dn_link link;	/* The link, embedded */
 	struct dn_profile *profile; /* delay profile, if any */
 	struct dn_id *cfg;	/* extra config arguments */

 	SLIST_ENTRY(dn_schk) schk_next;  /* hash chain for schedhash */

 	struct dn_fsk_head fsk_list;  /* all fsk linked to me */
 	struct dn_fsk *fs;	/* Flowset for !MULTIQUEUE */

 	/* bucket index used by the drain routine to drain the scheduler
 	 * instance for this flowset.
 	 */
 	int drain_bucket;

 	/* Hash table of all instances (through sch.sched_mask)
 	 * or single instance if no mask. Always valid.
 	 */
 	struct dn_ht	*siht;
 };
 
 
 /*
  * Scheduler instance.
  * Contains variables and all queues relative to this instance.
  * This struct is created at runtime.
  */
 struct dn_sch_inst {
 	/*
 	 * Must stay first: dummynet_task() casts the 'struct dn_id *'
 	 * taken from the event heap to 'struct dn_sch_inst *'
 	 * (presumably dn_flow itself starts with the oid -- verify).
 	 */
 	struct dn_flow	ni;	/* oid, flowid and stats */
 	SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */
 	struct delay_line dline;
 	struct dn_schk *sched;	/* the template */
 	int		kflags;	/* DN_ACTIVE */

 	int64_t	credit;		/* bits I can transmit (more or less). */
 	uint64_t sched_time;	/* time link was scheduled in ready_heap */
 	uint64_t idle_time;	/* start of scheduler instance idle time */

 	/* q_count is the number of queues that this instance is using.
 	 * The counter is incremented or decremented when
 	 * a reference from the queue is created or deleted.
 	 * It is used to make sure that a scheduler instance can be safely
 	 * deleted by the drain routine. See notes below.
 	 */
 	int q_count;

 };
 
 /*
  * NOTE about object drain.
  * The system will automatically (XXX check when) drain queues and
  * scheduler instances when they are idle.
  * A queue is idle when it has no packets; an instance is idle when
  * it is not in the evheap heap, and the corresponding delay line is empty.
  * A queue can be safely deleted when it is idle because of the scheduler
  * function xxx_free_queue() will remove any references to it.
  * An instance can be only deleted when no queues reference it. To be sure
  * of that, a counter (q_count) stores the number of queues that are pointing
  * to the instance.
  *
  * XXX
  * Order of scan:
  * - take all flowset in a bucket for the flowset hash table
  * - take all queues in a bucket for the flowset
  * - increment the queue bucket
  * - scan next flowset bucket
  * Nothing is done if a bucket contains no entries.
  *
  * The same scheme is used for scheduler instances
  */
 
 
 /* kernel-side flags. Linux has DN_DELETE in fcntl.h
  * Each value is a distinct bit, so flags can be or-ed together.
  */
 enum {
 	/* 1 and 2 are reserved for the SCAN flags */
 	DN_DESTROY	= 0x0004, /* destroy */
 	DN_DELETE_FS	= 0x0008, /* destroy flowset */
 	DN_DETACH	= 0x0010,
 	DN_ACTIVE	= 0x0020, /* object is in evheap */
 	DN_F_DLINE	= 0x0040, /* object is a delay line */
 	DN_DEL_SAFE	= 0x0080, /* delete a queue only if no longer needed
 				   * by scheduler */
 	DN_QHT_IS_Q	= 0x0100, /* in flowset, qht is a single queue */
 };
 
 /*
  * Packets processed by dummynet have an mbuf tag associated with
  * them that carries their dummynet state.
  * Outside dummynet, only the 'rule' field is relevant, and it must
  * be at the beginning of the structure.
  */
 struct dn_pkt_tag {
 	struct ipfw_rule_ref rule;	/* matching rule	*/

 	/* second part, dummynet specific */
 	int dn_dir;		/* action when packet comes out.*/
 				/* a DIR_* value, possibly or-ed	*/
 				/* with PROTO_* bits		*/
 	uint64_t output_time;	/* when the pkt is due for delivery*/
 	struct ifnet *ifp;	/* interface, for ip_output	*/
 	struct _ip6dn_args ip6opt;	/* XXX ipv6 options	*/
 	uint16_t iphdr_off;	/* IP header offset for mtodo()	*/
 };
 
+/*
+ * Possible values for dn_dir. XXXGL: this needs to be reviewed
+ * and converted to the same values that ip_fw_args.flags uses.
+ * A value is one DIR_* direction, optionally or-ed with PROTO_* bits.
+ */
+enum {
+	DIR_OUT =	0,
+	DIR_IN =	1,
+	DIR_FWD =	2,
+	DIR_DROP =	3,
+	PROTO_LAYER2 =	0x4, /* set for layer 2 */
+	PROTO_IPV4 =	0x08,
+	PROTO_IPV6 =	0x10,
+	PROTO_IFB =	0x0c, /* layer2 + ifbridge; same bits as LAYER2|IPV4 */
+};
+
 extern struct dn_parms dn_cfg;
 //VNET_DECLARE(struct dn_parms, _base_dn_cfg);
 //#define dn_cfg	VNET(_base_dn_cfg)
 
-int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+int dummynet_io(struct mbuf **, struct ip_fw_args *);
 void dummynet_task(void *context, int pending);
 void dn_reschedule(void);
 struct dn_pkt_tag * dn_tag_get(struct mbuf *m);
 
 struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
         struct ipfw_flow_id *);
 struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
 
 /*
  * copy_range is a template for requests for ranges of pipes/queues/scheds.
  * The number of ranges is variable and can be derived from o.len.
  * As a default, we use a small number of entries so that the struct
  * fits easily on the stack and is sufficient for most common requests.
  */
 #define DEFAULT_RANGES	5
 struct copy_range {
         struct dn_id o;
         uint32_t	r[ 2 * DEFAULT_RANGES ];	/* two entries per range */
 };
 
 /*
  * Arguments shared by the dn_c_copy_*() / dn_compat_copy_*() helpers
  * declared below.
  * NOTE(review): start/end presumably delimit the output buffer, with
  * *start advanced as data is copied -- confirm against the helpers.
  */
 struct copy_args {
 	char **start;
 	char *end;
 	int flags;
 	int type;
 	struct copy_range *extra;	/* extra filtering */
 };
 
 struct sockopt;
 int ip_dummynet_compat(struct sockopt *sopt);
 int dummynet_get(struct sockopt *sopt, void **compat);
 int dn_c_copy_q (void *_ni, void *arg);
 int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq);
 int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq);
 int dn_compat_copy_queue(struct copy_args *a, void *_o);
 int dn_compat_copy_pipe(struct copy_args *a, void *_o);
 int copy_data_helper_compat(void *_o, void *_arg);
 int dn_compat_calc_size(void);
 int do_config(void *p, int l);
 
 /* function to drain idle object */
 void dn_drain_scheduler(void);
 void dn_drain_queue(void);
 
 #ifdef NEW_AQM
 int ecn_mark(struct mbuf* m);
 
 /*
  * Append packet 'm' at the tail of queue 'q' and bump q->count.
  * The packet's m_nextpkt is cleared, so 'm' must be a single packet.
  *
  * moved from ip_dn_io.c to here to be available for AQM modules
  */
 static inline void
 mq_append(struct mq *q, struct mbuf *m)
 {
 #ifdef USERSPACE
 	// buffers from netmap need to be copied
 	// XXX note that the routine is not expected to fail
 	ND("append %p to %p", m, q);
 	if (m->m_flags & M_STACK) {
 		struct mbuf *m_new;
 		void *p;
 		int l, ofs;

 		/* Offset of the payload inside the old external buffer. */
 		ofs = m->m_data - m->__m_extbuf;
 		// XXX allocate
 		// NOTE(review): m_new is dereferenced without a NULL check
 		// although M_NOWAIT is requested -- confirm that the
 		// userspace MGETHDR cannot fail.
 		MGETHDR(m_new, M_NOWAIT, MT_DATA);
 		ND("*** WARNING, volatile buf %p ext %p %d dofs %d m_new %p",
 			m, m->__m_extbuf, m->__m_extlen, ofs, m_new);
 		p = m_new->__m_extbuf;	/* new pointer */
 		l = m_new->__m_extlen;	/* new len */
 		/* The new buffer must be strictly larger than the old data. */
 		if (l <= m->__m_extlen) {
 			panic("extlen too large");
 		}

 		*m_new = *m;	// copy
 		m_new->m_flags &= ~M_STACK;
 		m_new->__m_extbuf = p; // point to new buffer
 		_pkt_copy(m->__m_extbuf, p, m->__m_extlen);
 		m_new->m_data = p + ofs;
 		/* From here on the copy is the packet being queued. */
 		m = m_new;
 	}
 #endif /* USERSPACE */
 	if (q->head == NULL)
 		q->head = m;
 	else
 		q->tail->m_nextpkt = m;
 	q->count++;
 	q->tail = m;
 	m->m_nextpkt = NULL;
 }
 #endif /* NEW_AQM */
 
 #endif /* _IP_DN_PRIVATE_H */
Index: head/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw2.c	(revision 345164)
+++ head/sys/netpfil/ipfw/ip_fw2.c	(revision 345165)
@@ -1,3446 +1,3448 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * The FreeBSD IP packet firewall, main file
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
 #ifndef INET
 #error "IPFIREWALL requires INET"
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/counter.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_carp.h>
 #include <netinet/pim.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #include <netinet/sctp_header.h>
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/in_fib.h>
 #ifdef INET6
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <net/if_gre.h> /* for struct grehdr */
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 /*
  * static variables followed by global ones.
  * All ipfw global variables are here.
  */
 
 VNET_DEFINE_STATIC(int, fw_deny_unknown_exthdrs);
 #define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)
 
 VNET_DEFINE_STATIC(int, fw_permit_single_frag6) = 1;
 #define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)
 
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 static int default_to_accept = 1;
 #else
 static int default_to_accept;
 #endif
 
 VNET_DEFINE(int, autoinc_step);
 VNET_DEFINE(int, fw_one_pass) = 1;
 
 VNET_DEFINE(unsigned int, fw_tables_max);
 VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
 /* Use 128 tables by default */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
 
 #ifndef LINEAR_SKIPTO
 static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
 #else
 static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
 #endif
 
 /*
  * Each rule belongs to one of 32 different sets (0..31).
  * The variable set_disable contains one bit per set.
  * If the bit is set, all rules in the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted individually.
  */
 VNET_DEFINE(u_int32_t, set_disable);
 #define	V_set_disable			VNET(set_disable)
 
 VNET_DEFINE(int, fw_verbose);
 /* counter for ipfw_log(NULL...) */
 VNET_DEFINE(u_int64_t, norule_counter);
 VNET_DEFINE(int, verbose_limit);
 
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
 /* ipfw_vnet_ready controls when we are open for business */
 VNET_DEFINE(int, ipfw_vnet_ready) = 0;
 
 VNET_DEFINE(int, ipfw_nat_ready) = 0;
 
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 #ifdef SYSCTL_NODE
 uint32_t dummy_def = IPFW_DEFAULT_RULE;
 static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
 static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);
 
 SYSBEGIN(f3)
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
     "Rule number auto-increment step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
     "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
     "Set upper limit of matches of ipfw rules logged");
 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
     &dummy_def, 0,
     "The default/max possible rule number.");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
     "Maximum number of concurrently used tables");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     0, 0, sysctl_ipfw_tables_sets, "IU",
     "Use per-set namespace for tables");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
     &default_to_accept, 0,
     "Make the default rule accept all packets.");
 TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_deny_unknown_exthdrs), 0,
     "Deny packets with unknown IPv6 Extension Headers");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_permit_single_frag6), 0,
     "Permit single packet IPv6 fragments");
 #endif /* INET6 */
 
 SYSEND
 
 #endif /* SYSCTL_NODE */
 
 
 /*
  * Some macros used in the various matching options.
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
  * Other macros just cast void * into the appropriate type
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Match the ICMP type of a packet against the bitmask held in cmd->d[0],
  * where bit N set means "ICMP type N matches".
  *
  * Only one mask word (d[0]) is consulted, so only types 0..31 can be
  * represented.  Types above 31 are rejected explicitly: shifting 1 by 32
  * or more is undefined behaviour in C, and on CPUs that wrap the shift
  * count it would silently test the bit belonging to a different type.
  *
  * Returns 1 when the type is selected by the mask, 0 otherwise.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	if (type < 0 || type > ICMP_MAXTYPE || type > 31)
 		return (0);
 	/* unsigned constant: bit 31 must not shift into a sign bit */
 	return ((cmd->d[0] & (1U << type)) != 0);
 }
 
 /*
  * Bitmask (bit N == ICMP type N) of the ICMP types treated as queries.
  * Built from unsigned constants so the mask arithmetic is well defined.
  */
 #define TT	( (1U << ICMP_ECHO) | (1U << ICMP_ROUTERSOLICIT) | \
     (1U << ICMP_TSTAMP) | (1U << ICMP_IREQ) | (1U << ICMP_MASKREQ) )
 
 /*
  * Return 1 if the ICMP header carries one of the query types listed in
  * TT, 0 otherwise.
  *
  * The mask is a single 32-bit word, so any type above 31 cannot be a
  * query and is rejected before the shift.  Without that guard a type of
  * 32 or more makes the shift undefined; on hardware that wraps the shift
  * count, such a type could alias one of the query bits.
  */
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	if (type < 0 || type > ICMP_MAXTYPE || type > 31)
 		return (0);
 	return ((TT & (1U << type)) != 0);
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 /*
  * Check a set of observed flag bits against the requirements encoded in
  * cmd->arg1: the low byte lists flags that must be present, the high byte
  * lists flags that must be absent.
  *
  * Returns 1 when both requirements hold, 0 otherwise.
  */
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	const u_char absent = (u_char)~bits;		/* flags NOT seen */
 	const u_char must_be_set = cmd->arg1 & 0xff;
 	const u_char must_be_clear = (cmd->arg1 >> 8) & 0xff;
 
 	/* a required flag that is absent is a mismatch */
 	if ((must_be_set & absent) != 0)
 		return (0);
 	/* every forbidden flag has to show up in the "absent" set */
 	if ((must_be_clear & absent) != must_be_clear)
 		return (0);
 	return (1);
 }
 
 /*
  * Walk the IPv4 options that follow the fixed header, collect a bitmask
  * of the options of interest (LSRR, SSRR, RR, TS) and compare it with the
  * requirements in cmd via flags_match().
  *
  * Returns the result of flags_match(), or 0 as soon as a malformed or
  * truncated option is encountered:
  *  - a multi-byte option whose length octet would lie outside the header,
  *  - a length smaller than 2 (the length covers the type and length
  *    octets themselves),
  *  - a length that runs past the end of the header.
  */
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(ip + 1);
 	/* number of option bytes left to parse */
 	int x = (ip->ip_hl << 2) - sizeof (struct ip);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[IPOPT_OPTVAL];
 
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			/* do not read the length octet past the header */
 			if (x < 2)
 				return 0; /* truncated */
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < 2 || optlen > x)
 				return 0; /* invalid or truncated */
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 			bits |= IP_FW_IPOPT_LSRR;
 			break;
 
 		case IPOPT_SSRR:
 			bits |= IP_FW_IPOPT_SSRR;
 			break;
 
 		case IPOPT_RR:
 			bits |= IP_FW_IPOPT_RR;
 			break;
 
 		case IPOPT_TS:
 			bits |= IP_FW_IPOPT_TS;
 			break;
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Walk the TCP options that follow the fixed TCP header, collect a
  * bitmask of the options of interest (MSS, window scale, SACK,
  * timestamp) and compare it with the requirements in cmd via
  * flags_match().
  *
  * Parsing stops (and the bits gathered so far are evaluated) at
  * end-of-list or at the first malformed option.  An option is malformed
  * when its length octet would lie outside the header, when the length is
  * smaller than 2 (it covers the kind and length octets themselves), or
  * when it extends past the end of the header.  These checks keep the
  * walk from reading beyond the option area described by th_off.
  */
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(tcp + 1);
 	/* number of option bytes left to parse */
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* the length octet itself must be inside the header */
 			if (x < 2)
 				break;
 			optlen = cp[1];
 			if (optlen < 2 || optlen > x)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Decide whether interface ifp satisfies the interface instruction cmd.
  *
  * Three modes, selected by the first byte of cmd->name:
  *   '\1'   look the interface index up in the table cmd->p.kidx
  *          (the lookup may fill *tablearg);
  *   other  non-empty name: compare against ifp->if_xname, as a glob
  *          pattern when cmd->p.glob is set, literally otherwise;
  *   '\0'   compare cmd->p.ip against the IPv4 addresses configured on
  *          the interface (FreeBSD kernel builds only).
  *
  * Returns non-zero on match, 0 otherwise; a NULL ifp never matches.
  */
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain,
     uint32_t *tablearg)
 {
 
 	/* a packet without an interface cannot match anything */
 	if (ifp == NULL)
 		return (0);
 
 	if (cmd->name[0] == '\1') {
 		/* table based match on the interface index */
 		return ipfw_lookup_table(chain, cmd->p.kidx, 0,
 		    &ifp->if_index, tablearg);
 	}
 
 	if (cmd->name[0] != '\0') {
 		/* name based match */
 		int differs;
 
 		if (cmd->p.glob)
 			differs = fnmatch(cmd->name, ifp->if_xname, 0);
 		else
 			differs = strncmp(ifp->if_xname, cmd->name, IFNAMSIZ);
 		return (differs == 0);
 	}
 
 #if !defined(USERSPACE) && defined(__FreeBSD__)	/* and OSX too ? */
 	{
 		/* address based match: scan the interface's AF_INET addresses */
 		struct ifaddr *ia;
 		int found = 0;
 
 		if_addr_rlock(ifp);
 		CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
 			if (ia->ifa_addr->sa_family == AF_INET &&
 			    cmd->p.ip.s_addr == ((struct sockaddr_in *)
 			    (ia->ifa_addr))->sin_addr.s_addr) {
 				found = 1;
 				break;
 			}
 		}
 		if_addr_runlock(ifp);
 		if (found)
 			return (1);	/* match */
 	}
 #endif /* __FreeBSD__ */
 	return (0);	/* no match, fail ... */
 }
 
 /*
  * The verify_path function checks if a route to the src exists and
  * if it is reachable via ifp (when provided).
  * 
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of.
  * The 'versrcreach' option just checks that the source address is
  * reachable via any route (except default) in the routing table.
  * These two are a measure to block forged packets. This is also
  * commonly known as "anti-spoofing" or Unicast Reverse Path
  * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
  * is purposely reminiscent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implements the same functionality. But note that the syntax
  * is misleading, and the check may be performed on all IP packets
  * whether unicast, multicast, or broadcast.
  */
 static int
 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
 {
 #if defined(USERSPACE) || !defined(__FreeBSD__)
 	return 0;
 #else
 	struct nhop4_basic nh4;
 
 	/* no route towards the source in this FIB: verification fails */
 	if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0)
 		return (0);
 
 	if (ifp != NULL) {
 		/*
 		 * An interface was supplied: the route back to the source
 		 * must use exactly that interface.  The lookup is done with
 		 * NHR_IFAIF; NOTE(review): presumably this makes nh_ifp the
 		 * interface owning the address rather than the transmit
 		 * interface, so that packets injected back by if_simloop()
 		 * (routed via lo0 for our own addresses) still pass despite
 		 * the routing asymmetry -- confirm against the fib API.
 		 */
 		return (ifp == nh4.nh_ifp);
 	}
 
 	/*
 	 * No interface supplied: any route is acceptable unless it is the
 	 * default route or a blackhole/reject route.
 	 */
 	if ((nh4.nh_flags & (NHF_DEFAULT | NHF_REJECT | NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return (1);
 #endif /* __FreeBSD__ */
 }
 
 /*
  * Generate an SCTP packet containing an ABORT chunk. The verification tag
  * is given by vtag. The T-bit is set in the ABORT chunk if and only if
  * reflected is not 0.
  *
  * replyto	packet being answered; only used for MAC labelling
  *		(may be NULL).
  * id		flow of the offending packet; the reply is built with
  *		source and destination (addresses and ports) swapped.
  *
  * Returns a freshly allocated mbuf holding IP(v4|v6) + SCTP common
  * header + ABORT chunk header, or NULL if no mbuf could be allocated or
  * id->addr_type is neither 4 nor (with INET6) 6.  The packet is not
  * transmitted here; the callers in this file hand it to ip_output() or
  * ip6_output().
  */
 
 static struct mbuf *
 ipfw_send_abort(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t vtag,
     int reflected)
 {
 	struct mbuf *m;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct sctphdr *sctp;
 	struct sctp_chunkhdr *chunk;
 	u_int16_t hlen, plen, tlen;	/* IP hdr, SCTP payload, total length */
 
 	/* non-blocking allocation; give up silently on failure */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	/* send the reply through the same FIB as the original flow */
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* pick the network header size; unknown address families are dropped */
 	switch (id->addr_type) {
 	case 4:
 		hlen = sizeof(struct ip);
 		break;
 #ifdef INET6
 	case 6:
 		hlen = sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	plen = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	tlen = hlen + plen;
 	/* leave room in front for a link-layer header */
 	m->m_data += max_linkhdr;
 	/* ipfw_chk() passes packets carrying this flag without inspection */
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 	/* start from an all-zero packet; fields not set below stay 0 */
 	bzero(m->m_data, tlen);
 
 	/* fill in the network header, replying to the sender of the flow */
 	switch (id->addr_type) {
 	case 4:
 		ip = mtod(m, struct ip *);
 
 		ip->ip_v = 4;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = htons(0);
 		ip->ip_off = htons(0);
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_p = IPPROTO_SCTP;
 		/* NOTE(review): header checksum left 0, presumably filled in on output */
 		ip->ip_sum = 0;
 		ip->ip_src.s_addr = htonl(id->dst_ip);
 		ip->ip_dst.s_addr = htonl(id->src_ip);
 
 		sctp = (struct sctphdr *)(ip + 1);
 		break;
 #ifdef INET6
 	case 6:
 		ip6 = mtod(m, struct ip6_hdr *);
 
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_plen = htons(plen);
 		ip6->ip6_nxt = IPPROTO_SCTP;
 		ip6->ip6_hlim = IPV6_DEFHLIM;
 		ip6->ip6_src = id->dst_ip6;
 		ip6->ip6_dst = id->src_ip6;
 
 		sctp = (struct sctphdr *)(ip6 + 1);
 		break;
 #endif
 	}
 
 	/* SCTP common header, ports swapped like the addresses */
 	sctp->src_port = htons(id->dst_port);
 	sctp->dest_port = htons(id->src_port);
 	sctp->v_tag = htonl(vtag);
 	/* checksum field must be zero while the checksum is computed below */
 	sctp->checksum = htonl(0);
 
 	/* a bare ABORT chunk: header only, no error causes */
 	chunk = (struct sctp_chunkhdr *)(sctp + 1);
 	chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
 	chunk->chunk_flags = 0;
 	if (reflected != 0) {
 		/* the T-bit mentioned in the header comment */
 		chunk->chunk_flags |= SCTP_HAD_NO_TCB;
 	}
 	chunk->chunk_length = htons(sizeof(struct sctp_chunkhdr));
 
 	/* computed last, once the whole SCTP part is final; hlen is the offset passed to skip the IP header */
 	sctp->checksum = sctp_calculate_cksum(m, hlen);
 
 	return (m);
 }
 
 /*
  * Generate a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
  * Otherwise we are sending a keepalive, and flags & TH_SYN selects the
  * direction: when TH_SYN is set (and TH_RST is not) the packet travels
  * from the flow's source to its destination, in every other case the
  * addresses and ports of the flow are swapped.
  * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
  * so that MAC can label the reply appropriately.
  *
  * seq and ack are in host byte order.  For a RST they are the values
  * taken from the offending segment; for a keepalive they are used as is.
  *
  * Returns a freshly allocated mbuf holding IP(v4|v6) + TCP header, or
  * NULL if no mbuf could be allocated or id->addr_type is neither 4 nor
  * (with INET6) 6.  The packet is not transmitted here.
  */
 struct mbuf *
 ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
     u_int32_t ack, int flags)
 {
 	struct mbuf *m = NULL;		/* stupid compiler */
 	struct ip *h = NULL;		/* stupid compiler */
 #ifdef INET6
 	struct ip6_hdr *h6 = NULL;
 #endif
 	struct tcphdr *th = NULL;
 	int len, dir;	/* total packet length; 1 = same direction as the flow */
 
 	/* non-blocking allocation; give up silently on failure */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	/* send the packet through the same FIB as the original flow */
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* total length depends on the address family; unknown ones are dropped */
 	switch (id->addr_type) {
 	case 4:
 		len = sizeof(struct ip) + sizeof(struct tcphdr);
 		break;
 #ifdef INET6
 	case 6:
 		len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	/* forward direction only for a non-RST request carrying TH_SYN */
 	dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
 
 	/* leave room in front for a link-layer header */
 	m->m_data += max_linkhdr;
 	/* ipfw_chk() passes packets carrying this flag without inspection */
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	/* start from an all-zero packet; fields not set below stay 0 */
 	bzero(m->m_data, len);
 
 	/*
 	 * First pass over the network header: only the fields that take
 	 * part in the TCP checksum are set (protocol, TCP length, addresses);
 	 * everything else is still zero.  The header is completed after the
 	 * checksum has been computed.
 	 */
 	switch (id->addr_type) {
 	case 4:
 		h = mtod(m, struct ip *);
 
 		/* prepare for checksum */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h->ip_src.s_addr = htonl(id->src_ip);
 			h->ip_dst.s_addr = htonl(id->dst_ip);
 		} else {
 			h->ip_src.s_addr = htonl(id->dst_ip);
 			h->ip_dst.s_addr = htonl(id->src_ip);
 		}
 
 		th = (struct tcphdr *)(h + 1);
 		break;
 #ifdef INET6
 	case 6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* prepare for checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h6->ip6_src = id->src_ip6;
 			h6->ip6_dst = id->dst_ip6;
 		} else {
 			h6->ip6_src = id->dst_ip6;
 			h6->ip6_dst = id->src_ip6;
 		}
 
 		th = (struct tcphdr *)(h6 + 1);
 		break;
 #endif
 	}
 
 	/* ports follow the same direction as the addresses */
 	if (dir) {
 		th->th_sport = htons(id->src_port);
 		th->th_dport = htons(id->dst_port);
 	} else {
 		th->th_sport = htons(id->dst_port);
 		th->th_dport = htons(id->src_port);
 	}
 	/* bare header, no TCP options */
 	th->th_off = sizeof(struct tcphdr) >> 2;
 
 	if (flags & TH_RST) {
 		if (flags & TH_ACK) {
 			/* offending segment had ACK: RST takes its ack as sequence number */
 			th->th_seq = htonl(ack);
 			th->th_flags = TH_RST;
 		} else {
 			/* no ACK: send RST|ACK acknowledging seq (plus one for a SYN) */
 			if (flags & TH_SYN)
 				seq++;
 			th->th_ack = htonl(seq);
 			th->th_flags = TH_RST | TH_ACK;
 		}
 	} else {
 		/*
 		 * Keepalive - use caller provided sequence numbers
 		 */
 		th->th_seq = htonl(seq);
 		th->th_ack = htonl(ack);
 		th->th_flags = TH_ACK;
 	}
 
 	/* compute the TCP checksum, then complete the network header */
 	switch (id->addr_type) {
 	case 4:
 		/* checksum over the half-built IP header (acting as pseudo-header) plus TCP */
 		th->th_sum = in_cksum(m, len);
 
 		/* finish the ip header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_off = htons(0);
 		h->ip_len = htons(len);
 		h->ip_ttl = V_ip_defttl;
 		/* NOTE(review): header checksum left 0, presumably filled in on output */
 		h->ip_sum = 0;
 		break;
 #ifdef INET6
 	case 6:
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
 		    sizeof(struct tcphdr));
 
 		/* finish the ip6 header */
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 		break;
 #endif
 	}
 
 	return (m);
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 /*
  * Match an ICMPv6 type against the multi-word bitmask in cmd->d[]:
  * type N is selected when bit (N % 32) of word d[N / 32] is set.
  *
  * Returns 1 on match, 0 otherwise.  Negative types are rejected so they
  * can never index before the start of d[], and the bit is built from an
  * unsigned constant because shifting a signed 1 into bit 31 is undefined.
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	if (type < 0 || type > ICMP6_MAXTYPE)
 		return (0);
 	return ((cmd->d[type / 32] & (1U << (type % 32))) != 0);
 }
 
 /*
  * Return 1 if curr_flow equals any of the values d[0] .. d[arg1]
  * (both ends inclusive) stored in the instruction, 0 otherwise.
  */
 static int
 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
 {
 	int idx = 0;
 
 	while (idx <= cmd->o.arg1) {
 		if (curr_flow == cmd->d[idx])
 			return (1);
 		++idx;
 	}
 	return (0);
 }
 
 /*
  * Support for IP6_*_ME opcodes.
  *
  * Mask used when comparing link-local addresses: every byte takes part
  * in the comparison except bytes 2 and 3 (NOTE(review): presumably where
  * the kernel embeds the scope/zone id of link-local addresses -- confirm).
  */
 static const struct in6_addr lla_mask = {{{
 	0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 }}};
 
 /*
  * Tell whether in6 is one of this host's own IPv6 addresses.
  *
  * Multicast addresses are never local.  Addresses that are not
  * link-local are delegated to in6_localip().  Link-local addresses are
  * compared, under the IPv6 address-list read lock, against every
  * configured link-local address using lla_mask.
  *
  * Returns non-zero if the address is local, 0 otherwise.
  */
 static int
 ipfw_localip6(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int is_local = 0;
 
 	if (IN6_IS_ADDR_MULTICAST(in6))
 		return (0);
 
 	if (!IN6_IS_ADDR_LINKLOCAL(in6))
 		return (in6_localip(in6));
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		/* only link-local entries are candidates */
 		if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr) &&
 		    IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 		    in6, &lla_mask)) {
 			is_local = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 	return (is_local);
 }
 
 /*
  * IPv6 counterpart of verify_path(): check that a route back to src
  * exists in the given FIB and, when ifp is supplied, that it uses ifp.
  *
  * Link-local scoped sources are always accepted without a lookup.
  * Returns 1 when the source passes verification, 0 otherwise.
  */
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
 {
 	struct nhop6_basic nh6;
 
 	if (IN6_IS_SCOPE_LINKLOCAL(src))
 		return (1);
 
 	/* no route towards the source in this FIB: verification fails */
 	if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0)
 		return (0);
 
 	/* an interface was supplied: the route must point at it */
 	if (ifp != NULL)
 		return (ifp == nh6.nh_ifp);
 
 	/*
 	 * No interface supplied: any route is acceptable unless it is the
 	 * default route or a blackhole/reject route.
 	 */
 	if ((nh6.nh_flags & (NHF_DEFAULT | NHF_REJECT | NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return (1);
 }
 
 /*
  * Return 1 if icmp6_type is one of the ICMPv6 query types (echo request,
  * membership query, who-are-you request, FQDN query, node information
  * query), 0 otherwise.  Types above ICMP6_MAXTYPE are never queries.
  */
 static int
 is_icmp6_query(int icmp6_type)
 {
 	int is_query;
 
 	if (icmp6_type > ICMP6_MAXTYPE)
 		return (0);
 
 	/* plain comparisons, so constants that share a value are harmless */
 	is_query = (icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY);
 
 	return (is_query ? 1 : 0);
 }
 
 /*
  * Translate an ICMPv4 "destination unreachable" code into the ICMPv6
  * destination-unreachable code to use instead (RFC 7915 p4.2).
  */
 static int
 map_icmp_unreach(int code)
 {
 	int icmp6_code;
 
 	switch (code) {
 	case ICMP_UNREACH_PORT:
 		icmp6_code = ICMP6_DST_UNREACH_NOPORT;
 		break;
 	case ICMP_UNREACH_NET:
 	case ICMP_UNREACH_HOST:
 	case ICMP_UNREACH_SRCFAIL:
 	case ICMP_UNREACH_NET_UNKNOWN:
 	case ICMP_UNREACH_HOST_UNKNOWN:
 	case ICMP_UNREACH_TOSNET:
 	case ICMP_UNREACH_TOSHOST:
 		icmp6_code = ICMP6_DST_UNREACH_NOROUTE;
 		break;
 	default:
 		/*
 		 * Everything else becomes "administratively prohibited".
 		 * XXX: unreach proto should be mapped into ICMPv6
 		 * parameter problem, but we use only unreach type.
 		 */
 		icmp6_code = ICMP6_DST_UNREACH_ADMIN;
 		break;
 	}
 	return (icmp6_code);
 }
 
 /*
  * Reject an IPv6 packet, consuming args->m (args->m is NULL on return).
  *
  * code	ICMP6_UNREACH_RST: answer a TCP segment with a RST;
  *		ICMP6_UNREACH_ABORT: answer an SCTP packet with an ABORT;
  *		anything else: send an ICMPv6 destination unreachable
  *		carrying that code.
  * hlen	offset from ip6 to the upper layer (TCP/SCTP) header.
  * ip6		start of the IPv6 header of the packet in args->m.
  *
  * If code asks for RST/ABORT but the packet's protocol does not fit,
  * the packet is simply freed.
  */
 static void
 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
 {
 	struct mbuf *m;
 
 	m = args->m;
 	if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *tcp;
 		tcp = (struct tcphdr *)((char *)ip6 + hlen);
 
 		/* never answer a RST with a RST */
 		if ((tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m0;
 			m0 = ipfw_send_pkt(args->m, &(args->f_id),
 			    ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 			    tcp->th_flags | TH_RST);
 			if (m0 != NULL)
 				ip6_output(m0, NULL, NULL, 0, NULL, NULL,
 				    NULL);
 		}
 		FREE_PKT(m);
 	} else if (code == ICMP6_UNREACH_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m0;
 		struct sctphdr *sctp;
 		u_int32_t v_tag;	/* tag for the ABORT; 0 means "send nothing" */
 		int reflected;		/* 1: tag copied from the offending packet */
 
 		sctp = (struct sctphdr *)((char *)ip6 + hlen);
 		/* default: reflect the verification tag of the packet */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		/* Investigate the first chunk header if available */
 		if (m->m_len >= hlen + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			struct sctp_chunkhdr *chunk;
 
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.
 				 * v_tag is 0 here, so breaking out means
 				 * that no ABORT is sent.  The "+ 3" leaves
 				 * room for up to three trailing bytes
 				 * (NOTE(review): presumably chunk padding).
 				 */
 				if (m->m_pkthdr.len >
 				    hlen + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((m->m_len >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					struct sctp_init *init;
 
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 *      but don't do to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		/* a zero tag suppresses the reply */
 		if (v_tag == 0) {
 			m0 = NULL;
 		} else {
 			m0 = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m0 != NULL)
 			ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		FREE_PKT(m);
 	} else if (code != ICMP6_UNREACH_RST && code != ICMP6_UNREACH_ABORT) {
 		/* Send an ICMPv6 unreach. */
 #if 0
 		/*
 		 * Unlike above, the mbufs need to line up with the ip6 hdr,
 		 * as the contents are read. We need to m_adj() the
 		 * needed amount.
 		 * The mbuf will however be thrown away so we can adjust it.
 		 * Remember we did an m_pullup on it already so we
 		 * can make some assumptions about contiguousness.
 		 */
 		if (args->L3offset)
 			m_adj(m, args->L3offset);
 #endif
 		/* m is not freed here; NOTE(review): icmp6_error() presumably consumes it */
 		icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
 	} else
 		/* RST/ABORT requested for a packet of another protocol */
 		FREE_PKT(m);
 
 	/* the packet is gone in every branch above */
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 
 /*
  * Sends a reject message for an IPv4 packet, consuming the mbuf passed
  * in args->m (args->m is NULL on return).
  *
  * code	ICMP_REJECT_RST: answer a TCP segment with a RST;
  *		ICMP_REJECT_ABORT: answer an SCTP packet with an ABORT;
  *		anything else: send an ICMP unreachable with that code.
  * iplen	length of the IP packet, used to bound the SCTP parsing.
  * ip		IPv4 header of the packet (its ip_hl gives the header
  *		length used in the bounds checks).
  *
  * If code asks for RST/ABORT but the packet's protocol does not fit,
  * the packet is simply freed.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
 {
 
 	/*
 	 * NOTE(review): the disabled section below would not compile if
 	 * enabled: its comment is closed early, leaving stray text, and no
 	 * variable named m is declared at this point.
 	 */
 #if 0
 	/* XXX When ip is not guaranteed to be at mtod() we will
 	 * need to account for this */
 	 * The mbuf will however be thrown away so we can adjust it.
 	 * Remember we did an m_pullup on it already so we
 	 * can make some assumptions about contiguousness.
 	 */
 	if (args->L3offset)
 		m_adj(m, args->L3offset);
 #endif
 	if (code != ICMP_REJECT_RST && code != ICMP_REJECT_ABORT) {
 		/* Send an ICMP unreach */
 		/* args->m is not freed here; NOTE(review): icmp_error() presumably consumes it */
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else if (code == ICMP_REJECT_RST && args->f_id.proto == IPPROTO_TCP) {
 		/* TCP header located via the IP header at the start of the mbuf */
 		struct tcphdr *const tcp =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 		/* never answer a RST with a RST */
 		if ( (tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m;
 			m = ipfw_send_pkt(args->m, &(args->f_id),
 				ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 				tcp->th_flags | TH_RST);
 			if (m != NULL)
 				ip_output(m, NULL, NULL, 0, NULL, NULL);
 		}
 		FREE_PKT(args->m);
 	} else if (code == ICMP_REJECT_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m;
 		struct sctphdr *sctp;
 		struct sctp_chunkhdr *chunk;
 		struct sctp_init *init;
 		u_int32_t v_tag;	/* tag for the ABORT; 0 means "send nothing" */
 		int reflected;		/* 1: tag copied from the offending packet */
 
 		sctp = L3HDR(struct sctphdr, mtod(args->m, struct ip *));
 		/* default: reflect the verification tag of the packet */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		if (iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			/* Look at the first chunk header if available */
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.
 				 * v_tag is 0 here, so breaking out means
 				 * that no ABORT is sent.  The "+ 3" leaves
 				 * room for up to three trailing bytes
 				 * (NOTE(review): presumably chunk padding).
 				 */
 				if (iplen >
 				    (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((iplen >= (ip->ip_hl << 2) +
 				    sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 * but don't do to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		/* a zero tag suppresses the reply */
 		if (v_tag == 0) {
 			m = NULL;
 		} else {
 			m = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m != NULL)
 			ip_output(m, NULL, NULL, 0, NULL, NULL);
 		FREE_PKT(args->m);
 	} else
 		/* RST/ABORT requested for a packet of another protocol */
 		FREE_PKT(args->m);
 	/* the packet is gone in every branch above */
 	args->m = NULL;
 }
 
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
  * so we cache the results. *ugid_lookupp is 0 if we have not
  * yet done a lookup, 1 if we succeeded, and -1 if we tried
  * and failed. The function always returns the match value.
  * We could actually spare the variable and use *uc, setting
  * it to (void *)check_uidgid if we have no info, NULL if
  * we tried and failed, or any other value if successful.
  *
  * insn		the O_UID, O_GID or O_JAIL instruction; insn->d[0]
  *			holds the id to compare with.
  * args		packet state: flow id, optional pcb (args->inp),
  *			direction flag, interface and mbuf.
  * ugid_lookupp	in/out lookup state described above.
  * uc			in/out cached credential; on a successful lookup a
  *			reference is taken with crhold() and stored here.
  *			NOTE(review): releasing that reference appears to be
  *			left to the caller -- confirm.
  *
  * Returns non-zero if the credential matches the instruction, 0 if it
  * does not, if no socket could be found, or if the protocol is not TCP,
  * UDP or UDP-Lite.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
     struct ucred **uc)
 {
 #if defined(USERSPACE)
 	return 0;	// not supported in userspace
 #else
 #ifndef __FreeBSD__
 	/* XXX */
 	/*
 	 * NOTE(review): this branch refers to names (proto, oif, dst_ip,
 	 * dst_port, src_ip, src_port, inp) that are not declared in this
 	 * function; it looks stale.
 	 */
 	return cred_check(insn, proto, oif,
 	    dst_ip, dst_port, src_ip, src_port,
 	    (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
 #else  /* FreeBSD */
 	struct in_addr src_ip, dst_ip;
 	struct inpcbinfo *pi;
 	struct ipfw_flow_id *id;
 	struct inpcb *pcb, *inp;
 	int lookupflags;
 	int match;
 
 	id = &args->f_id;
 	inp = args->inp;
 
 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather than holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *ugid_lookupp == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			*uc = crhold(inp->inp_cred);
 			*ugid_lookupp = 1;
 		} else
 			*ugid_lookupp = -1;
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*ugid_lookupp == -1)
 		return (0);
 	/*
 	 * Select the pcb table for the protocol.  Note that this check is
 	 * performed even when a credential is already cached, so any
 	 * other protocol yields "no match".
 	 */
 	if (id->proto == IPPROTO_TCP) {
 		lookupflags = 0;
 		pi = &V_tcbinfo;
 	} else if (id->proto == IPPROTO_UDP) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_udbinfo;
 	} else if (id->proto == IPPROTO_UDPLITE) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_ulitecbinfo;
 	} else
 		return 0;
 	/* have the lookup return the pcb read-locked (asserted below) */
 	lookupflags |= INPLOOKUP_RLOCKPCB;
 	match = 0;
 	if (*ugid_lookupp == 0) {
 		/*
 		 * No cached credential yet: look the socket up.  For an
 		 * inbound packet the flow's source/destination are passed
 		 * in that order and no interface is given; for an outbound
 		 * packet the two endpoints are swapped and args->ifp is
 		 * passed along.
 		 */
 		if (id->addr_type == 6) {
 #ifdef INET6
 			if (args->flags & IPFW_ARGS_IN)
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->src_ip6, htons(id->src_port),
 				    &id->dst_ip6, htons(id->dst_port),
 				    lookupflags, NULL, args->m);
 			else
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->dst_ip6, htons(id->dst_port),
 				    &id->src_ip6, htons(id->src_port),
 				    lookupflags, args->ifp, args->m);
 #else
 			/* IPv6 flow on a kernel without INET6: cannot match */
 			*ugid_lookupp = -1;
 			return (0);
 #endif
 		} else {
 			/* the flow id keeps IPv4 addresses in host order */
 			src_ip.s_addr = htonl(id->src_ip);
 			dst_ip.s_addr = htonl(id->dst_ip);
 			if (args->flags & IPFW_ARGS_IN)
 				pcb = in_pcblookup_mbuf(pi,
 				    src_ip, htons(id->src_port),
 				    dst_ip, htons(id->dst_port),
 				    lookupflags, NULL, args->m);
 			else
 				pcb = in_pcblookup_mbuf(pi,
 				    dst_ip, htons(id->dst_port),
 				    src_ip, htons(id->src_port),
 				    lookupflags, args->ifp, args->m);
 		}
 		if (pcb != NULL) {
 			/* cache a referenced credential, then drop the pcb lock */
 			INP_RLOCK_ASSERT(pcb);
 			*uc = crhold(pcb->inp_cred);
 			*ugid_lookupp = 1;
 			INP_RUNLOCK(pcb);
 		}
 		if (*ugid_lookupp == 0) {
 			/*
 			 * We tried and failed, set the variable to -1
 			 * so we will not try again on this packet.
 			 */
 			*ugid_lookupp = -1;
 			return (0);
 		}
 	}
 	/* a credential is available in *uc: evaluate the instruction */
 	if (insn->o.opcode == O_UID)
 		match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID)
 		match = groupmember((gid_t)insn->d[0], *uc);
 	else if (insn->o.opcode == O_JAIL)
 		match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
 	return (match);
 #endif /* __FreeBSD__ */
 #endif /* not supported in userspace */
 }
 
 /*
  * Helper function to set args with info on the rule after the matching
  * one. slot is precise, whereas we guess rule_id as they are
  * assigned sequentially.
  */
 static inline void
 set_match(struct ip_fw_args *args, int slot,
 	struct ip_fw_chain *chain)
 {
 	args->rule.chain_id = chain->id;
 	args->rule.slot = slot + 1; /* we use 0 as a marker */
 	args->rule.rule_id = 1 + chain->map[slot]->id;
 	args->rule.rulenum = chain->map[slot]->rulenum;
 	args->flags |= IPFW_ARGS_REF;
 }
 
 #ifndef LINEAR_SKIPTO
 /*
  * Resolve the target of a jump from rule f to rule number num and return
  * its position in the chain's rule array.
  *
  * The result is cached in the rule itself: f->cached_pos holds the
  * position and f->cached_id the chain id it was computed for (horrible
  * hack to avoid changing the ABI).  The cache is bypassed when num is
  * IP_FW_TARG, i.e. when the real target comes from tablearg and may
  * differ from packet to packet.
  *
  * Unless jump_backwards is non-zero, a target at or before f is bumped
  * to the rule number following f.
  */
 static int
 jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	const int cacheable = (num != IP_FW_TARG);
 	int target, f_pos;
 
 	/* fast path: cached position still valid for this chain version */
 	if (cacheable && f->cached_id == chain->id)
 		return (f->cached_pos);
 
 	target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 	/* make sure we do not jump backward */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 
 	/* use the index map when the chain has one, search otherwise */
 	f_pos = (chain->idxmap != NULL) ?
 	    chain->idxmap[target] : ipfw_find_rule(chain, target, 0);
 
 	/* update the cache */
 	if (cacheable) {
 		f->cached_id = chain->id;
 		f->cached_pos = f_pos;
 	}
 
 	return (f_pos);
 }
 #else
 /*
  * Resolve the target of a jump from rule f to rule number num using only
  * the chain's index map (no per-rule cache) and return its position in
  * the rule array.
  *
  * Unless jump_backwards is non-zero, a target at or before f is bumped
  * to the rule number following f.
  */
 static int
 jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 
 	/* make sure we do not jump backward */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 
 	return (chain->idxmap[target]);
 }
 #endif
 
 #define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, NULL for layer3 packet.
  *	args->L3offset	Number of bytes bypassed if we came from L2.
  *			e.g. often sizeof(eh)  ** NOTYET **
  *	args->ifp	Incoming or outgoing interface.
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->next_hop6	IPv6 next hop we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->rule.info	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *		args->rule contains the matching rule,
  *		args->rule.info has additional information.
  *
  */
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 
 	/*
 	 * Local variables holding state while processing a packet:
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 * **notyet**
 	 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is the beginning of the ip(4 or 6) header.
 	 *	Calculated by adding the L3offset to the start of data.
 	 *	(Until we start using L3offset, the packet is
 	 *	supposed to start with the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 #ifndef __FreeBSD__
 	struct bsd_ucred ucred_cache;
 #else
 	struct ucred *ucred_cache = NULL;
 #endif
 	int ucred_lookup = 0;
 	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 	struct ifnet *oif, *iif;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
 	 *	or there is a single packet fragment (fragment header added
 	 *	without needed).  We will treat a single packet fragment as if
 	 *	there was no fragment header (or log/block depending on the
 	 *	V_fw_permit_single_frag6 sysctl setting).
 	 */
 	u_short offset = 0;
 	u_short ip6f_mf = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	uint8_t proto;
 	uint16_t src_port, dst_port;		/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	int iplen = 0;
 	int pktlen;
 	uint16_t etype;			/* Host order stored ether type */
 
 	struct ipfw_dyn_info dyn_info;
 	struct ip_fw *q = NULL;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	uint8_t	icmp6_type = 0;
 	uint16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 
 	int is_ipv4 = 0;
 
 	int done = 0;		/* flag to exit the outer loop */
 	IPFW_RLOCK_TRACKER;
 
 	if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
 		return (IP_FW_PASS);	/* accept */
 
 	dst_ip.s_addr = 0;		/* make sure it is initialized */
 	src_ip.s_addr = 0;		/* make sure it is initialized */
 	src_port = dst_port = 0;
 	pktlen = m->m_pkthdr.len;
 
 	DYN_INFO_INIT(&dyn_info);
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define PULLUP_TO(_len, p, T)	PULLUP_LEN(_len, p, sizeof(T))
 #define PULLUP_LEN(_len, p, T)					\
 do {								\
 	int x = (_len) + T;					\
 	if ((m)->m_len < x) {					\
 		args->m = m = m_pullup(m, x);			\
 		if (m == NULL)					\
 			goto pullup_failed;			\
 	}							\
 	p = (mtod(m, char *) + (_len));				\
 } while (0)
 
 	/*
 	 * if we have an ether header,
 	 */
 	if (args->flags & IPFW_ARGS_ETHER)
 		etype = ntohs(args->eh->ether_type);
 	else
 		etype = 0;
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 
 		is_ipv6 = 1;
+		args->flags |= IPFW_ARGS_IP6;
 		hlen = sizeof(struct ip6_hdr);
 		proto = ip6->ip6_nxt;
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL && offset == 0) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				icmp6_type = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
 				case 0:
 					ext_hd |= EXT_RTHDR0;
 					break;
 				case 2:
 					ext_hd |= EXT_RTHDR2;
 					break;
 				default:
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Unknown "
 						    "Routing Header type(%d)\n",
 						    ((struct ip6_rthdr *)
 						    ulp)->ip6r_type);
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (V_fw_permit_single_frag6 == 0 &&
 				    offset == 0 && ip6f_mf == 0) {
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Invalid "
 						    "Fragment Header\n");
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.extra =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				/*
 				 * Packet ends here, and IPv6 header has
 				 * already been pulled up. If ip6e_len!=0
 				 * then octets must be ignored.
 				 */
 				ulp = ip; /* non-NULL to get out of loop. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			case IPPROTO_PIM:
 				/* XXX PIM header check? */
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
 			case IPPROTO_GRE:	/* RFC 1701 */
 				/* XXX GRE header check? */
 				PULLUP_TO(hlen, ulp, struct grehdr);
 				break;
 
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, offsetof(
 				    struct carp_header, carp_counter));
 				if (CARP_ADVERTISEMENT !=
 				    ((struct carp_header *)ulp)->carp_type)
 					return (IP_FW_DENY);
 				break;
 
 			case IPPROTO_IPV6:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip6_hdr);
 				break;
 
 			case IPPROTO_IPV4:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip);
 				break;
 
 			default:
 				if (V_fw_verbose)
 					printf("IPFW2: IPV6 - Unknown "
 					    "Extension Header(%d), ext_hd=%x\n",
 					     proto, ext_hd);
 				if (V_fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 			} /*switch */
 		}
 		ip = mtod(m, struct ip *);
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.addr_type = 6;
 		args->f_id.src_ip6 = ip6->ip6_src;
 		args->f_id.dst_ip6 = ip6->ip6_dst;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 		iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
 		is_ipv4 = 1;
+		args->flags |= IPFW_ARGS_IP4;
 		hlen = ip->ip_hl << 2;
 		/*
 		 * Collect parameters into local variables for faster
 		 * matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		iplen = ntohs(ip->ip_len);
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				//args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		}
 
 		ip = mtod(m, struct ip *);
 		args->f_id.addr_type = 4;
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	} else {
 		proto = 0;
 		dst_ip.s_addr = src_ip.s_addr = 0;
 
 		args->f_id.addr_type = 1; /* XXX */
 	}
 #undef PULLUP_TO
 	pktlen = iplen < pktlen ? iplen: pktlen;
 
 	/* Properly initialize the rest of f_id */
 	args->f_id.proto = proto;
 	args->f_id.src_port = src_port = ntohs(src_port);
 	args->f_id.dst_port = dst_port = ntohs(dst_port);
 	args->f_id.fib = M_GETFIB(m);
 
 	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
 		IPFW_PF_RUNLOCK(chain);
 		return (IP_FW_PASS);	/* accept */
 	}
 	if (args->flags & IPFW_ARGS_REF) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH, DIVERT/TEE...)
 		 * Validate the slot and continue from the next one
 		 * if still present, otherwise do a lookup.
 		 */
 		f_pos = (args->rule.chain_id == chain->id) ?
 		    args->rule.slot :
 		    ipfw_find_rule(chain, args->rule.rulenum,
 			args->rule.rule_id);
 	} else {
 		f_pos = 0;
 	}
 
 	if (args->flags & IPFW_ARGS_IN) {
 		iif = args->ifp;
 		oif = NULL;
 	} else {
 		MPASS(args->flags & IPFW_ARGS_OUT);
 		iif = m->m_pkthdr.rcvif;
 		oif = args->ifp;
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 * We have two nested loops and an inner switch. Sometimes we
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
 	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
 	 *		cmd points to the current microinstruction.
 	 *
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
 	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
 	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 		struct ip_fw *f;
 
 		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 /* check_body: */
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset != 0)
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 #else
 						    (void *)&ucred_cache);
 #endif
 				break;
 
 			case O_RECV:
 				match = iface_match(iif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_VIA:
 				match = iface_match(args->ifp,
 				    (ipfw_insn_if *)cmd, chain, &tablearg);
 				break;
 
 			case O_MACADDR2:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (etype >= p[0] &&
 						    etype <= p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->flags & IPFW_ARGS_ETHER);
 				break;
 
 			case O_DIVERTED:
 				if ((args->flags & IPFW_ARGS_REF) == 0)
 					break;
 				/*
 				 * For diverted packets, args->rule.info
 				 * contains the divert port (in host format)
 				 * reason and direction.
 				 */
 				match = ((args->rule.info & IPFW_IS_MASK) ==
 				    IPFW_IS_DIVERT) && (
 				    ((args->rule.info & IPFW_INFO_IN) ?
 					1: 2) & cmd->arg1);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_DST_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx, key;
 				uint16_t keylen;
 
 				if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
 					/* Determine lookup key type */
 					vidx = ((ipfw_insn_u32 *)cmd)->d[1];
 					if (vidx != 4 /* uid */ &&
 					    vidx != 5 /* jail */ &&
 					    is_ipv6 == 0 && is_ipv4 == 0)
 						break;
 					/* Determine key length */
 					if (vidx == 0 /* dst-ip */ ||
 					    vidx == 1 /* src-ip */)
 						keylen = is_ipv6 ?
 						    sizeof(struct in6_addr):
 						    sizeof(in_addr_t);
 					else {
 						keylen = sizeof(key);
 						pkey = &key;
 					}
 					if (vidx == 0 /* dst-ip */)
 						pkey = is_ipv4 ? (void *)&dst_ip:
 						    (void *)&args->f_id.dst_ip6;
 					else if (vidx == 1 /* src-ip */)
 						pkey = is_ipv4 ? (void *)&src_ip:
 						    (void *)&args->f_id.src_ip6;
 					else if (vidx == 6 /* dscp */) {
 						if (is_ipv4)
 							key = ip->ip_tos >> 2;
 						else {
 							key = args->f_id.flow_id6;
 							key = (key & 0x0f) << 2 |
 							    (key & 0xf000) >> 14;
 						}
 						key &= 0x3f;
 					} else if (vidx == 2 /* dst-port */ ||
 					    vidx == 3 /* src-port */) {
 						/* Skip fragments */
 						if (offset != 0)
 							break;
 						/* Skip proto without ports */
 						if (proto != IPPROTO_TCP &&
 						    proto != IPPROTO_UDP &&
 						    proto != IPPROTO_UDPLITE &&
 						    proto != IPPROTO_SCTP)
 							break;
 						if (vidx == 2 /* dst-port */)
 							key = dst_port;
 						else
 							key = src_port;
 					}
 #ifndef USERSPACE
 					else if (vidx == 4 /* uid */ ||
 					    vidx == 5 /* jail */) {
 						check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache->cr_uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache->cr_prison->pr_id;
 #else /* !__FreeBSD__ */
 						    (void *)&ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache.uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache.xid;
 #endif /* !__FreeBSD__ */
 					}
 #endif /* !USERSPACE */
 					else
 						break;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, keylen, pkey, &vidx);
 					if (!match)
 						break;
 					tablearg = vidx;
 					break;
 				}
 				/* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */
 				/* FALLTHROUGH */
 			}
 			case O_IP_SRC_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx;
 				uint16_t keylen;
 
 				if (is_ipv4) {
 					keylen = sizeof(in_addr_t);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &dst_ip;
 					else
 						pkey = &src_ip;
 				} else if (is_ipv6) {
 					keylen = sizeof(struct in6_addr);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &args->f_id.dst_ip6;
 					else
 						pkey = &args->f_id.src_ip6;
 				} else
 					break;
 				match = ipfw_lookup_table(chain, cmd->arg1,
 				    keylen, pkey, &vidx);
 				if (!match)
 					break;
 				if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) {
 					match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 					    TARG_VAL(chain, vidx, tag);
 					if (!match)
 						break;
 				}
 				tablearg = vidx;
 				break;
 			}
 
 			case O_IP_FLOW_LOOKUP:
 				{
 					uint32_t v = 0;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, 0, &args->f_id, &v);
 					if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 						match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 						    TARG_VAL(chain, v, tag);
 					if (match)
 						tablearg = v;
 				}
 				break;
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					match = in_localip(src_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_SRC_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.src_ip6);
 #endif
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					match = in_localip(dst_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_DST_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.dst_ip6);
 #endif
 				break;
 
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE ||
 				    proto == IPPROTO_TCP ||
 				    proto == IPPROTO_SCTP) && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = (is_ipv4 &&
 				    cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (is_ipv4) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = iplen;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_DSCP:
 			    {
 				uint32_t *p;
 				uint16_t x;
 
 				p = ((ipfw_insn_u32 *)cmd)->d;
 
 				if (is_ipv4)
 					x = ip->ip_tos >> 2;
 				else if (is_ipv6) {
 					uint8_t *v;
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					x = (*v & 0x0F) << 2;
 					v++;
 					x |= *v >> 6;
 				} else
 					break;
 
 				/* DSCP bitmask is stored as low_u32 high_u32 */
 				if (x >= 32)
 					match = *(p + 1) & (1 << (x - 32));
 				else
 					match = *p & (1 << x);
 			    }
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 #ifdef INET6
 				    if (is_ipv6) {
 					    struct ip6_hdr *ip6;
 
 					    ip6 = (struct ip6_hdr *)ip;
 					    if (ip6->ip6_plen == 0) {
 						    /*
 						     * Jumbo payload is not
 						     * supported by this
 						     * opcode.
 						     */
 						    break;
 					    }
 					    x = iplen - hlen;
 				    } else
 #endif /* INET6 */
 					    x = iplen - (ip->ip_hl << 2);
 				    tcp = TCP(ulp);
 				    x -= tcp->th_off << 2;
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				if (proto == IPPROTO_TCP && offset == 0 && ulp){
 					PULLUP_LEN(hlen, ulp,
 					    (TCP(ulp)->th_off << 2));
 					match = tcpopts_match(TCP(ulp), cmd);
 				}
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    x = ntohs(TCP(ulp)->th_win);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* Otherwise we have ranges. */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i > 0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
 				struct m_tag *mtag;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				/*
 				 * ALTQ uses mbuf tags from another
 				 * packet filtering system - pf(4).
 				 * We allocate a tag in its format
 				 * and fill it in, pretending to be pf(4).
 				 */
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
 				mtag = m_tag_get(PACKET_TAG_PF,
 				    sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
 				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				m_tag_prepend(m, mtag);
 				at = (struct pf_mtag *)(mtag + 1);
 				at->qid = altq->qid;
 				at->hdr = ip;
 				break;
 			}
 
 			case O_LOG:
 				ipfw_log(chain, f, hlen, args, m,
 				    offset | ip6f_mf, tablearg, ip);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = (args->flags & IPFW_ARGS_OUT ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    iif, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, iif, args->f_id.fib)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) || (
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, NULL, args->f_id.fib))));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6), iif,
 						args->f_id.fib) :
 #endif
 					    verify_path(src_ip, iif,
 					        args->f_id.fib);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					int i = cmdlen - 1;
 					struct in6_addr p;
 					struct in6_addr *d =
 					    &((ipfw_insn_ip6 *)cmd)->addr6;
 
 					for (; !match && i > 0; d += 2,
 					    i -= F_INSN_SIZE(struct in6_addr)
 					    * 2) {
 						p = (cmd->opcode ==
 						    O_IP6_SRC_MASK) ?
 						    args->f_id.src_ip6:
 						    args->f_id.dst_ip6;
 						APPLY_MASK(&p, &d[1]);
 						match =
 						    IN6_ARE_ADDR_EQUAL(&d[0],
 						    &p);
 					}
 				}
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			case O_TAG: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				/* Packet is already tagged with this tag? */
 				mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
 
 				/* We have `untag' action when F_NOT flag is
 				 * present. And we must remove this mtag from
 				 * mbuf and reset `match' to zero (`match' will
 				 * be inversed later).
 				 * Otherwise we should allocate new mtag and
 				 * push it into mbuf.
 				 */
 				if (cmd->len & F_NOT) { /* `untag' action */
 					if (mtag != NULL)
 						m_tag_delete(m, mtag);
 					match = 0;
 				} else {
 					if (mtag == NULL) {
 						mtag = m_tag_alloc( MTAG_IPFW,
 						    tag, 0, M_NOWAIT);
 						if (mtag != NULL)
 							m_tag_prepend(m, mtag);
 					}
 					match = 1;
 				}
 				break;
 			}
 
 			case O_FIB: /* try match the specified fib */
 				if (args->f_id.fib == cmd->arg1)
 					match = 1;
 				break;
 
 			case O_SOCKARG:	{
 #ifndef USERSPACE	/* not supported in userspace */
 				struct inpcb *inp = args->inp;
 				struct inpcbinfo *pi;
 				
 				if (is_ipv6) /* XXX can we remove this ? */
 					break;
 
 				if (proto == IPPROTO_TCP)
 					pi = &V_tcbinfo;
 				else if (proto == IPPROTO_UDP)
 					pi = &V_udbinfo;
 				else if (proto == IPPROTO_UDPLITE)
 					pi = &V_ulitecbinfo;
 				else
 					break;
 
 				/*
 				 * XXXRW: so_user_cookie should almost
 				 * certainly be inp_user_cookie?
 				 */
 
 				/* For incoming packet, lookup up the 
 				inpcb using the src/dest ip/port tuple */
 				if (inp == NULL) {
 					inp = in_pcblookup(pi, 
 						src_ip, htons(src_port),
 						dst_ip, htons(dst_port),
 						INPLOOKUP_RLOCKPCB, NULL);
 					if (inp != NULL) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 						INP_RUNLOCK(inp);
 					}
 				} else {
 					if (inp->inp_socket) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 					}
 				}
 #endif /* !USERSPACE */
 				break;
 			}
 
 			case O_TAGGED: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				if (cmdlen == 1) {
 					match = m_tag_locate(m, MTAG_IPFW,
 					    tag, NULL) != NULL;
 					break;
 				}
 
 				/* we have ranges */
 				for (mtag = m_tag_first(m);
 				    mtag != NULL && !match;
 				    mtag = m_tag_next(m, mtag)) {
 					uint16_t *p;
 					int i;
 
 					if (mtag->m_tag_cookie != MTAG_IPFW)
 						continue;
 
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for(; !match && i > 0; i--, p += 2)
 						match =
 						    mtag->m_tag_id >= p[0] &&
 						    mtag->m_tag_id <= p[1];
 				}
 				break;
 			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to set l=0, done=1)
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   (setting l=0), or to the SKIPTO target (setting
 			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule (one
 			 *   exception is O_SKIP_ACTION which could be
 			 *   between these opcodes and 'action' one).
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped (set retval,
 			 *   break loops with l=0, done=1)
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found.
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes become NOPs.
 			 *   The jump to the next rule is done by setting
 			 *   l=0, cmdlen=0.
 			 *
 			 * O_SKIP_ACTION: this opcode is not a real 'action'
 			 *  either, and is stored right before the 'action'
 			 *  part of the rule, right after the O_KEEP_STATE
 			 *  opcode. It causes match failure so the real
 			 *  'action' could be executed only if the rule
 			 *  is checked via dynamic rule from the state
 			 *  table, as in such case execution starts
 			 *  from the true 'action' opcode directly.
 			 *   
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (ipfw_dyn_install_state(chain, f,
 				    (ipfw_insn_limit *)cmd, args, ulp,
 				    pktlen, &dyn_info, tablearg)) {
 					/* error or limit violation */
 					retval = IP_FW_DENY;
 					l = 0;	/* exit inner loop */
 					done = 1; /* exit outer loop */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_info.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (DYN_LOOKUP_NEEDED(&dyn_info, cmd) &&
 				    (q = ipfw_dyn_lookup_state(args, ulp,
 				    pktlen, cmd, &dyn_info)) != NULL) {
 					/*
 					 * Found dynamic entry, jump to the
 					 * 'action' part of the parent rule
 					 * by setting f, cmd, l and clearing
 					 * cmdlen.
 					 */
 					f = q;
 					f_pos = dyn_info.f_pos;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					cmdlen = 0;
 					match = 1;
 					break;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					l = 0;	/* exit inner loop */
 				match = 1;
 				break;
 
 			case O_SKIP_ACTION:
 				match = 0;	/* skip to the next rule */
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_PIPE:
 			case O_QUEUE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, pipe);
 				if (cmd->opcode == O_PIPE)
 					args->rule.info |= IPFW_IS_PIPE;
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_DIVERT:
 			case O_TEE:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not on layer 2 */
 				/* otherwise this is terminal */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				retval = (cmd->opcode == O_DIVERT) ?
 					IP_FW_DIVERT : IP_FW_TEE;
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, divert);
 				break;
 
 			case O_COUNT:
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_SKIPTO:
 			    IPFW_INC_RULE_COUNTER(f, pktlen);
 			    f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0);
 			    /*
 			     * Skip disabled rules, and re-enter
 			     * the inner loop with the correct
 			     * f_pos, f, l and cmd.
 			     * Also clear cmdlen and skip_or
 			     */
 			    for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				     (1 << chain->map[f_pos]->set));
 				    f_pos++)
 				;
 			    /* Re-enter the inner loop at the skipto rule. */
 			    f = chain->map[f_pos];
 			    l = f->cmd_len;
 			    cmd = f->cmd;
 			    match = 1;
 			    cmdlen = 0;
 			    skip_or = 0;
 			    continue;
 			    break;	/* not reached */
 
 			case O_CALLRETURN: {
 				/*
 				 * Implementation of `subroutine' call/return,
 				 * in the stack carried in an mbuf tag. This
 				 * is different from `skipto' in that any call
 				 * address is possible (`skipto' must prevent
 				 * backward jumps to avoid endless loops).
 				 * We have `return' action when F_NOT flag is
 				 * present. The `m_tag_id' field is used as
 				 * stack pointer.
 				 */
 				struct m_tag *mtag;
 				uint16_t jmpto, *stack;
 
 #define	IS_CALL		((cmd->len & F_NOT) == 0)
 #define	IS_RETURN	((cmd->len & F_NOT) != 0)
 				/*
 				 * Hand-rolled version of m_tag_locate() with
 				 * wildcard `type'.
 				 * If not already tagged, allocate new tag.
 				 */
 				mtag = m_tag_first(m);
 				while (mtag != NULL) {
 					if (mtag->m_tag_cookie ==
 					    MTAG_IPFW_CALL)
 						break;
 					mtag = m_tag_next(m, mtag);
 				}
 				if (mtag == NULL && IS_CALL) {
 					mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
 					    IPFW_CALLSTACK_SIZE *
 					    sizeof(uint16_t), M_NOWAIT);
 					if (mtag != NULL)
 						m_tag_prepend(m, mtag);
 				}
 
 				/*
 				 * On error both `call' and `return' just
 				 * continue with next rule.
 				 */
 				if (IS_RETURN && (mtag == NULL ||
 				    mtag->m_tag_id == 0)) {
 					l = 0;		/* exit inner loop */
 					break;
 				}
 				if (IS_CALL && (mtag == NULL ||
 				    mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
 					printf("ipfw: call stack error, "
 					    "go to next rule\n");
 					l = 0;		/* exit inner loop */
 					break;
 				}
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				stack = (uint16_t *)(mtag + 1);
 
 				/*
 				 * The `call' action may use cached f_pos
 				 * (in f->next_rule), whose version is written
 				 * in f->next_rule.
 				 * The `return' action, however, doesn't have
 				 * fixed jump address in cmd->arg1 and can't use
 				 * cache.
 				 */
 				if (IS_CALL) {
 					stack[mtag->m_tag_id] = f->rulenum;
 					mtag->m_tag_id++;
 			    		f_pos = JUMP(chain, f, cmd->arg1,
 					    tablearg, 1);
 				} else {	/* `return' action */
 					mtag->m_tag_id--;
 					jmpto = stack[mtag->m_tag_id] + 1;
 					f_pos = ipfw_find_rule(chain, jmpto, 0);
 				}
 
 				/*
 				 * Skip disabled rules, and re-enter
 				 * the inner loop with the correct
 				 * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
 				for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				    (1 << chain->map[f_pos]->set)); f_pos++)
 					;
 				/* Re-enter the inner loop at the dest rule. */
 				f = chain->map[f_pos];
 				l = f->cmd_len;
 				cmd = f->cmd;
 				cmdlen = 0;
 				skip_or = 0;
 				continue;
 				break;	/* NOTREACHED */
 			}
 #undef IS_CALL
 #undef IS_RETURN
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1, iplen, ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(icmp6_type) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(
 					&args->f_id.dst_ip6)) {
 					send_reject6(args,
 					    cmd->opcode == O_REJECT ?
 					    map_icmp_unreach(cmd->arg1):
 					    cmd->arg1, hlen,
 					    (struct ip6_hdr *)ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_FORWARD_IP:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 				    struct sockaddr_in *sa;
 
 				    sa = &(((ipfw_insn_sa *)cmd)->sa);
 				    if (sa->sin_addr.s_addr == INADDR_ANY) {
 #ifdef INET6
 					/*
 					 * We use O_FORWARD_IP opcode for
 					 * fwd rule with tablearg, but tables
 					 * now support IPv6 addresses. And
 					 * when we are inspecting IPv6 packet,
 					 * we can use nh6 field from
 					 * table_value as next_hop6 address.
 					 */
 					if (is_ipv6) {
 						struct ip_fw_nh6 *nh6;
 
 						args->flags |= IPFW_ARGS_NH6;
 						nh6 = &args->hopstore6;
 						nh6->sin6_addr = TARG_VAL(
 						    chain, tablearg, nh6);
 						nh6->sin6_port = sa->sin_port;
 						nh6->sin6_scope_id = TARG_VAL(
 						    chain, tablearg, zoneid);
 					} else
 #endif
 					{
 						args->flags |= IPFW_ARGS_NH4;
 						args->hopstore.sin_port =
 						    sa->sin_port;
 						sa = &args->hopstore;
 						sa->sin_family = AF_INET;
 						sa->sin_len = sizeof(*sa);
 						sa->sin_addr.s_addr = htonl(
 						    TARG_VAL(chain, tablearg,
 						    nh4));
 					}
 				    } else {
 					    args->flags |= IPFW_ARGS_NH4PTR;
 					    args->next_hop = sa;
 				    }
 				}
 				retval = IP_FW_PASS;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 #ifdef INET6
 			case O_FORWARD_IP6:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
 					args->flags |= IPFW_ARGS_NH6PTR;
 					args->next_hop6 = sin6;
 				}
 				retval = IP_FW_PASS;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 #endif
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, netgraph);
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_SETFIB: {
 				uint32_t fib;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				fib = TARG(cmd->arg1, fib) & 0x7FFF;
 				if (fib >= rt_numfibs)
 					fib = 0;
 				M_SETFIB(m, fib);
 				args->f_id.fib = fib; /* XXX */
 				l = 0;		/* exit inner loop */
 				break;
 		        }
 
 			case O_SETDSCP: {
 				uint16_t code;
 
 				code = TARG(cmd->arg1, dscp) & 0x3F;
 				l = 0;		/* exit inner loop */
 				if (is_ipv4) {
 					uint16_t old;
 
 					old = *(uint16_t *)ip;
 					ip->ip_tos = (code << 2) |
 					    (ip->ip_tos & 0x03);
 					ip->ip_sum = cksum_adjust(ip->ip_sum,
 					    old, *(uint16_t *)ip);
 				} else if (is_ipv6) {
 					uint8_t *v;
 
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					*v = (*v & 0xF0) | (code >> 2);
 					v++;
 					*v = (*v & 0x3F) | ((code & 0x03) << 6);
 				} else
 					break;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				break;
 			}
 
 			case O_NAT:
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				/*
 				 * Ensure that we do not invoke NAT handler for
 				 * non IPv4 packets. Libalias expects only IPv4.
 				 */
 				if (!is_ipv4 || !IPFW_NAT_LOADED) {
 				    retval = IP_FW_DENY;
 				    break;
 				}
 
 				struct cfg_nat *t;
 				int nat_id;
 
 				args->rule.info = 0;
 				set_match(args, f_pos, chain);
 				/* Check if this is 'global' nat rule */
 				if (cmd->arg1 == IP_FW_NAT44_GLOBAL) {
 					retval = ipfw_nat_ptr(args, NULL, m);
 					break;
 				}
 				t = ((ipfw_insn_nat *)cmd)->nat;
 				if (t == NULL) {
 					nat_id = TARG(cmd->arg1, nat);
 					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
 					    break;
 					}
 					if (cmd->arg1 != IP_FW_TARG)
 					    ((ipfw_insn_nat *)cmd)->nat = t;
 				}
 				retval = ipfw_nat_ptr(args, t, m);
 				break;
 
 			case O_REASS: {
 				int ip_off;
 
 				l = 0;	/* in any case exit inner loop */
 				if (is_ipv6) /* IPv6 is not supported yet */
 					break;
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				ip_off = ntohs(ip->ip_off);
 
 				/* if not fragmented, go to next rule */
 				if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
 				    break;
 
 				args->m = m = ip_reass(m);
 
 				/*
 				 * do IP header checksum fixup.
 				 */
 				if (m == NULL) { /* fragment got swallowed */
 				    retval = IP_FW_DENY;
 				} else { /* good, packet complete */
 				    int hlen;
 
 				    ip = mtod(m, struct ip *);
 				    hlen = ip->ip_hl << 2;
 				    ip->ip_sum = 0;
 				    if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
 				    args->rule.info = 0;
 				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
 			}
 			case O_EXTERNAL_ACTION:
 				l = 0; /* in any case exit inner loop */
 				retval = ipfw_run_eaction(chain, args,
 				    cmd, &done);
 				/*
 				 * If both @retval and @done are zero,
 				 * consider this as rule matching and
 				 * update counters.
 				 */
 				if (retval == 0 && done == 0) {
 					IPFW_INC_RULE_COUNTER(f, pktlen);
 					/*
 					 * Reset the result of the last
 					 * dynamic state lookup.
 					 * External action can change
 					 * @args content, and it may be
 					 * used for new state lookup later.
 					 */
 					DYN_INFO_INIT(&dyn_info);
 				}
 				break;
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 			/*
 			 * if we get here with l=0, then match is irrelevant.
 			 */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner loop, scan opcodes */
 #undef PULLUP_LEN
 
 		if (done)
 			break;
 
 /* next_rule:; */	/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 
 	if (done) {
 		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
 		IPFW_INC_RULE_COUNTER(rule, pktlen);
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	}
 	IPFW_PF_RUNLOCK(chain);
 #ifdef __FreeBSD__
 	if (ucred_cache != NULL)
 		crfree(ucred_cache);
 #endif
 	return (retval);
 
 pullup_failed:
 	if (V_fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Set maximum number of tables that can be used in given VNET ipfw instance.
  */
 #ifdef SYSCTL_NODE
 static int
 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int wanted;
 	int rc;
 
 	/* Let the generic integer handler work on a copy of the limit. */
 	wanted = V_fw_tables_max;
 	rc = sysctl_handle_int(oidp, &wanted, 0, req);
 	if (rc != 0)
 		return (rc);	/* handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read request, nothing to apply */
 
 	/* A new value was supplied: resize the layer3 chain's tables. */
 	return (ipfw_resize_tables(&V_layer3_chain, wanted));
 }
 
 /*
  * Switches table namespace between global and per-set.
  */
 static int
 sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int mode;
 	int rc;
 
 	/* Let the generic integer handler work on a copy of the setting. */
 	mode = V_fw_tables_sets;
 	rc = sysctl_handle_int(oidp, &mode, 0, req);
 	if (rc != 0)
 		return (rc);	/* handler reported an error */
 	if (req->newptr == NULL)
 		return (0);	/* read request, nothing to apply */
 
 	/* A new value was supplied: switch the table namespace. */
 	return (ipfw_switch_tables_namespace(&V_layer3_chain, mode));
 }
 #endif
 
 /*
  * Module and VNET glue
  */
 
 /*
  * Stuff that must be initialised only on boot or module load
  */
 static int
 ipfw_init(void)
 {
 
 	/*
 	 * Announce the build-time configuration.  This is printed only
 	 * once, when we are called from the sysinit code.  The line is
 	 * left open here and completed by the logging status below.
 	 */
 	printf("ipfw2 "
 #ifdef INET6
 		"(+ipv6) "
 #endif
 		"initialized, divert %s, nat %s, "
 		"default to %s, logging ",
 #ifdef IPDIVERT
 		"enabled",
 #else
 		"loadable",
 #endif
 #ifdef IPFIREWALL_NAT
 		"enabled",
 #else
 		"loadable",
 #endif
 		default_to_accept ? "accept" : "deny");
 
 	/*
 	 * Note: V_xxx variables can be accessed here but the vnet specific
 	 * initializer may not have been called yet for the VIMAGE case.
 	 * Tuneables will have been processed. We will print out values for
 	 * the default vnet.
 	 * XXX This should all be rationalized AFTER 8.0
 	 */
 	if (V_fw_verbose == 0) {
 		printf("disabled\n");
 	} else if (V_verbose_limit != 0) {
 		printf("limited to %d packets/entry by default\n",
 		    V_verbose_limit);
 	} else {
 		printf("unlimited\n");
 	}
 
 	/* Clamp the user-supplied table count to the supported maximum. */
 	if (default_fw_tables > IPFW_TABLES_MAX)
 		default_fw_tables = IPFW_TABLES_MAX;
 
 	/* Bring up the module-wide (not per-vnet) services. */
 	ipfw_init_sopt_handler();
 	ipfw_init_obj_rewriter();
 	ipfw_iface_init();
 
 	/* Nothing above can fail. */
 	return (0);
 }
 
 /*
  * Called for the removal of the last instance only on module unload.
  */
 static void
 ipfw_destroy(void)
 {
 
 	/*
 	 * Release the module-wide services.  Judging by the names these
 	 * three calls are the counterparts of ipfw_iface_init(),
 	 * ipfw_init_sopt_handler() and ipfw_init_obj_rewriter(), which
 	 * ipfw_init() invokes at load time.
 	 */
 	ipfw_iface_destroy();
 	ipfw_destroy_sopt_handler();
 	ipfw_destroy_obj_rewriter();
 	printf("IP firewall unloaded\n");
 }
 
 /*
  * Stuff that must be initialized for every instance
  * (including the first of course).
  */
 static int
 vnet_ipfw_init(const void *unused)
 {
 	int error, first;
 	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	/* Everything below operates on this vnet's layer3 chain. */
 	chain = &V_layer3_chain;
 
 	/* 1 when initialising the default vnet, 0 for any other vnet. */
 	first = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	/* First set up some values that are compile time options */
 	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
 	V_fw_deny_unknown_exthdrs = 1;
 #ifdef IPFIREWALL_VERBOSE
 	V_fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 #ifdef IPFIREWALL_NAT
 	LIST_INIT(&chain->nat);
 #endif
 
 	/* Init shared services hash table */
 	ipfw_init_srv(chain);
 
 	ipfw_init_counters();
 	/* Set initial number of tables */
 	V_fw_tables_max = default_fw_tables;
 	error = ipfw_init_tables(chain, first);
 	if (error) {
 		/*
 		 * `rule' has not been allocated yet, so it is still NULL
 		 * when passed to free() here.  The value in `error' is
 		 * discarded and ENOSPC is returned instead.
 		 * NOTE(review): nothing visible in this function has set
 		 * chain->map at this point, and the services hash and
 		 * counters initialised above are not torn down on this
 		 * path -- confirm that this is intended.
 		 */
 		printf("ipfw2: setting up tables failed\n");
 		free(chain->map, M_IPFW);
 		free(rule, M_IPFW);
 		return (ENOSPC);
 	}
 
 	IPFW_LOCK_INIT(chain);
 
 	/*
 	 * fill and insert the default rule: a single one-word instruction
 	 * that is O_ACCEPT or O_DENY depending on default_to_accept.
 	 * NOTE(review): the result of ipfw_alloc_rule() is used without a
 	 * NULL check -- presumably the allocation cannot fail; confirm.
 	 */
 	rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw));
 	rule->cmd_len = 1;
 	rule->cmd[0].len = 1;
 	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
 	chain->default_rule = rule;
 	ipfw_add_protected_rule(chain, rule, 0);
 
 	/* Bring up the remaining per-vnet subsystems. */
 	ipfw_dyn_init(chain);
 	ipfw_eaction_init(chain, first);
 #ifdef LINEAR_SKIPTO
 	ipfw_init_skipto_cache(chain);
 #endif
 	ipfw_bpf_init(first);
 
 	/* This instance is now fully set up; mark it as ready. */
 	V_ipfw_vnet_ready = 1;		/* Open for business */
 
 	/*
 	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
 	 * Even if the latter two fail we still keep the module alive
 	 * because the sockopt and layer2 paths are still useful.
 	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
 	 * so we can ignore the exact return value and just set a flag.
 	 *
 	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
 	 * changes in the underlying (per-vnet) variables trigger
 	 * immediate hook()/unhook() calls.
 	 * In layer2 we have the same behaviour, except that V_ether_ipfw
 	 * is checked on each packet because there are no pfil hooks.
 	 */
 	V_ip_fw_ctl_ptr = ipfw_ctl3;
 	/* The result of attaching the hooks is what the caller sees. */
 	error = ipfw_attach_hooks(1);
 	return (error);
 }
 
 /*
  * Called for the removal of each instance.
  */
 static int
 vnet_ipfw_uninit(const void *unused)
 {
 	struct ip_fw *reap;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	int i, last;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
 	 * disconnect from ipv4, ipv6, layer2 and sockopt.
 	 * Then grab, release and grab again the WLOCK so we make
 	 * sure the update is propagated and nobody will be in.
 	 */
 	(void)ipfw_attach_hooks(0 /* detach */);
 	V_ip_fw_ctl_ptr = NULL;
 
 	/* 1 when tearing down the default vnet, 0 for any other vnet. */
 	last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	/* First grab/release cycle of the UH write lock (see above). */
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 
 	IPFW_UH_WLOCK(chain);
 
 	/*
 	 * With both write locks held, move every rule of the chain onto
 	 * the `reap' list and release the rule map.  The rules themselves
 	 * are freed further down, after both locks have been dropped.
 	 */
 	reap = NULL;
 	IPFW_WLOCK(chain);
 	for (i = 0; i < chain->n_rules; i++)
 		ipfw_reap_add(chain, &reap, chain->map[i]);
 	free(chain->map, M_IPFW);
 #ifdef LINEAR_SKIPTO
 	ipfw_destroy_skipto_cache(chain);
 #endif
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	/* Tear down the remaining per-vnet state, now without locks held. */
 	ipfw_destroy_tables(chain, last);
 	ipfw_eaction_uninit(chain, last);
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	vnet_ipfw_iface_destroy(chain);
 	ipfw_destroy_srv(chain);
 	IPFW_LOCK_DESTROY(chain);
 	ipfw_dyn_uninit(1);	/* free the remaining parts */
 	ipfw_destroy_counters();
 	ipfw_bpf_uninit(last);
 	return (0);
 }
 
 /*
  * Module event handler.
  * In general we have the choice of handling most of these events by the
  * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
  * use the SYSINIT handlers as they are more capable of expressing the
  * flow of control during module and vnet operations, so this is just
  * a skeleton. Note there is no SYSINIT equivalent of the module
  * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
  */
 static int
 ipfw_modevent(module_t mod, int type, void *unused)
 {
 
 	/*
 	 * Every known event is accepted without doing any work here;
 	 * anything else is rejected as unsupported.
 	 */
 	switch (type) {
 	case MOD_LOAD:		/* module load, or boot if compiled in */
 	case MOD_QUIESCE:	/* before unload; this is where a veto goes */
 	case MOD_UNLOAD:	/* during unload */
 	case MOD_SHUTDOWN:	/* during system shutdown */
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /* Module description handed to DECLARE_MODULE(). */
 static moduledata_t ipfwmod = {
 	"ipfw",			/* module name */
 	ipfw_modevent,		/* module event handler */
 	0			/* third field unused here (presumably private data) */
 };
 
 /*
  * Define startup order.  All three steps run in the same subsystem and
  * are sequenced by consecutive order values: the module event handler
  * first, then the module-wide init, then the per-vnet init.
  */
 #define	IPFW_SI_SUB_FIREWALL	SI_SUB_PROTO_FIREWALL
 #define	IPFW_MODEVENT_ORDER	(SI_ORDER_ANY - 255) /* On boot slot in here. */
 #define	IPFW_MODULE_ORDER	(IPFW_MODEVENT_ORDER + 1) /* A little later. */
 #define	IPFW_VNET_ORDER		(IPFW_MODEVENT_ORDER + 2) /* Later still. */
 
 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
 FEATURE(ipfw_ctl3, "ipfw new sockopt calls");
 MODULE_VERSION(ipfw, 3);
 /* should declare some dependencies here */
 
 /*
  * Starting up. Done in order after the module event handler
  * (ipfw_modevent()) has been called.
  * VNET_SYSINIT is also called for each existing vnet and each new vnet.
  */
 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_init, NULL);
 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_init, NULL);
 
 /*
  * Closing up shop. These are done in REVERSE ORDER, but still
  * after the module event handler (ipfw_modevent()) has been called.
  * Not called on reboot.
  * VNET_SYSUNINIT is also called for each vnet as it exits, or when
  * the module is unloaded.
  */
 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_destroy, NULL);
 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_uninit, NULL);
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_pfil.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_pfil.c	(revision 345164)
+++ head/sys/netpfil/ipfw/ip_fw_pfil.c	(revision 345165)
@@ -1,663 +1,659 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/ethernet.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netgraph/ng_ipfw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>
 
 /* Per-vnet switch behind the net.inet.ip.fw "enable" sysctl; on by default. */
 VNET_DEFINE_STATIC(int, fw_enable) = 1;
 #define V_fw_enable	VNET(fw_enable)
 
 #ifdef INET6
 /* Per-vnet switch behind the net.inet6.ip6.fw "enable" sysctl; on by default. */
 VNET_DEFINE_STATIC(int, fw6_enable) = 1;
 #define V_fw6_enable	VNET(fw6_enable)
 #endif
 
 /* Per-vnet switch behind the net.link.ether "ipfw" sysctl; off by default. */
 VNET_DEFINE_STATIC(int, fwlink_enable) = 0;
 #define V_fwlink_enable	VNET(fwlink_enable)
 
 /* Handler shared by the three enable sysctls declared below. */
 int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
 
 /* Forward declarations. */
 static int ipfw_divert(struct mbuf **, struct ip_fw_args *, bool);
 
 #ifdef SYSCTL_NODE
 
 SYSBEGIN(f1)
 
 /*
  * Each knob below is a read/write, per-vnet integer whose accesses go
  * through ipfw_chg_hook().
  */
 SYSCTL_DECL(_net_inet_ip_fw);
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
     &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw");
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6_fw);
 SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
     &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6");
 #endif /* INET6 */
 
 SYSCTL_DECL(_net_link_ether);
 SYSCTL_PROC(_net_link_ether, OID_AUTO, ipfw,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
     &VNET_NAME(fwlink_enable), 0, ipfw_chg_hook, "I",
     "Pass ether pkts through firewall");
 
 SYSEND
 
 #endif /* SYSCTL_NODE */
 
 /*
  * The pfilter hook to pass packets to ipfw_chk and then to
  * dummynet, divert, netgraph or other modules.
  * The packet may be consumed.
  */
 static pfil_return_t
 ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags,
     void *ruleset __unused, struct inpcb *inp)
 {
 	struct ip_fw_args args;
 	struct m_tag *tag;
 	pfil_return_t ret;
-	int ipfw, dir;
+	int ipfw;
 
 	/* Record the packet direction for ipfw_chk(). */
 	args.flags = (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
-	dir = (flags & PFIL_IN) ? DIR_IN : DIR_OUT;
 again:
 	/*
 	 * extract and remove the tag if present. If we are left
 	 * with onepass, optimize the outgoing path.
 	 */
 	tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
 	if (tag != NULL) {
 		args.rule = *((struct ipfw_rule_ref *)(tag+1));
 		m_tag_delete(*m0, tag);
 		/*
 		 * NOTE(review): bare 0 where the rest of the function uses
 		 * PFIL_* names -- presumably PFIL_PASS; confirm.
 		 */
 		if (args.rule.info & IPFW_ONEPASS)
 			return (0);
 		args.flags |= IPFW_ARGS_REF;
 	}
 
 	args.m = *m0;
 	args.ifp = ifp;
 	args.inp = inp;
 
 	/* Run the rules, then pick up whatever mbuf is left in args. */
 	ipfw = ipfw_chk(&args);
 	*m0 = args.m;
 
 	KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL",
 	    __func__));
 
 	ret = PFIL_PASS;
 	switch (ipfw) {
 	case IP_FW_PASS:
 		/* next_hop may be set by ipfw_chk */
 		if ((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR |
 		    IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0)
 			break;
 #if (!defined(INET6) && !defined(INET))
 		ret = PFIL_DROPPED;
 #else
 	    {
 		/* Source and size of the next-hop sockaddr to attach. */
 		void *psa;
 		size_t len;
 #ifdef INET
 		if (args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) {
 			MPASS((args.flags & (IPFW_ARGS_NH4 |
 			    IPFW_ARGS_NH4PTR)) != (IPFW_ARGS_NH4 |
 			    IPFW_ARGS_NH4PTR));
 			MPASS((args.flags & (IPFW_ARGS_NH6 |
 			    IPFW_ARGS_NH6PTR)) == 0);
 			len = sizeof(struct sockaddr_in);
 			psa = (args.flags & IPFW_ARGS_NH4) ?
 			    &args.hopstore : args.next_hop;
 			if (in_localip(satosin(psa)->sin_addr))
 				(*m0)->m_flags |= M_FASTFWD_OURS;
 			(*m0)->m_flags |= M_IP_NEXTHOP;
 		}
 #endif /* INET */
 #ifdef INET6
 		if (args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) {
 			MPASS((args.flags & (IPFW_ARGS_NH6 |
 			    IPFW_ARGS_NH6PTR)) != (IPFW_ARGS_NH6 |
 			    IPFW_ARGS_NH6PTR));
 			MPASS((args.flags & (IPFW_ARGS_NH4 |
 			    IPFW_ARGS_NH4PTR)) == 0);
 			len = sizeof(struct sockaddr_in6);
 			psa = args.next_hop6;
 			(*m0)->m_flags |= M_IP6_NEXTHOP;
 		}
 #endif /* INET6 */
 		/*
 		 * Incoming packets should not be tagged so we do not
 		 * m_tag_find. Outgoing packets may be tagged, so we
 		 * reuse the tag if present.
 		 * NOTE(review): a reused tag is not checked to be at
 		 * least `len' bytes long before it is written to below
 		 * -- confirm every PACKET_TAG_IPFORWARD tag is large
 		 * enough.
 		 */
 		tag = (flags & PFIL_IN) ? NULL :
 			m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
 		if (tag != NULL) {
 			m_tag_unlink(*m0, tag);
 		} else {
 			tag = m_tag_get(PACKET_TAG_IPFORWARD, len,
 			    M_NOWAIT);
 			if (tag == NULL) {
 				ret = PFIL_DROPPED;
 				break;
 			}
 		}
 		/*
 		 * In the IPFW_ARGS_NH6 case the sockaddr is instead
 		 * filled in field by field from args.hopstore6 below.
 		 */
 		if ((args.flags & IPFW_ARGS_NH6) == 0)
 			bcopy(psa, tag + 1, len);
 		m_tag_prepend(*m0, tag);
 		/*
 		 * NOTE(review): bare 0 again, presumably PFIL_PASS (the
 		 * value ret already holds at this point) -- confirm.
 		 */
 		ret = 0;
 #ifdef INET6
 		/* IPv6 next hop needs additional handling */
 		if (args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) {
 			struct sockaddr_in6 *sa6;
 
 			sa6 = satosin6(tag + 1);
 			if (args.flags & IPFW_ARGS_NH6) {
 				sa6->sin6_family = AF_INET6;
 				sa6->sin6_len = sizeof(*sa6);
 				sa6->sin6_addr = args.hopstore6.sin6_addr;
 				sa6->sin6_port = args.hopstore6.sin6_port;
 				sa6->sin6_scope_id =
 				    args.hopstore6.sin6_scope_id;
 			}
 			/*
 			 * If nh6 address is link-local we should convert
 			 * it to kernel internal form before doing any
 			 * comparisons.
 			 */
 			if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) {
 				ret = PFIL_DROPPED;
 				break;
 			}
 			if (in6_localip(&sa6->sin6_addr))
 				(*m0)->m_flags |= M_FASTFWD_OURS;
 		}
 #endif /* INET6 */
 	    }
 #endif /* INET || INET6 */
 		break;
 
 	case IP_FW_DENY:
 		ret = PFIL_DROPPED;
 		break;
 
 	case IP_FW_DUMMYNET:
 		/* Drop the packet when no dummynet handler is registered. */
 		if (ip_dn_io_ptr == NULL) {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		MPASS(args.flags & IPFW_ARGS_REF);
-		if (mtod(*m0, struct ip *)->ip_v == 4)
-			(void )ip_dn_io_ptr(m0, dir, &args);
-		else if (mtod(*m0, struct ip *)->ip_v == 6)
-			(void )ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
+		if (args.flags & (IPFW_ARGS_IP4 | IPFW_ARGS_IP6))
+			(void )ip_dn_io_ptr(m0, &args);
 		else {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		/*
 		 * XXX should read the return value.
 		 * dummynet normally eats the packet and sets *m0=NULL
 		 * unless the packet can be sent immediately. In this
 		 * case args is updated and we should re-run the
 		 * check without clearing args.
 		 */
 		if (*m0 != NULL)
 			goto again;
 		ret = PFIL_CONSUMED;
 		break;
 
 	case IP_FW_TEE:
 	case IP_FW_DIVERT:
 		/* Drop the packet when no divert handler is registered. */
 		if (ip_divert_ptr == NULL) {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		MPASS(args.flags & IPFW_ARGS_REF);
 		(void )ipfw_divert(m0, &args, ipfw == IP_FW_TEE);
 		/* continue processing for the original packet (tee). */
 		if (*m0)
 			goto again;
 		ret = PFIL_CONSUMED;
 		break;
 
 	case IP_FW_NGTEE:
 	case IP_FW_NETGRAPH:
 		/* Drop the packet when no netgraph handler is registered. */
 		if (ng_ipfw_input_p == NULL) {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		MPASS(args.flags & IPFW_ARGS_REF);
 		(void )ng_ipfw_input_p(m0, &args, ipfw == IP_FW_NGTEE);
 		if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
 			goto again;	/* continue with packet */
 		ret = PFIL_CONSUMED;
 		break;
 
 	case IP_FW_NAT:
 		/* honor one-pass in case of successful nat */
 		if (V_fw_one_pass)
 			break;	/* ret is still PFIL_PASS */
 		goto again;
 
 	case IP_FW_REASS:
 		goto again;		/* continue with packet */
 
 	case IP_FW_NAT64:
 		ret = PFIL_CONSUMED;
 		break;
 
 	default:
 		KASSERT(0, ("%s: unknown retval", __func__));
 	}
 
 	/*
 	 * For every verdict other than PFIL_PASS, free any mbuf that is
 	 * still attached and hand NULL back to the caller.
 	 */
 	if (ret != PFIL_PASS) {
 		if (*m0)
 			FREE_PKT(*m0);
 		*m0 = NULL;
 	}
 
 	return (ret);
 }
 
 /*
  * ipfw processing for ethernet packets (in and out).
  */
 static pfil_return_t
-ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int dir,
+ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
     void *ruleset __unused, struct inpcb *inp)
 {
 	struct ip_fw_args args;
 	struct ether_header save_eh;
 	struct ether_header *eh;
 	struct m_tag *mtag;
 	struct mbuf *m;
 	pfil_return_t ret;
 	int i;	/* first the pullup length, later the ipfw_chk() verdict */
 
 	/* Mark this as a layer2 check and record the packet direction. */
 	args.flags = IPFW_ARGS_ETHER;
-	args.flags |= (dir & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
+	args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
 again:
 	/* fetch start point from rule, if any.  remove the tag if present. */
 	mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
 	if (mtag != NULL) {
 		args.rule = *((struct ipfw_rule_ref *)(mtag+1));
 		m_tag_delete(*m0, mtag);
 		/*
 		 * NOTE(review): bare 0 where the rest of the function uses
 		 * PFIL_* names -- presumably PFIL_PASS; confirm.
 		 */
 		if (args.rule.info & IPFW_ONEPASS)
 			return (0);
 		args.flags |= IPFW_ARGS_REF;
 	}
 
 	/* I need some amt of data to be contiguous */
 	m = *m0;
 	i = min(m->m_pkthdr.len, max_protohdr);
 	if (m->m_len < i) {
 		m = m_pullup(m, i);
 		/*
 		 * NOTE(review): this returns 0 with *m0 set to NULL.  If
 		 * 0 is PFIL_PASS the caller is told to carry on with a
 		 * packet that no longer exists -- confirm the intended
 		 * return value.
 		 */
 		if (m == NULL) {
 			*m0 = m;
 			return (0);
 		}
 	}
 	eh = mtod(m, struct ether_header *);
 	save_eh = *eh;			/* save copy for restore below */
 	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
 
 	args.m = m;		/* the packet we are looking at		*/
 	args.ifp = ifp;
 	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
 	args.inp = inp;	/* used by ipfw uid/gid/jail rules	*/
 	i = ipfw_chk(&args);
 	m = args.m;
 	if (m != NULL) {
 		/*
 		 * Restore Ethernet header, as needed, in case the
 		 * mbuf chain was replaced by ipfw.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		/*
 		 * NOTE(review): same concern as for the pullup failure
 		 * above -- 0 is returned with *m0 set to NULL.
 		 */
 		if (m == NULL) {
 			*m0 = NULL;
 			return (0);
 		}
 		/* Copy the saved header back only if its location moved. */
 		if (eh != mtod(m, struct ether_header *))
 			bcopy(&save_eh, mtod(m, struct ether_header *),
 				ETHER_HDR_LEN);
 	}
 	*m0 = m;
 
 	ret = PFIL_PASS;
 	/* Check result of ipfw_chk() */
 	switch (i) {
 	case IP_FW_PASS:
 		break;
 
 	case IP_FW_DENY:
 		ret = PFIL_DROPPED;
 		break;
 
 	case IP_FW_DUMMYNET:
 		/* Drop the packet when no dummynet handler is registered. */
 		if (ip_dn_io_ptr == NULL) {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		/*
 		 * The caller's pointer is cleared and the packet is handed
 		 * over through the local `m'; this path returns directly
 		 * and skips the cleanup at the end of the function.
 		 */
 		*m0 = NULL;
-		dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT;
 		MPASS(args.flags & IPFW_ARGS_REF);
-		ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args);
+		ip_dn_io_ptr(&m, &args);
 		return (PFIL_CONSUMED);
 
 	case IP_FW_NGTEE:
 	case IP_FW_NETGRAPH:
 		/* Drop the packet when no netgraph handler is registered. */
 		if (ng_ipfw_input_p == NULL) {
 			ret = PFIL_DROPPED;
 			break;
 		}
 		MPASS(args.flags & IPFW_ARGS_REF);
 		(void )ng_ipfw_input_p(m0, &args, i == IP_FW_NGTEE);
 		if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */
 			goto again;	/* continue with packet */
 		ret = PFIL_CONSUMED;
 		break;
 
 	default:
 		KASSERT(0, ("%s: unknown retval", __func__));
 	}
 
 	/*
 	 * For every verdict other than PFIL_PASS, free any mbuf that is
 	 * still attached and hand NULL back to the caller.
 	 */
 	if (ret != PFIL_PASS) {
 		if (*m0)
 			FREE_PKT(*m0);
 		*m0 = NULL;
 	}
 
 	return (ret);
 }
 
 /*
  * Hand a packet (or, for tee, a copy of it) to the divert code.
  *
  * ipfw_chk() has already tagged the packet with the divert tag.
  * With tee set the packet is duplicated and the original stays in *m0;
  * otherwise the original is taken over and *m0 is cleared.
  *
  * Returns 1 on error and 0 on success.
  */
 static int
 ipfw_divert(struct mbuf **m0, struct ip_fw_args *args, bool tee)
 {
 	struct ip *ip = mtod(*m0, struct ip *);
 	struct m_tag *tag;
 	struct mbuf *clone;
 
 	if (tee) {
 		/*
 		 * If the mbuf cannot be duplicated, the divert chain is
 		 * sacrificed and the caller continues with the tee-ed packet.
 		 */
 		clone = m_dup(*m0, M_NOWAIT);
 		if (clone == NULL)
 			return (1);
 	} else {
 		/* No copy needed: divert the original mbuf itself. */
 		clone = *m0;
 		*m0 = NULL;
 	}
 
 	/*
 	 * Divert listeners can normally handle non-fragmented packets,
 	 * but reassembly is only possible in the non-tee case, so
 	 * listeners on a tee rule may get fragments and have to live
 	 * with that.  The 'reass' ipfw option can be used ahead of a
 	 * 'tee' when that matters.
 	 */
 	if (!tee) {
 		switch (ip->ip_v) {
 		case IPVERSION:
 			if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) {
 				struct mbuf *reass;
 				int hlen;
 
 				/*
 				 * A NULL result means ip_reass() consumed the
 				 * fragment; that is not an error.
 				 */
 				reass = ip_reass(clone);
 				if (reass == NULL)
 					return (0);
 				/*
 				 * Fix up the IP header checksum after
 				 * reassembly, leaving the header in network
 				 * byte order.
 				 */
 				ip = mtod(reass, struct ip *);
 				hlen = ip->ip_hl << 2;
 				ip->ip_sum = 0;
 				if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
 				else
 					ip->ip_sum = in_cksum(reass, hlen);
 				clone = reass;
 			}
 			break;
 #ifdef INET6
 		case IPV6_VERSION >> 4:
 		    {
 			struct ip6_hdr *const ip6 =
 			    mtod(clone, struct ip6_hdr *);
 
 			if (ip6->ip6_nxt == IPPROTO_FRAGMENT) {
 				int nxt, off;
 
 				off = sizeof(struct ip6_hdr);
 				nxt = frag6_input(&clone, &off, 0);
 				if (nxt == IPPROTO_DONE)
 					return (0);
 			}
 			break;
 		    }
 #endif
 		}
 	}
 
 	/* Attach a tag carrying the reinject info (the matching rule). */
 	tag = m_tag_alloc(MTAG_IPFW_RULE, 0,
 	    sizeof(struct ipfw_rule_ref), M_NOWAIT);
 	if (tag == NULL) {
 		FREE_PKT(clone);
 		return (1);
 	}
 	*((struct ipfw_rule_ref *)(tag + 1)) = args->rule;
 	m_tag_prepend(clone, tag);
 
 	/* Pass the packet on, telling divert whether it was inbound. */
 	ip_divert_ptr(clone, args->flags & IPFW_ARGS_IN);
 	return (0);
 }
 
 /*
  * attach or detach hooks for a given protocol family
  */
 VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet_hook);
 #define	V_ipfw_inet_hook	VNET(ipfw_inet_hook)
 #ifdef INET6
 VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook);
 #define	V_ipfw_inet6_hook	VNET(ipfw_inet6_hook)
 #endif
 VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook);
 #define	V_ipfw_link_hook	VNET(ipfw_link_hook)
 
 static int
 ipfw_hook(int onoff, int pf)
 {
 	struct pfil_hook_args pha;
 	struct pfil_link_args pla;
 	pfil_hook_t *h;
 
 	pha.pa_version = PFIL_VERSION;
 	pha.pa_flags = PFIL_IN | PFIL_OUT;
 	pha.pa_modname = "ipfw";
 	pha.pa_ruleset = NULL;
 
 	pla.pa_version = PFIL_VERSION;
 	pla.pa_flags = PFIL_IN | PFIL_OUT |
 	    PFIL_HEADPTR | PFIL_HOOKPTR;
 
 	switch (pf) {
 	case AF_INET:
 		pha.pa_func = ipfw_check_packet;
 		pha.pa_type = PFIL_TYPE_IP4;
 		pha.pa_rulname = "default";
 		h = &V_ipfw_inet_hook;
 		pla.pa_head = V_inet_pfil_head;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		pha.pa_func = ipfw_check_packet;
 		pha.pa_type = PFIL_TYPE_IP6;
 		pha.pa_rulname = "default6";
 		h = &V_ipfw_inet6_hook;
 		pla.pa_head = V_inet6_pfil_head;
 		break;
 #endif
 	case AF_LINK:
 		pha.pa_func = ipfw_check_frame;
 		pha.pa_type = PFIL_TYPE_ETHERNET;
 		pha.pa_rulname = "default-link";
 		h = &V_ipfw_link_hook;
 		pla.pa_head = V_link_pfil_head;
 		break;
 	}
 
 	if (onoff) {
 		*h = pfil_add_hook(&pha);
 		pla.pa_hook = *h;
 		(void)pfil_link(&pla);
 	} else
 		if (*h != NULL)
 			pfil_remove_hook(*h);
 
 	return 0;
 }
 
 /*
  * Attach (arg != 0) or detach (arg == 0) the ipfw pfil hooks.
  *
  * Detaching is unconditional for every family.  Attaching only touches
  * the families whose enable variable is set; a failing ipfw_hook() is
  * logged and reported as ENOENT, but the remaining families are still
  * tried.
  */
 int
 ipfw_attach_hooks(int arg)
 {
 	int error = 0;
 
 	if (arg == 0) {
 		/* detach */
 		ipfw_hook(0, AF_INET);
 #ifdef INET6
 		ipfw_hook(0, AF_INET6);
 #endif
 		ipfw_hook(0, AF_LINK);
 		return (error);
 	}
 
 	/* attach */
 	if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) {
 		error = ENOENT;	/* see ip_fw_pfil.c::ipfw_hook() */
 		printf("ipfw_hook() error\n");
 	}
 #ifdef INET6
 	if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) {
 		error = ENOENT;
 		printf("ipfw6_hook() error\n");
 	}
 #endif
 	if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
 		error = ENOENT;
 		printf("ipfw_link_hook() error\n");
 	}
 	return (error);
 }
 
 /*
  * Sysctl handler shared by the ipfw enable variables.
  *
  * arg1 must point at V_fw_enable, V_fw6_enable (INET6 builds only) or
  * V_fwlink_enable; anything else yields EINVAL.  The new value is
  * normalized to 0/1 and, when it differs from the current one, the
  * matching hook is attached or detached through ipfw_hook() before the
  * variable itself is updated.
  */
 int
 ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
 {
 	int *enabled;
 	int family;
 	int requested;
 	int rc;
 
 	/* Work out which protocol family this variable controls. */
 	if (arg1 == &V_fw_enable)
 		family = AF_INET;
 #ifdef INET6
 	else if (arg1 == &V_fw6_enable)
 		family = AF_INET6;
 #endif
 	else if (arg1 == &V_fwlink_enable)
 		family = AF_LINK;
 	else
 		return (EINVAL);
 
 	enabled = (int *)arg1;
 
 	/* Let sysctl read/write a private copy of the current value. */
 	requested = *enabled;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc)
 		return (rc);
 
 	/* Collapse any non-zero value to 1. */
 	requested = (requested != 0);
 
 	/* No state change, no hook work. */
 	if (requested == *enabled)
 		return (0);
 
 	rc = ipfw_hook(requested, family);
 	if (rc)
 		return (rc);
 
 	/* Commit only after the hook change went through. */
 	*enabled = requested;
 	return (0);
 }
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_private.h	(revision 345164)
+++ head/sys/netpfil/ipfw/ip_fw_private.h	(revision 345165)
@@ -1,845 +1,823 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _IPFW2_PRIVATE_H
 #define _IPFW2_PRIVATE_H
 
 /*
  * Internal constants and data structures used by ipfw components
  * and not meant to be exported outside the kernel.
  */
 
 #ifdef _KERNEL
 
 /*
  * For platforms that do not have SYSCTL support, we wrap the
  * SYSCTL_* into a function (one per file) to collect the values
  * into an array at module initialization. The wrapping macros,
  * SYSBEGIN() and SYSEND, are empty in the default case.
  */
 #ifndef SYSBEGIN
 #define SYSBEGIN(x)
 #endif
 #ifndef SYSEND
 #define SYSEND
 #endif
 
 /*
  * Return values from ipfw_chk().
  * The per-value notes describe how the pfil hooks in ip_fw_pfil.c act on
  * each verdict.
  */
 enum {
 	IP_FW_PASS = 0,		/* packet passes */
 	IP_FW_DENY,		/* packet is dropped */
 	IP_FW_DIVERT,		/* packet goes to ipfw_divert() */
 	IP_FW_TEE,		/* ipfw_divert() in tee mode */
 	IP_FW_DUMMYNET,		/* packet is handed to ip_dn_io_ptr */
 	IP_FW_NETGRAPH,		/* packet is handed to ng_ipfw_input_p */
 	IP_FW_NGTEE,		/* ng_ipfw_input_p in tee mode, then re-check */
 	IP_FW_NAT,		/* re-check unless one-pass is set */
 	IP_FW_REASS,		/* hook runs the check again on the packet */
 	IP_FW_NAT64,		/* packet is reported as consumed */
 };
 
 /*
  * Structure for collecting parameters to dummynet for ip6_output forwarding
  */
 struct _ip6dn_args {
        struct ip6_pktopts *opt_or;
        int flags_or;
        struct ip6_moptions *im6o_or;
        struct ifnet *origifp_or;
        struct ifnet *ifp_or;
        struct sockaddr_in6 dst_or;
        u_long mtu_or;
 };
 
 
 /*
  * Arguments for calling ipfw_chk() and dummynet_io(). We put them
  * all into a structure because this way it is easier and more
  * efficient to pass variables around and extend the interface.
  */
 struct ip_fw_args {
 	uint32_t		flags;
 #define	IPFW_ARGS_ETHER		0x00010000	/* valid ethernet header */
 #define	IPFW_ARGS_NH4		0x00020000	/* IPv4 next hop in hopstore */
 #define	IPFW_ARGS_NH6		0x00040000	/* IPv6 next hop in hopstore */
 #define	IPFW_ARGS_NH4PTR	0x00080000	/* IPv4 next hop in next_hop */
 #define	IPFW_ARGS_NH6PTR	0x00100000	/* IPv6 next hop in next_hop6 */
 #define	IPFW_ARGS_REF		0x00200000	/* valid ipfw_rule_ref	*/
 #define	IPFW_ARGS_IN		0x00400000	/* called on input */
 #define	IPFW_ARGS_OUT		0x00800000	/* called on output */
 #define	IPFW_ARGS_IP4		0x01000000	/* belongs to v4 ISR */
 #define	IPFW_ARGS_IP6		0x02000000	/* belongs to v6 ISR */
 #define	IPFW_ARGS_DROP		0x04000000	/* drop it (dummynet) */
 #define	IPFW_ARGS_LENMASK	0x0000ffff	/* length of data in *mem */
 #define	IPFW_ARGS_LENGTH(f)	((f) & IPFW_ARGS_LENMASK)
 	/*
 	 * On return, it points to the matching rule.
 	 * On entry, rule.slot > 0 means the info is valid and
 	 * contains the starting rule for an ipfw search.
 	 * If chain_id == chain->id && slot >0 then jump to that slot.
 	 * Otherwise, we locate the first rule >= rulenum:rule_id
 	 */
 	struct ipfw_rule_ref	rule;	/* match/restart info		*/
 
 	struct ifnet		*ifp;	/* input/output interface	*/
 	struct inpcb		*inp;
 	union {
 		/*
 		 * We don't support forwarding on layer2, thus we can
 		 * keep eh pointer in this union.
 		 * next_hop[6] pointers can be used to point to next hop
 		 * stored in rule's opcode to avoid copying into hopstore.
 		 * Also, it is expected that all 0x1-0x10 flags are mutually
 		 * exclusive.
 		 */
 		struct ether_header	*eh;	/* for bridged packets	*/
 		struct sockaddr_in	*next_hop;
 		struct sockaddr_in6	*next_hop6;
 		/* ipfw next hop storage */
 		struct sockaddr_in	hopstore;
 		struct ip_fw_nh6 {
 			struct in6_addr sin6_addr;
 			uint32_t	sin6_scope_id;
 			uint16_t	sin6_port;
 		} hopstore6;
 	};
 
 	struct mbuf		*m;	/* the mbuf chain		*/
 	struct ipfw_flow_id	f_id;	/* grabbed from IP header	*/
 };
 
 MALLOC_DECLARE(M_IPFW);
 
-/*
- * Hooks sometime need to know the direction of the packet
- * (divert, dummynet, netgraph, ...)
- * We use a generic definition here, with bit0-1 indicating the
- * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the
- * specific protocol
- * indicating the protocol (if necessary)
- */
-enum {
-	DIR_MASK =	0x3,
-	DIR_OUT =	0,
-	DIR_IN =	1,
-	DIR_FWD =	2,
-	DIR_DROP =	3,
-	PROTO_LAYER2 =	0x4, /* set for layer 2 */
-	/* PROTO_DEFAULT = 0, */
-	PROTO_IPV4 =	0x08,
-	PROTO_IPV6 =	0x10,
-	PROTO_IFB =	0x0c, /* layer2 + ifbridge */
-   /*	PROTO_OLDBDG =	0x14, unused, old bridge */
-};
-
 /* wrapper for freeing a packet, in case we need to do more work */
 #ifndef FREE_PKT
 #if defined(__linux__) || defined(_WIN32)
 #define FREE_PKT(m)	netisr_dispatch(-1, m)
 #else
 #define FREE_PKT(m)	m_freem(m)
 #endif
 #endif /* !FREE_PKT */
 
 /*
  * Function definitions.
  */
 int ipfw_chk(struct ip_fw_args *args);
 struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
     u_int32_t, u_int32_t, int);
 
 /* attach (arg = 1) or detach (arg = 0) hooks */
 int ipfw_attach_hooks(int);
 #ifdef NOTYET
 void ipfw_nat_destroy(void);
 #endif
 
 /* In ip_fw_log.c */
 struct ip;
 struct ip_fw_chain;
 
 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
 void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
 void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
     struct ip_fw_args *args, struct mbuf *m,
     u_short offset, uint32_t tablearg, struct ip *ip);
 VNET_DECLARE(u_int64_t, norule_counter);
 #define	V_norule_counter	VNET(norule_counter)
 VNET_DECLARE(int, verbose_limit);
 #define	V_verbose_limit		VNET(verbose_limit)
 
 /* In ip_fw_dynamic.c */
 struct sockopt_data;
 
 enum { /* result for matching dynamic rules */
 	MATCH_REVERSE = 0,
 	MATCH_FORWARD,
 	MATCH_NONE,
 	MATCH_UNKNOWN,
 };
 
 /*
  * Macro to determine that we need to do or redo dynamic state lookup.
  * direction == MATCH_UNKNOWN means that this is first lookup, then we need
  * to do lookup.
  * Otherwise check the state name, if previous lookup was for "any" name,
  * this means there is no state with specific name. Thus no need to do
  * lookup. If previous name was not "any", redo lookup for specific name.
  */
 #define	DYN_LOOKUP_NEEDED(p, cmd)	\
     ((p)->direction == MATCH_UNKNOWN ||	\
 	((p)->kidx != 0 && (p)->kidx != (cmd)->arg1))
 #define	DYN_INFO_INIT(p)	do {	\
 	(p)->direction = MATCH_UNKNOWN;	\
 	(p)->kidx = 0;			\
 } while (0)
 struct ipfw_dyn_info {
 	uint16_t	direction;	/* match direction */
 	uint16_t	kidx;		/* state name kidx */
 	uint32_t	hashval;	/* hash value */
 	uint32_t	version;	/* bucket version */
 	uint32_t	f_pos;
 };
 int ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
     const ipfw_insn_limit *cmd, const struct ip_fw_args *args,
     const void *ulp, int pktlen, struct ipfw_dyn_info *info,
     uint32_t tablearg);
 struct ip_fw *ipfw_dyn_lookup_state(const struct ip_fw_args *args,
     const void *ulp, int pktlen, const ipfw_insn *cmd,
     struct ipfw_dyn_info *info);
 
 int ipfw_is_dyn_rule(struct ip_fw *rule);
 void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *);
 void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep);
 int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd);
 
 void ipfw_dyn_init(struct ip_fw_chain *);	/* per-vnet initialization */
 void ipfw_dyn_uninit(int);	/* per-vnet deinitialization */
 int ipfw_dyn_len(void);
 uint32_t ipfw_dyn_get_count(uint32_t *, int *);
 void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
     uint16_t default_id, uint16_t instance_id);
 
 /* common variables */
 VNET_DECLARE(int, fw_one_pass);
 #define	V_fw_one_pass		VNET(fw_one_pass)
 
 VNET_DECLARE(int, fw_verbose);
 #define	V_fw_verbose		VNET(fw_verbose)
 
 VNET_DECLARE(struct ip_fw_chain, layer3_chain);
 #define	V_layer3_chain		VNET(layer3_chain)
 
 VNET_DECLARE(int, ipfw_vnet_ready);
 #define	V_ipfw_vnet_ready	VNET(ipfw_vnet_ready)
 
 VNET_DECLARE(u_int32_t, set_disable);
 #define	V_set_disable		VNET(set_disable)
 
 VNET_DECLARE(int, autoinc_step);
 #define V_autoinc_step		VNET(autoinc_step)
 
 VNET_DECLARE(unsigned int, fw_tables_max);
 #define V_fw_tables_max		VNET(fw_tables_max)
 
 VNET_DECLARE(unsigned int, fw_tables_sets);
 #define V_fw_tables_sets	VNET(fw_tables_sets)
 
 struct tables_config;
 
 #ifdef _KERNEL
 /*
  * Here we have the structure representing an ipfw rule.
  *
  * It starts with a general area 
  * followed by an array of one or more instructions, which the code
  * accesses as an array of 32-bit values.
  *
  * Given a rule pointer  r:
  *
  *  r->cmd		is the start of the first instruction.
  *  ACTION_PTR(r)	is the start of the first action (things to do
  *			once a rule matched).
  */
 
 struct ip_fw {
 	uint16_t	act_ofs;	/* offset of action in 32-bit units */
 	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
 	uint16_t	rulenum;	/* rule number			*/
 	uint8_t		set;		/* rule set (0..31)		*/
 	uint8_t		flags;		/* currently unused		*/
 	counter_u64_t	cntr;		/* Pointer to rule counters	*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 	uint32_t	id;		/* rule id			*/
 	uint32_t	cached_id;	/* used by jump_fast		*/
 	uint32_t	cached_pos;	/* used by jump_fast		*/
 	uint32_t	refcnt;		/* number of references		*/
 
 	struct ip_fw	*next;		/* linked list of deleted rules */
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
 #define	IPFW_RULE_CNTR_SIZE	(2 * sizeof(uint64_t))
 
 #endif
 
 struct ip_fw_chain {
 	struct ip_fw	**map;		/* array of rule ptrs to ease lookup */
 	uint32_t	id;		/* ruleset id */
 	int		n_rules;	/* number of static rules */
 	void		*tablestate;	/* runtime table info */
 	void		*valuestate;	/* runtime table value info */
 	int		*idxmap;	/* skipto array of rules */
 	void		**srvstate;	/* runtime service mappings */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t rwmtx;
 #else
 	struct rmlock	rwmtx;
 #endif
 	int		static_len;	/* total len of static rules (v0) */
 	uint32_t	gencnt;		/* NAT generation count */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct ip_fw	*default_rule;
 	struct tables_config *tblcfg;	/* tables module data */
 	void		*ifcfg;		/* interface module data */
 	int		*idxmap_back;	/* standby skipto array of rules */
 	struct namedobj_instance	*srvmap; /* cfg name->number mappings */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t uh_lock;
 #else
 	struct rwlock	uh_lock;	/* lock for upper half */
 #endif
 };
 
 /* 64-byte structure representing multi-field table value */
 struct table_value {
 	uint32_t	tag;		/* O_TAG/O_TAGGED */
 	uint32_t	pipe;		/* O_PIPE/O_QUEUE */
 	uint16_t	divert;		/* O_DIVERT/O_TEE */
 	uint16_t	skipto;		/* skipto, CALLRET */
 	uint32_t	netgraph;	/* O_NETGRAPH/O_NGTEE */
 	uint32_t	fib;		/* O_SETFIB */
 	uint32_t	nat;		/* O_NAT */
 	uint32_t	nh4;
 	uint8_t		dscp;
 	uint8_t		spare0;
 	uint16_t	spare1;
 	/* -- 32 bytes -- */
 	struct in6_addr	nh6;
 	uint32_t	limit;		/* O_LIMIT */
 	uint32_t	zoneid;		/* scope zone id for nh6 */
 	uint64_t	refcnt;		/* Number of references */
 };
 
 
 struct named_object {
 	TAILQ_ENTRY(named_object)	nn_next;	/* namehash */
 	TAILQ_ENTRY(named_object)	nv_next;	/* valuehash */
 	char			*name;	/* object name */
 	uint16_t		etlv;	/* Export TLV id */
 	uint8_t			subtype;/* object subtype within class */
 	uint8_t			set;	/* set object belongs to */
 	uint16_t		kidx;	/* object kernel index */
 	uint16_t		spare;
 	uint32_t		ocnt;	/* object counter for internal use */
 	uint32_t		refcnt;	/* number of references */
 };
 TAILQ_HEAD(namedobjects_head, named_object);
 
 struct sockopt;	/* used by tcp_var.h */
 struct sockopt_data {
 	caddr_t		kbuf;		/* allocated buffer */
 	size_t		ksize;		/* given buffer size */
 	size_t		koff;		/* data already used */
 	size_t		kavail;		/* number of bytes available */
 	size_t		ktotal;		/* total bytes pushed */
 	struct sockopt	*sopt;		/* socket data */
 	caddr_t		sopt_val;	/* sopt user buffer */
 	size_t		valsize;	/* original data size */
 };
 
 struct ipfw_ifc;
 
 typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata,
     uint16_t ifindex);
 
 struct ipfw_iface {
 	struct named_object	no;
 	char ifname[64];
 	int resolved;
 	uint16_t ifindex;
 	uint16_t spare;
 	uint64_t gencnt;
 	TAILQ_HEAD(, ipfw_ifc)	consumers;
 };
 
 struct ipfw_ifc {
 	TAILQ_ENTRY(ipfw_ifc)	next;
 	struct ipfw_iface	*iface;
 	ipfw_ifc_cb		*cb;
 	void			*cbdata;
 };
 
 /* Macro for working with various counters */
 #define	IPFW_INC_RULE_COUNTER(_cntr, _bytes)	do {	\
 	counter_u64_add((_cntr)->cntr, 1);		\
 	counter_u64_add((_cntr)->cntr + 1, _bytes);	\
 	if ((_cntr)->timestamp != time_uptime)		\
 		(_cntr)->timestamp = time_uptime;	\
 	} while (0)
 
 #define	IPFW_INC_DYN_COUNTER(_cntr, _bytes)	do {		\
 	(_cntr)->pcnt++;				\
 	(_cntr)->bcnt += _bytes;			\
 	} while (0)
 
 #define	IPFW_ZERO_RULE_COUNTER(_cntr) do {		\
 	counter_u64_zero((_cntr)->cntr);		\
 	counter_u64_zero((_cntr)->cntr + 1);		\
 	(_cntr)->timestamp = 0;				\
 	} while (0)
 
 #define	IPFW_ZERO_DYN_COUNTER(_cntr) do {		\
 	(_cntr)->pcnt = 0;				\
 	(_cntr)->bcnt = 0;				\
 	} while (0)
 
 #define	TARG_VAL(ch, k, f)	((struct table_value *)((ch)->valuestate))[k].f
 #define	IP_FW_ARG_TABLEARG(ch, a, f)	\
 	(((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a))
 /*
  * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
  * so the variable and the macros must be here.
  */
 
 #if defined( __linux__ ) || defined( _WIN32 )
 #define	IPFW_LOCK_INIT(_chain) do {			\
 	rw_init(&(_chain)->rwmtx, "IPFW static rules");	\
 	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
 	} while (0)
 
 #define	IPFW_LOCK_DESTROY(_chain) do {			\
 	rw_destroy(&(_chain)->rwmtx);			\
 	rw_destroy(&(_chain)->uh_lock);			\
 	} while (0)
 
 #define	IPFW_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define	IPFW_RLOCK_TRACKER
 #define	IPFW_RLOCK(p)			rw_rlock(&(p)->rwmtx)
 #define	IPFW_RUNLOCK(p)			rw_runlock(&(p)->rwmtx)
 #define	IPFW_WLOCK(p)			rw_wlock(&(p)->rwmtx)
 #define	IPFW_WUNLOCK(p)			rw_wunlock(&(p)->rwmtx)
 #define	IPFW_PF_RLOCK(p)		IPFW_RLOCK(p)
 #define	IPFW_PF_RUNLOCK(p)		IPFW_RUNLOCK(p)
 #else /* FreeBSD */
 #define	IPFW_LOCK_INIT(_chain) do {			\
 	rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \
 	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
 	} while (0)
 
 #define	IPFW_LOCK_DESTROY(_chain) do {			\
 	rm_destroy(&(_chain)->rwmtx);			\
 	rw_destroy(&(_chain)->uh_lock);			\
 	} while (0)
 
 #define	IPFW_RLOCK_ASSERT(_chain)	rm_assert(&(_chain)->rwmtx, RA_RLOCKED)
 #define	IPFW_WLOCK_ASSERT(_chain)	rm_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define	IPFW_RLOCK_TRACKER		struct rm_priotracker _tracker
 #define	IPFW_RLOCK(p)			rm_rlock(&(p)->rwmtx, &_tracker)
 #define	IPFW_RUNLOCK(p)			rm_runlock(&(p)->rwmtx, &_tracker)
 #define	IPFW_WLOCK(p)			rm_wlock(&(p)->rwmtx)
 #define	IPFW_WUNLOCK(p)			rm_wunlock(&(p)->rwmtx)
 #define	IPFW_PF_RLOCK(p)		IPFW_RLOCK(p)
 #define	IPFW_PF_RUNLOCK(p)		IPFW_RUNLOCK(p)
 #endif
 
 #define	IPFW_UH_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
 #define	IPFW_UH_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
 #define	IPFW_UH_UNLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_UNLOCKED)
 
 #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
 #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
 #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
 #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
 
 struct obj_idx {
 	uint16_t	uidx;	/* internal index supplied by userland */
 	uint16_t	kidx;	/* kernel object index */
 	uint16_t	off;	/* tlv offset from rule end in 4-byte words */
 	uint8_t		spare;
 	uint8_t		type;	/* object type within its category */
 };
 
 struct rule_check_info {
 	uint16_t	flags;		/* rule-specific check flags */
 	uint16_t	object_opcodes;	/* num of opcodes referencing objects */
 	uint16_t	urule_numoff;	/* offset of rulenum in bytes */
 	uint8_t		version;	/* rule version */
 	uint8_t		spare;
 	ipfw_obj_ctlv	*ctlv;		/* name TLV container */
 	struct ip_fw	*krule;		/* resulting rule pointer */
 	caddr_t		urule;		/* original rule pointer */
 	struct obj_idx	obuf[8];	/* table references storage */
 };
 
 /* Legacy interface support */
 /*
  * FreeBSD 8 export rule format
  */
 struct ip_fw_rule0 {
 	struct ip_fw	*x_next;	/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 
 	uint16_t	act_ofs;	/* offset of action in 32-bit units */
 	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
 	uint16_t	rulenum;	/* rule number			*/
 	uint8_t		set;		/* rule set (0..31)		*/
 	uint8_t		_pad;		/* padding			*/
 	uint32_t	id;		/* rule id */
 
 	/* These fields are present in all rules.			*/
 	uint64_t	pcnt;		/* Packet counter		*/
 	uint64_t	bcnt;		/* Byte counter			*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
 struct ip_fw_bcounter0 {
 	uint64_t	pcnt;		/* Packet counter		*/
 	uint64_t	bcnt;		/* Byte counter			*/
 	uint32_t	timestamp;	/* tv_sec of last match		*/
 };
 
 /* Kernel rule length */
 /*
  * RULE _K_ SIZE _V_ ->
  * get kernel size from userland rule version _V_.
  * RULE _U_ SIZE _V_ ->
  * get user size version _V_ from kernel rule
  * RULESIZE _V_ ->
  * get user size rule length 
  */
 /* FreeBSD8 <> current kernel format */
 #define	RULEUSIZE0(r)	(sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4)
 #define	RULEKSIZE0(r)	roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
 /* FreeBSD11 <> current kernel format */
 #define	RULEUSIZE1(r)	(roundup2(sizeof(struct ip_fw_rule) + \
     (r)->cmd_len * 4 - 4, 8))
 #define	RULEKSIZE1(r)	roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
 
 /*
  * Tables/Objects index rewriting code
  */
 
 /* Default and maximum number of ipfw tables/objects. */
 #define	IPFW_TABLES_MAX		65536
 #define	IPFW_TABLES_DEFAULT	128
 #define	IPFW_OBJECTS_MAX	65536
 #define	IPFW_OBJECTS_DEFAULT	1024
 
 #define	CHAIN_TO_SRV(ch)	((ch)->srvmap)
 #define	SRV_OBJECT(ch, idx)	((ch)->srvstate[(idx)])
 
 struct tid_info {
 	uint32_t	set;	/* table set */
 	uint16_t	uidx;	/* table index */
 	uint8_t		type;	/* table type */
 	uint8_t		atype;
 	uint8_t		spare;
 	int		tlen;	/* Total TLV size block */
 	void		*tlvs;	/* Pointer to first TLV */
 };
 
 /*
  * Classifier callback. Checks if @cmd opcode contains kernel object reference.
  * If true, returns its index and type.
  * Returns 0 if match is found, 1 otherwise.
  */
 typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
 /*
  * Updater callback. Sets kernel object reference index to @puidx
  */
 typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx);
 /*
  * Finder callback. Tries to find named object by name (specified via @ti).
  * Stores found named object pointer in @pno.
  * If object was not found, NULL is stored.
  *
  * Return 0 if input data was valid.
  */
 typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch,
     struct tid_info *ti, struct named_object **pno);
 /*
  * Another finder callback. Tries to find named object by kernel index.
  *
  * Returns pointer to named object or NULL.
  */
 typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
     uint16_t kidx);
 /*
  * Object creator callback. Tries to create object specified by @ti.
  * Stores newly-allocated object index in @pkidx.
  *
  * Returns 0 on success.
  */
 typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti,
     uint16_t *pkidx);
 /*
  * Object destroy callback. Intended to free resources allocated by
  * create_object callback.
  */
 typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch,
     struct named_object *no);
 /*
  * Sets handler callback. Handles moving and swapping sets of named objects.
  *  SWAP_ALL moves all named objects from set `set' to `new_set' and vice versa;
  *  TEST_ALL checks that there aren't any named object with conflicting names;
  *  MOVE_ALL moves all named objects from set `set' to `new_set';
  *  COUNT_ONE used to count number of references used by object with kidx `set';
  *  TEST_ONE checks that named object with kidx `set' can be moved to `new_set`;
  *  MOVE_ONE moves named object with kidx `set' to set `new_set'.
  */
 enum ipfw_sets_cmd {
 	SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE
 };
 typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch,
     uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
 
 
 struct opcode_obj_rewrite {
 	uint32_t		opcode;		/* Opcode to act upon */
 	uint32_t		etlv;		/* Relevant export TLV id  */
 	ipfw_obj_rw_cl		*classifier;	/* Check if rewrite is needed */
 	ipfw_obj_rw_upd		*update;	/* update cmd with new value */
 	ipfw_obj_fname_cb	*find_byname;	/* Find named object by name */
 	ipfw_obj_fidx_cb	*find_bykidx;	/* Find named object by kidx */
 	ipfw_obj_create_cb	*create_object;	/* Create named object */
 	ipfw_obj_destroy_cb	*destroy_object;/* Destroy named object */
 	ipfw_obj_sets_cb	*manage_sets;	/* Swap or move sets */
 };
 
 #define	IPFW_ADD_OBJ_REWRITER(f, c)	do {	\
 	if ((f) != 0) 				\
 		ipfw_add_obj_rewriter(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 #define	IPFW_DEL_OBJ_REWRITER(l, c)	do {	\
 	if ((l) != 0) 				\
 		ipfw_del_obj_rewriter(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 
 /* In ip_fw_iface.c */
 int ipfw_iface_init(void);
 void ipfw_iface_destroy(void);
 void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch);
 int ipfw_iface_ref(struct ip_fw_chain *ch, char *name,
     struct ipfw_ifc *ic);
 void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
 
 /* In ip_fw_sockopt.c */
 void ipfw_init_skipto_cache(struct ip_fw_chain *chain);
 void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
 int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_ctl3(struct sockopt *sopt);
 int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
     int locked);
 void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
     struct ip_fw *rule);
 void ipfw_reap_rules(struct ip_fw *head);
 void ipfw_init_counters(void);
 void ipfw_destroy_counters(void);
 struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
 void ipfw_free_rule(struct ip_fw *rule);
 int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
 int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx);
 
 typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
     ip_fw3_opheader *op3, struct sockopt_data *sd);
 struct ipfw_sopt_handler {
 	uint16_t	opcode;
 	uint8_t		version;
 	uint8_t		dir;
 	sopt_handler_f	*handler;
 	uint64_t	refcnt;
 };
 /* Handler direction flags. */
 #define	HDIR_SET	0x01	/* Handler is used to set some data */
 #define	HDIR_GET	0x02	/* Handler is used to retrieve data */
 /*
  * Both directions.  Parenthesized so the mask stays intact when the
  * macro is used inside a larger expression such as (x & HDIR_BOTH).
  */
 #define	HDIR_BOTH	(HDIR_GET | HDIR_SET)
 
 void ipfw_init_sopt_handler(void);
 void ipfw_destroy_sopt_handler(void);
 void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
 int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
 caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed);
 caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed);
 #define	IPFW_ADD_SOPT_HANDLER(f, c)	do {	\
 	if ((f) != 0) 				\
 		ipfw_add_sopt_handler(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 #define	IPFW_DEL_SOPT_HANDLER(l, c)	do {	\
 	if ((l) != 0) 				\
 		ipfw_del_sopt_handler(c,	\
 		    sizeof(c) / sizeof(c[0]));	\
 	} while(0)
 
 /*
  * Named-object hash ("objhash") interface.  struct namedobj_instance is
  * only forward-declared at this point, which is sufficient because the
  * declarations below use it exclusively through pointers.  The function
  * definitions are not part of this header.
  */
 struct namedobj_instance;
 /* Callback type accepted by ipfw_objhash_foreach() and _foreach_type(). */
 typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *,
     void *arg);
 /* Hash function type; see ipfw_objhash_set_hashf()/ipfw_objhash_set_funcs(). */
 typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key,
     uint32_t kopt);
 /* Key comparison function type; see ipfw_objhash_set_funcs(). */
 typedef int (objhash_cmp_f)(struct named_object *no, const void *key,
     uint32_t kopt);
 /* Instance creation and destruction. */
 struct namedobj_instance *ipfw_objhash_create(uint32_t items);
 void ipfw_objhash_destroy(struct namedobj_instance *);
 /*
  * Index bitmap helpers.  The bitmap is handled as an untyped pointer plus a
  * block count; alloc/merge/swap receive both by reference, free by value.
  */
 void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks);
 void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni,
     void **idx, int *blocks);
 void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni,
     void **idx, int *blocks);
 void ipfw_objhash_bitmap_free(void *idx, int blocks);
 void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f);
 /*
  * Lookups.
  * NOTE(review): 'name' is a non-const char * here but const char * in
  * ipfw_objhash_lookup_name_type(); changing it requires touching the
  * definition as well.
  */
 struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni,
     uint32_t set, char *name);
 struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni,
     uint32_t set, uint32_t type, const char *name);
 struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni,
     uint16_t idx);
 int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
     struct named_object *b);
 /* Insertion and removal of a single object. */
 void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no);
 void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no);
 /* Counters, optionally restricted to one object type. */
 uint32_t ipfw_objhash_count(struct namedobj_instance *ni);
 uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type);
 /* Iteration: 'f' is called with the opaque 'arg' supplied by the caller. */
 int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f,
     void *arg);
 int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
     void *arg, uint16_t type);
 /*
  * Index management.
  * NOTE(review): ipfw_objhash_alloc_idx() takes an untyped 'void *n' while
  * ipfw_objhash_free_idx() takes a typed instance pointer.
  */
 int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx);
 int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx);
 void ipfw_objhash_set_funcs(struct namedobj_instance *ni,
     objhash_hash_f *hash_f, objhash_cmp_f *cmp_f);
 /* The object found, if any, is handed back through 'pno'. */
 int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
     uint32_t etlv, struct named_object **pno);
 void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv);
 /* Searches a TLV buffer given as an untyped pointer plus a length. */
 ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx,
     uint32_t etlv);
 /*
  * Object rewriter registry: init/destroy plus add/del of an array of
  * struct opcode_obj_rewrite with an element count.
  */
 void ipfw_init_obj_rewriter(void);
 void ipfw_destroy_obj_rewriter(void);
 void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
 int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
 
 /*
  * Miscellaneous object/opcode helpers (prototypes only; the definitions are
  * not part of this header).
  */
 int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
     struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti);
 /* update_* takes the index by value; classify_* hands one back via 'puidx'. */
 void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx);
 int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx);
 /* Paired init/destroy entry points operating on one rule chain. */
 void ipfw_init_srv(struct ip_fw_chain *ch);
 void ipfw_destroy_srv(struct ip_fw_chain *ch);
 /* Returns int; presumably 0 when 'name' is acceptable -- confirm. */
 int ipfw_check_object_name_generic(const char *name);
 /* NOTE(review): 'set' is uint16_t while 'new_set' is uint8_t. */
 int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
     uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
 
 /* In ip_fw_eaction.c */
 /*
  * Function type of an external action handler.  It has the same parameter
  * list and return type as ipfw_run_eaction() below; 'done' is an output
  * flag written through a pointer.
  */
 typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args,
     ipfw_insn *cmd, int *done);
 /* Per-chain setup/teardown; 'first'/'last' are int flags from the caller. */
 int ipfw_eaction_init(struct ip_fw_chain *ch, int first);
 void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last);
 
 /*
  * Registration takes a handler and a name and returns a uint16_t, which
  * has the same type as the 'eaction_id' taken by the functions below
  * (NOTE(review): the value signalling failure is not visible here).
  */
 uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
     const char *name);
 int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id);
 int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
     ipfw_insn *cmd, int *done);
 int ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
     uint16_t eaction_id, uint16_t default_id, uint16_t instance_id);
 int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
     uint16_t instance_id);
 
 /* In ip_fw_table.c */
 /* Forward declaration; only pointers to it are used by table_lookup_t. */
 struct table_info;
 
 /*
  * Function type of a table lookup routine: takes a key as an untyped
  * pointer plus length and writes a 32-bit value through 'val'.
  * NOTE(review): the meaning of the int result is not visible here.
  */
 typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 
 /* Lookup by table number 'tbl'; the key is 'paddr' with length 'plen'. */
 int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
     void *paddr, uint32_t *val);
 struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
     uint16_t kidx);
 /* ref hands an index back through 'kidx'; unref takes that index by value. */
 int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
 void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
 /* Per-chain setup, reconfiguration and teardown. */
 int ipfw_init_tables(struct ip_fw_chain *ch, int first);
 int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
 int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
 void ipfw_destroy_tables(struct ip_fw_chain *ch, int last);
 
 /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
 
 /*
  * Function pointer declared here and defined elsewhere.  It is called with
  * a nat_list and an int and yields a struct cfg_nat pointer.
  */
 extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 
 /* Function types for the NAT hooks declared below. */
 typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
 typedef int ipfw_nat_cfg_t(struct sockopt *);
 
 /*
  * Per-VNET integer flag.  IPFW_NAT_LOADED expands to its current value, so
  * it is non-zero exactly when V_ipfw_nat_ready is non-zero.
  */
 VNET_DECLARE(int, ipfw_nat_ready);
 #define	V_ipfw_nat_ready	VNET(ipfw_nat_ready)
 #define	IPFW_NAT_LOADED	(V_ipfw_nat_ready)
 
 /*
  * NAT hook pointers, defined elsewhere.  Presumably they are only valid
  * while IPFW_NAT_LOADED is true -- confirm against the code that sets them.
  */
 extern ipfw_nat_t *ipfw_nat_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 /* Helper functions for IP checksum adjustment */
 /*
  * Add two 16-bit words with end-around carry: when the 16-bit addition
  * wraps, the lost carry is added back into the low bit.
  */
 static __inline uint16_t
 cksum_add(uint16_t sum, uint16_t a)
 {
 	uint16_t folded, carry;
 
 	/* Truncated to 16 bits; a wrap shows up as folded < a. */
 	folded = sum + a;
 	carry = (folded < a) ? 1 : 0;
 	return (folded + carry);
 }
 
 /*
  * Recompute a checksum after one 16-bit word changed from 'old' to 'new':
  * the result is ~(~oldsum + ~old + new), where '+' is cksum_add().
  */
 static __inline uint16_t
 cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new)
 {
 	uint16_t acc;
 
 	/* Take the old word out of the complemented sum ... */
 	acc = cksum_add(~oldsum, ~old);
 	/* ... put the new word in, then complement back. */
 	acc = cksum_add(acc, new);
 	return (~acc);
 }
 
 #endif /* _KERNEL */
 #endif /* _IPFW2_PRIVATE_H */