Index: stable/10/contrib/libpcap/grammar.y
===================================================================
--- stable/10/contrib/libpcap/grammar.y	(revision 263085)
+++ stable/10/contrib/libpcap/grammar.y	(revision 263086)
@@ -1,698 +1,698 @@
 %{
 /*
  * Copyright (c) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that: (1) source code distributions
  * retain the above copyright notice and this paragraph in its entirety, (2)
  * distributions including binary code include the above copyright notice and
  * this paragraph in its entirety in the documentation or other materials
  * provided with the distribution, and (3) all advertising materials mentioning
  * features or use of this software display the following acknowledgement:
  * ``This product includes software developed by the University of California,
  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  * the University nor the names of its contributors may be used to endorse
  * or promote products derived from this software without specific prior
  * written permission.
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  * $FreeBSD$
  */
 #ifndef lint
 static const char rcsid[] _U_ =
     "@(#) $Header: /tcpdump/master/libpcap/grammar.y,v 1.101 2007-11-18 02:03:52 guy Exp $ (LBL)";
 #endif
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #ifdef WIN32
 #include <pcap-stdinc.h>
 #else /* WIN32 */
 #include <sys/types.h>
 #include <sys/socket.h>
 #endif /* WIN32 */
 
 #include <stdlib.h>
 
 #ifndef WIN32
 #if __STDC__
 struct mbuf;
 struct rtentry;
 #endif
 
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #endif /* WIN32 */
 
 #include <stdio.h>
 
 #include "pcap-int.h"
 
 #include "gencode.h"
 #ifdef HAVE_NET_PFVAR_H
 #include <net/if.h>
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
 #include <net/if_pflog.h>
 #endif
 #include "ieee80211.h"
 #include <pcap/namedb.h>
 
 #ifdef HAVE_OS_PROTO_H
 #include "os-proto.h"
 #endif
 
 /*
  * Set the proto, dir and addr members of qualifier `q' in one expression.
  * The whole expansion is parenthesized so the comma expression stays a
  * single unit wherever the macro is written.  `q' is evaluated three
  * times, so it must not have side effects.
  */
 #define QSET(q, p, d, a) ((q).proto = (p),\
 			 (q).dir = (d),\
 			 (q).addr = (a))
 
 /*
  * One entry of a name table: a keyword string and the integer it stands
  * for.  The tables below end with an entry whose string is NULL.
  */
 struct tok {
 	int v;			/* value */
 	const char *s;		/* string */
 };
 
 /*
  * Names accepted for an 802.11 frame type; the `type' rule looks names up
  * here with str2tok().  Some values have more than one spelling.
  */
 static const struct tok ieee80211_types[] = {
 	{ IEEE80211_FC0_TYPE_DATA, "data" },
 	{ IEEE80211_FC0_TYPE_MGT, "mgt" },
 	{ IEEE80211_FC0_TYPE_MGT, "management" },
 	{ IEEE80211_FC0_TYPE_CTL, "ctl" },
 	{ IEEE80211_FC0_TYPE_CTL, "control" },
 	{ 0, NULL }
 };
 /*
  * Subtype names for management frames; most values are listed with and
  * without a hyphen, or in short and long form.
  */
 static const struct tok ieee80211_mgt_subtypes[] = {
 	{ IEEE80211_FC0_SUBTYPE_ASSOC_REQ, "assocreq" },
 	{ IEEE80211_FC0_SUBTYPE_ASSOC_REQ, "assoc-req" },
 	{ IEEE80211_FC0_SUBTYPE_ASSOC_RESP, "assocresp" },
 	{ IEEE80211_FC0_SUBTYPE_ASSOC_RESP, "assoc-resp" },
 	{ IEEE80211_FC0_SUBTYPE_REASSOC_REQ, "reassocreq" },
 	{ IEEE80211_FC0_SUBTYPE_REASSOC_REQ, "reassoc-req" },
 	{ IEEE80211_FC0_SUBTYPE_REASSOC_RESP, "reassocresp" },
 	{ IEEE80211_FC0_SUBTYPE_REASSOC_RESP, "reassoc-resp" },
 	{ IEEE80211_FC0_SUBTYPE_PROBE_REQ, "probereq" },
 	{ IEEE80211_FC0_SUBTYPE_PROBE_REQ, "probe-req" },
 	{ IEEE80211_FC0_SUBTYPE_PROBE_RESP, "proberesp" },
 	{ IEEE80211_FC0_SUBTYPE_PROBE_RESP, "probe-resp" },
 	{ IEEE80211_FC0_SUBTYPE_BEACON, "beacon" },
 	{ IEEE80211_FC0_SUBTYPE_ATIM, "atim" },
 	{ IEEE80211_FC0_SUBTYPE_DISASSOC, "disassoc" },
 	{ IEEE80211_FC0_SUBTYPE_DISASSOC, "disassociation" },
 	{ IEEE80211_FC0_SUBTYPE_AUTH, "auth" },
 	{ IEEE80211_FC0_SUBTYPE_AUTH, "authentication" },
 	{ IEEE80211_FC0_SUBTYPE_DEAUTH, "deauth" },
 	{ IEEE80211_FC0_SUBTYPE_DEAUTH, "deauthentication" },
 	{ 0, NULL }
 };
 /* Subtype names for control frames. */
 static const struct tok ieee80211_ctl_subtypes[] = {
 	{ IEEE80211_FC0_SUBTYPE_PS_POLL, "ps-poll" },
 	{ IEEE80211_FC0_SUBTYPE_RTS, "rts" },
 	{ IEEE80211_FC0_SUBTYPE_CTS, "cts" },
 	{ IEEE80211_FC0_SUBTYPE_ACK, "ack" },
 	{ IEEE80211_FC0_SUBTYPE_CF_END, "cf-end" },
 	{ IEEE80211_FC0_SUBTYPE_CF_END_ACK, "cf-end-ack" },
 	{ 0, NULL }
 };
 /*
  * Subtype names for data frames.  The "qos-" entries are the plain subtype
  * value OR-ed with IEEE80211_FC0_SUBTYPE_QOS.
  */
 static const struct tok ieee80211_data_subtypes[] = {
 	{ IEEE80211_FC0_SUBTYPE_DATA, "data" },
 	{ IEEE80211_FC0_SUBTYPE_CF_ACK, "data-cf-ack" },
 	{ IEEE80211_FC0_SUBTYPE_CF_POLL, "data-cf-poll" },
 	{ IEEE80211_FC0_SUBTYPE_CF_ACPL, "data-cf-ack-poll" },
 	{ IEEE80211_FC0_SUBTYPE_NODATA, "null" },
 	{ IEEE80211_FC0_SUBTYPE_NODATA_CF_ACK, "cf-ack" },
 	{ IEEE80211_FC0_SUBTYPE_NODATA_CF_POLL, "cf-poll"  },
 	{ IEEE80211_FC0_SUBTYPE_NODATA_CF_ACPL, "cf-ack-poll" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_DATA, "qos-data" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_CF_ACK, "qos-data-cf-ack" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_CF_POLL, "qos-data-cf-poll" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_CF_ACPL, "qos-data-cf-ack-poll" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_NODATA, "qos" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_NODATA_CF_POLL, "qos-cf-poll" },
 	{ IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_SUBTYPE_NODATA_CF_ACPL, "qos-cf-ack-poll" },
 	{ 0, NULL }
 };
 /* Pairs a frame type value with the table of subtype names for that type. */
 struct type2tok {
 	int type;
 	const struct tok *tok;
 };
 /*
  * Subtype tables indexed by frame type; the list ends with an entry whose
  * table pointer is NULL.
  */
 static const struct type2tok ieee80211_type_subtypes[] = {
 	{ IEEE80211_FC0_TYPE_MGT, ieee80211_mgt_subtypes },
 	{ IEEE80211_FC0_TYPE_CTL, ieee80211_ctl_subtypes },
 	{ IEEE80211_FC0_TYPE_DATA, ieee80211_data_subtypes },
 	{ 0, NULL }
 };
 
 static int
 str2tok(const char *str, const struct tok *toks)
 {
 	int i;
 
 	for (i = 0; toks[i].s != NULL; i++) {
 		if (pcap_strcasecmp(toks[i].s, str) == 0)
 			return (toks[i].v);
 	}
 	return (-1);
 }
 
 /* Number of parse errors; incremented by yyerror(). */
 int n_errors = 0;
 
 /* Qualifier with every member set to Q_UNDEF. */
 static struct qual qerr = { Q_UNDEF, Q_UNDEF, Q_UNDEF, Q_UNDEF };
 
 /*
  * Parser error hook: count the error, then pass the message on to
  * bpf_error().
  */
 static void
 yyerror(const char *msg)
 {
 	n_errors += 1;
 	bpf_error("%s", msg);
 	/* NOTREACHED */
 }
 
 #ifdef NEED_YYPARSE_WRAPPER
 int yyparse(void);
 
 /*
  * Wrapper that simply calls yyparse() and returns its result.  Only built
  * when NEED_YYPARSE_WRAPPER is defined.  Declared with a (void) parameter
  * list so the definition is a real prototype.
  */
 int
 pcap_parse(void)
 {
 	return (yyparse());
 }
 #endif
 
 #ifdef HAVE_NET_PFVAR_H
 /*
  * Translate a pf reason name into its index in the PFRES_NAMES list,
  * matching with pcap_strcasecmp().  A name that is not in the list is
  * reported through bpf_error().
  */
 static int
 pfreason_to_num(const char *reason)
 {
 	const char *reasons[] = PFRES_NAMES;
 	int idx = 0;
 
 	while (reasons[idx]) {
 		if (pcap_strcasecmp(reason, reasons[idx]) == 0)
 			return (idx);
 		idx++;
 	}
 	bpf_error("unknown PF reason");
 	/*NOTREACHED*/
 }
 
 /*
  * Translate a pf action name into the matching PF_* constant, matching
  * with pcap_strcasecmp().  "pass"/"accept" give PF_PASS and "drop"/"block"
  * give PF_DROP; the NAT-related names are only recognized when
  * HAVE_PF_NAT_THROUGH_PF_NORDR is set.  Any other name is reported through
  * bpf_error().
  */
 static int
 pfaction_to_num(const char *action)
 {
 	if (pcap_strcasecmp(action, "pass") == 0)
 		return (PF_PASS);
 	if (pcap_strcasecmp(action, "accept") == 0)
 		return (PF_PASS);
 	if (pcap_strcasecmp(action, "drop") == 0)
 		return (PF_DROP);
 	if (pcap_strcasecmp(action, "block") == 0)
 		return (PF_DROP);
 #if HAVE_PF_NAT_THROUGH_PF_NORDR
 	if (pcap_strcasecmp(action, "rdr") == 0)
 		return (PF_RDR);
 	if (pcap_strcasecmp(action, "nat") == 0)
 		return (PF_NAT);
 	if (pcap_strcasecmp(action, "binat") == 0)
 		return (PF_BINAT);
 	if (pcap_strcasecmp(action, "nordr") == 0)
 		return (PF_NORDR);
 #endif
 	bpf_error("unknown PF action");
 	/*NOTREACHED*/
 }
 #else /* !HAVE_NET_PFVAR_H */
 /*
  * Version used when HAVE_NET_PFVAR_H is not defined: any reason name is
  * reported as an error through bpf_error().
  */
 static int
 pfreason_to_num(const char *reason)
 {
 	(void)reason;	/* not examined in this build */
 	bpf_error("libpcap was compiled on a machine without pf support");
 	/*NOTREACHED*/
 
 	/* present only to make the VC compiler happy */
 	return (-1);
 }
 
 /*
  * Version used when HAVE_NET_PFVAR_H is not defined: any action name is
  * reported as an error through bpf_error().
  */
 static int
 pfaction_to_num(const char *action)
 {
 	(void)action;	/* not examined in this build */
 	bpf_error("libpcap was compiled on a machine without pf support");
 	/*NOTREACHED*/
 
 	/* present only to make the VC compiler happy */
 	return (-1);
 }
 #endif /* HAVE_NET_PFVAR_H */
 %}
 
 /* Semantic value carried by tokens and nonterminals (see the %type lines). */
 %union {
 	int i;			/* NUM, qualifiers, operators, type codes */
 	bpf_u_int32 h;
 	u_char *e;		/* EID and AID tokens */
 	char *s;		/* ID, HID and HID6 tokens */
 	struct stmt *stmt;
 	struct arth *a;		/* arth and narth */
 	struct {
 		struct qual q;
 		int atmfieldtype;
 		int mtp3fieldtype;
 		struct block *b;
 	} blk;			/* a block together with its qualifier */
 	struct block *rblk;	/* other, pfvar and p80211 */
 }
 
 %type	<blk>	expr id nid pid term rterm qid
 %type	<blk>	head
 %type	<i>	pqual dqual aqual ndaqual
 %type	<a>	arth narth
 %type	<i>	byteop pname pnum relop irelop
 %type	<blk>	and or paren not null prog
 %type	<rblk>	other pfvar p80211
 %type	<i>	atmtype atmmultitype
 %type	<blk>	atmfield
 %type	<blk>	atmfieldvalue atmvalue atmlistvalue
 %type	<i>	mtp2type
 %type	<blk>	mtp3field
 %type	<blk>	mtp3fieldvalue mtp3value mtp3listvalue
 
 
 %token  DST SRC HOST GATEWAY
 %token  NET NETMASK PORT PORTRANGE LESS GREATER PROTO PROTOCHAIN CBYTE
 %token  ARP RARP IP SCTP TCP UDP ICMP IGMP IGRP PIM VRRP CARP
 %token  ATALK AARP DECNET LAT SCA MOPRC MOPDL
 %token  TK_BROADCAST TK_MULTICAST
 %token  NUM INBOUND OUTBOUND
 %token  PF_IFNAME PF_RSET PF_RNR PF_SRNR PF_REASON PF_ACTION
 %token	TYPE SUBTYPE DIR ADDR1 ADDR2 ADDR3 ADDR4 RA TA
 %token  LINK
 %token	GEQ LEQ NEQ
 %token	ID EID HID HID6 AID
 %token	LSH RSH
 %token  LEN
 %token  IPV6 ICMPV6 AH ESP
 %token	VLAN MPLS
 %token	PPPOED PPPOES
 %token  ISO ESIS CLNP ISIS L1 L2 IIH LSP SNP CSNP PSNP 
 %token  STP
 %token  IPX
 %token  NETBEUI
 %token	LANE LLC METAC BCC SC ILMIC OAMF4EC OAMF4SC
 %token	OAM OAMF4 CONNECTMSG METACONNECT
 %token	VPI VCI
 %token	RADIO
 %token	FISU LSSU MSU
 %token	SIO OPC DPC SLS
 
 %type	<s> ID
 %type	<e> EID
 %type	<e> AID
 %type	<s> HID HID6
 %type	<i> NUM action reason type subtype type_subtype dir
 
 %left OR AND
 %nonassoc  '!'
 %left '|'
 %left '&'
 %left LSH RSH
 %left '+' '-'
 %left '*' '/'
 %nonassoc UMINUS
 %%
 /*
  * Start symbol: an optional expression.  When one is present its block is
  * handed to finish_parse().
  */
 prog:	  null expr
 {
 	finish_parse($2.b);
 }
 	| null
 	;
 /* Empty rule whose value carries qerr as its qualifier. */
 null:	  /* null */		{ $$.q = qerr; }
 	;
 /*
  * Terms (or bare ids) joined by `and'/`or'.  gen_and()/gen_or() is called
  * on the two operand blocks and the right-hand operand becomes the value
  * of the whole expression.
  */
 expr:	  term
 	| expr and term		{ gen_and($1.b, $3.b); $$ = $3; }
 	| expr and id		{ gen_and($1.b, $3.b); $$ = $3; }
 	| expr or term		{ gen_or($1.b, $3.b); $$ = $3; }
 	| expr or id		{ gen_or($1.b, $3.b); $$ = $3; }
 	;
 /* `and' and `or' take the value just below them on the stack ($<blk>0). */
 and:	  AND			{ $$ = $<blk>0; }
 	;
 or:	  OR			{ $$ = $<blk>0; }
 	;
 /*
  * An identifier: a named id, a number, or a parenthesized list.  A number
  * is turned into a block by gen_ncode() using the qualifier found just
  * below it on the stack ($<blk>0.q).
  */
 id:	  nid
 	| pnum			{ $$.b = gen_ncode(NULL, (bpf_u_int32)$1,
 						   $$.q = $<blk>0.q); }
 	| paren pid ')'		{ $$ = $2; }
 	;
 /*
  * Named identifiers.  Each alternative copies the qualifier found just
  * below it on the stack ($<blk>0.q) and calls the gen_* routine for its
  * token kind.
  */
 nid:	  ID			{ $$.b = gen_scode($1, $$.q = $<blk>0.q); }
 	| HID '/' NUM		{ $$.b = gen_mcode($1, NULL, $3,
 				    $$.q = $<blk>0.q); }
 	| HID NETMASK HID	{ $$.b = gen_mcode($1, $3, 0,
 				    $$.q = $<blk>0.q); }
 	| HID			{
 				  /* Decide how to parse HID based on proto */
 				  $$.q = $<blk>0.q;
 				  if ($$.q.addr == Q_PORT)
 				  	bpf_error("'port' modifier applied to ip host");
 				  else if ($$.q.addr == Q_PORTRANGE)
 				  	bpf_error("'portrange' modifier applied to ip host");
 				  else if ($$.q.addr == Q_PROTO)
 				  	bpf_error("'proto' modifier applied to ip host");
 				  else if ($$.q.addr == Q_PROTOCHAIN)
 				  	bpf_error("'protochain' modifier applied to ip host");
 				  $$.b = gen_ncode($1, 0, $$.q);
 				}
 	| HID6 '/' NUM		{
 #ifdef INET6
 				  $$.b = gen_mcode6($1, NULL, $3,
 				    $$.q = $<blk>0.q);
 #else
 				  bpf_error("'ip6addr/prefixlen' not supported "
 					"in this configuration");
 #endif /*INET6*/
 				}
 	| HID6			{
 #ifdef INET6
 				  /* a bare address is passed with a mask length of 128 */
 				  $$.b = gen_mcode6($1, 0, 128,
 				    $$.q = $<blk>0.q);
 #else
 				  bpf_error("'ip6addr' not supported "
 					"in this configuration");
 #endif /*INET6*/
 				}
 	| EID			{ 
 				  $$.b = gen_ecode($1, $$.q = $<blk>0.q);
 				  /*
 				   * $1 was allocated by "pcap_ether_aton()",
 				   * so we must free it now that we're done
 				   * with it.
 				   */
 				  free($1);
 				}
 	| AID			{
 				  $$.b = gen_acode($1, $$.q = $<blk>0.q);
 				  /*
 				   * $1 was allocated by "pcap_ether_aton()",
 				   * so we must free it now that we're done
 				   * with it.
 				   */
 				  free($1);
 				}
 	| not id		{ gen_not($2.b); $$ = $2; }
 	;
 /* `!' and `(' take the value just below them on the stack ($<blk>0). */
 not:	  '!'			{ $$ = $<blk>0; }
 	;
 paren:	  '('			{ $$ = $<blk>0; }
 	;
 /*
  * Contents of a parenthesized id list: ids joined by `and'/`or'.  As in
  * `expr', the right-hand operand becomes the value of the result.
  */
 pid:	  nid
 	| qid and id		{ gen_and($1.b, $3.b); $$ = $3; }
 	| qid or id		{ gen_or($1.b, $3.b); $$ = $3; }
 	;
 /* Left operand inside an id list: a number or a nested pid. */
 qid:	  pnum			{ $$.b = gen_ncode(NULL, (bpf_u_int32)$1,
 						   $$.q = $<blk>0.q); }
 	| pid
 	;
 /* A relational term, optionally negated with gen_not(). */
 term:	  rterm
 	| not term		{ gen_not($2.b); $$ = $2; }
 	;
 /*
  * Qualifier prefix of an id.  QSET() stores the protocol, direction and
  * address-type qualifiers, with Q_DEFAULT for the ones not written.
  */
 head:	  pqual dqual aqual	{ QSET($$.q, $1, $2, $3); }
 	| pqual dqual		{ QSET($$.q, $1, $2, Q_DEFAULT); }
 	| pqual aqual		{ QSET($$.q, $1, Q_DEFAULT, $2); }
 	| pqual PROTO		{ QSET($$.q, $1, Q_DEFAULT, Q_PROTO); }
 	| pqual PROTOCHAIN	{ QSET($$.q, $1, Q_DEFAULT, Q_PROTOCHAIN); }
 	| pqual ndaqual		{ QSET($$.q, $1, Q_DEFAULT, $2); }
 	;
 /*
  * The basic term forms.  Every alternative except `head id' and the
  * parenthesized expression sets the result qualifier to qerr.
  */
 rterm:	  head id		{ $$ = $2; }
 	| paren expr ')'	{ $$.b = $2.b; $$.q = $1.q; }
 	| pname			{ $$.b = gen_proto_abbrev($1); $$.q = qerr; }
 	| arth relop arth	{ $$.b = gen_relation($2, $1, $3, 0);
 				  $$.q = qerr; }
 	| arth irelop arth	{ $$.b = gen_relation($2, $1, $3, 1);
 				  $$.q = qerr; }
 	| other			{ $$.b = $1; $$.q = qerr; }
 	| atmtype		{ $$.b = gen_atmtype_abbrev($1); $$.q = qerr; }
 	| atmmultitype		{ $$.b = gen_atmmulti_abbrev($1); $$.q = qerr; }
 	| atmfield atmvalue	{ $$.b = $2.b; $$.q = qerr; }
 	| mtp2type		{ $$.b = gen_mtp2type_abbrev($1); $$.q = qerr; }
 	| mtp3field mtp3value	{ $$.b = $2.b; $$.q = qerr; }
 	;
 /* protocol level qualifiers; an absent qualifier yields Q_DEFAULT */
 pqual:	  pname
 	|			{ $$ = Q_DEFAULT; }
 	;
 /* 'direction' qualifiers, each mapped to the matching Q_* direction code */
 dqual:	  SRC			{ $$ = Q_SRC; }
 	| DST			{ $$ = Q_DST; }
 	| SRC OR DST		{ $$ = Q_OR; }
 	| DST OR SRC		{ $$ = Q_OR; }
 	| SRC AND DST		{ $$ = Q_AND; }
 	| DST AND SRC		{ $$ = Q_AND; }
 	| ADDR1			{ $$ = Q_ADDR1; }
 	| ADDR2			{ $$ = Q_ADDR2; }
 	| ADDR3			{ $$ = Q_ADDR3; }
 	| ADDR4			{ $$ = Q_ADDR4; }
 	| RA			{ $$ = Q_RA; }
 	| TA			{ $$ = Q_TA; }
 	;
 /* address type qualifiers */
 aqual:	  HOST			{ $$ = Q_HOST; }
 	| NET			{ $$ = Q_NET; }
 	| PORT			{ $$ = Q_PORT; }
 	| PORTRANGE		{ $$ = Q_PORTRANGE; }
 	;
 /* non-directional address type qualifiers */
 ndaqual:  GATEWAY		{ $$ = Q_GATEWAY; }
 	;
 /* Protocol keywords, each mapped to the matching Q_* protocol code. */
 pname:	  LINK			{ $$ = Q_LINK; }
 	| IP			{ $$ = Q_IP; }
 	| ARP			{ $$ = Q_ARP; }
 	| RARP			{ $$ = Q_RARP; }
 	| SCTP			{ $$ = Q_SCTP; }
 	| TCP			{ $$ = Q_TCP; }
 	| UDP			{ $$ = Q_UDP; }
 	| ICMP			{ $$ = Q_ICMP; }
 	| IGMP			{ $$ = Q_IGMP; }
 	| IGRP			{ $$ = Q_IGRP; }
 	| PIM			{ $$ = Q_PIM; }
 	| VRRP			{ $$ = Q_VRRP; }
 	| CARP 			{ $$ = Q_CARP; }
 	| ATALK			{ $$ = Q_ATALK; }
 	| AARP			{ $$ = Q_AARP; }
 	| DECNET		{ $$ = Q_DECNET; }
 	| LAT			{ $$ = Q_LAT; }
 	| SCA			{ $$ = Q_SCA; }
 	| MOPDL			{ $$ = Q_MOPDL; }
 	| MOPRC			{ $$ = Q_MOPRC; }
 	| IPV6			{ $$ = Q_IPV6; }
 	| ICMPV6		{ $$ = Q_ICMPV6; }
 	| AH			{ $$ = Q_AH; }
 	| ESP			{ $$ = Q_ESP; }
 	| ISO			{ $$ = Q_ISO; }
 	| ESIS			{ $$ = Q_ESIS; }
 	| ISIS			{ $$ = Q_ISIS; }
 	| L1			{ $$ = Q_ISIS_L1; }
 	| L2			{ $$ = Q_ISIS_L2; }
 	| IIH			{ $$ = Q_ISIS_IIH; }
 	| LSP			{ $$ = Q_ISIS_LSP; }
 	| SNP			{ $$ = Q_ISIS_SNP; }
 	| PSNP			{ $$ = Q_ISIS_PSNP; }
 	| CSNP			{ $$ = Q_ISIS_CSNP; }
 	| CLNP			{ $$ = Q_CLNP; }
 	| STP			{ $$ = Q_STP; }
 	| IPX			{ $$ = Q_IPX; }
 	| NETBEUI		{ $$ = Q_NETBEUI; }
 	| RADIO			{ $$ = Q_RADIO; }
 	;
 /*
  * Primitives that take no id: each calls the gen_* routine for its
  * keyword.  VLAN and MPLS pass -1 when no number follows the keyword.
  */
 other:	  pqual TK_BROADCAST	{ $$ = gen_broadcast($1); }
 	| pqual TK_MULTICAST	{ $$ = gen_multicast($1); }
 	| LESS NUM		{ $$ = gen_less($2); }
 	| GREATER NUM		{ $$ = gen_greater($2); }
 	| CBYTE NUM byteop NUM	{ $$ = gen_byteop($3, $2, $4); }
 	| INBOUND		{ $$ = gen_inbound(0); }
 	| OUTBOUND		{ $$ = gen_inbound(1); }
 	| VLAN pnum		{ $$ = gen_vlan($2); }
 	| VLAN			{ $$ = gen_vlan(-1); }
 	| MPLS pnum		{ $$ = gen_mpls($2); }
 	| MPLS			{ $$ = gen_mpls(-1); }
 	| PPPOED		{ $$ = gen_pppoed(); }
 	| PPPOES		{ $$ = gen_pppoes(); }
 	| pfvar			{ $$ = $1; }
 	| pqual p80211		{ $$ = $2; }
 	;
 
 /* pf keywords; each passes its argument to the matching gen_pf_* routine. */
 pfvar:	  PF_IFNAME ID		{ $$ = gen_pf_ifname($2); }
 	| PF_RSET ID		{ $$ = gen_pf_ruleset($2); }
 	| PF_RNR NUM		{ $$ = gen_pf_rnr($2); }
 	| PF_SRNR NUM		{ $$ = gen_pf_srnr($2); }
 	| PF_REASON reason	{ $$ = gen_pf_reason($2); }
 	| PF_ACTION action	{ $$ = gen_pf_action($2); }
 	;
 
 /*
  * 802.11 type, subtype and direction tests.  The second argument of
  * gen_p80211_type() is built from the type and/or subtype mask constants.
  */
 p80211:   TYPE type SUBTYPE subtype
 				{ $$ = gen_p80211_type($2 | $4,
 					IEEE80211_FC0_TYPE_MASK |
 					IEEE80211_FC0_SUBTYPE_MASK);
 				}
 	| TYPE type		{ $$ = gen_p80211_type($2,
 					IEEE80211_FC0_TYPE_MASK);
 				}
 	| SUBTYPE type_subtype	{ $$ = gen_p80211_type($2,
 					IEEE80211_FC0_TYPE_MASK |
 					IEEE80211_FC0_SUBTYPE_MASK);
 				}
 	| DIR dir		{ $$ = gen_p80211_fcdir($2); }
 	;
 
 /* 802.11 frame type: a number, or a name looked up in ieee80211_types. */
 type:	  NUM
 	| ID			{ $$ = str2tok($1, ieee80211_types);
 				  if ($$ == -1)
 				  	bpf_error("unknown 802.11 type name");
 				}
 	;
 
 /*
  * 802.11 subtype following "type X subtype": a number, or a name looked up
  * in the subtype table that belongs to the type value found at stack
  * position $<i>-1.
  */
 subtype:  NUM
 	| ID			{ const struct tok *types = NULL;
 				  int i;
 				  /* find the subtype table for the given type */
 				  for (i = 0;; i++) {
 				  	if (ieee80211_type_subtypes[i].tok == NULL) {
 				  		/* Ran out of types */
 						bpf_error("unknown 802.11 type");
 						break;
 					}
 					if ($<i>-1 == ieee80211_type_subtypes[i].type) {
 						types = ieee80211_type_subtypes[i].tok;
 						break;
 					}
 				  }
 
 				  $$ = str2tok($1, types);
 				  if ($$ == -1)
 					bpf_error("unknown 802.11 subtype name");
 				}
 	;
 
 /*
  * Subtype name given without a type: every subtype table is searched in
  * turn, and the value of the first match is OR-ed with the type that owns
  * the table.
  */
 type_subtype:	ID		{ int i;
 				  for (i = 0;; i++) {
 				  	if (ieee80211_type_subtypes[i].tok == NULL) {
 				  		/* Ran out of types */
 						bpf_error("unknown 802.11 type name");
 						break;
 					}
 					$$ = str2tok($1, ieee80211_type_subtypes[i].tok);
 					if ($$ != -1) {
 						$$ |= ieee80211_type_subtypes[i].type;
 						break;
 					}
 				  }
 				}
 		;
 
 /*
  * 802.11 direction: a number, or one of the names "nods", "tods", "fromds"
  * and "dstods" (matched with pcap_strcasecmp()).
  */
 dir:	  NUM
 	| ID			{ if (pcap_strcasecmp($1, "nods") == 0)
 					$$ = IEEE80211_FC1_DIR_NODS;
 				  else if (pcap_strcasecmp($1, "tods") == 0)
 					$$ = IEEE80211_FC1_DIR_TODS;
 				  else if (pcap_strcasecmp($1, "fromds") == 0)
 					$$ = IEEE80211_FC1_DIR_FROMDS;
 				  else if (pcap_strcasecmp($1, "dstods") == 0)
 					$$ = IEEE80211_FC1_DIR_DSTODS;
 				  else
 					bpf_error("unknown 802.11 direction");
 				}
 	;
 
 /* pf reason: a number, or a name translated by pfreason_to_num(). */
 reason:	  NUM			{ $$ = $1; }
 	| ID			{ $$ = pfreason_to_num($1); }
 	;
 
 /* pf action: a name translated by pfaction_to_num(). */
 action:	  ID			{ $$ = pfaction_to_num($1); }
 	;
 
 /* Relational operators passed to gen_relation() with a last argument of 0. */
 relop:	  '>'			{ $$ = BPF_JGT; }
 	| GEQ			{ $$ = BPF_JGE; }
 	| '='			{ $$ = BPF_JEQ; }
 	;
 /*
  * Operators expressed through the opposite test: each maps to the same
  * BPF_J* codes as above, and the rules that use irelop pass 1 as the last
  * argument.
  */
 irelop:	  LEQ			{ $$ = BPF_JGT; }
 	| '<'			{ $$ = BPF_JGE; }
 	| NEQ			{ $$ = BPF_JEQ; }
 	;
 /* Arithmetic expression: a number loaded with gen_loadi(), or a narth. */
 arth:	  pnum			{ $$ = gen_loadi($1); }
 	| narth
 	;
 /*
  * Non-constant arithmetic: packet loads (size 1 when no size is written),
  * binary operators, unary minus, parentheses and the `len' keyword.
  */
 narth:	  pname '[' arth ']'		{ $$ = gen_load($1, $3, 1); }
 	| pname '[' arth ':' NUM ']'	{ $$ = gen_load($1, $3, $5); }
 	| arth '+' arth			{ $$ = gen_arth(BPF_ADD, $1, $3); }
 	| arth '-' arth			{ $$ = gen_arth(BPF_SUB, $1, $3); }
 	| arth '*' arth			{ $$ = gen_arth(BPF_MUL, $1, $3); }
 	| arth '/' arth			{ $$ = gen_arth(BPF_DIV, $1, $3); }
 	| arth '&' arth			{ $$ = gen_arth(BPF_AND, $1, $3); }
 	| arth '|' arth			{ $$ = gen_arth(BPF_OR, $1, $3); }
 	| arth LSH arth			{ $$ = gen_arth(BPF_LSH, $1, $3); }
 	| arth RSH arth			{ $$ = gen_arth(BPF_RSH, $1, $3); }
 	| '-' arth %prec UMINUS		{ $$ = gen_neg($2); }
 	| paren narth ')'		{ $$ = $2; }
 	| LEN				{ $$ = gen_loadlen(); }
 	;
 /* Operator of a CBYTE test; the value is the operator character itself. */
 byteop:	  '&'			{ $$ = '&'; }
 	| '|'			{ $$ = '|'; }
 	| '<'			{ $$ = '<'; }
 	| '>'			{ $$ = '>'; }
 	| '='			{ $$ = '='; }
 	;
 /* A number, optionally wrapped in any depth of parentheses. */
 pnum:	  NUM
 	| paren pnum ')'	{ $$ = $2; }
 	;
 /* ATM keywords handled by gen_atmtype_abbrev() (see rterm). */
 atmtype: LANE			{ $$ = A_LANE; }
 	| LLC			{ $$ = A_LLC; }
 	| METAC			{ $$ = A_METAC;	}
 	| BCC			{ $$ = A_BCC; }
 	| OAMF4EC		{ $$ = A_OAMF4EC; }
 	| OAMF4SC		{ $$ = A_OAMF4SC; }
 	| SC			{ $$ = A_SC; }
 	| ILMIC			{ $$ = A_ILMIC; }
 	;
 /* ATM keywords handled by gen_atmmulti_abbrev() (see rterm). */
 atmmultitype: OAM		{ $$ = A_OAM; }
 	| OAMF4			{ $$ = A_OAMF4; }
 	| CONNECTMSG		{ $$ = A_CONNECTMSG; }
 	| METACONNECT		{ $$ = A_METACONNECT; }
 	;
 	/* ATM field types quantifier */
 atmfield: VPI			{ $$.atmfieldtype = A_VPI; }
 	| VCI			{ $$.atmfieldtype = A_VCI; }
 	;
 	/*
 	 * Value tested against an ATM field: a plain number, a relational
 	 * operator and number, or a parenthesized list of values.  The field
 	 * type is read from the value just below on the stack ($<blk>0).
 	 */
 atmvalue: atmfieldvalue
 	| relop NUM		{ $$.b = gen_atmfield_code($<blk>0.atmfieldtype, (bpf_int32)$2, (bpf_u_int32)$1, 0); }
 	| irelop NUM		{ $$.b = gen_atmfield_code($<blk>0.atmfieldtype, (bpf_int32)$2, (bpf_u_int32)$1, 1); }
 	| paren atmlistvalue ')' { $$.b = $2.b; $$.q = qerr; }
 	;
 	/* A single number, compared for equality (BPF_JEQ) with the field. */
 atmfieldvalue: NUM {
 	$$.atmfieldtype = $<blk>0.atmfieldtype;
 	if ($$.atmfieldtype == A_VPI ||
 	    $$.atmfieldtype == A_VCI)
 		$$.b = gen_atmfield_code($$.atmfieldtype, (bpf_int32) $1, BPF_JEQ, 0);
 	}
 	;
 	/* Field values joined by `or'; the blocks are combined with gen_or(). */
 atmlistvalue: atmfieldvalue
 	| atmlistvalue or atmfieldvalue { gen_or($1.b, $3.b); $$ = $3; }
 	;
 	/* MTP2 types quantifier */
 mtp2type: FISU			{ $$ = M_FISU; }
 	| LSSU			{ $$ = M_LSSU; }
 	| MSU			{ $$ = M_MSU; }
 	;
 	/* MTP3 field types quantifier */
 mtp3field: SIO			{ $$.mtp3fieldtype = M_SIO; }
 	| OPC			{ $$.mtp3fieldtype = M_OPC; }
 	| DPC			{ $$.mtp3fieldtype = M_DPC; }
 	| SLS                   { $$.mtp3fieldtype = M_SLS; }
 	;
 	/*
 	 * Value tested against an MTP3 field: a plain number, a relational
 	 * operator and number, or a parenthesized list of values.  The field
 	 * type is read from the value just below on the stack ($<blk>0).
 	 */
 mtp3value: mtp3fieldvalue
 	| relop NUM		{ $$.b = gen_mtp3field_code($<blk>0.mtp3fieldtype, (u_int)$2, (u_int)$1, 0); }
 	| irelop NUM		{ $$.b = gen_mtp3field_code($<blk>0.mtp3fieldtype, (u_int)$2, (u_int)$1, 1); }
 	| paren mtp3listvalue ')' { $$.b = $2.b; $$.q = qerr; }
 	;
 	/* A single number, compared for equality (BPF_JEQ) with the field. */
 mtp3fieldvalue: NUM {
 	$$.mtp3fieldtype = $<blk>0.mtp3fieldtype;
 	if ($$.mtp3fieldtype == M_SIO ||
 	    $$.mtp3fieldtype == M_OPC ||
 	    $$.mtp3fieldtype == M_DPC ||
 	    $$.mtp3fieldtype == M_SLS )
 		$$.b = gen_mtp3field_code($$.mtp3fieldtype, (u_int) $1, BPF_JEQ, 0);
 	}
 	;
 	/* Field values joined by `or'; the blocks are combined with gen_or(). */
 mtp3listvalue: mtp3fieldvalue
 	| mtp3listvalue or mtp3fieldvalue { gen_or($1.b, $3.b); $$ = $3; }
 	;
 %%
Index: stable/10/contrib/tcpdump/print-ip.c
===================================================================
--- stable/10/contrib/tcpdump/print-ip.c	(revision 263085)
+++ stable/10/contrib/tcpdump/print-ip.c	(revision 263086)
@@ -1,711 +1,713 @@
 /*
  * Copyright (c) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that: (1) source code distributions
  * retain the above copyright notice and this paragraph in its entirety, (2)
  * distributions including binary code include the above copyright notice and
  * this paragraph in its entirety in the documentation or other materials
  * provided with the distribution, and (3) all advertising materials mentioning
  * features or use of this software display the following acknowledgement:
  * ``This product includes software developed by the University of California,
  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  * the University nor the names of its contributors may be used to endorse
  * or promote products derived from this software without specific prior
  * written permission.
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  * $FreeBSD$
  */
 
 #ifndef lint
 static const char rcsid[] _U_ =
     "@(#) $Header: /tcpdump/master/tcpdump/print-ip.c,v 1.159 2007-09-14 01:29:28 guy Exp $ (LBL)";
 #endif
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include <tcpdump-stdinc.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "addrtoname.h"
 #include "interface.h"
 #include "extract.h"			/* must come after interface.h */
 
 #include "ip.h"
 #include "ipproto.h"
 
 /* Names printed for the IP option codes; used by ip_optprint(). */
 struct tok ip_option_values[] = {
     { IPOPT_EOL, "EOL" },
     { IPOPT_NOP, "NOP" },
     { IPOPT_TS, "timestamp" },
     { IPOPT_SECURITY, "security" },
     { IPOPT_RR, "RR" },
     { IPOPT_SSRR, "SSRR" },
     { IPOPT_LSRR, "LSRR" },
     { IPOPT_RA, "RA" },
     { IPOPT_RFC1393, "traceroute" },
     { 0, NULL }
 };
 
 /*
  * print the recorded route in an IP RR, LSRR or SSRR option.
  *
  * `cp' points at the first byte of the option and `length' is the option
  * length.  A bad length or pointer byte is reported but printing goes on;
  * only addresses that lie completely inside the option are printed.
  */
 static void
 ip_printroute(register const u_char *cp, u_int length)
 {
 	register u_int ptr;
 	register u_int len;
 
 	if (length < 3) {
 		printf(" [bad length %u]", length);
 		return;
 	}
 	if ((length + 1) & 3)
 		printf(" [bad length %u]", length);
 	ptr = cp[2] - 1;
 	if (ptr < 3 || ((ptr + 1) & 3) || ptr > length + 1)
 		printf(" [bad ptr %u]", cp[2]);
 
 	/*
 	 * Addresses start at offset 3 and are 4 bytes each.  Stop as soon
 	 * as fewer than 4 bytes are left so that a malformed length cannot
 	 * make us read an address that runs past the end of the option.
 	 */
 	for (len = 3; length - len >= 4; len += 4) {
 		printf(" %s", ipaddr_string(&cp[len]));
 		if (ptr > len)
 			printf(",");
 	}
 }
 
 /*
  * Pick the destination address for the UDP/TCP pseudo-header checksum.
  *
  * The options after the fixed header are walked; when an SSRR or LSRR
  * option of at least 7 bytes is found, its last 4 bytes are returned.
  * If there is no such option, or the options are truncated, the
  * destination address from the IP header is returned instead.
  */
 static u_int32_t
 ip_finddst(const struct ip *ip)
 {
 	const u_char *opt = (const u_char *)(ip + 1);
 	int remaining = (IP_HL(ip) << 2) - sizeof(struct ip);
 	int optlen;
 	u_int32_t dst;
 
 	while (remaining > 0) {
 		int code;
 
 		TCHECK(*opt);
 		code = *opt;
 		if (code == IPOPT_EOL)
 			break;
 		if (code == IPOPT_NOP) {
 			optlen = 1;
 		} else {
 			TCHECK(opt[1]);
 			optlen = opt[1];
 			if (optlen < 2)
 				break;
 		}
 		TCHECK2(*opt, optlen);
 		if ((code == IPOPT_SSRR || code == IPOPT_LSRR) &&
 		    optlen >= 7) {
 			memcpy(&dst, opt + optlen - 4, 4);
 			return dst;
 		}
 		opt += optlen;
 		remaining -= optlen;
 	}
 trunc:
 	memcpy(&dst, &ip->ip_dst.s_addr, sizeof(u_int32_t));
 	return dst;
 }
 
 /*
  * Compute a V4-style checksum over `len' bytes at `data', preceded by a
  * pseudo-header built from `ip' and `next_proto'.  Returns the result of
  * in_cksum().
  */
 int
 nextproto4_cksum(const struct ip *ip, const u_int8_t *data,
 		 u_int len, u_int next_proto)
 {
 	struct phdr {
 		u_int32_t src;
 		u_int32_t dst;
 		u_char mbz;
 		u_char proto;
 		u_int16_t len;
 	} pseudo;
 	struct cksum_vec parts[2];
 
 	/* source address, zero byte, protocol and length */
 	memcpy(&pseudo.src, &ip->ip_src.s_addr, sizeof(u_int32_t));
 	pseudo.mbz = 0;
 	pseudo.proto = next_proto;
 	pseudo.len = htons((u_int16_t)len);
 
 	/*
 	 * destination: taken from the header when the header length is
 	 * 5 words, otherwise worked out by ip_finddst()
 	 */
 	if (IP_HL(ip) != 5)
 		pseudo.dst = ip_finddst(ip);
 	else
 		memcpy(&pseudo.dst, &ip->ip_dst.s_addr, sizeof(u_int32_t));
 
 	parts[0].ptr = (const u_int8_t *)(void *)&pseudo;
 	parts[0].len = sizeof(pseudo);
 	parts[1].ptr = data;
 	parts[1].len = len;
 	return (in_cksum(parts, 2));
 }
 
 /*
  * Print an IP timestamp option.
  *
  * `cp' points at the first byte of the option and `length' is the option
  * length.  A bad length, pointer or type is reported in the output; only
  * entries that lie completely inside the option are printed.
  */
 static void
 ip_printts(register const u_char *cp, u_int length)
 {
 	register u_int ptr;
 	register u_int len;
 	int hoplen;
 	const char *type;
 
 	if (length < 4) {
 		printf("[bad length %u]", length);
 		return;
 	}
 	printf(" TS{");
 	/* an entry is 4 bytes for timestamp-only, 8 bytes otherwise */
 	hoplen = ((cp[3]&0xF) != IPOPT_TS_TSONLY) ? 8 : 4;
 	if ((length - 4) & (hoplen-1))
 		printf("[bad length %u]", length);
 	ptr = cp[2] - 1;
 	len = 0;
 	if (ptr < 4 || ((ptr - 4) & (hoplen-1)) || ptr > length + 1)
 		printf("[bad ptr %u]", cp[2]);
 	switch (cp[3]&0xF) {
 	case IPOPT_TS_TSONLY:
 		printf("TSONLY");
 		break;
 	case IPOPT_TS_TSANDADDR:
 		printf("TS+ADDR");
 		break;
 	/*
 	 * prespecified should really be 3, but some ones might send 2
 	 * instead, and the IPOPT_TS_PRESPEC constant can apparently
 	 * have both values, so we have to hard-code it here.
 	 */
 
 	case 2:
 		printf("PRESPEC2.0");
 		break;
 	case 3:			/* IPOPT_TS_PRESPEC */
 		printf("PRESPEC");
 		break;
 	default:
 		printf("[bad ts type %d]", cp[3]&0xF);
 		goto done;
 	}
 
 	type = " ";
 	/*
 	 * Entries start at offset 4.  Stop as soon as less than one whole
 	 * entry is left so that a malformed length cannot make us read an
 	 * entry that runs past the end of the option.
 	 */
 	for (len = 4; length - len >= (u_int)hoplen; len += hoplen) {
 		if (ptr == len)
 			type = " ^ ";
 		printf("%s%d@%s", type, EXTRACT_32BITS(&cp[len+hoplen-4]),
 		       hoplen!=8 ? "" : ipaddr_string(&cp[len]));
 		type = " ";
 	}
 
 done:
 	/* mark the pointer when it sits where the printing stopped */
 	printf("%s", ptr == len ? " ^ " : "");
 
 	/* the upper four bits of the flag byte count unrecorded hops */
 	if (cp[3]>>4)
 		printf(" [%d hops not recorded]} ", cp[3]>>4);
 	else
 		printf("}");
 }
 
 /*
  * print IP options.
  *
  * Walks the `length' bytes of options starting at `cp'.  The name of each
  * option is printed, separated by commas, and the timestamp, route and
  * router-alert options are decoded further.  Printing stops at an EOL
  * option, at an option whose length is invalid, or with "[|ip]" when the
  * captured data runs out.
  */
 static void
 ip_optprint(register const u_char *cp, u_int length)
 {
 	register u_int option_len;
 	const char *sep = "";
 
 	for (; length > 0; cp += option_len, length -= option_len) {
 		u_int option_code;
 
 		printf("%s", sep);
 		sep = ",";
 
 		TCHECK(*cp);
 		option_code = *cp;
 
                 printf("%s",
                         tok2str(ip_option_values,"unknown %u",option_code));
 
 		/* NOP and EOL are single bytes; all others carry a length byte */
 		if (option_code == IPOPT_NOP ||
                     option_code == IPOPT_EOL)
 			option_len = 1;
 
 		else {
 			TCHECK(cp[1]);
 			option_len = cp[1];
 			if (option_len < 2) {
 		                printf(" [bad length %u]", option_len);
 				return;
 			}
 		}
 
 		/* the option must fit in what is left of the options area */
 		if (option_len > length) {
 	                printf(" [bad length %u]", option_len);
 			return;
 		}
 
                 TCHECK2(*cp, option_len);
 
 		switch (option_code) {
 		case IPOPT_EOL:
 			return;
 
 		case IPOPT_TS:
 			ip_printts(cp, option_len);
 			break;
 
 		case IPOPT_RR:       /* fall through */
 		case IPOPT_SSRR:
 		case IPOPT_LSRR:
 			ip_printroute(cp, option_len);
 			break;
 
 		case IPOPT_RA:
 			if (option_len < 4) {
 				printf(" [bad length %u]", option_len);
 				break;
 			}
                         TCHECK(cp[3]);
 			/* the 16-bit value is only printed when non-zero */
                         if (EXTRACT_16BITS(&cp[2]) != 0)
                             printf(" value %u", EXTRACT_16BITS(&cp[2]));
 			break;
 
 		case IPOPT_NOP:       /* nothing to print - fall through */
 		case IPOPT_SECURITY:
 		default:
 			break;
 		}
 	}
 	return;
 
 trunc:
 	printf("[|ip]");
 }
 
 /* Flag bit that the table below labels "rsvd". */
 #define IP_RES 0x8000
 
 /* Labels for the IP_MF, IP_DF and IP_RES flag bits. */
 static struct tok ip_frag_values[] = {
         { IP_MF,        "+" },
         { IP_DF,        "DF" },
 	{ IP_RES,       "rsvd" }, /* The RFC3514 evil ;-) bit */
         { 0,            NULL }
 };
 
 /* Working state handed to ip_print_demux(). */
 struct ip_print_demux_state {
 	const struct ip *ip;	/* IP header, passed on to the protocol printers */
 	const u_char *cp;	/* current position in the payload */
 	u_int   len, off;	/* bytes left at cp; off is masked with IP_MF|IP_OFFMASK for tcp/udp/icmp */
 	u_char  nh;		/* protocol number that selects the printer */
 	int     advance;	/* result of the last ah/esp/ipcomp printer call */
 };
 
 /*
  * Hand the payload described by `ipds' to the printer for protocol
  * ipds->nh.
  *
  * For AH, ESP and IPCOMP the header printer returns how far to advance;
  * cp, len and nh are updated and the switch is entered again for the
  * protocol carried inside.  A result <= 0 from one of those printers
  * stops the processing.  Protocols with no printer are shown by name (or
  * as "ip-proto-N") followed by the payload length.
  */
 static void
 ip_print_demux(netdissect_options *ndo,
 	       struct ip_print_demux_state *ipds)
 {
 	struct protoent *proto;
 	struct cksum_vec vec[1];
 
 again:
 	switch (ipds->nh) {
 
 	case IPPROTO_AH:
 		/*
 		 * NOTE(review): this byte is read before ah_print() has had
 		 * a chance to bounds-check the header - confirm that callers
 		 * guarantee at least one captured byte at cp.
 		 */
 		ipds->nh = *ipds->cp;
 		ipds->advance = ah_print(ipds->cp);
 		if (ipds->advance <= 0)
 			break;
 		ipds->cp += ipds->advance;
 		ipds->len -= ipds->advance;
 		goto again;
 
 	case IPPROTO_ESP:
 	{
 		int enh, padlen;
 		ipds->advance = esp_print(ndo, ipds->cp, ipds->len,
 				    (const u_char *)ipds->ip,
 				    &enh, &padlen);
 		if (ipds->advance <= 0)
 			break;
 		ipds->cp += ipds->advance;
 		ipds->len -= ipds->advance + padlen;
 		ipds->nh = enh & 0xff;
 		goto again;
 	}
 
 	case IPPROTO_IPCOMP:
 	{
 		int enh;
 		ipds->advance = ipcomp_print(ipds->cp, &enh);
 		if (ipds->advance <= 0)
 			break;
 		ipds->cp += ipds->advance;
 		ipds->len -= ipds->advance;
 		ipds->nh = enh & 0xff;
 		goto again;
 	}
 
 	case IPPROTO_SCTP:
 		sctp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len);
 		break;
 
 	case IPPROTO_DCCP:
 		dccp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len);
 		break;
 
 	case IPPROTO_TCP:
 		/* pass on the MF bit plus the offset to detect fragments */
 		tcp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip,
 			  ipds->off & (IP_MF|IP_OFFMASK));
 		break;
 
 	case IPPROTO_UDP:
 		/* pass on the MF bit plus the offset to detect fragments */
 		udp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip,
 			  ipds->off & (IP_MF|IP_OFFMASK));
 		break;
 
 	case IPPROTO_ICMP:
 		/* pass on the MF bit plus the offset to detect fragments */
 		icmp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip,
 			   ipds->off & (IP_MF|IP_OFFMASK));
 		break;
 
 	case IPPROTO_PIGP:
 		/*
 		 * XXX - the current IANA protocol number assignments
 		 * page lists 9 as "any private interior gateway
 		 * (used by Cisco for their IGRP)" and 88 as
 		 * "EIGRP" from Cisco.
 		 *
 		 * Recent BSD <netinet/in.h> headers define
 		 * IP_PROTO_PIGP as 9 and IP_PROTO_IGRP as 88.
 		 * We define IP_PROTO_PIGP as 9 and
 		 * IP_PROTO_EIGRP as 88; those names better
 		 * match what the current protocol number
 		 * assignments say.
 		 */
 		igrp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip);
 		break;
 
 	case IPPROTO_EIGRP:
 		eigrp_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_ND:
 		/* len is unsigned, so print it with %u */
 		ND_PRINT((ndo, " nd %u", ipds->len));
 		break;
 
 	case IPPROTO_EGP:
 		egp_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_OSPF:
 		ospf_print(ipds->cp, ipds->len, (const u_char *)ipds->ip);
 		break;
 
 	case IPPROTO_IGMP:
 		igmp_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_IPV4:
 		/* DVMRP multicast tunnel (ip-in-ip encapsulation) */
 		ip_print(ndo, ipds->cp, ipds->len);
 		if (! vflag) {
 			ND_PRINT((ndo, " (ipip-proto-4)"));
 			return;
 		}
 		break;
 
 #ifdef INET6
 	case IPPROTO_IPV6:
 		/* ip6-in-ip encapsulation */
 		ip6_print(ndo, ipds->cp, ipds->len);
 		break;
 #endif /*INET6*/
 
 	case IPPROTO_RSVP:
 		rsvp_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_GRE:
 		/* do it */
 		gre_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_MOBILE:
 		mobile_print(ipds->cp, ipds->len);
 		break;
 
 	case IPPROTO_PIM:
 		/* the checksum over the whole payload is passed to pim_print() */
 		vec[0].ptr = ipds->cp;
 		vec[0].len = ipds->len;
 		pim_print(ipds->cp, ipds->len, in_cksum(vec, 1));
 		break;
 
 	case IPPROTO_VRRP:
 		/* the same protocol number is printed as CARP when packettype says so */
 		if (packettype == PT_CARP) {
 			if (vflag)
 				(void)printf("carp %s > %s: ",
 					     ipaddr_string(&ipds->ip->ip_src),
 					     ipaddr_string(&ipds->ip->ip_dst));
 			carp_print(ipds->cp, ipds->len, ipds->ip->ip_ttl);
 		} else {
 			if (vflag)
 				(void)printf("vrrp %s > %s: ",
 					     ipaddr_string(&ipds->ip->ip_src),
 					     ipaddr_string(&ipds->ip->ip_dst));
 			vrrp_print(ipds->cp, ipds->len, ipds->ip->ip_ttl);
 		}
 		break;
 
 	case IPPROTO_PGM:
 		pgm_print(ipds->cp, ipds->len, (const u_char *)ipds->ip);
 		break;
 
+#if defined(HAVE_NET_PFVAR_H)
 	case IPPROTO_PFSYNC:
 		pfsync_ip_print(ipds->cp, ipds->len);
 		break;
+#endif
 
 	default:
 		if (ndo->ndo_nflag==0 && (proto = getprotobynumber(ipds->nh)) != NULL)
 			ND_PRINT((ndo, " %s", proto->p_name));
 		else
 			ND_PRINT((ndo, " ip-proto-%d", ipds->nh));
 		/* len is unsigned, so print it with %u */
 		ND_PRINT((ndo, " %u", ipds->len));
 		break;
 	}
 }
 
 /*
  * Dispatch a payload to the protocol printers without parsing an IP
  * header first: a demux state is filled in by hand from the arguments
  * and passed to ip_print_demux().
  *
  * bp/length describe the payload, nh is the protocol number to
  * dispatch on, and bp2 is stored as the IP header pointer carried in
  * the demux state.
  */
 void
 ip_print_inner(netdissect_options *ndo,
 	       const u_char *bp,
 	       u_int length, u_int nh,
 	       const u_char *bp2)
 {
 	struct ip_print_demux_state demux;
 	struct ip_print_demux_state *st = &demux;
 
 	/* where the payload is and how long it is */
 	st->cp = bp;
 	st->len = length;
 
 	/* protocol to dispatch on and the header supplied by the caller */
 	st->nh = nh;
 	st->ip = (const struct ip *)bp2;
 
 	/* no fragment flag/offset bits; advance starts at zero */
 	st->off = 0;
 	st->advance = 0;
 
 	ip_print_demux(ndo, st);
 }
 
 
 /*
  * print an IP datagram.
  *
  * bp points at the start of the IPv4 header and length is the number
  * of bytes the enclosing layer attributes to this datagram.  The
  * header is validated (captured size, header length, total length),
  * printed in detail when vflag is set and, for a datagram whose
  * fragment offset is zero, the payload is handed to ip_print_demux().
  * For later fragments only the addresses and the protocol are printed.
  *
  * Side effect: ndo->ndo_snapend is lowered to the end of the datagram
  * when the datagram ends before the captured data does; it is not
  * restored on return.
  */
 void
 ip_print(netdissect_options *ndo,
 	 const u_char *bp,
 	 u_int length)
 {
 	struct ip_print_demux_state  ipd;
 	struct ip_print_demux_state *ipds=&ipd;
 	const u_char *ipend;
 	u_int hlen;
 	struct cksum_vec vec[1];
 	u_int16_t sum, ip_sum;
 	struct protoent *proto;
 
 	/*
 	 * The version nibble lives in the first header byte; do not
 	 * read it unless at least that byte was captured.
 	 */
 	if (bp >= ndo->ndo_snapend) {
 		printf("[|ip]");
 		return;
 	}
 
 	ipds->ip = (const struct ip *)bp;
 	if (IP_V(ipds->ip) != 4) { /* print version if != 4 */
 		printf("IP%u ", IP_V(ipds->ip));
 		if (IP_V(ipds->ip) == 6)
 			printf(", wrong link-layer encapsulation");
 	} else if (!eflag)
 		printf("IP ");
 
 	/* the fixed part of the header must be entirely captured */
 	if ((const u_char *)(ipds->ip + 1) > ndo->ndo_snapend) {
 		printf("[|ip]");
 		return;
 	}
 	if (length < sizeof (struct ip)) {
 		(void)printf("truncated-ip %u", length);
 		return;
 	}
 	hlen = IP_HL(ipds->ip) * 4;
 	if (hlen < sizeof (struct ip)) {
 		(void)printf("bad-hlen %u", hlen);
 		return;
 	}
 
 	ipds->len = EXTRACT_16BITS(&ipds->ip->ip_len);
 	if (length < ipds->len)
 		(void)printf("truncated-ip - %u bytes missing! ",
 			ipds->len - length);
 	if (ipds->len < hlen) {
 #ifdef GUESS_TSO
 		if (ipds->len) {
 			(void)printf("bad-len %u", ipds->len);
 			return;
 		}
 		else {
 			/* we guess that it is a TSO send */
 			ipds->len = length;
 		}
 #else
 		(void)printf("bad-len %u", ipds->len);
 		return;
 #endif /* GUESS_TSO */
 	}
 
 	/*
 	 * Cut off the snapshot length to the end of the IP payload.
 	 */
 	ipend = bp + ipds->len;
 	if (ipend < ndo->ndo_snapend)
 		ndo->ndo_snapend = ipend;
 
 	/* from here on ipds->len is the payload length */
 	ipds->len -= hlen;
 
 	ipds->off = EXTRACT_16BITS(&ipds->ip->ip_off);
 
 	if (vflag) {
 		(void)printf("(tos 0x%x", (int)ipds->ip->ip_tos);
 		/* ECN bits */
 		if (ipds->ip->ip_tos & 0x03) {
 			switch (ipds->ip->ip_tos & 0x03) {
 			case 1:
 				(void)printf(",ECT(1)");
 				break;
 			case 2:
 				(void)printf(",ECT(0)");
 				break;
 			case 3:
 				(void)printf(",CE");
 			}
 		}
 
 		if (ipds->ip->ip_ttl >= 1)
 			(void)printf(", ttl %u", ipds->ip->ip_ttl);
 
 		/*
 		 * for the firewall guys, print id, offset.
 		 * On all but the last stick a "+" in the flags portion.
 		 * For unfragmented datagrams, note the don't fragment flag.
 		 */
 
 		(void)printf(", id %u, offset %u, flags [%s], proto %s (%u)",
 			     EXTRACT_16BITS(&ipds->ip->ip_id),
 			     (ipds->off & IP_OFFMASK) * 8,
 			     bittok2str(ip_frag_values, "none", ipds->off&0xe000),
 			     tok2str(ipproto_values,"unknown",ipds->ip->ip_p),
 			     ipds->ip->ip_p);
 
 		(void)printf(", length %u", EXTRACT_16BITS(&ipds->ip->ip_len));
 
 		/* hlen >= sizeof(struct ip) was established above */
 		if (hlen > sizeof(struct ip)) {
 			printf(", options (");
 			ip_optprint((u_char *)(ipds->ip + 1), hlen - sizeof(struct ip));
 			printf(")");
 		}
 
 		/* verify the header checksum only if the whole header was captured */
 		if (!Kflag && (const u_char *)ipds->ip + hlen <= ndo->ndo_snapend) {
 			vec[0].ptr = (const u_int8_t *)(void *)ipds->ip;
 			vec[0].len = hlen;
 			sum = in_cksum(vec, 1);
 			if (sum != 0) {
 				ip_sum = EXTRACT_16BITS(&ipds->ip->ip_sum);
 				(void)printf(", bad cksum %x (->%x)!", ip_sum,
 					     in_cksum_shouldbe(ip_sum, sum));
 			}
 		}
 
 		printf(")\n    ");
 	}
 
 	/*
 	 * If this is fragment zero, hand it to the next higher
 	 * level protocol.
 	 */
 	if ((ipds->off & IP_OFFMASK) == 0) {
 		ipds->cp = (const u_char *)ipds->ip + hlen;
 		ipds->nh = ipds->ip->ip_p;
 
 		/* these four protocols do not get the address pair printed here */
 		if (ipds->nh != IPPROTO_TCP && ipds->nh != IPPROTO_UDP &&
 		    ipds->nh != IPPROTO_SCTP && ipds->nh != IPPROTO_DCCP) {
 			(void)printf("%s > %s: ",
 				     ipaddr_string(&ipds->ip->ip_src),
 				     ipaddr_string(&ipds->ip->ip_dst));
 		}
 		ip_print_demux(ndo, ipds);
 	} else {
 		/* Ultra quiet now means that all this stuff should be suppressed */
 		if (qflag > 1) return;
 
 		/*
 		 * this isn't the first frag, so we're missing the
 		 * next level protocol header.  print the ip addr
 		 * and the protocol.
 		 */
 		(void)printf("%s > %s:", ipaddr_string(&ipds->ip->ip_src),
 			     ipaddr_string(&ipds->ip->ip_dst));
 		if (!ndo->ndo_nflag && (proto = getprotobynumber(ipds->ip->ip_p)) != NULL)
 			(void)printf(" %s", proto->p_name);
 		else
 			(void)printf(" ip-proto-%d", ipds->ip->ip_p);
 	}
 }
 
 /*
  * Print a packet that may be IPv4 or IPv6: the version field in the
  * first header byte selects ip_print() or (with INET6) ip6_print().
  * Any other version is reported as "unknown ip <version>".
  */
 void
 ipN_print(register const u_char *bp, register u_int length)
 {
 	struct ip hdr;
 
 	if (length < 4) {
 		(void)printf("truncated-ip %u", length);
 		return;
 	}
 	/* the copy below reads 4 bytes; make sure they were captured */
 	if (gndo->ndo_snapend - bp < 4) {
 		(void)printf("[|ip]");
 		return;
 	}
 	/* copy into a local so the version can be read from aligned storage */
 	memcpy (&hdr, bp, 4);
 	switch (IP_V(&hdr)) {
 	case 4:
 		ip_print (gndo, bp, length);
 		return;
 #ifdef INET6
 	case 6:
 		ip6_print (gndo, bp, length);
 		return;
 #endif
 	default:
 		(void)printf("unknown ip %d", IP_V(&hdr));
 		return;
 	}
 }
 
 /*
  * Local Variables:
  * c-style: whitesmith
  * c-basic-offset: 8
  * End:
  */
 
 
Index: stable/10/etc/mtree/BSD.include.dist
===================================================================
--- stable/10/etc/mtree/BSD.include.dist	(revision 263085)
+++ stable/10/etc/mtree/BSD.include.dist	(revision 263086)
@@ -1,340 +1,344 @@
 # $FreeBSD$
 #
 # Please see the file src/etc/mtree/README before making changes to this file.
 #
 
 /set type=dir uname=root gname=wheel mode=0755
 .
     altq
     ..
     arpa
     ..
     bsm
     ..
     bsnmp
     ..
     c++
         4.2
             backward
             ..
             bits
             ..
             debug
             ..
             ext
                 pb_ds
                     detail
                         basic_tree_policy
                         ..
                         bin_search_tree_
                         ..
                         binary_heap_
                         ..
                         binomial_heap_
                         ..
                         binomial_heap_base_
                         ..
                         cc_hash_table_map_
                         ..
                         eq_fn
                         ..
                         gp_hash_table_map_
                         ..
                         hash_fn
                         ..
                         left_child_next_sibling_heap_
                         ..
                         list_update_map_
                         ..
                         list_update_policy
                         ..
                         ov_tree_map_
                         ..
                         pairing_heap_
                         ..
                         pat_trie_
                         ..
                         rb_tree_map_
                         ..
                         rc_binomial_heap_
                         ..
                         resize_policy
                         ..
                         splay_tree_
                         ..
                         thin_heap_
                         ..
                         tree_policy
                         ..
                         trie_policy
                         ..
                         unordered_iterator
                         ..
                     ..
                 ..
             ..
             tr1
             ..
         ..
         v1
             experimental
             ..
             ext
             ..
             tr1
             ..
         ..
     ..
     cam
         ata
         ..
         scsi
         ..
     ..
     clang
         3.3
         ..
     ..
     crypto
     ..
     dev
         acpica
         ..
         agp
         ..
         an
         ..
         bktr
         ..
         ciss
         ..
         filemon
         ..
         firewire
         ..
         hwpmc
         ..
         ic
         ..
         ieee488
         ..
         iicbus
         ..
         io
         ..
         lmc
         ..
         mfi
         ..
         mpt
             mpilib
             ..
         ..
         nand
         ..
         nvme
         ..
         ofw
         ..
         pbio
         ..
         pci
         ..
         powermac_nvram
         ..
         ppbus
         ..
         smbus
         ..
         speaker
         ..
         usb
         ..
         utopia
         ..
         vkbd
         ..
         wi
         ..
     ..
     edit
         readline
         ..
     ..
     fs
         devfs
         ..
         fdescfs
         ..
         msdosfs
         ..
         nandfs
         ..
         nfs
         ..
         nullfs
         ..
         procfs
         ..
         smbfs
         ..
         udf
         ..
         unionfs
         ..
     ..
     gcc
         4.2
         ..
     ..
     geom
         cache
         ..
         concat
         ..
         eli
         ..
         gate
         ..
         journal
         ..
         label
         ..
         mirror
         ..
         mountver
         ..
         multipath
         ..
         nop
         ..
         raid
         ..
         raid3
         ..
         shsec
         ..
         stripe
         ..
         virstor
         ..
     ..
     gnu
         posix
         ..
     ..
     gpib
     ..
     gssapi
     ..
     infiniband
         complib 
         ..
         iba
         ..
         opensm
         ..
         vendor
         ..
     ..
     isofs
         cd9660
         ..
     ..
     kadm5
     ..
     krb5
     ..
     libmilter
     ..
     lzma
     ..
     machine
         pc
         ..
     ..
     net
     ..
     net80211
     ..
     netatalk
     ..
     netgraph
         atm
         ..
         bluetooth
             include
             ..
         ..
         netflow
         ..
     ..
     netinet
     ..
     netinet6
     ..
     netipsec
     ..
     netipx
     ..
     netnatm
         api
         ..
         msg
         ..
         saal
         ..
         sig
         ..
     ..
+    netpfil
+        pf
+        ..
+    ..
     netsmb
     ..
     nfs
     ..
     nfsclient
     ..
     nfsserver
     ..
     openssl
     ..
     pcap
     ..
     protocols
     ..
     rdma
     ..
     readline
     ..
     rpc
     ..
     rpcsvc
     ..
     security
         audit
         ..
         mac_biba
         ..
         mac_bsdextended
         ..
         mac_lomac
         ..
         mac_mls
         ..
         mac_partition
         ..
     ..
     ssp
     ..
     sys
     ..
     ufs
         ffs
         ..
         ufs
         ..
     ..
     vm
     ..
     xlocale
     ..
 ..
Index: stable/10/include/Makefile
===================================================================
--- stable/10/include/Makefile	(revision 263085)
+++ stable/10/include/Makefile	(revision 263086)
@@ -1,334 +1,341 @@
 #	@(#)Makefile	8.2 (Berkeley) 1/4/94
 # $FreeBSD$
 #
 # Doing a "make install" builds /usr/include.
 
 .include <bsd.own.mk>
 
 CLEANFILES= osreldate.h version vers.c
 SUBDIR= arpa gssapi protocols rpcsvc rpc xlocale
 INCS=	a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \
 	db.h \
 	dirent.h dlfcn.h elf.h elf-hints.h err.h fmtmsg.h fnmatch.h fstab.h \
 	fts.h ftw.h getopt.h glob.h grp.h gssapi.h \
 	ieeefp.h ifaddrs.h \
 	inttypes.h iso646.h kenv.h langinfo.h libgen.h limits.h link.h \
 	locale.h malloc.h malloc_np.h memory.h monetary.h mpool.h mqueue.h \
 	ndbm.h netconfig.h \
 	netdb.h nl_types.h nlist.h nss.h nsswitch.h paths.h \
 	printf.h proc_service.h pthread.h \
 	pthread_np.h pwd.h ranlib.h readpassphrase.h regex.h \
 	res_update.h resolv.h runetype.h search.h semaphore.h setjmp.h \
 	signal.h spawn.h stab.h stdalign.h stdbool.h stddef.h \
 	stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \
 	strings.h sysexits.h tar.h termios.h tgmath.h \
 	time.h timeconv.h timers.h ttyent.h \
 	uchar.h ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \
 	wchar.h wctype.h wordexp.h xlocale.h
 
 .PATH: ${.CURDIR}/../contrib/libc-vis
 INCS+=	vis.h
 
 MHDRS=	float.h floatingpoint.h stdarg.h
 
 PHDRS=	sched.h _semaphore.h
 
 LHDRS=	aio.h errno.h fcntl.h linker_set.h poll.h stdatomic.h stdint.h \
 	syslog.h ucontext.h
 
 LDIRS=	bsm cam geom net net80211 netatalk netgraph netinet netinet6 \
 	netipsec ${_netipx} netnatm netsmb \
 	nfs nfsclient nfsserver \
 	sys vm
 
 LSUBDIRS=	cam/ata cam/scsi \
 	dev/acpica dev/agp dev/an dev/bktr dev/ciss dev/filemon dev/firewire \
 	dev/hwpmc \
 	dev/ic dev/iicbus ${_dev_ieee488} dev/io dev/lmc dev/mfi dev/nvme \
 	dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/smbus \
 	dev/speaker dev/usb dev/utopia dev/vkbd dev/wi \
 	fs/devfs fs/fdescfs fs/msdosfs fs/nandfs fs/nfs fs/nullfs \
 	fs/procfs fs/smbfs fs/udf fs/unionfs \
 	geom/cache geom/concat geom/eli geom/gate geom/journal geom/label \
 	geom/mirror geom/mountver geom/multipath geom/nop \
 	geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
 	netgraph/atm netgraph/netflow \
 	security/audit \
 	security/mac_biba security/mac_bsdextended security/mac_lomac \
 	security/mac_mls security/mac_partition \
 	ufs/ffs ufs/ufs
 
 LSUBSUBDIRS=	dev/mpt/mpilib
 
 .if ${MACHINE_ARCH} == "powerpc" || ${MACHINE_ARCH} == "powerpc64"
 _dev_powermac_nvram=	dev/powermac_nvram
 .endif
 
 .if ${MK_GPIB} != "no"
 _dev_ieee488=	dev/ieee488
 .endif
 
 .if ${MK_HESIOD} != "no"
 INCS+=	hesiod.h
 .endif
 
 .if ${MK_BLUETOOTH} != "no"
 LSUBSUBDIRS+=	netgraph/bluetooth/include
 .endif
 
 # XXX unconditionally needed by <netsmb/netbios.h>
 #.if ${MK_IPX} != "no"
 _netipx=	netipx
 #.endif
 
 # Handle the #define aliases for libiconv
 .if ${MK_ICONV} == "yes"
 INCS+=		iconv.h
 .endif
 	
 
 # Define SHARED to indicate whether you want symbolic links to the system
 # source (``symlinks''), or a separate copy (``copies'').  ``symlinks'' is
 # probably only useful for developers and should be avoided if you do not
 # wish to tie your /usr/include and /usr/src together.
 #SHARED=	symlinks
 SHARED?=	copies
 
 INCS+=	osreldate.h
 
 SYSDIR=			${.CURDIR}/../sys
 NEWVERS_SH=		${SYSDIR}/conf/newvers.sh
 PARAM_H=		${SYSDIR}/sys/param.h
 MK_OSRELDATE_SH=	${.CURDIR}/mk-osreldate.sh
 
 osreldate.h vers.c: ${NEWVERS_SH} ${PARAM_H} ${MK_OSRELDATE_SH}
 	env ECHO="${ECHO}" \
 	    MAKE="${MAKE}" \
 	    NEWVERS_SH=${NEWVERS_SH} \
 	    PARAM_H=${PARAM_H} \
 	    SYSDIR=${SYSDIR} \
 	    sh ${MK_OSRELDATE_SH}
 
 .for i in ${LHDRS}
 INCSLINKS+=	sys/$i ${INCLUDEDIR}/$i
 .endfor
 .for i in ${MHDRS}
 INCSLINKS+=	machine/$i ${INCLUDEDIR}/$i
 .endfor
 .for i in ${PHDRS}
 INCSLINKS+=	sys/$i ${INCLUDEDIR}/$i
 .endfor
 
 .if ${MACHINE} != ${MACHINE_CPUARCH}
 _MARCHS=	${MACHINE_CPUARCH}
 .endif
 .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
 _MARCHS+=	x86
 .endif
 
 .include <bsd.prog.mk>
 
 installincludes: ${SHARED}
 ${SHARED}: compat
 
 # Take care of stale directory-level symlinks.
 compat:
 .for i in ${LDIRS} ${LSUBDIRS} machine ${_MARCHS} crypto
 	if [ -L ${DESTDIR}${INCLUDEDIR}/$i ]; then \
 		rm -f ${DESTDIR}${INCLUDEDIR}/$i; \
 	fi
 .endfor
 	mtree -deU ${MTREE_FOLLOWS_SYMLINKS} \
 	    -f ${.CURDIR}/../etc/mtree/BSD.include.dist \
 	    -p ${DESTDIR}${INCLUDEDIR}
 
 copies:
 .for i in ${LDIRS} ${LSUBDIRS} ${LSUBSUBDIRS} altq crypto machine machine/pc \
 	${_MARCHS}
 .if exists(${DESTDIR}${INCLUDEDIR}/$i)
 	cd ${DESTDIR}${INCLUDEDIR}/$i; \
 	for h in *.h; do \
 		if [ -L $$h ]; then rm -f $$h; fi; \
 	done
 .endif
 .endfor
 .for i in ${LDIRS} ${LSUBDIRS:Ndev/agp:Ndev/acpica:Ndev/bktr:Ndev/nand:Ndev/pci} ${LSUBSUBDIRS}
 	cd ${.CURDIR}/../sys; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 $i/*.h \
 	    ${DESTDIR}${INCLUDEDIR}/$i
 .endfor
 	cd ${.CURDIR}/../sys/dev/acpica; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 acpiio.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/acpica
 	cd ${.CURDIR}/../sys/dev/agp; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 agpreg.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/agp
 	cd ${.CURDIR}/../sys/dev/bktr; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 ioctl_*.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/bktr
 .if ${MK_NAND} != "no"
 	cd ${.CURDIR}/../sys/dev/nand; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 nandsim.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/nand; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 nand_dev.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/nand
 .endif
 	cd ${.CURDIR}/../sys/dev/pci; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 pcireg.h \
 	    ${DESTDIR}${INCLUDEDIR}/dev/pci
 	cd ${.CURDIR}/../sys/contrib/altq/altq; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/altq
 	cd ${.CURDIR}/../sys/fs/cd9660/; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/isofs/cd9660
 .if ${MK_IPFILTER} != "no"
 	cd ${.CURDIR}/../sys/contrib/ipfilter/netinet; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/netinet
 .endif
 	cd ${.CURDIR}/../sys/crypto; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 rijndael/rijndael.h \
 	    ${DESTDIR}${INCLUDEDIR}/crypto
 	cd ${.CURDIR}/../sys/opencrypto; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/crypto
 	cd ${.CURDIR}/../sys/${MACHINE}/include; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/machine
 .if exists(${.CURDIR}/../sys/${MACHINE}/include/pc)
 	cd ${.CURDIR}/../sys/${MACHINE}/include/pc; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/machine/pc
 .endif
 .for _MARCH in ${_MARCHS}
 .if exists(${.CURDIR}/../sys/${_MARCH}/include)
 	${INSTALL} -d -o ${BINOWN} -g ${BINGRP} -m 755 \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}; \
 	cd ${.CURDIR}/../sys/${_MARCH}/include; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}
 .if exists(${.CURDIR}/../sys/${_MARCH}/include/pc)
 	${INSTALL} -d -o ${BINOWN} -g ${BINGRP} -m 755 \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}/pc; \
 	cd ${.CURDIR}/../sys/${_MARCH}/include/pc; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 *.h \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}/pc
 .endif
 .endif
 .endfor
 	cd ${.CURDIR}/../sys/rpc; \
 	${INSTALL} -C -o ${BINOWN} -g ${BINGRP} -m 444 types.h \
 	    ${DESTDIR}${INCLUDEDIR}/rpc
 
 symlinks:
 	@${ECHO} "Setting up symlinks to kernel source tree..."
 .for i in ${LDIRS}
 	cd ${.CURDIR}/../sys/$i; \
 	for h in *.h; do \
 		ln -fs ../../../sys/$i/$$h ${DESTDIR}${INCLUDEDIR}/$i; \
 	done
 .endfor
 .for i in ${LSUBDIRS:Ndev/agp:Ndev/acpica:Ndev/bktr:Ndev/nand:Ndev/pci}
 	cd ${.CURDIR}/../sys/$i; \
 	for h in *.h; do \
 		ln -fs ../../../../sys/$i/$$h ${DESTDIR}${INCLUDEDIR}/$i; \
 	done
 .endfor
 	cd ${.CURDIR}/../sys/dev/acpica; \
 	for h in acpiio.h; do \
 		ln -fs ../../../../sys/dev/acpica/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/dev/acpica; \
 	done
 	cd ${.CURDIR}/../sys/dev/agp; \
 	for h in agpreg.h; do \
 		ln -fs ../../../../sys/dev/agp/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/dev/agp; \
 	done
 	cd ${.CURDIR}/../sys/dev/bktr; \
 	for h in ioctl_*.h; do \
 		ln -fs ../../../../sys/dev/bktr/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/dev/bktr; \
 	done
 .if ${MK_NAND} != "no"
 	cd ${.CURDIR}/../sys/dev/nand; \
 	for h in nandsim.h nand_dev.h; do \
 		ln -fs ../../../../sys/dev/nand/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/dev/nand; \
 	done
 .endif
 	cd ${.CURDIR}/../sys/dev/pci; \
 	for h in pcireg.h; do \
 		ln -fs ../../../../sys/dev/pci/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/dev/pci; \
 	done
 .for i in ${LSUBSUBDIRS}
 	cd ${.CURDIR}/../sys/$i; \
 	for h in *.h; do \
 		ln -fs ../../../../../sys/$i/$$h ${DESTDIR}${INCLUDEDIR}/$i; \
 	done
 .endfor
 	cd ${.CURDIR}/../sys/contrib/altq/altq; \
 	for h in *.h; do \
 		ln -fs ../../../sys/contrib/altq/altq/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/altq; \
 	done
 .if ${MK_IPFILTER} != "no"
 	cd ${.CURDIR}/../sys/contrib/ipfilter/netinet; \
 	for h in *.h; do \
 		ln -fs ../../../sys/contrib/ipfilter/netinet/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/netinet; \
 	done
 .endif
+.if ${MK_PF} != "no"
+	cd ${.CURDIR}/../sys/netpfil/pf; \
+	for h in *.h; do \
+		ln -fs ../../../../sys/netpfil/pf/$$h \
+		    ${DESTDIR}${INCLUDEDIR}/netpfil/pf; \
+	done
+.endif
 	cd ${.CURDIR}/../sys/crypto; \
 	for h in rijndael/rijndael.h; do \
 		ln -fs ../../../sys/crypto/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/crypto; \
 	done
 	cd ${.CURDIR}/../sys/opencrypto; \
 	for h in *.h; do \
 		ln -fs ../../../sys/opencrypto/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/crypto; \
 	done
 	cd ${.CURDIR}/../sys/${MACHINE}/include; \
 	for h in *.h; do \
 		ln -fs ../../../sys/${MACHINE}/include/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/machine; \
 	done
 .if exists(${.CURDIR}/../sys/${MACHINE}/include/pc)
 	cd ${.CURDIR}/../sys/${MACHINE}/include/pc; \
 	for h in *.h; do \
 		ln -fs ../../../../sys/${MACHINE}/include/pc/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/machine/pc; \
 	done
 .endif
 .for _MARCH in ${_MARCHS}
 .if exists(${.CURDIR}/../sys/${_MARCH}/include)
 	${INSTALL} -d -o ${BINOWN} -g ${BINGRP} -m 755 \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}; \
 	cd ${.CURDIR}/../sys/${_MARCH}/include; \
 	for h in *.h; do \
 		ln -fs ../../../sys/${_MARCH}/include/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/${_MARCH}; \
 	done
 .if exists(${.CURDIR}/../sys/${_MARCH}/include/pc)
 	${INSTALL} -d -o ${BINOWN} -g ${BINGRP} -m 755 \
 	    ${DESTDIR}${INCLUDEDIR}/${_MARCH}/pc; \
 	cd ${.CURDIR}/../sys/${_MARCH}/include/pc; \
 	for h in *.h; do \
 		ln -fs ../../../../sys/${_MARCH}/include/pc/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/${_MARCH}/pc; \
 	done
 .endif
 .endif
 .endfor
 	cd ${.CURDIR}/../sys/fs/cd9660; \
 	for h in *.h; do \
 		ln -fs ../../../../sys/fs/cd9660/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/isofs/cd9660; \
 	done
 	cd ${.CURDIR}/../sys/rpc; \
 	for h in types.h; do \
 		ln -fs ../../../sys/rpc/$$h \
 		    ${DESTDIR}${INCLUDEDIR}/rpc; \
 	done
Index: stable/10/sbin/ifconfig/Makefile
===================================================================
--- stable/10/sbin/ifconfig/Makefile	(revision 263085)
+++ stable/10/sbin/ifconfig/Makefile	(revision 263086)
@@ -1,70 +1,72 @@
 #	From: @(#)Makefile	8.1 (Berkeley) 6/5/93
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 PROG=	ifconfig
 
 SRCS=	ifconfig.c		# base support
 
 #
 # NB: The order here defines the order in which the constructors
 #     are called.  This in turn defines the default order in which
 #     status is displayed.  Probably should add a priority mechanism
 #     to the registration process so we don't depend on this aspect
 #     of the toolchain.
 #
 SRCS+=	af_link.c		# LLC support
 .if ${MK_INET_SUPPORT} != "no"
 SRCS+=	af_inet.c		# IPv4 support
 .endif
 .if ${MK_INET6_SUPPORT} != "no"
 SRCS+=	af_inet6.c		# IPv6 support
 .endif
 SRCS+=	af_atalk.c		# AppleTalk support
 .if ${MK_INET6_SUPPORT} != "no"
 SRCS+=	af_nd6.c		# ND6 support
 .endif
 
 SRCS+=	ifclone.c		# clone device support
 SRCS+=	ifmac.c			# MAC support
 SRCS+=	ifmedia.c		# SIOC[GS]IFMEDIA support
 SRCS+=	iffib.c			# non-default FIB support
 SRCS+=	ifvlan.c		# SIOC[GS]ETVLAN support
 SRCS+=	ifgre.c			# GRE keys etc
 SRCS+=	ifgif.c			# GIF reversed header workaround
 
 SRCS+=	ifieee80211.c regdomain.c # SIOC[GS]IEEE80211 support
 DPADD+=	${LIBBSDXML} ${LIBSBUF}
 LDADD+=	-lbsdxml -lsbuf
 
 SRCS+=	carp.c			# SIOC[GS]VH support
 SRCS+=	ifgroup.c		# ...
+.if ${MK_PF} != "no"
 SRCS+=	ifpfsync.c		# pfsync(4) support
+.endif
 
 SRCS+=	ifbridge.c		# bridge support
 SRCS+=	iflagg.c		# lagg support
 
 .if ${MK_INET6_SUPPORT} != "no"
 CFLAGS+= -DINET6
 .endif
 .if ${MK_INET_SUPPORT} != "no"
 CFLAGS+= -DINET
 .endif
 .if ${MK_IPX_SUPPORT} != "no" && !defined(RELEASE_CRUNCH)
 SRCS+=	af_ipx.c		# IPX support
 DPADD+=	${LIBIPX}
 LDADD+=	-lipx
 .endif
 .if ${MK_JAIL} != "no" && !defined(RELEASE_CRUNCH) && !defined(RESCUE)
 CFLAGS+= -DJAIL
 DPADD+= ${LIBJAIL}
 LDADD+= -ljail
 .endif
 
 MAN=	ifconfig.8
 
 CFLAGS+= -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wnested-externs
 WARNS?=	2
 
 .include <bsd.prog.mk>
Index: stable/10/sbin/ipfw/Makefile
===================================================================
--- stable/10/sbin/ipfw/Makefile	(revision 263085)
+++ stable/10/sbin/ipfw/Makefile	(revision 263086)
@@ -1,10 +1,18 @@
 # $FreeBSD$
 
+.include <bsd.own.mk>
+
 PROG=	ipfw
-SRCS=	ipfw2.c dummynet.c ipv6.c main.c nat.c altq.c
+SRCS=	ipfw2.c dummynet.c ipv6.c main.c nat.c
 WARNS?=	2
+
+.if ${MK_PF} != "no"
+SRCS+=	altq.c
+CFLAGS+=-DPF
+.endif
+
 DPADD=	${LIBUTIL}
 LDADD=	-lutil
 MAN=	ipfw.8
 
 .include <bsd.prog.mk>
Index: stable/10/sbin/ipfw/ipfw2.h
===================================================================
--- stable/10/sbin/ipfw/ipfw2.h	(revision 263085)
+++ stable/10/sbin/ipfw/ipfw2.h	(revision 263086)
@@ -1,293 +1,296 @@
 /*
  * Copyright (c) 2002-2003 Luigi Rizzo
  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
  * Copyright (c) 1994 Ugen J.S.Antsilevich
  *
  * Idea and grammar partially left from:
  * Copyright (c) 1993 Daniel Boulet
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  *
  * NEW command line interface for IP firewall facility
  *
  * $FreeBSD$
  */
 
 /*
  * Options that can be set on the command line.
  * When reading commands from a file, a subset of the options can also
  * be applied globally by specifying them before the file name.
  * After that, each line can contain its own option that changes
  * the global value.
  * XXX The context is not restored after each line.
  *
  * The first group of members are on/off flags; the members after the
  * "multiple values" marker hold a numeric selection instead.
  */
 
 struct cmdline_opts {
 	/* boolean options: */
 	int	do_value_as_ip;	/* show table value as IP */
 	int	do_resolv;	/* try to resolve all ip to names */
 	int	do_time;	/* Show time stamps */
 	int	do_quiet;	/* Be quiet in add and flush */
 	int	do_pipe;	/* this cmd refers to a pipe/queue/sched */
 	int	do_nat; 	/* this cmd refers to a nat config */
 	int	do_dynamic;	/* display dynamic rules */
 	int	do_expired;	/* display expired dynamic rules */
 	int	do_compact;	/* show rules in compact mode */
 	int	do_force;	/* do not ask for confirmation */
 	int	show_sets;	/* display the set each rule belongs to */
 	int	test_only;	/* only check syntax */
 	int	comment_only;	/* only print action and comment */
 	int	verbose;	/* be verbose on some commands */
 
 	/* The options below can have multiple values. */
 
 	int	do_sort;	/* field to sort results (0 = no) */
 		/* valid fields are 1 and above */
 
 	int	use_set;	/* work with specified set number */
 		/* 0 means all sets, otherwise apply to set use_set - 1 */
 
 };
 
 extern struct cmdline_opts co;
 
 /*
  * _s_x is a structure that stores a string <-> token pair, used in
  * various places in the parser. Entries are stored in arrays,
  * with an entry with s=NULL as terminator.
  * The search routines are match_token() and match_value().
  * Often, an element with x=0 contains an error string.
  *
  */
 struct _s_x {
 	char const *s;	/* the string; NULL marks the end of a table */
 	int x;		/* the token (or other value) paired with s */
 };
 
 /*
  * Token codes for the parser.  TOK_NULL is explicitly 0; every other
  * enumerator takes the next value in declaration order, so inserting
  * or reordering entries renumbers the ones that follow.
  * NOTE(review): presumably these are the values stored in the x member
  * of struct _s_x tables -- confirm against the tables in the .c files.
  */
 enum tokens {
 	TOK_NULL=0,
 
 	TOK_OR,
 	TOK_NOT,
 	TOK_STARTBRACE,
 	TOK_ENDBRACE,
 
 	TOK_ACCEPT,
 	TOK_COUNT,
 	TOK_PIPE,
 	TOK_LINK,
 	TOK_QUEUE,
 	TOK_FLOWSET,
 	TOK_SCHED,
 	TOK_DIVERT,
 	TOK_TEE,
 	TOK_NETGRAPH,
 	TOK_NGTEE,
 	TOK_FORWARD,
 	TOK_SKIPTO,
 	TOK_DENY,
 	TOK_REJECT,
 	TOK_RESET,
 	TOK_UNREACH,
 	TOK_CHECKSTATE,
 	TOK_NAT,
 	TOK_REASS,
 	TOK_CALL,
 	TOK_RETURN,
 
 	TOK_ALTQ,
 	TOK_LOG,
 	TOK_TAG,
 	TOK_UNTAG,
 
 	TOK_TAGGED,
 	TOK_UID,
 	TOK_GID,
 	TOK_JAIL,
 	TOK_IN,
 	TOK_LIMIT,
 	TOK_KEEPSTATE,
 	TOK_LAYER2,
 	TOK_OUT,
 	TOK_DIVERTED,
 	TOK_DIVERTEDLOOPBACK,
 	TOK_DIVERTEDOUTPUT,
 	TOK_XMIT,
 	TOK_RECV,
 	TOK_VIA,
 	TOK_FRAG,
 	TOK_IPOPTS,
 	TOK_IPLEN,
 	TOK_IPID,
 	TOK_IPPRECEDENCE,
 	TOK_DSCP,
 	TOK_IPTOS,
 	TOK_IPTTL,
 	TOK_IPVER,
 	TOK_ESTAB,
 	TOK_SETUP,
 	TOK_TCPDATALEN,
 	TOK_TCPFLAGS,
 	TOK_TCPOPTS,
 	TOK_TCPSEQ,
 	TOK_TCPACK,
 	TOK_TCPWIN,
 	TOK_ICMPTYPES,
 	TOK_MAC,
 	TOK_MACTYPE,
 	TOK_VERREVPATH,
 	TOK_VERSRCREACH,
 	TOK_ANTISPOOF,
 	TOK_IPSEC,
 	TOK_COMMENT,
 
 	TOK_PLR,
 	TOK_NOERROR,
 	TOK_BUCKETS,
 	TOK_DSTIP,
 	TOK_SRCIP,
 	TOK_DSTPORT,
 	TOK_SRCPORT,
 	TOK_ALL,
 	TOK_MASK,
 	TOK_FLOW_MASK,
 	TOK_SCHED_MASK,
 	TOK_BW,
 	TOK_DELAY,
 	TOK_PROFILE,
 	TOK_BURST,
 	TOK_RED,
 	TOK_GRED,
 	TOK_DROPTAIL,
 	TOK_PROTO,
 	/* dummynet tokens */
 	TOK_WEIGHT,
 	TOK_LMAX,
 	TOK_PRI,
 	TOK_TYPE,
 	TOK_SLOTSIZE,
 
 	TOK_IP,
 	TOK_IF,
  	TOK_ALOG,
  	TOK_DENY_INC,
  	TOK_SAME_PORTS,
  	TOK_UNREG_ONLY,
 	TOK_SKIP_GLOBAL,
  	TOK_RESET_ADDR,
  	TOK_ALIAS_REV,
  	TOK_PROXY_ONLY,
 	TOK_REDIR_ADDR,
 	TOK_REDIR_PORT,
 	TOK_REDIR_PROTO,
 
 	TOK_IPV6,
 	TOK_FLOWID,
 	TOK_ICMP6TYPES,
 	TOK_EXT6HDR,
 	TOK_DSTIP6,
 	TOK_SRCIP6,
 
 	TOK_IPV4,
 	TOK_UNREACH6,
 	TOK_RESET6,
 
 	TOK_FIB,
 	TOK_SETFIB,
 	TOK_LOOKUP,
 	TOK_SOCKARG,
 	TOK_SETDSCP,
 };
 /*
  * The following macros exit with a usage error, via
  * errx(EX_USAGE, msg), if we run out of arguments.
  * NEED() tests the expression it is given; NEED1() tests *av, so a
  * variable named av must be in scope where it is used.
  *
  * The tested expression is parenthesized so that a compound argument,
  * e.g. NEED(a || b, "..."), is negated as a whole rather than only
  * its first operand.
  */
 #define NEED(_p, msg)      {if (!(_p)) errx(EX_USAGE, msg);}
 #define NEED1(msg)      {if (!(*av)) errx(EX_USAGE, msg);}
 
 int pr_u64(uint64_t *pd, int width);
 
 /* memory allocation support */
 void *safe_calloc(size_t number, size_t size);
 void *safe_realloc(void *ptr, size_t size);
 
 /* string comparison functions used for historical compatibility */
 int _substrcmp(const char *str1, const char* str2);
 int _substrcmp2(const char *str1, const char* str2, const char* str3);
 
 /* utility functions */
 int match_token(struct _s_x *table, char *string);
 char const *match_value(struct _s_x *p, int value);
 
 int do_cmd(int optname, void *optval, uintptr_t optlen);
 
 struct in6_addr;
 void n2mask(struct in6_addr *mask, int n);
 int contigmask(uint8_t *p, int len);
 
 /*
  * Forward declarations to avoid including too many headers.
  * C does not allow duplicated typedefs, so we use the base struct
  * that the typedef points to.
  * Should the typedefs use a different type, the compiler will
  * still detect the change when compiling the body of the
  * functions involved, so we do not lose error checking.
  */
 struct _ipfw_insn;
 struct _ipfw_insn_altq;
 struct _ipfw_insn_u32;
 struct _ipfw_insn_ip6;
 struct _ipfw_insn_icmp6;
 
 /*
  * The reserved set number. This is a constant in ip_fw.h
  * but we store it in a variable so other files do not depend
  * on that header just for one constant.
  */
 extern int resvd_set_number;
 
 /* first-level command handlers */
 void ipfw_add(char *av[]);
 void ipfw_show_nat(int ac, char **av);
 void ipfw_config_pipe(int ac, char **av);
 void ipfw_config_nat(int ac, char **av);
 void ipfw_sets_handler(char *av[]);
 void ipfw_table_handler(int ac, char *av[]);
 void ipfw_sysctl_handler(char *av[], int which);
 void ipfw_delete(char *av[]);
 void ipfw_flush(int force);
 void ipfw_zero(int ac, char *av[], int optname);
 void ipfw_list(int ac, char *av[], int show_counters);
 
+#ifdef PF
 /* altq.c */
 void altq_set_enabled(int enabled);
 u_int32_t altq_name_to_qid(const char *name);
-
 void print_altq_cmd(struct _ipfw_insn_altq *altqptr);
+#else
+#define NO_ALTQ
+#endif
 
 /* dummynet.c */
 void dummynet_list(int ac, char *av[], int show_counters);
 void dummynet_flush(void);
 int ipfw_delete_pipe(int pipe_or_queue, int n);
 
 /* ipv6.c */
 void print_unreach6_code(uint16_t code);
 void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s);
 void print_flow6id(struct _ipfw_insn_u32 *cmd);
 void print_icmp6types(struct _ipfw_insn_u32 *cmd);
 void print_ext6hdr(struct _ipfw_insn *cmd );
 
 struct _ipfw_insn *add_srcip6(struct _ipfw_insn *cmd, char *av, int cblen);
 struct _ipfw_insn *add_dstip6(struct _ipfw_insn *cmd, char *av, int cblen);
 
 void fill_flow6(struct _ipfw_insn_u32 *cmd, char *av, int cblen);
 void fill_unreach6_code(u_short *codep, char *str);
 void fill_icmp6types(struct _ipfw_insn_icmp6 *cmd, char *av, int cblen);
 int fill_ext6hdr(struct _ipfw_insn *cmd, char *av);
Index: stable/10/sys/contrib/altq/altq/altq_cbq.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_cbq.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_cbq.c	(revision 263086)
@@ -1,1170 +1,1173 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $	*/
 
 /*
  * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the SMCC Technology
  *      Development Group at Sun Microsystems, Inc.
  *
  * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
  *      promote products derived from this software without specific prior
  *      written permission.
  *
  * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
  * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
  * provided "as is" without express or implied warranty of any kind.
  *
  * These notices must be retained in any copies of any part of this software.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #ifdef ALTQ3_COMPAT
 #include <sys/uio.h>
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <netinet/in.h>
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
 #include <altq/altq.h>
 #include <altq/altq_cbq.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 
 #ifdef ALTQ3_COMPAT
 /*
  * Local Data structures.
  */
 static cbq_state_t *cbq_list = NULL;
 #endif
 
 /*
  * Forward Declarations.
  */
 static int		 cbq_class_destroy(cbq_state_t *, struct rm_class *);
 static struct rm_class  *clh_to_clp(cbq_state_t *, u_int32_t);
 static int		 cbq_clear_interface(cbq_state_t *);
 static int		 cbq_request(struct ifaltq *, int, void *);
 static int		 cbq_enqueue(struct ifaltq *, struct mbuf *,
 			     struct altq_pktattr *);
 static struct mbuf	*cbq_dequeue(struct ifaltq *, int);
 static void		 cbqrestart(struct ifaltq *);
 static void		 get_class_stats(class_stats_t *, struct rm_class *);
 static void		 cbq_purge(cbq_state_t *);
 #ifdef ALTQ3_COMPAT
 static int	cbq_add_class(struct cbq_add_class *);
 static int	cbq_delete_class(struct cbq_delete_class *);
 static int	cbq_modify_class(struct cbq_modify_class *);
 static int 	cbq_class_create(cbq_state_t *, struct cbq_add_class *,
 				 struct rm_class *, struct rm_class *);
 static int	cbq_clear_hierarchy(struct cbq_interface *);
 static int	cbq_set_enable(struct cbq_interface *, int);
 static int	cbq_ifattach(struct cbq_interface *);
 static int	cbq_ifdetach(struct cbq_interface *);
 static int 	cbq_getstats(struct cbq_getstats *);
 
 static int	cbq_add_filter(struct cbq_add_filter *);
 static int	cbq_delete_filter(struct cbq_delete_filter *);
 #endif /* ALTQ3_COMPAT */
 
 /*
  * cbq_class_destroy() - tear down one traffic class.
  *
  *	The class is handed to rmc_delete_class(); afterwards every
  *	reference the cbq state still holds to it (class table slots and
  *	the root/default/control shortcuts) is cleared.
  *
  *	Returns:	always 0.
  */
 static int
 cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
 {
 	int	slot;
 
 	/* delete the class */
 	rmc_delete_class(&cbqp->ifnp, cl);
 
 	/* release every table slot that still points at the class */
 	slot = 0;
 	while (slot < CBQ_MAX_CLASSES) {
 		if (cbqp->cbq_class_tbl[slot] == cl)
 			cbqp->cbq_class_tbl[slot] = NULL;
 		slot++;
 	}
 
 	/* forget the class if it was one of the special ones */
 	if (cbqp->ifnp.root_ == cl)
 		cbqp->ifnp.root_ = NULL;
 	if (cbqp->ifnp.default_ == cl)
 		cbqp->ifnp.default_ = NULL;
 #ifdef ALTQ3_COMPAT
 	if (cbqp->ifnp.ctl_ == cl)
 		cbqp->ifnp.ctl_ = NULL;
 #endif
 	return (0);
 }
 
 /*
  * Map a class handle to its rm_class pointer.  Returns NULL when the
  * handle is 0 or when no class in the table carries that handle.
  */
 static struct rm_class *
 clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
 {
 	struct rm_class *cand;
 	int idx;
 
 	if (chandle == 0)
 		return (NULL);
 
 	/*
 	 * fast path: look at the slot selected by the handle modulo the
 	 * table size before falling back to a full scan.
 	 */
 	cand = cbqp->cbq_class_tbl[chandle % CBQ_MAX_CLASSES];
 	if (cand != NULL && cand->stats_.handle == chandle)
 		return (cand);
 
 	/* slow path: linear search of the whole table */
 	for (idx = 0; idx < CBQ_MAX_CLASSES; idx++) {
 		cand = cbqp->cbq_class_tbl[idx];
 		if (cand == NULL)
 			continue;
 		if (cand->stats_.handle == chandle)
 			return (cand);
 	}
 	return (NULL);
 }
 
 /*
  * Remove every class held in the cbq state.  A class that still has
  * children is skipped on the current sweep and retried, so the table is
  * swept repeatedly until no parent class was seen.  Always returns 0.
  */
 static int
 cbq_clear_interface(cbq_state_t *cbqp)
 {
 	struct rm_class	*cl;
 	int		 pending, slot;
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
 #endif
 
 	/* clear out the classes now */
 	do {
 		pending = 0;
 		for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 			cl = cbqp->cbq_class_tbl[slot];
 			if (cl == NULL)
 				continue;
 			if (is_a_parent_class(cl)) {
 				/* children go first; retry on the next sweep */
 				pending++;
 				continue;
 			}
 			cbq_class_destroy(cbqp, cl);
 			cbqp->cbq_class_tbl[slot] = NULL;
 			if (cbqp->ifnp.root_ == cl)
 				cbqp->ifnp.root_ = NULL;
 			if (cbqp->ifnp.default_ == cl)
 				cbqp->ifnp.default_ = NULL;
 #ifdef ALTQ3_COMPAT
 			if (cbqp->ifnp.ctl_ == cl)
 				cbqp->ifnp.ctl_ = NULL;
 #endif
 		}
 	} while (pending);
 
 	return (0);
 }
 
 /*
  * Request hook handed to altq_attach().  Only ALTRQ_PURGE is acted upon
  * (the classes are purged); every other request is ignored.  Returns 0.
  */
 static int
 cbq_request(struct ifaltq *ifq, int req, void *arg)
 {
 	cbq_state_t	*cbqp;
 
 	cbqp = (cbq_state_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		cbq_purge(cbqp);
 	return (0);
 }
 
 /*
  * Copy the statistics kept in an rm_class into a class_stats_t.
  *
  * The destination is cleared first.  The callers in this file pass a
  * stack variable and then hand it to copyout(), while not every member
  * is assigned below (the red[] entries are only filled for RED/RIO
  * queues and the handle is left to the caller).  Without the clear,
  * whatever was on the kernel stack would be exported to user space.
  */
 static void
 get_class_stats(class_stats_t *statsp, struct rm_class *cl)
 {
 	bzero(statsp, sizeof(*statsp));
 
 	/* counters maintained in the class statistics block */
 	statsp->xmit_cnt	= cl->stats_.xmit_cnt;
 	statsp->drop_cnt	= cl->stats_.drop_cnt;
 	statsp->over		= cl->stats_.over;
 	statsp->borrows		= cl->stats_.borrows;
 	statsp->overactions	= cl->stats_.overactions;
 	statsp->delays		= cl->stats_.delays;
 
 	/* class parameters and current queue state */
 	statsp->depth		= cl->depth_;
 	statsp->priority	= cl->pri_;
 	statsp->maxidle		= cl->maxidle_;
 	statsp->minidle		= cl->minidle_;
 	statsp->offtime		= cl->offtime_;
 	statsp->qmax		= qlimit(cl->q_);
 	statsp->ns_per_byte	= cl->ns_per_byte_;
 	statsp->wrr_allot	= cl->w_allotment_;
 	statsp->qcnt		= qlen(cl->q_);
 	statsp->avgidle		= cl->avgidle_;
 
 	statsp->qtype		= qtype(cl->q_);
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		red_getstats(cl->red_, &statsp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
 #endif
 }
 
 /*
  * Attach the cbq state stored in a->altq_disc to the send queue of the
  * interface named by a->ifname.
  *
  *	Returns:	EINVAL if the interface does not exist or there is
  *			no cbq state; otherwise the altq_attach() result.
  */
 int
 cbq_pfattach(struct pf_altq *a)
 {
 	struct ifnet	*ifp;
 	int		 error, s;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (a->altq_disc == NULL)
 		return (EINVAL);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
 	    cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
 	splx(s);
 
 	return (error);
 }
 
 /*
  * Allocate the per-interface cbq state for a pf_altq entry and store it
  * in a->altq_disc.
  *
  *	Returns:	0 on success.
  *			EINVAL if a->ifname names no interface.
  *			ENODEV if the interface send queue is not ALTQ-ready.
  *			ENOMEM if the state cannot be allocated.
  */
 int
 cbq_add_altq(struct pf_altq *a)
 {
 	struct ifnet	*ifp;
 	cbq_state_t	*state;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	/* M_NOWAIT allocation, zero-filled through M_ZERO */
 	state = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (state == NULL)
 		return (ENOMEM);
 	CALLOUT_INIT(&state->cbq_callout);
 	state->cbq_qlen = 0;
 	state->ifnp.ifq_ = &ifp->if_snd;	/* keep the ifq */
 
 	/* keep the state in pf_altq */
 	a->altq_disc = state;
 
 	return (0);
 }
 
 /*
  * Detach the cbq state from a pf_altq entry, destroy all of its classes
  * and free it.  Returns EINVAL when the entry carries no state, else 0.
  */
 int
 cbq_remove_altq(struct pf_altq *a)
 {
 	cbq_state_t	*state = a->altq_disc;
 
 	if (state == NULL)
 		return (EINVAL);
 	a->altq_disc = NULL;
 
 	cbq_clear_interface(state);
 
 	/* destroy the default and root classes if they are still set */
 	if (state->ifnp.default_ != NULL)
 		cbq_class_destroy(state, state->ifnp.default_);
 	if (state->ifnp.root_ != NULL)
 		cbq_class_destroy(state, state->ifnp.root_);
 
 	/* deallocate cbq_state_t */
 	free(state, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * cbq_add_queue() - create the cbq class described by a pf_altq entry.
  *
  *	The class handle is a->qid and the class parameters are taken from
  *	a->pq_u.cbq_opts.  A root class is set up through rmc_init(), any
  *	other class through rmc_newclass().  The new class is stored in the
  *	class table and, for a default class, in ifnp.default_.
  *
  *	Returns:	0 on success.
  *			EINVAL if there is no cbq state, qid is 0, the class
  *			table is full, the priority is out of range, a
  *			non-root class has no parent, or the class flags are
  *			inconsistent.
  *			ENOMEM if no class was created.
  */
 int
 cbq_add_queue(struct pf_altq *a)
 {
 	struct rm_class	*borrow, *parent;
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	struct cbq_opts	*opts;
 	int		i;
 
 	if ((cbqp = a->altq_disc) == NULL)
 		return (EINVAL);
 	if (a->qid == 0)
 		return (EINVAL);
 
 	/*
 	 * find a free slot in the class table.  if the slot matching
 	 * the lower bits of qid is free, use this slot.  otherwise,
 	 * use the first free slot.
 	 */
 	i = a->qid % CBQ_MAX_CLASSES;
 	if (cbqp->cbq_class_tbl[i] != NULL) {
 		for (i = 0; i < CBQ_MAX_CLASSES; i++)
 			if (cbqp->cbq_class_tbl[i] == NULL)
 				break;
 		if (i == CBQ_MAX_CLASSES)
 			return (EINVAL);
 	}
 
 	opts = &a->pq_u.cbq_opts;
 	/* check parameters */
 	if (a->priority >= CBQ_MAXPRI)
 		return (EINVAL);
 
 	/* Get pointers to parent and borrow classes.  */
 	parent = clh_to_clp(cbqp, a->parent_qid);
 	if (opts->flags & CBQCLF_BORROW)
 		borrow = parent;
 	else
 		borrow = NULL;
 
 	/*
 	 * A class must borrow from its parent or it can not
 	 * borrow at all.  Hence, borrow can be null.
 	 */
 	if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
 		printf("cbq_add_queue: no parent class!\n");
 		return (EINVAL);
 	}
 
 	/*
 	 * NOTE(review): borrow was set above to either parent or NULL, so
 	 * this condition cannot be true here; the message also still names
 	 * cbq_add_class.
 	 */
 	if ((borrow != parent)  && (borrow != NULL)) {
 		printf("cbq_add_class: borrow class != parent\n");
 		return (EINVAL);
 	}
 
 	/*
 	 * check parameters
 	 */
 	switch (opts->flags & CBQCLF_CLASSMASK) {
 	case CBQCLF_ROOTCLASS:
 		/* a root class has no parent and may exist only once */
 		if (parent != NULL)
 			return (EINVAL);
 		if (cbqp->ifnp.root_)
 			return (EINVAL);
 		break;
 	case CBQCLF_DEFCLASS:
 		/* only one default class per interface */
 		if (cbqp->ifnp.default_)
 			return (EINVAL);
 		break;
 	case 0:
 		/* qid 0 was already rejected at the top of the function */
 		if (a->qid == 0)
 			return (EINVAL);
 		break;
 	default:
 		/* more than two flags bits set */
 		return (EINVAL);
 	}
 
 	/*
 	 * create a class.  if this is a root class, initialize the
 	 * interface.
 	 */
 	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
 		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
 		    cbqrestart, a->qlimit, RM_MAXQUEUED,
 		    opts->maxidle, opts->minidle, opts->offtime,
 		    opts->flags);
 		/* the root class is read back from ifnp.root_ after rmc_init() */
 		cl = cbqp->ifnp.root_;
 	} else {
 		cl = rmc_newclass(a->priority,
 				  &cbqp->ifnp, opts->ns_per_byte,
 				  rmc_delay_action, a->qlimit, parent, borrow,
 				  opts->maxidle, opts->minidle, opts->offtime,
 				  opts->pktsize, opts->flags);
 	}
 	if (cl == NULL)
 		return (ENOMEM);
 
 	/* return handle to user space. */
 	cl->stats_.handle = a->qid;
 	cl->stats_.depth = cl->depth_;
 
 	/* save the allocated class */
 	cbqp->cbq_class_tbl[i] = cl;
 
 	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
 		cbqp->ifnp.default_ = cl;
 
 	return (0);
 }
 
 /*
  * cbq_remove_queue() - delete the class whose handle is a->qid.
  *
  *	The teardown is delegated to cbq_class_destroy() so that this path
  *	clears exactly the same references as every other class removal
  *	(class table slots, root/default and, under ALTQ3_COMPAT, the
  *	control class pointer).
  *
  *	Returns:	0 on success.
  *			EINVAL if there is no cbq state, no class has this
  *			handle, or the class still has children.
  */
 int
 cbq_remove_queue(struct pf_altq *a)
 {
 	struct rm_class	*cl;
 	cbq_state_t	*cbqp;
 
 	if ((cbqp = a->altq_disc) == NULL)
 		return (EINVAL);
 
 	if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
 		return (EINVAL);
 
 	/* if we are a parent class, then return an error. */
 	if (is_a_parent_class(cl))
 		return (EINVAL);
 
 	/* delete the class and free the class handle */
 	return (cbq_class_destroy(cbqp, cl));
 }
 
 int
 cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	class_stats_t	 stats;
 	int		 error = 0;
 
 	if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
 		return (EINVAL);
 
 	if (*nbytes < sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * int
  * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
  *		- Queue data packets.
  *
  *	cbq_enqueue is the enqueue routine passed to altq_attach().  The
  *	target class is looked up from the qid in the packet's pf mbuf tag;
  *	when no class is found this way the default class is used.  The
  *	packet is then handed to rmc_queue_packet().
  *
  *	Assumptions:	the ifq lock is held (IFQ_LOCK_ASSERT).
  *	Returns:	0 if the queueing is successful.
  *			ENOBUFS if the packet has no packet header, no class
  *			is available, or a packet dropping occurred as a
  *			result of the queueing.
  */
 
 static int
 cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	cbq_state_t	*cbqp = (cbq_state_t *)ifq->altq_disc;
 	struct rm_class	*cl;
 	struct pf_mtag	*t;
 	int		 len;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* grab class set by classifier */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		/* should not happen */
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 	cl = NULL;
 	/* the pf tag, when present, takes precedence over pktattr */
 	if ((t = pf_find_mtag(m)) != NULL)
 		cl = clh_to_clp(cbqp, t->qid);
 #ifdef ALTQ3_COMPAT
 	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
 		cl = pktattr->pattr_class;
 #endif
 	if (cl == NULL) {
 		/* fall back to the default class; free the packet if none */
 		cl = cbqp->ifnp.default_;
 		if (cl == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 	}
 	/*
 	 * without ALTQ3_COMPAT only the unconditional assignment below
 	 * remains, i.e. pktattr_ is always cleared.
 	 */
 #ifdef ALTQ3_COMPAT
 	if (pktattr != NULL)
 		cl->pktattr_ = pktattr;  /* save proto hdr used by ECN */
 	else
 #endif
 		cl->pktattr_ = NULL;
 	/* take the length now; m is not referenced after queueing */
 	len = m_pktlen(m);
 	if (rmc_queue_packet(cl, m) != 0) {
 		/* drop occurred.  some mbuf was freed in rmc_queue_packet. */
 		PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
 		return (ENOBUFS);
 	}
 
 	/* successfully queued. */
 	++cbqp->cbq_qlen;
 	IFQ_INC_LEN(ifq);
 	return (0);
 }
 
 static struct mbuf *
 cbq_dequeue(struct ifaltq *ifq, int op)
 {
 	cbq_state_t	*cbqp = (cbq_state_t *)ifq->altq_disc;
 	struct mbuf	*m;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	m = rmc_dequeue_next(&cbqp->ifnp, op);
 
 	if (m && op == ALTDQ_REMOVE) {
 		--cbqp->cbq_qlen;  /* decrement # of packets in cbq */
 		IFQ_DEC_LEN(ifq);
 
 		/* Update the class. */
 		rmc_update_class_util(&cbqp->ifnp);
 	}
 	return (m);
 }
 
 /*
  * void
  * cbqrestart(struct ifaltq *) - Restart sending of data.
  *	Registered as the restart callback in rmc_init().  The driver's
  *	start routine is invoked, with the ifq lock dropped around the
  *	call, when cbq is enabled, packets are queued and the interface
  *	is not marked IFF_DRV_OACTIVE.
  *	Returns:	NONE
  */
 
 static void
 cbqrestart(struct ifaltq *ifq)
 {
 	cbq_state_t	*cbqp;
 	struct ifnet	*ifp;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* cbq must have been detached */
 	if (!ALTQ_IS_ENABLED(ifq))
 		return;
 
 	/* should not happen */
 	cbqp = (cbq_state_t *)ifq->altq_disc;
 	if (cbqp == NULL)
 		return;
 
 	ifp = ifq->altq_ifp;
 	if (!ifp->if_start)
 		return;
 	if (!(cbqp->cbq_qlen > 0))
 		return;
 	if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0)
 		return;
 
 	IFQ_UNLOCK(ifq);
 	(*ifp->if_start)(ifp);
 	IFQ_LOCK(ifq);
 }
 
 /*
  * Call rmc_dropall() on every class in the table and, when the
  * discipline is enabled on the interface queue, reset ifq_len to 0.
  */
 static void
 cbq_purge(cbq_state_t *cbqp)
 {
 	int	slot;
 
 	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 		struct rm_class *cl = cbqp->cbq_class_tbl[slot];
 
 		if (cl != NULL)
 			rmc_dropall(cl);
 	}
 	if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
 		cbqp->ifnp.ifq_->ifq_len = 0;
 }
 #ifdef ALTQ3_COMPAT
 
 /*
  * CBQ_ADD_CLASS backend: validate the request, resolve the parent and
  * borrow handles and let cbq_class_create() build the class.
  *
  *	Returns:	EBADF if the interface has no cbq state.
  *			EINVAL on bad priority/queue size, a missing parent
  *			for a non-root class, or a borrow class that is
  *			neither NULL nor the parent.
  *			otherwise the cbq_class_create() result.
  */
 static int
 cbq_add_class(struct cbq_add_class *acp)
 {
 	struct rm_class	*borrow, *parent;
 	cbq_state_t	*cbqp;
 
 	cbqp = altq_lookup(acp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	/* check parameters */
 	if (acp->cbq_class.priority >= CBQ_MAXPRI)
 		return (EINVAL);
 	if (acp->cbq_class.maxq > CBQ_MAXQSIZE)
 		return (EINVAL);
 
 	/* Get pointers to parent and borrow classes.  */
 	parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
 	borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
 
 	/* only a root class may come without a parent */
 	if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
 		printf("cbq_add_class: no parent class!\n");
 		return (EINVAL);
 	}
 
 	/* borrowing is only possible from the parent, or not at all */
 	if (borrow != NULL && borrow != parent) {
 		printf("cbq_add_class: borrow class != parent\n");
 		return (EINVAL);
 	}
 
 	return cbq_class_create(cbqp, acp, parent, borrow);
 }
 
 /*
  * CBQ_DEL_CLASS backend: discard the filters referring to the class and
  * destroy it.  Returns EBADF when the interface has no cbq state and
  * EINVAL when the class is unknown or still has children.
  */
 static int
 cbq_delete_class(struct cbq_delete_class *dcp)
 {
 	struct rm_class	*cl;
 	cbq_state_t	*cbqp;
 
 	cbqp = altq_lookup(dcp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	cl = clh_to_clp(cbqp, dcp->cbq_class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	/* if we are a parent class, then return an error. */
 	if (is_a_parent_class(cl))
 		return (EINVAL);
 
 	/* if a filter has a reference to this class delete the filter */
 	acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
 
 	return cbq_class_destroy(cbqp, cl);
 }
 
 /*
  * CBQ_MODIFY_CLASS backend: pass the new parameters of an existing class
  * to rmc_modclass().  Returns EBADF when the interface has no cbq state,
  * EINVAL when the class is unknown or rmc_modclass() reports a negative
  * result, and 0 otherwise.
  */
 static int
 cbq_modify_class(struct cbq_modify_class *acp)
 {
 	struct rm_class	*cl;
 	cbq_state_t	*cbqp;
 
 	cbqp = altq_lookup(acp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	/* Get pointer to this class */
 	cl = clh_to_clp(cbqp, acp->cbq_class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
 	    acp->cbq_class.maxq, acp->cbq_class.maxidle,
 	    acp->cbq_class.minidle, acp->cbq_class.offtime,
 	    acp->cbq_class.pktsize) < 0) {
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * int
  * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
  *		struct rm_class *parent, struct rm_class *borrow)
  *
  * Create a new traffic class in the CBQ class hierarchy from the given
  * parameters.  The class is either the root class (set up through
  * rmc_init()) or a class created with rmc_newclass(); default and
  * control classes are additionally recorded in the interface state.
  * The slot number of the class in the class table doubles as its handle
  * and is returned to the caller in acp->cbq_class_handle.
  *
  *	Returns:	0 on success.
  *			EINVAL if the class table has no free slot.
  *			ENOMEM if no class was created.
  */
 static int
 cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
     struct rm_class *parent, struct rm_class *borrow)
 {
 	cbq_class_spec_t *spec = &acp->cbq_class;
 	struct rm_class	*cl;
 	u_int32_t	chandle;
 	int		slot, kind;
 
 	/*
 	 * allocate class handle; the search starts at slot 1
 	 */
 	slot = 1;
 	while (slot < CBQ_MAX_CLASSES && cbqp->cbq_class_tbl[slot] != NULL)
 		slot++;
 	if (slot == CBQ_MAX_CLASSES)
 		return (EINVAL);
 	chandle = slot;	/* use the slot number as class handle */
 
 	kind = spec->flags & CBQCLF_CLASSMASK;
 
 	/*
 	 * create a class.  if this is a root class, initialize the
 	 * interface.
 	 */
 	if (kind != CBQCLF_ROOTCLASS) {
 		cl = rmc_newclass(spec->priority,
 		    &cbqp->ifnp, spec->nano_sec_per_byte,
 		    rmc_delay_action, spec->maxq, parent, borrow,
 		    spec->maxidle, spec->minidle, spec->offtime,
 		    spec->pktsize, spec->flags);
 	} else {
 		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
 		    cbqrestart, spec->maxq, RM_MAXQUEUED,
 		    spec->maxidle, spec->minidle, spec->offtime,
 		    spec->flags);
 		cl = cbqp->ifnp.root_;
 	}
 	if (cl == NULL)
 		return (ENOMEM);
 
 	/* return handle to user space. */
 	acp->cbq_class_handle = chandle;
 
 	cl->stats_.handle = chandle;
 	cl->stats_.depth = cl->depth_;
 
 	/* save the allocated class */
 	cbqp->cbq_class_tbl[slot] = cl;
 
 	if (kind == CBQCLF_DEFCLASS)
 		cbqp->ifnp.default_ = cl;
 	if (kind == CBQCLF_CTLCLASS)
 		cbqp->ifnp.ctl_ = cl;
 
 	return (0);
 }
 
 /*
  * CBQ_ADD_FILTER backend: attach a filter to an existing class through
  * acc_add_filter().  Returns EBADF when the interface has no cbq state
  * and EINVAL when the class handle is unknown.
  */
 static int
 cbq_add_filter(struct cbq_add_filter *afp)
 {
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 
 	cbqp = altq_lookup(afp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	/* Get the pointer to class. */
 	cl = clh_to_clp(cbqp, afp->cbq_class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
 	    cl, &afp->cbq_filter_handle);
 }
 
 /*
  * CBQ_DEL_FILTER backend: remove the filter identified by
  * dfp->cbq_filter_handle through acc_delete_filter().  Returns EBADF
  * when the interface has no cbq state.
  */
 static int
 cbq_delete_filter(struct cbq_delete_filter *dfp)
 {
 	cbq_state_t	*cbqp;
 
 	cbqp = altq_lookup(dfp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&cbqp->cbq_classifier,
 	    dfp->cbq_filter_handle);
 }
 
 /*
  * cbq_clear_hierarchy() - CBQ_CLEAR_HIERARCHY backend.  Hands the cbq
  * state of the given interface to cbq_clear_interface(), which removes
  * its classes (and filters).  Returns EBADF when the interface has no
  * cbq state.
  */
 static int
 cbq_clear_hierarchy(struct cbq_interface *ifacep)
 {
 	cbq_state_t	*cbqp;
 
 	cbqp = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	return cbq_clear_interface(cbqp);
 }
 
 /*
  * static int
  * cbq_set_enable(struct cbq_enable *ep) - this function processed the
  *	ioctl request to enable class based queueing.  It searches the list
  *	of interfaces for the specified interface and then enables CBQ on
  *	that interface.
  *
  *	Returns:	0, for no error.
  *			EBADF, for specified inteface not found.
  */
 
 static int
 cbq_set_enable(ep, enable)
 	struct cbq_interface *ep;
 	int enable;
 {
 	int 	error = 0;
 	cbq_state_t	*cbqp;
 	char 	*ifacename;
 
 	ifacename = ep->cbq_ifacename;
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	switch (enable) {
 	case ENABLE:
 		if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
 		    cbqp->ifnp.ctl_ == NULL) {
 			if (cbqp->ifnp.root_ == NULL)
 				printf("No Root Class for %s\n", ifacename);
 			if (cbqp->ifnp.default_ == NULL)
 				printf("No Default Class for %s\n", ifacename);
 			if (cbqp->ifnp.ctl_ == NULL)
 				printf("No Control Class for %s\n", ifacename);
 			error = EINVAL;
 		} else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
 			cbqp->cbq_qlen = 0;
 		}
 		break;
 
 	case DISABLE:
 		error = altq_disable(cbqp->ifnp.ifq_);
 		break;
 	}
 	return (error);
 }
 
 /*
  * CBQ_GETSTATS backend: copy the statistics of up to gsp->nclasses
  * classes of the named interface to the user array gsp->stats, in class
  * table order, and store the number of entries written in gsp->nclasses.
  *
  *	Returns:	0 on success.
  *			EBADF if the interface has no cbq state.
  *			EINVAL if gsp->nclasses is not positive.
  *			otherwise the copyout() error; gsp->nclasses is not
  *			updated in that case.
  */
 static int
 cbq_getstats(gsp)
 	struct cbq_getstats *gsp;
 {
 	char		*ifacename;
 	int		i, n, nclasses;
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	class_stats_t	stats, *usp;
 	int error = 0;
 
 	ifacename = gsp->iface.cbq_ifacename;
 	nclasses = gsp->nclasses;
 	usp = gsp->stats;
 
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 	if (nclasses <= 0)
 		return (EINVAL);
 
 	/* n counts entries written, i walks the class table */
 	for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
 		/* skip empty slots; leave once the table is exhausted */
 		while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
 			if (++i >= CBQ_MAX_CLASSES)
 				goto out;
 
 		/*
 		 * NOTE(review): stats is a stack variable that is copied out
 		 * whole; it is only as initialised as get_class_stats()
 		 * makes it, plus the handle set here.
 		 */
 		get_class_stats(&stats, cl);
 		stats.handle = cl->stats_.handle;
 
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 		    sizeof(stats))) != 0)
 			return (error);
 	}
 
  out:
 	gsp->nclasses = n;
 	return (error);
 }
 
 static int
 cbq_ifattach(ifacep)
 	struct cbq_interface *ifacep;
 {
 	int		error = 0;
 	char		*ifacename;
 	cbq_state_t	*new_cbqp;
 	struct ifnet 	*ifp;
 
 	ifacename = ifacep->cbq_ifacename;
 	if ((ifp = ifunit(ifacename)) == NULL)
 		return (ENXIO);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENXIO);
 
 	/* allocate and initialize cbq_state_t */
 	new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
 	if (new_cbqp == NULL)
 		return (ENOMEM);
 	bzero(new_cbqp, sizeof(cbq_state_t));
  	CALLOUT_INIT(&new_cbqp->cbq_callout);
 
 	new_cbqp->cbq_qlen = 0;
 	new_cbqp->ifnp.ifq_ = &ifp->if_snd;	    /* keep the ifq */
 
 	/*
 	 * set CBQ to this ifnet structure.
 	 */
 	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
 			    cbq_enqueue, cbq_dequeue, cbq_request,
 			    &new_cbqp->cbq_classifier, acc_classify);
 	if (error) {
 		free(new_cbqp, M_DEVBUF);
 		return (error);
 	}
 
 	/* prepend to the list of cbq_state_t's. */
 	new_cbqp->cbq_next = cbq_list;
 	cbq_list = new_cbqp;
 
 	return (0);
 }
 
 /*
  * CBQ_IF_DETACH backend: disable CBQ on the named interface, remove all
  * of its classes, detach the discipline, unlink the state from cbq_list
  * and free it.  Returns EBADF when the interface has no cbq state,
  * otherwise 0.
  */
 static int
 cbq_ifdetach(struct cbq_interface *ifacep)
 {
 	cbq_state_t 	*cbqp, *cp;
 
 	cbqp = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	(void)cbq_set_enable(ifacep, DISABLE);
 
 	cbq_clear_interface(cbqp);
 
 	/* remove CBQ from the ifnet structure. */
 	(void)altq_detach(cbqp->ifnp.ifq_);
 
 	/* remove from the list of cbq_state_t's. */
 	if (cbq_list == cbqp) {
 		cbq_list = cbqp->cbq_next;
 	} else {
 		/* find the predecessor and splice the state out */
 		cp = cbq_list;
 		while (cp != NULL && cp->cbq_next != cbqp)
 			cp = cp->cbq_next;
 		if (cp != NULL)
 			cp->cbq_next = cbqp->cbq_next;
 		ASSERT(cp != NULL);
 	}
 
 	/* deallocate cbq_state_t */
 	free(cbqp, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * cbq device interface
  */
 
 altqdev_decl(cbq);
 
 /*
  * Open entry point of the cbq device.  There is nothing to set up; all
  * arguments are ignored and 0 is returned.
  */
 int
 cbqopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	return (0);
 }
 
 int
 cbqclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct ifnet *ifp;
 	struct cbq_interface iface;
 	int err, error = 0;
 
 	while (cbq_list) {
 		ifp = cbq_list->ifnp.ifq_->altq_ifp;
 		sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
 		err = cbq_ifdetach(&iface);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return (error);
 }
 
 /*
  * ioctl entry point of the cbq device.
  *
  *	Every command except CBQ_GETSTATS requires privilege; the check
  *	used depends on the FreeBSD version.  The command is then
  *	dispatched to its backend, with addr cast to the matching request
  *	structure.
  *
  *	Returns:	the privilege-check error, the backend's result, or
  *			EINVAL for an unknown command.
  */
 int
 cbqioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	int	error = 0;
 
 	/* check cmd for superuser only */
 	switch (cmd) {
 	case CBQ_GETSTATS:
 		/* currently only command that an ordinary user can call */
 		break;
 	default:
 		/*
 		 * "#elif", not "#elsif": the latter is not a preprocessor
 		 * directive, so the middle branch could never be selected.
 		 */
 #if (__FreeBSD_version > 700000)
 		error = priv_check(p, PRIV_ALTQ_MANAGE);
 #elif (__FreeBSD_version > 400000)
 		error = suser(p);
 #else
 		error = suser(p->p_ucred, &p->p_acflag);
 #endif
 		if (error)
 			return (error);
 		break;
 	}
 
 	switch (cmd) {
 
 	case CBQ_ENABLE:
 		error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
 		break;
 
 	case CBQ_DISABLE:
 		error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
 		break;
 
 	case CBQ_ADD_FILTER:
 		error = cbq_add_filter((struct cbq_add_filter *)addr);
 		break;
 
 	case CBQ_DEL_FILTER:
 		error = cbq_delete_filter((struct cbq_delete_filter *)addr);
 		break;
 
 	case CBQ_ADD_CLASS:
 		error = cbq_add_class((struct cbq_add_class *)addr);
 		break;
 
 	case CBQ_DEL_CLASS:
 		error = cbq_delete_class((struct cbq_delete_class *)addr);
 		break;
 
 	case CBQ_MODIFY_CLASS:
 		error = cbq_modify_class((struct cbq_modify_class *)addr);
 		break;
 
 	case CBQ_CLEAR_HIERARCHY:
 		error = cbq_clear_hierarchy((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_IF_ATTACH:
 		error = cbq_ifattach((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_IF_DETACH:
 		error = cbq_ifdetach((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_GETSTATS:
 		error = cbq_getstats((struct cbq_getstats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 #if 0
 /* for debug */
 static void cbq_class_dump(int);
 
 /*
  * Debug helper (compiled out by the surrounding #if 0): print the
  * parameters, statistics and queue state of class table slot i of the
  * first cbq state on cbq_list.
  */
 static void cbq_class_dump(i)
 	int i;
 {
 	struct rm_class *cl;
 	rm_class_stats_t *s;
 	struct _class_queue_ *q;
 
 	if (cbq_list == NULL) {
 		printf("cbq_class_dump: no cbq_state found\n");
 		return;
 	}
 	/* NOTE(review): i is used as an index without a range check */
 	cl = cbq_list->cbq_class_tbl[i];
 
 	printf("class %d cl=%p\n", i, cl);
 	if (cl != NULL) {
 		s = &cl->stats_;
 		q = cl->q_;
 
 		printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
 		       cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
 		printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
 		       cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
 		       cl->maxidle_);
 		printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
 		       cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
 		printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
 		       s->handle, s->depth,
 		       (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
 		printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
 		       s->over, s->borrows, (int)s->drop_cnt.packets,
 		       s->overactions, s->delays);
 		printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
 		       q->tail_, q->head_, q->qlen_, q->qlim_,
 		       q->qthresh_, q->qtype_);
 	}
 }
 #endif /* 0 */
 
 #ifdef KLD_MODULE
 
 /*
  * Loadable-module glue: the device switch names the discipline "cbq" and
  * lists its open/close/ioctl entry points; the module is registered for
  * ALTQT_CBQ and declares dependencies on altq_red and altq_rio.
  */
 static struct altqsw cbq_sw =
 	{"cbq", cbqopen, cbqclose, cbqioctl};
 
 ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
 MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_CBQ */
Index: stable/10/sys/contrib/altq/altq/altq_cdnr.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_cdnr.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_cdnr.c	(revision 263086)
@@ -1,1389 +1,1390 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/
 
 /*
  * Copyright (C) 1999-2002
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
+#include <altq/if_altq.h>
 #include <altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 #include <altq/altq_cdnr.h>
 
 #ifdef ALTQ3_COMPAT
 /*
  * diffserv traffic conditioning module
  */
 
 int altq_cdnr_enabled = 0;
 
 /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
 #ifdef ALTQ_CDNR
 
 /* tcb_list keeps all top-level cdnr's allocated. */
 static LIST_HEAD(, top_cdnr) tcb_list;
 
 static int altq_cdnr_input(struct mbuf *, int);
 static struct top_cdnr *tcb_lookup(char *ifname);
 static struct cdnr_block *cdnr_handle2cb(u_long);
 static u_long cdnr_cb2handle(struct cdnr_block *);
 static void *cdnr_cballoc(struct top_cdnr *, int,
        struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
 static void cdnr_cbdestroy(void *);
 static int tca_verify_action(struct tc_action *);
 static void tca_import_action(struct tc_action *, struct tc_action *);
 static void tca_invalidate_action(struct tc_action *);
 
 static int generic_element_destroy(struct cdnr_block *);
 static struct top_cdnr *top_create(struct ifaltq *);
 static int top_destroy(struct top_cdnr *);
 static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
 static int element_destroy(struct cdnr_block *);
 static void tb_import_profile(struct tbe *, struct tb_profile *);
 static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
 				  struct tc_action *, struct tc_action *);
 static int tbm_destroy(struct tbmeter *);
 static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 static struct trtcm *trtcm_create(struct top_cdnr *,
 		  struct tb_profile *, struct tb_profile *,
 		  struct tc_action *, struct tc_action *, struct tc_action *,
 		  int);
 static int trtcm_destroy(struct trtcm *);
 static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 static struct tswtcm *tswtcm_create(struct top_cdnr *,
 		  u_int32_t, u_int32_t, u_int32_t,
 		  struct tc_action *, struct tc_action *, struct tc_action *);
 static int tswtcm_destroy(struct tswtcm *);
 static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 
 static int cdnrcmd_if_attach(char *);
 static int cdnrcmd_if_detach(char *);
 static int cdnrcmd_add_element(struct cdnr_add_element *);
 static int cdnrcmd_delete_element(struct cdnr_delete_element *);
 static int cdnrcmd_add_filter(struct cdnr_add_filter *);
 static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
 static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
 static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
 static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
 static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
 static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
 static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
 static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
 static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
 static int cdnrcmd_get_stats(struct cdnr_get_stats *);
 
 altqdev_decl(cdnr);
 
 /*
  * top level input function called from ip_input.
  * should be called before converting header fields to host-byte-order.
  */
 /*
  * Returns 1 when the conditioner is not enabled on the receive
  * interface, and for the PASS, MARK and NONE/unknown action codes.
  * Returns 0 for DROP (after freeing m) and for RETURN (m is not freed
  * here).
  */
 int
 altq_cdnr_input(m, af)
 	struct mbuf	*m;
 	int		af;	/* address family */
 {
 	struct ifnet		*ifp;
 	struct ip		*ip;
 	struct top_cdnr		*top;
 	struct tc_action	*tca;
 	struct cdnr_block	*cb;
 	struct cdnr_pktinfo	pktinfo;
 
 	ifp = m->m_pkthdr.rcvif;
 	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
 		/* traffic conditioner is not enabled on this interface */
 		return (1);
 
 	top = ifp->if_snd.altq_cdnr;
 
 	/*
 	 * record the packet's DSCP and length; the DSCP comes from the
 	 * IPv6 flow word for AF_INET6 and from the IPv4 TOS byte otherwise
 	 */
 	ip = mtod(m, struct ip *);
 #ifdef INET6
 	if (af == AF_INET6) {
 		u_int32_t flowlabel;
 
 		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
 		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
 	} else
 #endif
 		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
 	pktinfo.pkt_len = m_pktlen(m);
 
 	/*
 	 * starting action: the action of the block selected by the
 	 * classifier, or the top-level block's action when nothing matches
 	 */
 	tca = NULL;
 
 	cb = acc_classify(&top->tc_classifier, m, af);
 	if (cb != NULL)
 		tca = &cb->cb_action;
 
 	if (tca == NULL)
 		tca = &top->tc_block.cb_action;
 
 	/*
 	 * follow the action chain; every action visited is accounted in
 	 * the per-code counters of the top-level conditioner
 	 */
 	while (1) {
 		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
 
 		switch (tca->tca_code) {
 		case TCACODE_PASS:
 			return (1);
 		case TCACODE_DROP:
 			m_freem(m);
 			return (0);
 		case TCACODE_RETURN:
 			return (0);
 		case TCACODE_MARK:
 			/* rewrite only the DSCP bits of the header in place */
 #ifdef INET6
 			if (af == AF_INET6) {
 				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 				u_int32_t flowlabel;
 
 				flowlabel = ntohl(ip6->ip6_flow);
 				flowlabel = (tca->tca_dscp << 20) |
 					(flowlabel & ~(DSCP_MASK << 20));
 				ip6->ip6_flow = htonl(flowlabel);
 			} else
 #endif
 				/* NOTE(review): no IPv4 checksum update here -- confirm the caller handles it */
 				ip->ip_tos = tca->tca_dscp |
 					(ip->ip_tos & DSCP_CUMASK);
 			return (1);
 		case TCACODE_NEXT:
 			/* hand the packet to the next block; it returns the action to take */
 			cb = tca->tca_next;
 			tca = (*cb->cb_input)(cb, &pktinfo);
 			break;
 		case TCACODE_NONE:
 		default:
 			return (1);
 		}
 	}
 }
 
 static struct top_cdnr *
 tcb_lookup(ifname)
 	char *ifname;
 {
 	struct top_cdnr *top;
 	struct ifnet *ifp;
 
 	if ((ifp = ifunit(ifname)) != NULL)
 		LIST_FOREACH(top, &tcb_list, tc_next)
 			if (top->tc_ifq->altq_ifp == ifp)
 				return (top);
 	return (NULL);
 }
 
 /*
  * Translate a handle (as returned by cdnr_cb2handle()) back into the
  * conditioner block it names, or return NULL if no such block exists.
  *
  * Handles arrive from user space through the ioctl interface, so the
  * value is never dereferenced directly.  Instead it is compared with
  * the cb_handle of every block that is currently registered: each
  * top-level conditioner on tcb_list and each element on its
  * tc_elements list.  A stale or forged handle therefore yields NULL
  * instead of an access through an arbitrary kernel address.
  */
 static struct cdnr_block *
 cdnr_handle2cb(u_long handle)
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if (handle == 0)
 		return (NULL);
 
 	LIST_FOREACH(top, &tcb_list, tc_next) {
 		/* a top-level conditioner is itself a block (see cdnr_cballoc) */
 		cb = (struct cdnr_block *)top;
 		if (cb->cb_handle == handle)
 			return (cb);
 
 		LIST_FOREACH(cb, &top->tc_elements, cb_next) {
 			if (cb->cb_handle == handle)
 				return (cb);
 		}
 	}
 	return (NULL);
 }
 
 static u_long
 cdnr_cb2handle(cb)
 	struct cdnr_block *cb;
 {
 	return (cb->cb_handle);
 }
 
 static void *
 cdnr_cballoc(top, type, input_func)
 	struct top_cdnr *top;
 	int type;
 	struct tc_action *(*input_func)(struct cdnr_block *,
 					struct cdnr_pktinfo *);
 {
 	struct cdnr_block *cb;
 	int size;
 
 	switch (type) {
 	case TCETYPE_TOP:
 		size = sizeof(struct top_cdnr);
 		break;
 	case TCETYPE_ELEMENT:
 		size = sizeof(struct cdnr_block);
 		break;
 	case TCETYPE_TBMETER:
 		size = sizeof(struct tbmeter);
 		break;
 	case TCETYPE_TRTCM:
 		size = sizeof(struct trtcm);
 		break;
 	case TCETYPE_TSWTCM:
 		size = sizeof(struct tswtcm);
 		break;
 	default:
 		return (NULL);
 	}
 
 	cb = malloc(size, M_DEVBUF, M_WAITOK);
 	if (cb == NULL)
 		return (NULL);
 	bzero(cb, size);
 
 	cb->cb_len = size;
 	cb->cb_type = type;
 	cb->cb_ref = 0;
 	cb->cb_handle = (u_long)cb;
 	if (top == NULL)
 		cb->cb_top = (struct top_cdnr *)cb;
 	else
 		cb->cb_top = top;
 
 	if (input_func != NULL) {
 		/*
 		 * if this cdnr has an action function,
 		 * make tc_action to call itself.
 		 */
 		cb->cb_action.tca_code = TCACODE_NEXT;
 		cb->cb_action.tca_next = cb;
 		cb->cb_input = input_func;
 	} else
 		cb->cb_action.tca_code = TCACODE_NONE;
 
 	/* if this isn't top, register the element to the top level cdnr */
 	if (top != NULL)
 		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
 
 	return ((void *)cb);
 }
 
 /*
  * Release a conditioner block: drop the classifier filters that point
  * at it, unlink it from its top-level conditioner's element list
  * (unless the block is the top-level conditioner itself), and free it.
  */
 static void
 cdnr_cbdestroy(void *cblock)
 {
 	struct cdnr_block *blk = cblock;
 	int is_top;
 
 	acc_discard_filters(&blk->cb_top->tc_classifier, blk, 0);
 
 	/* a top-level block points at itself and is on no element list */
 	is_top = ((void *)blk->cb_top == cblock);
 	if (!is_top)
 		LIST_REMOVE(blk, cb_next);
 
 	free(blk, M_DEVBUF);
 }
 
 /*
  * conditioner common destroy routine
  */
 /*
  * Dispatch to the destroy routine matching cb->cb_type and return its
  * result; EINVAL for an unrecognized type.
  */
 static int
 generic_element_destroy(struct cdnr_block *cb)
 {
 	switch (cb->cb_type) {
 	case TCETYPE_TOP:
 		return (top_destroy((struct top_cdnr *)cb));
 	case TCETYPE_ELEMENT:
 		return (element_destroy(cb));
 	case TCETYPE_TBMETER:
 		return (tbm_destroy((struct tbmeter *)cb));
 	case TCETYPE_TRTCM:
 		return (trtcm_destroy((struct trtcm *)cb));
 	case TCETYPE_TSWTCM:
 		return (tswtcm_destroy((struct tswtcm *)cb));
 	default:
 		break;
 	}
 	return (EINVAL);
 }
 
 /*
  * Check an action supplied by the user.  PASS, DROP and MARK are
  * accepted as they are; HANDLE is accepted only when the handle
  * resolves to a block.  Every other code (NONE, RETURN, NEXT, unknown)
  * is rejected.  Returns 0 when acceptable, -1 otherwise.
  */
 static int
 tca_verify_action(struct tc_action *utca)
 {
 	int code = utca->tca_code;
 
 	if (code == TCACODE_PASS || code == TCACODE_DROP ||
 	    code == TCACODE_MARK)
 		return (0);
 
 	if (code == TCACODE_HANDLE) {
 		if (cdnr_handle2cb(utca->tca_handle) != NULL)
 			return (0);
 		return (-1);
 	}
 
 	/* internal-only or unknown codes must not come from a user */
 	return (-1);
 }
 
 /*
  * Copy a user action into its kernel form.  A HANDLE action becomes a
  * NEXT action pointing at the referenced block, whose reference count
  * is incremented; if the handle does not resolve, the action becomes
  * NONE.  A MARK action has its dscp value masked with DSCP_MASK.
  */
 static void
 tca_import_action(struct tc_action *ktca, struct tc_action *utca)
 {
 	struct cdnr_block *target;
 
 	*ktca = *utca;
 
 	switch (ktca->tca_code) {
 	case TCACODE_HANDLE:
 		target = cdnr_handle2cb(ktca->tca_handle);
 		if (target == NULL) {
 			ktca->tca_code = TCACODE_NONE;
 			break;
 		}
 		ktca->tca_code = TCACODE_NEXT;
 		ktca->tca_next = target;
 		target->cb_ref++;
 		break;
 	case TCACODE_MARK:
 		ktca->tca_dscp &= DSCP_MASK;
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Neutralize an action.  A NEXT action first releases its reference on
  * the block it points at; the code is then reset to NONE.  A NEXT
  * action with a NULL target is left untouched.
  */
 static void
 tca_invalidate_action(struct tc_action *tca)
 {
 	if (tca->tca_code == TCACODE_NEXT) {
 		struct cdnr_block *target = tca->tca_next;
 
 		if (target == NULL)
 			return;
 		target->cb_ref--;
 	}
 	tca->tca_code = TCACODE_NONE;
 }
 
 /*
  * top level traffic conditioner
  */
 /*
  * Create the top-level conditioner for the interface queue ifq, give
  * it PASS as its default action, put it on tcb_list and record it in
  * ifq->altq_cdnr.  Returns NULL if the block cannot be allocated.
  */
 static struct top_cdnr *
 top_create(struct ifaltq *ifq)
 {
 	struct top_cdnr *tc;
 
 	tc = cdnr_cballoc(NULL, TCETYPE_TOP, NULL);
 	if (tc == NULL)
 		return (NULL);
 
 	/* packets that match no filter are passed */
 	tc->tc_block.cb_action.tca_code = TCACODE_PASS;
 
 	/* tie the conditioner and the interface queue together */
 	tc->tc_ifq = ifq;
 	ifq->altq_cdnr = tc;
 
 	LIST_INSERT_HEAD(&tcb_list, tc, tc_next);
 
 	return (tc);
 }
 
 /*
  * Tear down a top-level conditioner: turn conditioning off on its
  * interface queue, detach it from the queue, destroy every element it
  * owns, unlink it from tcb_list and free it.  If no remaining
  * conditioner is enabled afterwards, the altq_input hook is cleared.
  * Always returns 0.
  */
 static int
 top_destroy(top)
 	struct top_cdnr *top;
 {
 	struct cdnr_block *cb;
 
 	if (ALTQ_IS_CNDTNING(top->tc_ifq))
 		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
 	top->tc_ifq->altq_cdnr = NULL;
 
 	/*
 	 * destroy all the conditioner elements belonging to this interface
 	 *
 	 * Each pass picks the first element that nobody references
 	 * (cb_ref == 0); destroying it releases its references on other
 	 * elements, so they become eligible on a later pass.
 	 * NOTE(review): if every remaining element had cb_ref > 0 the
 	 * outer loop would not terminate -- confirm that cannot happen.
 	 */
 	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
 		while (cb != NULL && cb->cb_ref > 0)
 			cb = LIST_NEXT(cb, cb_next);
 		if (cb != NULL)
 			generic_element_destroy(cb);
 	}
 
 	LIST_REMOVE(top, tc_next);
 
 	cdnr_cbdestroy(top);
 
 	/* if there is no active conditioner, remove the input hook */
 	if (altq_input != NULL) {
 		/* top was freed above; it is reused here only as the loop cursor */
 		LIST_FOREACH(top, &tcb_list, tc_next)
 			if (ALTQ_IS_CNDTNING(top->tc_ifq))
 				break;
 		if (top == NULL)
 			altq_input = NULL;
 	}
 
 	return (0);
 }
 
 /*
  * simple tc elements without input function (e.g., dropper and markers).
  */
 /*
  * Create a plain element (no input routine) under top whose action is
  * imported from the user-supplied action.  Returns NULL if the action
  * fails verification or the block cannot be allocated.
  */
 static struct cdnr_block *
 element_create(struct top_cdnr *top, struct tc_action *action)
 {
 	struct cdnr_block *elem;
 
 	if (tca_verify_action(action) < 0)
 		return (NULL);
 
 	elem = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL);
 	if (elem != NULL)
 		tca_import_action(&elem->cb_action, action);
 
 	return (elem);
 }
 
 /*
  * Destroy a plain element.  Returns EBUSY while other actions still
  * reference it, 0 once it has been invalidated and freed.
  */
 static int
 element_destroy(struct cdnr_block *cb)
 {
 	if (cb->cb_ref <= 0) {
 		tca_invalidate_action(&cb->cb_action);
 		cdnr_cbdestroy(cb);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * internal representation of token bucket parameters
  *	rate: 	byte_per_unittime << 32
  *		(((bits_per_sec) / 8) << 32) / machclk_freq
  *	depth:	byte << 32
  *
  */
 #define	TB_SHIFT	32
 #define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
 #define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
 
 /*
  * Load a token bucket from a user profile.  profile->rate is divided
  * by 8, scaled by TB_SCALE and divided by machclk_freq; profile->depth
  * is scaled by TB_SCALE.  The bucket starts full and its last-update
  * time is set to the current machine clock.
  */
 static void
 tb_import_profile(struct tbe *tb, struct tb_profile *profile)
 {
 	tb->depth = TB_SCALE(profile->depth);
 	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
 
 	/* time to fill an empty bucket; "never" when the rate is zero */
 	if (tb->rate > 0) {
 		tb->filluptime = tb->depth / tb->rate;
 	} else {
 		tb->filluptime = 0xffffffffffffffffLL;
 	}
 
 	tb->token = tb->depth;
 	tb->last = read_machclk();
 }
 
 /*
  * simple token bucket meter
  */
 /*
  * Create a token bucket meter under top with the given profile and
  * the actions to take for in-profile and out-of-profile packets.
  * Returns NULL if either action fails verification or the block
  * cannot be allocated.
  */
 static struct tbmeter *
 tbm_create(struct top_cdnr *top, struct tb_profile *profile,
     struct tc_action *in_action, struct tc_action *out_action)
 {
 	struct tbmeter *meter;
 
 	if (tca_verify_action(in_action) < 0)
 		return (NULL);
 	if (tca_verify_action(out_action) < 0)
 		return (NULL);
 
 	meter = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input);
 	if (meter == NULL)
 		return (NULL);
 
 	tb_import_profile(&meter->tb, profile);
 
 	tca_import_action(&meter->in_action, in_action);
 	tca_import_action(&meter->out_action, out_action);
 
 	return (meter);
 }
 
 /*
  * Destroy a token bucket meter.  Returns EBUSY while it is still
  * referenced; otherwise releases both of its actions, frees the block
  * and returns 0.
  */
 static int
 tbm_destroy(struct tbmeter *tbm)
 {
 	if (tbm->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tbm->in_action);
 		tca_invalidate_action(&tbm->out_action);
 		cdnr_cbdestroy(tbm);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 static struct tc_action *
 tbm_input(cb, pktinfo)
 	struct cdnr_block *cb;
 	struct cdnr_pktinfo *pktinfo;
 {
 	struct tbmeter *tbm = (struct tbmeter *)cb;
 	u_int64_t	len;
 	u_int64_t	interval, now;
 
 	len = TB_SCALE(pktinfo->pkt_len);
 
 	if (tbm->tb.token < len) {
 		now = read_machclk();
 		interval = now - tbm->tb.last;
 		if (interval >= tbm->tb.filluptime)
 			tbm->tb.token = tbm->tb.depth;
 		else {
 			tbm->tb.token += interval * tbm->tb.rate;
 			if (tbm->tb.token > tbm->tb.depth)
 				tbm->tb.token = tbm->tb.depth;
 		}
 		tbm->tb.last = now;
 	}
 
 	if (tbm->tb.token < len) {
 		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
 		return (&tbm->out_action);
 	}
 
 	tbm->tb.token -= len;
 	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
 	return (&tbm->in_action);
 }
 
 /*
  * two rate three color marker
  * as described in draft-heinanen-diffserv-trtcm-01.txt
  */
 /*
  * Create a two rate three color marker under top from a committed and
  * a peak token bucket profile plus one action per color.  Returns NULL
  * if any action fails verification or the block cannot be allocated.
  */
 static struct trtcm *
 trtcm_create(struct top_cdnr *top, struct tb_profile *cmtd_profile,
     struct tb_profile *peak_profile, struct tc_action *green_action,
     struct tc_action *yellow_action, struct tc_action *red_action,
     int coloraware)
 {
 	struct trtcm *marker;
 
 	if (tca_verify_action(green_action) < 0)
 		return (NULL);
 	if (tca_verify_action(yellow_action) < 0)
 		return (NULL);
 	if (tca_verify_action(red_action) < 0)
 		return (NULL);
 
 	marker = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input);
 	if (marker == NULL)
 		return (NULL);
 
 	tb_import_profile(&marker->cmtd_tb, cmtd_profile);
 	tb_import_profile(&marker->peak_tb, peak_profile);
 
 	tca_import_action(&marker->green_action, green_action);
 	tca_import_action(&marker->yellow_action, yellow_action);
 	tca_import_action(&marker->red_action, red_action);
 
 	/*
 	 * per-color dscp: taken from the action when it is a MARK action,
 	 * otherwise the AF11/AF12/AF13 defaults
 	 */
 	marker->green_dscp =
 	    (marker->green_action.tca_code == TCACODE_MARK) ?
 	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
 	marker->yellow_dscp =
 	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
 	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
 	marker->red_dscp =
 	    (marker->red_action.tca_code == TCACODE_MARK) ?
 	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
 
 	marker->coloraware = coloraware;
 
 	return (marker);
 }
 
 /*
  * Destroy a two rate three color marker.  Returns EBUSY while it is
  * still referenced; otherwise releases its three actions, frees the
  * block and returns 0.
  */
 static int
 trtcm_destroy(struct trtcm *tcm)
 {
 	if (tcm->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tcm->green_action);
 		tca_invalidate_action(&tcm->yellow_action);
 		tca_invalidate_action(&tcm->red_action);
 		cdnr_cbdestroy(tcm);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * Refill one token bucket of a trtcm for a packet needing len tokens.
  * Nothing happens when the bucket already holds enough; otherwise
  * tokens are added for the time since the last update (capped at the
  * bucket depth) and the update time is set to now.
  */
 static void
 trtcm_refill(struct tbe *tb, u_int64_t len, u_int64_t now)
 {
 	u_int64_t interval;
 
 	if (tb->token >= len)
 		return;
 
 	interval = now - tb->last;
 	if (interval >= tb->filluptime)
 		tb->token = tb->depth;
 	else {
 		tb->token += interval * tb->rate;
 		if (tb->token > tb->depth)
 			tb->token = tb->depth;
 	}
 	tb->last = now;
 }
 
 /*
  * Per-packet routine of the two rate three color marker.  The packet
  * is pre-colored (from its dscp when color-aware, green otherwise),
  * both buckets are refilled as needed, and the packet is then
  * classified:
  *   red     pre-colored red, or the peak bucket is short
  *   yellow  pre-colored yellow, or the committed bucket is short;
  *           consumes peak tokens
  *   green   otherwise; consumes committed and peak tokens
  * pktinfo->pkt_dscp is set to the chosen color's dscp, the color's
  * counter is updated and the color's action is returned.
  */
 static struct tc_action *
 trtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
 {
 	struct trtcm *marker = (struct trtcm *)cb;
 	u_int64_t need, now;
 	u_int8_t precolor;
 
 	need = TB_SCALE(pktinfo->pkt_len);
 
 	/* color-blind mode, and any non-yellow/red dscp, starts as green */
 	precolor = marker->green_dscp;
 	if (marker->coloraware) {
 		precolor = pktinfo->pkt_dscp;
 		if (precolor != marker->yellow_dscp &&
 		    precolor != marker->red_dscp)
 			precolor = marker->green_dscp;
 	}
 
 	now = read_machclk();
 	trtcm_refill(&marker->cmtd_tb, need, now);
 	trtcm_refill(&marker->peak_tb, need, now);
 
 	if (precolor == marker->red_dscp || marker->peak_tb.token < need) {
 		pktinfo->pkt_dscp = marker->red_dscp;
 		PKTCNTR_ADD(&marker->red_cnt, pktinfo->pkt_len);
 		return (&marker->red_action);
 	}
 
 	if (precolor == marker->yellow_dscp || marker->cmtd_tb.token < need) {
 		pktinfo->pkt_dscp = marker->yellow_dscp;
 		marker->peak_tb.token -= need;
 		PKTCNTR_ADD(&marker->yellow_cnt, pktinfo->pkt_len);
 		return (&marker->yellow_action);
 	}
 
 	pktinfo->pkt_dscp = marker->green_dscp;
 	marker->cmtd_tb.token -= need;
 	marker->peak_tb.token -= need;
 	PKTCNTR_ADD(&marker->green_cnt, pktinfo->pkt_len);
 	return (&marker->green_action);
 }
 
 /*
  * time sliding window three color marker
  * as described in draft-fang-diffserv-tc-tswtcm-00.txt
  */
 /*
  * Create a time sliding window three color marker under top.
  * cmtd_rate and peak_rate are given in bits/sec, avg_interval in msec.
  * Returns NULL if any action fails verification or the block cannot
  * be allocated.
  */
 static struct tswtcm *
 tswtcm_create(struct top_cdnr *top, u_int32_t cmtd_rate,
     u_int32_t peak_rate, u_int32_t avg_interval,
     struct tc_action *green_action, struct tc_action *yellow_action,
     struct tc_action *red_action)
 {
 	struct tswtcm *marker;
 
 	if (tca_verify_action(green_action) < 0)
 		return (NULL);
 	if (tca_verify_action(yellow_action) < 0)
 		return (NULL);
 	if (tca_verify_action(red_action) < 0)
 		return (NULL);
 
 	marker = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input);
 	if (marker == NULL)
 		return (NULL);
 
 	tca_import_action(&marker->green_action, green_action);
 	tca_import_action(&marker->yellow_action, yellow_action);
 	tca_import_action(&marker->red_action, red_action);
 
 	/*
 	 * per-color dscp: taken from the action when it is a MARK action,
 	 * otherwise the AF11/AF12/AF13 defaults
 	 */
 	marker->green_dscp =
 	    (marker->green_action.tca_code == TCACODE_MARK) ?
 	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
 	marker->yellow_dscp =
 	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
 	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
 	marker->red_dscp =
 	    (marker->red_action.tca_code == TCACODE_MARK) ?
 	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
 
 	/* rates are kept in bytes/sec; the estimate starts at zero */
 	marker->cmtd_rate = cmtd_rate / 8;
 	marker->peak_rate = peak_rate / 8;
 	marker->avg_rate = 0;
 
 	/* averaging window: msec converted to machine clock units */
 	marker->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
 
 	return (marker);
 }
 
 /*
  * Destroy a time sliding window marker.  Returns EBUSY while it is
  * still referenced; otherwise releases its three actions, frees the
  * block and returns 0.
  */
 static int
 tswtcm_destroy(struct tswtcm *tsw)
 {
 	if (tsw->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tsw->green_action);
 		tca_invalidate_action(&tsw->yellow_action);
 		tca_invalidate_action(&tsw->red_action);
 		cdnr_cbdestroy(tsw);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 static struct tc_action *
 tswtcm_input(cb, pktinfo)
 	struct cdnr_block *cb;
 	struct cdnr_pktinfo *pktinfo;
 {
 	struct tswtcm	*tsw = (struct tswtcm *)cb;
 	int		len;
 	u_int32_t	avg_rate;
 	u_int64_t	interval, now, tmp;
 
 	/*
 	 * rate estimator
 	 */
 	len = pktinfo->pkt_len;
 	now = read_machclk();
 
 	interval = now - tsw->t_front;
 	/*
 	 * calculate average rate:
 	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
 	 * pkt_len needs to be multiplied by machclk_freq in order to
 	 * get (bytes/sec).
 	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
 	 * less than 32 bits, the following 64-bit operation has enough
 	 * precision.
 	 */
 	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
 	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
 	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
 	tsw->t_front = now;
 
 	/*
 	 * marker
 	 */
 	if (avg_rate > tsw->cmtd_rate) {
 		u_int32_t randval = arc4random() % avg_rate;
 
 		if (avg_rate > tsw->peak_rate) {
 			if (randval < avg_rate - tsw->peak_rate) {
 				/* mark red */
 				pktinfo->pkt_dscp = tsw->red_dscp;
 				PKTCNTR_ADD(&tsw->red_cnt, len);
 				return (&tsw->red_action);
 			} else if (randval < avg_rate - tsw->cmtd_rate)
 				goto mark_yellow;
 		} else {
 			/* peak_rate >= avg_rate > cmtd_rate */
 			if (randval < avg_rate - tsw->cmtd_rate) {
 			mark_yellow:
 				pktinfo->pkt_dscp = tsw->yellow_dscp;
 				PKTCNTR_ADD(&tsw->yellow_cnt, len);
 				return (&tsw->yellow_action);
 			}
 		}
 	}
 
 	/* mark green */
 	pktinfo->pkt_dscp = tsw->green_dscp;
 	PKTCNTR_ADD(&tsw->green_cnt, len);
 	return (&tsw->green_action);
 }
 
 /*
  * ioctl requests
  */
 /*
  * Attach a top-level conditioner to the interface called ifname.
  * Returns EBADF for an unknown interface, EBUSY if the interface
  * already has a conditioner, ENOMEM if creation fails, 0 on success.
  */
 static int
 cdnrcmd_if_attach(char *ifname)
 {
 	struct ifnet *ifp = ifunit(ifname);
 
 	if (ifp == NULL)
 		return (EBADF);
 
 	if (ifp->if_snd.altq_cdnr != NULL)
 		return (EBUSY);
 
 	if (top_create(&ifp->if_snd) == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Detach and destroy the conditioner of the interface called ifname.
  * Returns EBADF if the interface has none, else top_destroy()'s result.
  */
 static int
 cdnrcmd_if_detach(char *ifname)
 {
 	struct top_cdnr *tc = tcb_lookup(ifname);
 
 	if (tc == NULL)
 		return (EBADF);
 	return top_destroy(tc);
 }
 
 /*
  * CDNR_ADD_ELEM: create a plain element on the named interface and
  * store its handle in ap->cdnr_handle.  Returns EBADF if the interface
  * has no conditioner, EINVAL if the element cannot be created.
  */
 static int
 cdnrcmd_add_element(struct cdnr_add_element *ap)
 {
 	struct top_cdnr *tc;
 	struct cdnr_block *elem;
 
 	tc = tcb_lookup(ap->iface.cdnr_ifname);
 	if (tc == NULL)
 		return (EBADF);
 
 	elem = element_create(tc, &ap->action);
 	if (elem == NULL)
 		return (EINVAL);
 
 	/* hand the new element's handle back to the caller */
 	ap->cdnr_handle = cdnr_cb2handle(elem);
 	return (0);
 }
 
 /*
  * CDNR_DEL_ELEM: destroy the block named by ap->cdnr_handle on the
  * named interface.
  *
  * Returns EBADF if the interface has no conditioner, EINVAL if the
  * handle does not resolve or names a block owned by a different
  * interface's conditioner, otherwise the result of the type-specific
  * destroy routine (EBUSY while the block is still referenced).
  */
 static int
 cdnrcmd_delete_element(struct cdnr_delete_element *ap)
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	/* the block must belong to the interface named in the request */
 	if (cb->cb_top != top)
 		return (EINVAL);
 
 	/* dispatches on cb_type, plain elements included */
 	return generic_element_destroy(cb);
 }
 
 static int
 cdnrcmd_add_filter(ap)
 	struct cdnr_add_filter *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&top->tc_classifier, &ap->filter,
 			      cb, &ap->filter_handle);
 }
 
 /*
  * CDNR_DEL_FILTER: remove the filter named by ap->filter_handle from
  * the named interface's classifier.  Returns EBADF if the interface
  * has no conditioner, else acc_delete_filter()'s result.
  */
 static int
 cdnrcmd_delete_filter(struct cdnr_delete_filter *ap)
 {
 	struct top_cdnr *tc = tcb_lookup(ap->iface.cdnr_ifname);
 
 	if (tc == NULL)
 		return (EBADF);
 	return acc_delete_filter(&tc->tc_classifier, ap->filter_handle);
 }
 
 /*
  * CDNR_ADD_TBM: create a token bucket meter on the named interface and
  * store its handle in ap->cdnr_handle.  Returns EBADF if the interface
  * has no conditioner, EINVAL if the meter cannot be created.
  */
 static int
 cdnrcmd_add_tbm(struct cdnr_add_tbmeter *ap)
 {
 	struct top_cdnr *tc;
 	struct tbmeter *meter;
 
 	tc = tcb_lookup(ap->iface.cdnr_ifname);
 	if (tc == NULL)
 		return (EBADF);
 
 	meter = tbm_create(tc, &ap->profile, &ap->in_action, &ap->out_action);
 	if (meter == NULL)
 		return (EINVAL);
 
 	/* hand the new meter's handle back to the caller */
 	ap->cdnr_handle = cdnr_cb2handle(&meter->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_tbm(ap)
 	struct cdnr_modify_tbmeter *ap;
 {
 	struct tbmeter *tbm;
 
 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	tb_import_profile(&tbm->tb, &ap->profile);
 
 	return (0);
 }
 
 static int
 cdnrcmd_tbm_stats(ap)
 	struct cdnr_tbmeter_stats *ap;
 {
 	struct tbmeter *tbm;
 
 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	ap->in_cnt = tbm->in_cnt;
 	ap->out_cnt = tbm->out_cnt;
 
 	return (0);
 }
 
 /*
  * CDNR_ADD_TCM: create a two rate three color marker on the named
  * interface and store its handle in ap->cdnr_handle.  Returns EBADF if
  * the interface has no conditioner, EINVAL if creation fails.
  */
 static int
 cdnrcmd_add_trtcm(struct cdnr_add_trtcm *ap)
 {
 	struct top_cdnr *tc;
 	struct trtcm *marker;
 
 	tc = tcb_lookup(ap->iface.cdnr_ifname);
 	if (tc == NULL)
 		return (EBADF);
 
 	marker = trtcm_create(tc, &ap->cmtd_profile, &ap->peak_profile,
 	    &ap->green_action, &ap->yellow_action, &ap->red_action,
 	    ap->coloraware);
 	if (marker == NULL)
 		return (EINVAL);
 
 	/* hand the new marker's handle back to the caller */
 	ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_trtcm(ap)
 	struct cdnr_modify_trtcm *ap;
 {
 	struct trtcm *tcm;
 
 	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
 	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
 
 	return (0);
 }
 
 /*
  * CDNR_TCM_STATS: copy the green/yellow/red counters of the three
  * color marker named by ap->cdnr_handle into the request.  Works for
  * both trtcm and tswtcm blocks; returns EINVAL if the handle does not
  * resolve or names any other block type.
  */
 static int
 cdnrcmd_tcm_stats(struct cdnr_tcm_stats *ap)
 {
 	struct cdnr_block *blk = cdnr_handle2cb(ap->cdnr_handle);
 
 	if (blk == NULL)
 		return (EINVAL);
 
 	switch (blk->cb_type) {
 	case TCETYPE_TRTCM: {
 		struct trtcm *two_rate = (struct trtcm *)blk;
 
 		ap->green_cnt = two_rate->green_cnt;
 		ap->yellow_cnt = two_rate->yellow_cnt;
 		ap->red_cnt = two_rate->red_cnt;
 		break;
 	}
 	case TCETYPE_TSWTCM: {
 		struct tswtcm *window = (struct tswtcm *)blk;
 
 		ap->green_cnt = window->green_cnt;
 		ap->yellow_cnt = window->yellow_cnt;
 		ap->red_cnt = window->red_cnt;
 		break;
 	}
 	default:
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * CDNR_ADD_TSW: create a time sliding window marker on the named
  * interface and store its handle in ap->cdnr_handle.  Returns EBADF if
  * the interface has no conditioner, EINVAL if the committed rate
  * exceeds the peak rate or creation fails.
  */
 static int
 cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *ap)
 {
 	struct top_cdnr *tc;
 	struct tswtcm *marker;
 
 	tc = tcb_lookup(ap->iface.cdnr_ifname);
 	if (tc == NULL)
 		return (EBADF);
 
 	/* the committed rate may not be above the peak rate */
 	if (ap->cmtd_rate > ap->peak_rate)
 		return (EINVAL);
 
 	marker = tswtcm_create(tc, ap->cmtd_rate, ap->peak_rate,
 	    ap->avg_interval, &ap->green_action, &ap->yellow_action,
 	    &ap->red_action);
 	if (marker == NULL)
 		return (EINVAL);
 
 	/* hand the new marker's handle back to the caller */
 	ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_tswtcm(ap)
 	struct cdnr_modify_tswtcm *ap;
 {
 	struct tswtcm *tsw;
 
 	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	if (ap->cmtd_rate > ap->peak_rate)
 		return (EINVAL);
 
 	/* convert rates from bits/sec to bytes/sec */
 	tsw->cmtd_rate = ap->cmtd_rate / 8;
 	tsw->peak_rate = ap->peak_rate / 8;
 	tsw->avg_rate = 0;
 
 	/* timewin is converted from msec to machine clock unit */
 	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
 
 	return (0);
 }
 
 /*
  * CDNR_GETSTATS: report statistics for the named interface.
  *
  * The per-action-code counters of the top-level conditioner are always
  * copied into ap->cnts.  If the caller supplies a buffer
  * (ap->tce_stats != NULL and ap->nelements > 0), per-element counters
  * of meters and markers are copied out to it as well, and
  * ap->nelements is updated to the number of entries written.
  *
  * Returns EBADF if the interface has no conditioner, the copyout()
  * error if writing an entry fails, 0 otherwise.
  */
 static int
 cdnrcmd_get_stats(ap)
 	struct cdnr_get_stats *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 	struct tbmeter *tbm;
 	struct trtcm *tcm;
 	struct tswtcm *tsw;
 	struct tce_stats tce, *usp;
 	int error, n, nskip, nelements;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	/* copy action stats */
 	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
 
 	/* stats for each element */
 	nelements = ap->nelements;
 	usp = ap->tce_stats;
 	/* no buffer supplied: ap->nelements is left as the caller set it */
 	if (nelements <= 0 || usp == NULL)
 		return (0);
 
 	nskip = ap->nskip;
 	n = 0;
 	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
 		/*
 		 * nskip counts every element on the list, including types
 		 * that the switch below does not report
 		 */
 		if (nskip > 0) {
 			nskip--;
 			continue;
 		}
 
 		bzero(&tce, sizeof(tce));
 		tce.tce_handle = cb->cb_handle;
 		tce.tce_type = cb->cb_type;
 		switch (cb->cb_type) {
 		case TCETYPE_TBMETER:
 			/* slot 0: in-profile, slot 1: out-of-profile */
 			tbm = (struct tbmeter *)cb;
 			tce.tce_cnts[0] = tbm->in_cnt;
 			tce.tce_cnts[1] = tbm->out_cnt;
 			break;
 		case TCETYPE_TRTCM:
 			/* slots 0..2: green, yellow, red */
 			tcm = (struct trtcm *)cb;
 			tce.tce_cnts[0] = tcm->green_cnt;
 			tce.tce_cnts[1] = tcm->yellow_cnt;
 			tce.tce_cnts[2] = tcm->red_cnt;
 			break;
 		case TCETYPE_TSWTCM:
 			/* slots 0..2: green, yellow, red */
 			tsw = (struct tswtcm *)cb;
 			tce.tce_cnts[0] = tsw->green_cnt;
 			tce.tce_cnts[1] = tsw->yellow_cnt;
 			tce.tce_cnts[2] = tsw->red_cnt;
 			break;
 		default:
 			/* other element types carry no counters; not reported */
 			continue;
 		}
 
 		/* usp is a user-space address, hence copyout() */
 		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
 				     sizeof(tce))) != 0)
 			return (error);
 
 		/* stop once the caller's buffer is full */
 		if (++n == nelements)
 			break;
 	}
 	ap->nelements = n;
 
 	return (0);
 }
 
 /*
  * conditioner device interface
  */
 /*
  * Open routine of the conditioner device.  Makes sure the machine
  * clock has been initialized; returns ENXIO when no clock frequency is
  * available, 0 otherwise.
  */
 int
 cdnropen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	if (machclk_freq == 0) {
 		/* first user: try to set the clock up now */
 		init_machclk();
 
 		if (machclk_freq == 0) {
 			printf("cdnr: no cpu clock available!\n");
 			return (ENXIO);
 		}
 	}
 
 	/* the real work happens when a conditioner is attached */
 	return 0;
 }
 
 /*
  * Close routine of the conditioner device.  Destroys every top-level
  * conditioner on tcb_list and clears the altq_input hook.  Returns the
  * first non-zero error reported by top_destroy(), or 0.
  */
 int
 cdnrclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct top_cdnr *tc;
 	int rc, first_err = 0;
 
 	/* top_destroy() unlinks its argument, so keep taking the head */
 	for (tc = LIST_FIRST(&tcb_list); tc != NULL;
 	    tc = LIST_FIRST(&tcb_list)) {
 		rc = top_destroy(tc);
 		if (first_err == 0 && rc != 0)
 			first_err = rc;
 	}
 	altq_input = NULL;
 
 	return (first_err);
 }
 
 /*
  * Ioctl routine of the conditioner device.
  *
  * Every command except CDNR_GETSTATS requires privilege; the check
  * used depends on the system version.  The command is then dispatched
  * to its cdnrcmd_* handler with network interrupts blocked
  * (splnet/splimp).  addr points at the command-specific request
  * structure.
  *
  * Returns the privilege-check error, EBADF when CDNR_ENABLE/DISABLE
  * names an interface without a conditioner, EINVAL for an unknown
  * command, otherwise the handler's result.
  */
 int
 cdnrioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct top_cdnr *top;
 	struct cdnr_interface *ifacep;
 	int	s, error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case CDNR_GETSTATS:
 		break;
 	default:
 		/*
 		 * exactly one of the three checks below must survive
 		 * preprocessing, so the middle directive has to be #elif
 		 */
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 #endif
 			return (error);
 		break;
 	}
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	switch (cmd) {
 
 	case CDNR_IF_ATTACH:
 		ifacep = (struct cdnr_interface *)addr;
 		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
 		break;
 
 	case CDNR_IF_DETACH:
 		ifacep = (struct cdnr_interface *)addr;
 		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
 		break;
 
 	case CDNR_ENABLE:
 	case CDNR_DISABLE:
 		ifacep = (struct cdnr_interface *)addr;
 		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 
 		case CDNR_ENABLE:
 			/* install the input hook with the first enabled conditioner */
 			ALTQ_SET_CNDTNING(top->tc_ifq);
 			if (altq_input == NULL)
 				altq_input = altq_cdnr_input;
 			break;
 
 		case CDNR_DISABLE:
 			/* remove the input hook once no conditioner is enabled */
 			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
 			LIST_FOREACH(top, &tcb_list, tc_next)
 				if (ALTQ_IS_CNDTNING(top->tc_ifq))
 					break;
 			if (top == NULL)
 				altq_input = NULL;
 			break;
 		}
 		break;
 
 	case CDNR_ADD_ELEM:
 		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
 		break;
 
 	case CDNR_DEL_ELEM:
 		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
 		break;
 
 	case CDNR_ADD_TBM:
 		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
 		break;
 
 	case CDNR_MOD_TBM:
 		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
 		break;
 
 	case CDNR_TBM_STATS:
 		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
 		break;
 
 	case CDNR_ADD_TCM:
 		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
 		break;
 
 	case CDNR_MOD_TCM:
 		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
 		break;
 
 	case CDNR_TCM_STATS:
 		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
 		break;
 
 	case CDNR_ADD_FILTER:
 		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
 		break;
 
 	case CDNR_DEL_FILTER:
 		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
 		break;
 
 	case CDNR_GETSTATS:
 		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
 		break;
 
 	case CDNR_ADD_TSW:
 		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
 		break;
 
 	case CDNR_MOD_TSW:
 		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	splx(s);
 
 	return error;
 }
 
 #ifdef KLD_MODULE
 
 /* switch entry for the conditioner: its name and open/close/ioctl handlers */
 static struct altqsw cdnr_sw =
 	{"cdnr", cdnropen, cdnrclose, cdnrioctl};
 
 /* module glue for the altq_cdnr module using the switch entry above */
 ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
 
 #endif /* KLD_MODULE */
 
 #endif /* ALTQ_CDNR */
 #endif /* ALTQ3_COMPAT */
Index: stable/10/sys/contrib/altq/altq/altq_hfsc.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_hfsc.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_hfsc.c	(revision 263086)
@@ -1,2219 +1,2222 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $	*/
 
 /*
  * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation is hereby granted (including for commercial or
  * for-profit use), provided that both the copyright notice and this
  * permission notice appear in all copies of the software, derivative
  * works, or modified versions, and any portions thereof.
  *
  * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
  * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
  * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  *
  * Carnegie Mellon encourages (but does not require) users of this
  * software to return any improvements or extensions that they make,
  * and to grant Carnegie Mellon the rights to redistribute these
  * changes without encumbrance.
  */
 /*
  * H-FSC is described in Proceedings of SIGCOMM'97,
  * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
  * Real-Time and Priority Service"
  * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
  *
  * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
  * when a class has an upperlimit, the fit-time is computed from the
  * upperlimit service curve.  the link-sharing scheduler does not schedule
  * a class whose fit-time exceeds the current time.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 
 #ifdef ALTQ_HFSC  /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/queue.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/sockio.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #endif /* ALTQ3_COMPAT */
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <netinet/in.h>
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
 #include <altq/altq.h>
 #include <altq/altq_hfsc.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 
 /*
  * function prototypes
  */
 static int			 hfsc_clear_interface(struct hfsc_if *);
 static int			 hfsc_request(struct ifaltq *, int, void *);
 static void			 hfsc_purge(struct hfsc_if *);
 static struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
     struct service_curve *, struct service_curve *, struct service_curve *,
     struct hfsc_class *, int, int, int);
 static int			 hfsc_class_destroy(struct hfsc_class *);
 static struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);
 static int			 hfsc_enqueue(struct ifaltq *, struct mbuf *,
 				    struct altq_pktattr *);
 static struct mbuf		*hfsc_dequeue(struct ifaltq *, int);
 
 static int		 hfsc_addq(struct hfsc_class *, struct mbuf *);
 static struct mbuf	*hfsc_getq(struct hfsc_class *);
 static struct mbuf	*hfsc_pollq(struct hfsc_class *);
 static void		 hfsc_purgeq(struct hfsc_class *);
 
 static void		 update_cfmin(struct hfsc_class *);
 static void		 set_active(struct hfsc_class *, int);
 static void		 set_passive(struct hfsc_class *);
 
 static void		 init_ed(struct hfsc_class *, int);
 static void		 update_ed(struct hfsc_class *, int);
 static void		 update_d(struct hfsc_class *, int);
 static void		 init_vf(struct hfsc_class *, int);
 static void		 update_vf(struct hfsc_class *, int, u_int64_t);
 static void		 ellist_insert(struct hfsc_class *);
 static void		 ellist_remove(struct hfsc_class *);
 static void		 ellist_update(struct hfsc_class *);
 struct hfsc_class	*hfsc_get_mindl(struct hfsc_if *, u_int64_t);
 static void		 actlist_insert(struct hfsc_class *);
 static void		 actlist_remove(struct hfsc_class *);
 static void		 actlist_update(struct hfsc_class *);
 
 static struct hfsc_class	*actlist_firstfit(struct hfsc_class *,
 				    u_int64_t);
 
 static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
 static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
 static __inline u_int64_t	m2sm(u_int);
 static __inline u_int64_t	m2ism(u_int);
 static __inline u_int64_t	d2dx(u_int);
 static u_int			sm2m(u_int64_t);
 static u_int			dx2d(u_int64_t);
 
 static void		sc2isc(struct service_curve *, struct internal_sc *);
 static void		rtsc_init(struct runtime_sc *, struct internal_sc *,
 			    u_int64_t, u_int64_t);
 static u_int64_t	rtsc_y2x(struct runtime_sc *, u_int64_t);
 static u_int64_t	rtsc_x2y(struct runtime_sc *, u_int64_t);
 static void		rtsc_min(struct runtime_sc *, struct internal_sc *,
 			    u_int64_t, u_int64_t);
 
 static void			 get_class_stats(struct hfsc_classstats *,
 				    struct hfsc_class *);
 static struct hfsc_class	*clh_to_clp(struct hfsc_if *, u_int32_t);
 
 
 #ifdef ALTQ3_COMPAT
 static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
 static int hfsc_detach(struct hfsc_if *);
 static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
     struct service_curve *, struct service_curve *);
 
 static int hfsccmd_if_attach(struct hfsc_attach *);
 static int hfsccmd_if_detach(struct hfsc_interface *);
 static int hfsccmd_add_class(struct hfsc_add_class *);
 static int hfsccmd_delete_class(struct hfsc_delete_class *);
 static int hfsccmd_modify_class(struct hfsc_modify_class *);
 static int hfsccmd_add_filter(struct hfsc_add_filter *);
 static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
 static int hfsccmd_class_stats(struct hfsc_class_stats *);
 
 altqdev_decl(hfsc);
 #endif /* ALTQ3_COMPAT */
 
 /*
  * macros
  */
 #define	is_a_parent_class(cl)	((cl)->cl_children != NULL)
 
 #define	HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */
 
 #ifdef ALTQ3_COMPAT
 /* hif_list keeps all hfsc_if's allocated. */
 static struct hfsc_if *hif_list = NULL;
 #endif /* ALTQ3_COMPAT */
 
 /*
  * hfsc_pfattach hooks the HFSC state kept in a->altq_disc onto the
  * send queue of the interface named by a->ifname.
  * returns EINVAL when the interface cannot be found or no state has
  * been set up, otherwise whatever altq_attach() returns.
  */
 int
 hfsc_pfattach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (a->altq_disc == NULL)
 		return (EINVAL);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
 	    hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
 	splx(s);
 
 	return (error);
 }
 
 /*
  * hfsc_add_altq allocates a zeroed hfsc_if for the interface named by
  * a->ifname and stores it in a->altq_disc.
  * returns EINVAL if the interface is unknown, ENODEV if its send queue
  * is not ALTQ-ready, ENOMEM if the allocation fails, 0 on success.
  */
 int
 hfsc_add_altq(struct pf_altq *a)
 {
 	struct ifnet *ifp = ifunit(a->ifname);
 	struct hfsc_if *hif;
 
 	if (ifp == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	hif = malloc(sizeof(*hif), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (hif == NULL)
 		return (ENOMEM);
 
 	hif->hif_ifq = &ifp->if_snd;
 	TAILQ_INIT(&hif->hif_eligible);
 
 	/* the pf_altq entry carries the discipline state from now on */
 	a->altq_disc = hif;
 
 	return (0);
 }
 
 /*
  * hfsc_remove_altq detaches the hfsc_if from a->altq_disc, destroys
  * every class including the root class, and frees the hfsc_if.
  * returns EINVAL if no state is attached, 0 otherwise.
  */
 int
 hfsc_remove_altq(struct pf_altq *a)
 {
 	struct hfsc_if *hif = a->altq_disc;
 
 	if (hif == NULL)
 		return (EINVAL);
 
 	/* drop the reference first, then tear the hierarchy down */
 	a->altq_disc = NULL;
 	(void)hfsc_clear_interface(hif);
 	(void)hfsc_class_destroy(hif->hif_rootclass);
 	free(hif, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * hfsc_add_queue creates the class described by a (qid, parent_qid,
  * qlimit and the hfsc options) on the hfsc_if kept in a->altq_disc.
  * returns EINVAL for missing state, an unknown parent or a zero qid,
  * EBUSY if the qid is already in use, ENOMEM if the class cannot be
  * created, 0 on success.
  */
 int
 hfsc_add_queue(struct pf_altq *a)
 {
 	struct hfsc_if *hif = a->altq_disc;
 	struct hfsc_class *parent;
 	struct hfsc_opts *opts;
 	struct service_curve rtsc, lssc, ulsc;
 
 	if (hif == NULL)
 		return (EINVAL);
 
 	opts = &a->pq_u.hfsc_opts;
 
 	/*
 	 * only the very first class (no root yet, null parent handle)
 	 * may be created without a parent.
 	 */
 	if (a->parent_qid != HFSC_NULLCLASS_HANDLE ||
 	    hif->hif_rootclass != NULL) {
 		parent = clh_to_clp(hif, a->parent_qid);
 		if (parent == NULL)
 			return (EINVAL);
 	} else
 		parent = NULL;
 
 	if (a->qid == 0)
 		return (EINVAL);
 	if (clh_to_clp(hif, a->qid) != NULL)
 		return (EBUSY);
 
 	/* real-time service curve */
 	rtsc.m1 = opts->rtsc_m1;
 	rtsc.d  = opts->rtsc_d;
 	rtsc.m2 = opts->rtsc_m2;
 	/* link-sharing service curve */
 	lssc.m1 = opts->lssc_m1;
 	lssc.d  = opts->lssc_d;
 	lssc.m2 = opts->lssc_m2;
 	/* upper-limit service curve */
 	ulsc.m1 = opts->ulsc_m1;
 	ulsc.d  = opts->ulsc_d;
 	ulsc.m2 = opts->ulsc_m2;
 
 	if (hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent,
 	    a->qlimit, opts->flags, a->qid) == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * hfsc_remove_queue destroys the class whose handle is a->qid.
  * returns EINVAL if no state is attached or the class is unknown,
  * otherwise the result of hfsc_class_destroy().
  */
 int
 hfsc_remove_queue(struct pf_altq *a)
 {
 	struct hfsc_if *hif = a->altq_disc;
 	struct hfsc_class *victim;
 
 	if (hif == NULL)
 		return (EINVAL);
 
 	victim = clh_to_clp(hif, a->qid);
 	if (victim == NULL)
 		return (EINVAL);
 
 	return (hfsc_class_destroy(victim));
 }
 
 /*
  * hfsc_getqstats copies the statistics of the class identified by
  * a->qid on interface a->ifname out to the user buffer ubuf.
  *	*nbytes: on entry the size of ubuf, on success the number of
  *		 bytes copied out.
  * returns EBADF if altq_lookup() finds no HFSC state for the interface,
  * EINVAL if the class is unknown or the buffer is too small, or the
  * error returned by copyout().
  */
 int
 hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 	struct hfsc_classstats stats;
 	int error = 0;
 
 	if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
 		return (EINVAL);
 
 	/*
 	 * compare as signed values: a negative *nbytes would otherwise
 	 * be converted to a huge unsigned number and slip past the check.
 	 */
 	if (*nbytes < (int)sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * reset the interface to its initial state: every filter and every
  * class below the root class is discarded.  the root class itself
  * is kept.  always returns 0.
  */
 static int
 hfsc_clear_interface(struct hfsc_if *hif)
 {
 	struct hfsc_class	*cl;
 
 #ifdef ALTQ3_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&hif->hif_classifier, NULL, 1);
 #endif
 
 	/*
 	 * each pass removes the first leaf found below the root and
 	 * then restarts the walk, until the root has no children left.
 	 */
 	for (;;) {
 		if (hif->hif_rootclass == NULL)
 			break;
 		cl = hif->hif_rootclass->cl_children;
 		if (cl == NULL)
 			break;
 
 		while (cl != NULL && is_a_parent_class(cl))
 			cl = hfsc_nextclass(cl);
 		if (cl != NULL)
 			(void)hfsc_class_destroy(cl);
 	}
 
 	return (0);
 }
 
 /*
  * hfsc_request handles requests passed to the discipline.
  * only ALTRQ_PURGE is acted upon; every other request is ignored.
  * arg is not used.  always returns 0.
  */
 static int
 hfsc_request(struct ifaltq *ifq, int req, void *arg)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		hfsc_purge(hif);
 
 	return (0);
 }
 
 /*
  * throw away every packet queued on the interface by purging each
  * non-empty class queue in the hierarchy.
  */
 static void
 hfsc_purge(struct hfsc_if *hif)
 {
 	struct hfsc_class *cl;
 
 	cl = hif->hif_rootclass;
 	while (cl != NULL) {
 		if (!qempty(cl->cl_q))
 			hfsc_purgeq(cl);
 		cl = hfsc_nextclass(cl);
 	}
 
 	/* reset the interface queue length while the discipline is enabled */
 	if (ALTQ_IS_ENABLED(hif->hif_ifq))
 		hif->hif_ifq->ifq_len = 0;
 }
 
 /*
  * hfsc_class_create allocates and initializes a new class and links it
  * into the hierarchy of hif.
  *	rsc, fsc, usc:	real-time, link-sharing and upper-limit service
  *			curves.  a curve is ignored when the pointer is
  *			NULL or both m1 and m2 are zero.
  *	parent:		parent class, or NULL to create the root class.
  *	qlimit:		queue limit in packets; 0 selects the default (50).
  *	flags:		HFCF_* flags.
  *	qid:		handle stored in cl_handle.
  * returns the new class, or NULL when the class limit is reached, RED
  * is requested but not compiled in, an allocation fails, or no free
  * slot is left in the class table.
  */
 struct hfsc_class *
 hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
     struct service_curve *fsc, struct service_curve *usc,
     struct hfsc_class *parent, int qlimit, int flags, int qid)
 {
 	struct hfsc_class *cl, *p;
 	int i, s;
 
 	if (hif->hif_classes >= HFSC_MAX_CLASSES)
 		return (NULL);
 
 #ifndef ALTQ_RED
 	if (flags & HFCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("hfsc_class_create: RED not configured for HFSC!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	/* M_ZERO: the error path relies on unset pointers being NULL */
 	cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl == NULL)
 		return (NULL);
 
 	cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl->cl_q == NULL)
 		goto err_ret;
 
 	TAILQ_INIT(&cl->cl_actc);
 
 	if (qlimit == 0)
 		qlimit = 50;  /* use default */
 	qlimit(cl->cl_q) = qlimit;
 	qtype(cl->cl_q) = Q_DROPTAIL;
 	qlen(cl->cl_q) = 0;
 	cl->cl_flags = flags;
 #ifdef ALTQ_RED
 	if (flags & (HFCF_RED|HFCF_RIO)) {
 		int red_flags, red_pkttime;
 		u_int m2;
 
 		/* use the largest m2 of the three curves as the class rate */
 		m2 = 0;
 		if (rsc != NULL && rsc->m2 > m2)
 			m2 = rsc->m2;
 		if (fsc != NULL && fsc->m2 > m2)
 			m2 = fsc->m2;
 		if (usc != NULL && usc->m2 > m2)
 			m2 = usc->m2;
 
 		red_flags = 0;
 		if (flags & HFCF_ECN)
 			red_flags |= REDF_ECN;
 #ifdef ALTQ_RIO
 		if (flags & HFCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		if (m2 < 8)
 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
 		else
 			red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
 				* 1000 * 1000 * 1000 / (m2 / 8);
 		if (flags & HFCF_RED) {
 			cl->cl_red = red_alloc(0, 0,
 			    qlimit(cl->cl_q) * 10/100,
 			    qlimit(cl->cl_q) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->cl_red != NULL)
 				qtype(cl->cl_q) = Q_RED;
 		}
 #ifdef ALTQ_RIO
 		else {
 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
 			    red_flags, red_pkttime);
 			if (cl->cl_red != NULL)
 				qtype(cl->cl_q) = Q_RIO;
 		}
 #endif
 	}
 #endif /* ALTQ_RED */
 
 	/* real-time curve: initializes the deadline and eligible curves */
 	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
 		cl->cl_rsc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_rsc == NULL)
 			goto err_ret;
 		sc2isc(rsc, cl->cl_rsc);
 		rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
 		rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
 	}
 	/* link-sharing curve: initializes the virtual curve */
 	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
 		cl->cl_fsc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_fsc == NULL)
 			goto err_ret;
 		sc2isc(fsc, cl->cl_fsc);
 		rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
 	}
 	/* upper-limit curve: initializes the ulimit curve */
 	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
 		cl->cl_usc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_usc == NULL)
 			goto err_ret;
 		sc2isc(usc, cl->cl_usc);
 		rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
 	}
 
 	cl->cl_id = hif->hif_classid++;
 	cl->cl_handle = qid;
 	cl->cl_hif = hif;
 	cl->cl_parent = parent;
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(hif->hif_ifq);
 
 	/*
 	 * find a free slot in the class table.  if the slot matching
 	 * the lower bits of qid is free, use this slot.  otherwise,
 	 * use the first free slot.
 	 * qid is reduced as an unsigned value so that a negative qid
 	 * cannot yield a negative table index.
 	 */
 	i = (u_int)qid % HFSC_MAX_CLASSES;
 	if (hif->hif_class_tbl[i] == NULL)
 		hif->hif_class_tbl[i] = cl;
 	else {
 		for (i = 0; i < HFSC_MAX_CLASSES; i++)
 			if (hif->hif_class_tbl[i] == NULL) {
 				hif->hif_class_tbl[i] = cl;
 				break;
 			}
 		if (i == HFSC_MAX_CLASSES) {
 			IFQ_UNLOCK(hif->hif_ifq);
 			splx(s);
 			goto err_ret;
 		}
 	}
 
 	/*
 	 * count the class only once it owns a table slot, so that the
 	 * failure path above does not leave hif_classes too large.
 	 */
 	hif->hif_classes++;
 
 	if (flags & HFCF_DEFAULTCLASS)
 		hif->hif_defaultclass = cl;
 
 	if (parent == NULL) {
 		/* this is root class */
 		hif->hif_rootclass = cl;
 	} else {
 		/* add this class to the children list of the parent */
 		if ((p = parent->cl_children) == NULL)
 			parent->cl_children = cl;
 		else {
 			while (p->cl_siblings != NULL)
 				p = p->cl_siblings;
 			p->cl_siblings = cl;
 		}
 	}
 	IFQ_UNLOCK(hif->hif_ifq);
 	splx(s);
 
 	return (cl);
 
  err_ret:
 	/* release whatever was allocated before the failure */
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	if (cl->cl_fsc != NULL)
 		free(cl->cl_fsc, M_DEVBUF);
 	if (cl->cl_rsc != NULL)
 		free(cl->cl_rsc, M_DEVBUF);
 	if (cl->cl_usc != NULL)
 		free(cl->cl_usc, M_DEVBUF);
 	if (cl->cl_q != NULL)
 		free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (NULL);
 }
 
 /*
  * hfsc_class_destroy removes a leaf class from the hierarchy and
  * frees it: queued packets are purged, the class is unlinked from its
  * parent's children list and from the class table, and its RED/RIO
  * state, service curves and queue are released.
  * returns 0 when cl is NULL or the class was destroyed, EBUSY when
  * the class still has children.
  */
 static int
 hfsc_class_destroy(struct hfsc_class *cl)
 {
 	int i, s;
 
 	if (cl == NULL)
 		return (0);
 
 	/* only leaf classes can be destroyed */
 	if (is_a_parent_class(cl))
 		return (EBUSY);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 
 #ifdef ALTQ3_COMPAT
 	/* delete filters referencing to this class */
 	acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
 #endif /* ALTQ3_COMPAT */
 
 	/* drop any packets still queued on this class */
 	if (!qempty(cl->cl_q))
 		hfsc_purgeq(cl);
 
 	if (cl->cl_parent == NULL) {
 		/* this is root class */
 	} else {
 		/* unlink cl from the sibling chain of its parent */
 		struct hfsc_class *p = cl->cl_parent->cl_children;
 
 		if (p == cl)
 			cl->cl_parent->cl_children = cl->cl_siblings;
 		else do {
 			if (p->cl_siblings == cl) {
 				p->cl_siblings = cl->cl_siblings;
 				break;
 			}
 		} while ((p = p->cl_siblings) != NULL);
 		ASSERT(p != NULL);
 	}
 
 	/* clear the class table slot that points at cl */
 	for (i = 0; i < HFSC_MAX_CLASSES; i++)
 		if (cl->cl_hif->hif_class_tbl[i] == cl) {
 			cl->cl_hif->hif_class_tbl[i] = NULL;
 			break;
 		}
 
 	cl->cl_hif->hif_classes--;
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 	splx(s);
 
 	/* the RED/RIO state is released without the queue lock held */
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 
 	/* forget cl if it was the root class or the default class */
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 	if (cl == cl->cl_hif->hif_rootclass)
 		cl->cl_hif->hif_rootclass = NULL;
 	if (cl == cl->cl_hif->hif_defaultclass)
 		cl->cl_hif->hif_defaultclass = NULL;
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 
 	/* release the service curves, the queue and the class itself */
 	if (cl->cl_usc != NULL)
 		free(cl->cl_usc, M_DEVBUF);
 	if (cl->cl_fsc != NULL)
 		free(cl->cl_fsc, M_DEVBUF);
 	if (cl->cl_rsc != NULL)
 		free(cl->cl_rsc, M_DEVBUF);
 	free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * hfsc_nextclass returns the class that follows cl in a walk of the
  * tree (first child, else next sibling, else the next sibling of the
  * closest ancestor that has one), or NULL when the walk is finished.
  *   usage:
  *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
  *		do_something;
  */
 static struct hfsc_class *
 hfsc_nextclass(struct hfsc_class *cl)
 {
 	if (cl->cl_children != NULL)
 		return (cl->cl_children);
 	if (cl->cl_siblings != NULL)
 		return (cl->cl_siblings);
 
 	/* climb until an ancestor with a sibling is found */
 	for (cl = cl->cl_parent; cl != NULL; cl = cl->cl_parent)
 		if (cl->cl_siblings != NULL)
 			return (cl->cl_siblings);
 
 	return (NULL);
 }
 
 /*
  * hfsc_enqueue is the enqueue function registered to
  * (*altq_enqueue) in struct ifaltq.
  *
  * the target class is looked up from the pf tag of the packet (or,
  * with ALTQ3_COMPAT, from pktattr); packets without a usable leaf
  * class go to the default class.
  * returns 0 when the packet was queued, ENOBUFS when it was dropped.
  * the mbuf is consumed in both cases.
  */
 static int
 hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 	struct hfsc_class *cl;
 	struct pf_mtag *t;
 	int len;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (!(m->m_flags & M_PKTHDR)) {
 		/* should not happen */
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 	/* grab class set by classifier */
 	t = pf_find_mtag(m);
 	if (t != NULL)
 		cl = clh_to_clp(hif, t->qid);
 #ifdef ALTQ3_COMPAT
 	else if (pktattr != NULL && (ifq->altq_flags & ALTQF_CLASSIFY) != 0)
 		cl = pktattr->pattr_class;
 #endif
 	else
 		cl = NULL;
 
 	/* unknown or interior classes fall back to the default class */
 	if (cl == NULL || is_a_parent_class(cl)) {
 		cl = hif->hif_defaultclass;
 		if (cl == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 	}
 
 #ifdef ALTQ3_COMPAT
 	/* save proto hdr used by ECN (NULL when there is none) */
 	cl->cl_pktattr = pktattr;
 #else
 	cl->cl_pktattr = NULL;
 #endif
 
 	/* take the length first: the mbuf is gone if hfsc_addq drops it */
 	len = m_pktlen(m);
 	if (hfsc_addq(cl, m) != 0) {
 		/* drop occurred.  mbuf was freed in hfsc_addq. */
 		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
 		return (ENOBUFS);
 	}
 
 	/* successfully queued. */
 	IFQ_INC_LEN(ifq);
 	cl->cl_hif->hif_packets++;
 
 	/* first packet of a backlog period: the class becomes active */
 	if (qlen(cl->cl_q) == 1)
 		set_active(cl, m_pktlen(m));
 
 	return (0);
 }
 
 /*
  * hfsc_dequeue is a dequeue function to be registered to
  * (*altq_dequeue) in struct ifaltq.
  *
  * note: ALTDQ_POLL returns the next packet without removing the packet
  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
  *	ALTDQ_REMOVE must return the same packet if called immediately
  *	after ALTDQ_POLL.
  *
  * returns NULL when no packet is queued in the tree or when the
  * link-sharing search finds no class that fits at the current time.
  * panics if the selected class does not yield a packet.
  */
 static struct mbuf *
 hfsc_dequeue(struct ifaltq *ifq, int op)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 	struct hfsc_class *cl;
 	struct mbuf *m;
 	int len, next_len;
 	int realtime = 0;	/* set when cl is served by real-time criteria */
 	u_int64_t cur_time;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (hif->hif_packets == 0)
 		/* no packet in the tree */
 		return (NULL);
 
 	cur_time = read_machclk();
 
 	if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
 
 		/* reuse the class selected by the preceding ALTDQ_POLL */
 		cl = hif->hif_pollcache;
 		hif->hif_pollcache = NULL;
 		/* check if the class was scheduled by real-time criteria */
 		if (cl->cl_rsc != NULL)
 			realtime = (cl->cl_e <= cur_time);
 	} else {
 		/*
 		 * if there are eligible classes, use real-time criteria.
 		 * find the class with the minimum deadline among
 		 * the eligible classes.
 		 */
 		if ((cl = hfsc_get_mindl(hif, cur_time))
 		    != NULL) {
 			realtime = 1;
 		} else {
 #ifdef ALTQ_DEBUG
 			int fits = 0;
 #endif
 			/*
 			 * use link-sharing criteria
 			 * get the class with the minimum vt in the hierarchy
 			 */
 			cl = hif->hif_rootclass;
 			/* descend one level per iteration until a leaf is reached */
 			while (is_a_parent_class(cl)) {
 
 				cl = actlist_firstfit(cl, cur_time);
 				if (cl == NULL) {
 #ifdef ALTQ_DEBUG
 					if (fits > 0)
 						printf("%d fit but none found\n",fits);
 #endif
 					return (NULL);
 				}
 				/*
 				 * update parent's cl_cvtmin.
 				 * don't update if the new vt is smaller.
 				 */
 				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
 					cl->cl_parent->cl_cvtmin = cl->cl_vt;
 #ifdef ALTQ_DEBUG
 				fits++;
 #endif
 			}
 		}
 
 		if (op == ALTDQ_POLL) {
 			/* remember the choice for the following ALTDQ_REMOVE */
 			hif->hif_pollcache = cl;
 			m = hfsc_pollq(cl);
 			return (m);
 		}
 	}
 
 	m = hfsc_getq(cl);
 	if (m == NULL)
 		panic("hfsc_dequeue:");
 	len = m_pktlen(m);
 	cl->cl_hif->hif_packets--;
 	IFQ_DEC_LEN(ifq);
 	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
 
 	update_vf(cl, len, cur_time);
 	/* cl_cumul only counts bytes sent under real-time criteria */
 	if (realtime)
 		cl->cl_cumul += len;
 
 	if (!qempty(cl->cl_q)) {
 		if (cl->cl_rsc != NULL) {
 			/* update ed */
 			next_len = m_pktlen(qhead(cl->cl_q));
 
 			if (realtime)
 				update_ed(cl, next_len);
 			else
 				update_d(cl, next_len);
 		}
 	} else {
 		/* the class becomes passive */
 		set_passive(cl);
 	}
 
 	return (m);
 }
 
 static int
 hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
 {
 
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
 				m, cl->cl_pktattr);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
 #endif
 	if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
 		m_freem(m);
 		return (-1);
 	}
 
 	if (cl->cl_flags & HFCF_CLEARDSCP)
 		write_dsfield(m, cl->cl_pktattr, 0);
 
 	_addq(cl->cl_q, m);
 
 	return (0);
 }
 
 static struct mbuf *
 hfsc_getq(struct hfsc_class *cl)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return red_getq(cl->cl_red, cl->cl_q);
 #endif
 	return _getq(cl->cl_q);
 }
 
 /* hfsc_pollq returns the head packet of cl without dequeueing it. */
 static struct mbuf *
 hfsc_pollq(struct hfsc_class *cl)
 {
 	struct mbuf *head = qhead(cl->cl_q);
 
 	return head;
 }
 
 /*
  * hfsc_purgeq drops every packet queued on cl, accounts each one as
  * a drop, and takes the class off the active and eligible lists.
  * nothing is done when the queue is already empty.
  */
 static void
 hfsc_purgeq(struct hfsc_class *cl)
 {
 	struct mbuf *m;
 
 	if (qempty(cl->cl_q))
 		return;
 
 	for (m = _getq(cl->cl_q); m != NULL; m = _getq(cl->cl_q)) {
 		/* count the drop before the mbuf is released */
 		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
 		m_freem(m);
 		cl->cl_hif->hif_packets--;
 		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
 	}
 	ASSERT(qlen(cl->cl_q) == 0);
 
 	/* remove cl from the actlist */
 	update_vf(cl, 0, 0);
 	set_passive(cl);
 }
 
 /*
  * set_active is called when a class becomes backlogged: the
  * eligible/deadline state is set up for classes with a real-time
  * curve, the virtual time state for classes with a link-sharing
  * curve, and the period counter of the class is advanced.
  */
 static void
 set_active(struct hfsc_class *cl, int len)
 {
 	if (cl->cl_rsc != NULL) {
 		init_ed(cl, len);
 	}
 	if (cl->cl_fsc != NULL) {
 		init_vf(cl, len);
 	}
 
 	cl->cl_stats.period++;
 }
 
 /*
  * set_passive takes a class with a real-time curve off the eligible
  * list.
  *
  * the actlist is not touched here: it is maintained by update_vf(),
  * so update_vf(cl, 0, 0) has to be called explicitly to remove a
  * class from the actlist.
  */
 static void
 set_passive(struct hfsc_class *cl)
 {
 	if (cl->cl_rsc == NULL)
 		return;
 
 	ellist_remove(cl);
 }
 
 static void
 init_ed(struct hfsc_class *cl, int next_len)
 {
 	u_int64_t cur_time;
 
 	cur_time = read_machclk();
 
 	/* update the deadline curve */
 	rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
 
 	/*
 	 * update the eligible curve.
 	 * for concave, it is equal to the deadline curve.
 	 * for convex, it is a linear curve with slope m2.
 	 */
 	cl->cl_eligible = cl->cl_deadline;
 	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
 		cl->cl_eligible.dx = 0;
 		cl->cl_eligible.dy = 0;
 	}
 
 	/* compute e and d */
 	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
 	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
 
 	ellist_insert(cl);
 }
 
 /*
  * update_ed recomputes both the eligible time and the deadline of cl
  * for a head packet of next_len bytes and repositions the class in
  * the eligible list.
  */
 static void
 update_ed(struct hfsc_class *cl, int next_len)
 {
 	/* eligible time from the eligible curve */
 	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
 
 	/* deadline from the deadline curve, including the next packet */
 	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
 
 	ellist_update(cl);
 }
 
 /*
  * update_d recomputes only the deadline of cl for a head packet of
  * next_len bytes; the eligible time and the eligible list are left
  * untouched.
  */
 static void
 update_d(struct hfsc_class *cl, int next_len)
 {
 	cl->cl_d = rtsc_y2x(&cl->cl_deadline,
 	    cl->cl_cumul + next_len);
 }
 
 /*
  * init_vf sets up the virtual time (vt) and fit-time (f) state when a
  * class becomes backlogged.  it walks from cl towards the root class
  * (the root itself is not processed) and activates every ancestor
  * that had no active child so far.
  * len is not referenced in this function.
  */
 static void
 init_vf(struct hfsc_class *cl, int len)
 {
 	struct hfsc_class *max_cl, *p;
 	u_int64_t vt, f, cur_time;
 	int go_active;
 
 	cur_time = 0;	/* 0 means the clock has not been read yet */
 	go_active = 1;
 	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
 
 		/*
 		 * cl_nactive is only incremented while go_active is still
 		 * set; cl goes active only if that count was zero before.
 		 */
 		if (go_active && cl->cl_nactive++ == 0)
 			go_active = 1;
 		else
 			go_active = 0;
 
 		if (go_active) {
 			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 			if (max_cl != NULL) {
 				/*
 				 * set vt to the average of the min and max
 				 * classes.  if the parent's period didn't
 				 * change, don't decrease vt of the class.
 				 */
 				vt = max_cl->cl_vt;
 				if (cl->cl_parent->cl_cvtmin != 0)
 					vt = (cl->cl_parent->cl_cvtmin + vt)/2;
 
 				if (cl->cl_parent->cl_vtperiod !=
 				    cl->cl_parentperiod || vt > cl->cl_vt)
 					cl->cl_vt = vt;
 			} else {
 				/*
 				 * first child for a new parent backlog period.
 				 * add parent's cvtmax to vtoff of children
 				 * to make a new vt (vtoff + vt) larger than
 				 * the vt in the last period for all children.
 				 */
 				vt = cl->cl_parent->cl_cvtmax;
 				for (p = cl->cl_parent->cl_children; p != NULL;
 				     p = p->cl_siblings)
 					p->cl_vtoff += vt;
 				cl->cl_vt = 0;
 				cl->cl_parent->cl_cvtmax = 0;
 				cl->cl_parent->cl_cvtmin = 0;
 			}
 			cl->cl_initvt = cl->cl_vt;
 
 			/* update the virtual curve */
 			vt = cl->cl_vt + cl->cl_vtoff;
 			rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
 			/* the curve now starts at vt: fold vtoff into it */
 			if (cl->cl_virtual.x == vt) {
 				cl->cl_virtual.x -= cl->cl_vtoff;
 				cl->cl_vtoff = 0;
 			}
 			cl->cl_vtadj = 0;
 
 			cl->cl_vtperiod++;  /* increment vt period */
 			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
 			/*
 			 * the parent has no active child counted yet, so
 			 * the period recorded for it is advanced by one.
 			 */
 			if (cl->cl_parent->cl_nactive == 0)
 				cl->cl_parentperiod++;
 			cl->cl_f = 0;
 
 			actlist_insert(cl);
 
 			if (cl->cl_usc != NULL) {
 				/* class has upper limit curve */
 				if (cur_time == 0)
 					cur_time = read_machclk();
 
 				/* update the ulimit curve */
 				rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
 				    cl->cl_total);
 				/* compute myf */
 				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
 				    cl->cl_total);
 				cl->cl_myfadj = 0;
 			}
 		}
 
 		/* cl_f is max(cl_myf, cl_cfmin); tell the parent if it changed */
 		if (cl->cl_myf > cl->cl_cfmin)
 			f = cl->cl_myf;
 		else
 			f = cl->cl_cfmin;
 		if (f != cl->cl_f) {
 			cl->cl_f = f;
 			update_cfmin(cl->cl_parent);
 		}
 	}
 }
 
 /*
  * update_vf accounts len bytes of service to cl and to each of its
  * ancestors below the root class, and updates their virtual time
  * (vt) and fit-time (f).  when the queue of cl is empty, cl and
  * every ancestor whose active count drops to zero are removed from
  * the active children list of their parent.
  * hfsc_purgeq() calls this as update_vf(cl, 0, 0) to take a class off
  * the actlist.
  */
 static void
 update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
 {
 	u_int64_t f, myf_bound, delta;
 	int go_passive;
 
 	go_passive = qempty(cl->cl_q);
 
 	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
 
 		cl->cl_total += len;
 
 		/* nothing more to do without a link-sharing curve or when idle */
 		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
 			continue;
 
 		/*
 		 * cl_nactive is only decremented while go_passive is still
 		 * set; cl goes passive only when the count reaches zero.
 		 */
 		if (go_passive && --cl->cl_nactive == 0)
 			go_passive = 1;
 		else
 			go_passive = 0;
 
 		if (go_passive) {
 			/* no more active child, going passive */
 
 			/* update cvtmax of the parent class */
 			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
 				cl->cl_parent->cl_cvtmax = cl->cl_vt;
 
 			/* remove this class from the vt list */
 			actlist_remove(cl);
 
 			update_cfmin(cl->cl_parent);
 
 			continue;
 		}
 
 		/*
 		 * update vt and f
 		 */
 		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
 		    - cl->cl_vtoff + cl->cl_vtadj;
 
 		/*
 		 * if vt of the class is smaller than cvtmin,
 		 * the class was skipped in the past due to non-fit.
 		 * if so, we need to adjust vtadj.
 		 */
 		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
 			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
 			cl->cl_vt = cl->cl_parent->cl_cvtmin;
 		}
 
 		/* update the vt list */
 		actlist_update(cl);
 
 		if (cl->cl_usc != NULL) {
 			cl->cl_myf = cl->cl_myfadj
 			    + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
 
 			/*
 			 * if myf lags behind by more than one clock tick
 			 * from the current time, adjust myfadj to prevent
 			 * a rate-limited class from going greedy.
 			 * in a steady state under rate-limiting, myf
 			 * fluctuates within one clock tick.
 			 */
 			myf_bound = cur_time - machclk_per_tick;
 			if (cl->cl_myf < myf_bound) {
 				delta = cur_time - cl->cl_myf;
 				cl->cl_myfadj += delta;
 				cl->cl_myf += delta;
 			}
 		}
 
 		/* cl_f is max(cl_myf, cl_cfmin) */
 		if (cl->cl_myf > cl->cl_cfmin)
 			f = cl->cl_myf;
 		else
 			f = cl->cl_cfmin;
 		if (f != cl->cl_f) {
 			cl->cl_f = f;
 			update_cfmin(cl->cl_parent);
 		}
 	}
 }
 
 /*
  * update_cfmin recomputes cl_cfmin of cl as the smallest fit-time
  * (cl_f) among its active children.  the result is 0 when there is
  * no active child or when any active child has a fit-time of 0.
  */
 static void
 update_cfmin(struct hfsc_class *cl)
 {
 	struct hfsc_class *child;
 	u_int64_t lowest = 0;
 
 	if (!TAILQ_EMPTY(&cl->cl_actc)) {
 		lowest = HT_INFINITY;
 		TAILQ_FOREACH(child, &cl->cl_actc, cl_actlist) {
 			if (child->cl_f == 0) {
 				/* cannot get any smaller */
 				lowest = 0;
 				break;
 			}
 			if (child->cl_f < lowest)
 				lowest = child->cl_f;
 		}
 	}
 
 	cl->cl_cfmin = lowest;
 }
 
 /*
  * TAILQ based ellist and actlist implementation
  * (ion wanted to make a calendar queue based implementation)
  */
 /*
  * eligible list holds backlogged classes being sorted by their eligible times.
  * there is one eligible list per interface.
  */
 
 /*
  * ellist_insert adds cl to the eligible list of its interface, keeping
  * the list sorted by eligible time (cl_e).  a class is placed after
  * entries that have the same eligible time.
  */
 static void
 ellist_insert(struct hfsc_class *cl)
 {
 	struct hfsc_if	*hif = cl->cl_hif;
 	struct hfsc_class *tail, *p;
 
 	/* check the last entry first */
 	tail = TAILQ_LAST(&hif->hif_eligible, elighead);
 	if (tail == NULL || cl->cl_e >= tail->cl_e) {
 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
 		return;
 	}
 
 	/* look for the first entry with a larger eligible time */
 	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
 		if (p->cl_e > cl->cl_e)
 			break;
 	}
 	if (p == NULL) {
 		ASSERT(0); /* should not reach here */
 		return;
 	}
 	TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
 }
 
 /* ellist_remove takes cl off the eligible list of its interface. */
 static void
 ellist_remove(struct hfsc_class *cl)
 {
 	TAILQ_REMOVE(&cl->cl_hif->hif_eligible, cl, cl_ellist);
 }
 
 /*
  * ellist_update moves cl to its new position in the eligible list
  * after its eligible time (cl_e) has changed.  the class only ever
  * moves towards the tail of the list.
  */
 static void
 ellist_update(struct hfsc_class *cl)
 {
 	struct hfsc_if	*hif = cl->cl_hif;
 	struct hfsc_class *next, *tail, *p;
 
 	/*
 	 * the eligible time of a class increases monotonically.
 	 * if the next entry has a larger eligible time, nothing to do.
 	 */
 	next = TAILQ_NEXT(cl, cl_ellist);
 	if (next == NULL)
 		return;
 	if (next->cl_e >= cl->cl_e)
 		return;
 
 	/* check the last entry */
 	tail = TAILQ_LAST(&hif->hif_eligible, elighead);
 	ASSERT(tail != NULL);
 	if (cl->cl_e >= tail->cl_e) {
 		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
 		return;
 	}
 
 	/*
 	 * the new position must be between the next entry
 	 * and the last entry
 	 */
 	for (p = TAILQ_NEXT(next, cl_ellist); p != NULL;
 	    p = TAILQ_NEXT(p, cl_ellist)) {
 		if (p->cl_e > cl->cl_e) {
 			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
 			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * return the class with the smallest deadline (cl_d) among the classes
  * whose eligible time (cl_e) is not later than cur_time, or NULL when
  * there is none.  among equal deadlines the first one found wins.
  */
 struct hfsc_class *
 hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
 {
 	struct hfsc_class *best = NULL;
 	struct hfsc_class *cand;
 
 	TAILQ_FOREACH(cand, &hif->hif_eligible, cl_ellist) {
 		/* stop at the first class that is not yet eligible */
 		if (cand->cl_e > cur_time)
 			break;
 		if (best != NULL && best->cl_d <= cand->cl_d)
 			continue;
 		best = cand;
 	}
 	return (best);
 }
 
 /*
  * active children list holds backlogged child classes being sorted
  * by their virtual time.
  * each intermediate class has one active children list.
  */
 
 /*
  * actlist_insert adds cl to the active children list of its parent,
  * keeping the list sorted by virtual time (cl_vt).  a class is placed
  * after entries that have the same virtual time.
  */
 static void
 actlist_insert(struct hfsc_class *cl)
 {
 	struct hfsc_class *tail, *p;
 
 	/* check the last entry first */
 	tail = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 	if (tail == NULL || cl->cl_vt >= tail->cl_vt) {
 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		return;
 	}
 
 	/* look for the first entry with a larger virtual time */
 	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
 		if (p->cl_vt > cl->cl_vt)
 			break;
 	}
 	if (p == NULL) {
 		ASSERT(0); /* should not reach here */
 		return;
 	}
 	TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
 }
 
 /* actlist_remove takes cl off the active children list of its parent. */
 static void
 actlist_remove(struct hfsc_class *cl)
 {
 	struct hfsc_class *parent = cl->cl_parent;
 
 	TAILQ_REMOVE(&parent->cl_actc, cl, cl_actlist);
 }
 
 /*
  * actlist_update moves cl to its new position in the active children
  * list of its parent after its virtual time (cl_vt) has changed.  the
  * class only ever moves towards the tail of the list.
  */
 static void
 actlist_update(struct hfsc_class *cl)
 {
 	struct hfsc_class *next, *tail, *p;
 
 	/*
 	 * the virtual time of a class increases monotonically during its
 	 * backlogged period.
 	 * if the next entry has a larger virtual time, nothing to do.
 	 */
 	next = TAILQ_NEXT(cl, cl_actlist);
 	if (next == NULL)
 		return;
 	if (next->cl_vt > cl->cl_vt)
 		return;
 
 	/* check the last entry */
 	tail = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 	ASSERT(tail != NULL);
 	if (cl->cl_vt >= tail->cl_vt) {
 		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		return;
 	}
 
 	/*
 	 * the new position must be between the next entry
 	 * and the last entry
 	 */
 	for (p = TAILQ_NEXT(next, cl_actlist); p != NULL;
 	    p = TAILQ_NEXT(p, cl_actlist)) {
 		if (p->cl_vt > cl->cl_vt) {
 			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
 			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * Walk cl's active children list from the head and return the first
  * child whose cl_f is not later than cur_time, or NULL when no child
  * qualifies.
  */
 static struct hfsc_class *
 actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
 {
 	struct hfsc_class *child;
 	struct hfsc_class *fit = NULL;
 
 	TAILQ_FOREACH(child, &cl->cl_actc, cl_actlist) {
 		if (cur_time >= child->cl_f) {
 			fit = child;
 			break;
 		}
 	}
 	return (fit);
 }
 
 /*
  * service curve support functions
  *
  *  external service curve parameters
  *	m: bits/sec
  *	d: msec
  *  internal service curve parameters
  *	sm: (bytes/tsc_interval) << SM_SHIFT
  *	ism: (tsc_count/byte) << ISM_SHIFT
  *	dx: tsc_count
  *
  * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
  * we should be able to handle 100K-1Gbps linkspeed with 200MHz-1GHz CPU
  * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
  * digits in decimal using the following table.
  *
  *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  ----------+-------------------------------------------------------
  *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
  *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
  *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
  *
  *  nsec/byte   80000      8000       800        80         8
  *  ism(500MHz) 40000      4000       400        40         4
  *  ism(200MHz) 16000      1600       160        16         1.6
  */
 #define	SM_SHIFT	24
 #define	ISM_SHIFT	10
 
 #define	SM_MASK		((1LL << SM_SHIFT) - 1)
 #define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
 
 /*
  * Scale x by the fixed-point slope sm, i.e. compute (x * sm) >> SM_SHIFT.
  * x is split at bit SM_SHIFT and the two parts are scaled separately so
  * that the intermediate product is less likely to overflow 64 bits.
  */
 static __inline u_int64_t
 seg_x2y(u_int64_t x, u_int64_t sm)
 {
 	u_int64_t upper = x >> SM_SHIFT;	/* bits at and above SM_SHIFT */
 	u_int64_t lower = x & SM_MASK;		/* bits below SM_SHIFT */
 
 	return (upper * sm + ((lower * sm) >> SM_SHIFT));
 }
 
 /*
  * Inverse scaling: compute (y * ism) >> ISM_SHIFT, again splitting y at
  * bit ISM_SHIFT.  y == 0 yields 0, and an ism of HT_INFINITY yields
  * HT_INFINITY for any non-zero y.
  */
 static __inline u_int64_t
 seg_y2x(u_int64_t y, u_int64_t ism)
 {
 	if (y == 0)
 		return (0);
 	if (ism == HT_INFINITY)
 		return (HT_INFINITY);
 
 	return ((y >> ISM_SHIFT) * ism
 	    + (((y & ISM_MASK) * ism) >> ISM_SHIFT));
 }
 
 /*
  * convert an external slope m (bits/sec) into the internal sm form:
  * bytes per machine clock tick, scaled up by SM_SHIFT bits.
  */
 static __inline u_int64_t
 m2sm(u_int m)
 {
 	return (((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq);
 }
 
 /*
  * convert an external slope m (bits/sec) into the inverse internal form
  * ism: machine clock ticks per byte, scaled up by ISM_SHIFT bits.
  * a zero slope maps to HT_INFINITY.
  */
 static __inline u_int64_t
 m2ism(u_int m)
 {
 	if (m == 0)
 		return (HT_INFINITY);
 
 	return (((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m);
 }
 
 /* convert a duration d in msec into machine clock ticks */
 static __inline u_int64_t
 d2dx(u_int d)
 {
 	return (((u_int64_t)d * machclk_freq) / 1000);
 }
 
 static u_int
 sm2m(u_int64_t sm)
 {
 	u_int64_t m;
 
 	m = (sm * 8 * machclk_freq) >> SM_SHIFT;
 	return ((u_int)m);
 }
 
 static u_int
 dx2d(u_int64_t dx)
 {
 	u_int64_t d;
 
 	d = dx * 1000 / machclk_freq;
 	return ((u_int)d);
 }
 
 /*
  * translate an external service curve (m1, d, m2) into its internal,
  * clock-based representation.
  */
 static void
 sc2isc(struct service_curve *sc, struct internal_sc *isc)
 {
 	/* slopes of the two segments, in both directions */
 	isc->sm1 = m2sm(sc->m1);
 	isc->sm2 = m2sm(sc->m2);
 	isc->ism1 = m2ism(sc->m1);
 	isc->ism2 = m2ism(sc->m2);
 
 	/* extent of the first segment: dx ticks wide, dy high */
 	isc->dx = d2dx(sc->d);
 	isc->dy = seg_x2y(isc->dx, isc->sm1);
 }
 
 /*
  * set up a runtime service curve: copy the shape (slopes and first
  * segment extent) from the internal service curve isc and anchor it
  * at the point (x, y).
  */
 static void
 rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
     u_int64_t y)
 {
 	/* origin of the curve */
 	rtsc->x = x;
 	rtsc->y = y;
 
 	/* first segment */
 	rtsc->sm1 = isc->sm1;
 	rtsc->ism1 = isc->ism1;
 	rtsc->dx = isc->dx;
 	rtsc->dy = isc->dy;
 
 	/* second segment */
 	rtsc->sm2 = isc->sm2;
 	rtsc->ism2 = isc->ism2;
 }
 
 /*
  * calculate the x-projection of the runtime service curve by the
  * given y-projection value.
  *
  * a y below the curve's origin maps to the origin's x; a y within the
  * first segment is mapped with ism1 and anything above it with ism2.
  */
 static u_int64_t
 rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
 {
 	u_int64_t	x;
 
 	if (y < rtsc->y)
 		x = rtsc->x;
 	else if (y <= rtsc->y + rtsc->dy) {
 		/* x belongs to the 1st segment */
 		if (rtsc->dy == 0)
 			/* first segment has no height: jump to its end */
 			x = rtsc->x + rtsc->dx;
 		else
 			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
 	} else {
 		/* x belongs to the 2nd segment */
 		x = rtsc->x + rtsc->dx
 		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
 	}
 	return (x);
 }
 
 /*
  * calculate the y value of the runtime service curve at the given x.
  * an x at or before the curve's origin maps to the origin's y.
  */
 static u_int64_t
 rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
 {
 	if (x <= rtsc->x)
 		return (rtsc->y);
 
 	/* x falls into the 1st segment */
 	if (x <= rtsc->x + rtsc->dx)
 		return (rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1));
 
 	/* x falls into the 2nd segment */
 	return (rtsc->y + rtsc->dy
 	    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2));
 }
 
 /*
  * update the runtime service curve by taking the minimum of the current
  * runtime service curve and the service curve starting at (x, y).
  *
  *	rtsc: runtime curve, updated in place
  *	isc:  shape (slopes, first segment extent) of the candidate curve
  *	x, y: origin of the candidate curve
  *
  * only x, y, dx and dy of rtsc are ever rewritten here; its slopes are
  * left untouched.
  */
 static void
 rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
     u_int64_t y)
 {
 	u_int64_t	y1, y2, dx, dy;
 
 	if (isc->sm1 <= isc->sm2) {
 		/* service curve is convex */
 		y1 = rtsc_x2y(rtsc, x);
 		if (y1 < y)
 			/* the current rtsc is smaller */
 			return;
 		/* otherwise simply re-anchor the curve at (x, y) */
 		rtsc->x = x;
 		rtsc->y = y;
 		return;
 	}
 
 	/*
 	 * service curve is concave
 	 * compute the two y values of the current rtsc
 	 *	y1: at x
 	 *	y2: at (x + dx)
 	 */
 	y1 = rtsc_x2y(rtsc, x);
 	if (y1 <= y) {
 		/* rtsc is below isc, no change to rtsc */
 		return;
 	}
 
 	y2 = rtsc_x2y(rtsc, x + isc->dx);
 	if (y2 >= y + isc->dy) {
 		/* rtsc is above isc, replace rtsc by isc */
 		rtsc->x = x;
 		rtsc->y = y;
 		rtsc->dx = isc->dx;
 		rtsc->dy = isc->dy;
 		return;
 	}
 
 	/*
 	 * the two curves intersect
 	 * compute the offsets (dx, dy) using the reverse
 	 * function of seg_x2y()
 	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
 	 */
 	/* sm1 > sm2 on this path (concave), so the divisor is non-zero */
 	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
 	/*
 	 * check if (x, y1) belongs to the 1st segment of rtsc.
 	 * if so, add the offset.
 	 */
 	if (rtsc->x + rtsc->dx > x)
 		dx += rtsc->x + rtsc->dx - x;
 	dy = seg_x2y(dx, isc->sm1);
 
 	rtsc->x = x;
 	rtsc->y = y;
 	rtsc->dx = dx;
 	rtsc->dy = dy;
 	return;
 }
 
 /*
  * fill *sp with a snapshot of class cl: its three service curves
  * converted back to external units, scheduler state, queue state and
  * packet counters.
  *
  * note: sp->red[] is written only when the class queue is RED or RIO
  * (and the corresponding option is compiled in); all other bytes of
  * sp->red[] keep whatever the caller put there.
  */
 static void
 get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
 {
 	sp->class_id = cl->cl_id;
 	sp->class_handle = cl->cl_handle;
 
 	/* a service curve the class does not have is reported as all-zero */
 	if (cl->cl_rsc != NULL) {
 		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
 		sp->rsc.d = dx2d(cl->cl_rsc->dx);
 		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
 	} else {
 		sp->rsc.m1 = 0;
 		sp->rsc.d = 0;
 		sp->rsc.m2 = 0;
 	}
 	if (cl->cl_fsc != NULL) {
 		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
 		sp->fsc.d = dx2d(cl->cl_fsc->dx);
 		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
 	} else {
 		sp->fsc.m1 = 0;
 		sp->fsc.d = 0;
 		sp->fsc.m2 = 0;
 	}
 	if (cl->cl_usc != NULL) {
 		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
 		sp->usc.d = dx2d(cl->cl_usc->dx);
 		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
 	} else {
 		sp->usc.m1 = 0;
 		sp->usc.d = 0;
 		sp->usc.m2 = 0;
 	}
 
 	sp->total = cl->cl_total;
 	sp->cumul = cl->cl_cumul;
 
 	sp->d = cl->cl_d;
 	sp->e = cl->cl_e;
 	sp->vt = cl->cl_vt;
 	sp->f = cl->cl_f;
 
 	sp->initvt = cl->cl_initvt;
 	sp->vtperiod = cl->cl_vtperiod;
 	sp->parentperiod = cl->cl_parentperiod;
 	sp->nactive = cl->cl_nactive;
 	sp->vtoff = cl->cl_vtoff;
 	sp->cvtmax = cl->cl_cvtmax;
 	sp->myf = cl->cl_myf;
 	sp->cfmin = cl->cl_cfmin;
 	sp->cvtmin = cl->cl_cvtmin;
 	sp->myfadj = cl->cl_myfadj;
 	sp->vtadj = cl->cl_vtadj;
 
 	/* clock reading and frequency, so the tick values can be interpreted */
 	sp->cur_time = read_machclk();
 	sp->machclk_freq = machclk_freq;
 
 	sp->qlength = qlen(cl->cl_q);
 	sp->qlimit = qlimit(cl->cl_q);
 	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
 	sp->drop_cnt = cl->cl_stats.drop_cnt;
 	sp->period = cl->cl_stats.period;
 
 	sp->qtype = qtype(cl->cl_q);
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		red_getstats(cl->cl_red, &sp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
 #endif
 }
 
 /*
  * map a class handle to its class pointer.
  * returns NULL for handle 0 and for handles no class in hif carries.
  */
 static struct hfsc_class *
 clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
 {
 	struct hfsc_class *cand;
 	int slot;
 
 	if (chandle == 0)
 		return (NULL);
 
 	/*
 	 * optimistic probe: the slot selected by the handle modulo the
 	 * table size.
 	 */
 	slot = chandle % HFSC_MAX_CLASSES;
 	cand = hif->hif_class_tbl[slot];
 	if (cand != NULL && cand->cl_handle == chandle)
 		return (cand);
 
 	/* the probe missed: fall back to scanning the whole table */
 	for (slot = 0; slot < HFSC_MAX_CLASSES; slot++) {
 		cand = hif->hif_class_tbl[slot];
 		if (cand == NULL || cand->cl_handle != chandle)
 			continue;
 		return (cand);
 	}
 	return (NULL);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * allocate and zero a new hfsc interface state for ifq and link it at
  * the head of hif_list.  bandwidth is accepted but not used here.
  * returns the new state, or NULL when the allocation fails.
  */
 static struct hfsc_if *
 hfsc_attach(ifq, bandwidth)
 	struct ifaltq *ifq;
 	u_int bandwidth;
 {
 	struct hfsc_if *hif;
 
 	hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
 	if (hif == NULL)
 		return (NULL);
 	bzero(hif, sizeof(struct hfsc_if));
 
 	/*
 	 * hif_eligible is a TAILQ head embedded in struct hfsc_if (the
 	 * eligible list code operates on &hif->hif_eligible), so it is
 	 * initialized in place rather than allocated separately.
 	 */
 	TAILQ_INIT(&hif->hif_eligible);
 
 	hif->hif_ifq = ifq;
 
 	/* add this state to the hfsc list */
 	hif->hif_next = hif_list;
 	hif_list = hif;
 
 	return (hif);
 }
 
 /*
  * tear down an hfsc interface state: clear the interface, destroy the
  * root class, unlink hif from hif_list and free it.  always returns 0.
  */
 static int
 hfsc_detach(hif)
 	struct hfsc_if *hif;
 {
 	(void)hfsc_clear_interface(hif);
 	(void)hfsc_class_destroy(hif->hif_rootclass);
 
 	/* remove this interface from the hif list */
 	if (hif_list == hif)
 		hif_list = hif->hif_next;
 	else {
 		struct hfsc_if *h;
 
 		for (h = hif_list; h != NULL; h = h->hif_next)
 			if (h->hif_next == hif) {
 				h->hif_next = hif->hif_next;
 				break;
 			}
 		ASSERT(h != NULL);
 	}
 
 	/*
 	 * the eligible list head (hif_eligible) is embedded in *hif, so
 	 * there is no separate list object to release before the free.
 	 */
 	free(hif, M_DEVBUF);
 
 	return (0);
 }
 
 static int
 hfsc_class_modify(cl, rsc, fsc, usc)
 	struct hfsc_class *cl;
 	struct service_curve *rsc, *fsc, *usc;
 {
 	struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
 	u_int64_t cur_time;
 	int s;
 
 	rsc_tmp = fsc_tmp = usc_tmp = NULL;
 	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
 	    cl->cl_rsc == NULL) {
 		rsc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (rsc_tmp == NULL)
 			return (ENOMEM);
 	}
 	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
 	    cl->cl_fsc == NULL) {
 		fsc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (fsc_tmp == NULL) {
 			free(rsc_tmp);
 			return (ENOMEM);
 		}
 	}
 	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
 	    cl->cl_usc == NULL) {
 		usc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (usc_tmp == NULL) {
 			free(rsc_tmp);
 			free(fsc_tmp);
 			return (ENOMEM);
 		}
 	}
 
 	cur_time = read_machclk();
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 
 	if (rsc != NULL) {
 		if (rsc->m1 == 0 && rsc->m2 == 0) {
 			if (cl->cl_rsc != NULL) {
 				if (!qempty(cl->cl_q))
 					hfsc_purgeq(cl);
 				free(cl->cl_rsc, M_DEVBUF);
 				cl->cl_rsc = NULL;
 			}
 		} else {
 			if (cl->cl_rsc == NULL)
 				cl->cl_rsc = rsc_tmp;
 			sc2isc(rsc, cl->cl_rsc);
 			rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
 			    cl->cl_cumul);
 			cl->cl_eligible = cl->cl_deadline;
 			if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
 				cl->cl_eligible.dx = 0;
 				cl->cl_eligible.dy = 0;
 			}
 		}
 	}
 
 	if (fsc != NULL) {
 		if (fsc->m1 == 0 && fsc->m2 == 0) {
 			if (cl->cl_fsc != NULL) {
 				if (!qempty(cl->cl_q))
 					hfsc_purgeq(cl);
 				free(cl->cl_fsc, M_DEVBUF);
 				cl->cl_fsc = NULL;
 			}
 		} else {
 			if (cl->cl_fsc == NULL)
 				cl->cl_fsc = fsc_tmp;
 			sc2isc(fsc, cl->cl_fsc);
 			rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
 			    cl->cl_total);
 		}
 	}
 
 	if (usc != NULL) {
 		if (usc->m1 == 0 && usc->m2 == 0) {
 			if (cl->cl_usc != NULL) {
 				free(cl->cl_usc, M_DEVBUF);
 				cl->cl_usc = NULL;
 				cl->cl_myf = 0;
 			}
 		} else {
 			if (cl->cl_usc == NULL)
 				cl->cl_usc = usc_tmp;
 			sc2isc(usc, cl->cl_usc);
 			rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
 			    cl->cl_total);
 		}
 	}
 
 	if (!qempty(cl->cl_q)) {
 		if (cl->cl_rsc != NULL)
 			update_ed(cl, m_pktlen(qhead(cl->cl_q)));
 		if (cl->cl_fsc != NULL)
 			update_vf(cl, 0, cur_time);
 		/* is this enough? */
 	}
 
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 	splx(s);
 
 	return (0);
 }
 
 /*
  * hfsc device interface
  */
 /*
  * open entry point of the hfsc device.
  * makes sure the machine clock is initialized and fails with ENXIO when
  * no clock frequency is available; none of the arguments is used.
  */
 int
 hfscopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* initialize the clock on first use */
 	if (machclk_freq == 0)
 		init_machclk();
 
 	if (machclk_freq == 0) {
 		printf("hfsc: no cpu clock available!\n");
 		return (ENXIO);
 	}
 
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 /*
  * close entry point of the hfsc device: disable and detach every
  * interface on hif_list.
  *
  * the list is walked with a saved next pointer because hfsc_detach()
  * unlinks and frees the current entry.  an entry whose altq_detach()
  * fails stays on the list and is skipped; re-reading the list head on
  * every pass would retry that same entry forever.
  *
  * returns 0, or the first error encountered.
  */
 int
 hfscclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct hfsc_if *hif, *next;
 	int err, error = 0;
 
 	for (hif = hif_list; hif != NULL; hif = next) {
 		/* hif may be freed below: pick up its successor first */
 		next = hif->hif_next;
 
 		/* destroy all */
 		if (ALTQ_IS_ENABLED(hif->hif_ifq))
 			altq_disable(hif->hif_ifq);
 
 		err = altq_detach(hif->hif_ifq);
 		if (err == 0)
 			err = hfsc_detach(hif);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * ioctl entry point of the hfsc device.
  *
  * every command except HFSC_GETSTATS requires privilege.  the command
  * is then dispatched to the matching hfsccmd_*() handler; enable,
  * disable and clear-hierarchy are handled inline after looking up the
  * interface.  returns 0 or an errno value (EINVAL for unknown commands).
  */
 int
 hfscioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct hfsc_if *hif;
 	struct hfsc_interface *ifacep;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case HFSC_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case HFSC_IF_ATTACH:
 		error = hfsccmd_if_attach((struct hfsc_attach *)addr);
 		break;
 
 	case HFSC_IF_DETACH:
 		error = hfsccmd_if_detach((struct hfsc_interface *)addr);
 		break;
 
 	case HFSC_ENABLE:
 	case HFSC_DISABLE:
 	case HFSC_CLEAR_HIERARCHY:
 		/* these three share the interface lookup */
 		ifacep = (struct hfsc_interface *)addr;
 		if ((hif = altq_lookup(ifacep->hfsc_ifname,
 				       ALTQT_HFSC)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 
 		case HFSC_ENABLE:
 			/* refuse to enable without a default class */
 			if (hif->hif_defaultclass == NULL) {
 #ifdef ALTQ_DEBUG
 				printf("hfsc: no default class\n");
 #endif
 				error = EINVAL;
 				break;
 			}
 			error = altq_enable(hif->hif_ifq);
 			break;
 
 		case HFSC_DISABLE:
 			error = altq_disable(hif->hif_ifq);
 			break;
 
 		case HFSC_CLEAR_HIERARCHY:
 			hfsc_clear_interface(hif);
 			break;
 		}
 		break;
 
 	case HFSC_ADD_CLASS:
 		error = hfsccmd_add_class((struct hfsc_add_class *)addr);
 		break;
 
 	case HFSC_DEL_CLASS:
 		error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
 		break;
 
 	case HFSC_MOD_CLASS:
 		error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
 		break;
 
 	case HFSC_ADD_FILTER:
 		error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
 		break;
 
 	case HFSC_DEL_FILTER:
 		error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
 		break;
 
 	case HFSC_GETSTATS:
 		error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 /*
  * HFSC_IF_ATTACH handler: create hfsc state for the interface named in
  * ap and register it as that interface's queueing discipline.
  * returns ENXIO for an unknown interface, ENOMEM when hfsc_attach()
  * fails, otherwise the result of altq_attach().
  */
 static int
 hfsccmd_if_attach(ap)
 	struct hfsc_attach *ap;
 {
 	struct hfsc_if *hif;
 	struct ifnet *ifp;
 	int error;
 
 	if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
 		return (ENXIO);
 
 	if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
 		return (ENOMEM);
 
 	/*
 	 * set HFSC to this ifnet structure.
 	 */
 	if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
 				 hfsc_enqueue, hfsc_dequeue, hfsc_request,
 				 &hif->hif_classifier, acc_classify)) != 0)
 		/* registration failed: undo hfsc_attach() */
 		(void)hfsc_detach(hif);
 
 	return (error);
 }
 
 /*
  * HFSC_IF_DETACH handler: disable the discipline if it is enabled,
  * detach it from the interface and release the hfsc state.
  * returns EBADF when the interface has no hfsc state, or the error
  * from altq_detach()/hfsc_detach().
  */
 static int
 hfsccmd_if_detach(ap)
 	struct hfsc_interface *ap;
 {
 	struct hfsc_if *hif;
 	int error;
 
 	if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if (ALTQ_IS_ENABLED(hif->hif_ifq))
 		altq_disable(hif->hif_ifq);
 
 	if ((error = altq_detach(hif->hif_ifq)))
 		return (error);
 
 	return hfsc_detach(hif);
 }
 
 /*
  * HFSC_ADD_CLASS handler: create a class under the given parent and
  * hand the new class handle back in ap->class_handle.
  * returns EBADF (no hfsc state), EINVAL (parent not found), EBUSY (class
  * table full) or ENOMEM (hfsc_class_create() failed).
  */
 static int
 hfsccmd_add_class(ap)
 	struct hfsc_add_class *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl, *parent;
 	int	i;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	/* a null parent handle is accepted only while there is no root class */
 	if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
 	    hif->hif_rootclass == NULL)
 		parent = NULL;
 	else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL)
 		return (EINVAL);
 
 	/* assign a class handle (use a free slot number for now) */
 	/* the search starts at 1; clh_to_clp() treats handle 0 as "none" */
 	for (i = 1; i < HFSC_MAX_CLASSES; i++)
 		if (hif->hif_class_tbl[i] == NULL)
 			break;
 	if (i == HFSC_MAX_CLASSES)
 		return (EBUSY);
 
 	if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
 	    parent, ap->qlimit, ap->flags, i)) == NULL)
 		return (ENOMEM);
 
 	/* return a class handle to the user */
 	ap->class_handle = i;
 
 	return (0);
 }
 
 /*
  * HFSC_DEL_CLASS handler: destroy the class identified by
  * ap->class_handle.  returns EBADF (no hfsc state), EINVAL (unknown
  * handle) or the result of hfsc_class_destroy().
  */
 static int
 hfsccmd_delete_class(ap)
 	struct hfsc_delete_class *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
 		return (EINVAL);
 
 	return hfsc_class_destroy(cl);
 }
 
 /*
  * HFSC_MOD_CLASS handler: apply ap->service_curve to each curve type
  * selected in ap->sctype.  returns EBADF (no hfsc state), EINVAL
  * (unknown handle) or the result of hfsc_class_modify().
  */
 static int
 hfsccmd_modify_class(ap)
 	struct hfsc_modify_class *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 	struct service_curve *rsc = NULL;
 	struct service_curve *fsc = NULL;
 	struct service_curve *usc = NULL;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
 		return (EINVAL);
 
 	/* sctype is a bit mask; every selected type gets the same curve */
 	if (ap->sctype & HFSC_REALTIMESC)
 		rsc = &ap->service_curve;
 	if (ap->sctype & HFSC_LINKSHARINGSC)
 		fsc = &ap->service_curve;
 	if (ap->sctype & HFSC_UPPERLIMITSC)
 		usc = &ap->service_curve;
 
 	return hfsc_class_modify(cl, rsc, fsc, usc);
 }
 
 /*
  * HFSC_ADD_FILTER handler: attach ap->filter to the class identified by
  * ap->class_handle.  filters are rejected for classes that
  * is_a_parent_class() reports as parents.  returns EBADF, EINVAL or the
  * result of acc_add_filter().
  */
 static int
 hfsccmd_add_filter(ap)
 	struct hfsc_add_filter *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
 		return (EINVAL);
 
 	if (is_a_parent_class(cl)) {
 #ifdef ALTQ_DEBUG
 		printf("hfsccmd_add_filter: not a leaf class!\n");
 #endif
 		return (EINVAL);
 	}
 
 	return acc_add_filter(&hif->hif_classifier, &ap->filter,
 			      cl, &ap->filter_handle);
 }
 
 /*
  * HFSC_DEL_FILTER handler: remove the filter identified by
  * ap->filter_handle.  returns EBADF when the interface has no hfsc
  * state, otherwise the result of acc_delete_filter().
  */
 static int
 hfsccmd_delete_filter(ap)
 	struct hfsc_delete_filter *ap;
 {
 	struct hfsc_if *hif;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&hif->hif_classifier,
 				 ap->filter_handle);
 }
 
 /*
  * HFSC_GETSTATS handler: copy per-class statistics out to the user
  * buffer ap->stats.
  *
  * the first ap->nskip classes (in hfsc_nextclass() order, starting at
  * the root) are skipped, then up to ap->nclasses records are copied.
  * on return ap->nclasses holds the number of records written.
  * returns EBADF (no hfsc state), EINVAL (fewer than nskip classes) or a
  * copyout() error.
  */
 static int
 hfsccmd_class_stats(ap)
 	struct hfsc_class_stats *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 	struct hfsc_classstats stats, *usp;
 	int	n, nclasses, error;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	ap->cur_time = read_machclk();
 	ap->machclk_freq = machclk_freq;
 	ap->hif_classes = hif->hif_classes;
 	ap->hif_packets = hif->hif_packets;
 
 	/* skip the first N classes in the tree */
 	nclasses = ap->nskip;
 	for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
 	     cl = hfsc_nextclass(cl), n++)
 		;
 	if (n != nclasses)
 		return (EINVAL);
 
 	/* then, read the next N classes in the tree */
 	nclasses = ap->nclasses;
 	usp = ap->stats;
 	for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
 
 		/*
 		 * get_class_stats() does not write every byte of the
 		 * record (e.g. red[] is filled only for RED/RIO queues),
 		 * so clear it first: the whole record goes to user space
 		 * and must not carry stale kernel stack contents.
 		 */
 		bzero(&stats, sizeof(stats));
 		get_class_stats(&stats, cl);
 
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 				     sizeof(stats))) != 0)
 			return (error);
 	}
 
 	ap->nclasses = n;
 
 	return (0);
 }
 
 #ifdef KLD_MODULE
 
 /* module switch entry: name plus the open/close/ioctl entry points */
 static struct altqsw hfsc_sw =
 	{"hfsc", hfscopen, hfscclose, hfscioctl};
 
 /* NOTE(review): presumably registers hfsc_sw under ALTQT_HFSC - confirm */
 ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
 MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_HFSC */
Index: stable/10/sys/contrib/altq/altq/altq_priq.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_priq.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_priq.c	(revision 263086)
@@ -1,1043 +1,1046 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $	*/
 /*
  * Copyright (C) 2000-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * priority queue
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 
 #ifdef ALTQ_PRIQ  /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <netinet/in.h>
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
 #include <altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 #include <altq/altq_priq.h>
 
 /*
  * function prototypes
  */
 /* interface state management (attach/detach only with ALTQ3_COMPAT) */
 #ifdef ALTQ3_COMPAT
 static struct priq_if *priq_attach(struct ifaltq *, u_int);
 static int priq_detach(struct priq_if *);
 #endif
 static int priq_clear_interface(struct priq_if *);
 static int priq_request(struct ifaltq *, int, void *);
 static void priq_purge(struct priq_if *);
 /* class management and the enqueue/dequeue hooks */
 static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
     int);
 static int priq_class_destroy(struct priq_class *);
 static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *priq_dequeue(struct ifaltq *, int);
 
 /* per-class queue primitives */
 static int priq_addq(struct priq_class *, struct mbuf *);
 static struct mbuf *priq_getq(struct priq_class *);
 static struct mbuf *priq_pollq(struct priq_class *);
 static void priq_purgeq(struct priq_class *);
 
 /* ALTQ3 ioctl command handlers */
 #ifdef ALTQ3_COMPAT
 static int priqcmd_if_attach(struct priq_interface *);
 static int priqcmd_if_detach(struct priq_interface *);
 static int priqcmd_add_class(struct priq_add_class *);
 static int priqcmd_delete_class(struct priq_delete_class *);
 static int priqcmd_modify_class(struct priq_modify_class *);
 static int priqcmd_add_filter(struct priq_add_filter *);
 static int priqcmd_delete_filter(struct priq_delete_filter *);
 static int priqcmd_class_stats(struct priq_class_stats *);
 #endif /* ALTQ3_COMPAT */
 
 /* statistics snapshot and handle lookup */
 static void get_class_stats(struct priq_classstats *, struct priq_class *);
 static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
 
 #ifdef ALTQ3_COMPAT
 altqdev_decl(priq);
 
 /* pif_list keeps all priq_if's allocated. */
 static struct priq_if *pif_list = NULL;
 #endif /* ALTQ3_COMPAT */
 
 /*
  * register the priq discipline kept in a->altq_disc on the send queue
  * of the interface named a->ifname.
  * returns EINVAL when the interface is unknown or no discipline state
  * is set, otherwise the result of altq_attach().
  */
 int
 priq_pfattach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (a->altq_disc == NULL)
 		return (EINVAL);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
 	    priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
 	splx(s);
 
 	return (error);
 }
 
 /*
  * allocate zeroed priq state for the interface named a->ifname and
  * store it in a->altq_disc.
  * returns EINVAL (unknown interface), ENODEV (send queue not ALTQ
  * ready) or ENOMEM.
  */
 int
 priq_add_altq(struct pf_altq *a)
 {
 	struct priq_if	*pif;
 	struct ifnet	*ifp;
 
 	if ((ifp = ifunit(a->ifname)) == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (pif == NULL)
 		return (ENOMEM);
 	pif->pif_bandwidth = a->ifbandwidth;
 	/* -1: no class exists yet */
 	pif->pif_maxpri = -1;
 	pif->pif_ifq = &ifp->if_snd;
 
 	/* keep the state in pf_altq */
 	a->altq_disc = pif;
 
 	return (0);
 }
 
 /*
  * release the priq state stored in a->altq_disc: destroy its classes
  * and free it.  returns EINVAL when no state is set.
  */
 int
 priq_remove_altq(struct pf_altq *a)
 {
 	struct priq_if *pif;
 
 	if ((pif = a->altq_disc) == NULL)
 		return (EINVAL);
 	/* drop the reference before the state is torn down */
 	a->altq_disc = NULL;
 
 	(void)priq_clear_interface(pif);
 
 	free(pif, M_DEVBUF);
 	return (0);
 }
 
 /*
  * create the class described by a (priority, qlimit, flags, qid).
  * returns EINVAL for missing state, an out-of-range priority or a zero
  * qid; EBUSY when the priority or the qid is already in use; ENOMEM
  * when priq_class_create() fails.
  */
 int
 priq_add_queue(struct pf_altq *a)
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 
 	if ((pif = a->altq_disc) == NULL)
 		return (EINVAL);
 
 	/* check parameters */
 	if (a->priority >= PRIQ_MAXPRI)
 		return (EINVAL);
 	if (a->qid == 0)
 		return (EINVAL);
 	/* one class per priority, one class per qid */
 	if (pif->pif_classes[a->priority] != NULL)
 		return (EBUSY);
 	if (clh_to_clp(pif, a->qid) != NULL)
 		return (EBUSY);
 
 	cl = priq_class_create(pif, a->priority, a->qlimit,
 	    a->pq_u.priq_opts.flags, a->qid);
 	if (cl == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * destroy the class whose handle is a->qid.
  * returns EINVAL when there is no priq state or no such class,
  * otherwise the result of priq_class_destroy().
  */
 int
 priq_remove_queue(struct pf_altq *a)
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 
 	if ((pif = a->altq_disc) == NULL)
 		return (EINVAL);
 
 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
 		return (EINVAL);
 
 	return (priq_class_destroy(cl));
 }
 
 /*
  * copy the statistics of the class whose handle is a->qid out to ubuf.
  *
  *	nbytes: in: size of ubuf; out: number of bytes written.
  *
  * returns EBADF (no priq state on the interface), EINVAL (unknown class
  * or buffer too small) or a copyout() error.
  */
 int
 priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 	struct priq_classstats stats;
 	int error = 0;
 
 	if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
 		return (EINVAL);
 
 	/*
 	 * compare as signed values: against the unsigned result of sizeof
 	 * a negative *nbytes would be converted to a huge value and slip
 	 * through the check.
 	 */
 	if (*nbytes < (int)sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * bring the interface back to the initial state by discarding
  * all the filters and classes.
  * always returns 0.
  */
 static int
 priq_clear_interface(struct priq_if *pif)
 {
 	struct priq_class	*cl;
 	int pri;
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&pif->pif_classifier, NULL, 1);
 #endif
 
 	/* clear out the classes */
 	/* pif_maxpri is re-read on every pass; priq_class_destroy() lowers it */
 	for (pri = 0; pri <= pif->pif_maxpri; pri++)
 		if ((cl = pif->pif_classes[pri]) != NULL)
 			priq_class_destroy(cl);
 
 	return (0);
 }
 
 /*
  * request hook registered with altq_attach().  only ALTRQ_PURGE is
  * acted upon; any other request is ignored.  always returns 0.
  * the caller must hold the ifq lock (asserted below).
  */
 static int
 priq_request(struct ifaltq *ifq, int req, void *arg)
 {
 	struct priq_if	*pif = (struct priq_if *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	switch (req) {
 	case ALTRQ_PURGE:
 		priq_purge(pif);
 		break;
 	}
 	return (0);
 }
 
 /*
  * discard all the queued packets on the interface: purge every
  * non-empty class queue and, when the discipline is enabled, reset the
  * interface queue length to 0.
  */
 static void
 priq_purge(struct priq_if *pif)
 {
 	struct priq_class *cl;
 	int pri;
 
 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
 		if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q))
 			priq_purgeq(cl);
 	}
 	if (ALTQ_IS_ENABLED(pif->pif_ifq))
 		pif->pif_ifq->ifq_len = 0;
 }
 
 /*
  * create the class for priority pri, or re-initialize it when a class
  * already exists at that priority.
  *
  *	qlimit: queue limit in packets; 0 selects the default of 50
  *	flags:  PRCF_* flags (default class, RED, RIO, ECN, ...)
  *	qid:    handle stored in the class
  *
  * returns the class, or NULL on failure.  on failure the class is freed
  * and every reference pif holds to it is removed first, so pif never
  * points at freed memory.
  */
 static struct priq_class *
 priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
 {
 	struct priq_class *cl;
 	int s;
 
 #ifndef ALTQ_RED
 	if (flags & PRCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("priq_class_create: RED not configured for PRIQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	if ((cl = pif->pif_classes[pri]) != NULL) {
 		/* modify the class instead of creating a new one */
 #ifdef __NetBSD__
 		s = splnet();
 #else
 		s = splimp();
 #endif
 		IFQ_LOCK(cl->cl_pif->pif_ifq);
 		if (!qempty(cl->cl_q))
 			priq_purgeq(cl);
 		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
 		splx(s);
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 		/* the old RED/RIO state is gone: do not keep a stale pointer */
 		cl->cl_red = NULL;
 	} else {
 		cl = malloc(sizeof(struct priq_class), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
 		if (cl == NULL)
 			return (NULL);
 
 		cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
 		if (cl->cl_q == NULL)
 			goto err_ret;
 	}
 
 	pif->pif_classes[pri] = cl;
 	if (flags & PRCF_DEFAULTCLASS)
 		pif->pif_default = cl;
 	if (qlimit == 0)
 		qlimit = 50;  /* use default */
 	qlimit(cl->cl_q) = qlimit;
 	qtype(cl->cl_q) = Q_DROPTAIL;
 	qlen(cl->cl_q) = 0;
 	cl->cl_flags = flags;
 	cl->cl_pri = pri;
 	if (pri > pif->pif_maxpri)
 		pif->pif_maxpri = pri;
 	cl->cl_pif = pif;
 	cl->cl_handle = qid;
 
 #ifdef ALTQ_RED
 	if (flags & (PRCF_RED|PRCF_RIO)) {
 		int red_flags, red_pkttime;
 
 		red_flags = 0;
 		if (flags & PRCF_ECN)
 			red_flags |= REDF_ECN;
 #ifdef ALTQ_RIO
 		if (flags & PRCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		/* time to send one MTU-sized packet, in nsec */
 		if (pif->pif_bandwidth < 8)
 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
 		else
 			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
 			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
 #ifdef ALTQ_RIO
 		if (flags & PRCF_RIO) {
 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
 						red_flags, red_pkttime);
 			if (cl->cl_red == NULL)
 				goto err_ret;
 			qtype(cl->cl_q) = Q_RIO;
 		} else
 #endif
 		if (flags & PRCF_RED) {
 			cl->cl_red = red_alloc(0, 0,
 			    qlimit(cl->cl_q) * 10/100,
 			    qlimit(cl->cl_q) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->cl_red == NULL)
 				goto err_ret;
 			qtype(cl->cl_q) = Q_RED;
 		}
 	}
 #endif /* ALTQ_RED */
 
 	return (cl);
 
  err_ret:
 	/*
 	 * cl is about to be freed.  it may already be reachable through
 	 * pif (always so when an existing class was being modified), so
 	 * drop those references first, under the same protection
 	 * priq_class_destroy() uses.
 	 */
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(pif->pif_ifq);
 	if (pif->pif_classes[pri] == cl) {
 		pif->pif_classes[pri] = NULL;
 		if (pif->pif_maxpri == pri) {
 			int i;
 
 			/* the highest remaining priority becomes the maximum */
 			for (i = pri - 1; i >= 0; i--)
 				if (pif->pif_classes[i] != NULL)
 					break;
 			pif->pif_maxpri = i;	/* -1 when no class is left */
 		}
 	}
 	if (pif->pif_default == cl)
 		pif->pif_default = NULL;
 	IFQ_UNLOCK(pif->pif_ifq);
 	splx(s);
 
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	if (cl->cl_q != NULL)
 		free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (NULL);
 }
 
 /*
  * destroy a class: purge its queue, remove it from the interface's
  * class table (recomputing pif_maxpri when needed), release its RED/RIO
  * state and free it.  always returns 0.
  */
 static int
 priq_class_destroy(struct priq_class *cl)
 {
 	struct priq_if *pif;
 	int s, pri;
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(cl->cl_pif->pif_ifq);
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* delete filters referencing to this class */
 	acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
 #endif
 
 	if (!qempty(cl->cl_q))
 		priq_purgeq(cl);
 
 	pif = cl->cl_pif;
 	pif->pif_classes[cl->cl_pri] = NULL;
 	/* the highest class went away: look for the next highest one */
 	if (pif->pif_maxpri == cl->cl_pri) {
 		for (pri = cl->cl_pri; pri >= 0; pri--)
 			if (pif->pif_classes[pri] != NULL) {
 				pif->pif_maxpri = pri;
 				break;
 			}
 		/* no class left at all */
 		if (pri < 0)
 			pif->pif_maxpri = -1;
 	}
 	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
 	splx(s);
 
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (0);
 }
 
 /*
  * priq_enqueue is the enqueue handler registered as (*altq_enqueue)
  * in struct ifaltq.  It picks the target class (pf tag first, then the
  * interface default), and hands the mbuf to priq_addq.
  * Returns 0 when the packet was queued, ENOBUFS when it was dropped.
  */
 static int
 priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	struct priq_if	*pif = (struct priq_if *)ifq->altq_disc;
 	struct priq_class *cl = NULL;
 	struct pf_mtag *tag;
 	int pktlen;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* a packet without a pkthdr cannot be classified; should not happen */
 	if (!(m->m_flags & M_PKTHDR)) {
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 	/* grab class set by classifier */
 	tag = pf_find_mtag(m);
 	if (tag != NULL)
 		cl = clh_to_clp(pif, tag->qid);
 #ifdef ALTQ3_COMPAT
 	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
 		cl = pktattr->pattr_class;
 #endif
 	/* no class found: use the default class, or drop if there is none */
 	if (cl == NULL && (cl = pif->pif_default) == NULL) {
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 #ifdef ALTQ3_COMPAT
 	/* save proto hdr used by ECN (NULL when the caller passed none) */
 	cl->cl_pktattr = pktattr;
 #else
 	cl->cl_pktattr = NULL;
 #endif
 
 	/* read the length first: priq_addq frees the mbuf on a drop */
 	pktlen = m_pktlen(m);
 	if (priq_addq(cl, m) == 0) {
 		/* successfully queued. */
 		IFQ_INC_LEN(ifq);
 		return (0);
 	}
 
 	/* drop occurred.  mbuf was freed in priq_addq. */
 	PKTCNTR_ADD(&cl->cl_dropcnt, pktlen);
 	return (ENOBUFS);
 }
 
 /*
  * priq_dequeue is the dequeue handler registered as (*altq_dequeue)
  * in struct ifaltq.  Classes are scanned from pif_maxpri downwards and
  * the first non-empty one is served.
  *
  * note: ALTDQ_POLL returns the next packet without removing the packet
  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
  *	ALTDQ_REMOVE must return the same packet if called immediately
  *	after ALTDQ_POLL.
  */
 static struct mbuf *
 priq_dequeue(struct ifaltq *ifq, int op)
 {
 	struct priq_if	*pif = (struct priq_if *)ifq->altq_disc;
 	struct priq_class *cl;
 	struct mbuf *pkt;
 	int prio;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* no packet in the queue */
 	if (IFQ_IS_EMPTY(ifq))
 		return (NULL);
 
 	for (prio = pif->pif_maxpri; prio >= 0; prio--) {
 		cl = pif->pif_classes[prio];
 		if (cl == NULL || qempty(cl->cl_q))
 			continue;
 
 		if (op == ALTDQ_POLL)
 			return (priq_pollq(cl));
 
 		pkt = priq_getq(cl);
 		if (pkt == NULL)
 			return (NULL);
 
 		IFQ_DEC_LEN(ifq);
 		/* the class just drained: count one more busy period */
 		if (qempty(cl->cl_q))
 			cl->cl_period++;
 		PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(pkt));
 		return (pkt);
 	}
 	return (NULL);
 }
 
 /*
  * Append an mbuf to a class queue, delegating to RIO/RED when the queue
  * is of that type.  For a plain queue the packet is tail-dropped (freed)
  * once the queue limit is reached.  Returns 0 on success; the plain-queue
  * drop path returns -1.
  */
 static int
 priq_addq(struct priq_class *cl, struct mbuf *m)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return (rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
 		    cl->cl_pktattr));
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return (red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr));
 #endif
 	if (qlen(cl->cl_q) < qlimit(cl->cl_q)) {
 		if (cl->cl_flags & PRCF_CLEARDSCP)
 			write_dsfield(m, cl->cl_pktattr, 0);
 
 		_addq(cl->cl_q, m);
 		return (0);
 	}
 
 	/* queue is full: drop-tail */
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * Remove and return the head packet of a class queue, going through the
  * RIO or RED dequeue routine when the queue is of that type.
  */
 static struct mbuf *
 priq_getq(struct priq_class *cl)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return (rio_getq((rio_t *)cl->cl_red, cl->cl_q));
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return (red_getq(cl->cl_red, cl->cl_q));
 #endif
 	/* plain queue */
 	return (_getq(cl->cl_q));
 }
 
 /*
  * Return the packet at the head of the class queue without removing it.
  */
 static struct mbuf *
 priq_pollq(struct priq_class *cl)
 {
 	return qhead(cl->cl_q);
 }
 
 /*
  * Discard every packet queued on a class, accounting each one as a drop.
  */
 static void
 priq_purgeq(struct priq_class *cl)
 {
 	struct mbuf *pkt;
 
 	if (!qempty(cl->cl_q)) {
 		for (;;) {
 			pkt = _getq(cl->cl_q);
 			if (pkt == NULL)
 				break;
 			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(pkt));
 			m_freem(pkt);
 		}
 		ASSERT(qlen(cl->cl_q) == 0);
 	}
 }
 
 /*
  * Copy the counters of class cl into sp.  The RED/RIO part of sp is
  * only filled in when the class queue is of that type.
  */
 static void
 get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
 {
 	/* identity and counters kept on the class itself */
 	sp->class_handle = cl->cl_handle;
 	sp->period = cl->cl_period;
 	sp->xmitcnt = cl->cl_xmitcnt;
 	sp->dropcnt = cl->cl_dropcnt;
 
 	/* state of the underlying queue */
 	sp->qtype = qtype(cl->cl_q);
 	sp->qlength = qlen(cl->cl_q);
 	sp->qlimit = qlimit(cl->cl_q);
 
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		red_getstats(cl->cl_red, &sp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
 #endif
 }
 
 /*
  * convert a class handle to the corresponding class pointer.
  * handle 0 never matches; returns NULL when no class carries the handle.
  */
 static struct priq_class *
 clh_to_clp(struct priq_if *pif, u_int32_t chandle)
 {
 	struct priq_class *cand;
 	int slot;
 
 	if (chandle == 0)
 		return (NULL);
 
 	slot = pif->pif_maxpri;
 	while (slot >= 0) {
 		cand = pif->pif_classes[slot];
 		if (cand != NULL && cand->cl_handle == chandle)
 			return (cand);
 		slot--;
 	}
 
 	return (NULL);
 }
 
 
 #ifdef ALTQ3_COMPAT
 
 /*
  * Allocate a zeroed priq interface state for ifq and link it at the
  * head of pif_list.  Returns the new state, or NULL if malloc fails.
  */
 static struct priq_if *
 priq_attach(ifq, bandwidth)
 	struct ifaltq *ifq;
 	u_int bandwidth;
 {
 	struct priq_if *pif;
 
 	pif = malloc(sizeof(*pif), M_DEVBUF, M_WAITOK);
 	if (pif == NULL)
 		return (NULL);
 	bzero(pif, sizeof(*pif));
 
 	pif->pif_ifq = ifq;
 	pif->pif_bandwidth = bandwidth;
 	pif->pif_maxpri = -1;	/* no class yet */
 
 	/* add this state to the priq list */
 	pif->pif_next = pif_list;
 	pif_list = pif;
 
 	return (pif);
 }
 
 /*
  * Clear all classes of pif, unlink it from pif_list and free it.
  * Always returns 0.
  */
 static int
 priq_detach(pif)
 	struct priq_if *pif;
 {
 	(void)priq_clear_interface(pif);
 
 	/* remove this interface from the pif list */
 	if (pif_list == pif) {
 		pif_list = pif->pif_next;
 	} else {
 		struct priq_if *prev = pif_list;
 
 		/* walk to the element just before pif */
 		while (prev != NULL && prev->pif_next != pif)
 			prev = prev->pif_next;
 		if (prev != NULL)
 			prev->pif_next = pif->pif_next;
 		ASSERT(prev != NULL);
 	}
 
 	free(pif, M_DEVBUF);
 	return (0);
 }
 
 /*
  * priq device interface
  */
 
 /*
  * Open handler for the priq device.  It performs no work and always
  * returns 0; none of the arguments are used.
  */
 int
 priqopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 /*
  * Close handler for the priq device: disable, detach and free every
  * interface state on pif_list.  Returns the first error seen, or 0.
  *
  * NOTE(review): when altq_detach fails, the entry stays at the head of
  * pif_list and the loop looks like it will retry it forever -- confirm.
  */
 int
 priqclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct priq_if *pif;
 	int first_err = 0;
 	int rc;
 
 	/* destroy all */
 	for (pif = pif_list; pif != NULL; pif = pif_list) {
 		if (ALTQ_IS_ENABLED(pif->pif_ifq))
 			altq_disable(pif->pif_ifq);
 
 		rc = altq_detach(pif->pif_ifq);
 		if (rc == 0)
 			rc = priq_detach(pif);
 		if (first_err == 0 && rc != 0)
 			first_err = rc;
 	}
 
 	return first_err;
 }
 
 /*
  * ioctl handler for the priq device.  Every command except
  * PRIQ_GETSTATS requires privilege; the command is then dispatched to
  * the matching priqcmd_* helper.  Returns 0 or an errno value
  * (EINVAL for an unknown command, EBADF when the named interface has
  * no priq state).
  */
 int
 priqioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct priq_if *pif;
 	struct priq_interface *ifacep;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case PRIQ_GETSTATS:
 		break;
 	default:
 		/* "#elif", not "#elsif": the latter is not a directive */
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case PRIQ_IF_ATTACH:
 		error = priqcmd_if_attach((struct priq_interface *)addr);
 		break;
 
 	case PRIQ_IF_DETACH:
 		error = priqcmd_if_detach((struct priq_interface *)addr);
 		break;
 
 	case PRIQ_ENABLE:
 	case PRIQ_DISABLE:
 	case PRIQ_CLEAR:
 		/* these three share the interface lookup */
 		ifacep = (struct priq_interface *)addr;
 		if ((pif = altq_lookup(ifacep->ifname,
 				       ALTQT_PRIQ)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 		case PRIQ_ENABLE:
 			/* refuse to enable without a default class */
 			if (pif->pif_default == NULL) {
 #ifdef ALTQ_DEBUG
 				printf("priq: no default class\n");
 #endif
 				error = EINVAL;
 				break;
 			}
 			error = altq_enable(pif->pif_ifq);
 			break;
 
 		case PRIQ_DISABLE:
 			error = altq_disable(pif->pif_ifq);
 			break;
 
 		case PRIQ_CLEAR:
 			priq_clear_interface(pif);
 			break;
 		}
 		break;
 
 	case PRIQ_ADD_CLASS:
 		error = priqcmd_add_class((struct priq_add_class *)addr);
 		break;
 
 	case PRIQ_DEL_CLASS:
 		error = priqcmd_delete_class((struct priq_delete_class *)addr);
 		break;
 
 	case PRIQ_MOD_CLASS:
 		error = priqcmd_modify_class((struct priq_modify_class *)addr);
 		break;
 
 	case PRIQ_ADD_FILTER:
 		error = priqcmd_add_filter((struct priq_add_filter *)addr);
 		break;
 
 	case PRIQ_DEL_FILTER:
 		error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
 		break;
 
 	case PRIQ_GETSTATS:
 		error = priqcmd_class_stats((struct priq_class_stats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 static int
 priqcmd_if_attach(ap)
 	struct priq_interface *ap;
 {
 	struct priq_if *pif;
 	struct ifnet *ifp;
 	int error;
 
 	if ((ifp = ifunit(ap->ifname)) == NULL)
 		return (ENXIO);
 
 	if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL)
 		return (ENOMEM);
 
 	/*
 	 * set PRIQ to this ifnet structure.
 	 */
 	if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
 				 priq_enqueue, priq_dequeue, priq_request,
 				 &pif->pif_classifier, acc_classify)) != 0)
 		(void)priq_detach(pif);
 
 	return (error);
 }
 
 static int
 priqcmd_if_detach(ap)
 	struct priq_interface *ap;
 {
 	struct priq_if *pif;
 	int error;
 
 	if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL)
 		return (EBADF);
 
 	if (ALTQ_IS_ENABLED(pif->pif_ifq))
 		altq_disable(pif->pif_ifq);
 
 	if ((error = altq_detach(pif->pif_ifq)))
 		return (error);
 
 	return priq_detach(pif);
 }
 
 /*
  * PRIQ_ADD_CLASS: create a class at priority ap->pri and report its
  * handle back in ap->class_handle.  Returns EBADF (no priq state),
  * EINVAL (priority out of range), EBUSY (slot taken) or ENOMEM.
  */
 static int
 priqcmd_add_class(ap)
 	struct priq_add_class *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 	int qid;
 
 	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (pif == NULL)
 		return (EBADF);
 
 	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
 		return (EINVAL);
 	if (pif->pif_classes[ap->pri] != NULL)
 		return (EBUSY);
 
 	/* queue id passed to priq_class_create: priority + 1 */
 	qid = ap->pri + 1;
 	cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, qid);
 	if (cl == NULL)
 		return (ENOMEM);
 
 	/* return a class handle to the user */
 	ap->class_handle = cl->cl_handle;
 
 	return (0);
 }
 
 /*
  * PRIQ_DEL_CLASS: destroy the class named by ap->class_handle.
  * Returns EBADF when the interface has no priq state and EINVAL when
  * the handle matches no class.
  */
 static int
 priqcmd_delete_class(ap)
 	struct priq_delete_class *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 
 	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (pif == NULL)
 		return (EBADF);
 
 	cl = clh_to_clp(pif, ap->class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	return priq_class_destroy(cl);
 }
 
 /*
  * PRIQ_MOD_CLASS: optionally move a class to a new priority slot, then
  * re-run priq_class_create on it with the new qlimit/flags.
  * Returns EBADF, EINVAL (bad priority or handle), EEXIST (target slot
  * is occupied by another class) or ENOMEM.
  */
 static int
 priqcmd_modify_class(ap)
 	struct priq_modify_class *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 
 	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (pif == NULL)
 		return (EBADF);
 
 	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
 		return (EINVAL);
 
 	cl = clh_to_clp(pif, ap->class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	/*
 	 * if priority is changed, move the class to the new priority
 	 */
 	if (pif->pif_classes[ap->pri] != cl) {
 		if (pif->pif_classes[ap->pri] != NULL)
 			return (EEXIST);
 		pif->pif_classes[cl->cl_pri] = NULL;
 		pif->pif_classes[ap->pri] = cl;
 		cl->cl_pri = ap->pri;
 	}
 
 	/* call priq_class_create to change class parameters */
 	cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags,
 	    ap->class_handle);
 	if (cl == NULL)
 		return (ENOMEM);
 	return 0;
 }
 
 /*
  * PRIQ_ADD_FILTER: attach ap->filter to the class named by
  * ap->class_handle; the new filter handle is stored in
  * ap->filter_handle by acc_add_filter.  Returns EBADF or EINVAL when
  * the interface or class cannot be found.
  */
 static int
 priqcmd_add_filter(ap)
 	struct priq_add_filter *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 
 	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (pif == NULL)
 		return (EBADF);
 
 	cl = clh_to_clp(pif, ap->class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&pif->pif_classifier, &ap->filter,
 	    cl, &ap->filter_handle);
 }
 
 /*
  * PRIQ_DEL_FILTER: remove the filter identified by ap->filter_handle
  * from the interface's classifier.  Returns EBADF when the interface
  * has no priq state.
  */
 static int
 priqcmd_delete_filter(ap)
 	struct priq_delete_filter *ap;
 {
 	struct priq_if *pif;
 
 	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (pif == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&pif->pif_classifier, ap->filter_handle);
 }
 
 /*
  * PRIQ_GETSTATS: copy one priq_classstats record per priority slot
  * (0..pif_maxpri) out to the user buffer ap->stats, and report
  * pif_maxpri in ap->maxpri.  Empty slots yield an all-zero record.
  * Returns EBADF when the interface has no priq state, or the error
  * from copyout.
  */
 static int
 priqcmd_class_stats(ap)
 	struct priq_class_stats *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 	struct priq_classstats stats, *usp;
 	int	pri, error;
 
 	if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
 		return (EBADF);
 
 	ap->maxpri = pif->pif_maxpri;
 
 	/* then, copy out one record per priority slot */
 	usp = ap->stats;
 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
 		/*
 		 * Always start from a zeroed record: get_class_stats only
 		 * fills the red[] part for RED/RIO queues, and whatever it
 		 * leaves untouched would otherwise expose kernel stack
 		 * contents to userland through copyout.
 		 */
 		bzero(&stats, sizeof(stats));
 		cl = pif->pif_classes[pri];
 		if (cl != NULL)
 			get_class_stats(&stats, cl);
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 				     sizeof(stats))) != 0)
 			return (error);
 	}
 	return (0);
 }
 
 #ifdef KLD_MODULE
 
 /*
  * Device switch entry for priq: discipline name followed by the
  * open/close/ioctl handlers defined above.
  */
 static struct altqsw priq_sw =
 	{"priq", priqopen, priqclose, priqioctl};
 
 /* register the module and declare its dependencies on altq_red/altq_rio */
 ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
 MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 
 #endif /* ALTQ3_COMPAT */
 #endif /* ALTQ_PRIQ */
Index: stable/10/sys/contrib/altq/altq/altq_red.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_red.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_red.c	(revision 263086)
@@ -1,1497 +1,1500 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
 
 /*
  * Copyright (C) 1997-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 /*
  * Copyright (c) 1990-1994 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the Computer Systems
  *	Engineering Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/sockio.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #ifdef ALTQ_FLOWVALVE
 #include <sys/queue.h>
 #include <sys/time.h>
 #endif
 #endif /* ALTQ3_COMPAT */
 
 #include <net/if.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
 #include <altq/altq.h>
 #include <altq/altq_red.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #ifdef ALTQ_FLOWVALVE
 #include <altq/altq_flowvalve.h>
 #endif
 #endif
 
 /*
  * ALTQ/RED (Random Early Detection) implementation using 32-bit
  * fixed-point calculation.
  *
  * written by kjc using the ns code as a reference.
  * you can learn more about red and ns from Sally's home page at
  * http://www-nrg.ee.lbl.gov/floyd/
  *
  * most of the red parameter values are fixed in this implementation
  * to prevent fixed-point overflow/underflow.
  * if you change the parameters, watch out for overflow/underflow!
  *
  * the parameters used are recommended values by Sally.
  * the corresponding ns config looks:
  *	q_weight=0.00195
  *	minthresh=5 maxthresh=15 queue-size=60
  *	linterm=30
  *	dropmech=drop-tail
  *	bytes=false (can't be handled by 32-bit fixed-point)
  *	doubleq=false dqthresh=false
  *	wait=true
  */
 /*
  * alternative red parameters for a slow link.
  *
  * assume the queue length becomes from zero to L and keeps L, it takes
  * N packets for q_avg to reach 63% of L.
  * when q_weight is 0.002, N is about 500 packets.
  * for a slow link like dial-up, 500 packets takes more than 1 minute!
  * when q_weight is 0.008, N is about 127 packets.
  * when q_weight is 0.016, N is about 63 packets.
  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
  * are allowed for 0.016.
  * see Sally's paper for more details.
  */
 /* normal red parameters */
 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
 				/* q_weight = 0.00195 */
 
 /* red parameters for a slow link */
 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
 				/* q_weight = 0.0078125 */
 
 /* red parameters for a very slow link (e.g., dialup) */
 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
 				/* q_weight = 0.015625 */
 
 /* fixed-point uses 12-bit decimal places */
 #define	FP_SHIFT	12	/* fixed-point shift */
 
 /* red parameters for drop probability */
 #define	INV_P_MAX	10	/* inverse of max drop probability */
 #define	TH_MIN		5	/* min threshold */
 #define	TH_MAX		15	/* max threshold */
 
 #define	RED_LIMIT	60	/* default max queue length */
 #define	RED_STATS		/* collect statistics */
 
 /*
  * our default policy for forced-drop is drop-tail.
  * (in altq-1.1.2 or earlier, the default was random-drop.
  * but it makes more sense to punish the cause of the surge.)
  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
  */
 
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 /*
  * flow-valve is an extension to protect red from unresponsive flows
  * and to promote end-to-end congestion control.
  * flow-valve observes the average drop rates of the flows that have
  * experienced packet drops in the recent past.
  * when the average drop rate exceeds the threshold, the flow is
  * blocked by the flow-valve.  the trapped flow should back off
  * exponentially to escape from the flow-valve.
  */
 #ifdef RED_RANDOM_DROP
 #error "random-drop can't be used with flow-valve!"
 #endif
 #endif /* ALTQ_FLOWVALVE */
 
 /* red_list keeps all red_queue_t's allocated. */
 static red_queue_t *red_list = NULL;
 
 #endif /* ALTQ3_COMPAT */
 
 /*
  * default red parameter values.
  * red_alloc substitutes these when the caller passes 0 for the
  * corresponding argument.
  */
 static int default_th_min = TH_MIN;
 static int default_th_max = TH_MAX;
 static int default_inv_pmax = INV_P_MAX;
 
 #ifdef ALTQ3_COMPAT
 /* internal function prototypes */
 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *red_dequeue(struct ifaltq *, int);
 static int red_request(struct ifaltq *, int, void *);
 static void red_purgeq(red_queue_t *);
 static int red_detach(red_queue_t *);
 #ifdef ALTQ_FLOWVALVE
 static __inline struct fve *flowlist_lookup(struct flowvalve *,
 			 struct altq_pktattr *, struct timeval *);
 static __inline struct fve *flowlist_reclaim(struct flowvalve *,
 					     struct altq_pktattr *);
 static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
 static __inline int fv_p2f(struct flowvalve *, int);
 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
 static struct flowvalve *fv_alloc(struct red *);
 #endif
 static void fv_destroy(struct flowvalve *);
 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
 			struct fve **);
 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
 			 struct fve *);
 #endif
 #endif /* ALTQ3_COMPAT */
 
 /*
  * red support routines
  */
 /*
  * Allocate and initialize a RED state block.
  *
  * weight, inv_pmax, th_min, th_max and pkttime may each be 0 to select
  * the built-in default.  When weight is 0 the weight is additionally
  * lowered for slow links, based on pkttime.  A weight that is not a
  * power of two is rounded down to one (with a console message).
  *
  * Returns the new red_t, or NULL if memory could not be allocated.
  */
 red_t *
 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
 {
 	red_t	*rp;
 	int	 w, i;
 	int	 npkts_per_sec;
 
 	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rp == NULL)
 		return (NULL);
 
 	if (weight == 0)
 		rp->red_weight = W_WEIGHT;
 	else
 		rp->red_weight = weight;
 
 	rp->red_avg = 0;
 	rp->red_idle = 1;
 
 	if (inv_pmax == 0)
 		rp->red_inv_pmax = default_inv_pmax;
 	else
 		rp->red_inv_pmax = inv_pmax;
 	if (th_min == 0)
 		rp->red_thmin = default_th_min;
 	else
 		rp->red_thmin = th_min;
 	if (th_max == 0)
 		rp->red_thmax = default_th_max;
 	else
 		rp->red_thmax = th_max;
 
 	rp->red_flags = flags;
 
 	if (pkttime == 0)
 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
 		rp->red_pkttime = 800;
 	else
 		rp->red_pkttime = pkttime;
 
 	if (weight == 0) {
 		/* when the link is very slow, adjust red parameters */
 		npkts_per_sec = 1000000 / rp->red_pkttime;
 		if (npkts_per_sec < 50) {
 			/* up to about 400Kbps */
 			rp->red_weight = W_WEIGHT_2;
 		} else if (npkts_per_sec < 300) {
 			/* up to about 2.4Mbps */
 			rp->red_weight = W_WEIGHT_1;
 		}
 	}
 
 	/* calculate wshift.  weight must be power of 2 */
 	w = rp->red_weight;
 	for (i = 0; w > 1; i++)
 		w = w >> 1;
 	rp->red_wshift = i;
 	w = 1 << rp->red_wshift;
 	if (w != rp->red_weight) {
 		printf("invalid weight value %d for red! use %d\n",
 		       rp->red_weight, w);
 		rp->red_weight = w;
 	}
 
 	/*
 	 * allocate weight table.  this must come after red_weight has
 	 * reached its final value (slow-link adjustment and power-of-2
 	 * rounding above); otherwise pow_w would use a table built for
 	 * a different weight than the one the estimator runs with.
 	 */
 	rp->red_wtab = wtab_alloc(rp->red_weight);
 	if (rp->red_wtab == NULL) {
 		free(rp, M_DEVBUF);
 		return (NULL);
 	}
 
 	/*
 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
 	 * to be compared with avg.
 	 */
 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
 
 	/*
 	 * precompute probability denominator
 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 	 */
 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
 			 * rp->red_inv_pmax) << FP_SHIFT;
 
 	microtime(&rp->red_last);
 	return (rp);
 }
 
 /*
  * Release a RED state block: the flow-valve (when compiled in and
  * present), the reference on the weight table, and the block itself.
  */
 void
 red_destroy(red_t *rp)
 {
 #if defined(ALTQ3_COMPAT) && defined(ALTQ_FLOWVALVE)
 	if (rp->red_flowvalve != NULL)
 		fv_destroy(rp->red_flowvalve);
 #endif /* ALTQ3_COMPAT && ALTQ_FLOWVALVE */
 	wtab_destroy(rp->red_wtab);
 	free(rp, M_DEVBUF);
 }
 
 /*
  * Export the RED counters of rp into sp.  q_avg is reported with the
  * weight scaling (red_wshift) removed.
  */
 void
 red_getstats(red_t *rp, struct redstats *sp)
 {
 	/* packet counters */
 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
 	sp->drop_cnt		= rp->red_stats.drop_cnt;
 
 	/* event counters */
 	sp->drop_forced		= rp->red_stats.drop_forced;
 	sp->drop_unforced	= rp->red_stats.drop_unforced;
 	sp->marked_packets	= rp->red_stats.marked_packets;
 
 	/* average queue length estimate */
 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
 }
 
 /*
  * Offer mbuf m to queue q under RED control.
  *
  * The average queue length estimate is updated first (compensating for
  * idle time), then the packet is either enqueued, ECN-marked and
  * enqueued, or dropped.
  *
  * Returns 0 when m was enqueued and -1 when a packet was dropped; on
  * the drop path an mbuf is freed here (m itself, or a victim taken from
  * the queue when RED_RANDOM_DROP is defined), so the caller must not
  * touch m afterwards.
  */
 int
 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
     struct altq_pktattr *pktattr)
 {
 	int avg, droptype;
 	int n;
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 	struct fve *fve = NULL;
 
 	/* a non-zero result from fv_checkflow means drop without running red */
 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
 			m_freem(m);
 			return (-1);
 		}
 #endif
 #endif /* ALTQ3_COMPAT */
 
 	avg = rp->red_avg;
 
 	/*
 	 * if we were idle, we pretend that n packets arrived during
 	 * the idle period.
 	 */
 	if (rp->red_idle) {
 		struct timeval now;
 		int t;
 
 		rp->red_idle = 0;
 		microtime(&now);
 		t = (now.tv_sec - rp->red_last.tv_sec);
 		if (t > 60) {
 			/*
 			 * being idle for more than 1 minute, set avg to zero.
 			 * this prevents t from overflow.
 			 */
 			avg = 0;
 		} else {
 			/* idle time in microseconds, then in packet times */
 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
 			n = t / rp->red_pkttime - 1;
 
 			/* the following line does (avg = (1 - Wq)^n * avg) */
 			if (n > 0)
 				avg = (avg >> FP_SHIFT) *
 				    pow_w(rp->red_wtab, n);
 		}
 	}
 
 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
 	rp->red_avg = avg;		/* save the new value */
 
 	/*
 	 * red_count keeps a tally of arriving traffic that has not
 	 * been dropped.
 	 */
 	rp->red_count++;
 
 	/* see if we drop early */
 	droptype = DTYPE_NODROP;
 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
 		if (avg >= rp->red_thmax_s) {
 			/* avg >= th_max: forced drop */
 			droptype = DTYPE_FORCED;
 		} else if (rp->red_old == 0) {
 			/* first exceeds th_min */
 			rp->red_count = 1;
 			rp->red_old = 1;
 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
 				      rp->red_probd, rp->red_count)) {
 			/* mark or drop by red */
 			if ((rp->red_flags & REDF_ECN) &&
 			    mark_ecn(m, pktattr, rp->red_flags)) {
 				/* successfully marked.  do not drop. */
 				rp->red_count = 0;
 #ifdef RED_STATS
 				rp->red_stats.marked_packets++;
 #endif
 			} else {
 				/* unforced drop by red */
 				droptype = DTYPE_EARLY;
 			}
 		}
 	} else {
 		/* avg < th_min */
 		rp->red_old = 0;
 	}
 
 	/*
 	 * if the queue length hits the hard limit, it's a forced drop.
 	 */
 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
 		droptype = DTYPE_FORCED;
 
 #ifdef RED_RANDOM_DROP
 	/* if successful or forced drop, enqueue this packet. */
 	if (droptype != DTYPE_EARLY)
 		_addq(q, m);
 #else
 	/* if successful, enqueue this packet. */
 	if (droptype == DTYPE_NODROP)
 		_addq(q, m);
 #endif
 	if (droptype != DTYPE_NODROP) {
 		if (droptype == DTYPE_EARLY) {
 			/* drop the incoming packet */
 #ifdef RED_STATS
 			rp->red_stats.drop_unforced++;
 #endif
 		} else {
 			/* forced drop, select a victim packet in the queue. */
 #ifdef RED_RANDOM_DROP
 			m = _getq_random(q);
 #endif
 #ifdef RED_STATS
 			rp->red_stats.drop_forced++;
 #endif
 		}
 #ifdef RED_STATS
 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
 #endif
 		rp->red_count = 0;
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 		if (rp->red_flowvalve != NULL)
 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
 #endif
 #endif /* ALTQ3_COMPAT */
 		m_freem(m);
 		return (-1);
 	}
 	/* successfully queued */
 #ifdef RED_STATS
 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
 #endif
 	return (0);
 }
 
 /*
  * early-drop probability is calculated as follows:
  *   prob = p_max * (avg - th_min) / (th_max - th_min)
  *   prob_a = prob / (2 - count*prob)
  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
  * here prob_a increases as successive undrop count increases.
  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
  * becomes 1 when (count >= (2 / prob))).
  *
  * returns 1 when the packet should be dropped or marked, 0 otherwise.
  */
 int
 drop_early(int fp_len, int fp_probd, int count)
 {
 	/* denominator of drop-probability */
 	int	denom = fp_probd - count * fp_len;
 
 	/* count exceeds the hard limit: drop or mark */
 	if (denom <= 0)
 		return (1);
 
 	/*
 	 * now the range of denom is [1..600] in fixed-point. (when
 	 * th_max-th_min=10 and p_max=1/30)
 	 * drop probability = (avg - TH_MIN) / denom
 	 */
 	if ((arc4random() % denom) < fp_len)
 		return (1);	/* drop or mark */
 
 	return (0);		/* no drop/mark */
 }
 
 /*
  * try to mark CE bit to the packet.
  *    returns 1 if successfully marked, 0 otherwise.
  *
  * The IP header location is taken from the pf mbuf tag or, with
  * ALTQ3_COMPAT, from pktattr.  Marking is attempted only for the
  * address families enabled in flags (REDF_ECN4 / REDF_ECN6) and only
  * on packets that are ECN-capable; an already-marked packet also
  * counts as success.
  */
 int
 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
 {
 	struct mbuf	*m0;
 	struct pf_mtag	*at;
 	void		*hdr;
 
 	at = pf_find_mtag(m);
 	if (at != NULL) {
 		hdr = at->hdr;
 #ifdef ALTQ3_COMPAT
 	} else if (pktattr != NULL) {
 		/* the address family is derived from the header itself below */
 		hdr = pktattr->pattr_hdr;
 #endif /* ALTQ3_COMPAT */
 	} else
 		return (0);
 
 	/* verify that pattr_hdr is within the mbuf data */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)hdr >= m0->m_data) &&
 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 		/* ick, tag info is stale */
 		return (0);
 	}
 
 	switch (((struct ip *)hdr)->ip_v) {
 	case IPVERSION:
 		if (flags & REDF_ECN4) {
 			struct ip *ip = hdr;
 			u_int8_t otos;
 			int sum;
 
 			if (ip->ip_v != 4)
 				return (0);	/* version mismatch! */
 
 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
 				return (0);	/* not-ECT */
 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
 				return (1);	/* already marked */
 
 			/*
 			 * ecn-capable but not marked,
 			 * mark CE and update checksum
 			 */
 			otos = ip->ip_tos;
 			ip->ip_tos |= IPTOS_ECN_CE;
 			/*
 			 * update checksum (from RFC1624)
 			 *	   HC' = ~(~HC + ~m + m')
 			 */
 			sum = ~ntohs(ip->ip_sum) & 0xffff;
 			sum += (~otos & 0xffff) + ip->ip_tos;
 			sum = (sum >> 16) + (sum & 0xffff);
 			sum += (sum >> 16);  /* add carry */
 			ip->ip_sum = htons(~sum & 0xffff);
 			return (1);
 		}
 		break;
 #ifdef INET6
 	case (IPV6_VERSION >> 4):
 		if (flags & REDF_ECN6) {
 			struct ip6_hdr *ip6 = hdr;
 			u_int32_t flowlabel;
 
 			flowlabel = ntohl(ip6->ip6_flow);
 			if ((flowlabel >> 28) != 6)
 				return (0);	/* version mismatch! */
 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 			    (IPTOS_ECN_NOTECT << 20))
 				return (0);	/* not-ECT */
 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 			    (IPTOS_ECN_CE << 20))
 				return (1);	/* already marked */
 			/*
 			 * ecn-capable but not marked,  mark CE
 			 */
 			flowlabel |= (IPTOS_ECN_CE << 20);
 			ip6->ip6_flow = htonl(flowlabel);
 			return (1);
 		}
 		break;
 #endif  /* INET6 */
 	}
 
 	/* not marked */
 	return (0);
 }
 
 /*
  * Dequeue the head packet of q.  When the queue turns out to be empty
  * the RED state enters the idle period (recording its start time in
  * red_last, once); otherwise the idle flag is cleared.
  * Returns the mbuf, or NULL if the queue is empty.
  */
 struct mbuf *
 red_getq(red_t *rp, class_queue_t *q)
 {
 	struct mbuf *m;
 
 	if ((m = _getq(q)) == NULL) {
 		if (rp->red_idle == 0) {
 			rp->red_idle = 1;
 			microtime(&rp->red_last);
 		}
 		return NULL;
 	}
 
 	rp->red_idle = 0;
 	return (m);
 }
 
 /*
  * helper routine to calibrate avg during idle.
  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
  * here Wq = 1/weight and the code assumes Wq is close to zero.
  *
  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
  */
 static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
 
 /*
  * Return the weight table for the given weight.  An existing table on
  * wtab_list is shared (its reference count is bumped); otherwise a new
  * one is allocated, linked at the head of the list and filled in.
  * Returns NULL if allocation fails.
  */
 struct wtab *
 wtab_alloc(int weight)
 {
 	struct wtab	*w;
 	int		 i;
 
 	/* reuse a table already built for this weight */
 	for (w = wtab_list; w != NULL; w = w->w_next) {
 		if (w->w_weight != weight)
 			continue;
 		w->w_refcount++;
 		return (w);
 	}
 
 	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (w == NULL)
 		return (NULL);
 
 	w->w_refcount = 1;
 	w->w_weight = weight;
 	w->w_next = wtab_list;
 	wtab_list = w;
 
 	/*
 	 * initialize the weight table: entry 0 is (1 - 1/weight) in
 	 * fixed-point, each following entry the square of the previous one.
 	 * w_param_max records the first exponent whose entry is zero.
 	 */
 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
 	i = 1;
 	while (i < 32) {
 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
 		if (w->w_param_max == 0 && w->w_tab[i] == 0)
 			w->w_param_max = 1 << i;
 		i++;
 	}
 
 	return (w);
 }
 
 /*
  * Drop one reference on a weight table; when the last reference goes
  * away the table is unlinked from wtab_list and freed.
  * Always returns 0.
  */
 int
 wtab_destroy(struct wtab *w)
 {
 	struct wtab	*prev;
 
 	w->w_refcount--;
 	if (w->w_refcount > 0)
 		return (0);	/* still in use */
 
 	if (wtab_list == w) {
 		wtab_list = w->w_next;
 	} else {
 		prev = wtab_list;
 		while (prev->w_next != NULL) {
 			if (prev->w_next == w) {
 				prev->w_next = w->w_next;
 				break;
 			}
 			prev = prev->w_next;
 		}
 	}
 
 	free(w, M_DEVBUF);
 	return (0);
 }
 
 /*
  * Compute (1 - Wq)^n in fixed-point from the squares stored in w_tab:
  * the entries selected by the set bits of n are multiplied together.
  *
  * Returns 0 when n >= w_param_max and 1.0 (1 << FP_SHIFT) when n <= 0.
  */
 int32_t
 pow_w(struct wtab *w, int n)
 {
 	int32_t	result;
 	int	idx, mask;
 
 	if (n >= w->w_param_max)
 		return (0);
 
 	result = 1 << FP_SHIFT;
 	if (n <= 0)
 		return (result);
 
 	/* consume n one bit at a time, lowest bit first */
 	for (idx = 0, mask = 1; n != 0; idx++, mask <<= 1) {
 		if ((n & mask) == 0)
 			continue;
 		result = (result * w->w_tab[idx]) >> FP_SHIFT;
 		n &= ~mask;
 	}
 	return (result);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * red device interface
  */
 altqdev_decl(red);
 
 /*
  * Open entry point of the red device.  None of the arguments are used
  * and no state is set up here; the function always returns 0.
  */
 int
 redopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 /*
  * Close entry point of the red device: detach and free every RED state
  * on red_list.
  *
  * Returns 0 when everything was detached, otherwise the error reported
  * by red_detach().
  */
 int
 redclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	red_queue_t *rqp;
 	int err, error = 0;
 
 	while ((rqp = red_list) != NULL) {
 		/* destroy all */
 		err = red_detach(rqp);
 		if (err != 0) {
 			/*
 			 * red_detach() returns its error before unlinking
 			 * rqp, so rqp is still the head of red_list and
 			 * retrying would loop forever.  Report the failure
 			 * and stop.
 			 */
 			error = err;
 			break;
 		}
 	}
 
 	return error;
 }
 
 /*
  * ioctl entry point of the red device.
  *
  * Every command except RED_GETSTATS first requires the privilege check
  * to pass.  addr points at the command-specific argument structure
  * (struct red_interface, struct red_stats, struct red_conf or
  * struct redparams).
  *
  * Returns 0 on success or an errno value: the privilege-check error,
  * EBADF when no RED state is found for the named interface, ENXIO when
  * the interface does not exist, ENOMEM on allocation failure, EINVAL
  * for an unknown command, or whatever the altq_*() helper returned.
  */
 int
 redioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	red_queue_t *rqp;
 	struct red_interface *ifacep;
 	struct ifnet *ifp;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case RED_GETSTATS:
 		break;
 	default:
 		/*
 		 * Exactly one of the three "if" heads below is compiled
 		 * in; the shared "return (error);" is its body.
 		 */
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 #endif
 			return (error);
 		break;
 	}
 
 	switch (cmd) {
 
 	case RED_ENABLE:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_enable(rqp->rq_ifq);
 		break;
 
 	case RED_DISABLE:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_disable(rqp->rq_ifq);
 		break;
 
 	case RED_IF_ATTACH:
 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
 		if (ifp == NULL) {
 			error = ENXIO;
 			break;
 		}
 
 		/* allocate and initialize red_queue_t */
 		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
 		if (rqp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp, sizeof(red_queue_t));
 
 		rqp->rq_q = malloc(sizeof(class_queue_t),
 		       M_DEVBUF, M_WAITOK);
 		if (rqp->rq_q == NULL) {
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp->rq_q, sizeof(class_queue_t));
 
 		/* all-zero arguments: red_alloc() is asked for its defaults */
 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
 		if (rqp->rq_red == NULL) {
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 
 		rqp->rq_ifq = &ifp->if_snd;
 		qtail(rqp->rq_q) = NULL;
 		qlen(rqp->rq_q) = 0;
 		qlimit(rqp->rq_q) = RED_LIMIT;
 		qtype(rqp->rq_q) = Q_RED;
 
 		/*
 		 * set RED to this ifnet structure.
 		 */
 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
 				    red_enqueue, red_dequeue, red_request,
 				    NULL, NULL);
 		if (error) {
 			/* undo everything allocated above */
 			red_destroy(rqp->rq_red);
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			break;
 		}
 
 		/* add this state to the red list */
 		rqp->rq_next = red_list;
 		red_list = rqp;
 		break;
 
 	case RED_IF_DETACH:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = red_detach(rqp);
 		break;
 
 	case RED_GETSTATS:
 		/* do { } while (0) gives "break" a local scope to leave */
 		do {
 			struct red_stats *q_stats;
 			red_t *rp;
 
 			q_stats = (struct red_stats *)addr;
 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
 					     ALTQT_RED)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			q_stats->q_len	   = qlen(rqp->rq_q);
 			q_stats->q_limit   = qlimit(rqp->rq_q);
 
 			rp = rqp->rq_red;
 			q_stats->q_avg	   = rp->red_avg >> rp->red_wshift;
 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
 			q_stats->drop_forced   = rp->red_stats.drop_forced;
 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
 			q_stats->marked_packets = rp->red_stats.marked_packets;
 
 			q_stats->weight		= rp->red_weight;
 			q_stats->inv_pmax	= rp->red_inv_pmax;
 			q_stats->th_min		= rp->red_thmin;
 			q_stats->th_max		= rp->red_thmax;
 
 #ifdef ALTQ_FLOWVALVE
 			if (rp->red_flowvalve != NULL) {
 				struct flowvalve *fv = rp->red_flowvalve;
 				q_stats->fv_flows    = fv->fv_flows;
 				q_stats->fv_pass     = fv->fv_stats.pass;
 				q_stats->fv_predrop  = fv->fv_stats.predrop;
 				q_stats->fv_alloc    = fv->fv_stats.alloc;
 				q_stats->fv_escape   = fv->fv_stats.escape;
 			} else {
 #endif /* ALTQ_FLOWVALVE */
 				q_stats->fv_flows    = 0;
 				q_stats->fv_pass     = 0;
 				q_stats->fv_predrop  = 0;
 				q_stats->fv_alloc    = 0;
 				q_stats->fv_escape   = 0;
 #ifdef ALTQ_FLOWVALVE
 			}
 #endif /* ALTQ_FLOWVALVE */
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RED_CONFIG:
 		do {
 			struct red_conf *fc;
 			red_t *new;
 			int s, limit;
 
 			fc = (struct red_conf *)addr;
 			if ((rqp = altq_lookup(fc->iface.red_ifname,
 					       ALTQT_RED)) == NULL) {
 				error = EBADF;
 				break;
 			}
 			/* build the replacement first so failure changes nothing */
 			new = red_alloc(fc->red_weight,
 					fc->red_inv_pmax,
 					fc->red_thmin,
 					fc->red_thmax,
 					fc->red_flags,
 					fc->red_pkttime);
 			if (new == NULL) {
 				error = ENOMEM;
 				break;
 			}
 
 #ifdef __NetBSD__
 			s = splnet();
 #else
 			s = splimp();
 #endif
 			red_purgeq(rqp);
 			/* the queue limit is never below the max threshold */
 			limit = fc->red_limit;
 			if (limit < fc->red_thmax)
 				limit = fc->red_thmax;
 			qlimit(rqp->rq_q) = limit;
 			fc->red_limit = limit;	/* write back the new value */
 
 			red_destroy(rqp->rq_red);
 			rqp->rq_red = new;
 
 			splx(s);
 
 			/* write back new values */
 			fc->red_limit = limit;
 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
 			fc->red_thmin = rqp->rq_red->red_thmin;
 			fc->red_thmax = rqp->rq_red->red_thmax;
 
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RED_SETDEFAULTS:
 		do {
 			struct redparams *rp;
 
 			rp = (struct redparams *)addr;
 
 			default_th_min = rp->th_min;
 			default_th_max = rp->th_max;
 			default_inv_pmax = rp->inv_pmax;
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 static int
 red_detach(rqp)
 	red_queue_t *rqp;
 {
 	red_queue_t *tmp;
 	int error = 0;
 
 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
 		altq_disable(rqp->rq_ifq);
 
 	if ((error = altq_detach(rqp->rq_ifq)))
 		return (error);
 
 	if (red_list == rqp)
 		red_list = rqp->rq_next;
 	else {
 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
 			if (tmp->rq_next == rqp) {
 				tmp->rq_next = rqp->rq_next;
 				break;
 			}
 		if (tmp == NULL)
 			printf("red_detach: no state found in red_list!\n");
 	}
 
 	red_destroy(rqp->rq_red);
 	free(rqp->rq_q, M_DEVBUF);
 	free(rqp, M_DEVBUF);
 	return (error);
 }
 
 /*
  * enqueue routine:
  *
  *	returns: 0 when successfully queued.
  *		 ENOBUFS when drop occurs.
  */
 static int
 red_enqueue(ifq, m, pktattr)
 	struct ifaltq *ifq;
 	struct mbuf *m;
 	struct altq_pktattr *pktattr;
 {
 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* a non-negative result from red_addq() means the packet is queued */
 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) >= 0) {
 		ifq->ifq_len++;
 		return 0;
 	}
 
 	/* the packet was dropped */
 	return ENOBUFS;
 }
 
 /*
  * dequeue routine:
  *	must be called in splimp.
  *
  *	returns: mbuf dequeued.
  *		 NULL when no packet is available in the queue.
  */
 
 static struct mbuf *
 red_dequeue(ifq, op)
 	struct ifaltq *ifq;
 	int op;
 {
 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
 	struct mbuf *pkt;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (op != ALTDQ_POLL) {
 		/* op == ALTDQ_REMOVE: really take the packet off the queue */
 		pkt = red_getq(rqp->rq_red, rqp->rq_q);
 		if (pkt != NULL)
 			ifq->ifq_len--;
 		return (pkt);
 	}
 
 	/* ALTDQ_POLL: only peek at the head, nothing is removed */
 	return qhead(rqp->rq_q);
 }
 
 /*
  * Request handler registered with altq_attach().  Only ALTRQ_PURGE is
  * acted upon (the queue is flushed); any other request is ignored.
  * Always returns 0.
  */
 static int
 red_request(ifq, req, arg)
 	struct ifaltq *ifq;
 	int req;
 	void *arg;
 {
 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		red_purgeq(rqp);
 
 	return (0);
 }
 
 /*
  * Flush every packet held in rqp's class queue.  When the discipline is
  * enabled on the interface queue, its ifq_len is reset to 0 as well so
  * that it matches the now-empty class queue.
  */
 static void
 red_purgeq(rqp)
 	red_queue_t *rqp;
 {
 	_flushq(rqp->rq_q);
 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
 		rqp->rq_ifq->ifq_len = 0;
 }
 
 #ifdef ALTQ_FLOWVALVE
 
 #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
 #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
 #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
 #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
 #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
 #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
 
 #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
 #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
 
 #define	FV_N			10	/* update fve_f every FV_N packets */
 
 #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
 #define	FV_TTHRESH		3  /* time threshold to delete fve */
 #define	FV_ALPHA		5  /* extra packet count */
 
 #define	FV_STATS
 
 #if (__FreeBSD_version > 300000)
 #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
 #else
 #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
 #endif
 
 /*
  * Brtt table: 127 entry table to convert drop rate (p) to
  * the corresponding bandwidth fraction (f)
  * the following equation is implemented to use scaled values,
  * fve_p and fve_f, in the fixed point format.
  *
  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
  *   f = Brtt(p) / (max_th + alpha)
  */
 #define	BRTT_SIZE	128
 #define	BRTT_SHIFT	12
 #define	BRTT_MASK	0x0007f000
 #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
 
 const int brtt_tab[BRTT_SIZE] = {
 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
 };
 
 /*
  * Look up the flow entry matching the packet described by pktattr.
  *
  * An entry matches when its address family equals the packet's and its
  * source and destination addresses equal those of the packet's IP (or
  * IPv6) header.
  *
  * The scan walks fv_flowlist from the head and stops early at the first
  * entry whose fve_lastdrop.tv_sec is 0, or whose last drop is more than
  * FV_TTHRESH seconds older than *now (that entry's tv_sec is then reset
  * to 0).  Only the tv_sec part of the threshold is set and compared.
  *
  * Returns the matching entry, or NULL when pktattr is NULL, the address
  * family is not handled, or no match was found.  On a miss for a handled
  * address family fv_flows is set to the number of entries that were
  * examined; for an unhandled family fv_flows is left unchanged.
  */
 static __inline struct fve *
 flowlist_lookup(fv, pktattr, now)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct timeval *now;
 {
 	struct fve *fve;
 	int flows;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct timeval tthresh;
 
 	if (pktattr == NULL)
 		return (NULL);
 
 	/* entries whose last drop is older than this are considered expired */
 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
 	flows = 0;
 	/*
 	 * search the flow list
 	 */
 	switch (pktattr->pattr_af) {
 	case AF_INET:
 		ip = (struct ip *)pktattr->pattr_hdr;
 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 			/* tv_sec == 0 marks an entry that is not in use */
 			if (fve->fve_lastdrop.tv_sec == 0)
 				break;
 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 				/* expired: mark it unused and stop scanning */
 				fve->fve_lastdrop.tv_sec = 0;
 				break;
 			}
 			if (fve->fve_flow.flow_af == AF_INET &&
 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
 			    ip->ip_src.s_addr &&
 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
 			    ip->ip_dst.s_addr)
 				return (fve);
 			flows++;
 		}
 		break;
 #ifdef INET6
 	case AF_INET6:
 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 			/* same stop conditions as the AF_INET scan above */
 			if (fve->fve_lastdrop.tv_sec == 0)
 				break;
 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 				fve->fve_lastdrop.tv_sec = 0;
 				break;
 			}
 			if (fve->fve_flow.flow_af == AF_INET6 &&
 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
 					       &ip6->ip6_src) &&
 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
 					       &ip6->ip6_dst))
 				return (fve);
 			flows++;
 		}
 		break;
 #endif /* INET6 */
 
 	default:
 		/* unknown protocol.  no drop. */
 		return (NULL);
 	}
 	fv->fv_flows = flows;	/* save the number of active fve's */
 	return (NULL);
 }
 
 /*
  * Recycle the entry at the tail of the flow list for the flow of the
  * packet described by pktattr.
  *
  * The entry's flow key (address family, source and destination address)
  * is overwritten for AF_INET and, when INET6 is configured, AF_INET6;
  * for any other address family the key fields are left as they were.
  * The per-flow state is then reset, fv_flows is incremented and, with
  * FV_STATS, the alloc counter is bumped.
  *
  * pktattr must not be NULL (it is dereferenced unconditionally).  The
  * entry is not moved within the list here.
  *
  * Returns the recycled entry.
  */
 static __inline struct fve *
 flowlist_reclaim(fv, pktattr)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 {
 	struct fve *fve;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 
 	/*
 	 * get an entry from the tail of the LRU list.
 	 */
 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
 
 	switch (pktattr->pattr_af) {
 	case AF_INET:
 		ip = (struct ip *)pktattr->pattr_hdr;
 		fve->fve_flow.flow_af = AF_INET;
 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		fve->fve_flow.flow_af = AF_INET6;
 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
 		break;
 #endif
 	}
 
 	fve->fve_state = Green;
 	/*
 	 * fve_p and fve_f are scaled fixed-point integers (they are shifted
 	 * with FV_PUNSCALE/FV_FUNSCALE elsewhere), so reset them with
 	 * integer zeros rather than floating-point literals.
 	 */
 	fve->fve_p = 0;
 	fve->fve_f = 0;
 	/* one behind the interface sequence number */
 	fve->fve_ifseq = fv->fv_ifseq - 1;
 	fve->fve_count = 0;
 
 	fv->fv_flows++;
 #ifdef FV_STATS
 	fv->fv_stats.alloc++;
 #endif
 	return (fve);
 }
 
 /*
  * Make fve the first entry of fv's flow list.  Nothing is done when it
  * already is the first entry.
  */
 static __inline void
 flowlist_move_to_head(fv, fve)
 	struct flowvalve *fv;
 	struct fve *fve;
 {
 	if (TAILQ_FIRST(&fv->fv_flowlist) == fve)
 		return;
 
 	TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
 	TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
 }
 
 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
 /*
  * allocate flowvalve structure
  *
  * Builds a flowvalve for the RED state rp: the flowvalve itself, an
  * array of FV_FLOWLISTSIZE flow entries (all linked on fv_flowlist with
  * fve_lastdrop.tv_sec == 0) and the drop-rate to bandwidth-fraction
  * table fv_p2ftab derived from brtt_tab and rp->red_thmax.
  *
  * Returns the new flowvalve, or NULL when an allocation fails (memory
  * allocated up to that point is released).
  *
  * Note: this function sits inside "#if 0" and is currently not compiled.
  */
 static struct flowvalve *
 fv_alloc(rp)
 	struct red *rp;
 {
 	struct flowvalve *fv;
 	struct fve *fve;
 	int i, num;
 
 	num = FV_FLOWLISTSIZE;
 	fv = malloc(sizeof(struct flowvalve),
 	       M_DEVBUF, M_WAITOK);
 	if (fv == NULL)
 		return (NULL);
 	bzero(fv, sizeof(struct flowvalve));
 
 	fv->fv_fves = malloc(sizeof(struct fve) * num,
 	       M_DEVBUF, M_WAITOK);
 	if (fv->fv_fves == NULL) {
 		free(fv, M_DEVBUF);
 		return (NULL);
 	}
 	bzero(fv->fv_fves, sizeof(struct fve) * num);
 
 	fv->fv_flows = 0;
 	TAILQ_INIT(&fv->fv_flowlist);
 	/* put every entry on the list, marked unused (tv_sec == 0) */
 	for (i = 0; i < num; i++) {
 		fve = &fv->fv_fves[i];
 		fve->fve_lastdrop.tv_sec = 0;
 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
 	}
 
 	/* initialize drop rate threshold in scaled fixed-point */
 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
 
 	/* initialize drop rate to fraction table */
 	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
 	       M_DEVBUF, M_WAITOK);
 	if (fv->fv_p2ftab == NULL) {
 		free(fv->fv_fves, M_DEVBUF);
 		free(fv, M_DEVBUF);
 		return (NULL);
 	}
 	/*
 	 * create the p2f table.
 	 * (shift is used to keep the precision)
 	 *
 	 * Entry 0 is not written here and the table is not zeroed after
 	 * allocation, so fv_p2ftab[0] stays uninitialized.
 	 */
 	for (i = 1; i < BRTT_SIZE; i++) {
 		int f;
 
 		f = brtt_tab[i] << 8;
 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
 	}
 
 	return (fv);
 }
 #endif
 
 /*
  * Release a flowvalve: its flow-entry array, its drop-rate to fraction
  * table and finally the structure itself.
  */
 static void
 fv_destroy(fv)
 	struct flowvalve *fv;
 {
 	/* the two member buffers first, the container last */
 	free(fv->fv_fves, M_DEVBUF);
 	free(fv->fv_p2ftab, M_DEVBUF);
 	free(fv, M_DEVBUF);
 }
 
 /*
  * Translate a scaled drop rate p into the matching entry of fv_p2ftab.
  *
  * p at or above BRTT_PMAX selects the last entry.  Otherwise the table
  * index is taken from the BRTT_MASK bits of p; an index of 0 is bumped
  * to 1, so entry 0 is never returned.
  */
 static __inline int
 fv_p2f(fv, p)
 	struct flowvalve	*fv;
 	int	p;
 {
 	int slot;
 
 	if (p >= BRTT_PMAX)
 		return (fv->fv_p2ftab[BRTT_SIZE-1]);
 
 	slot = (p & BRTT_MASK) >> BRTT_SHIFT;
 	if (slot == 0)
 		slot = 1;
 	return (fv->fv_p2ftab[slot]);
 }
 
 /*
  * check if an arriving packet should be pre-dropped.
  * called from red_addq() when a packet arrives.
  * returns 1 when the packet should be pre-dropped.
  * should be called in splimp.
  */
 /*
  * Parameters:
  *	fv	 flowvalve state; fv_ifseq is incremented on every call.
  *	pktattr	 attributes of the arriving packet (may be NULL, in which
  *		 case the lookup fails and 0 is returned).
  *	fcache	 out: set to the matching flow entry when one is found;
  *		 left untouched otherwise.
  *
  * Returns 1 when the packet belongs to a flow in the Red state that has
  * seen a drop within the last FV_BACKOFFTHRESH seconds, 0 otherwise.
  */
 static int
 fv_checkflow(fv, pktattr, fcache)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct fve **fcache;
 {
 	struct fve *fve;
 	struct timeval now;
 
 	fv->fv_ifseq++;
 	FV_TIMESTAMP(&now);
 
 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 		/* no matching entry in the flowlist */
 		return (0);
 
 	/* hand the entry back so the caller need not look it up again */
 	*fcache = fve;
 
 	/* update fraction f for every FV_N packets */
 	if (++fve->fve_count == FV_N) {
 		/*
 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
 		 */
 		fve->fve_f =
 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
 		fve->fve_ifseq = fv->fv_ifseq;
 		fve->fve_count = 0;
 	}
 
 	/*
 	 * overpumping test
 	 */
 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
 		int fthresh;
 
 		/* calculate a threshold */
 		fthresh = fv_p2f(fv, fve->fve_p);
 		if (fve->fve_f > fthresh)
 			fve->fve_state = Red;
 	}
 
 	if (fve->fve_state == Red) {
 		/*
 		 * backoff test
 		 */
 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
 			/* no drop for at least FV_BACKOFFTHRESH sec */
 			fve->fve_p = 0;
 			fve->fve_state = Green;
 #ifdef FV_STATS
 			fv->fv_stats.escape++;
 #endif
 		} else {
 			/* block this flow */
 			flowlist_move_to_head(fv, fve);
 			/* the pre-drop itself counts as this flow's last drop */
 			fve->fve_lastdrop = now;
 #ifdef FV_STATS
 			fv->fv_stats.predrop++;
 #endif
 			return (1);
 		}
 	}
 
 	/*
 	 * p = (1 - Wp) * p
 	 */
 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
 	if (fve->fve_p < 0)
 		fve->fve_p = 0;
 #ifdef FV_STATS
 	fv->fv_stats.pass++;
 #endif
 	return (0);
 }
 
 /*
  * called from red_addq when a packet is dropped by red.
  * should be called in splimp.
  */
 static void fv_dropbyred(fv, pktattr, fcache)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct fve *fcache;
 {
 	struct fve *fve;
 	struct timeval now;
 
 	if (pktattr == NULL)
 		return;
 	FV_TIMESTAMP(&now);
 
 	if (fcache != NULL)
 		/* the fve of this packet is already cached */
 		fve = fcache;
 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 		fve = flowlist_reclaim(fv, pktattr);
 
 	flowlist_move_to_head(fv, fve);
 
 	/*
 	 * update p:  the following line cancels the update
 	 *	      in fv_checkflow() and calculate
 	 *	p = Wp + (1 - Wp) * p
 	 */
 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
 
 	fve->fve_lastdrop = now;
 }
 
 #endif /* ALTQ_FLOWVALVE */
 
 #ifdef KLD_MODULE
 
 static struct altqsw red_sw =
 	{"red", redopen, redclose, redioctl};
 
 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
 MODULE_VERSION(altq_red, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_RED */
Index: stable/10/sys/contrib/altq/altq/altq_rio.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_rio.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_rio.c	(revision 263086)
@@ -1,850 +1,852 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $	*/
 
 /*
  * Copyright (C) 1998-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1990-1994 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the Computer Systems
  *	Engineering Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/proc.h>
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
 #include <altq/altq.h>
 #include <altq/altq_cdnr.h>
 #include <altq/altq_red.h>
 #include <altq/altq_rio.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 
 /*
  * RIO: RED with IN/OUT bit
  *   described in
  *	"Explicit Allocation of Best Effort Packet Delivery Service"
  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
  *
  * this implementation is extended to support more than 2 drop precedence
  * values as described in RFC2597 (Assured Forwarding PHB Group).
  *
  */
 /*
  * AF DS (differentiated service) codepoints.
  * (classes can be mapped to CBQ or H-FSC classes.)
  *
  *      0   1   2   3   4   5   6   7
  *    +---+---+---+---+---+---+---+---+
  *    |   CLASS   |DropPre| 0 |  CU   |
  *    +---+---+---+---+---+---+---+---+
  *
  *    class 1: 001
  *    class 2: 010
  *    class 3: 011
  *    class 4: 100
  *
  *    low drop prec:    01
  *    medium drop prec: 10
  *    high drop prec:   11
  */
 
 /* normal red parameters */
 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
 				/* q_weight = 0.00195 */
 
 /* red parameters for a slow link */
 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
 				/* q_weight = 0.0078125 */
 
 /* red parameters for a very slow link (e.g., dialup) */
 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
 				/* q_weight = 0.015625 */
 
 /* fixed-point uses 12-bit decimal places */
 #define	FP_SHIFT	12	/* fixed-point shift */
 
 /* red parameters for drop probability */
 #define	INV_P_MAX	10	/* inverse of max drop probability */
 #define	TH_MIN		 5	/* min threshold */
 #define	TH_MAX		15	/* max threshold */
 
 #define	RIO_LIMIT	60	/* default max queue length */
 #define	RIO_STATS		/* collect statistics */
 
 #define	TV_DELTA(a, b, delta) {					\
 	register int	xxs;					\
 								\
 	delta = (a)->tv_usec - (b)->tv_usec; 			\
 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
 		if (xxs < 0) { 					\
 			delta = 60000000;			\
 		} else if (xxs > 4)  {				\
 			if (xxs > 60)				\
 				delta = 60000000;		\
 			else					\
 				delta += xxs * 1000000;		\
 		} else while (xxs > 0) {			\
 			delta += 1000000;			\
 			xxs--;					\
 		}						\
 	}							\
 }
 
 #ifdef ALTQ3_COMPAT
 /* rio_list keeps all rio_queue_t's allocated. */
 static rio_queue_t *rio_list = NULL;
 #endif
 /* default rio parameter values */
 static struct redparams default_rio_params[RIO_NDROPPREC] = {
   /* th_min,		 th_max,     inv_pmax */
   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
 };
 
 /* internal function prototypes */
 static int dscp2index(u_int8_t);
 #ifdef ALTQ3_COMPAT
 static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *rio_dequeue(struct ifaltq *, int);
 static int rio_request(struct ifaltq *, int, void *);
 static int rio_detach(rio_queue_t *);
 
 /*
  * rio device interface
  */
 altqdev_decl(rio);
 
 #endif /* ALTQ3_COMPAT */
 
 rio_t *
 rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
 {
 	rio_t	*rp;
 	int	 w, i;
 	int	 npkts_per_sec;
 
 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rp == NULL)
 		return (NULL);
 
 	rp->rio_flags = flags;
 	if (pkttime == 0)
 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
 		rp->rio_pkttime = 800;
 	else
 		rp->rio_pkttime = pkttime;
 
 	if (weight != 0)
 		rp->rio_weight = weight;
 	else {
 		/* use default */
 		rp->rio_weight = W_WEIGHT;
 
 		/* when the link is very slow, adjust red parameters */
 		npkts_per_sec = 1000000 / rp->rio_pkttime;
 		if (npkts_per_sec < 50) {
 			/* up to about 400Kbps */
 			rp->rio_weight = W_WEIGHT_2;
 		} else if (npkts_per_sec < 300) {
 			/* up to about 2.4Mbps */
 			rp->rio_weight = W_WEIGHT_1;
 		}
 	}
 
 	/* calculate wshift.  weight must be power of 2 */
 	w = rp->rio_weight;
 	for (i = 0; w > 1; i++)
 		w = w >> 1;
 	rp->rio_wshift = i;
 	w = 1 << rp->rio_wshift;
 	if (w != rp->rio_weight) {
 		printf("invalid weight value %d for red! use %d\n",
 		       rp->rio_weight, w);
 		rp->rio_weight = w;
 	}
 
 	/* allocate weight table */
 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
 
 	for (i = 0; i < RIO_NDROPPREC; i++) {
 		struct dropprec_state *prec = &rp->rio_precstate[i];
 
 		prec->avg = 0;
 		prec->idle = 1;
 
 		if (params == NULL || params[i].inv_pmax == 0)
 			prec->inv_pmax = default_rio_params[i].inv_pmax;
 		else
 			prec->inv_pmax = params[i].inv_pmax;
 		if (params == NULL || params[i].th_min == 0)
 			prec->th_min = default_rio_params[i].th_min;
 		else
 			prec->th_min = params[i].th_min;
 		if (params == NULL || params[i].th_max == 0)
 			prec->th_max = default_rio_params[i].th_max;
 		else
 			prec->th_max = params[i].th_max;
 
 		/*
 		 * th_min_s and th_max_s are scaled versions of th_min
 		 * and th_max to be compared with avg.
 		 */
 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
 
 		/*
 		 * precompute probability denominator
 		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 		 */
 		prec->probd = (2 * (prec->th_max - prec->th_min)
 			       * prec->inv_pmax) << FP_SHIFT;
 
 		microtime(&prec->last);
 	}
 
 	return (rp);
 }
 
 /*
  * Release a RIO state: drop its reference on the weight table, then
  * free the state itself.
  */
 void
 rio_destroy(rio_t *rp)
 {
 	wtab_destroy(rp->rio_wtab);
 	free(rp, M_DEVBUF);
 }
 
 /*
  * Copy the statistics of every drop precedence into the array sp, which
  * must have room for RIO_NDROPPREC entries.  q_avg of each entry is
  * overwritten with that precedence's average shifted right by
  * rio_wshift.
  */
 void
 rio_getstats(rio_t *rp, struct redstats *sp)
 {
 	struct redstats	*out;
 	int		 prec;
 
 	for (prec = 0, out = sp; prec < RIO_NDROPPREC; prec++, out++) {
 		bcopy(&rp->q_stats[prec], out, sizeof(*out));
 		out->q_avg = rp->rio_precstate[prec].avg >> rp->rio_wshift;
 	}
 }
 
 #if (RIO_NDROPPREC == 3)
 /*
  * internally, a drop precedence value is converted to an index
  * starting from 0.
  */
 static int
 dscp2index(u_int8_t dscp)
 {
 	int	prec = dscp & AF_DROPPRECMASK;
 
 	/* no drop precedence bits set: use index 0 */
 	return (prec != 0 ? (prec >> 3) - 1 : 0);
 }
 #endif
 
 #if 1
 /*
  * kludge: when a packet is dequeued, we need to know its drop precedence
  * in order to keep the queue length of each drop precedence.
  * use m_pkthdr.rcvif to pass this info.
  */
 #define	RIOM_SET_PRECINDEX(m, idx)	\
 	do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
 #define	RIOM_GET_PRECINDEX(m)	\
 	({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
 	(m)->m_pkthdr.rcvif = NULL; idx; })
 #endif
 
 /*
  * Enqueue m on q subject to RIO's drop decision.
  *
  * The drop precedence index is derived from the packet's DS field.  The
  * average queue length of that precedence and of every higher index is
  * updated first; the drop decision is then taken from the state of the
  * packet's own precedence, with a forced drop when q has reached its
  * limit.
  *
  * Returns 0 when the packet was queued, or -1 when it was dropped (the
  * mbuf has been freed in that case).
  */
 int
 rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
     struct altq_pktattr *pktattr)
 {
 	int			 avg, droptype;
 	u_int8_t		 dsfield, odsfield;
 	int			 dpindex, i, n, t;
 	struct timeval		 now;
 	struct dropprec_state	*prec;
 
 	dsfield = odsfield = read_dsfield(m, pktattr);
 	dpindex = dscp2index(dsfield);
 
 	/*
 	 * update avg of the precedence states whose drop precedence
 	 * is larger than or equal to the drop precedence of the packet
 	 */
 	now.tv_sec = 0;		/* 0 means "time not fetched yet" */
 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
 		prec = &rp->rio_precstate[i];
 		avg = prec->avg;
 		if (prec->idle) {
 			/* leaving an idle period: decay avg for the idle time */
 			prec->idle = 0;
 			if (now.tv_sec == 0)
 				microtime(&now);
 			t = (now.tv_sec - prec->last.tv_sec);
 			if (t > 60)
 				avg = 0;
 			else {
 				t = t * 1000000 +
 					(now.tv_usec - prec->last.tv_usec);
 				/* n: idle time expressed in packet times */
 				n = t / rp->rio_pkttime;
 				/* calculate (avg = (1 - Wq)^n * avg) */
 				if (n > 0)
 					avg = (avg >> FP_SHIFT) *
 						pow_w(rp->rio_wtab, n);
 			}
 		}
 
 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
 		prec->avg = avg;		/* save the new value */
 		/*
 		 * count keeps a tally of arriving traffic that has not
 		 * been dropped.
 		 */
 		prec->count++;
 	}
 
 	/* the decision below uses the packet's own precedence only */
 	prec = &rp->rio_precstate[dpindex];
 	avg = prec->avg;
 
 	/* see if we drop early */
 	droptype = DTYPE_NODROP;
 	if (avg >= prec->th_min_s && prec->qlen > 1) {
 		if (avg >= prec->th_max_s) {
 			/* avg >= th_max: forced drop */
 			droptype = DTYPE_FORCED;
 		} else if (prec->old == 0) {
 			/* first exceeds th_min */
 			prec->count = 1;
 			prec->old = 1;
 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
 				      prec->probd, prec->count)) {
 			/* unforced drop by red */
 			droptype = DTYPE_EARLY;
 		}
 	} else {
 		/* avg < th_min */
 		prec->old = 0;
 	}
 
 	/*
 	 * if the queue length hits the hard limit, it's a forced drop.
 	 */
 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
 		droptype = DTYPE_FORCED;
 
 	if (droptype != DTYPE_NODROP) {
 		/* always drop incoming packet (as opposed to randomdrop) */
 		for (i = dpindex; i < RIO_NDROPPREC; i++)
 			rp->rio_precstate[i].count = 0;
 #ifdef RIO_STATS
 		if (droptype == DTYPE_EARLY)
 			rp->q_stats[dpindex].drop_unforced++;
 		else
 			rp->q_stats[dpindex].drop_forced++;
 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
 #endif
 		m_freem(m);
 		return (-1);
 	}
 
 	/* accepted: the packet counts toward its own and all higher indexes */
 	for (i = dpindex; i < RIO_NDROPPREC; i++)
 		rp->rio_precstate[i].qlen++;
 
 	/* save drop precedence index in mbuf hdr */
 	RIOM_SET_PRECINDEX(m, dpindex);
 
 	if (rp->rio_flags & RIOF_CLEARDSCP)
 		dsfield &= ~DSCP_MASK;
 
 	/* rewrite the header only when the DS field actually changed */
 	if (dsfield != odsfield)
 		write_dsfield(m, pktattr, dsfield);
 
 	_addq(q, m);
 
 #ifdef RIO_STATS
 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
 #endif
 	return (0);
 }
 
 /*
  * rio_getq - take the next packet off 'q' and update the per-precedence
  * queue length accounting kept in 'rp'.
  *
  * The drop precedence index saved in the mbuf is read back; the queue
  * length of that precedence and of every higher-numbered one is
  * decremented.  A precedence whose length drops to zero and which is not
  * yet flagged idle gets flagged idle and timestamped.
  *
  *	Returns:	the dequeued mbuf, or NULL if _getq() returned NULL.
  */
 struct mbuf *
 rio_getq(rio_t *rp, class_queue_t *q)
 {
 	struct mbuf	*pkt;
 	int		 prec;
 
 	pkt = _getq(q);
 	if (pkt == NULL)
 		return NULL;
 
 	for (prec = RIOM_GET_PRECINDEX(pkt); prec < RIO_NDROPPREC; prec++) {
 		rp->rio_precstate[prec].qlen--;
 		if (rp->rio_precstate[prec].qlen != 0)
 			continue;
 		if (rp->rio_precstate[prec].idle != 0)
 			continue;
 		/* queue just drained: start the idle period now */
 		rp->rio_precstate[prec].idle = 1;
 		microtime(&rp->rio_precstate[prec].last);
 	}
 	return (pkt);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * rioopen - open entry point; there is nothing to set up here.
  *
  *	Returns:	always 0.
  */
 int
 rioopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag;
 	int fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* all the work is deferred until a queueing scheme is attached. */
 	return (0);
 }
 
 int
 rioclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	rio_queue_t *rqp;
 	int err, error = 0;
 
 	while ((rqp = rio_list) != NULL) {
 		/* destroy all */
 		err = rio_detach(rqp);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * rioioctl - ioctl entry point for the RIO queueing discipline.
  *
  * Every command other than RIO_GETSTATS is subject to a privilege check.
  * 'addr' points at the command-specific argument: a struct rio_interface
  * (RIO_ENABLE, RIO_DISABLE, RIO_IF_ATTACH, RIO_IF_DETACH), a
  * struct rio_stats (RIO_GETSTATS), a struct rio_conf (RIO_CONFIG) or an
  * array of RIO_NDROPPREC struct redparams (RIO_SETDEFAULTS).
  *
  *	Returns:	0 on success, otherwise an errno value (EINVAL for
  *			an unknown command).
  */
 int
 rioioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	rio_queue_t *rqp;
 	struct rio_interface *ifacep;
 	struct ifnet *ifp;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case RIO_GETSTATS:
 		break;
 	default:
 		/*
 		 * note: this must be "#elif"; an unknown directive such as
 		 * "#elsif" is ignored inside a skipped group, which made
 		 * the middle branch unreachable.
 		 */
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case RIO_ENABLE:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_enable(rqp->rq_ifq);
 		break;
 
 	case RIO_DISABLE:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_disable(rqp->rq_ifq);
 		break;
 
 	case RIO_IF_ATTACH:
 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
 		if (ifp == NULL) {
 			error = ENXIO;
 			break;
 		}
 
 		/* allocate and initialize rio_queue_t */
 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
 		if (rqp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp, sizeof(rio_queue_t));
 
 		rqp->rq_q = malloc(sizeof(class_queue_t),
 		       M_DEVBUF, M_WAITOK);
 		if (rqp->rq_q == NULL) {
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp->rq_q, sizeof(class_queue_t));
 
 		/* zero/NULL arguments are passed; rio_alloc picks the values */
 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
 		if (rqp->rq_rio == NULL) {
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 
 		rqp->rq_ifq = &ifp->if_snd;
 		qtail(rqp->rq_q) = NULL;
 		qlen(rqp->rq_q) = 0;
 		qlimit(rqp->rq_q) = RIO_LIMIT;
 		qtype(rqp->rq_q) = Q_RIO;
 
 		/*
 		 * set RIO to this ifnet structure.
 		 */
 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
 				    rio_enqueue, rio_dequeue, rio_request,
 				    NULL, NULL);
 		if (error) {
 			/* undo the three allocations made above */
 			rio_destroy(rqp->rq_rio);
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			break;
 		}
 
 		/* add this state to the rio list */
 		rqp->rq_next = rio_list;
 		rio_list = rqp;
 		break;
 
 	case RIO_IF_DETACH:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = rio_detach(rqp);
 		break;
 
 	case RIO_GETSTATS:
 		do {
 			struct rio_stats *q_stats;
 			rio_t *rp;
 			int i;
 
 			q_stats = (struct rio_stats *)addr;
 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
 					       ALTQT_RIO)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			rp = rqp->rq_rio;
 
 			q_stats->q_limit = qlimit(rqp->rq_q);
 			q_stats->weight	= rp->rio_weight;
 			q_stats->flags = rp->rio_flags;
 
 			/* copy out counters and parameters per drop precedence */
 			for (i = 0; i < RIO_NDROPPREC; i++) {
 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
 				bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
 				      sizeof(struct redstats));
 				/* avg is kept scaled; shift it back down */
 				q_stats->q_stats[i].q_avg =
 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
 
 				q_stats->q_params[i].inv_pmax
 					= rp->rio_precstate[i].inv_pmax;
 				q_stats->q_params[i].th_min
 					= rp->rio_precstate[i].th_min;
 				q_stats->q_params[i].th_max
 					= rp->rio_precstate[i].th_max;
 			}
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RIO_CONFIG:
 		do {
 			struct rio_conf *fc;
 			rio_t	*new;
 			int s, limit, i;
 
 			fc = (struct rio_conf *)addr;
 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
 					       ALTQT_RIO)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			/* build the replacement state before touching the old one */
 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
 					fc->rio_flags, fc->rio_pkttime);
 			if (new == NULL) {
 				error = ENOMEM;
 				break;
 			}
 
 #ifdef __NetBSD__
 			s = splnet();
 #else
 			s = splimp();
 #endif
 			_flushq(rqp->rq_q);
 			/* the queue limit is never below the last th_max */
 			limit = fc->rio_limit;
 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
 			qlimit(rqp->rq_q) = limit;
 
 			rio_destroy(rqp->rq_rio);
 			rqp->rq_rio = new;
 
 			splx(s);
 
 			/* write back new values */
 			fc->rio_limit = limit;
 			for (i = 0; i < RIO_NDROPPREC; i++) {
 				fc->q_params[i].inv_pmax =
 					rqp->rq_rio->rio_precstate[i].inv_pmax;
 				fc->q_params[i].th_min =
 					rqp->rq_rio->rio_precstate[i].th_min;
 				fc->q_params[i].th_max =
 					rqp->rq_rio->rio_precstate[i].th_max;
 			}
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RIO_SETDEFAULTS:
 		do {
 			struct redparams *rp;
 			int i;
 
 			rp = (struct redparams *)addr;
 			for (i = 0; i < RIO_NDROPPREC; i++)
 				default_rio_params[i] = rp[i];
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 static int
 rio_detach(rqp)
 	rio_queue_t *rqp;
 {
 	rio_queue_t *tmp;
 	int error = 0;
 
 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
 		altq_disable(rqp->rq_ifq);
 
 	if ((error = altq_detach(rqp->rq_ifq)))
 		return (error);
 
 	if (rio_list == rqp)
 		rio_list = rqp->rq_next;
 	else {
 		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
 			if (tmp->rq_next == rqp) {
 				tmp->rq_next = rqp->rq_next;
 				break;
 			}
 		if (tmp == NULL)
 			printf("rio_detach: no state found in rio_list!\n");
 	}
 
 	rio_destroy(rqp->rq_rio);
 	free(rqp->rq_q, M_DEVBUF);
 	free(rqp, M_DEVBUF);
 	return (error);
 }
 
 /*
  * rio support routines
  */
 /*
  * rio_request - request handler registered with altq_attach().
  * Only ALTRQ_PURGE is acted upon: the class queue is flushed and, when
  * the discipline is enabled, ifq_len is reset.  Any other request is
  * ignored.
  *
  *	Returns:	always 0.
  */
 static int
 rio_request(ifq, req, arg)
 	struct ifaltq *ifq;
 	int req;
 	void *arg;
 {
 	rio_queue_t *rqp;
 
 	rqp = (rio_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE) {
 		_flushq(rqp->rq_q);
 		if (ALTQ_IS_ENABLED(ifq))
 			ifq->ifq_len = 0;
 	}
 	return (0);
 }
 
 /*
  * enqueue routine:
  *	hands the packet to rio_addq() and counts it in ifq_len when it
  *	was accepted.
  *
  *	returns: 0 when successfully queued.
  *		 ENOBUFS when drop occurs.
  */
 static int
 rio_enqueue(ifq, m, pktattr)
 	struct ifaltq *ifq;
 	struct mbuf *m;
 	struct altq_pktattr *pktattr;
 {
 	rio_queue_t *rqp;
 
 	rqp = (rio_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) != 0)
 		return (ENOBUFS);
 
 	ifq->ifq_len++;
 	return (0);
 }
 
 /*
  * dequeue routine:
  *	must be called in splimp.
  *
  *	With op == ALTDQ_POLL the head of the queue is returned without
  *	being removed; otherwise the packet is removed via rio_getq() and
  *	ifq_len is decremented.
  *
  *	returns: mbuf dequeued.
  *		 NULL when no packet is available in the queue.
  */
 
 static struct mbuf *
 rio_dequeue(ifq, op)
 	struct ifaltq *ifq;
 	int op;
 {
 	rio_queue_t *rqp;
 	struct mbuf *pkt;
 
 	rqp = (rio_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (op == ALTDQ_POLL)
 		return qhead(rqp->rq_q);
 
 	pkt = rio_getq(rqp->rq_rio, rqp->rq_q);
 	if (pkt == NULL)
 		return (NULL);
 
 	ifq->ifq_len--;
 	return (pkt);
 }
 
 #ifdef KLD_MODULE
 
 /*
  * Switch entry for the RIO discipline: its name followed by the
  * open, close and ioctl entry points defined above.
  */
 static struct altqsw rio_sw =
 	{"rio", rioopen, rioclose, rioioctl};
 
 /*
  * Module glue (only built when KLD_MODULE is defined).
  * NOTE(review): the trailing MODULE_DEPEND arguments are presumably the
  * min/preferred/max versions of altq_red -- confirm against module(9).
  */
 ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
 MODULE_VERSION(altq_rio, 1);
 MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_RIO */
Index: stable/10/sys/contrib/altq/altq/altq_rmclass.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_rmclass.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_rmclass.c	(revision 263086)
@@ -1,1834 +1,1836 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
 
 /*
  * Copyright (c) 1991-1997 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the Network Research
  *      Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * LBL code modified by speer@eng.sun.com, May 1977.
  * For questions and/or comments, please send mail to cbq@ee.lbl.gov
  *
  * @(#)rm_class.c  1.48     97/12/05 SMI
  */
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #ifdef ALTQ3_COMPAT
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
+#include <net/if_var.h>
 #ifdef ALTQ3_COMPAT
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #endif
 
+#include <altq/if_altq.h>
 #include <altq/altq.h>
 #include <altq/altq_rmclass.h>
 #include <altq/altq_rmclass_debug.h>
 #include <altq/altq_red.h>
 #include <altq/altq_rio.h>
 
 /*
  * Local Macros
  */
 
 /*
  * reset_cutoff(ifd) - set the cutoff_ field of 'ifd' to RM_MAXDEPTH.
  * Wrapped in do/while(0) so that "reset_cutoff(x);" is a single statement
  * and can be used as the body of an unbraced if/else.
  */
 #define	reset_cutoff(ifd)	do { (ifd)->cutoff_ = RM_MAXDEPTH; } while (/*CONSTCOND*/ 0)
 
 /*
  * Local routines.
  */
 
 static int	rmc_satisfied(struct rm_class *, struct timeval *);
 static void	rmc_wrr_set_weights(struct rm_ifdat *);
 static void	rmc_depth_compute(struct rm_class *);
 static void	rmc_depth_recompute(rm_class_t *);
 
 static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
 static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);
 
 static int	_rmc_addq(rm_class_t *, mbuf_t *);
 static void	_rmc_dropq(rm_class_t *);
 static mbuf_t	*_rmc_getq(rm_class_t *);
 static mbuf_t	*_rmc_pollq(rm_class_t *);
 
 static int	rmc_under_limit(struct rm_class *, struct timeval *);
 static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
 static void	rmc_drop_action(struct rm_class *);
 static void	rmc_restart(struct rm_class *);
 static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);
 
 #define	BORROW_OFFTIME
 /*
  * BORROW_OFFTIME (experimental):
  * borrow the offtime of the class borrowing from.
  * the reason is that when its own offtime is set, the class is unable
  * to borrow much, especially when cutoff is taking effect.
  * but when the borrowed class is overloaded (advidle is close to minidle),
  * use the borrowing class's offtime to avoid overload.
  */
 #define	ADJUST_CUTOFF
 /*
  * ADJUST_CUTOFF (experimental):
  * if no underlimit class is found due to cutoff, increase cutoff and
  * retry the scheduling loop.
  * also, don't invoke delay_actions while cutoff is taking effect,
  * since a sleeping class won't have a chance to be scheduled in the
  * next loop.
  *
  * now heuristics for setting the top-level variable (cutoff_) becomes:
  *	1. if a packet arrives for a not-overlimit class, set cutoff
  *	   to the depth of the class.
  *	2. if cutoff is i, and a packet arrives for an overlimit class
  *	   with an underlimit ancestor at a lower level than i (say j),
  *	   then set cutoff to j.
  *	3. at scheduling a packet, if there is no underlimit class
  *	   due to the current cutoff level, increase cutoff by 1 and
  *	   then try to schedule again.
  */
 
 /*
  * rm_class_t *
  * rmc_newclass(...) - Create a new resource management class at priority
  * 'pri' on the interface given by 'ifd'.
  *
  * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
  *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
  *              than 100% of the bandwidth, this number should be the
  *              'effective' rate for the class.  Let f be the
  *              bandwidth fraction allocated to this class, and let
  *              nsPerByte be the data rate of the output link in
  *              nanoseconds/byte.  Then nsecPerByte is set to
  *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
  *              for a class that gets 50% of an ethernet's bandwidth.
  *
  * action       the routine to call when the class is over limit.
  *
  * maxq         max allowable queue size for class (in packets).
  *
  * parent       parent class pointer.
  *
  * borrow       class to borrow from (should be either 'parent' or null).
  *
  * maxidle      max value allowed for class 'idle' time estimate (this
  *              parameter determines how large an initial burst of packets
  *              can be before overlimit action is invoked).
  *
  * offtime      how long 'delay' action will delay when class goes over
  *              limit (this parameter determines the steady-state burst
  *              size when a class is running over its limit).
  *
  * Maxidle and offtime have to be computed from the following:  If the
  * average packet size is s, the bandwidth fraction allocated to this
  * class is f, we want to allow b packet bursts, and the gain of the
  * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
  *
  *   ptime = s * nsPerByte * (1 - f) / f
  *   maxidle = ptime * (1 - g^b) / g^b
  *   minidle = -ptime * (1 / (f - 1))
  *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
  *
  * Operationally, it's convenient to specify maxidle & offtime in units
  * independent of the link bandwidth so the maxidle & offtime passed to
  * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
  * (The constant factor is a scale factor needed to make the parameters
  * integers.  This scaling also means that the 'unscaled' values of
  * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
  * not nanoseconds.)  Also note that the 'idle' filter computation keeps
  * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
  * maxidle also must be scaled upward by this value.  Thus, the passed
  * values for maxidle and offtime can be computed as follows:
  *
  * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
  * offtime = offtime * 8 / (1000 * nsecPerByte)
  *
  * When USE_HRTIME is employed, then maxidle and offtime become:
  * 	maxidle = maxidle * (8.0 / nsecPerByte);
  * 	offtime = offtime * (8.0 / nsecPerByte);
  */
 struct rm_class *
 rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
     void (*action)(rm_class_t *, rm_class_t *), int maxq,
     struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
     int minidle, u_int offtime, int pktsize, int flags)
 {
 	struct rm_class	*cl;
 	struct rm_class	*peer;
 	int		 s;
 
 	/*
 	 * 'pri' indexes ifd->active_[], num_[] and alloc_[] below, so
 	 * reject values outside [0, RM_MAXPRIO) in both directions.
 	 */
 	if (pri < 0 || pri >= RM_MAXPRIO)
 		return (NULL);
 #ifndef ALTQ_RED
 	if (flags & RMCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("rmc_newclass: RED not configured for CBQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 #ifndef ALTQ_RIO
 	if (flags & RMCF_RIO) {
 #ifdef ALTQ_DEBUG
 		printf("rmc_newclass: RIO not configured for CBQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	/* both allocations are zero-filled and must not sleep */
 	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl == NULL)
 		return (NULL);
 	CALLOUT_INIT(&cl->callout_);
 	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl->q_ == NULL) {
 		free(cl, M_DEVBUF);
 		return (NULL);
 	}
 
 	/*
 	 * Class initialization.
 	 */
 	cl->children_ = NULL;
 	cl->parent_ = parent;
 	cl->borrow_ = borrow;
 	cl->leaf_ = 1;
 	cl->ifdat_ = ifd;
 	cl->pri_ = pri;
 	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
 	cl->depth_ = 0;
 	cl->qthresh_ = 0;
 	cl->ns_per_byte_ = nsecPerByte;
 
 	qlimit(cl->q_) = maxq;
 	qtype(cl->q_) = Q_DROPHEAD;
 	qlen(cl->q_) = 0;
 	cl->flags_ = flags;
 
 #if 1 /* minidle is also scaled in ALTQ */
 	/* cast keeps the product signed; minidle_ is clamped to <= 0 */
 	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
 	if (cl->minidle_ > 0)
 		cl->minidle_ = 0;
 #else
 	cl->minidle_ = minidle;
 #endif
 	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
 	if (cl->maxidle_ == 0)
 		cl->maxidle_ = 1;
 #if 1 /* offtime is also scaled in ALTQ */
 	cl->avgidle_ = cl->maxidle_;
 	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
 	if (cl->offtime_ == 0)
 		cl->offtime_ = 1;
 #else
 	cl->avgidle_ = 0;
 	cl->offtime_ = (offtime * nsecPerByte) / 8;
 #endif
 	cl->overlimit = action;
 
 #ifdef ALTQ_RED
 	if (flags & (RMCF_RED|RMCF_RIO)) {
 		int red_flags, red_pkttime;
 
 		/* translate the RMCF_* class flags into RED/RIO flags */
 		red_flags = 0;
 		if (flags & RMCF_ECN)
 			red_flags |= REDF_ECN;
 		if (flags & RMCF_FLOWVALVE)
 			red_flags |= REDF_FLOWVALVE;
 #ifdef ALTQ_RIO
 		if (flags & RMCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		red_pkttime = nsecPerByte * pktsize  / 1000;
 
 		/*
 		 * If the allocation fails the class silently stays a
 		 * Q_DROPHEAD queue.
 		 */
 		if (flags & RMCF_RED) {
 			cl->red_ = red_alloc(0, 0,
 			    qlimit(cl->q_) * 10/100,
 			    qlimit(cl->q_) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->red_ != NULL)
 				qtype(cl->q_) = Q_RED;
 		}
 #ifdef ALTQ_RIO
 		else {
 			cl->red_ = (red_t *)rio_alloc(0, NULL,
 						      red_flags, red_pkttime);
 			if (cl->red_ != NULL)
 				qtype(cl->q_) = Q_RIO;
 		}
 #endif
 	}
 #endif /* ALTQ_RED */
 
 	/*
 	 * put the class into the class tree
 	 */
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(ifd->ifq_);
 	if ((peer = ifd->active_[pri]) != NULL) {
 		/* find the last class at this pri */
 		cl->peer_ = peer;
 		while (peer->peer_ != ifd->active_[pri])
 			peer = peer->peer_;
 		peer->peer_ = cl;
 	} else {
 		/* first class at this pri: a ring of one */
 		ifd->active_[pri] = cl;
 		cl->peer_ = cl;
 	}
 
 	if (cl->parent_) {
 		/* push onto the head of the parent's children list */
 		cl->next_ = parent->children_;
 		parent->children_ = cl;
 		parent->leaf_ = 0;
 	}
 
 	/*
 	 * Compute the depth of this class and its ancestors in the class
 	 * hierarchy.
 	 */
 	rmc_depth_compute(cl);
 
 	/*
 	 * If CBQ's WRR is enabled, then initialize the class WRR state.
 	 */
 	if (ifd->wrr_) {
 		ifd->num_[pri]++;
 		ifd->alloc_[pri] += cl->allotment_;
 		rmc_wrr_set_weights(ifd);
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 	return (cl);
 }
 
 /*
  * int
  * rmc_modclass(...) - Change the rate and queueing parameters of the
  *	existing class 'cl'.  The parameters are scaled the same way as in
  *	rmc_newclass().  'pktsize' is accepted but not used here.
  *
  *	Returns:	always 0.
  */
 int
 rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
     int minidle, u_int offtime, int pktsize)
 {
 	struct rm_ifdat	*ifd;
 	u_int		 old_allotment;
 	int		 s;
 
 	ifd = cl->ifdat_;
 	old_allotment = cl->allotment_;
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(ifd->ifq_);
 	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
 	cl->qthresh_ = 0;
 	cl->ns_per_byte_ = nsecPerByte;
 
 	qlimit(cl->q_) = maxq;
 
 #if 1 /* minidle is also scaled in ALTQ */
 	/*
 	 * The (int) cast matters: without it the product is computed as
 	 * unsigned, a negative minidle turns into a large positive value
 	 * and is then clamped to 0 below.  Same expression as in
 	 * rmc_newclass().
 	 */
 	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
 	if (cl->minidle_ > 0)
 		cl->minidle_ = 0;
 #else
 	cl->minidle_ = minidle;
 #endif
 	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
 	if (cl->maxidle_ == 0)
 		cl->maxidle_ = 1;
 #if 1 /* offtime is also scaled in ALTQ */
 	cl->avgidle_ = cl->maxidle_;
 	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
 	if (cl->offtime_ == 0)
 		cl->offtime_ = 1;
 #else
 	cl->avgidle_ = 0;
 	cl->offtime_ = (offtime * nsecPerByte) / 8;
 #endif
 
 	/*
 	 * If CBQ's WRR is enabled, then adjust the per-priority allocation
 	 * by the change in allotment and recompute the weights.
 	 */
 	if (ifd->wrr_) {
 		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
 		rmc_wrr_set_weights(ifd);
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 	return (0);
 }
 
 /*
  * static void
  * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
  *	the appropriate round robin weights for the CBQ weighted round
  *	robin algorithm: M_[] for every priority level and w_allotment_
  *	for every class on that level's peer ring.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_wrr_set_weights(struct rm_ifdat *ifd)
 {
 	int		pri;
 	struct rm_class	*cl, *head;
 
 	for (pri = 0; pri < RM_MAXPRIO; pri++) {
 		/*
 		 * This is inverted from that of the simulator to
 		 * maintain precision.
 		 */
 		if (ifd->num_[pri] != 0)
 			ifd->M_[pri] = ifd->alloc_[pri] /
 				(ifd->num_[pri] * ifd->maxpkt_);
 		else
 			ifd->M_[pri] = 0;
 
 		/*
 		 * Precompute the weighted allotment of each class so the
 		 * wrr scheduling path does not have to divide.  This is
 		 * redone only when a class comes or goes.
 		 */
 		head = ifd->active_[pri];
 		if (head == NULL)
 			continue;
 
 		cl = head;
 		do {
 			/* safe-guard for slow link or alloc_ == 0 */
 			if (ifd->M_[pri] != 0)
 				cl->w_allotment_ = cl->allotment_ /
 					ifd->M_[pri];
 			else
 				cl->w_allotment_ = 0;
 			cl = cl->peer_;
 		} while ((cl != NULL) && (cl != head));
 	}
 }
 
 /*
  * rmc_get_weight - return the M_[] weight of priority level 'pri',
  * or 0 when 'pri' is outside [0, RM_MAXPRIO).
  */
 int
 rmc_get_weight(struct rm_ifdat *ifd, int pri)
 {
 	if ((pri < 0) || (pri >= RM_MAXPRIO))
 		return (0);
 
 	return (ifd->M_[pri]);
 }
 
 /*
  * static void
  * rmc_depth_compute(struct rm_class *cl) - Walk from 'cl' towards the
  *	root, raising each ancestor's depth to one more than its child's.
  *	The walk stops at the first ancestor that is already deeper than
  *	its child, or when there is no parent.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_depth_compute(struct rm_class *cl)
 {
 	rm_class_t	*child, *up;
 
 	/*
 	 * Recompute the depth for the branch of the tree.
 	 */
 	for (child = cl; child != NULL; child = up) {
 		up = child->parent_;
 		if (up == NULL || child->depth_ < up->depth_)
 			break;
 		up->depth_ = child->depth_ + 1;
 	}
 }
 
 /*
  * static void
  * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
  *	the depth of the tree after a class has been deleted.  Starting
  *	at 'cl' (which may be NULL) and moving up through the parents,
  *	each node's depth becomes 0 if it has no children, otherwise one
  *	more than its deepest child; the walk ends early at the first
  *	node with children whose depth is already correct.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_depth_recompute(rm_class_t *cl)
 {
 #if 1 /* ALTQ */
 	rm_class_t	*node, *kid;
 	int		 deepest;
 
 	for (node = cl; node != NULL; node = node->parent_) {
 		if (node->children_ == NULL) {
 			node->depth_ = 0;
 			continue;
 		}
 
 		/* find the largest depth among the children */
 		deepest = 0;
 		for (kid = node->children_; kid != NULL; kid = kid->next_)
 			if (kid->depth_ > deepest)
 				deepest = kid->depth_;
 
 		if (node->depth_ == deepest + 1)
 			/* no change to this parent */
 			return;
 
 		node->depth_ = deepest + 1;
 	}
 #else
 	rm_class_t	*t;
 
 	if (cl->depth_ >= 1) {
 		if (cl->children_ == NULL) {
 			cl->depth_ = 0;
 		} else if ((t = cl->children_) != NULL) {
 			while (t != NULL) {
 				if (t->children_ != NULL)
 					rmc_depth_recompute(t);
 				t = t->next_;
 			}
 		} else
 			rmc_depth_compute(cl);
 	}
 #endif
 }
 
 /*
  * void
  * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
  *	function deletes a class from the link-sharing structure and frees
  *	all resources associated with the class.
  *
  *	The class must not have any children (asserted on entry).  Queued
  *	packets are dropped, the class is unlinked from its parent's
  *	children list and from the peer ring of its priority, the WRR
  *	weights and tree depths are recomputed, and finally the RED/RIO
  *	state, the queue and the class itself are freed.
  *
  *	Returns: NONE
  */
 
 void
 rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
 {
 	struct rm_class	*p, *head, *previous;
 	int		 s;
 
 	ASSERT(cl->children_ == NULL);
 
 	/* a sleeping class has a callout pending; stop it first */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(ifd->ifq_);
 	/*
 	 * Free packets in the packet queue.
 	 * XXX - this may not be a desired behavior.  Packets should be
 	 *		re-queued.
 	 */
 	rmc_dropall(cl);
 
 	/*
 	 * If the class has a parent, then remove the class from the
 	 * parent's children chain.
 	 */
 	if (cl->parent_ != NULL) {
 		head = cl->parent_->children_;
 		p = previous = head;
 		if (head->next_ == NULL) {
 			/* only child: the parent becomes a leaf again */
 			ASSERT(head == cl);
 			cl->parent_->children_ = NULL;
 			cl->parent_->leaf_ = 1;
 		} else while (p != NULL) {
 			if (p == cl) {
 				if (cl == head)
 					cl->parent_->children_ = cl->next_;
 				else
 					previous->next_ = cl->next_;
 				cl->next_ = NULL;
 				/* found and unlinked: terminate the walk */
 				p = NULL;
 			} else {
 				previous = p;
 				p = p->next_;
 			}
 		}
 	}
 
 	/*
 	 * Delete class from class priority peer list.
 	 */
 	if ((p = ifd->active_[cl->pri_]) != NULL) {
 		/*
 		 * If there is more than one member of this priority
 		 * level, then look for class(cl) in the priority level.
 		 */
 		if (p != p->peer_) {
 			/* find the predecessor of cl on the ring */
 			while (p->peer_ != cl)
 				p = p->peer_;
 			p->peer_ = cl->peer_;
 
 			/* move the ring head off cl if it pointed there */
 			if (ifd->active_[cl->pri_] == cl)
 				ifd->active_[cl->pri_] = cl->peer_;
 		} else {
 			ASSERT(p == cl);
 			ifd->active_[cl->pri_] = NULL;
 		}
 	}
 
 	/*
 	 * Recompute the WRR weights.
 	 */
 	if (ifd->wrr_) {
 		ifd->alloc_[cl->pri_] -= cl->allotment_;
 		ifd->num_[cl->pri_]--;
 		rmc_wrr_set_weights(ifd);
 	}
 
 	/*
 	 * Re-compute the depth of the tree.
 	 */
 #if 1 /* ALTQ */
 	rmc_depth_recompute(cl->parent_);
 #else
 	rmc_depth_recompute(ifd->root_);
 #endif
 
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 
 	/*
 	 * Free the class structure.
 	 */
 	if (cl->red_ != NULL) {
 		/* red_ holds either RIO or RED state; the queue type tells which */
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->q_))
 			rio_destroy((rio_t *)cl->red_);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->q_))
 			red_destroy(cl->red_);
 #endif
 	}
 	free(cl->q_, M_DEVBUF);
 	free(cl, M_DEVBUF);
 }
 
 
 /*
  * void
  * rmc_init(...) - Initialize the resource management data structures
  *	associated with the output portion of interface 'ifp'.  'ifd' is
  *	where the structures will be built (for backwards compatibility, the
  *	structures aren't kept in the ifnet struct).  'nsecPerByte'
  *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
  *	'restart' is the driver-specific routine that the generic 'delay
  *	until under limit' action will call to restart output.  `maxq'
  *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
  *	is the maximum number of packets that the resource management
  *	code will allow to be queued 'downstream' (this is typically 1).
  *
  *	'ifd' is zeroed first, so any previous contents are lost.  If the
  *	root class cannot be allocated a message is printed and the
  *	function returns with ifd->root_ set to NULL.
  *
  *	Returns:	NONE
  */
 
 void
 rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
     void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
     int minidle, u_int offtime, int flags)
 {
 	int		i, mtu;
 
 	/*
 	 * Initialize the CBQ tracing/debug facility.
 	 */
 	CBQTRACEINIT();
 
 	bzero((char *)ifd, sizeof (*ifd));
 	mtu = ifq->altq_ifp->if_mtu;
 	ifd->ifq_ = ifq;
 	ifd->restart = restart;
 	ifd->maxqueued_ = maxqueued;
 	ifd->ns_per_byte_ = nsecPerByte;
 	ifd->maxpkt_ = mtu;
 	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
 	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
 #if 1
 	/*
 	 * maxiftime_ is derived from the time to send one MTU-sized packet
 	 * (mtu * nsecPerByte / 1000) times 16, reduced to a quarter of
 	 * that when mtu * nsecPerByte exceeds 10 * 1000000.
 	 */
 	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
 	if (mtu * nsecPerByte > 10 * 1000000)
 		ifd->maxiftime_ /= 4;
 #endif
 
 	reset_cutoff(ifd);
 	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
 
 	/*
 	 * Initialize the CBQ's WRR state.
 	 */
 	for (i = 0; i < RM_MAXPRIO; i++) {
 		ifd->alloc_[i] = 0;
 		ifd->M_[i] = 0;
 		ifd->num_[i] = 0;
 		ifd->na_[i] = 0;
 		ifd->active_[i] = NULL;
 	}
 
 	/*
 	 * Initialize current packet state.
 	 */
 	ifd->qi_ = 0;
 	ifd->qo_ = 0;
 	for (i = 0; i < RM_MAXQUEUED; i++) {
 		ifd->class_[i] = NULL;
 		ifd->curlen_[i] = 0;
 		ifd->borrowed_[i] = NULL;
 	}
 
 	/*
 	 * Create the root class of the link-sharing structure.
 	 * It is created at priority 0 with no parent and no class to
 	 * borrow from, and with pktsize and flags of 0.
 	 */
 	if ((ifd->root_ = rmc_newclass(0, ifd,
 				       nsecPerByte,
 				       rmc_root_overlimit, maxq, 0, 0,
 				       maxidle, minidle, offtime,
 				       0, 0)) == NULL) {
 		printf("rmc_init: root class not allocated\n");
 		return ;
 	}
 	ifd->root_->depth_ = 0;
 }
 
 /*
  * int
  * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
  *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
  *	by a driver's if_output routine.  This routine must be called with
  *	output packet completion interrupts locked out (to avoid racing with
  *	rmc_dequeue_next).
  *
  *	Before queueing, the top-level cutoff of the interface may be
  *	lowered based on whether the class (or a class on its borrow
  *	chain) is currently underlimit.
  *
  *	Returns:	0 on successful queueing
  *			-1 when packet drop occurs
  */
 int
 rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
 {
 	struct timeval	 now;
 	struct rm_ifdat *ifd = cl->ifdat_;
 	int		 cpri = cl->pri_;
 	/* sampled before the packet is added, see the na_[] update below */
 	int		 is_empty = qempty(cl->q_);
 
 	RM_GETTIME(now);
 	if (ifd->cutoff_ > 0) {
 		if (TV_LT(&cl->undertime_, &now)) {
 			/* class is underlimit: cutoff can drop to its depth */
 			if (ifd->cutoff_ > cl->depth_)
 				ifd->cutoff_ = cl->depth_;
 			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
 		}
 #if 1 /* ALTQ */
 		else {
 			/*
 			 * the class is overlimit. if the class has
 			 * underlimit ancestors, set cutoff to the lowest
 			 * depth among them.
 			 */
 			struct rm_class *borrow = cl->borrow_;
 
 			while (borrow != NULL &&
 			       borrow->depth_ < ifd->cutoff_) {
 				if (TV_LT(&borrow->undertime_, &now)) {
 					ifd->cutoff_ = borrow->depth_;
 					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
 					break;
 				}
 				borrow = borrow->borrow_;
 			}
 		}
 #else /* !ALTQ */
 		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
 			if (TV_LT(&cl->borrow_->undertime_, &now)) {
 				ifd->cutoff_ = cl->borrow_->depth_;
 				CBQTRACE(rmc_queue_packet, 'ffob',
 					 cl->borrow_->depth_);
 			}
 		}
 #endif /* !ALTQ */
 	}
 
 	if (_rmc_addq(cl, m) < 0)
 		/* failed */
 		return (-1);
 
 	/* the queue was empty before this packet: bump na_[] for this pri */
 	if (is_empty) {
 		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
 		ifd->na_[cpri]++;
 	}
 
 	if (qlen(cl->q_) > qlimit(cl->q_)) {
 		/* note: qlimit can be set to 0 or 1 */
 		rmc_drop_action(cl);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * void
  * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
  *	classes to see if they are satisfied.  Priority levels are scanned
  *	from the highest index down; at the first class that is not
  *	satisfied, cutoff_ is set to that class's depth and the scan ends.
  *	If every class is satisfied the cutoff is reset.
  */
 
 static void
 rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
 {
 	int		 pri;
 	rm_class_t	*cl, *first;
 
 	pri = RM_MAXPRIO;
 	while (pri-- > 0) {
 		first = ifd->active_[pri];
 		if (first == NULL)
 			continue;
 
 		/* go once around the peer ring of this priority */
 		cl = first;
 		do {
 			if (!rmc_satisfied(cl, now)) {
 				ifd->cutoff_ = cl->depth_;
 				return;
 			}
 			cl = cl->peer_;
 		} while (cl != first);
 	}
 
 	reset_cutoff(ifd);
 }
 
 /*
  * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
  *
  * A NULL class, or one whose undertime_ is still in the future, counts
  * as satisfied.  A class at depth 0 is unsatisfied only when it is not
  * sleeping and its queue is longer than qthresh_.  Any other class is
  * satisfied when all of its children are.
  */
 
 static int
 rmc_satisfied(struct rm_class *cl, struct timeval *now)
 {
 	rm_class_t	*child;
 
 	if (cl == NULL)
 		return (1);
 	if (TV_LT(now, &cl->undertime_))
 		return (1);
 
 	if (cl->depth_ == 0) {
 		if (cl->sleeping_)
 			return (1);
 		return ((qlen(cl->q_) > cl->qthresh_) ? 0 : 1);
 	}
 
 	for (child = cl->children_; child != NULL; child = child->next_) {
 		if (!rmc_satisfied(child, now))
 			return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Return 1 if class 'cl' is under limit or can borrow from a parent,
  * 0 if overlimit.  As a side-effect, this routine will invoke the
  * class overlimit action if the class is overlimit.
  */
 
 static int
 rmc_under_limit(struct rm_class *cl, struct timeval *now)
 {
 	rm_class_t	*p = cl;	/* the class the caller asked about */
 	rm_class_t	*top;		/* last class visited on the borrow chain */
 	struct rm_ifdat	*ifd = cl->ifdat_;
 
 	/* forget any lender recorded for the current slot */
 	ifd->borrowed_[ifd->qi_] = NULL;
 	/*
 	 * If cl is the root class, then always return that it is
 	 * underlimit.  Otherwise, check to see if the class is underlimit.
 	 */
 	if (cl->parent_ == NULL)
 		return (1);
 
 	if (cl->sleeping_) {
 		/* undertime_ not reached yet: the class stays suspended */
 		if (TV_LT(now, &cl->undertime_))
 			return (0);
 
 		/* undertime_ has passed: stop the callout and wake the class */
 		CALLOUT_STOP(&cl->callout_);
 		cl->sleeping_ = 0;
 		cl->undertime_.tv_sec = 0;
 		return (1);
 	}
 
 	top = NULL;
 	/*
 	 * While the current class is overlimit (undertime_ set and still in
 	 * the future), move to the class it borrows from.
 	 */
 	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
 		/*
 		 * The walk fails when the chain ends (cl == NULL) or when
 		 * the next lender is deeper than the current cutoff.
 		 */
 		if (((cl = cl->borrow_) == NULL) ||
 		    (cl->depth_ > ifd->cutoff_)) {
 #ifdef ADJUST_CUTOFF
 			if (cl != NULL)
 				/* cutoff is taking effect, just
 				   return false without calling
 				   the delay action. */
 				return (0);
 #endif
 #ifdef BORROW_OFFTIME
 			/*
 			 * check if the class can borrow offtime too.
 			 * borrow offtime from the top of the borrow
 			 * chain if the top class is not overloaded.
 			 */
 			if (cl != NULL) {
 				/* cutoff is taking effect, use this class as top. */
 				top = cl;
 				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
 			}
 			/* a top class pinned at minidle_ is not used as lender */
 			if (top != NULL && top->avgidle_ == top->minidle_)
 				top = NULL;
 			p->overtime_ = *now;
 			(p->overlimit)(p, top);
 #else
 			/* record when the class went overlimit, then run its action */
 			p->overtime_ = *now;
 			(p->overlimit)(p, NULL);
 #endif
 			return (0);
 		}
 		top = cl;
 	}
 
 	/* cl is now an underlimit class; record it if it is a lender */
 	if (cl != p)
 		ifd->borrowed_[ifd->qi_] = cl;
 	return (1);
 }
 
 /*
  * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
  *	Packet-by-packet round robin.
  *
  * The heart of the weighted round-robin scheduler, which decides which
  * class next gets to send a packet.  Highest priority first, then
  * weighted round-robin within priorities.
  *
  * Each able-to-send class gets to send until its byte allocation is
  * exhausted.  Thus, the active pointer is only changed after a class has
  * exhausted its allocation.
  *
  * If the scheduler finds no class that is underlimit or able to borrow,
  * then the first class found that had a nonzero queue and is allowed to
  * borrow gets to send.
  */
 
 static mbuf_t *
 _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
 {
 	struct rm_class	*cl = NULL, *first = NULL;
 	u_int		 deficit;
 	int		 cpri;
 	mbuf_t		*m;
 	struct timeval	 now;
 
 	RM_GETTIME(now);
 
 	/*
 	 * if the driver polls the top of the queue and then removes
 	 * the polled packet, we must return the same packet.
 	 */
 	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 		cl = ifd->pollcache_;
 		cpri = cl->pri_;
 		if (ifd->efficient_) {
 			/* check if this class is overlimit */
 			if (cl->undertime_.tv_sec != 0 &&
 			    rmc_under_limit(cl, &now) == 0)
 				first = cl;
 		}
 		ifd->pollcache_ = NULL;
 		goto _wrr_out;
 	}
 	else {
 		/* mode == ALTDQ_POLL || pollcache == NULL */
 		ifd->pollcache_ = NULL;
 		ifd->borrowed_[ifd->qi_] = NULL;
 	}
 #ifdef ADJUST_CUTOFF
  _again:
 #endif
 	/* scan priorities from the highest index down */
 	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 		/* na_[cpri] == 0: nothing counted as active at this priority */
 		if (ifd->na_[cpri] == 0)
 			continue;
 		deficit = 0;
 		/*
 		 * Loop through twice for a priority level, if some class
 		 * was unable to send a packet the first round because
 		 * of the weighted round-robin mechanism.
 		 * During the second loop at this level, deficit==2.
 		 * (This second loop is not needed if for every class,
 		 * "M[cl->pri_])" times "cl->allotment" is greater than
 		 * the byte size for the largest packet in the class.)
 		 */
  _wrr_loop:
 		cl = ifd->active_[cpri];
 		ASSERT(cl != NULL);
 		do {
 			/* first pass only: replenish an exhausted allocation */
 			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
 				cl->bytes_alloc_ += cl->w_allotment_;
 			if (!qempty(cl->q_)) {
 				if ((cl->undertime_.tv_sec == 0) ||
 				    rmc_under_limit(cl, &now)) {
 					/*
 					 * may send: either allocation remains
 					 * or this is the second pass
 					 */
 					if (cl->bytes_alloc_ > 0 || deficit > 1)
 						goto _wrr_out;
 
 					/* underlimit but no alloc */
 					deficit = 1;
 #if 1
 					ifd->borrowed_[ifd->qi_] = NULL;
 #endif
 				}
 				else if (first == NULL && cl->borrow_ != NULL)
 					first = cl; /* borrowing candidate */
 			}
 
 			cl->bytes_alloc_ = 0;
 			cl = cl->peer_;
 		} while (cl != ifd->active_[cpri]);
 
 		if (deficit == 1) {
 			/* first loop found an underlimit class with deficit */
 			/* Loop on same priority level, with new deficit.  */
 			deficit = 2;
 			goto _wrr_loop;
 		}
 	}
 
 #ifdef ADJUST_CUTOFF
 	/*
 	 * no underlimit class found.  if cutoff is taking effect,
 	 * increase cutoff and try again.
 	 */
 	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 		ifd->cutoff_++;
 		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
 		goto _again;
 	}
 #endif /* ADJUST_CUTOFF */
 	/*
 	 * If LINK_EFFICIENCY is turned on, then the first overlimit
 	 * class we encounter will send a packet if all the classes
 	 * of the link-sharing structure are overlimit.
 	 */
 	reset_cutoff(ifd);
 	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
 
 	if (!ifd->efficient_ || first == NULL)
 		return (NULL);
 
 	/* let the first overlimit class that is allowed to borrow send */
 	cl = first;
 	cpri = cl->pri_;
 #if 0	/* too time-consuming for nothing */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 	cl->sleeping_ = 0;
 	cl->undertime_.tv_sec = 0;
 #endif
 	ifd->borrowed_[ifd->qi_] = cl->borrow_;
 	ifd->cutoff_ = cl->borrow_->depth_;
 
 	/*
 	 * Deque the packet and do the book keeping...
 	 */
  _wrr_out:
 	if (op == ALTDQ_REMOVE) {
 		m = _rmc_getq(cl);
 		if (m == NULL)
 			panic("_rmc_wrr_dequeue_next");
 		/* the class just went empty: one fewer active at cpri */
 		if (qempty(cl->q_))
 			ifd->na_[cpri]--;
 
 		/*
 		 * Update class statistics and link data.
 		 */
 		if (cl->bytes_alloc_ > 0)
 			cl->bytes_alloc_ -= m_pktlen(m);
 
 		/*
 		 * Advance the active pointer once the allocation is used up
 		 * or when the class sent as the overlimit fallback (first).
 		 */
 		if ((cl->bytes_alloc_ <= 0) || first == cl)
 			ifd->active_[cl->pri_] = cl->peer_;
 		else
 			ifd->active_[cl->pri_] = cl;
 
 		/* record the packet in slot qi_ and advance the ring index */
 		ifd->class_[ifd->qi_] = cl;
 		ifd->curlen_[ifd->qi_] = m_pktlen(m);
 		ifd->now_[ifd->qi_] = now;
 		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 		ifd->queued_++;
 	} else {
 		/* mode == ALTDQ_POLL */
 		m = _rmc_pollq(cl);
 		/* remember the class so a following remove returns this packet */
 		ifd->pollcache_ = cl;
 	}
 	return (m);
 }
 
 /*
  * Dequeue & return next packet from the highest priority class that
  * has a packet to send & has enough allocation to send it.  This
  * routine is called by a driver whenever it needs a new packet to
  * output.
  */
 static mbuf_t *
 _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
 {
 	mbuf_t		*m;
 	int		 cpri;
 	struct rm_class	*cl, *first = NULL;
 	struct timeval	 now;
 
 	RM_GETTIME(now);
 
 	/*
 	 * if the driver polls the top of the queue and then removes
 	 * the polled packet, we must return the same packet.
 	 */
 	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 		cl = ifd->pollcache_;
 		cpri = cl->pri_;
 		ifd->pollcache_ = NULL;
 		goto _prr_out;
 	} else {
 		/* mode == ALTDQ_POLL || pollcache == NULL */
 		ifd->pollcache_ = NULL;
 		ifd->borrowed_[ifd->qi_] = NULL;
 	}
 #ifdef ADJUST_CUTOFF
  _again:
 #endif
 	/* scan priorities from the highest index down */
 	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 		/* na_[cpri] == 0: nothing counted as active at this priority */
 		if (ifd->na_[cpri] == 0)
 			continue;
 		cl = ifd->active_[cpri];
 		ASSERT(cl != NULL);
 		do {
 			if (!qempty(cl->q_)) {
 				/* send from the first backlogged underlimit class */
 				if ((cl->undertime_.tv_sec == 0) ||
 				    rmc_under_limit(cl, &now))
 					goto _prr_out;
 				/* remember the first overlimit class that may borrow */
 				if (first == NULL && cl->borrow_ != NULL)
 					first = cl;
 			}
 			cl = cl->peer_;
 		} while (cl != ifd->active_[cpri]);
 	}
 
 #ifdef ADJUST_CUTOFF
 	/*
 	 * no underlimit class found.  if cutoff is taking effect, increase
 	 * cutoff and try again.
 	 */
 	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 		ifd->cutoff_++;
 		goto _again;
 	}
 #endif /* ADJUST_CUTOFF */
 	/*
 	 * If LINK_EFFICIENCY is turned on, then the first overlimit
 	 * class we encounter will send a packet if all the classes
 	 * of the link-sharing structure are overlimit.
 	 */
 	reset_cutoff(ifd);
 	if (!ifd->efficient_ || first == NULL)
 		return (NULL);
 
 	/* let the first overlimit class that is allowed to borrow send */
 	cl = first;
 	cpri = cl->pri_;
 #if 0	/* too time-consuming for nothing */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 	cl->sleeping_ = 0;
 	cl->undertime_.tv_sec = 0;
 #endif
 	ifd->borrowed_[ifd->qi_] = cl->borrow_;
 	ifd->cutoff_ = cl->borrow_->depth_;
 
 	/*
 	 * Deque the packet and do the book keeping...
 	 */
  _prr_out:
 	if (op == ALTDQ_REMOVE) {
 		m = _rmc_getq(cl);
 		if (m == NULL)
 			panic("_rmc_prr_dequeue_next");
 		/* the class just went empty: one fewer active at cpri */
 		if (qempty(cl->q_))
 			ifd->na_[cpri]--;
 
 		/* packet-by-packet round robin: always move to the next peer */
 		ifd->active_[cpri] = cl->peer_;
 
 		/* record the packet in slot qi_ and advance the ring index */
 		ifd->class_[ifd->qi_] = cl;
 		ifd->curlen_[ifd->qi_] = m_pktlen(m);
 		ifd->now_[ifd->qi_] = now;
 		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 		ifd->queued_++;
 	} else {
 		/* mode == ALTDQ_POLL */
 		m = _rmc_pollq(cl);
 		/* remember the class so a following remove returns this packet */
 		ifd->pollcache_ = cl;
 	}
 	return (m);
 }
 
 /*
  * mbuf_t *
  * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function
  *	is invoked by the packet driver to get the next packet to be
  *	dequeued and output on the link.  If WRR is enabled, then the
  *	WRR dequeue next routine will determine the next packet to send.
  *	Otherwise, packet-by-packet round robin is invoked.
  *
  *	Returns:	NULL, if a packet is not available or if all
  *			classes are overlimit.
  *
  *			Otherwise, Pointer to the next packet.
  */
 
 mbuf_t *
 rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
 {
 	/* Nothing is handed out once queued_ has reached maxqueued_. */
 	if (ifd->queued_ >= ifd->maxqueued_)
 		return (NULL);
 
 	/* wrr_ selects weighted round robin over packet-by-packet. */
 	if (ifd->wrr_)
 		return (_rmc_wrr_dequeue_next(ifd, mode));
 
 	return (_rmc_prr_dequeue_next(ifd, mode));
 }
 
 /*
  * Update the utilization estimate for the packet that just completed.
  * The packet's class & the parent(s) of that class all get their
  * estimators updated.  This routine is called by the driver's output-
  * packet-completion interrupt service routine.
  */
 
 /*
  * a macro to approximate "divide by 1000" that gives 0.000999,
  * if a value has enough effective digits.
  * (on pentium, mul takes 9 cycles but div takes 46!)
  */
 /* 1/1024 + 1/65536 + 1/131072 ~= 0.0009995, using shifts only */
 #define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
 void
 rmc_update_class_util(struct rm_ifdat *ifd)
 {
 	int		 idle, avgidle, pktlen;
 	int		 pkt_time, tidle;
 	rm_class_t	*cl, *borrowed;
 	rm_class_t	*borrows;
 	struct timeval	*nowp;
 
 	/*
 	 * Get the most recent completed class.
 	 */
 	if ((cl = ifd->class_[ifd->qo_]) == NULL)
 		return;
 
 	/* values recorded in slot qo_ by the dequeue routines */
 	pktlen = ifd->curlen_[ifd->qo_];
 	borrowed = ifd->borrowed_[ifd->qo_];
 	borrows = borrowed;
 
 	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 
 	/*
 	 * Run estimator on class and its ancestors.
 	 */
 	/*
 	 * rm_update_class_util is designed to be called when the
 	 * transfer is completed from a xmit complete interrupt,
 	 * but most drivers don't implement an upcall for that.
 	 * so, just use estimated completion time.
 	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
 	 */
 	nowp = &ifd->now_[ifd->qo_];
 	/* get pkt_time (for link) in usec */
 #if 1  /* use approximation */
 	/* NOTE(review): int multiply; assumes the product fits in int -- confirm */
 	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
 	pkt_time = NSEC_TO_USEC(pkt_time);
 #else
 	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
 #endif
 #if 1 /* ALTQ4PPP */
 	if (TV_LT(nowp, &ifd->ifnow_)) {
 		int iftime;
 
 		/*
 		 * make sure the estimated completion time does not go
 		 * too far.  it can happen when the link layer supports
 		 * data compression or the interface speed is set to
 		 * a much lower value.
 		 */
 		TV_DELTA(&ifd->ifnow_, nowp, iftime);
 		if (iftime+pkt_time < ifd->maxiftime_) {
 			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 		} else {
 			/* clamp ifnow_ to at most maxiftime_ past *nowp */
 			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
 		}
 	} else {
 		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 	}
 #else
 	if (TV_LT(nowp, &ifd->ifnow_)) {
 		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 	} else {
 		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 	}
 #endif
 
 	/* update the sending class, then each ancestor via parent_ */
 	while (cl != NULL) {
 		/* idle = time from this class's last_ stamp to ifnow_ */
 		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
 		if (idle >= 2000000)
 			/*
 			 * this class is idle enough, reset avgidle.
 			 * (TV_DELTA returns 2000000 us when delta is large.)
 			 */
 			cl->avgidle_ = cl->maxidle_;
 
 		/* get pkt_time (for class) in usec */
 #if 1  /* use approximation */
 		pkt_time = pktlen * cl->ns_per_byte_;
 		pkt_time = NSEC_TO_USEC(pkt_time);
 #else
 		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
 #endif
 		idle -= pkt_time;
 
 		/* filter step: avgidle += idle - avgidle / 2^RM_FILTER_GAIN */
 		avgidle = cl->avgidle_;
 		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
 		cl->avgidle_ = avgidle;
 
 		/* Are we overlimit ? */
 		if (avgidle <= 0) {
 			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
 #if 1 /* ALTQ */
 			/*
 			 * need some lower bound for avgidle, otherwise
 			 * a borrowing class gets unbounded penalty.
 			 */
 			if (avgidle < cl->minidle_)
 				avgidle = cl->avgidle_ = cl->minidle_;
 #endif
 			/* set next idle to make avgidle 0 */
 			tidle = pkt_time +
 				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
 			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
 			++cl->stats_.over;
 		} else {
 			/* underlimit: cap avgidle at maxidle_ and clear undertime_ */
 			cl->avgidle_ =
 			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
 			cl->undertime_.tv_sec = 0;
 			if (cl->sleeping_) {
 				CALLOUT_STOP(&cl->callout_);
 				cl->sleeping_ = 0;
 			}
 		}
 
 		/*
 		 * Count a borrow on every class visited before the lender;
 		 * stop counting once the lender itself is reached.
 		 */
 		if (borrows != NULL) {
 			if (borrows != cl)
 				++cl->stats_.borrows;
 			else
 				borrows = NULL;
 		}
 		cl->last_ = ifd->ifnow_;
 		cl->last_pkttime_ = pkt_time;
 
 #if 1
 		if (cl->parent_ == NULL) {
 			/* take stats of root class */
 			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 		}
 #endif
 
 		cl = cl->parent_;
 	}
 
 	/*
 	 * Check to see if cutoff needs to set to a new level.
 	 */
 	cl = ifd->class_[ifd->qo_];
 	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
 #if 1 /* ALTQ */
 		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
 			rmc_tl_satisfied(ifd, nowp);
 			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 		} else {
 			ifd->cutoff_ = borrowed->depth_;
 			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 		}
 #else /* !ALTQ */
 		/*
 		 * NOTE(review): this disabled branch uses `now', but no local
 		 * of that name is declared in this function (only `nowp').
 		 */
 		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
 			reset_cutoff(ifd);
 #ifdef notdef
 			rmc_tl_satisfied(ifd, &now);
 #endif
 			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 		} else {
 			ifd->cutoff_ = borrowed->depth_;
 			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 		}
 #endif /* !ALTQ */
 	}
 
 	/*
 	 * Release class slot
 	 */
 	ifd->borrowed_[ifd->qo_] = NULL;
 	ifd->class_[ifd->qo_] = NULL;
 	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
 	ifd->queued_--;
 }
 
 /*
  * void
  * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
  *	over-limit action routines.  These get invoked by rmc_under_limit()
  *	if a class with packets to send is over its bandwidth limit & can't
  *	borrow from a parent class.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_drop_action(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd;
 
 	ifd = cl->ifdat_;
 
 	/* The caller must only invoke this on a non-empty queue. */
 	ASSERT(qlen(cl->q_) > 0);
 
 	/* Discard one packet through _rmc_dropq(). */
 	_rmc_dropq(cl);
 	if (!qempty(cl->q_))
 		return;
 
 	/* The queue just became empty: adjust the per-priority count. */
 	ifd->na_[cl->pri_]--;
 }
 
 /*
  * rmc_dropall - flush every packet queued on class 'cl'.  If the queue
  *	held anything, the per-priority count na_[] is decremented once.
  */
 void rmc_dropall(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd = cl->ifdat_;
 
 	/* An empty queue needs no work and no accounting change. */
 	if (qempty(cl->q_))
 		return;
 
 	_flushq(cl->q_);
 	ifd->na_[cl->pri_]--;
 }
 
 #if (__FreeBSD_version > 300000)
 /* hzto() is removed from FreeBSD-3.0 */
 static int hzto(struct timeval *);
 
 /*
  * hzto - local replacement for the removed hzto().
  *
  *	Subtracts the current time (getmicrotime()) from *tv field by field
  *	and returns tvtohz() of that difference.
  *
  *	The tv_usec difference is not normalized here and may be negative;
  *	presumably tvtohz() copes with that -- confirm.
  */
 static int
 hzto(struct timeval *tv)
 {
 	struct timeval t2;
 
 	getmicrotime(&t2);
 	t2.tv_sec = tv->tv_sec - t2.tv_sec;
 	t2.tv_usec = tv->tv_usec - t2.tv_usec;
 	return (tvtohz(&t2));
 }
 #endif /* __FreeBSD_version > 300000 */
 
 /*
  * void
  * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
  *	delay action routine.  It is invoked via rmc_under_limit when the
  *	packet is discovered to be overlimit.
  *
  *	If the delay action is result of borrow class being overlimit, then
  *	delay for the offtime of the borrowing class that is overlimit.
  *
  *	Returns: NONE
  */
 
 void
 rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
 {
 	int	delay, t, extradelay;
 
 	cl->stats_.overactions++;
 	/* delay = undertime_ - overtime_, computed by TV_DELTA */
 	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
 #ifndef BORROW_OFFTIME
 	delay += cl->offtime_;
 #endif
 
 	/* a class that is already sleeping keeps its existing timer */
 	if (!cl->sleeping_) {
 		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
 #ifdef BORROW_OFFTIME
 		/* use the lender's offtime_ when a lender was supplied */
 		if (borrow != NULL)
 			extradelay = borrow->offtime_;
 		else
 #endif
 			extradelay = cl->offtime_;
 
 #ifdef ALTQ
 		/*
 		 * XXX recalculate suspend time:
 		 * current undertime is (tidle + pkt_time) calculated
 		 * from the last transmission.
 		 *	tidle: time required to bring avgidle back to 0
 		 *	pkt_time: target waiting time for this class
 		 * we need to replace pkt_time by offtime
 		 */
 		extradelay -= cl->last_pkttime_;
 #endif
 		/* only ever push undertime_ later, never earlier */
 		if (extradelay > 0) {
 			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
 			delay += extradelay;
 		}
 
 		cl->sleeping_ = 1;
 		cl->stats_.delays++;
 
 		/*
 		 * Since packets are phased randomly with respect to the
 		 * clock, 1 tick (the next clock tick) can be an arbitrarily
 		 * short time so we have to wait for at least two ticks.
 		 * NOTE:  If there's no other traffic, we need the timer as
 		 * a 'backstop' to restart this class.
 		 */
 		if (delay > tick * 2) {
 #ifdef __FreeBSD__
 			/* FreeBSD rounds up the tick */
 			t = hzto(&cl->undertime_);
 #else
 			/* other BSDs round down the tick */
 			t = hzto(&cl->undertime_) + 1;
 #endif
 		} else
 			t = 2;
 		/* arm the callout so rmc_restart(cl) runs after t ticks */
 		CALLOUT_RESET(&cl->callout_, t,
 			      (timeout_t *)rmc_restart, (caddr_t)cl);
 	}
 }
 
 /*
  * void
  * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
  *	called by the system timer code & is responsible for checking if the
  *	class is still sleeping (it might have been restarted as a side
  *	effect of the queue scan on a packet arrival) and, if so, restarting
  *	output for the class.  Inspecting the class state & restarting output
  *	require locking the class structure.  In general the driver is
  *	responsible for locking but this is the only routine that is not
  *	called directly or indirectly from the interface driver so it has to
  *	know about system locking conventions.  Under bsd, locking is done
  *	by raising IPL to splimp so that's what's implemented here.  On a
  *	different system this would probably need to be changed.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_restart(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd = cl->ifdat_;
 	int		 s;	/* saved priority level, restored by splx() */
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	IFQ_LOCK(ifd->ifq_);
 	/* act only if the class is still marked sleeping */
 	if (cl->sleeping_) {
 		cl->sleeping_ = 0;
 		cl->undertime_.tv_sec = 0;
 
 		/* call the restart hook only if one is set and queued_ < maxqueued_ */
 		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
 			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
 			(ifd->restart)(ifd->ifq_);
 		}
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 }
 
 /*
  * void
  * rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) - This
  *	is the generic overlimit handling routine for the root class of the
  *	link sharing structure.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
 {
 	/* Neither argument is used: any call to this routine panics. */
 	panic("rmc_root_overlimit");
 }
 
 /*
  * Packet Queue handling routines.  Eventually, this is to localize the
  *	effects on the code whether queues are red queues or droptail
  *	queues.
  */
 
 /*
  * _rmc_addq - enqueue packet 'm' on class 'cl'.
  *
  *	RIO and RED queues (when compiled in) are handed to rio_addq() or
  *	red_addq(), whose return value is passed back.  Otherwise the packet
  *	is appended with _addq() and 0 is returned.
  */
 static int
 _rmc_addq(rm_class_t *cl, mbuf_t *m)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
 #endif /* ALTQ_RED */
 
 	/* RMCF_CLEARDSCP set: write 0 into the packet's DS field first */
 	if (cl->flags_ & RMCF_CLEARDSCP)
 		write_dsfield(m, cl->pktattr_, 0);
 
 	_addq(cl->q_, m);
 	return (0);
 }
 
 /* note: _rmc_dropq is not called for red */
 static void
 _rmc_dropq(rm_class_t *cl)
 {
 	mbuf_t	*victim;
 
 	/* Take one packet off the class queue; nothing to do if empty. */
 	victim = _getq(cl->q_);
 	if (victim == NULL)
 		return;
 
 	m_freem(victim);
 }
 
 /*
  * _rmc_getq - remove and return the next packet of class 'cl'.
  *
  *	RIO and RED queues (when compiled in) are handed to rio_getq() or
  *	red_getq(); any other queue uses _getq().
  */
 static mbuf_t *
 _rmc_getq(rm_class_t *cl)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		return rio_getq((rio_t *)cl->red_, cl->q_);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		return red_getq(cl->red_, cl->q_);
 #endif
 	return _getq(cl->q_);
 }
 
 /*
  * _rmc_pollq - return qhead() of the class queue without dequeuing.
  */
 static mbuf_t *
 _rmc_pollq(rm_class_t *cl)
 {
 	mbuf_t	*head;
 
 	head = qhead(cl->q_);
 	return (head);
 }
 
 #ifdef CBQ_TRACE
 
 /* trace storage; cbqtrace_dump() wraps its read pointer at index NCBQTRACE */
 struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
 struct cbqtrace		*cbqtrace_ptr = NULL;
 int			 cbqtrace_count;
 
 /*
  * DDB hook to trace cbq events:
  *  the last 1024 events are held in a circular buffer.
  *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
  */
 void cbqtrace_dump(int);
 static char *rmc_funcname(void *);
 
 /*
  * Table mapping function addresses to printable names, terminated by
  * a NULL/NULL entry; searched by rmc_funcname().
  */
 static struct rmc_funcs {
 	void	*func;
 	char	*name;
 } rmc_funcs[] =
 {
 	rmc_init,		"rmc_init",
 	rmc_queue_packet,	"rmc_queue_packet",
 	rmc_under_limit,	"rmc_under_limit",
 	rmc_update_class_util,	"rmc_update_class_util",
 	rmc_delay_action,	"rmc_delay_action",
 	rmc_restart,		"rmc_restart",
 	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
 	NULL,			NULL
 };
 
 /*
  * rmc_funcname - look 'func' up in the rmc_funcs[] table and return its
  *	name, or "unknown" if the address is not listed.
  */
 static char *rmc_funcname(void *func)
 {
 	struct rmc_funcs *entry = rmc_funcs;
 
 	/* The table ends with an entry whose func is NULL. */
 	while (entry->func != NULL) {
 		if (entry->func == func)
 			return (entry->name);
 		entry++;
 	}
 	return ("unknown");
 }
 
 /*
  * cbqtrace_dump - print 20 trace records starting at record 'counter'.
  *
  *	'counter' is reduced modulo NCBQTRACE.  A negative argument is
  *	folded back into range, because C's % can yield a negative result
  *	that would index before cbqtrace_buffer.
  *
  *	Each record is read as four consecutive ints: a value printed in
  *	hex, a function address, a four-character tag and a decimal value.
  *	NOTE(review): this assumes struct cbqtrace is laid out as four ints
  *	and that a function address fits in an int -- confirm.
  */
 void cbqtrace_dump(int counter)
 {
 	int	 i, *p;
 	char	*cp;
 
 	counter = counter % NCBQTRACE;
 	if (counter < 0)
 		counter += NCBQTRACE;	/* keep the start index in range */
 	p = (int *)&cbqtrace_buffer[counter];
 
 	for (i=0; i<20; i++) {
 		printf("[0x%x] ", *p++);
 		printf("%s: ", rmc_funcname((void *)*p++));
 		cp = (char *)p++;
 		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
 		printf("%d\n",*p++);
 
 		/* wrap to the start once the read pointer passes the buffer */
 		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
 			p = (int *)cbqtrace_buffer;
 	}
 }
 #endif /* CBQ_TRACE */
 #endif /* ALTQ_CBQ */
 
 #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
 #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
 
 /*
  * _addq - append packet 'm' to queue 'q'.
  *
  *	Packets are linked in a ring through m_nextpkt; qtail(q) is the
  *	newest packet and its m_nextpkt is the oldest.
  */
 void
 _addq(class_queue_t *q, mbuf_t *m)
 {
 	mbuf_t	*tail = qtail(q);
 
 	if (tail == NULL) {
 		/* first packet: it links to itself */
 		m->m_nextpkt = m;
 	} else {
 		/* splice m in between the old tail and the head */
 		m->m_nextpkt = tail->m_nextpkt;
 		tail->m_nextpkt = m;
 	}
 	qtail(q) = m;
 	qlen(q)++;
 }
 
 /*
  * _getq - unlink and return the packet after the tail (the head of the
  *	ring), or NULL when the queue is empty.
  */
 mbuf_t *
 _getq(class_queue_t *q)
 {
 	mbuf_t	*tail, *head;
 
 	tail = qtail(q);
 	if (tail == NULL)
 		return (NULL);
 
 	head = tail->m_nextpkt;
 	if (head == tail) {
 		/* sole packet: the queue becomes empty */
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	} else {
 		/* bypass the head in the ring */
 		tail->m_nextpkt = head->m_nextpkt;
 	}
 	qlen(q)--;
 	head->m_nextpkt = NULL;
 	return (head);
 }
 
 /* drop a packet at the tail of the queue */
 mbuf_t *
 _getq_tail(class_queue_t *q)
 {
 	mbuf_t	*tail, *prev;
 
 	tail = qtail(q);
 	if (tail == NULL)
 		return NULL;
 
 	/* Walk the ring to the packet whose successor is the tail. */
 	for (prev = tail; prev->m_nextpkt != tail; prev = prev->m_nextpkt)
 		continue;
 
 	/* Unlink the tail; its predecessor becomes the new tail. */
 	prev->m_nextpkt = tail->m_nextpkt;
 	if (prev != tail)
 		qtail(q) = prev;
 	else {
 		/* the tail was the only packet */
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	}
 	qlen(q)--;
 	tail->m_nextpkt = NULL;
 	return (tail);
 }
 
 /* randomly select a packet in the queue */
 mbuf_t *
 _getq_random(class_queue_t *q)
 {
 	struct mbuf	*m;
 	int		 i, n;
 
 	if ((m = qtail(q)) == NULL)
 		return NULL;
 	if (m->m_nextpkt == m) {
 		/* a single packet: it is the only possible choice */
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	} else {
 		struct mbuf *prev = NULL;
 
 		/* step 1..qlen links forward from the tail around the ring */
 		n = arc4random() % qlen(q) + 1;
 		for (i = 0; i < n; i++) {
 			prev = m;
 			m = m->m_nextpkt;
 		}
 		/* unlink m; if it was the tail, prev becomes the tail */
 		prev->m_nextpkt = m->m_nextpkt;
 		if (m == qtail(q))
 			qtail(q) = prev;
 	}
 	qlen(q)--;
 	m->m_nextpkt = NULL;
 	return (m);
 }
 
 /*
  * _removeq - unlink packet 'm' from queue 'q'.
  *
  *	The queue is a ring linked through m_nextpkt with qtail(q) as the
  *	entry point.  An empty queue, or a packet that is not on the queue,
  *	leaves the queue untouched; without these checks the search would
  *	dereference NULL or never terminate.
  *
  *	m->m_nextpkt is not cleared.
  */
 void
 _removeq(class_queue_t *q, mbuf_t *m)
 {
 	mbuf_t	*tail, *m0, *prev;
 
 	if ((tail = qtail(q)) == NULL)
 		return;		/* empty queue: nothing to remove */
 
 	/* find m's predecessor, giving up after one full lap */
 	m0 = tail;
 	do {
 		prev = m0;
 		m0 = m0->m_nextpkt;
 	} while (m0 != m && m0 != tail);
 	if (m0 != m)
 		return;		/* m is not on this queue */
 
 	prev->m_nextpkt = m->m_nextpkt;
 	if (prev == m)
 		qtail(q) = NULL;	/* m was the only packet */
 	else if (qtail(q) == m)
 		qtail(q) = prev;	/* m was the tail */
 	qlen(q)--;
 }
 
 /*
  * _flushq - dequeue and free every packet on queue 'q'.
  */
 void
 _flushq(class_queue_t *q)
 {
 	mbuf_t *pkt;
 
 	for (;;) {
 		pkt = _getq(q);
 		if (pkt == NULL)
 			break;
 		m_freem(pkt);
 	}
 	ASSERT(qlen(q) == 0);
 }
 
 #endif /* !__GNUC__ || ALTQ_DEBUG */
 #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
Index: stable/10/sys/contrib/altq/altq/altq_subr.c
===================================================================
--- stable/10/sys/contrib/altq/altq/altq_subr.c	(revision 263085)
+++ stable/10/sys/contrib/altq/altq/altq_subr.c	(revision 263086)
@@ -1,1979 +1,1981 @@
 /*	$FreeBSD$	*/
 /*	$KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $	*/
 
 /*
  * Copyright (C) 1997-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
 #ifdef __FreeBSD__
 #include "opt_inet6.h"
 #endif
 #endif /* __FreeBSD__ || __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #ifdef __FreeBSD__
 #include <net/vnet.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
-#include <net/pfvar.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
 #include <altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <altq/altq_conf.h>
 #endif
 
 /* machine dependent clock related includes */
 #ifdef __FreeBSD__
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <machine/clock.h>
 #endif
 #if defined(__amd64__) || defined(__i386__)
 #include <machine/cpufunc.h>		/* for pentium tsc */
 #include <machine/specialreg.h>		/* for CPUID_TSC */
 #ifdef __FreeBSD__
 #include <machine/md_var.h>		/* for cpu_feature */
 #elif defined(__NetBSD__) || defined(__OpenBSD__)
 #include <machine/cpu.h>		/* for cpu_feature */
 #endif
 #endif /* __amd64 || __i386__ */
 
 /*
  * internal function prototypes
  */
 static void	tbr_timeout(void *);
 int (*altq_input)(struct mbuf *, int) = NULL;
 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
 static int tbr_timer = 0;	/* token bucket regulator timer */
 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
 static struct callout tbr_callout = CALLOUT_INITIALIZER;
 #else
 static struct callout tbr_callout;
 #endif
 
 #ifdef ALTQ3_CLFIER_COMPAT
 static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
 #ifdef INET6
 static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
 			       struct flowinfo_in6 *);
 #endif
 static int	apply_filter4(u_int32_t, struct flow_filter *,
 			      struct flowinfo_in *);
 static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
 				struct flowinfo_in *);
 #ifdef INET6
 static int	apply_filter6(u_int32_t, struct flow_filter6 *,
 			      struct flowinfo_in6 *);
 #endif
 static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
 				 struct flowinfo_in *);
 static u_long	get_filt_handle(struct acc_classifier *, int);
 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
 static u_int32_t filt2fibmask(struct flow_filter *);
 
 static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
 static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
 static int 	ip4f_init(void);
 static struct ip4_frag	*ip4f_alloc(void);
 static void 	ip4f_free(struct ip4_frag *);
 #endif /* ALTQ3_CLFIER_COMPAT */
 
 /*
  * alternate queueing support routines
  */
 
 /* look up the queue state by the interface name and the queueing type. */
 void *
 altq_lookup(name, type)
 	char *name;
 	int type;
 {
 	struct ifnet *ifp;
 
 	ifp = ifunit(name);
 	if (ifp == NULL)
 		return NULL;
 
 	/* ALTQT_NONE never matches */
 	if (type == ALTQT_NONE)
 		return NULL;
 
 	/* read if_snd unlocked */
 	if (ifp->if_snd.altq_type != type)
 		return NULL;
 
 	return (ifp->if_snd.altq_disc);
 }
 
 /*
  * altq_attach - install a queueing discipline on 'ifq'.
  *
  *	Stores the type, discipline state, enqueue/dequeue/request handlers
  *	and classifier in the queue, all under IFQ_LOCK.
  *
  *	Returns 0 on success and ENXIO if the queue is not ALTQ-ready.  With
  *	ALTQ3_COMPAT and a non-NULL clfier it returns EBUSY if altq is
  *	enabled and EEXIST if a discipline is already attached.
  */
 int
 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
 	struct ifaltq *ifq;
 	int type;
 	void *discipline;
 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 	struct mbuf *(*dequeue)(struct ifaltq *, int);
 	int (*request)(struct ifaltq *, int, void *);
 	void *clfier;
 	void *(*classify)(void *, struct mbuf *, int);
 {
 	IFQ_LOCK(ifq);
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 
 #ifdef ALTQ3_COMPAT
 	/*
 	 * pfaltq can override the existing discipline, but altq3 cannot.
 	 * check these if clfier is not NULL (which implies altq3).
 	 */
 	if (clfier != NULL) {
 		if (ALTQ_IS_ENABLED(ifq)) {
 			IFQ_UNLOCK(ifq);
 			return EBUSY;
 		}
 		if (ALTQ_IS_ATTACHED(ifq)) {
 			IFQ_UNLOCK(ifq);
 			return EEXIST;
 		}
 	}
 #endif
 	ifq->altq_type     = type;
 	ifq->altq_disc     = discipline;
 	ifq->altq_enqueue  = enqueue;
 	ifq->altq_dequeue  = dequeue;
 	ifq->altq_request  = request;
 	ifq->altq_clfier   = clfier;
 	ifq->altq_classify = classify;
 	/* keep only the CANTCHANGE and ENABLED bits; clear all other flags */
 	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_KLD
 	altq_module_incref(type);
 #endif
 #endif
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 /*
  * altq_detach - remove the discipline installed on 'ifq'.
  *
  *	Returns ENXIO if the queue is not ALTQ-ready, EBUSY if altq is
  *	still enabled, and 0 otherwise (including when nothing is attached).
  */
 int
 altq_detach(ifq)
 	struct ifaltq *ifq;
 {
 	IFQ_LOCK(ifq);
 
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 	if (ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return EBUSY;
 	}
 	/* nothing attached: treated as success */
 	if (!ALTQ_IS_ATTACHED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return (0);
 	}
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_KLD
 	altq_module_declref(ifq->altq_type);
 #endif
 #endif
 
 	/* reset every discipline field to its detached value */
 	ifq->altq_type     = ALTQT_NONE;
 	ifq->altq_disc     = NULL;
 	ifq->altq_enqueue  = NULL;
 	ifq->altq_dequeue  = NULL;
 	ifq->altq_request  = NULL;
 	ifq->altq_clfier   = NULL;
 	ifq->altq_classify = NULL;
 	/* only the CANTCHANGE bit survives a detach */
 	ifq->altq_flags &= ALTQF_CANTCHANGE;
 
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 /*
  * altq_enable - turn on the attached discipline for 'ifq'.
  *
  *	Returns ENXIO if the queue is not ALTQ-ready and 0 otherwise; an
  *	already-enabled queue is left untouched.  Enabling purges the
  *	queue first.
  */
 int
 altq_enable(ifq)
 	struct ifaltq *ifq;
 {
 	int s;	/* saved priority level, restored by splx() */
 
 	IFQ_LOCK(ifq);
 
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 	if (ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return 0;
 	}
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	/* the lock is already held, hence the _NOLOCK purge */
 	IFQ_PURGE_NOLOCK(ifq);
 	ASSERT(ifq->ifq_len == 0);
 	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
 	ifq->altq_flags |= ALTQF_ENABLED;
 	/* a classifier is installed: also set the CLASSIFY flag */
 	if (ifq->altq_clfier != NULL)
 		ifq->altq_flags |= ALTQF_CLASSIFY;
 	splx(s);
 
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 /*
  * altq_disable - turn off the discipline on 'ifq'.
  *
  *	Always returns 0.  If altq was enabled, the queue is purged and the
  *	ENABLED and CLASSIFY flags are cleared.
  */
 int
 altq_disable(ifq)
 	struct ifaltq *ifq;
 {
 	int s;	/* saved priority level, restored by splx() */
 
 	IFQ_LOCK(ifq);
 	if (!ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return 0;
 	}
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	/* the lock is already held, hence the _NOLOCK purge */
 	IFQ_PURGE_NOLOCK(ifq);
 	ASSERT(ifq->ifq_len == 0);
 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
 	splx(s);
 	
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 #ifdef ALTQ_DEBUG
 /*
  * altq_assert - report a failed assertion (expression text, file and
  *	line) with printf() and then panic.
  */
 void
 altq_assert(file, line, failedexpr)
 	const char *file;
 	int line;
 	const char *failedexpr;
 {
 	(void)printf(
 	    "altq assertion \"%s\" failed: file \"%s\", line %d\n",
 	    failedexpr, file, line);
 	panic("altq assertion");
 	/* NOTREACHED */
 }
 #endif
 
 /*
  * internal representation of token bucket parameters
  *	rate:	byte_per_unittime << 32
  *		(((bits_per_sec) / 8) << 32) / machclk_freq
  *	depth:	byte << 32
  *
  */
 #define	TBR_SHIFT	32	/* number of fractional bits */
 /* widen to int64_t before shifting so the shift is done in 64 bits */
 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
 /* inverse of TBR_SCALE: drop the fractional bits */
 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
 
 /*
  * Dequeue (ALTDQ_REMOVE) or peek at (ALTDQ_POLL) the next packet of an
  * interface queue on behalf of its token bucket regulator.
  *
  * The caller must hold the queue lock.  Returns NULL when the bucket has
  * no tokens left or when the underlying queue yields no packet.  On a
  * successful remove the scaled packet length is charged to the bucket.
  */
 static struct mbuf *
 tbr_dequeue(ifq, op)
 	struct ifaltq *ifq;
 	int op;
 {
 	struct tb_regulator *tbr;
 	struct mbuf *m;
 	int64_t elapsed;
 	u_int64_t cur;
 
 	IFQ_LOCK_ASSERT(ifq);
 	tbr = ifq->altq_tbr;
 
 	/* a remove that directly follows a poll bypasses the token check */
 	if (op != ALTDQ_REMOVE || tbr->tbr_lastop != ALTDQ_POLL) {
 		/* tokens are replenished only once the bucket has run dry */
 		if (tbr->tbr_token <= 0) {
 			cur = read_machclk();
 			elapsed = cur - tbr->tbr_last;
 			if (elapsed < tbr->tbr_filluptime) {
 				tbr->tbr_token += elapsed * tbr->tbr_rate;
 				if (tbr->tbr_token > tbr->tbr_depth)
 					tbr->tbr_token = tbr->tbr_depth;
 			} else {
 				/* enough time has passed for a full bucket */
 				tbr->tbr_token = tbr->tbr_depth;
 			}
 			tbr->tbr_last = cur;
 		}
 		/* still nothing to spend: refuse the dequeue */
 		if (tbr->tbr_token <= 0)
 			return (NULL);
 	}
 
 	if (!ALTQ_IS_ENABLED(ifq)) {
 		/* no discipline enabled: operate on the plain queue */
 		if (op == ALTDQ_POLL) {
 			_IF_POLL(ifq, m);
 		} else {
 			_IF_DEQUEUE(ifq, m);
 		}
 	} else {
 		m = (*ifq->altq_dequeue)(ifq, op);
 	}
 
 	if (op == ALTDQ_REMOVE && m != NULL)
 		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
 	tbr->tbr_lastop = op;
 	return (m);
 }
 
 /*
  * set a token bucket regulator.
  * if the specified rate is zero, the token bucket regulator is deleted.
  *
  * Returns 0 on success, ENXIO when no machine clock is available,
  * ENOENT when asked to delete a regulator that is not installed, and
  * ENOMEM when the new regulator cannot be allocated.  A regulator that
  * is already installed is replaced and freed.  The queue lock is taken
  * and released here.
  */
 int
 tbr_set(ifq, profile)
 	struct ifaltq *ifq;
 	struct tb_profile *profile;
 {
 	struct tb_regulator *tbr, *otbr;
 
 	/* publish the dequeue hook on first use */
 	if (tbr_dequeue_ptr == NULL)
 		tbr_dequeue_ptr = tbr_dequeue;
 
 	if (machclk_freq == 0)
 		init_machclk();
 	if (machclk_freq == 0) {
 		printf("tbr_set: no cpu clock available!\n");
 		return (ENXIO);
 	}
 
 	IFQ_LOCK(ifq);
 	if (profile->rate == 0) {
 		/* delete this tbr */
 		if ((tbr = ifq->altq_tbr) == NULL) {
 			IFQ_UNLOCK(ifq);
 			return (ENOENT);
 		}
 		ifq->altq_tbr = NULL;
 		free(tbr, M_DEVBUF);
 		IFQ_UNLOCK(ifq);
 		return (0);
 	}
 
 	/* allocated without sleeping: the queue lock is held */
 	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (tbr == NULL) {
 		IFQ_UNLOCK(ifq);
 		return (ENOMEM);
 	}
 
 	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
 	tbr->tbr_depth = TBR_SCALE(profile->depth);
 	if (tbr->tbr_rate > 0)
 		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
 	else {
 		/*
 		 * The scaled rate rounded down to zero, so the bucket can
 		 * never be refilled by waiting: use the largest positive
 		 * 64-bit value as the fill-up time.  The all-ones constant
 		 * 0xffffffffffffffffLL is unsigned; stored in a signed
 		 * 64-bit field it would read back as -1 and the
 		 * "interval >= tbr_filluptime" test in tbr_dequeue() would
 		 * then refill the bucket on every call.
 		 * NOTE(review): assumes tbr_filluptime is signed like the
 		 * int64_t interval it is compared with -- confirm against
 		 * struct tb_regulator.
 		 */
 		tbr->tbr_filluptime = 0x7fffffffffffffffLL;
 	}
 	tbr->tbr_token = tbr->tbr_depth;	/* start with a full bucket */
 	tbr->tbr_last = read_machclk();
 	tbr->tbr_lastop = ALTDQ_REMOVE;
 
 	otbr = ifq->altq_tbr;
 	ifq->altq_tbr = tbr;	/* set the new tbr */
 
 	if (otbr != NULL)
 		free(otbr, M_DEVBUF);
 	else {
 		/* first regulator on this queue: arm the timer if idle */
 		if (tbr_timer == 0) {
 			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
 			tbr_timer = 1;
 		}
 	}
 	IFQ_UNLOCK(ifq);
 	return (0);
 }
 
 /*
  * tbr_timeout goes through the interface list, and kicks the drivers
  * if necessary.
  *
  * MPSAFE
  */
 static void
 tbr_timeout(arg)
 	void *arg;
 {
 #ifdef __FreeBSD__
 	VNET_ITERATOR_DECL(vnet_iter);
 #endif
 	struct ifnet *ifp;
 	int active, s;
 
 	active = 0;
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 #ifdef __FreeBSD__
 	IFNET_RLOCK_NOSLEEP();
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 #endif
 		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
 		    ifp = TAILQ_NEXT(ifp, if_list)) {
 			/* read from if_snd unlocked */
 			if (!TBR_IS_ENABLED(&ifp->if_snd))
 				continue;
 			active++;
 			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
 			    ifp->if_start != NULL)
 				(*ifp->if_start)(ifp);
 		}
 #ifdef __FreeBSD__
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 	IFNET_RUNLOCK_NOSLEEP();
 #endif
 	splx(s);
 	if (active > 0)
 		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
 	else
 		tbr_timer = 0;	/* don't need tbr_timer anymore */
 }
 
 /*
  * Report the token bucket regulator profile of an interface queue.
  *
  * Fills in profile->rate and profile->depth, converted back from the
  * internal scaled form; both are set to 0 when no regulator is
  * installed.  Always returns 0.
  */
 int
 tbr_get(ifq, profile)
 	struct ifaltq *ifq;
 	struct tb_profile *profile;
 {
 	struct tb_regulator *tbr;
 
 	IFQ_LOCK(ifq);
 	tbr = ifq->altq_tbr;
 	if (tbr != NULL) {
 		profile->rate =
 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
 	} else {
 		profile->rate = 0;
 		profile->depth = 0;
 	}
 	IFQ_UNLOCK(ifq);
 
 	return (0);
 }
 
 /*
  * Attach the discipline selected by a->scheduler to its interface by
  * handing off to the scheduler-specific pfattach routine.  An existing
  * discipline is overridden.
  *
  * Locking is left to the discipline specific attach functions, which
  * call back into altq_attach for the actual attach and locking.
  *
  * Returns 0 for ALTQT_NONE, the scheduler's result for a compiled-in
  * scheduler, and ENXIO for anything else.
  */
 int
 altq_pfattach(struct pf_altq *a)
 {
 	switch (a->scheduler) {
 	case ALTQT_NONE:
 		return (0);
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_pfattach(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_pfattach(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_pfattach(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * detach a discipline from the interface.
  * it is possible that the discipline was already overridden by another
  * discipline.
  */
 int
 altq_pfdetach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error = 0;
 
 	if ((ifp = ifunit(a->ifname)) == NULL)
 		return (EINVAL);
 
 	/* if this discipline is no longer referenced, just return */
 	/* read unlocked from if_snd */
 	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
 		return (0);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	/* read unlocked from if_snd, _disable and _detach take care */
 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
 		error = altq_disable(&ifp->if_snd);
 	if (error == 0)
 		error = altq_detach(&ifp->if_snd);
 	splx(s);
 
 	return (error);
 }
 
 /*
  * Add a discipline, or a queue when a->qname is non-empty.
  *
  * Queue requests are forwarded to altq_add_queue().  For a discipline
  * the machine clock is initialised first (panicking if no clock can be
  * found) and the request is then dispatched on a->scheduler; schedulers
  * that are not compiled in yield ENXIO.
  *
  * Locking is done in the discipline specific functions with regards to
  * malloc with WAITOK, also it is not yet clear which lock to use.
  */
 int
 altq_add(struct pf_altq *a)
 {
 	if (a->qname[0] != 0)
 		return (altq_add_queue(a));
 
 	if (machclk_freq == 0) {
 		init_machclk();
 		if (machclk_freq == 0)
 			panic("altq_add: no cpu clock");
 	}
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_add_altq(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_add_altq(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_add_altq(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Remove a discipline, or a queue when a->qname is non-empty.
  *
  * Queue requests are forwarded to altq_remove_queue(); discipline
  * requests are dispatched on a->scheduler, with ENXIO returned for a
  * scheduler that is not compiled in.
  *
  * It is yet unclear what lock to use to protect this operation, the
  * discipline specific functions will determine and grab it
  */
 int
 altq_remove(struct pf_altq *a)
 {
 	if (a->qname[0] != 0)
 		return (altq_remove_queue(a));
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_remove_altq(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_remove_altq(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_remove_altq(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Add a queue to the discipline selected by a->scheduler.  Returns the
  * scheduler's result, or ENXIO when that scheduler is not compiled in.
  *
  * It is yet unclear what lock to use to protect this operation, the
  * discipline specific functions will determine and grab it
  */
 int
 altq_add_queue(struct pf_altq *a)
 {
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_add_queue(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_add_queue(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_add_queue(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Remove a queue from the discipline selected by a->scheduler.  Returns
  * the scheduler's result, or ENXIO when that scheduler is not compiled
  * in.
  *
  * It is yet unclear what lock to use to protect this operation, the
  * discipline specific functions will determine and grab it
  */
 int
 altq_remove_queue(struct pf_altq *a)
 {
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_remove_queue(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_remove_queue(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_remove_queue(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Fetch queue statistics from the discipline selected by a->scheduler.
  * ubuf and nbytes are passed through to the scheduler unchanged.
  * Returns the scheduler's result, or ENXIO when that scheduler is not
  * compiled in.
  *
  * Locking is done in the discipline specific functions with regards to
  * copyout operations, also it is not yet clear which lock to use.
  */
 int
 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_getqstats(a, ubuf, nbytes));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_getqstats(a, ubuf, nbytes));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_getqstats(a, ubuf, nbytes));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Read the diffserv field from the IPv4 or IPv6 header that pktattr
  * points at.
  *
  * Returns 0 when pktattr is NULL, when its address family is neither
  * AF_INET nor AF_INET6, when the header pointer does not lie inside any
  * mbuf of the chain (pattr_af is then reset to AF_UNSPEC), or when the
  * version field of the header does not match the address family.
  */
 u_int8_t
 read_dsfield(m, pktattr)
 	struct mbuf *m;
 	struct altq_pktattr *pktattr;
 {
 	struct mbuf *cur;
 	u_int8_t tos = 0;
 
 	if (pktattr == NULL)
 		return ((u_int8_t)0);
 	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
 		return ((u_int8_t)0);
 
 	/* make sure pattr_hdr still points into one of the mbufs */
 	cur = m;
 	while (cur != NULL) {
 		if ((pktattr->pattr_hdr >= cur->m_data) &&
 		    (pktattr->pattr_hdr < cur->m_data + cur->m_len))
 			break;
 		cur = cur->m_next;
 	}
 	if (cur == NULL) {
 		/* the header pointer is stale: invalidate the attribute */
 		pktattr->pattr_af = AF_UNSPEC;
 #ifdef ALTQ_DEBUG
 		printf("read_dsfield: can't locate header!\n");
 #endif
 		return ((u_int8_t)0);
 	}
 
 	if (pktattr->pattr_af == AF_INET) {
 		struct ip *ip4 = (struct ip *)pktattr->pattr_hdr;
 
 		if (ip4->ip_v != 4)
 			return ((u_int8_t)0);	/* version mismatch! */
 		tos = ip4->ip_tos;
 	}
 #ifdef INET6
 	else if (pktattr->pattr_af == AF_INET6) {
 		struct ip6_hdr *hdr6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		u_int32_t flow;
 
 		/* version is the top nibble, the field the next 8 bits */
 		flow = ntohl(hdr6->ip6_flow);
 		if ((flow >> 28) != 6)
 			return ((u_int8_t)0);	/* version mismatch! */
 		tos = (flow >> 20) & 0xff;
 	}
 #endif
 	return (tos);
 }
 
 /*
  * Store dsfield into the IPv4 or IPv6 header that pktattr points at.
  *
  * Nothing is written when pktattr is NULL, when its address family is
  * neither AF_INET nor AF_INET6, when the header pointer does not lie
  * inside any mbuf of the chain (pattr_af is then reset to AF_UNSPEC),
  * or when the version field of the header does not match the address
  * family.  For IPv4 the two low bits of the existing TOS byte are kept
  * and the header checksum is updated incrementally.
  */
 void
 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
 {
 	struct mbuf *cur;
 
 	if (pktattr == NULL)
 		return;
 	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
 		return;
 
 	/* make sure pattr_hdr still points into one of the mbufs */
 	cur = m;
 	while (cur != NULL) {
 		if ((pktattr->pattr_hdr >= cur->m_data) &&
 		    (pktattr->pattr_hdr < cur->m_data + cur->m_len))
 			break;
 		cur = cur->m_next;
 	}
 	if (cur == NULL) {
 		/* the header pointer is stale: invalidate the attribute */
 		pktattr->pattr_af = AF_UNSPEC;
 #ifdef ALTQ_DEBUG
 		printf("write_dsfield: can't locate header!\n");
 #endif
 		return;
 	}
 
 	if (pktattr->pattr_af == AF_INET) {
 		struct ip *ip4 = (struct ip *)pktattr->pattr_hdr;
 		u_int8_t prev;
 		int32_t cksum;
 
 		if (ip4->ip_v != 4)
 			return;		/* version mismatch! */
 		prev = ip4->ip_tos;
 		dsfield |= prev & 3;	/* leave CU bits */
 		if (prev == dsfield)
 			return;		/* header already carries the value */
 		ip4->ip_tos = dsfield;
 		/*
 		 * incremental checksum update (RFC1624):
 		 *	   HC' = ~(~HC + ~m + m')
 		 */
 		cksum = ~ntohs(ip4->ip_sum) & 0xffff;
 		cksum += 0xff00 + (~prev & 0xff) + dsfield;
 		cksum = (cksum >> 16) + (cksum & 0xffff);
 		cksum += (cksum >> 16);  /* add carry */
 
 		ip4->ip_sum = htons(~cksum & 0xffff);
 	}
 #ifdef INET6
 	else if (pktattr->pattr_af == AF_INET6) {
 		struct ip6_hdr *hdr6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		u_int32_t flow;
 
 		flow = ntohl(hdr6->ip6_flow);
 		if ((flow >> 28) != 6)
 			return;		/* version mismatch! */
 		/* clear bits 22-27 and merge the new value at bit 20 */
 		flow = (flow & 0xf03fffff) | (dsfield << 20);
 		hdr6->ip6_flow = htonl(flow);
 	}
 #endif
 	return;
 }
 
 
 /*
  * high resolution clock support taking advantage of a machine dependent
  * high resolution time counter (e.g., timestamp counter of intel pentium).
  * we assume
  *  - 64-bit-long monotonically-increasing counter
  *  - frequency range is 100M-4GHz (CPU speed)
  */
 /*
  * if pcc is not available or disabled, emulate 256MHz using microtime():
  * microseconds are shifted left by MACHCLK_SHIFT (1MHz * 2^8 = 256MHz).
  */
 #define	MACHCLK_SHIFT	8
 
 /* non-zero: read_machclk() reads the TSC; zero: it uses microtime() */
 int machclk_usepcc;
 /* machine clock frequency in Hz; 0 until init_machclk() has set it */
 u_int32_t machclk_freq;
 /* machclk_freq / hz, set by init_machclk() */
 u_int32_t machclk_per_tick;
 
 #if defined(__i386__) && defined(__NetBSD__)
 /* TSC frequency, defined outside this file; read by init_machclk() */
 extern u_int64_t cpu_tsc_freq;
 #endif
 
 #if (__FreeBSD_version >= 700035)
 /*
  * cpufreq_post_change event handler: re-run init_machclk() so that the
  * machine clock frequency is picked up again after a CPU frequency
  * transition.  Nothing is done when the transition reported an error
  * or, where that information is available, when the TSC is P-state
  * invariant.  `arg' and `level' are not used.
  */
 static void
 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
 {
 	/* only a successful transition is of interest */
 	if (status == 0) {
 #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
 		/* an invariant TSC keeps its frequency across P-states */
 		if (tsc_is_invariant)
 			return;
 #endif
 		init_machclk();
 	}
 }
 EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
     EVENTHANDLER_PRI_LAST);
 #endif /* __FreeBSD_version >= 700035 */
 
 /*
  * One-time setup for the machine clock: initialise the token bucket
  * callout (FreeBSD >= 600000) and decide whether the CPU cycle counter
  * may be used (machclk_usepcc).  Called once from init_machclk().
  *
  * machclk_usepcc starts out as 1 and is cleared by each of the
  * following conditions that applies, so the order of the overrides
  * does not matter.
  */
 static void
 init_machclk_setup(void)
 {
 #if (__FreeBSD_version >= 600000)
 	callout_init(&tbr_callout, 0);
 #endif
 
 	machclk_usepcc = 1;
 
 	/* not an x86 machine, or the counter was disabled at build time */
 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
 	machclk_usepcc = 0;
 #endif
 	/* multiprocessor kernels do not use the counter */
 #if defined(__FreeBSD__) && defined(SMP)
 	machclk_usepcc = 0;
 #endif
 #if defined(__NetBSD__) && defined(MULTIPROCESSOR)
 	machclk_usepcc = 0;
 #endif
 #if defined(__amd64__) || defined(__i386__)
 	/* check if TSC is available */
 #ifdef __FreeBSD__
 	/* on FreeBSD a zero tsc_freq also rules the TSC out */
 	if ((cpu_feature & CPUID_TSC) == 0 ||
 	    atomic_load_acq_64(&tsc_freq) == 0)
 #else
 	if ((cpu_feature & CPUID_TSC) == 0)
 #endif
 		machclk_usepcc = 0;
 #endif
 }
 
 /*
  * Determine the machine clock frequency and store it in machclk_freq,
  * together with the derived machclk_per_tick (machclk_freq / hz).
  *
  * The first call also runs init_machclk_setup().  When the cycle
  * counter is not used, a 256MHz clock based on microtime() is emulated.
  * Otherwise the frequency is taken from a platform variable where one
  * exists, and measured over a sleep of about one second when it is
  * still zero afterwards.
  */
 void
 init_machclk(void)
 {
 	static int called;
 
 	/* Call one-time initialization function. */
 	if (!called) {
 		init_machclk_setup();
 		called = 1;
 	}
 
 	if (machclk_usepcc == 0) {
 		/* emulate 256MHz using microtime() */
 		machclk_freq = 1000000 << MACHCLK_SHIFT;
 		machclk_per_tick = machclk_freq / hz;
 #ifdef ALTQ_DEBUG
 		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
 #endif
 		return;
 	}
 
 	/*
 	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
 	 * accessible, just use it.
 	 *
 	 * NOTE(review): machclk_freq is a u_int32_t, so a 64-bit source
 	 * value above 2^32-1 Hz would be truncated here -- confirm that
 	 * this cannot happen on supported hardware.
 	 */
 #if defined(__amd64__) || defined(__i386__)
 #ifdef __FreeBSD__
 	machclk_freq = atomic_load_acq_64(&tsc_freq);
 #elif defined(__NetBSD__)
 	machclk_freq = (u_int32_t)cpu_tsc_freq;
 #elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
 	machclk_freq = pentium_mhz * 1000000;
 #endif
 #endif
 
 	/*
 	 * if we don't know the clock frequency, measure it.
 	 */
 	if (machclk_freq == 0) {
 		static int	wait;	/* only its address is used, as sleep channel */
 		struct timeval	tv_start, tv_end;
 		u_int64_t	start, end, diff;
 		int		timo;
 
 		/* sample wall clock and counter before and after sleeping */
 		microtime(&tv_start);
 		start = read_machclk();
 		timo = hz;	/* 1 sec */
 		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
 		microtime(&tv_end);
 		end = read_machclk();
 		/* elapsed wall-clock time in microseconds */
 		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
 		    + tv_end.tv_usec - tv_start.tv_usec;
 		/* counter ticks per second; left at 0 if no time elapsed */
 		if (diff != 0)
 			machclk_freq = (u_int)((end - start) * 1000000 / diff);
 	}
 
 	machclk_per_tick = machclk_freq / hz;
 
 #ifdef ALTQ_DEBUG
 	printf("altq: CPU clock: %uHz\n", machclk_freq);
 #endif
 }
 
 #if defined(__OpenBSD__) && defined(__i386__)
 static __inline u_int64_t
 rdtsc(void)
 {
 	u_int64_t rv;
 	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
 	return (rv);
 }
 #endif /* __OpenBSD__ && __i386__ */
 
 /*
  * Return the current machine clock value.
  *
  * When machclk_usepcc is zero the value is derived from microtime():
  * microseconds, with the seconds counted from boottime, shifted left by
  * MACHCLK_SHIFT.  Otherwise the TSC is read on amd64/i386; on any other
  * architecture this panics.
  */
 u_int64_t
 read_machclk(void)
 {
 	u_int64_t val;
 
 	if (!machclk_usepcc) {
 		struct timeval tv;
 		u_int64_t usec;
 
 		microtime(&tv);
 		usec = (u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
 		    + tv.tv_usec;
 		return (usec << MACHCLK_SHIFT);
 	}
 
 #if defined(__amd64__) || defined(__i386__)
 	val = rdtsc();
 #else
 	panic("read_machclk");
 #endif
 	return (val);
 }
 
 #ifdef ALTQ3_CLFIER_COMPAT
 
 /* fallback protocol numbers, used only when no header defined them */
 #ifndef IPPROTO_ESP
 #define	IPPROTO_ESP	50		/* encapsulating security payload */
 #endif
 #ifndef IPPROTO_AH
 #define	IPPROTO_AH	51		/* authentication header */
 #endif
 
 /*
  * Fill in *flow from the IP header at the start of mbuf m.
  *
  * filt_bmask tells which flowinfo fields are needed; port numbers are
  * extracted only when it asks for them (for IPv6 also when the protocol
  * is needed and ip6_nxt is greater than IPPROTO_IPV6).  The IP header is
  * assumed to lie within one mbuf; addresses and ports are stored in
  * network byte order.
  *
  * Returns 1 on success.  Returns 0, with flow->fi_family set to
  * AF_UNSPEC, for an unsupported address family or an IPv4 header whose
  * version field is not 4.
  */
 int
 altq_extractflow(m, af, flow, filt_bmask)
 	struct mbuf *m;
 	int af;
 	struct flowinfo *flow;
 	u_int32_t	filt_bmask;
 {
 
 	if (af == PF_INET) {
 		struct ip *ip = mtod(m, struct ip *);
 
 		if (ip->ip_v == 4) {
 			struct flowinfo_in *fin = (struct flowinfo_in *)flow;
 
 			fin->fi_len = sizeof(struct flowinfo_in);
 			fin->fi_family = AF_INET;
 
 			fin->fi_proto = ip->ip_p;
 			fin->fi_tos = ip->ip_tos;
 
 			fin->fi_src.s_addr = ip->ip_src.s_addr;
 			fin->fi_dst.s_addr = ip->ip_dst.s_addr;
 
 			if (filt_bmask & FIMB4_PORTS) {
 				/* port info is required */
 				extract_ports4(m, ip, fin);
 			} else {
 				fin->fi_sport = 0;
 				fin->fi_dport = 0;
 				fin->fi_gpi = 0;
 			}
 			return (1);
 		}
 	}
 #ifdef INET6
 	else if (af == PF_INET6) {
 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 		struct flowinfo_in6 *fin6 = (struct flowinfo_in6 *)flow;
 
 		/* the ip version is not checked here */
 		fin6->fi6_len = sizeof(struct flowinfo_in6);
 		fin6->fi6_family = AF_INET6;
 
 		fin6->fi6_proto = ip6->ip6_nxt;
 		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
 		fin6->fi6_src = ip6->ip6_src;
 		fin6->fi6_dst = ip6->ip6_dst;
 
 		if ((filt_bmask & FIMB6_PORTS) ||
 		    ((filt_bmask & FIMB6_PROTO)
 		     && ip6->ip6_nxt > IPPROTO_IPV6)) {
 			/*
 			 * port info is required, or proto is required
 			 * but there are option headers: extract port
 			 * and protocol numbers.
 			 */
 			extract_ports6(m, ip6, fin6);
 		} else {
 			fin6->fi6_sport = 0;
 			fin6->fi6_dport = 0;
 			fin6->fi6_gpi = 0;
 		}
 		return (1);
 	}
 #endif /* INET6 */
 
 	/* failed */
 	flow->fi_len = sizeof(struct flowinfo);
 	flow->fi_family = AF_UNSPEC;
 	return (0);
 }
 
 /*
  * helper routine to extract port numbers
  */
 /*
  * structure for ipsec and ipv6 option header template.
  * extract_ports4() and extract_ports6() overlay it on packet data to
  * read the next-header byte, the length byte and, for AH, the SPI.
  */
 struct _opt6 {
 	u_int8_t	opt6_nxt;	/* next header */
 	u_int8_t	opt6_hlen;	/* header extension length */
 	u_int16_t	_pad;		/* not read by the code in this file */
 	u_int32_t	ah_spi;		/* security parameter index
 					   for authentication header */
 };
 
 /*
  * extract port numbers from a ipv4 packet.
  *
  * m is the mbuf chain, ip the IPv4 header inside it and fin the flow
  * record to fill in.  fi_sport, fi_dport and fi_gpi are cleared first.
  * For a non-first fragment the cached fragment info is looked up and 1
  * is returned.  Otherwise the transport header is located: TCP/UDP
  * ports are copied, and with ALTQ_IPSEC the ESP/AH SPI is copied into
  * fi_gpi (AH headers are skipped to reach the next protocol).
  *
  * Returns 1 when the flow record was filled in, 0 when the header could
  * not be located, the data is too short, or the protocol is not one of
  * the handled ones (fi_proto is still set in the last case).
  */
 static int
 extract_ports4(m, ip, fin)
 	struct mbuf *m;
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct mbuf *m0;
 	u_short ip_off;
 	u_int8_t proto;
 	int 	off;
 
 	fin->fi_sport = 0;
 	fin->fi_dport = 0;
 	fin->fi_gpi = 0;
 
 	ip_off = ntohs(ip->ip_off);
 	/* if it is a fragment, try cached fragment info */
 	if (ip_off & IP_OFFMASK) {
 		ip4f_lookup(ip, fin);
 		return (1);
 	}
 
 	/* locate the mbuf containing the protocol header */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)ip >= m0->m_data) &&
 		    ((caddr_t)ip < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 #ifdef ALTQ_DEBUG
 		printf("extract_ports4: can't locate header! ip=%p\n", ip);
 #endif
 		return (0);
 	}
 	/* offset of the first byte after the ip header within m0 */
 	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
 	proto = ip->ip_p;
 
 #ifdef ALTQ_IPSEC
  again:
 #endif
 	/* advance to the mbuf that holds offset `off' */
 	while (off >= m0->m_len) {
 		off -= m0->m_len;
 		m0 = m0->m_next;
 		if (m0 == NULL)
 			return (0);  /* bogus ip_hl! */
 	}
 	/* the first 4 bytes of the header must be in this mbuf */
 	if (m0->m_len < off + 4)
 		return (0);
 
 	switch (proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP: {
 		struct udphdr *udp;
 
 		/* both protocols start with source and destination port */
 		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
 		fin->fi_sport = udp->uh_sport;
 		fin->fi_dport = udp->uh_dport;
 		fin->fi_proto = proto;
 		}
 		break;
 
 #ifdef ALTQ_IPSEC
 	case IPPROTO_ESP:
 		if (fin->fi_gpi == 0){
 			u_int32_t *gpi;
 
 			/* the first 32-bit word of the header is the SPI */
 			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
 			fin->fi_gpi   = *gpi;
 		}
 		fin->fi_proto = proto;
 		break;
 
 	case IPPROTO_AH: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			/*
 			 * ah_spi occupies bytes 4-7 of this header, so the
 			 * bounds check has to use the offset of this
 			 * header, i.e. be done before `off' is advanced to
 			 * the next one (as extract_ports6() does).
 			 */
 			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
 				fin->fi_gpi = opt6->ah_spi;
 			proto = opt6->opt6_nxt;
 			off += 8 + (opt6->opt6_hlen * 4);
 		}
 		/* goto the next header */
 		goto again;
 #endif  /* ALTQ_IPSEC */
 
 	default:
 		fin->fi_proto = proto;
 		return (0);
 	}
 
 	/* if this is a first fragment, cache it. */
 	if (ip_off & IP_MF)
 		ip4f_cache(ip, fin);
 
 	return (1);
 }
 
 #ifdef INET6
 /*
  * extract port numbers from a ipv6 packet.
  *
  * m is the mbuf chain, ip6 the IPv6 header inside it and fin6 the flow
  * record to fill in.  fi6_gpi, fi6_sport and fi6_dport are cleared
  * first.  The chain of headers following the IPv6 header is walked:
  * AH, hop-by-hop, routing and destination option headers are skipped,
  * TCP/UDP ports or the ESP SPI end the walk with return value 1.
  *
  * Returns 0 when the header cannot be located, the data is too short,
  * or a fragment header or any other protocol is met (fi6_proto is set
  * to that protocol in the last case).
  */
 static int
 extract_ports6(m, ip6, fin6)
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct flowinfo_in6 *fin6;
 {
 	struct mbuf *m0;
 	int	off;
 	u_int8_t proto;
 
 	fin6->fi6_gpi   = 0;
 	fin6->fi6_sport = 0;
 	fin6->fi6_dport = 0;
 
 	/* locate the mbuf containing the protocol header */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)ip6 >= m0->m_data) &&
 		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 #ifdef ALTQ_DEBUG
 		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
 #endif
 		return (0);
 	}
 	/* offset of the first byte after the fixed ipv6 header within m0 */
 	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
 
 	proto = ip6->ip6_nxt;
 	do {
 		/* advance to the mbuf that holds offset `off' */
 		while (off >= m0->m_len) {
 			off -= m0->m_len;
 			m0 = m0->m_next;
 			if (m0 == NULL)
 				return (0);
 		}
 		/* the first 4 bytes of the header must be in this mbuf */
 		if (m0->m_len < off + 4)
 			return (0);
 
 		switch (proto) {
 		case IPPROTO_TCP:
 		case IPPROTO_UDP: {
 			struct udphdr *udp;
 
 			/* both protocols start with source and dest port */
 			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
 			fin6->fi6_sport = udp->uh_sport;
 			fin6->fi6_dport = udp->uh_dport;
 			fin6->fi6_proto = proto;
 			}
 			return (1);
 
 		case IPPROTO_ESP:
 			if (fin6->fi6_gpi == 0) {
 				u_int32_t *gpi;
 
 				/* the first 32-bit word is taken as the SPI */
 				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
 				fin6->fi6_gpi   = *gpi;
 			}
 			fin6->fi6_proto = proto;
 			return (1);
 
 		case IPPROTO_AH: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			/* read the SPI only if bytes 4-7 are in this mbuf */
 			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
 				fin6->fi6_gpi = opt6->ah_spi;
 			proto = opt6->opt6_nxt;
 			/* AH length: 8 bytes plus opt6_hlen 4-byte units */
 			off += 8 + (opt6->opt6_hlen * 4);
 			/* goto the next header */
 			break;
 			}
 
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			proto = opt6->opt6_nxt;
 			/* length: (opt6_hlen + 1) units of 8 bytes */
 			off += (opt6->opt6_hlen + 1) * 8;
 			/* goto the next header */
 			break;
 			}
 
 		case IPPROTO_FRAGMENT:
 			/* ipv6 fragmentations are not supported yet */
 		default:
 			fin6->fi6_proto = proto;
 			return (0);
 		}
 	} while (1);
 	/*NOTREACHED*/
 }
 #endif /* INET6 */
 
 /*
  * altq common classifier
  */
 /*
  * Add a flow filter to a classifier.
  *
  * A copy of *filter is stored together with `class'; address masks are
  * normalised in the copy (not in the caller's structure).  The filter
  * is linked into the hash bucket chosen from its destination address
  * (IPv4) or flow label (IPv6), or into the wildcard bucket, ordered by
  * descending rule number.  The new filter handle is stored in *phandle.
  *
  * Returns 0 on success, EINVAL for an unsupported address family and
  * ENOMEM if the allocation fails.
  */
 int
 acc_add_filter(classifier, filter, class, phandle)
 	struct acc_classifier *classifier;
 	struct flow_filter *filter;
 	void	*class;
 	u_long	*phandle;
 {
 	struct acc_filter *afp, *prev, *tmp;
 	int	i, s;
 
 #ifdef INET6
 	if (filter->ff_flow.fi_family != AF_INET &&
 	    filter->ff_flow.fi_family != AF_INET6)
 		return (EINVAL);
 #else
 	if (filter->ff_flow.fi_family != AF_INET)
 		return (EINVAL);
 #endif
 
 	/*
 	 * NOTE(review): the allocation uses M_WAITOK, yet the result is
 	 * still checked for NULL -- confirm whether the check can trigger.
 	 */
 	afp = malloc(sizeof(struct acc_filter),
 	       M_DEVBUF, M_WAITOK);
 	if (afp == NULL)
 		return (ENOMEM);
 	bzero(afp, sizeof(struct acc_filter));
 
 	afp->f_filter = *filter;
 	afp->f_class = class;
 
 	/* i ends up as the index of the hash bucket for this filter */
 	i = ACC_WILDCARD_INDEX;
 	if (filter->ff_flow.fi_family == AF_INET) {
 		struct flow_filter *filter4 = &afp->f_filter;
 
 		/*
 		 * if address is 0, it's a wildcard.  if address mask
 		 * isn't set, use full mask.
 		 */
 		if (filter4->ff_flow.fi_dst.s_addr == 0)
 			filter4->ff_mask.mask_dst.s_addr = 0;
 		else if (filter4->ff_mask.mask_dst.s_addr == 0)
 			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
 		if (filter4->ff_flow.fi_src.s_addr == 0)
 			filter4->ff_mask.mask_src.s_addr = 0;
 		else if (filter4->ff_mask.mask_src.s_addr == 0)
 			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
 
 		/* clear extra bits in addresses  */
 		   filter4->ff_flow.fi_dst.s_addr &=
 		       filter4->ff_mask.mask_dst.s_addr;
 		   filter4->ff_flow.fi_src.s_addr &=
 		       filter4->ff_mask.mask_src.s_addr;
 
 		/*
 		 * if dst address is a wildcard, use hash-entry
 		 * ACC_WILDCARD_INDEX.
 		 * (any mask other than all-ones counts as a wildcard here)
 		 */
 		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
 			i = ACC_WILDCARD_INDEX;
 		else
 			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
 	}
 #ifdef INET6
 	else if (filter->ff_flow.fi_family == AF_INET6) {
 		/* the stored filter is reinterpreted as an ipv6 filter */
 		struct flow_filter6 *filter6 =
 			(struct flow_filter6 *)&afp->f_filter;
 		/*
 		 * local mask constants are declared only when IN6MASK0 is
 		 * not already defined; otherwise in6mask0/in6mask128 have
 		 * to come from elsewhere.
 		 */
 #ifndef IN6MASK0 /* taken from kame ipv6 */
 #define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
 #define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
 		const struct in6_addr in6mask0 = IN6MASK0;
 		const struct in6_addr in6mask128 = IN6MASK128;
 #endif
 
 		/* unspecified address: wildcard; unset mask: full mask */
 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
 			filter6->ff_mask6.mask6_dst = in6mask0;
 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
 			filter6->ff_mask6.mask6_dst = in6mask128;
 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
 			filter6->ff_mask6.mask6_src = in6mask0;
 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
 			filter6->ff_mask6.mask6_src = in6mask128;
 
 		/* clear extra bits in addresses  */
 		/* (i serves as a byte index here, as bucket index below) */
 		for (i = 0; i < 16; i++)
 			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
 			    filter6->ff_mask6.mask6_dst.s6_addr[i];
 		for (i = 0; i < 16; i++)
 			filter6->ff_flow6.fi6_src.s6_addr[i] &=
 			    filter6->ff_mask6.mask6_src.s6_addr[i];
 
 		/* ipv6 filters are hashed on the flow label */
 		if (filter6->ff_flow6.fi6_flowlabel == 0)
 			i = ACC_WILDCARD_INDEX;
 		else
 			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
 	}
 #endif /* INET6 */
 
 	afp->f_handle = get_filt_handle(classifier, i);
 
 	/* update filter bitmask */
 	/* (computed from the caller's filter, not the normalised copy) */
 	afp->f_fbmask = filt2fibmask(filter);
 	classifier->acc_fbmask |= afp->f_fbmask;
 
 	/*
 	 * add this filter to the filter list.
 	 * filters are ordered from the highest rule number.
 	 */
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	/* prev: last entry whose rule number is above the new filter's */
 	prev = NULL;
 	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
 		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
 			prev = tmp;
 		else
 			break;
 	}
 	if (prev == NULL)
 		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
 	else
 		LIST_INSERT_AFTER(prev, afp, f_chain);
 	splx(s);
 
 	*phandle = afp->f_handle;
 	return (0);
 }
 
 /*
  * Remove the filter identified by `handle' from the classifier and free
  * it.  Returns EINVAL when no filter matches the handle, 0 otherwise.
  * The classifier's filter bitmask is left as it is.
  */
 int
 acc_delete_filter(classifier, handle)
 	struct acc_classifier *classifier;
 	u_long handle;
 {
 	struct acc_filter *victim;
 	int	s;
 
 	victim = filth_to_filtp(classifier, handle);
 	if (victim == NULL)
 		return (EINVAL);
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	/* unlink while interrupts are blocked, free afterwards */
 	LIST_REMOVE(victim, f_chain);
 	splx(s);
 
 	free(victim, M_DEVBUF);
 
 	/* todo: update filt_bmask */
 
 	return (0);
 }
 
 /*
  * delete filters referencing to the specified class.
  * if the all flag is not 0, delete all the filters.
  */
 int
 acc_discard_filters(classifier, class, all)
 	struct acc_classifier *classifier;
 	void	*class;
 	int	all;
 {
 	struct acc_filter *afp;
 	int	i, s;
 
 #ifdef __NetBSD__
 	s = splnet();
 #else
 	s = splimp();
 #endif
 	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
 		do {
 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 				if (all || afp->f_class == class) {
 					LIST_REMOVE(afp, f_chain);
 					free(afp, M_DEVBUF);
 					/* start again from the head */
 					break;
 				}
 		} while (afp != NULL);
 	}
 	splx(s);
 
 	if (all)
 		classifier->acc_fbmask = 0;
 
 	return (0);
 }
 
 /*
  * Classify a packet: return the class of the first filter matching the
  * flow extracted from mbuf m, or NULL when no filter matches.
  *
  * clfier is the classifier (a struct acc_classifier passed as void *),
  * af the address family handed to altq_extractflow().  The classifier's
  * filter bitmask decides how much of the flow is extracted and which
  * of the matching routines is used.
  */
 void *
 acc_classify(clfier, m, af)
 	void *clfier;
 	struct mbuf *m;
 	int af;
 {
 	struct acc_classifier *classifier;
 	struct flowinfo flow;
 	struct acc_filter *afp;
 	int	i;
 
 	classifier = (struct acc_classifier *)clfier;
 	/* on failure flow.fi_family is AF_UNSPEC and nothing matches below */
 	altq_extractflow(m, af, &flow, classifier->acc_fbmask);
 
 	if (flow.fi_family == AF_INET) {
 		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
 
 		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
 			/* only tos is used */
 			/* only the wildcard bucket is searched in this case */
 			LIST_FOREACH(afp,
 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
 				 f_chain)
 				if (apply_tosfilter4(afp->f_fbmask,
 						     &afp->f_filter, fp))
 					/* filter matched */
 					return (afp->f_class);
 		} else if ((classifier->acc_fbmask &
 			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
 		    == 0) {
 			/* only proto and ports are used */
 			/* only the wildcard bucket is searched in this case */
 			LIST_FOREACH(afp,
 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
 				 f_chain)
 				if (apply_ppfilter4(afp->f_fbmask,
 						    &afp->f_filter, fp))
 					/* filter matched */
 					return (afp->f_class);
 		} else {
 			/* get the filter hash entry from its dest address */
 			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
 			do {
 				/*
 				 * go through this loop twice.  first for dst
 				 * hash, second for wildcards.
 				 */
 				LIST_FOREACH(afp, &classifier->acc_filters[i],
 					     f_chain)
 					if (apply_filter4(afp->f_fbmask,
 							  &afp->f_filter, fp))
 						/* filter matched */
 						return (afp->f_class);
 
 				/*
 				 * check again for filters with a dst addr
 				 * wildcard.
 				 * (daddr == 0 || dmask != 0xffffffff).
 				 */
 				if (i != ACC_WILDCARD_INDEX)
 					i = ACC_WILDCARD_INDEX;
 				else
 					break;
 			} while (1);
 		}
 	}
 #ifdef INET6
 	else if (flow.fi_family == AF_INET6) {
 		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
 
 		/* get the filter hash entry from its flow ID */
 		if (fp6->fi6_flowlabel != 0)
 			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
 		else
 			/* the flow label can be zero */
 			i = ACC_WILDCARD_INDEX;
 
 		/* go through this loop twice.  first for flow hash, second
 		   for wildcards. */
 		do {
 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 				if (apply_filter6(afp->f_fbmask,
 					(struct flow_filter6 *)&afp->f_filter,
 					fp6))
 					/* filter matched */
 					return (afp->f_class);
 
 			/*
 			 * check again for filters with a wildcard.
 			 */
 			if (i != ACC_WILDCARD_INDEX)
 				i = ACC_WILDCARD_INDEX;
 			else
 				break;
 		} while (1);
 	}
 #endif /* INET6 */
 
 	/* no filter matched */
 	return (NULL);
 }
 
 /*
  * Match an IPv4 flow against a filter.  Only the fields selected in
  * fbmask are compared; addresses and tos of the packet are masked with
  * the filter's masks first.  Returns 1 on a match, 0 otherwise
  * (including when the filter is not an AF_INET filter).
  */
 static int
 apply_filter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	if (filt->ff_flow.fi_family != AF_INET)
 		return (0);
 
 	if (fbmask & FIMB4_SPORT) {
 		if (pkt->fi_sport != filt->ff_flow.fi_sport)
 			return (0);
 	}
 	if (fbmask & FIMB4_DPORT) {
 		if (pkt->fi_dport != filt->ff_flow.fi_dport)
 			return (0);
 	}
 	if (fbmask & FIMB4_DADDR) {
 		if ((pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr) !=
 		    filt->ff_flow.fi_dst.s_addr)
 			return (0);
 	}
 	if (fbmask & FIMB4_SADDR) {
 		if ((pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr) !=
 		    filt->ff_flow.fi_src.s_addr)
 			return (0);
 	}
 	if (fbmask & FIMB4_PROTO) {
 		if (pkt->fi_proto != filt->ff_flow.fi_proto)
 			return (0);
 	}
 	if (fbmask & FIMB4_TOS) {
 		if ((pkt->fi_tos & filt->ff_mask.mask_tos) !=
 		    filt->ff_flow.fi_tos)
 			return (0);
 	}
 	if (fbmask & FIMB4_GPI) {
 		if ((pkt->fi_gpi) != filt->ff_flow.fi_gpi)
 			return (0);
 	}
 
 	/* every selected field agreed */
 	return (1);
 }
 
 /*
  * Reduced IPv4 matcher for the common case in which filters look only
  * at protocol and port numbers.  Fields not selected in fbmask are
  * ignored.  Returns 1 on a match, 0 otherwise (including when the
  * filter is not an AF_INET filter).
  */
 static int
 apply_ppfilter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	if (filt->ff_flow.fi_family != AF_INET)
 		return (0);
 
 	if (fbmask & FIMB4_SPORT) {
 		if (pkt->fi_sport != filt->ff_flow.fi_sport)
 			return (0);
 	}
 	if (fbmask & FIMB4_DPORT) {
 		if (pkt->fi_dport != filt->ff_flow.fi_dport)
 			return (0);
 	}
 	if (fbmask & FIMB4_PROTO) {
 		if (pkt->fi_proto != filt->ff_flow.fi_proto)
 			return (0);
 	}
 
 	/* every selected field agreed */
 	return (1);
 }
 
 /*
  * Reduced IPv4 matcher that looks at the tos field only.  The packet's
  * tos is masked with the filter's tos mask before the comparison; when
  * FIMB4_TOS is not selected in fbmask every AF_INET filter matches.
  * Returns 1 on a match, 0 otherwise.
  */
 static int
 apply_tosfilter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	if (filt->ff_flow.fi_family != AF_INET)
 		return (0);
 
 	if (fbmask & FIMB4_TOS) {
 		if ((pkt->fi_tos & filt->ff_mask.mask_tos) !=
 		    filt->ff_flow.fi_tos)
 			return (0);
 	}
 
 	/* match */
 	return (1);
 }
 
 #ifdef INET6
 /*
  * Match an IPv6 flow against a filter.  Only the fields selected in
  * fbmask are compared; addresses and traffic class of the packet are
  * masked with the filter's masks first.  Returns 1 on a match, 0
  * otherwise (including when the filter is not an AF_INET6 filter).
  */
 static int
 apply_filter6(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter6 *filt;
 	struct flowinfo_in6 *pkt;
 {
 	int w;
 
 	if (filt->ff_flow6.fi6_family != AF_INET6)
 		return (0);
 
 	if (fbmask & FIMB6_FLABEL) {
 		if (pkt->fi6_flowlabel != filt->ff_flow6.fi6_flowlabel)
 			return (0);
 	}
 	if (fbmask & FIMB6_PROTO) {
 		if (pkt->fi6_proto != filt->ff_flow6.fi6_proto)
 			return (0);
 	}
 	if (fbmask & FIMB6_SPORT) {
 		if (pkt->fi6_sport != filt->ff_flow6.fi6_sport)
 			return (0);
 	}
 	if (fbmask & FIMB6_DPORT) {
 		if (pkt->fi6_dport != filt->ff_flow6.fi6_dport)
 			return (0);
 	}
 	/* addresses are compared as four 32-bit words each */
 	if (fbmask & FIMB6_SADDR) {
 		for (w = 0; w < 4; w++) {
 			if ((pkt->fi6_src.s6_addr32[w] &
 			     filt->ff_mask6.mask6_src.s6_addr32[w]) !=
 			    filt->ff_flow6.fi6_src.s6_addr32[w])
 				return (0);
 		}
 	}
 	if (fbmask & FIMB6_DADDR) {
 		for (w = 0; w < 4; w++) {
 			if ((pkt->fi6_dst.s6_addr32[w] &
 			     filt->ff_mask6.mask6_dst.s6_addr32[w]) !=
 			    filt->ff_flow6.fi6_dst.s6_addr32[w])
 				return (0);
 		}
 	}
 	if (fbmask & FIMB6_TCLASS) {
 		if ((pkt->fi6_tclass & filt->ff_mask6.mask6_tclass) !=
 		    filt->ff_flow6.fi6_tclass)
 			return (0);
 	}
 	if (fbmask & FIMB6_GPI) {
 		if (pkt->fi6_gpi != filt->ff_flow6.fi6_gpi)
 			return (0);
 	}
 
 	/* every selected field agreed */
 	return (1);
 }
 #endif /* INET6 */
 
 /*
  * Pick a filter handle that is not yet used in hash bucket i.
  *
  *  filter handle:
  *	bit 20-28: index to the filter hash table
  *	bit  0-19: unique id in the hash bucket.
  *
  * Candidate ids are drawn from a function-static counter; a candidate
  * is rejected as long as a filter already chained in the bucket carries
  * the same low 20 bits.
  */
 static u_long
 get_filt_handle(classifier, i)
 	struct acc_classifier *classifier;
 	int	i;
 {
 	static u_long handle_number = 1;
 	struct acc_filter *inuse;
 	u_long 	id;
 
 	do {
 		id = handle_number++ & 0x000fffff;
 
 		/*
 		 * The cursor ends up NULL when the walk runs off the end
 		 * of the chain (or the chain is empty), i.e. when no
 		 * filter in this bucket uses the candidate id.
 		 */
 		LIST_FOREACH(inuse, &classifier->acc_filters[i], f_chain)
 			if ((inuse->f_handle & 0x000fffff) == id)
 				break;
 		/* non-NULL: this handle is already used, try again */
 	} while (inuse != NULL);
 
 	return ((i << 20) | id);
 }
 
 /*
  * Convert a filter handle to a filter pointer.
  * The hash bucket is taken from the handle with ACC_GET_HINDEX() and its
  * chain is searched for an entry whose f_handle equals handle.
  * Returns NULL when no such filter exists.
  */
 static struct acc_filter *
 filth_to_filtp(classifier, handle)
 	struct acc_classifier *classifier;
 	u_long handle;
 {
 	struct acc_filter *cur;
 	int	bucket;
 
 	bucket = ACC_GET_HINDEX(handle);
 
 	/* the cursor is NULL after the walk unless we broke out on a hit */
 	LIST_FOREACH(cur, &classifier->acc_filters[bucket], f_chain)
 		if (cur->f_handle == handle)
 			break;
 
 	return (cur);
 }
 
 /*
  * Create the flowinfo bitmask of a filter: one FIMB4_* (or FIMB6_*) bit
  * is set for every flow field of the filter that is non-zero (for IPv6
  * addresses: not the unspecified address).  A filter of any other
  * address family yields 0.
  */
 static u_int32_t
 filt2fibmask(filt)
 	struct flow_filter *filt;
 {
 	u_int32_t bits = 0;
 #ifdef INET6
 	struct flow_filter6 *filt6;
 #endif
 
 	if (filt->ff_flow.fi_family == AF_INET) {
 		if (filt->ff_flow.fi_sport)
 			bits |= FIMB4_SPORT;
 		if (filt->ff_flow.fi_dport)
 			bits |= FIMB4_DPORT;
 		if (filt->ff_flow.fi_src.s_addr)
 			bits |= FIMB4_SADDR;
 		if (filt->ff_flow.fi_dst.s_addr)
 			bits |= FIMB4_DADDR;
 		if (filt->ff_flow.fi_proto)
 			bits |= FIMB4_PROTO;
 		if (filt->ff_flow.fi_tos)
 			bits |= FIMB4_TOS;
 		if (filt->ff_flow.fi_gpi)
 			bits |= FIMB4_GPI;
 	}
 #ifdef INET6
 	else if (filt->ff_flow.fi_family == AF_INET6) {
 		/* the caller handed us an IPv6 filter through the v4 type */
 		filt6 = (struct flow_filter6 *)filt;
 
 		if (filt6->ff_flow6.fi6_sport)
 			bits |= FIMB6_SPORT;
 		if (filt6->ff_flow6.fi6_dport)
 			bits |= FIMB6_DPORT;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
 			bits |= FIMB6_SADDR;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
 			bits |= FIMB6_DADDR;
 		if (filt6->ff_flow6.fi6_proto)
 			bits |= FIMB6_PROTO;
 		if (filt6->ff_flow6.fi6_tclass)
 			bits |= FIMB6_TCLASS;
 		if (filt6->ff_flow6.fi6_flowlabel)
 			bits |= FIMB6_FLABEL;
 		if (filt6->ff_flow6.fi6_gpi)
 			bits |= FIMB6_GPI;
 	}
 #endif /* INET6 */
 	return (bits);
 }
 
 
 /*
  * helper functions to handle IPv4 fragments.
  * currently only in-sequence fragments are handled.
  *	- fragment info is cached in an LRU list.
  *	- when a first fragment is found, cache its flow info.
  *	- when a non-first fragment is found, lookup the cache.
  */
 
 /* One entry of the IPv4 fragment cache kept on ip4f_list. */
 struct ip4_frag {
     TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* linkage on ip4f_list */
     char    ip4f_valid;	/* 1 once handed out by ip4f_alloc(), 0 when free */
     u_short ip4f_id;	/* ip_id of the cached datagram */
     struct flowinfo_in ip4f_info;	/* saved proto, addresses, ports and gpi */
 };
 
 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
 
 #define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
 
 
 /*
  * Record the flow info of a fragment in the fragment cache so that
  * ip4f_lookup() can recover the port numbers and gpi for later
  * fragments carrying the same id, protocol and address pair.
  * The cache entries are allocated on the first call; when that fails
  * completely nothing is cached.
  */
 static void
 ip4f_cache(ip, fin)
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct ip4_frag *ent;
 
 	/* first time call: allocate the entries, give up if none came back */
 	if (TAILQ_EMPTY(&ip4f_list) && ip4f_init() < 0)
 		return;
 
 	ent = ip4f_alloc();
 
 	/* lookup key, taken from the IP header */
 	ent->ip4f_id = ip->ip_id;
 	ent->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
 	ent->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
 	ent->ip4f_info.fi_proto = ip->ip_p;
 
 	/* payload: what the classifier extracted for this fragment */
 	ent->ip4f_info.fi_sport = fin->fi_sport;
 	ent->ip4f_info.fi_dport = fin->fi_dport;
 	ent->ip4f_info.fi_gpi   = fin->fi_gpi;
 }
 
 /*
  * Look up the fragment cache for an entry matching ip (same id, source,
  * destination and protocol).  On a hit the cached port numbers and gpi
  * are copied into fin, the entry is released if the IP_MF bit is clear
  * (last fragment), and 1 is returned.  Returns 0 when nothing matches.
  */
 static int
 ip4f_lookup(ip, fin)
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct ip4_frag *ent;
 
 	/* the walk ends at the first entry that is not marked valid */
 	ent = TAILQ_FIRST(&ip4f_list);
 	while (ent != NULL && ent->ip4f_valid) {
 		if (ip->ip_id != ent->ip4f_id ||
 		    ip->ip_src.s_addr != ent->ip4f_info.fi_src.s_addr ||
 		    ip->ip_dst.s_addr != ent->ip4f_info.fi_dst.s_addr ||
 		    ip->ip_p != ent->ip4f_info.fi_proto) {
 			ent = TAILQ_NEXT(ent, ip4f_chain);
 			continue;
 		}
 
 		/* matching entry: hand back what was cached */
 		fin->fi_sport = ent->ip4f_info.fi_sport;
 		fin->fi_dport = ent->ip4f_info.fi_dport;
 		fin->fi_gpi   = ent->ip4f_info.fi_gpi;
 
 		/* no more fragments to come, the entry can be reused */
 		if ((ntohs(ip->ip_off) & IP_MF) == 0)
 			ip4f_free(ent);
 
 		return (1);
 	}
 
 	/* no matching entry found */
 	return (0);
 }
 
 /*
  * Initialise ip4f_list and fill it with up to IP4F_TABSIZE free entries.
  * Returns -1 only when not even the first entry could be allocated;
  * a partially filled list still counts as success (0).
  */
 static int
 ip4f_init(void)
 {
 	struct ip4_frag *ent;
 	int n;
 
 	TAILQ_INIT(&ip4f_list);
 
 	n = 0;
 	while (n < IP4F_TABSIZE) {
 		ent = malloc(sizeof(struct ip4_frag),
 		       M_DEVBUF, M_NOWAIT);
 		if (ent == NULL) {
 			printf("ip4f_init: can't alloc %dth entry!\n", n);
 			return (n == 0 ? -1 : 0);
 		}
 		ent->ip4f_valid = 0;
 		TAILQ_INSERT_TAIL(&ip4f_list, ent, ip4f_chain);
 		n++;
 	}
 	return (0);
 }
 
 /*
  * Hand out a cache entry: whatever entry currently sits at the tail of
  * ip4f_list is marked valid and moved to the head.
  */
 static struct ip4_frag *
 ip4f_alloc(void)
 {
 	struct ip4_frag *victim;
 
 	victim = TAILQ_LAST(&ip4f_list, ip4f_list);
 	victim->ip4f_valid = 1;
 
 	/* relink at the head */
 	TAILQ_REMOVE(&ip4f_list, victim, ip4f_chain);
 	TAILQ_INSERT_HEAD(&ip4f_list, victim, ip4f_chain);
 
 	return (victim);
 }
 
 /*
  * Release a cache entry: mark it invalid and move it to the tail of
  * ip4f_list, where ip4f_alloc() picks its next entry from.
  */
 static void
 ip4f_free(fp)
 	struct ip4_frag *fp;
 {
 	fp->ip4f_valid = 0;
 
 	/* relink at the tail */
 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
 }
 
 #endif /* ALTQ3_CLFIER_COMPAT */
Index: stable/10/sys/net/pf_mtag.h
===================================================================
--- stable/10/sys/net/pf_mtag.h	(revision 263085)
+++ stable/10/sys/net/pf_mtag.h	(nonexistent)
@@ -1,62 +0,0 @@
-/*	$FreeBSD$	*/
-/*
- * Copyright (c) 2001 Daniel Hartmeier
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *    - Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    - Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following
- *      disclaimer in the documentation and/or other materials provided
- *      with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _NET_PF_MTAG_H_
-#define _NET_PF_MTAG_H_
-
-#ifdef _KERNEL
-
-#define	PF_TAG_GENERATED		0x01
-#define	PF_TAG_FRAGCACHE		0x02
-#define	PF_TAG_TRANSLATE_LOCALHOST	0x04
-#define	PF_PACKET_LOOPED		0x08
-#define	PF_FASTFWD_OURS_PRESENT		0x10
-
-struct pf_mtag {
-	void		*hdr;		/* saved hdr pos in mbuf, for ECN */
-	u_int32_t	 qid;		/* queue id */
-	u_int16_t	 tag;		/* tag id */
-	u_int8_t	 flags;
-	u_int8_t	 routed;
-};
-
-static __inline struct pf_mtag *
-pf_find_mtag(struct mbuf *m)
-{
-	struct m_tag	*mtag;
-
-	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL)
-		return (NULL);
-
-	return ((struct pf_mtag *)(mtag + 1));
-}
-#endif /* _KERNEL */
-#endif /* _NET_PF_MTAG_H_ */

Property changes on: stable/10/sys/net/pf_mtag.h
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: stable/10/sys/net/if_ethersubr.c
===================================================================
--- stable/10/sys/net/if_ethersubr.c	(revision 263085)
+++ stable/10/sys/net/if_ethersubr.c	(revision 263086)
@@ -1,1321 +1,1322 @@
 /*-
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
-#include <net/pf_mtag.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
+
+#include <netpfil/pf/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef IPX
 #include <netipx/ipx.h>
 #include <netipx/ipx_if.h>
 #endif
 
 int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
 int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
 		const struct sockaddr *dst, short *tp, int *hlen);
 
 #ifdef NETATALK
 #include <netatalk/at.h>
 #include <netatalk/at_var.h>
 #include <netatalk/at_extern.h>
 
 #define llc_snap_org_code llc_un.type_snap.org_code
 #define llc_snap_ether_type llc_un.type_snap.ether_type
 
 extern u_char	at_org_code[3];
 extern u_char	aarp_org_code[3];
 #endif /* NETATALK */
 
 #include <security/mac/mac_framework.h>
 
 #ifdef CTASSERT
 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
 VNET_DEFINE(struct pfil_head, link_pfil_hook);	/* Packet filter hooks */
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_attach_p)(struct ifnet *ifp);
 void	(*ng_ether_detach_p)(struct ifnet *ifp);
 
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* if_bridge(4) support */
 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); 
 int	(*bridge_output_p)(struct ifnet *, struct mbuf *, 
 		struct sockaddr *, struct rtentry *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
 struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
 
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 #ifdef VIMAGE
 static	void ether_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 
 /* XXX: should be in an arp support file, not here */
 static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
 
 #define	ETHER_IS_BROADCAST(addr) \
 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
 
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 /*
  * Derive checksum flags for dst from the flags set on src:
  *	CSUM_IP		-> CSUM_IP_CHECKED | CSUM_IP_VALID
  *	CSUM_DELAY_DATA	-> CSUM_DATA_VALID | CSUM_PSEUDO_HDR
  *	CSUM_SCTP	-> CSUM_SCTP_VALID
  * The result is ORed into dst's csum_flags, and csum_data is set to
  * 0xffff when CSUM_DATA_VALID is part of it.
  *
  * src and dst may be the same mbuf (callers pass (m, m)), so all of
  * src's flags are examined before dst is modified.
  */
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int valid;
 
 	valid = (src->m_pkthdr.csum_flags & CSUM_IP) ?
 	    (CSUM_IP_CHECKED|CSUM_IP_VALID) : 0;
 	valid |= (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ?
 	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR) : 0;
 	valid |= (src->m_pkthdr.csum_flags & CSUM_SCTP) ?
 	    CSUM_SCTP_VALID : 0;
 
 	dst->m_pkthdr.csum_flags |= valid;
 	if ((valid & CSUM_DATA_VALID) != 0)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Ethernet output routine.
  * Encapsulate a packet of the address family given by dst->sa_family
  * for the local net: determine the destination MAC address (edst) and
  * the ethertype (type), prepend the Ethernet header, loop a copy back
  * on simplex interfaces where required, and then pass the frame to the
  * bridge, carp, netgraph and finally ether_output_frame().
  *
  * ro, when non-NULL, supplies a route (ro_rt) and, for frames that are
  * not flagged broadcast/multicast, a link-layer entry (ro_lle).
  *
  * Returns 0 or an errno value.  Every path that jumps to "bad" frees
  * the mbuf there if it is still non-NULL.
  *
  * (The historical remark about trailer encapsulation does not match
  * this body, which contains no trailer handling.)
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	short type;
 	int error = 0, hdrcmplt = 0;
 	u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
 	struct llentry *lle = NULL;
 	struct rtentry *rt0 = NULL;
 	struct ether_header *eh;
 	struct pf_mtag *t;
 	int loop_copy = 1;
 	int hlen;	/* link layer header length */
 
 	/* Take the link-layer entry (unicast only) and the route from ro. */
 	if (ro != NULL) {
 		if (!(m->m_flags & (M_BCAST | M_MCAST)))
 			lle = ro->ro_lle;
 		rt0 = ro->ro_rt;
 	}
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	M_PROFILE(m);
 	/* No output in monitor mode or while the interface is not up+running. */
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	/*
 	 * Per address family: fill in edst and type (some cases also
 	 * adjust hlen, loop_copy or m itself).
 	 */
 	hlen = ETHER_HDR_LEN;
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		/* use the valid link-layer entry if we have one, else resolve */
 		if (lle != NULL && (lle->la_flags & LLE_VALID))
 			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
 		else
 			error = arpresolve(ifp, rt0, m, dst, edst, &lle);
 		/*
 		 * EWOULDBLOCK is reported to the caller as success.
 		 * NOTE(review): presumably arpresolve() keeps the mbuf while
 		 * resolution is pending; confirm, m is not freed here.
 		 */
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		type = htons(ETHERTYPE_IP);
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_ETHER);
 
 		loop_copy = 0; /* if this is for us, don't do it */
 
 		/* RARP operations get their own ethertype */
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			type = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			type = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		/* broadcast, or the target hardware address from the ARP header */
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
 		else
 			bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
 
 	}
 	break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (lle != NULL && (lle->la_flags & LLE_VALID))
 			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
 		else
 			error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
 		if (error)
 			return error;
 		type = htons(ETHERTYPE_IPV6);
 		break;
 #endif
 #ifdef IPX
 	case AF_IPX:
 		/* an installed ef_outputp hook chooses type and hlen itself */
 		if (ef_outputp) {
 		    error = ef_outputp(ifp, &m, dst, &type, &hlen);
 		    if (error)
 			goto bad;
 		} else
 		    type = htons(ETHERTYPE_IPX);
 		bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host,
 		    edst, sizeof (edst));
 		break;
 #endif
 #ifdef NETATALK
 	case AF_APPLETALK:
 	  {
 	    struct at_ifaddr *aa;
 
 	    if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == NULL)
 		    senderr(EHOSTUNREACH); /* XXX */
 	    /* aa is released with ifa_free() on every path below */
 	    if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
 		    ifa_free(&aa->aa_ifa);
 		    return (0);
 	    }
 	    /*
 	     * In the phase 2 case, need to prepend an mbuf for the llc header.
 	     */
 	    if ( aa->aa_flags & AFA_PHASE2 ) {
 		struct llc llc;
 
 		ifa_free(&aa->aa_ifa);
 		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == NULL)
 			senderr(ENOBUFS);
 		llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
 		llc.llc_control = LLC_UI;
 		bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code));
 		llc.llc_snap_ether_type = htons( ETHERTYPE_AT );
 		bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
 		/* the type field carries the packet length in this case */
 		type = htons(m->m_pkthdr.len);
 		hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN;
 	    } else {
 		ifa_free(&aa->aa_ifa);
 		type = htons(ETHERTYPE_AT);
 	    }
 	    break;
 	  }
 #endif /* NETATALK */
 
 	case pseudo_AF_HDRCMPLT:
 	    {
 		const struct ether_header *eh;
 		
 		/* source address comes from the header embedded in dst too */
 		hdrcmplt = 1;
 		eh = (const struct ether_header *)dst->sa_data;
 		(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
 		/* FALLTHROUGH */
 
 	case AF_UNSPEC:
 		/* destination and type come from the header embedded in dst */
 		loop_copy = 0; /* if this is for us, don't do it */
 		eh = (const struct ether_header *)dst->sa_data;
 		(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
 		type = eh->ether_type;
 		break;
             }
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 	}
 
 	/*
 	 * Link-layer entry flagged LLE_IFADDR: deliver through if_simloop()
 	 * without building an Ethernet header.
 	 */
 	if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
 		update_mbuf_csumflags(m, m);
 		return (if_simloop(ifp, m, dst->sa_family, 0));
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	eh = mtod(m, struct ether_header *);
 	(void)memcpy(&eh->ether_type, &type,
 		sizeof(eh->ether_type));
 	(void)memcpy(eh->ether_dhost, edst, sizeof (edst));
 	/* source: caller-supplied (pseudo_AF_HDRCMPLT) or the interface's own */
 	if (hdrcmplt)
 		(void)memcpy(eh->ether_shost, esrc,
 			sizeof(eh->ether_shost));
 	else
 		(void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
 			sizeof(eh->ether_shost));
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 *
 	 * The loopback is skipped when the mbuf carries a pf tag whose
 	 * routed field is set.
 	 */
 	if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		if (m->m_flags & M_BCAST) {
 			struct mbuf *n;
 
 			/*
 			 * Because if_simloop() modifies the packet, we need a
 			 * writable copy through m_dup() instead of a readonly
 			 * one as m_copy[m] would give us. The alternative would
 			 * be to modify if_simloop() to handle the readonly mbuf,
 			 * but performancewise it is mostly equivalent (trading
 			 * extra data copying vs. extra locking).
 			 *
 			 * XXX This is a local workaround.  A number of less
 			 * often used kernel parts suffer from the same bug.
 			 * See PR kern/105943 for a proposed general solution.
 			 */
 			if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 				update_mbuf_csumflags(m, n);
 				(void)if_simloop(ifp, n, dst->sa_family, hlen);
 			} else
 				ifp->if_iqdrops++;
 		} else if (bcmp(eh->ether_dhost, eh->ether_shost,
 				ETHER_ADDR_LEN) == 0) {
 			/* addressed to ourselves: loop the original, nothing is sent */
 			update_mbuf_csumflags(m, m);
 			(void) if_simloop(ifp, m, dst->sa_family, hlen);
 			return (0);	/* XXX */
 		}
 	}
 
        /*
 	* Bridges require special output handling.
 	*/
 	if (ifp->if_bridge) {
 		BRIDGE_OUTPUT(ifp, m, error);
 		return (error);
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
 	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
 	/* Handle ng_ether(4) processing, if any */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_output_p != NULL,
 		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 			/*
 			 * Common error exit: senderr() and the "goto bad"
 			 * statements above land here as well.
 			 */
 bad:			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		/* the netgraph hook left no mbuf behind: nothing more to send */
 		if (m == NULL)
 			return (0);
 	}
 
 	/* Continue with link-layer output */
 	return ether_output_frame(ifp, m);
 }
 
 /*
  * Ethernet link layer output routine to send a raw frame to the device.
  *
  * This assumes that the 14 byte Ethernet header is present and contiguous
  * in the first mbuf (if BRIDGE'ing).
  *
  * When link-layer pfil hooks are installed they run first (PFIL_OUT):
  * a non-zero hook result is reported as EACCES, and a hook run that
  * leaves m NULL ends processing with 0.  Otherwise the result of the
  * interface's if_transmit method is returned.
  */
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
 	if (PFIL_HOOKED(&V_link_pfil_hook)) {
 		if (pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT,
 		    NULL) != 0)
 			return (EACCES);
 
 		/* nothing left to send */
 		if (m == NULL)
 			return (0);
 	}
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return ((ifp->if_transmit)(ifp, m));
 }
 
 #if defined(INET) || defined(INET6)
 #endif
 
 /*
  * Process a received Ethernet packet; the packet is in the
  * mbuf chain m with the ethernet header at the front.
  *
  * The frame is sanity-checked, flagged M_BCAST/M_MCAST, shown to bpf,
  * stripped of a trailing FCS, accounted, and then offered in turn to
  * lagg(4), the software 802.1Q decapsulation, ng_ether(4) and
  * if_bridge(4).  Whatever survives is marked M_PROMISC if it is not
  * addressed to us and is passed to ether_demux().
  *
  * The mbuf is always consumed: it is either freed here or handed on.
  */
 static void
 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
 		m_freem(m);
 		return;
 	}
 #endif
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		if_printf(ifp, "discard frame w/o packet header\n");
 		ifp->if_ierrors++;
 		m_freem(m);
 		return;
 	}
 	if (m->m_len < ETHER_HDR_LEN) {
 		/* XXX maybe should pullup? */
 		if_printf(ifp, "discard frame w/o leading ethernet "
 				"header (len %u pkt len %u)\n",
 				m->m_len, m->m_pkthdr.len);
 		ifp->if_ierrors++;
 		m_freem(m);
 		return;
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		ifp->if_ierrors++;
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if (m->m_pkthdr.rcvif != ifp) {
 		if_printf(ifp, "Warning, frame marked as received on %s\n",
 			m->m_pkthdr.rcvif->if_xname);
 	}
 #endif
 
 	/* every return below this point must be paired with CURVNET_RESTORE() */
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		ifp->if_imcasts++;
 	}
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	ETHER_BPF_MTAP(ifp, m);
 
 	/*
 	 * If the CRC is still on the packet, trim it off. We do this once
 	 * and once only in case we are re-entered. Nothing else on the
 	 * Ethernet receive path expects to see the FCS.
 	 */
 	if (m->m_flags & M_HASFCS) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 
 	if (!(ifp->if_capenable & IFCAP_HWSTATS))
 		ifp->if_ibytes += m->m_pkthdr.len;
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		CURVNET_RESTORE();
 		return;
 	}
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
 		KASSERT(lagg_input_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
 		m = (*lagg_input_p)(ifp, m);
 		/* continue on behalf of the interface recorded in the mbuf */
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * If the hardware did not process an 802.1Q tag, do this now,
 	 * to allow 802.1P priority frames to be passed to the main input
 	 * path correctly.
 	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
 	 */
 	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
 		struct ether_vlan_header *evl;
 
 		if (m->m_len < sizeof(*evl) &&
 		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 #ifdef DIAGNOSTIC
 			if_printf(ifp, "cannot pullup VLAN header\n");
 #endif
 			/*
 			 * m is NULL on this path (m_pullup() disposes of the
 			 * chain itself when it fails, see mbuf(9)), so there
 			 * is nothing left to free here.
 			 */
 			ifp->if_ierrors++;
 			CURVNET_RESTORE();
 			return;
 		}
 
 		/* record the tag in the packet header ... */
 		evl = mtod(m, struct ether_vlan_header *);
 		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
 		m->m_flags |= M_VLANTAG;
 
 		/* ... and slide the MAC addresses over the encapsulation */
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Allow ng_ether(4) to claim this frame. */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
 		    ("%s: ng_ether_input_p is NULL", __func__));
 		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		/* the hook may have replaced the mbuf: reload the header */
 		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
 	 */
 	if (ifp->if_bridge != NULL) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
 	/*
 	 * Clear M_PROMISC on frame so that carp(4) will see it when the
 	 * mbuf flows up to Layer 3.
 	 * FreeBSD's implementation of carp(4) uses the inprotosw
 	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
 	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
 	 * is outside the scope of the M_PROMISC test below.
 	 * TODO: Maintain a hash table of ethernet addresses other than
 	 * ether_dhost which may be active on this ifp.
 	 */
 	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
 		m->m_flags &= ~M_PROMISC;
 	} else
 #endif
 	{
 		/*
 		 * If the frame received was not for our MAC address, set the
 		 * M_PROMISC flag on the mbuf chain. The frame may need to
 		 * be seen by the rest of the Ethernet input path in case of
 		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
 		 * seen by upper protocol layers.
 		 */
 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
 		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
 			m->m_flags |= M_PROMISC;
 	}
 
 	if (harvest.ethernet)
 		random_harvest(&(m->m_data), 12, 2, RANDOM_NET_ETHER);
 
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Ethernet input dispatch; by default, direct dispatch here regardless of
  * global configuration.
  *
  * netisr handler for NETISR_ETHER: the receiving interface is taken
  * from the mbuf's packet header.
  */
 static void
 ether_nh_input(struct mbuf *m)
 {
 	struct ifnet *rcvif = m->m_pkthdr.rcvif;
 
 	ether_input_internal(rcvif, m);
 }
 
 /*
  * netisr registration record for Ethernet input; registered by
  * ether_init() and fed by ether_input() through netisr_dispatch().
  */
 static struct netisr_handler	ether_nh = {
 	.nh_name = "ether",
 	.nh_handler = ether_nh_input,	/* runs ether_input_internal() */
 	.nh_proto = NETISR_ETHER,
 	.nh_policy = NETISR_POLICY_SOURCE,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,	/* direct dispatch by default */
 };
 
 /*
  * Boot-time initialisation: register the Ethernet netisr handler.
  * Hooked into startup by the SYSINIT() below; arg is unused.
  */
 static void
 ether_init(__unused void *arg)
 {
 
 	netisr_register(&ether_nh);
 }
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 /*
  * Per-vnet initialisation: set up the link-layer pfil head (type
  * PFIL_TYPE_AF, family AF_LINK) and register it.  A registration
  * failure is only reported with a warning.
  */
 static void
 vnet_ether_init(__unused void *arg)
 {
 	int error;
 
 	/* Initialize packet filter hooks. */
 	V_link_pfil_hook.ph_af = AF_LINK;
 	V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
 
 	error = pfil_head_register(&V_link_pfil_hook);
 	if (error == 0)
 		return;
 
 	printf("%s: WARNING: unable to register pfil link hook, "
 		"error %d\n", __func__, error);
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
  
 /*
  * Per-vnet teardown: unregister the link-layer pfil head.  A failure
  * is only reported with a warning.
  */
 static void
 vnet_ether_destroy(__unused void *arg)
 {
 	int error;
 
 	error = pfil_head_unregister(&V_link_pfil_hook);
 	if (error == 0)
 		return;
 
 	printf("%s: WARNING: unable to unregister pfil link hook, "
 		"error %d\n", __func__, error);
 }
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
 
 
 
 /*
  * if_input method installed by ether_ifattach(): queue a received
  * frame to the NETISR_ETHER handler.  Only m is passed on; the handler
  * recovers the interface from m->m_pkthdr.rcvif.
  */
 static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 
 	/*
 	 * We will rely on rcvif being set properly in the deferred context,
 	 * so assert it is correct here.
 	 */
 	KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
 
 	netisr_dispatch(NETISR_ETHER, m);
 }
 
 /*
  * Upper layer processing for a received Ethernet packet.
  *
  * Runs the inbound link-layer pfil hooks (not for M_PROMISC frames),
  * hands VLAN-tagged frames to vlan_input, drops promiscuously received
  * frames unless IFF_PPROMISC is set, strips the Ethernet header and
  * dispatches the payload to the netisr selected by the ethertype.
  * Frames nobody wants are offered to netgraph's orphan hook if
  * ng_ether(4) is attached, and freed otherwise.
  *
  * The mbuf is always consumed.
  */
 void
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	int i, isr;
 	u_short ether_type;
 #if defined(NETATALK)
 	struct llc *l;
 #endif
 
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 	/* Do not grab PROMISC frames in case we are re-entered. */
 	if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
 		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
 
 		if (i != 0 || m == NULL)
 			return;
 	}
 
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
 	/*
 	 * If this frame has a VLAN tag other than 0, call vlan_input()
 	 * if its module is loaded. Otherwise, drop.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
 	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
 		if (ifp->if_vlantrunk == NULL) {
 			ifp->if_noproto++;
 			m_freem(m);
 			return;
 		}
 		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
 		    __func__));
 		/* Clear before possibly re-entering ether_input(). */
 		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
 	 * Pass promiscuously received frames to the upper layer if the user
 	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
 	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		/* ip_fastforward() returning NULL means the packet is gone */
 		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef IPX
 	case ETHERTYPE_IPX:
 		/* a zero return from the ef_inputp hook ends processing here */
 		if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
 			return;
 		isr = NETISR_IPX;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 #ifdef NETATALK
 	case ETHERTYPE_AT:
 		isr = NETISR_ATALK1;
 		break;
 	case ETHERTYPE_AARP:
 		isr = NETISR_AARP;
 		break;
 #endif /* NETATALK */
 	default:
 #ifdef IPX
 		if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
 			return;
 #endif /* IPX */
 #if defined(NETATALK)
 		/* only a length-style type field can carry an LLC/SNAP frame */
 		if (ether_type > ETHERMTU)
 			goto discard;
 		l = mtod(m, struct llc *);
 		if (l->llc_dsap == LLC_SNAP_LSAP &&
 		    l->llc_ssap == LLC_SNAP_LSAP &&
 		    l->llc_control == LLC_UI) {
 			if (bcmp(&(l->llc_snap_org_code)[0], at_org_code,
 			    sizeof(at_org_code)) == 0 &&
 			    ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) {
 				m_adj(m, LLC_SNAPFRAMELEN);
 				isr = NETISR_ATALK2;
 				break;
 			}
 			if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code,
 			    sizeof(aarp_org_code)) == 0 &&
 			    ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
 				m_adj(m, LLC_SNAPFRAMELEN);
 				isr = NETISR_AARP;
 				break;
 			}
 		}
 #endif /* NETATALK */
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	/*
 	 * Packet is to be discarded.  If netgraph is present,
 	 * hand the packet to it for last chance processing;
 	 * otherwise dispose of it.
 	 */
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_input_orphan_p != NULL,
 		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		/*
 		 * M_PREPEND() leaves m NULL when it cannot get the space
 		 * (the chain is already gone then, see mbuf(9)); do not
 		 * pass a NULL mbuf on to the orphan hook.
 		 */
 		if (m == NULL)
 			return;
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
 	m_freem(m);
 }
 
 /*
  * Render an Ethernet address in printable form using the kernel's
  * "%6D" conversion with ":" as the separator.
  *
  * The text is written into one function-static buffer, so every call
  * overwrites the result of the previous one.  This routine exists for
  * compatibility; callers are better served by formatting directly with
  *
  *	printf("%6D", <pointer to address>, ":");
  *
  * which involves no static buffer at all.
  */
 char *
 ether_sprintf(const u_char *ap)
 {
 	static char macstr[18];

 	snprintf(macstr, sizeof(macstr), "%6D", ap, ":");
 	return (macstr);
 }
 
 /*
  * Perform common duties while attaching to interface list.
  *
  * ifp: interface being attached; its if_addr must already be set up
  *      (asserted below).
  * lla: link-level (MAC) address, ifp->if_addrlen bytes are copied from it.
  *
  * Fills in the Ethernet-specific ifnet fields and method pointers, stores
  * the address in the interface's link-level sockaddr, attaches BPF with
  * DLT_EN10MB, notifies netgraph if its attach hook is installed, and
  * registers the address with uuid_ether_add().
  */
 void
 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	int i;
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	/* These two are set before if_attach(); the rest only afterwards. */
 	ifp->if_addrlen = ETHER_ADDR_LEN;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 	if_attach(ifp);
 	/* NOTE(review): presumably assigned after if_attach() on purpose -- confirm before reordering. */
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_output = ether_output;
 	ifp->if_input = ether_input;
 	ifp->if_resolvemulti = ether_resolvemulti;
 #ifdef VIMAGE
 	ifp->if_reassign = ether_reassign;
 #endif
 	/* Only supply a baudrate when the driver has not set one. */
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
 	/* Record type, length and bytes of the MAC in the link-level sockaddr. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	/* The netgraph hook is optional; it is only called when installed. */
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
 
 	/* Announce Ethernet MAC address if non-zero. */
 	/* The scan stops at the first non-zero byte; i == if_addrlen means all zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
 		if (lla[i] != 0)
 			break; 
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
 
 	uuid_ether_add(LLADDR(sdl));
 }
 
 /*
  * Common teardown for an Ethernet interface.
  *
  * In order: hand the interface's link-level address to uuid_ether_del(),
  * call the netgraph detach hook when the interface has netgraph state,
  * then detach from BPF and from the interface list.
  */
 void
 ether_ifdetach(struct ifnet *ifp)
 {
 	struct sockaddr_dl *lladdr_sdl =
 	    (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);

 	uuid_ether_del(LLADDR(lladdr_sdl));

 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		/* Netgraph state implies the netgraph module installed its hook. */
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}

 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 #ifdef VIMAGE
 /*
  * if_reassign method installed by ether_ifattach() on VIMAGE kernels.
  *
  * Detaches the interface from netgraph if it currently has netgraph
  * state, then, if the netgraph attach hook is installed, re-attaches it
  * with new_vnet set as the current vnet for the duration of the call.
  * The third argument is unused.
  */
 void
 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 {
 
 	if (IFP2AC(ifp)->ac_netgraph != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	if (ng_ether_attach_p != NULL) {
 		/* Attach runs inside new_vnet; the previous vnet is restored after. */
 		CURVNET_SET_QUIET(new_vnet);
 		(*ng_ether_attach_p)(ifp);
 		CURVNET_RESTORE();
 	}
 }
 #endif
 
 /*
  * Declare the existing _net_link sysctl parent and create the read-write
  * "ether" node beneath it, numbered IFT_ETHER and described as "Ethernet".
  */
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
 
 /*
  * ether_crc32_le: little-endian (reflected) CRC-32 over a byte buffer.
  *
  * buf: bytes to checksum; not dereferenced when len is 0.
  * len: number of bytes in buf.
  *
  * Returns the running CRC starting from 0xffffffff.  No final inversion
  * is applied, so an empty buffer yields 0xffffffff.
  *
  * Two implementations follow; only the table-driven one is compiled.
  */
 #if 0
 /*
  * This is for reference.  We have a table-driven version
  * of the little-endian crc32 generator, which is faster
  * than the double-loop.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc, carry;	/* carry was previously used undeclared */
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		/* Consume each input byte least-significant bit first. */
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = (crc ^ data) & 1;
 			crc >>= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_LE);
 		}
 	}
 
 	return (crc);
 }
 #else
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	/* One entry per 4-bit value; each byte takes two lookups. */
 	static const uint32_t crctab[] = {
 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 	};
 	size_t i;
 	uint32_t crc;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		crc ^= buf[i];
 		/* Low nibble first, then the high nibble. */
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 	}
 
 	return (crc);
 }
 #endif
 
 /*
  * Big-endian CRC-32 over a byte buffer, computed one bit at a time.
  *
  * Each input byte is consumed least-significant bit first.  The register
  * starts at 0xffffffff and is returned without a final inversion.
  */
 uint32_t
 ether_crc32_be(const uint8_t *buf, size_t len)
 {
 	uint32_t crc = 0xffffffff;	/* initial value */
 	uint32_t msb, lsb;
 	uint8_t octet;
 	size_t pos;
 	int shift;

 	for (pos = 0; pos < len; pos++) {
 		octet = buf[pos];
 		for (shift = 0; shift < 8; shift++) {
 			msb = (crc >> 31) & 1;
 			lsb = (octet >> shift) & 1;
 			crc <<= 1;
 			/* Feedback is set when the shifted-out bit and data bit differ. */
 			if (msb != lsb)
 				crc = (crc ^ ETHER_CRC_POLY_BE) | 1;
 		}
 	}

 	return (crc);
 }
 
 /*
  * Generic ioctl handling for Ethernet interfaces.
  *
  * Handles SIOCSIFADDR, SIOCGIFADDR and SIOCSIFMTU; any other command
  * yields EINVAL.  Returns 0 on success or an errno value.
  *
  * `data' is viewed as a struct ifaddr for SIOCSIFADDR and as a struct
  * ifreq for the other commands; both pointers alias the same argument.
  */
 int
 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		/* Setting an address marks the interface up, then (re)initialises it. */
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 #ifdef IPX
 		/*
 		 * XXX - This code is probably wrong
 		 */
 		case AF_IPX:
 			{
 			struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
 
 			/*
 			 * Null IPX host: take the host part from the interface's
 			 * link-level address.  Otherwise copy the IPX host part
 			 * over the interface's link-level address.
 			 */
 			if (ipx_nullhost(*ina))
 				ina->x_host =
 				    *(union ipx_host *)
 				    IF_LLADDR(ifp);
 			else {
 				bcopy((caddr_t) ina->x_host.c_host,
 				      (caddr_t) IF_LLADDR(ifp),
 				      ETHER_ADDR_LEN);
 			}
 
 			/*
 			 * Set new address
 			 */
 			ifp->if_init(ifp->if_softc);
 			break;
 			}
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		{
 			struct sockaddr *sa;
 
 			/* Copy the link-level address into sa_data of the request. */
 			sa = (struct sockaddr *) & ifr->ifr_data;
 			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 */
 		/* NOTE(review): only the upper bound is checked here -- confirm callers reject too-small values. */
 		if (ifr->ifr_mtu > ETHERMTU) {
 			error = EINVAL;
 		} else {
 			ifp->if_mtu = ifr->ifr_mtu;
 		}
 		break;
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 	return (error);
 }
 
 static int
 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!ETHER_IS_MULTICAST(e_addr))
 			return EADDRNOTAVAIL;
 		*llsa = 0;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		sdl = malloc(sizeof *sdl, M_IFMADDR,
 		       M_NOWAIT|M_ZERO);
 		if (sdl == NULL)
 			return ENOMEM;
 		sdl->sdl_len = sizeof *sdl;
 		sdl->sdl_family = AF_LINK;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = IFT_ETHER;
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return 0;
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		sdl = malloc(sizeof *sdl, M_IFMADDR,
 		       M_NOWAIT|M_ZERO);
 		if (sdl == NULL)
 			return (ENOMEM);
 		sdl->sdl_len = sizeof *sdl;
 		sdl->sdl_family = AF_LINK;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = IFT_ETHER;
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return EAFNOSUPPORT;
 	}
 }
 
 /*
  * Allocator registered for IFT_ETHER: returns a zeroed struct arpcom
  * from M_ARPCOM whose ac_ifp points back at ifp.  The allocation uses
  * M_WAITOK.  The type argument is not used.
  */
 static void *
 ether_alloc(u_char type, struct ifnet *ifp)
 {
 	struct arpcom *com;

 	com = malloc(sizeof(*com), M_ARPCOM, M_WAITOK | M_ZERO);
 	com->ac_ifp = ifp;
 	return (com);
 }
 
 /*
  * Release callback registered for IFT_ETHER alongside ether_alloc():
  * frees com back to M_ARPCOM.  The type argument is not used.
  */
 static void
 ether_free(void *com, u_char type)
 {
 
 	free(com, M_ARPCOM);
 }
 
 /*
  * Module event handler for "ether".
  *
  * MOD_LOAD registers ether_alloc()/ether_free() as the allocation
  * callbacks for IFT_ETHER; MOD_UNLOAD removes that registration.  Both
  * return 0.  Every other event type is rejected with EOPNOTSUPP.
  */
 static int
 ether_modevent(module_t mod, int type, void *data)
 {

 	if (type == MOD_LOAD) {
 		if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		if_deregister_com_alloc(IFT_ETHER);
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module descriptor passed to DECLARE_MODULE(): the module name, its
  * event handler, and a zero third member (no extra argument).
  */
 static moduledata_t ether_mod = {
 	"ether",
 	ether_modevent,
 	0
 };
 
 /*
  * Tap a frame to BPF with its VLAN tag re-inserted as an 802.1Q header.
  *
  * bp:   BPF interface to tap to.
  * m:    frame whose mbuf has M_VLANTAG set and whose first mbuf holds at
  *       least a full Ethernet header (both asserted).
  * data: optional caller-supplied data-link header, or NULL.
  * dlen: length of data in bytes.
  *
  * m is modified temporarily (its Ethernet header is skipped) and put
  * back to its original m_len/m_data before returning.
  */
 void
 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 {
 	struct ether_vlan_header vlan;
 	struct mbuf mv, mb;
 
 	KASSERT((m->m_flags & M_VLANTAG) != 0,
 	    ("%s: vlan information not present", __func__));
 	KASSERT(m->m_len >= sizeof(struct ether_header),
 	    ("%s: mbuf not large enough for header", __func__));
 	/*
 	 * Build the 802.1Q header on the stack: copy the plain Ethernet
 	 * header, move its type into evl_proto, then fill in the VLAN
 	 * ethertype and the tag taken from the packet header.
 	 */
 	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 	vlan.evl_proto = vlan.evl_encap_proto;
 	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 	/* Hide the original Ethernet header from BPF; undone at the end. */
 	m->m_len -= sizeof(struct ether_header);
 	m->m_data += sizeof(struct ether_header);
 	/*
 	 * If a data link has been supplied by the caller, then we will need to
 	 * re-create a stack allocated mbuf chain with the following structure:
 	 *
 	 * (1) mbuf #1 will contain the supplied data link
 	 * (2) mbuf #2 will contain the vlan header
 	 * (3) mbuf #3 will contain the original mbuf's packet data
 	 *
 	 * Otherwise, submit the packet and vlan header via bpf_mtap2().
 	 */
 	if (data != NULL) {
 		/* Only m_next, m_data and m_len of the stack mbufs are set. */
 		mv.m_next = m;
 		mv.m_data = (caddr_t)&vlan;
 		mv.m_len = sizeof(vlan);
 		mb.m_next = &mv;
 		mb.m_data = data;
 		mb.m_len = dlen;
 		bpf_mtap(bp, &mb);
 	} else
 		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 	/* Restore the caller's view of the mbuf. */
 	m->m_len += sizeof(struct ether_header);
 	m->m_data -= sizeof(struct ether_header);
 }
 
 /*
  * Insert an 802.1Q header carrying `tag' into the Ethernet frame in m.
  *
  * Returns the (possibly different) mbuf on success, or NULL when the
  * prepend or the pullup left no mbuf to return.
  */
 struct mbuf *
 ether_vlanencap(struct mbuf *m, uint16_t tag)
 {
 	struct ether_vlan_header *hdr;
 	char *base;

 	/* Make room in front; M_PREPEND adjusts m_len and m_pkthdr.len. */
 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m == NULL)
 		return (NULL);

 	/* The whole VLAN header must sit in the first mbuf. */
 	if (m->m_len < sizeof(*hdr) &&
 	    (m = m_pullup(m, sizeof(*hdr))) == NULL)
 		return (NULL);

 	/*
 	 * Slide the two MAC addresses forward over the new gap, then
 	 * write the 802.1Q ethertype and tag behind them.  The original
 	 * type field is left in place and becomes the encapsulated type.
 	 */
 	hdr = mtod(m, struct ether_vlan_header *);
 	base = (char *)hdr;
 	bcopy(base + ETHER_VLAN_ENCAP_LEN, base,
 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 	hdr->evl_encap_proto = htons(ETHERTYPE_VLAN);
 	hdr->evl_tag = htons(tag);
 	return (m);
 }
 
 /*
  * Register the "ether" module described by ether_mod at SI_SUB_INIT_IF /
  * SI_ORDER_ANY, and publish it as version 1.
  */
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
Index: stable/10/sys/net/pfvar.h
===================================================================
--- stable/10/sys/net/pfvar.h	(revision 263085)
+++ stable/10/sys/net/pfvar.h	(revision 263086)
@@ -1,1951 +1,1775 @@
 /*
  * Copyright (c) 2001 Daniel Hartmeier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
  *	$FreeBSD$
  */
 
 #ifndef _NET_PFVAR_H_
 #define _NET_PFVAR_H_
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/counter.h>
 #include <sys/refcount.h>
 #include <sys/tree.h>
 
 #include <net/radix.h>
 #include <netinet/in.h>
 
-#include <net/pf_mtag.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
 
-#define	PF_TCPS_PROXY_SRC	((TCP_NSTATES)+0)
-#define	PF_TCPS_PROXY_DST	((TCP_NSTATES)+1)
-
-#define	PF_MD5_DIGEST_LENGTH	16
-#ifdef MD5_DIGEST_LENGTH
-#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH
-#error
-#endif
-#endif
-
-enum	{ PF_INOUT, PF_IN, PF_OUT };
-enum	{ PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
-	  PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER };
-enum	{ PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
-	  PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
-enum	{ PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
-	  PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG };
-enum	{ PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY };
-enum	{ PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
-	  PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
-	  PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
-enum	{ PF_GET_NONE, PF_GET_CLR_CNTR };
-enum	{ PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH };
-
-/*
- * Note about PFTM_*: real indices into pf_rule.timeout[] come before
- * PFTM_MAX, special cases afterwards. See pf_state_expires().
- */
-enum	{ PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
-	  PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED,
-	  PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE,
-	  PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY,
-	  PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
-	  PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL,
-	  PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE,
-	  PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED };
-
-/* PFTM default values */
-#define PFTM_TCP_FIRST_PACKET_VAL	120	/* First TCP packet */
-#define PFTM_TCP_OPENING_VAL		30	/* No response yet */
-#define PFTM_TCP_ESTABLISHED_VAL	24*60*60/* Established */
-#define PFTM_TCP_CLOSING_VAL		15 * 60	/* Half closed */
-#define PFTM_TCP_FIN_WAIT_VAL		45	/* Got both FINs */
-#define PFTM_TCP_CLOSED_VAL		90	/* Got a RST */
-#define PFTM_UDP_FIRST_PACKET_VAL	60	/* First UDP packet */
-#define PFTM_UDP_SINGLE_VAL		30	/* Unidirectional */
-#define PFTM_UDP_MULTIPLE_VAL		60	/* Bidirectional */
-#define PFTM_ICMP_FIRST_PACKET_VAL	20	/* First ICMP packet */
-#define PFTM_ICMP_ERROR_REPLY_VAL	10	/* Got error response */
-#define PFTM_OTHER_FIRST_PACKET_VAL	60	/* First packet */
-#define PFTM_OTHER_SINGLE_VAL		30	/* Unidirectional */
-#define PFTM_OTHER_MULTIPLE_VAL		60	/* Bidirectional */
-#define PFTM_FRAG_VAL			30	/* Fragment expire */
-#define PFTM_INTERVAL_VAL		10	/* Expire interval */
-#define PFTM_SRC_NODE_VAL		0	/* Source tracking */
-#define PFTM_TS_DIFF_VAL		30	/* Allowed TS diff */
-
-enum	{ PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
-enum	{ PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
-	  PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
-#define PF_POOL_IDMASK		0x0f
-enum	{ PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
-	  PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
-enum	{ PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
-	  PF_ADDR_TABLE, PF_ADDR_URPFFAILED,
-	  PF_ADDR_RANGE };
-#define PF_POOL_TYPEMASK	0x0f
-#define PF_POOL_STICKYADDR	0x20
-#define	PF_WSCALE_FLAG		0x80
-#define	PF_WSCALE_MASK		0x0f
-
-#define	PF_LOG			0x01
-#define	PF_LOG_ALL		0x02
-#define	PF_LOG_SOCKET_LOOKUP	0x04
-
 /*
  * 128-bit address storage that can be viewed as an IPv4 address, an IPv6
  * address, or an array of 8-, 16- or 32-bit words.
  *
  * The #defines below let code write x.v4 / x.addr32 instead of
  * x.pfa.v4 / x.pfa.addr32.  They are ordinary object-like macros, so
  * they rewrite every later use of those identifiers, not only members
  * of this struct.
  */
 struct pf_addr {
 	union {
 		struct in_addr		v4;
 		struct in6_addr		v6;
 		u_int8_t		addr8[16];
 		u_int16_t		addr16[8];
 		u_int32_t		addr32[4];
 	} pfa;		    /* 128-bit address */
 #define v4	pfa.v4
 #define v6	pfa.v6
 #define addr8	pfa.addr8
 #define addr16	pfa.addr16
 #define addr32	pfa.addr32
 };
 
-#define	PF_TABLE_NAME_SIZE	 32
-
 /*
  * PFI_AFLAG_* bits, stored in pf_addr_wrap.iflags and
  * pfi_dynaddr.pfid_iflags.  PFI_AFLAG_MODEMASK covers the
  * NETWORK, BROADCAST and PEER bits; NOALIAS lies outside that mask.
  */
 #define PFI_AFLAG_NETWORK	0x01
 #define PFI_AFLAG_BROADCAST	0x02
 #define PFI_AFLAG_PEER		0x04
 #define PFI_AFLAG_MODEMASK	0x07
 #define PFI_AFLAG_NOALIAS	0x08
 
 /*
  * An address specification as used in rules and pools.
  *
  * v holds the value itself: an address/mask pair, an interface name, or
  * a table name.  p holds the associated pointer or count.  `type' is a
  * PF_ADDR_* value; PF_MISMATCHAW() shows which members are read for
  * which type (p.tbl for PF_ADDR_TABLE, p.dyn for PF_ADDR_DYNIFTL,
  * v.a for PF_ADDR_RANGE and PF_ADDR_ADDRMASK).
  */
 struct pf_addr_wrap {
 	union {
 		struct {
 			struct pf_addr		 addr;
 			struct pf_addr		 mask;
 		}			 a;
 		char			 ifname[IFNAMSIZ];
 		char			 tblname[PF_TABLE_NAME_SIZE];
 	}			 v;
 	union {
 		struct pfi_dynaddr	*dyn;
 		struct pfr_ktable	*tbl;
 		int			 dyncnt;
 		int			 tblcnt;
 	}			 p;
 	u_int8_t		 type;		/* PF_ADDR_* */
 	u_int8_t		 iflags;	/* PFI_AFLAG_* */
 };
 
 #ifdef _KERNEL
 
 /*
  * Kernel-only record reached through pf_addr_wrap.p.dyn (consulted by
  * PF_MISMATCHAW() for PF_ADDR_DYNIFTL entries).  Keeps separate IPv4 and
  * IPv6 address/mask pairs and counts, plus the table and interface it
  * refers to.
  */
 struct pfi_dynaddr {
 	TAILQ_ENTRY(pfi_dynaddr)	 entry;
 	struct pf_addr			 pfid_addr4;
 	struct pf_addr			 pfid_mask4;
 	struct pf_addr			 pfid_addr6;
 	struct pf_addr			 pfid_mask6;
 	struct pfr_ktable		*pfid_kt;
 	struct pfi_kif			*pfid_kif;
 	int				 pfid_net;	/* mask or 128 */
 	int				 pfid_acnt4;	/* address count IPv4 */
 	int				 pfid_acnt6;	/* address count IPv6 */
 	sa_family_t			 pfid_af;	/* rule af */
 	u_int8_t			 pfid_iflags;	/* PFI_AFLAG_* */
 };
 
 /*
  * Address manipulation macros
  *
  * Each macro converts its argument in place: it expands to an
  * assignment of the byte-swapped value back to (x).  The argument is
  * therefore evaluated twice and must be a modifiable lvalue without
  * side effects.  The expansion is not wrapped in parentheses.
  */
 #define	HTONL(x)	(x) = htonl((__uint32_t)(x))
 #define	HTONS(x)	(x) = htons((__uint16_t)(x))
 #define	NTOHL(x)	(x) = ntohl((__uint32_t)(x))
 #define	NTOHS(x)	(x) = ntohs((__uint16_t)(x))
 
 #define	PF_NAME		"pf"
 
 /*
  * Hash-row locking: each row carries its own mutex `lock'.  These take a
  * pointer to the row.
  */
 #define	PF_HASHROW_ASSERT(h)	mtx_assert(&(h)->lock, MA_OWNED)
 #define	PF_HASHROW_LOCK(h)	mtx_lock(&(h)->lock)
 #define	PF_HASHROW_UNLOCK(h)	mtx_unlock(&(h)->lock)
 
 /*
  * State locking: a state is protected by the mutex of the V_pf_idhash
  * row that PF_IDHASH(s) selects.
  */
 #define	PF_STATE_LOCK(s)						\
 	do {								\
 		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)];	\
 		PF_HASHROW_LOCK(_ih);					\
 	} while (0)
 
 #define	PF_STATE_UNLOCK(s)						\
 	do {								\
 		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))];	\
 		PF_HASHROW_UNLOCK(_ih);					\
 	} while (0)
 
 /* The assertion compiles to nothing unless INVARIANTS is defined. */
 #ifdef INVARIANTS
 #define	PF_STATE_LOCK_ASSERT(s)						\
 	do {								\
 		struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)];	\
 		PF_HASHROW_ASSERT(_ih);					\
 	} while (0)
 #else /* !INVARIANTS */
 #define	PF_STATE_LOCK_ASSERT(s)		do {} while (0)
 #endif /* INVARIANTS */
 
 /* Mutex wrappers for pf_unlnkdrules_mtx (defined elsewhere). */
 extern struct mtx pf_unlnkdrules_mtx;
 #define	PF_UNLNKDRULES_LOCK()	mtx_lock(&pf_unlnkdrules_mtx)
 #define	PF_UNLNKDRULES_UNLOCK()	mtx_unlock(&pf_unlnkdrules_mtx)
 
 /*
  * Reader/writer lock wrappers for pf_rules_lock (defined elsewhere).
  * PF_RULES_ASSERT() accepts either a read or a write hold; the R/W
  * variants require that specific mode.
  */
 extern struct rwlock pf_rules_lock;
 #define	PF_RULES_RLOCK()	rw_rlock(&pf_rules_lock)
 #define	PF_RULES_RUNLOCK()	rw_runlock(&pf_rules_lock)
 #define	PF_RULES_WLOCK()	rw_wlock(&pf_rules_lock)
 #define	PF_RULES_WUNLOCK()	rw_wunlock(&pf_rules_lock)
 #define	PF_RULES_ASSERT()	rw_assert(&pf_rules_lock, RA_LOCKED)
 #define	PF_RULES_RASSERT()	rw_assert(&pf_rules_lock, RA_RLOCKED)
 #define	PF_RULES_WASSERT()	rw_assert(&pf_rules_lock, RA_WLOCKED)
 
 /*
  * Module version numbers for pf, pflog and pfsync, followed by the
  * minimum / preferred / maximum versions for pflog and pfsync.
  * NOTE(review): presumably used with MODULE_VERSION()/MODULE_DEPEND()
  * in the respective modules -- confirm.
  */
 #define	PF_MODVER	1
 #define	PFLOG_MODVER	1
 #define	PFSYNC_MODVER	1
 
 #define	PFLOG_MINVER	1
 #define	PFLOG_PREFVER	PFLOG_MODVER
 #define	PFLOG_MAXVER	1
 #define	PFSYNC_MINVER	1
 #define	PFSYNC_PREFVER	PFSYNC_MODVER
 #define	PFSYNC_MAXVER	1
 
 /*
  * Select which set of address macros is compiled below.
  *
  * In the kernel: PF_INET_ONLY when only INET is defined, PF_INET6_ONLY
  * when only INET6 is defined, PF_INET_INET6 when both are.  When neither
  * is defined, none of the three is set.  Outside the kernel (the #else
  * branch) PF_INET_INET6 is always used.
  */
 #ifdef INET
 #ifndef INET6
 #define	PF_INET_ONLY
 #endif /* ! INET6 */
 #endif /* INET */
 
 #ifdef INET6
 #ifndef INET
 #define	PF_INET6_ONLY
 #endif /* ! INET */
 #endif /* INET6 */
 
 #ifdef INET
 #ifdef INET6
 #define	PF_INET_INET6
 #endif /* INET6 */
 #endif /* INET */
 
 #else
 
 #define	PF_INET_INET6
 
 #endif /* _KERNEL */
 
 /* Both IPv4 and IPv6 */
 #ifdef PF_INET_INET6
 
 /*
  * Address comparison for builds that handle both IPv4 and IPv6.
  *
  * a, b: pointers to objects with a 4-element addr32[] member.
  * c:    address family.  For AF_INET only addr32[0] is significant;
  *       for any other family all four words are compared.
  *
  * PF_AEQ   - non-zero when the two addresses are equal.
  * PF_ANEQ  - non-zero when they differ; exact negation of PF_AEQ.
  * PF_AZERO - non-zero when the address is all-zero.
  *
  * Arguments may be evaluated more than once.
  */
 #define PF_AEQ(a, b, c) \
 	(((c) == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
 	((a)->addr32[3] == (b)->addr32[3] && \
 	(a)->addr32[2] == (b)->addr32[2] && \
 	(a)->addr32[1] == (b)->addr32[1] && \
 	(a)->addr32[0] == (b)->addr32[0]))
 
 /*
  * For AF_INET the result depends on addr32[0] alone.  The previous
  * form fell through to the four-word comparison when the first words
  * matched, so leftover data in words 1..3 made two identical IPv4
  * addresses compare as different.
  */
 #define PF_ANEQ(a, b, c) \
 	((c) == AF_INET ? (a)->addr32[0] != (b)->addr32[0] : \
 	((a)->addr32[3] != (b)->addr32[3] || \
 	(a)->addr32[2] != (b)->addr32[2] || \
 	(a)->addr32[1] != (b)->addr32[1] || \
 	(a)->addr32[0] != (b)->addr32[0]))
 
 #define PF_AZERO(a, c) \
 	(((c) == AF_INET && !(a)->addr32[0]) || \
 	(!(a)->addr32[0] && !(a)->addr32[1] && \
 	!(a)->addr32[2] && !(a)->addr32[3] ))
 
 /* The remaining operations are family-aware functions defined elsewhere. */
 #define PF_MATCHA(n, a, m, b, f) \
 	pf_match_addr(n, a, m, b, f)
 
 #define PF_ACPY(a, b, f) \
 	pf_addrcpy(a, b, f)
 
 #define PF_AINC(a, f) \
 	pf_addr_inc(a, f)
 
 #define PF_POOLMASK(a, b, c, d, f) \
 	pf_poolmask(a, b, c, d, f)
 
 #else
 
 /* Just IPv6 */
 
 #ifdef PF_INET6_ONLY
 
 /*
  * IPv6-only variants: the family argument c is accepted but ignored, and
  * all four 32-bit words always take part in the comparison.
  */
 #define PF_AEQ(a, b, c) \
 	((a)->addr32[3] == (b)->addr32[3] && \
 	(a)->addr32[2] == (b)->addr32[2] && \
 	(a)->addr32[1] == (b)->addr32[1] && \
 	(a)->addr32[0] == (b)->addr32[0]) \
 
 #define PF_ANEQ(a, b, c) \
 	((a)->addr32[3] != (b)->addr32[3] || \
 	(a)->addr32[2] != (b)->addr32[2] || \
 	(a)->addr32[1] != (b)->addr32[1] || \
 	(a)->addr32[0] != (b)->addr32[0]) \
 
 #define PF_AZERO(a, c) \
 	(!(a)->addr32[0] && \
 	!(a)->addr32[1] && \
 	!(a)->addr32[2] && \
 	!(a)->addr32[3] ) \
 
 /* Same function-backed wrappers as in the dual-stack build. */
 #define PF_MATCHA(n, a, m, b, f) \
 	pf_match_addr(n, a, m, b, f)
 
 #define PF_ACPY(a, b, f) \
 	pf_addrcpy(a, b, f)
 
 #define PF_AINC(a, f) \
 	pf_addr_inc(a, f)
 
 #define PF_POOLMASK(a, b, c, d, f) \
 	pf_poolmask(a, b, c, d, f)
 
 #else
 
 /* Just IPv4 */
 #ifdef PF_INET_ONLY
 
 /*
  * IPv4-only variants: only addr32[0] is examined or modified, and the
  * family argument is ignored.  Copy, increment and pool masking are
  * open-coded here instead of calling the family-aware functions.
  */
 #define PF_AEQ(a, b, c) \
 	((a)->addr32[0] == (b)->addr32[0])
 
 #define PF_ANEQ(a, b, c) \
 	((a)->addr32[0] != (b)->addr32[0])
 
 #define PF_AZERO(a, c) \
 	(!(a)->addr32[0])
 
 #define PF_MATCHA(n, a, m, b, f) \
 	pf_match_addr(n, a, m, b, f)
 
 /* Expands to a bare assignment expression (no surrounding parentheses). */
 #define PF_ACPY(a, b, f) \
 	(a)->v4.s_addr = (b)->v4.s_addr
 
 /* Increment is done in host byte order, then stored back in network order. */
 #define PF_AINC(a, f) \
 	do { \
 		(a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
 	} while (0)
 
 /* a = (b & c) | (~c & d): bits under mask c come from b, the rest from d. */
 #define PF_POOLMASK(a, b, c, d, f) \
 	do { \
 		(a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
 		(((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
 	} while (0)
 
 #endif /* PF_INET_ONLY */
 #endif /* PF_INET6_ONLY */
 #endif /* PF_INET_INET6 */
 
 /*
  * XXX callers not FIB-aware in our version of pf yet.
  * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio.
  */
 /*
  * PF_MISMATCHAW(aw, x, af, neg, ifp, rtid)
  *
  * Tests address x (family af) against the pf_addr_wrap aw.  The inner
  * expression is true when x fails the test selected by aw->type:
  *
  *   PF_ADDR_NOROUTE     pf_routable(x, af, NULL, rtid) is true
  *   PF_ADDR_URPFFAILED  ifp is non-NULL and pf_routable(x, af, ifp, rtid)
  *   PF_ADDR_TABLE       x is not in table aw->p.tbl
  *   PF_ADDR_DYNIFTL     x does not match aw->p.dyn
  *   PF_ADDR_RANGE       x is outside [v.a.addr, v.a.mask]
  *   PF_ADDR_ADDRMASK    the mask is non-zero and x does not match
  *                       addr/mask
  *
  * The macro's value is that result compared with neg using `!=', so neg
  * inverts the outcome.  aw, x and af are evaluated several times.
  */
 #define	PF_MISMATCHAW(aw, x, af, neg, ifp, rtid)			\
 	(								\
 		(((aw)->type == PF_ADDR_NOROUTE &&			\
 		    pf_routable((x), (af), NULL, (rtid))) ||		\
 		(((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL &&	\
 		    pf_routable((x), (af), (ifp), (rtid))) ||		\
 		((aw)->type == PF_ADDR_TABLE &&				\
 		    !pfr_match_addr((aw)->p.tbl, (x), (af))) ||		\
 		((aw)->type == PF_ADDR_DYNIFTL &&			\
 		    !pfi_match_addr((aw)->p.dyn, (x), (af))) ||		\
 		((aw)->type == PF_ADDR_RANGE &&				\
 		    !pf_match_addr_range(&(aw)->v.a.addr,		\
 		    &(aw)->v.a.mask, (x), (af))) ||			\
 		((aw)->type == PF_ADDR_ADDRMASK &&			\
 		    !PF_AZERO(&(aw)->v.a.mask, (af)) &&			\
 		    !PF_MATCHA(0, &(aw)->v.a.addr,			\
 		    &(aw)->v.a.mask, (x), (af))))) !=			\
 		(neg)							\
 	)
 
 
 /*
  * User-id criterion of a rule: up to two uids plus a comparison operator.
  * NOTE(review): op presumably holds a PF_OP_* value, with uid[1] used by
  * the range operators -- confirm.
  */
 struct pf_rule_uid {
 	uid_t		 uid[2];
 	u_int8_t	 op;
 };
 
 /*
  * Group-id criterion of a rule; same layout as struct pf_rule_uid.
  * NOTE(review): the array is declared uid_t rather than gid_t; left
  * untouched because the layout may be shared with userland -- confirm.
  */
 struct pf_rule_gid {
 	uid_t		 gid[2];
 	u_int8_t	 op;
 };
 
 /*
  * One endpoint (source or destination) of a rule: an address
  * specification, up to two ports with a port operator, and a negation
  * flag for the address.
  */
 struct pf_rule_addr {
 	struct pf_addr_wrap	 addr;
 	u_int16_t		 port[2];
 	u_int8_t		 neg;
 	u_int8_t		 port_op;
 };
 
 /*
  * One entry of an address pool: the address specification, the tail-queue
  * linkage, and the interface given both by name and by kif pointer.
  */
 struct pf_pooladdr {
 	struct pf_addr_wrap		 addr;
 	TAILQ_ENTRY(pf_pooladdr)	 entries;
 	char				 ifname[IFNAMSIZ];
 	struct pfi_kif			*kif;
 };
 
 /* Tail-queue head type for a list of pf_pooladdr entries. */
 TAILQ_HEAD(pf_palist, pf_pooladdr);
 
 /*
  * 128-bit hash key viewable as 8-, 16- or 32-bit words.  As with
  * struct pf_addr, the #defines are global shorthands for the union
  * members and affect every later use of key8/key16/key32.
  */
 struct pf_poolhashkey {
 	union {
 		u_int8_t		key8[16];
 		u_int16_t		key16[8];
 		u_int32_t		key32[4];
 	} pfk;		    /* 128-bit hash key */
 #define key8	pfk.key8
 #define key16	pfk.key16
 #define key32	pfk.key32
 };
 
 /*
  * An address pool attached to a rule (see pf_rule.rpool): the list of
  * pool addresses, a cursor into it, a hash key, a running address
  * counter, a table index, a proxy port pair and option bits.
  * NOTE(review): opts presumably combines a PF_POOL_* type with flags
  * such as PF_POOL_STICKYADDR -- confirm.
  */
 struct pf_pool {
 	struct pf_palist	 list;
 	struct pf_pooladdr	*cur;
 	struct pf_poolhashkey	 key;
 	struct pf_addr		 counter;
 	int			 tblidx;
 	u_int16_t		 proxy_port[2];
 	u_int8_t		 opts;
 };
 
 
 /* A packed Operating System description for fingerprinting */
 /*
  * 0 means "any"; the two highest unsigned values (from casting -1 and -2)
  * are reserved for "unknown" and "no match".  See PF_OSFP_PACK/UNPACK
  * for the layout of ordinary values.
  */
 typedef u_int32_t pf_osfp_t;
 #define PF_OSFP_ANY	((pf_osfp_t)0)
 #define PF_OSFP_UNKNOWN	((pf_osfp_t)-1)
 #define PF_OSFP_NOMATCH	((pf_osfp_t)-2)
 
 /*
  * One operating-system match for a fingerprint: the packed OS id, entry
  * flags, and fixed-size class / version / subtype names.
  */
 struct pf_osfp_entry {
 	SLIST_ENTRY(pf_osfp_entry) fp_entry;
 	pf_osfp_t		fp_os;
 	int			fp_enflags;
 #define PF_OSFP_EXPANDED	0x001		/* expanded entry */
 #define PF_OSFP_GENERIC		0x002		/* generic signature */
 #define PF_OSFP_NODETAIL	0x004		/* no p0f details */
 #define PF_OSFP_LEN	32
 	char			fp_class_nm[PF_OSFP_LEN];
 	char			fp_version_nm[PF_OSFP_LEN];
 	char			fp_subtype_nm[PF_OSFP_LEN];
 };
 /*
  * Entry equality: same packed id and byte-identical name buffers.  The
  * names are compared with memcmp() over the full PF_OSFP_LEN bytes, so
  * bytes after a terminating NUL take part in the comparison.
  */
 #define PF_OSFP_ENTRY_EQ(a, b) \
     ((a)->fp_os == (b)->fp_os && \
     memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
     memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
     memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
 
 /* handle pf_osfp_t packing */
 /*
  * Layout of a packed pf_osfp_t, from the low bits up: 10 bits subtype,
  * 10 bits version, 10 bits class.  The bit counts for the reserved and
  * unused bits add up to the remaining two.
  */
 #define _FP_RESERVED_BIT	1  /* For the special negative #defines */
 #define _FP_UNUSED_BITS		1
 #define _FP_CLASS_BITS		10 /* OS Class (Windows, Linux) */
 #define _FP_VERSION_BITS	10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
 #define _FP_SUBTYPE_BITS	10 /* patch level (NT SP4, SP3, ECN patch) */
 /* Split osfp into its three fields; class/version/subtype are assigned to. */
 #define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
 	(class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
 	    ((1 << _FP_CLASS_BITS) - 1); \
 	(version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
 	    ((1 << _FP_VERSION_BITS) - 1);\
 	(subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
 } while(0)
 /* Build osfp from the three fields; each is masked to its width first. */
 #define PF_OSFP_PACK(osfp, class, version, subtype) do { \
 	(osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
 	    + _FP_SUBTYPE_BITS); \
 	(osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
 	    _FP_SUBTYPE_BITS; \
 	(osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
 } while(0)
 
 /* the fingerprint of an OSes TCP SYN packet */
 /*
  * pf_tcpopts_t packs the TCP option sequence, PF_OSFP_TCPOPT_BITS bits
  * per option, using the PF_OSFP_TCPOPT_* codes below.
  *
  * struct pf_os_fingerprint keeps the packed options, the window / packet
  * size / MSS values with PF_OSFP_* flags describing how they are to be
  * interpreted, and the list of OS entries that match this fingerprint.
  */
 typedef u_int64_t	pf_tcpopts_t;
 struct pf_os_fingerprint {
 	SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
 	pf_tcpopts_t		fp_tcpopts;	/* packed TCP options */
 	u_int16_t		fp_wsize;	/* TCP window size */
 	u_int16_t		fp_psize;	/* ip->ip_len */
 	u_int16_t		fp_mss;		/* TCP MSS */
 	u_int16_t		fp_flags;
 #define PF_OSFP_WSIZE_MOD	0x0001		/* Window modulus */
 #define PF_OSFP_WSIZE_DC	0x0002		/* Window don't care */
 #define PF_OSFP_WSIZE_MSS	0x0004		/* Window multiple of MSS */
 #define PF_OSFP_WSIZE_MTU	0x0008		/* Window multiple of MTU */
 #define PF_OSFP_PSIZE_MOD	0x0010		/* packet size modulus */
 #define PF_OSFP_PSIZE_DC	0x0020		/* packet size don't care */
 #define PF_OSFP_WSCALE		0x0040		/* TCP window scaling */
 #define PF_OSFP_WSCALE_MOD	0x0080		/* TCP window scale modulus */
 #define PF_OSFP_WSCALE_DC	0x0100		/* TCP window scale dont-care */
 #define PF_OSFP_MSS		0x0200		/* TCP MSS */
 #define PF_OSFP_MSS_MOD		0x0400		/* TCP MSS modulus */
 #define PF_OSFP_MSS_DC		0x0800		/* TCP MSS dont-care */
 #define PF_OSFP_DF		0x1000		/* IPv4 don't fragment bit */
 #define PF_OSFP_TS0		0x2000		/* Zero timestamp */
 #define PF_OSFP_INET6		0x4000		/* IPv6 */
 	u_int8_t		fp_optcnt;	/* TCP option count */
 	u_int8_t		fp_wscale;	/* TCP window scaling */
 	u_int8_t		fp_ttl;		/* IPv4 TTL */
 #define PF_OSFP_MAXTTL_OFFSET	40
 /* TCP options packing */
 #define PF_OSFP_TCPOPT_NOP	0x0		/* TCP NOP option */
 #define PF_OSFP_TCPOPT_WSCALE	0x1		/* TCP window scaling option */
 #define PF_OSFP_TCPOPT_MSS	0x2		/* TCP max segment size opt */
 #define PF_OSFP_TCPOPT_SACK	0x3		/* TCP SACK OK option */
 #define PF_OSFP_TCPOPT_TS	0x4		/* TCP timestamp option */
 #define PF_OSFP_TCPOPT_BITS	3		/* bits used by each option */
 /*
  * Number of options that fit in fp_tcpopts.  The whole expansion is
  * parenthesised so the macro behaves as a single operand inside larger
  * expressions (e.g. as the right-hand side of '/' or '%').
  */
 #define PF_OSFP_MAX_OPTS \
     ((sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
     / PF_OSFP_TCPOPT_BITS)
 
 	SLIST_ENTRY(pf_os_fingerprint)	fp_next;
 };
 
 /*
  * Flat form of a fingerprint with a single OS entry embedded by value,
  * plus fp_getnum.  NOTE(review): presumably the argument of the OS
  * fingerprint ioctls (the DIOCOSFPGET reference below suggests so) --
  * confirm against the ioctl definitions.
  */
 struct pf_osfp_ioctl {
 	struct pf_osfp_entry	fp_os;
 	pf_tcpopts_t		fp_tcpopts;	/* packed TCP options */
 	u_int16_t		fp_wsize;	/* TCP window size */
 	u_int16_t		fp_psize;	/* ip->ip_len */
 	u_int16_t		fp_mss;		/* TCP MSS */
 	u_int16_t		fp_flags;
 	u_int8_t		fp_optcnt;	/* TCP option count */
 	u_int8_t		fp_wscale;	/* TCP window scaling */
 	u_int8_t		fp_ttl;		/* IPv4 TTL */
 
 	int			fp_getnum;	/* DIOCOSFPGET number */
 };
 
 
 /*
  * Reference to a rule, stored either as a pointer or as a rule number.
  * NOTE(review): which member is valid is not recorded in the union;
  * presumably the number is the userland-visible form -- confirm.
  */
 union pf_rule_ptr {
 	struct pf_rule		*ptr;
 	u_int32_t		 nr;
 };
 
 #define	PF_ANCHOR_NAME_SIZE	 64
 
 /*
  * A single packet-filter rule.
  *
  * Roughly in declaration order: the source/destination criteria, the
  * skip[] links (one per PF_SKIP_* index), name buffers (label,
  * interface, queue names, tags, overload table), list linkage and
  * address pool, statistics, kernel object pointers, limits and
  * timeouts, counters, and finally the many one-byte match and action
  * fields.
  *
  * NOTE(review): field order and sizes are presumably part of the
  * kernel/userland ioctl ABI -- do not reorder without confirming.
  */
 struct pf_rule {
 	struct pf_rule_addr	 src;
 	struct pf_rule_addr	 dst;
 #define PF_SKIP_IFP		0
 #define PF_SKIP_DIR		1
 #define PF_SKIP_AF		2
 #define PF_SKIP_PROTO		3
 #define PF_SKIP_SRC_ADDR	4
 #define PF_SKIP_SRC_PORT	5
 #define PF_SKIP_DST_ADDR	6
 #define PF_SKIP_DST_PORT	7
 #define PF_SKIP_COUNT		8
 	union pf_rule_ptr	 skip[PF_SKIP_COUNT];
 #define PF_RULE_LABEL_SIZE	 64
 	char			 label[PF_RULE_LABEL_SIZE];
-#define PF_QNAME_SIZE		 64
 	char			 ifname[IFNAMSIZ];
 	char			 qname[PF_QNAME_SIZE];
 	char			 pqname[PF_QNAME_SIZE];
 #define	PF_TAG_NAME_SIZE	 64
 	char			 tagname[PF_TAG_NAME_SIZE];
 	char			 match_tagname[PF_TAG_NAME_SIZE];
 
 	char			 overload_tblname[PF_TABLE_NAME_SIZE];
 
 	TAILQ_ENTRY(pf_rule)	 entries;
 	struct pf_pool		 rpool;
 
 	/* Two-element arrays below are indexed by a 0/1 selector. */
 	u_int64_t		 evaluations;
 	u_int64_t		 packets[2];
 	u_int64_t		 bytes[2];
 
 	struct pfi_kif		*kif;
 	struct pf_anchor	*anchor;
 	struct pfr_ktable	*overload_tbl;
 
 	pf_osfp_t		 os_fingerprint;
 
 	int			 rtableid;
 	u_int32_t		 timeout[PFTM_MAX];
 	u_int32_t		 max_states;
 	u_int32_t		 max_src_nodes;
 	u_int32_t		 max_src_states;
 	u_int32_t		 max_src_conn;
 	struct {
 		u_int32_t		limit;
 		u_int32_t		seconds;
 	}			 max_src_conn_rate;
 	u_int32_t		 qid;
 	u_int32_t		 pqid;
 	u_int32_t		 rt_listid;
 	u_int32_t		 nr;
 	u_int32_t		 prob;
 	uid_t			 cuid;
 	pid_t			 cpid;
 
 	/* Kernel counter(9) handles; see the u_* fields at the end. */
 	counter_u64_t		 states_cur;
 	counter_u64_t		 states_tot;
 	counter_u64_t		 src_nodes;
 
 	u_int16_t		 return_icmp;
 	u_int16_t		 return_icmp6;
 	u_int16_t		 max_mss;
 	u_int16_t		 tag;
 	u_int16_t		 match_tag;
 	u_int16_t		 spare2;			/* netgraph */
 
 	struct pf_rule_uid	 uid;
 	struct pf_rule_gid	 gid;
 
 	u_int32_t		 rule_flag;
 	u_int8_t		 action;
 	u_int8_t		 direction;
 	u_int8_t		 log;
 	u_int8_t		 logif;
 	u_int8_t		 quick;
 	u_int8_t		 ifnot;
 	u_int8_t		 match_tag_not;
 	u_int8_t		 natpass;
 
 #define PF_STATE_NORMAL		0x1
 #define PF_STATE_MODULATE	0x2
 #define PF_STATE_SYNPROXY	0x3
 	u_int8_t		 keep_state;
 	sa_family_t		 af;
 	u_int8_t		 proto;
 	u_int8_t		 type;
 	u_int8_t		 code;
 	u_int8_t		 flags;
 	u_int8_t		 flagset;
 	u_int8_t		 min_ttl;
 	u_int8_t		 allow_opts;
 	u_int8_t		 rt;
 	u_int8_t		 return_ttl;
 	u_int8_t		 tos;
 	u_int8_t		 set_tos;
 	u_int8_t		 anchor_relative;
 	u_int8_t		 anchor_wildcard;
 
 #define PF_FLUSH		0x01
 #define PF_FLUSH_GLOBAL		0x02
 	u_int8_t		 flush;
 
 	struct {
 		struct pf_addr		addr;
 		u_int16_t		port;
 	}			divert;
 
 	/*
 	 * NOTE(review): plain 64-bit mirrors of states_cur / states_tot /
 	 * src_nodes, presumably filled in when a rule is exported to
 	 * userland -- confirm.
 	 */
 	uint64_t		 u_states_cur;
 	uint64_t		 u_states_tot;
 	uint64_t		 u_src_nodes;
 };
 
 /* rule flags */
 /*
  * NOTE(review): presumably the bits of pf_rule.rule_flag -- confirm.
  * PFRULE_DROP is 0, i.e. the absence of any return flag, so it cannot
  * be tested with a bitwise AND.
  */
 #define	PFRULE_DROP		0x0000
 #define	PFRULE_RETURNRST	0x0001
 #define	PFRULE_FRAGMENT		0x0002
 #define	PFRULE_RETURNICMP	0x0004
 #define	PFRULE_RETURN		0x0008
 #define	PFRULE_NOSYNC		0x0010
 #define PFRULE_SRCTRACK		0x0020  /* track source states */
 #define PFRULE_RULESRCTRACK	0x0040  /* per rule */
 #define	PFRULE_REFS		0x0080	/* rule has references */
 
 /* scrub flags */
 #define	PFRULE_NODF		0x0100
 #define	PFRULE_FRAGCROP		0x0200	/* non-buffering frag cache */
 #define	PFRULE_FRAGDROP		0x0400	/* drop funny fragments */
 #define PFRULE_RANDOMID		0x0800
 #define PFRULE_REASSEMBLE_TCP	0x1000
 #define PFRULE_SET_TOS		0x2000
 
 /* rule flags again */
 #define PFRULE_IFBOUND		0x00010000	/* if-bound */
 #define PFRULE_STATESLOPPY	0x00020000	/* sloppy state tracking */
 
 /* Defaults for the state table size and the adaptive timeout window. */
 #define PFSTATE_HIWAT		10000	/* default state table size */
 #define PFSTATE_ADAPT_START	6000	/* default adaptive timeout start */
 #define PFSTATE_ADAPT_END	12000	/* default adaptive timeout end */
 
 
 /*
  * Threshold bookkeeping record (embedded as pf_src_node.conn_rate in this
  * file): a limit together with the seconds/count/last fields.
  */
 struct pf_threshold {
 	u_int32_t	limit;
 #define	PF_THRESHOLD_MULT	1000
 /* Parenthesized so the quotient stays intact inside larger expressions. */
 #define PF_THRESHOLD_MAX	(0xffffffff / PF_THRESHOLD_MULT)
 	u_int32_t	seconds;
 	u_int32_t	count;
 	u_int32_t	last;
 };
 
 struct pf_src_node {
 	LIST_ENTRY(pf_src_node) entry;
 	struct pf_addr	 addr;
 	struct pf_addr	 raddr;
 	union pf_rule_ptr rule;
 	struct pfi_kif	*kif;
 	u_int64_t	 bytes[2];
 	u_int64_t	 packets[2];
 	u_int32_t	 states;
 	u_int32_t	 conn;
 	struct pf_threshold	conn_rate;
 	u_int32_t	 creation;
 	u_int32_t	 expire;
 	sa_family_t	 af;
 	u_int8_t	 ruletype;
 };
 
 #define PFSNODE_HIWAT		10000	/* default source node table size */
 
 struct pf_state_scrub {
 	struct timeval	pfss_last;	/* time received last packet	*/
 	u_int32_t	pfss_tsecr;	/* last echoed timestamp	*/
 	u_int32_t	pfss_tsval;	/* largest timestamp		*/
 	u_int32_t	pfss_tsval0;	/* original timestamp		*/
 	u_int16_t	pfss_flags;
 #define PFSS_TIMESTAMP	0x0001		/* modulate timestamp		*/
 #define PFSS_PAWS	0x0010		/* stricter PAWS checks		*/
 #define PFSS_PAWS_IDLED	0x0020		/* was idle too long.  no PAWS	*/
 #define PFSS_DATA_TS	0x0040		/* timestamp on data packets	*/
 #define PFSS_DATA_NOTS	0x0080		/* no timestamp on data packets	*/
 	u_int8_t	pfss_ttl;	/* stashed TTL			*/
 	u_int8_t	pad;
 	u_int32_t	pfss_ts_mod;	/* timestamp modulation		*/
 };
 
 struct pf_state_host {
 	struct pf_addr	addr;
 	u_int16_t	port;
 	u_int16_t	pad;
 };
 
 struct pf_state_peer {
 	struct pf_state_scrub	*scrub;	/* state is scrubbed		*/
 	u_int32_t	seqlo;		/* Max sequence number sent	*/
 	u_int32_t	seqhi;		/* Max the other end ACKd + win	*/
 	u_int32_t	seqdiff;	/* Sequence number modulator	*/
 	u_int16_t	max_win;	/* largest window (pre scaling)	*/
 	u_int16_t	mss;		/* Maximum segment size option	*/
 	u_int8_t	state;		/* active state level		*/
 	u_int8_t	wscale;		/* window scaling factor	*/
 	u_int8_t	tcp_est;	/* Did we reach TCPS_ESTABLISHED */
 	u_int8_t	pad[1];
 };
 
 /* Keep synced with struct pf_state_key. */
 struct pf_state_key_cmp {
 	struct pf_addr	 addr[2];
 	u_int16_t	 port[2];
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 pad[2];
 };
 
 /*
  * Full state key.  The members up to and including pad[] repeat the
  * layout of struct pf_state_key_cmp; the list/queue linkage follows them.
  */
 struct pf_state_key {
 	struct pf_addr	 addr[2];
 	u_int16_t	 port[2];
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 pad[2];
 
 	LIST_ENTRY(pf_state_key) entry;		/* list linkage */
 	TAILQ_HEAD(, pf_state)	 states[2];	/* two queues of pf_state */
 };
 
 /* Keep synced with struct pf_state. */
 struct pf_state_cmp {
 	u_int64_t		 id;
 	u_int32_t		 creatorid;
 	u_int8_t		 direction;
 	u_int8_t		 pad[3];
 };
 
 struct pf_state {
 	u_int64_t		 id;
 	u_int32_t		 creatorid;
 	u_int8_t		 direction;
 	u_int8_t		 pad[3];
 
 	u_int			 refs;
 	TAILQ_ENTRY(pf_state)	 sync_list;
 	TAILQ_ENTRY(pf_state)	 key_list[2];
 	LIST_ENTRY(pf_state)	 entry;
 	struct pf_state_peer	 src;
 	struct pf_state_peer	 dst;
 	union pf_rule_ptr	 rule;
 	union pf_rule_ptr	 anchor;
 	union pf_rule_ptr	 nat_rule;
 	struct pf_addr		 rt_addr;
 	struct pf_state_key	*key[2];	/* addresses stack and wire  */
 	struct pfi_kif		*kif;
 	struct pfi_kif		*rt_kif;
 	struct pf_src_node	*src_node;
 	struct pf_src_node	*nat_src_node;
 	u_int64_t		 packets[2];
 	u_int64_t		 bytes[2];
 	u_int32_t		 creation;
 	u_int32_t	 	 expire;
 	u_int32_t		 pfsync_time;
 	u_int16_t		 tag;
 	u_int8_t		 log;
 	u_int8_t		 state_flags;
 #define	PFSTATE_ALLOWOPTS	0x01
 #define	PFSTATE_SLOPPY		0x02
 /*  was	PFSTATE_PFLOW		0x04 */
 #define	PFSTATE_NOSYNC		0x08
 #define	PFSTATE_ACK		0x10
 	u_int8_t		 timeout;
 	u_int8_t		 sync_state; /* PFSYNC_S_x */
 
 	/* XXX */
 	u_int8_t		 sync_updates;
 	u_int8_t		_tail[3];
 };
 
 /*
  * Unified state structures for pulling states out of the kernel
  * used by pfsync(4) and the pf(4) ioctl.
  */
 struct pfsync_state_scrub {
 	u_int16_t	pfss_flags;
 	u_int8_t	pfss_ttl;	/* stashed TTL		*/
 #define PFSYNC_SCRUB_FLAG_VALID		0x01
 	u_int8_t	scrub_flag;
 	u_int32_t	pfss_ts_mod;	/* timestamp modulation	*/
 } __packed;
 
 struct pfsync_state_peer {
 	struct pfsync_state_scrub scrub;	/* state is scrubbed	*/
 	u_int32_t	seqlo;		/* Max sequence number sent	*/
 	u_int32_t	seqhi;		/* Max the other end ACKd + win	*/
 	u_int32_t	seqdiff;	/* Sequence number modulator	*/
 	u_int16_t	max_win;	/* largest window (pre scaling)	*/
 	u_int16_t	mss;		/* Maximum segment size option	*/
 	u_int8_t	state;		/* active state level		*/
 	u_int8_t	wscale;		/* window scaling factor	*/
 	u_int8_t	pad[6];
 } __packed;
 
 struct pfsync_state_key {
 	struct pf_addr	 addr[2];
 	u_int16_t	 port[2];
 };
 
 struct pfsync_state {
 	u_int64_t	 id;
 	char		 ifname[IFNAMSIZ];
 	struct pfsync_state_key	key[2];
 	struct pfsync_state_peer src;
 	struct pfsync_state_peer dst;
 	struct pf_addr	 rt_addr;
 	u_int32_t	 rule;
 	u_int32_t	 anchor;
 	u_int32_t	 nat_rule;
 	u_int32_t	 creation;
 	u_int32_t	 expire;
 	u_int32_t	 packets[2][2];
 	u_int32_t	 bytes[2][2];
 	u_int32_t	 creatorid;
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 direction;
 	u_int8_t	 __spare[2];
 	u_int8_t	 log;
 	u_int8_t	 state_flags;
 	u_int8_t	 timeout;
 	u_int8_t	 sync_flags;
 	u_int8_t	 updates;
 } __packed;
 
 #ifdef _KERNEL
 /* pfsync */
 typedef int		pfsync_state_import_t(struct pfsync_state *, u_int8_t);
 typedef	void		pfsync_insert_state_t(struct pf_state *);
 typedef	void		pfsync_update_state_t(struct pf_state *);
 typedef	void		pfsync_delete_state_t(struct pf_state *);
 typedef void		pfsync_clear_states_t(u_int32_t, const char *);
 typedef int		pfsync_defer_t(struct pf_state *, struct mbuf *);
 
 extern pfsync_state_import_t	*pfsync_state_import_ptr;
 extern pfsync_insert_state_t	*pfsync_insert_state_ptr;
 extern pfsync_update_state_t	*pfsync_update_state_ptr;
 extern pfsync_delete_state_t	*pfsync_delete_state_ptr;
 extern pfsync_clear_states_t	*pfsync_clear_states_ptr;
 extern pfsync_defer_t		*pfsync_defer_ptr;
 
 void			pfsync_state_export(struct pfsync_state *,
 			    struct pf_state *);
 
 /* pflog */
 struct pf_ruleset;
 struct pf_pdesc;
 typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
     u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
     struct pf_ruleset *, struct pf_pdesc *, int);
 extern pflog_packet_t		*pflog_packet_ptr;
 
 #define	V_pf_end_threads	VNET(pf_end_threads)
 #endif /* _KERNEL */
 
 #define	PFSYNC_FLAG_SRCNODE	0x04
 #define	PFSYNC_FLAG_NATSRCNODE	0x08
 
 /* for copies to/from network byte order */
 /* ioctl interface also uses network byte order */
 /*
  * Copy peer (s), whose scrub member is a pointer, into peer (d), whose
  * scrub member is an embedded structure, converting the multi-byte
  * fields with htonl()/htons().  The scrub part of (d) is written only
  * when (s)->scrub is non-NULL; otherwise it is left untouched, so callers
  * presumably start from a zeroed (d) -- TODO confirm.  Only the
  * PFSS_TIMESTAMP bit of pfss_flags is carried over.  Both arguments are
  * evaluated several times, so they must be free of side effects.
  */
 #define pf_state_peer_hton(s,d) do {		\
 	(d)->seqlo = htonl((s)->seqlo);		\
 	(d)->seqhi = htonl((s)->seqhi);		\
 	(d)->seqdiff = htonl((s)->seqdiff);	\
 	(d)->max_win = htons((s)->max_win);	\
 	(d)->mss = htons((s)->mss);		\
 	(d)->state = (s)->state;		\
 	(d)->wscale = (s)->wscale;		\
 	if ((s)->scrub) {						\
 		(d)->scrub.pfss_flags = 				\
 		    htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP);	\
 		(d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl;		\
 		(d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
 		(d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;	\
 	}								\
 } while (0)
 
 /*
  * Load peer (s), whose scrub member is an embedded structure, into peer
  * (d), whose scrub member is a pointer, converting the multi-byte fields
  * with ntohl()/ntohs().  Scrub data is copied only when (s) carries
  * PFSYNC_SCRUB_FLAG_VALID and (d)->scrub is already non-NULL; nothing is
  * allocated here.  Only the PFSS_TIMESTAMP bit of pfss_flags is kept.
  * Both arguments are evaluated several times.
  */
 #define pf_state_peer_ntoh(s,d) do {		\
 	(d)->seqlo = ntohl((s)->seqlo);		\
 	(d)->seqhi = ntohl((s)->seqhi);		\
 	(d)->seqdiff = ntohl((s)->seqdiff);	\
 	(d)->max_win = ntohs((s)->max_win);	\
 	(d)->mss = ntohs((s)->mss);		\
 	(d)->state = (s)->state;		\
 	(d)->wscale = (s)->wscale;		\
 	if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && 	\
 	    (d)->scrub != NULL) {					\
 		(d)->scrub->pfss_flags =				\
 		    ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP;	\
 		(d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl;		\
 		(d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
 	}								\
 } while (0)
 
 /*
  * Split the 64-bit counter s into two 32-bit words stored through d in
  * network byte order: d[0] gets the high half, d[1] the low half.
  * Arguments are parenthesized so that expressions (e.g. "a | b") are
  * shifted and masked as a whole; s is still evaluated twice.
  */
 #define pf_state_counter_hton(s,d) do {				\
 	(d)[0] = htonl(((s) >> 32) & 0xffffffff);		\
 	(d)[1] = htonl((s) & 0xffffffff);			\
 } while (0)
 
 /*
  * Reassemble a 64-bit value from two 32-bit words: s[0] supplies the
  * high half and s[1] the low half.  No byte-order conversion is done
  * here.  s is parenthesized so a pointer expression may be passed.
  */
 #define pf_state_counter_from_pfsync(s)				\
 	(((u_int64_t)((s)[0]) << 32) | (u_int64_t)((s)[1]))
 
 /*
  * Build the 64-bit lvalue d from two network-byte-order 32-bit words:
  * ntohl(s[0]) becomes the high half, ntohl(s[1]) the low half.
  * Arguments are parenthesized so pointer expressions work for s; both
  * arguments are evaluated more than once.
  */
 #define pf_state_counter_ntoh(s,d) do {				\
 	(d) = ntohl((s)[0]);					\
 	(d) = (d) << 32;					\
 	(d) += ntohl((s)[1]);					\
 } while (0)
 
 TAILQ_HEAD(pf_rulequeue, pf_rule);
 
 struct pf_anchor;
 
 struct pf_ruleset {
 	struct {
 		struct pf_rulequeue	 queues[2];
 		struct {
 			struct pf_rulequeue	*ptr;
 			struct pf_rule		**ptr_array;
 			u_int32_t		 rcount;
 			u_int32_t		 ticket;
 			int			 open;
 		}			 active, inactive;
 	}			 rules[PF_RULESET_MAX];
 	struct pf_anchor	*anchor;
 	u_int32_t		 tticket;
 	int			 tables;
 	int			 topen;
 };
 
 RB_HEAD(pf_anchor_global, pf_anchor);
 RB_HEAD(pf_anchor_node, pf_anchor);
 struct pf_anchor {
 	RB_ENTRY(pf_anchor)	 entry_global;
 	RB_ENTRY(pf_anchor)	 entry_node;
 	struct pf_anchor	*parent;
 	struct pf_anchor_node	 children;
 	char			 name[PF_ANCHOR_NAME_SIZE];
 	char			 path[MAXPATHLEN];
 	struct pf_ruleset	 ruleset;
 	int			 refcnt;	/* anchor rules */
 	int			 match;	/* XXX: used for pfctl black magic */
 };
 RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
 RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
 
 #define PF_RESERVED_ANCHOR	"_pf"
 
 #define PFR_TFLAG_PERSIST	0x00000001
 #define PFR_TFLAG_CONST		0x00000002
 #define PFR_TFLAG_ACTIVE	0x00000004
 #define PFR_TFLAG_INACTIVE	0x00000008
 #define PFR_TFLAG_REFERENCED	0x00000010
 #define PFR_TFLAG_REFDANCHOR	0x00000020
 #define PFR_TFLAG_COUNTERS	0x00000040
 /* Adjust masks below when adding flags. */
 #define PFR_TFLAG_USRMASK	(PFR_TFLAG_PERSIST	| \
 				 PFR_TFLAG_CONST	| \
 				 PFR_TFLAG_COUNTERS)
 #define PFR_TFLAG_SETMASK	(PFR_TFLAG_ACTIVE	| \
 				 PFR_TFLAG_INACTIVE	| \
 				 PFR_TFLAG_REFERENCED	| \
 				 PFR_TFLAG_REFDANCHOR)
 #define PFR_TFLAG_ALLMASK	(PFR_TFLAG_PERSIST	| \
 				 PFR_TFLAG_CONST	| \
 				 PFR_TFLAG_ACTIVE	| \
 				 PFR_TFLAG_INACTIVE	| \
 				 PFR_TFLAG_REFERENCED	| \
 				 PFR_TFLAG_REFDANCHOR	| \
 				 PFR_TFLAG_COUNTERS)
 
 struct pf_anchor_stackframe;
 
 struct pfr_table {
 	char			 pfrt_anchor[MAXPATHLEN];
 	char			 pfrt_name[PF_TABLE_NAME_SIZE];
 	u_int32_t		 pfrt_flags;
 	u_int8_t		 pfrt_fback;
 };
 
 enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
 	PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
 	PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
 
 struct pfr_addr {
 	union {
 		struct in_addr	 _pfra_ip4addr;
 		struct in6_addr	 _pfra_ip6addr;
 	}		 pfra_u;
 	u_int8_t	 pfra_af;
 	u_int8_t	 pfra_net;
 	u_int8_t	 pfra_not;
 	u_int8_t	 pfra_fback;
 };
 #define	pfra_ip4addr	pfra_u._pfra_ip4addr
 #define	pfra_ip6addr	pfra_u._pfra_ip6addr
 
 enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
 enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
 #define PFR_OP_XPASS	PFR_OP_ADDR_MAX
 
 struct pfr_astats {
 	struct pfr_addr	 pfras_a;
 	u_int64_t	 pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
 	u_int64_t	 pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
 	long		 pfras_tzero;
 };
 
 enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };
 
 struct pfr_tstats {
 	struct pfr_table pfrts_t;
 	u_int64_t	 pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
 	u_int64_t	 pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
 	u_int64_t	 pfrts_match;
 	u_int64_t	 pfrts_nomatch;
 	long		 pfrts_tzero;
 	int		 pfrts_cnt;
 	int		 pfrts_refcnt[PFR_REFCNT_MAX];
 };
 #define	pfrts_name	pfrts_t.pfrt_name
 #define pfrts_flags	pfrts_t.pfrt_flags
 
 #ifndef _SOCKADDR_UNION_DEFINED
 #define	_SOCKADDR_UNION_DEFINED
 union sockaddr_union {
 	struct sockaddr		sa;
 	struct sockaddr_in	sin;
 	struct sockaddr_in6	sin6;
 };
 #endif /* _SOCKADDR_UNION_DEFINED */
 
 struct pfr_kcounters {
 	u_int64_t		 pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
 	u_int64_t		 pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
 };
 
 SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
 struct pfr_kentry {
 	struct radix_node	 pfrke_node[2];
 	union sockaddr_union	 pfrke_sa;
 	SLIST_ENTRY(pfr_kentry)	 pfrke_workq;
 	struct pfr_kcounters	*pfrke_counters;
 	long			 pfrke_tzero;
 	u_int8_t		 pfrke_af;
 	u_int8_t		 pfrke_net;
 	u_int8_t		 pfrke_not;
 	u_int8_t		 pfrke_mark;
 };
 
 SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
 RB_HEAD(pfr_ktablehead, pfr_ktable);
 /*
  * Table object: embeds a struct pfr_tstats (and, through it, the
  * struct pfr_table it contains), carries RB-tree and work-queue linkage,
  * and holds one radix_node_head pointer each for pfrkt_ip4 and pfrkt_ip6.
  */
 struct pfr_ktable {
 	struct pfr_tstats	 pfrkt_ts;
 	RB_ENTRY(pfr_ktable)	 pfrkt_tree;
 	SLIST_ENTRY(pfr_ktable)	 pfrkt_workq;
 	struct radix_node_head	*pfrkt_ip4;
 	struct radix_node_head	*pfrkt_ip6;
 	struct pfr_ktable	*pfrkt_shadow;
 	struct pfr_ktable	*pfrkt_root;
 	struct pf_ruleset	*pfrkt_rs;
 	long			 pfrkt_larg;
 	int			 pfrkt_nflags;
 };
 /* Shorthand accessors into the embedded pfrkt_ts / pfrts_t members. */
 #define pfrkt_t		pfrkt_ts.pfrts_t
 #define pfrkt_name	pfrkt_t.pfrt_name
 #define pfrkt_anchor	pfrkt_t.pfrt_anchor
 /*
  * NOTE(review): struct pfr_table as declared in this file has no
  * pfrt_ruleset member, so any use of pfrkt_ruleset would fail to
  * compile; it looks like a leftover -- confirm before relying on it.
  */
 #define pfrkt_ruleset	pfrkt_t.pfrt_ruleset
 #define pfrkt_flags	pfrkt_t.pfrt_flags
 #define pfrkt_cnt	pfrkt_ts.pfrts_cnt
 #define pfrkt_refcnt	pfrkt_ts.pfrts_refcnt
 #define pfrkt_packets	pfrkt_ts.pfrts_packets
 #define pfrkt_bytes	pfrkt_ts.pfrts_bytes
 #define pfrkt_match	pfrkt_ts.pfrts_match
 #define pfrkt_nomatch	pfrkt_ts.pfrts_nomatch
 #define pfrkt_tzero	pfrkt_ts.pfrts_tzero
 
 /* keep synced with pfi_kif, used in RB_FIND */
 struct pfi_kif_cmp {
 	char				 pfik_name[IFNAMSIZ];
 };
 
 struct pfi_kif {
 	char				 pfik_name[IFNAMSIZ];
 	union {
 		RB_ENTRY(pfi_kif)	 _pfik_tree;
 		LIST_ENTRY(pfi_kif)	 _pfik_list;
 	} _pfik_glue;
 #define	pfik_tree	_pfik_glue._pfik_tree
 #define	pfik_list	_pfik_glue._pfik_list
 	u_int64_t			 pfik_packets[2][2][2];
 	u_int64_t			 pfik_bytes[2][2][2];
 	u_int32_t			 pfik_tzero;
 	u_int				 pfik_flags;
 	struct ifnet			*pfik_ifp;
 	struct ifg_group		*pfik_group;
 	u_int				 pfik_rulerefs;
 	TAILQ_HEAD(, pfi_dynaddr)	 pfik_dynaddrs;
 };
 
 #define	PFI_IFLAG_REFS		0x0001	/* has state references */
 #define PFI_IFLAG_SKIP		0x0100	/* skip filtering on interface */
 
 struct pf_pdesc {
 	struct {
 		int	 done;
 		uid_t	 uid;
 		gid_t	 gid;
 	}		 lookup;
 	u_int64_t	 tot_len;	/* Make Mickey money */
 	union {
 		struct tcphdr		*tcp;
 		struct udphdr		*udp;
 		struct icmp		*icmp;
 #ifdef INET6
 		struct icmp6_hdr	*icmp6;
 #endif /* INET6 */
 		void			*any;
 	} hdr;
 
 	struct pf_rule	*nat_rule;	/* nat/rdr rule applied to packet */
 	struct pf_addr	*src;		/* src address */
 	struct pf_addr	*dst;		/* dst address */
 	u_int16_t *sport;
 	u_int16_t *dport;
 	struct pf_mtag	*pf_mtag;
 
 	u_int32_t	 p_len;		/* total length of payload */
 
 	u_int16_t	*ip_sum;
 	u_int16_t	*proto_sum;
 	u_int16_t	 flags;		/* Let SCRUB trigger behavior in
 					 * state code. Easier than tags */
 #define PFDESC_TCP_NORM	0x0001		/* TCP shall be statefully scrubbed */
 #define PFDESC_IP_REAS	0x0002		/* IP frags would've been reassembled */
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 tos;
 	u_int8_t	 dir;		/* direction */
 	u_int8_t	 sidx;		/* key index for source */
 	u_int8_t	 didx;		/* key index for destination */
 };
 
 /* flags for RDR options */
 #define PF_DPORT_RANGE	0x01		/* Dest port uses range */
 #define PF_RPORT_RANGE	0x02		/* RDR'ed port uses range */
 
-/* Reasons code for passing/dropping a packet */
-#define PFRES_MATCH	0		/* Explicit match of a rule */
-#define PFRES_BADOFF	1		/* Bad offset for pull_hdr */
-#define PFRES_FRAG	2		/* Dropping following fragment */
-#define PFRES_SHORT	3		/* Dropping short packet */
-#define PFRES_NORM	4		/* Dropping by normalizer */
-#define PFRES_MEMORY	5		/* Dropped due to lacking mem */
-#define PFRES_TS	6		/* Bad TCP Timestamp (RFC1323) */
-#define PFRES_CONGEST	7		/* Congestion (of ipintrq) */
-#define PFRES_IPOPTIONS 8		/* IP option */
-#define PFRES_PROTCKSUM 9		/* Protocol checksum invalid */
-#define PFRES_BADSTATE	10		/* State mismatch */
-#define PFRES_STATEINS	11		/* State insertion failure */
-#define PFRES_MAXSTATES	12		/* State limit */
-#define PFRES_SRCLIMIT	13		/* Source node/conn limit */
-#define PFRES_SYNPROXY	14		/* SYN proxy */
-#define PFRES_MAX	15		/* total+1 */
-
-#define PFRES_NAMES { \
-	"match", \
-	"bad-offset", \
-	"fragment", \
-	"short", \
-	"normalize", \
-	"memory", \
-	"bad-timestamp", \
-	"congestion", \
-	"ip-option", \
-	"proto-cksum", \
-	"state-mismatch", \
-	"state-insert", \
-	"state-limit", \
-	"src-limit", \
-	"synproxy", \
-	NULL \
-}
-
 /* Counters for other things we want to keep track of */
 #define LCNT_STATES		0	/* states */
 #define LCNT_SRCSTATES		1	/* max-src-states */
 #define LCNT_SRCNODES		2	/* max-src-nodes */
 #define LCNT_SRCCONN		3	/* max-src-conn */
 #define LCNT_SRCCONNRATE	4	/* max-src-conn-rate */
 #define LCNT_OVERLOAD_TABLE	5	/* entry added to overload table */
 #define LCNT_OVERLOAD_FLUSH	6	/* state entries flushed */
 #define LCNT_MAX		7	/* total+1 */
 
 #define LCNT_NAMES { \
 	"max states per rule", \
 	"max-src-states", \
 	"max-src-nodes", \
 	"max-src-conn", \
 	"max-src-conn-rate", \
 	"overload table insertion", \
 	"overload flush states", \
 	NULL \
 }
 
 /* UDP state enumeration */
 #define PFUDPS_NO_TRAFFIC	0
 #define PFUDPS_SINGLE		1
 #define PFUDPS_MULTIPLE		2
 
 #define PFUDPS_NSTATES		3	/* number of state levels */
 
 #define PFUDPS_NAMES { \
 	"NO_TRAFFIC", \
 	"SINGLE", \
 	"MULTIPLE", \
 	NULL \
 }
 
 /* Other protocol state enumeration */
 #define PFOTHERS_NO_TRAFFIC	0
 #define PFOTHERS_SINGLE		1
 #define PFOTHERS_MULTIPLE	2
 
 #define PFOTHERS_NSTATES	3	/* number of state levels */
 
 #define PFOTHERS_NAMES { \
 	"NO_TRAFFIC", \
 	"SINGLE", \
 	"MULTIPLE", \
 	NULL \
 }
 
 #define FCNT_STATE_SEARCH	0
 #define FCNT_STATE_INSERT	1
 #define FCNT_STATE_REMOVALS	2
 #define FCNT_MAX		3
 
 #define SCNT_SRC_NODE_SEARCH	0
 #define SCNT_SRC_NODE_INSERT	1
 #define SCNT_SRC_NODE_REMOVALS	2
 #define SCNT_MAX		3
 
 /* Store x through pointer a; a NULL a turns the macro into a no-op. */
 #define ACTION_SET(a, x) do {			\
 	if ((a) != NULL)			\
 		*(a) = (x);			\
 } while (0)
 
 /*
  * Store reason code x through a (skipped when a is NULL) and, when x is
  * below PFRES_MAX, increment the matching V_pf_status.counters[] slot.
  * x is parenthesized at every use so that an expression argument is
  * compared and indexed as a whole; it is still evaluated more than once,
  * so it must not have side effects.
  */
 #define REASON_SET(a, x) \
 	do { \
 		if ((a) != NULL) \
 			*(a) = (x); \
 		if ((x) < PFRES_MAX) \
 			V_pf_status.counters[(x)]++; \
 	} while (0)
 
 struct pf_status {
 	u_int64_t	counters[PFRES_MAX];
 	u_int64_t	lcounters[LCNT_MAX];	/* limit counters */
 	u_int64_t	fcounters[FCNT_MAX];
 	u_int64_t	scounters[SCNT_MAX];
 	u_int64_t	pcounters[2][2][3];
 	u_int64_t	bcounters[2][2];
 	u_int32_t	running;
 	u_int32_t	states;
 	u_int32_t	src_nodes;
 	u_int32_t	since;
 	u_int32_t	debug;
 	u_int32_t	hostid;
 	char		ifname[IFNAMSIZ];
 	u_int8_t	pf_chksum[PF_MD5_DIGEST_LENGTH];
-};
-
-struct cbq_opts {
-	u_int		minburst;
-	u_int		maxburst;
-	u_int		pktsize;
-	u_int		maxpktsize;
-	u_int		ns_per_byte;
-	u_int		maxidle;
-	int		minidle;
-	u_int		offtime;
-	int		flags;
-};
-
-struct priq_opts {
-	int		flags;
-};
-
-struct hfsc_opts {
-	/* real-time service curve */
-	u_int		rtsc_m1;	/* slope of the 1st segment in bps */
-	u_int		rtsc_d;		/* the x-projection of m1 in msec */
-	u_int		rtsc_m2;	/* slope of the 2nd segment in bps */
-	/* link-sharing service curve */
-	u_int		lssc_m1;
-	u_int		lssc_d;
-	u_int		lssc_m2;
-	/* upper-limit service curve */
-	u_int		ulsc_m1;
-	u_int		ulsc_d;
-	u_int		ulsc_m2;
-	int		flags;
-};
-
-struct pf_altq {
-	char			 ifname[IFNAMSIZ];
-
-	void			*altq_disc;	/* discipline-specific state */
-	TAILQ_ENTRY(pf_altq)	 entries;
-
-	/* scheduler spec */
-	u_int8_t		 scheduler;	/* scheduler type */
-	u_int16_t		 tbrsize;	/* tokenbucket regulator size */
-	u_int32_t		 ifbandwidth;	/* interface bandwidth */
-
-	/* queue spec */
-	char			 qname[PF_QNAME_SIZE];	/* queue name */
-	char			 parent[PF_QNAME_SIZE];	/* parent name */
-	u_int32_t		 parent_qid;	/* parent queue id */
-	u_int32_t		 bandwidth;	/* queue bandwidth */
-	u_int8_t		 priority;	/* priority */
-	u_int8_t		 local_flags;	/* dynamic interface */
-#define	PFALTQ_FLAG_IF_REMOVED		0x01
-
-	u_int16_t		 qlimit;	/* queue size limit */
-	u_int16_t		 flags;		/* misc flags */
-	union {
-		struct cbq_opts		 cbq_opts;
-		struct priq_opts	 priq_opts;
-		struct hfsc_opts	 hfsc_opts;
-	} pq_u;
-
-	u_int32_t		 qid;		/* return value */
 };
 
 struct pf_divert {
 	union {
 		struct in_addr	ipv4;
 		struct in6_addr	ipv6;
 	}		addr;
 	u_int16_t	port;
 };
 
 #define PFFRAG_FRENT_HIWAT	5000	/* Number of fragment entries */
 #define PFR_KENTRY_HIWAT	200000	/* Number of table entries */
 
 /*
  * ioctl parameter structures
  */
 
 struct pfioc_pooladdr {
 	u_int32_t		 action;
 	u_int32_t		 ticket;
 	u_int32_t		 nr;
 	u_int32_t		 r_num;
 	u_int8_t		 r_action;
 	u_int8_t		 r_last;
 	u_int8_t		 af;
 	char			 anchor[MAXPATHLEN];
 	struct pf_pooladdr	 addr;
 };
 
 struct pfioc_rule {
 	u_int32_t	 action;
 	u_int32_t	 ticket;
 	u_int32_t	 pool_ticket;
 	u_int32_t	 nr;
 	char		 anchor[MAXPATHLEN];
 	char		 anchor_call[MAXPATHLEN];
 	struct pf_rule	 rule;
 };
 
 struct pfioc_natlook {
 	struct pf_addr	 saddr;
 	struct pf_addr	 daddr;
 	struct pf_addr	 rsaddr;
 	struct pf_addr	 rdaddr;
 	u_int16_t	 sport;
 	u_int16_t	 dport;
 	u_int16_t	 rsport;
 	u_int16_t	 rdport;
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 direction;
 };
 
 struct pfioc_state {
 	struct pfsync_state	state;
 };
 
 struct pfioc_src_node_kill {
 	sa_family_t psnk_af;
 	struct pf_rule_addr psnk_src;
 	struct pf_rule_addr psnk_dst;
 	u_int		    psnk_killed;
 };
 
 struct pfioc_state_kill {
 	struct pf_state_cmp	psk_pfcmp;
 	sa_family_t		psk_af;
 	int			psk_proto;
 	struct pf_rule_addr	psk_src;
 	struct pf_rule_addr	psk_dst;
 	char			psk_ifname[IFNAMSIZ];
 	char			psk_label[PF_RULE_LABEL_SIZE];
 	u_int			psk_killed;
 };
 
 struct pfioc_states {
 	int	ps_len;
 	union {
 		caddr_t			 psu_buf;
 		struct pfsync_state	*psu_states;
 	} ps_u;
 #define ps_buf		ps_u.psu_buf
 #define ps_states	ps_u.psu_states
 };
 
 struct pfioc_src_nodes {
 	int	psn_len;
 	union {
 		caddr_t		 psu_buf;
 		struct pf_src_node	*psu_src_nodes;
 	} psn_u;
 #define psn_buf		psn_u.psu_buf
 #define psn_src_nodes	psn_u.psu_src_nodes
 };
 
 struct pfioc_if {
 	char		 ifname[IFNAMSIZ];
 };
 
 struct pfioc_tm {
 	int		 timeout;
 	int		 seconds;
 };
 
 struct pfioc_limit {
 	int		 index;
 	unsigned	 limit;
 };
 
 struct pfioc_altq {
 	u_int32_t	 action;
 	u_int32_t	 ticket;
 	u_int32_t	 nr;
 	struct pf_altq	 altq;
 };
 
 struct pfioc_qstats {
 	u_int32_t	 ticket;
 	u_int32_t	 nr;
 	void		*buf;
 	int		 nbytes;
 	u_int8_t	 scheduler;
 };
 
 struct pfioc_ruleset {
 	u_int32_t	 nr;
 	char		 path[MAXPATHLEN];
 	char		 name[PF_ANCHOR_NAME_SIZE];
 };
 
 #define PF_RULESET_ALTQ		(PF_RULESET_MAX)
 #define PF_RULESET_TABLE	(PF_RULESET_MAX+1)
 struct pfioc_trans {
 	int		 size;	/* number of elements */
 	int		 esize; /* size of each element in bytes */
 	struct pfioc_trans_e {
 		int		rs_num;
 		char		anchor[MAXPATHLEN];
 		u_int32_t	ticket;
 	}		*array;
 };
 
 #define PFR_FLAG_ATOMIC		0x00000001	/* unused */
 #define PFR_FLAG_DUMMY		0x00000002
 #define PFR_FLAG_FEEDBACK	0x00000004
 #define PFR_FLAG_CLSTATS	0x00000008
 #define PFR_FLAG_ADDRSTOO	0x00000010
 #define PFR_FLAG_REPLACE	0x00000020
 #define PFR_FLAG_ALLRSETS	0x00000040
 #define PFR_FLAG_ALLMASK	0x0000007F
 #ifdef _KERNEL
 #define PFR_FLAG_USERIOCTL	0x10000000
 #endif
 
 struct pfioc_table {
 	struct pfr_table	 pfrio_table;
 	void			*pfrio_buffer;
 	int			 pfrio_esize;
 	int			 pfrio_size;
 	int			 pfrio_size2;
 	int			 pfrio_nadd;
 	int			 pfrio_ndel;
 	int			 pfrio_nchange;
 	int			 pfrio_flags;
 	u_int32_t		 pfrio_ticket;
 };
 #define	pfrio_exists	pfrio_nadd
 #define	pfrio_nzero	pfrio_nadd
 #define	pfrio_nmatch	pfrio_nadd
 #define pfrio_naddr	pfrio_size2
 #define pfrio_setflag	pfrio_size2
 #define pfrio_clrflag	pfrio_nadd
 
 struct pfioc_iface {
 	char	 pfiio_name[IFNAMSIZ];
 	void	*pfiio_buffer;
 	int	 pfiio_esize;
 	int	 pfiio_size;
 	int	 pfiio_nzero;
 	int	 pfiio_flags;
 };
 
 
 /*
  * ioctl operations
  */
 
 #define DIOCSTART	_IO  ('D',  1)
 #define DIOCSTOP	_IO  ('D',  2)
 #define DIOCADDRULE	_IOWR('D',  4, struct pfioc_rule)
 #define DIOCGETRULES	_IOWR('D',  6, struct pfioc_rule)
 #define DIOCGETRULE	_IOWR('D',  7, struct pfioc_rule)
 /* XXX cut 8 - 17 */
 #define DIOCCLRSTATES	_IOWR('D', 18, struct pfioc_state_kill)
 #define DIOCGETSTATE	_IOWR('D', 19, struct pfioc_state)
 #define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
 #define DIOCGETSTATUS	_IOWR('D', 21, struct pf_status)
 #define DIOCCLRSTATUS	_IO  ('D', 22)
 #define DIOCNATLOOK	_IOWR('D', 23, struct pfioc_natlook)
 #define DIOCSETDEBUG	_IOWR('D', 24, u_int32_t)
 #define DIOCGETSTATES	_IOWR('D', 25, struct pfioc_states)
 #define DIOCCHANGERULE	_IOWR('D', 26, struct pfioc_rule)
 /* XXX cut 27 - 28 */
 #define DIOCSETTIMEOUT	_IOWR('D', 29, struct pfioc_tm)
 #define DIOCGETTIMEOUT	_IOWR('D', 30, struct pfioc_tm)
 #define DIOCADDSTATE	_IOWR('D', 37, struct pfioc_state)
 #define DIOCCLRRULECTRS	_IO  ('D', 38)
 #define DIOCGETLIMIT	_IOWR('D', 39, struct pfioc_limit)
 #define DIOCSETLIMIT	_IOWR('D', 40, struct pfioc_limit)
 #define DIOCKILLSTATES	_IOWR('D', 41, struct pfioc_state_kill)
 #define DIOCSTARTALTQ	_IO  ('D', 42)
 #define DIOCSTOPALTQ	_IO  ('D', 43)
 #define DIOCADDALTQ	_IOWR('D', 45, struct pfioc_altq)
 #define DIOCGETALTQS	_IOWR('D', 47, struct pfioc_altq)
 #define DIOCGETALTQ	_IOWR('D', 48, struct pfioc_altq)
 #define DIOCCHANGEALTQ	_IOWR('D', 49, struct pfioc_altq)
 #define DIOCGETQSTATS	_IOWR('D', 50, struct pfioc_qstats)
 #define DIOCBEGINADDRS	_IOWR('D', 51, struct pfioc_pooladdr)
 #define DIOCADDADDR	_IOWR('D', 52, struct pfioc_pooladdr)
 #define DIOCGETADDRS	_IOWR('D', 53, struct pfioc_pooladdr)
 #define DIOCGETADDR	_IOWR('D', 54, struct pfioc_pooladdr)
 #define DIOCCHANGEADDR	_IOWR('D', 55, struct pfioc_pooladdr)
 /* XXX cut 56 - 57 */
 #define	DIOCGETRULESETS	_IOWR('D', 58, struct pfioc_ruleset)
 #define	DIOCGETRULESET	_IOWR('D', 59, struct pfioc_ruleset)
 #define	DIOCRCLRTABLES	_IOWR('D', 60, struct pfioc_table)
 #define	DIOCRADDTABLES	_IOWR('D', 61, struct pfioc_table)
 #define	DIOCRDELTABLES	_IOWR('D', 62, struct pfioc_table)
 #define	DIOCRGETTABLES	_IOWR('D', 63, struct pfioc_table)
 #define	DIOCRGETTSTATS	_IOWR('D', 64, struct pfioc_table)
 #define DIOCRCLRTSTATS	_IOWR('D', 65, struct pfioc_table)
 #define	DIOCRCLRADDRS	_IOWR('D', 66, struct pfioc_table)
 #define	DIOCRADDADDRS	_IOWR('D', 67, struct pfioc_table)
 #define	DIOCRDELADDRS	_IOWR('D', 68, struct pfioc_table)
 #define	DIOCRSETADDRS	_IOWR('D', 69, struct pfioc_table)
 #define	DIOCRGETADDRS	_IOWR('D', 70, struct pfioc_table)
 #define	DIOCRGETASTATS	_IOWR('D', 71, struct pfioc_table)
 #define	DIOCRCLRASTATS	_IOWR('D', 72, struct pfioc_table)
 #define	DIOCRTSTADDRS	_IOWR('D', 73, struct pfioc_table)
 #define	DIOCRSETTFLAGS	_IOWR('D', 74, struct pfioc_table)
 #define	DIOCRINADEFINE	_IOWR('D', 77, struct pfioc_table)
 #define	DIOCOSFPFLUSH	_IO('D', 78)
 #define	DIOCOSFPADD	_IOWR('D', 79, struct pf_osfp_ioctl)
 #define	DIOCOSFPGET	_IOWR('D', 80, struct pf_osfp_ioctl)
 #define	DIOCXBEGIN	_IOWR('D', 81, struct pfioc_trans)
 #define	DIOCXCOMMIT	_IOWR('D', 82, struct pfioc_trans)
 #define	DIOCXROLLBACK	_IOWR('D', 83, struct pfioc_trans)
 #define	DIOCGETSRCNODES	_IOWR('D', 84, struct pfioc_src_nodes)
 #define	DIOCCLRSRCNODES	_IO('D', 85)
 #define	DIOCSETHOSTID	_IOWR('D', 86, u_int32_t)
 #define	DIOCIGETIFACES	_IOWR('D', 87, struct pfioc_iface)
 #define	DIOCSETIFFLAG	_IOWR('D', 89, struct pfioc_iface)
 #define	DIOCCLRIFFLAG	_IOWR('D', 90, struct pfioc_iface)
 #define	DIOCKILLSRCNODES	_IOWR('D', 91, struct pfioc_src_node_kill)
 struct pf_ifspeed {
 	char			ifname[IFNAMSIZ];
 	u_int32_t		baudrate;
 };
 #define	DIOCGIFSPEED	_IOWR('D', 92, struct pf_ifspeed)
 
 #ifdef _KERNEL
 LIST_HEAD(pf_src_node_list, pf_src_node);
 struct pf_srchash {
 	struct pf_src_node_list		nodes;
 	struct mtx			lock;
 };
 
 struct pf_keyhash {
 	LIST_HEAD(, pf_state_key)	keys;
 	struct mtx			lock;
 };
 
 struct pf_idhash {
 	LIST_HEAD(, pf_state)		states;
 	struct mtx			lock;
 };
 
 #define	PF_HASHSIZ	(32768)
 VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
 VNET_DECLARE(struct pf_idhash *, pf_idhash);
 VNET_DECLARE(u_long, pf_hashmask);
 #define V_pf_keyhash	VNET(pf_keyhash)
 #define	V_pf_idhash	VNET(pf_idhash)
 #define	V_pf_hashmask	VNET(pf_hashmask)
 VNET_DECLARE(struct pf_srchash *, pf_srchash);
 VNET_DECLARE(u_long, pf_srchashmask);
 #define	V_pf_srchash	VNET(pf_srchash)
 #define V_pf_srchashmask VNET(pf_srchashmask)
 
 #define PF_IDHASH(s)	(be64toh((s)->id) % (V_pf_hashmask + 1))
 
 VNET_DECLARE(void *, pf_swi_cookie);
 #define V_pf_swi_cookie	VNET(pf_swi_cookie)
 
 VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
 #define	V_pf_stateid	VNET(pf_stateid)
 
 TAILQ_HEAD(pf_altqqueue, pf_altq);
 VNET_DECLARE(struct pf_altqqueue,	 pf_altqs[2]);
 #define	V_pf_altqs			 VNET(pf_altqs)
 VNET_DECLARE(struct pf_palist,		 pf_pabuf);
 #define	V_pf_pabuf			 VNET(pf_pabuf)
 
 VNET_DECLARE(u_int32_t,			 ticket_altqs_active);
 #define	V_ticket_altqs_active		 VNET(ticket_altqs_active)
 VNET_DECLARE(u_int32_t,			 ticket_altqs_inactive);
 #define	V_ticket_altqs_inactive		 VNET(ticket_altqs_inactive)
 VNET_DECLARE(int,			 altqs_inactive_open);
 #define	V_altqs_inactive_open		 VNET(altqs_inactive_open)
 VNET_DECLARE(u_int32_t,			 ticket_pabuf);
 #define	V_ticket_pabuf			 VNET(ticket_pabuf)
 VNET_DECLARE(struct pf_altqqueue *,	 pf_altqs_active);
 #define	V_pf_altqs_active		 VNET(pf_altqs_active)
 VNET_DECLARE(struct pf_altqqueue *,	 pf_altqs_inactive);
 #define	V_pf_altqs_inactive		 VNET(pf_altqs_inactive)
 
 VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
 #define	V_pf_unlinked_rules	VNET(pf_unlinked_rules)
 
 void				 pf_initialize(void);
 void				 pf_cleanup(void);
 
 struct pf_mtag			*pf_get_mtag(struct mbuf *);
 
 extern void			 pf_calc_skip_steps(struct pf_rulequeue *);
 #ifdef ALTQ
 extern	void			 pf_altq_ifnet_event(struct ifnet *, int);
 #endif
 VNET_DECLARE(uma_zone_t,	 pf_state_z);
 #define	V_pf_state_z		 VNET(pf_state_z)
 VNET_DECLARE(uma_zone_t,	 pf_state_key_z);
 #define	V_pf_state_key_z	 VNET(pf_state_key_z)
 VNET_DECLARE(uma_zone_t,	 pf_state_scrub_z);
 #define	V_pf_state_scrub_z	 VNET(pf_state_scrub_z)
 
 extern void			 pf_purge_thread(void *);
 extern void			 pf_intr(void *);
 extern void			 pf_purge_expired_src_nodes(void);
 
 extern int			 pf_unlink_state(struct pf_state *, u_int);
 #define	PF_ENTER_LOCKED		0x00000001
 #define	PF_RETURN_LOCKED	0x00000002
 extern int			 pf_state_insert(struct pfi_kif *,
 				    struct pf_state_key *,
 				    struct pf_state_key *,
 				    struct pf_state *);
 extern void			 pf_free_state(struct pf_state *);
 
 /* Acquire one more reference on state s via its refs counter. */
 static __inline void
 pf_ref_state(struct pf_state *s)
 {
 	refcount_acquire(&(s->refs));
 }
 
 /*
  * Drop one reference on state s.  If refcount_release() returns non-zero
  * the state is passed to pf_free_state() and 1 is returned; otherwise
  * the state is left alone and 0 is returned.
  */
 static __inline int
 pf_release_state(struct pf_state *s)
 {
 	if (!refcount_release(&s->refs))
 		return (0);
 
 	pf_free_state(s);
 	return (1);
 }
 
 extern struct pf_state		*pf_find_state_byid(uint64_t, uint32_t);
 extern struct pf_state		*pf_find_state_all(struct pf_state_key_cmp *,
 				    u_int, int *);
 extern struct pf_src_node	*pf_find_src_node(struct pf_addr *,
 				    struct pf_rule *, sa_family_t, int);
 extern void			 pf_unlink_src_node(struct pf_src_node *);
 extern void			 pf_unlink_src_node_locked(struct pf_src_node *);
 extern u_int			 pf_free_src_nodes(struct pf_src_node_list *);
 extern void			 pf_print_state(struct pf_state *);
 extern void			 pf_print_flags(u_int8_t);
 extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
 				    u_int8_t);
 
 VNET_DECLARE(struct ifnet *,		 sync_ifp);
 /* Expands to a bare expression (no ';'), like the other V_* accessors. */
 #define	V_sync_ifp			 VNET(sync_ifp)
 VNET_DECLARE(struct pf_rule,		 pf_default_rule);
 #define	V_pf_default_rule		  VNET(pf_default_rule)
 extern void			 pf_addrcpy(struct pf_addr *, struct pf_addr *,
 				    u_int8_t);
 void				pf_free_rule(struct pf_rule *);
 
 #ifdef INET
 int	pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
 #endif /* INET */
 
 #ifdef INET6
 int	pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
 void	pf_poolmask(struct pf_addr *, struct pf_addr*,
 	    struct pf_addr *, struct pf_addr *, u_int8_t);
 void	pf_addr_inc(struct pf_addr *, sa_family_t);
 #endif /* INET6 */
 
 u_int32_t	pf_new_isn(struct pf_state *);
 void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
 	    sa_family_t);
 void	pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
 void	pf_send_deferred_syn(struct pf_state *);
 int	pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
 	    struct pf_addr *, sa_family_t);
 int	pf_match_addr_range(struct pf_addr *, struct pf_addr *,
 	    struct pf_addr *, sa_family_t);
 int	pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
 
 void	pf_normalize_init(void);
 void	pf_normalize_cleanup(void);
 int	pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
 	    struct pf_pdesc *);
 int	pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
 	    struct pf_pdesc *);
 int	pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
 	    struct pf_pdesc *);
 void	pf_normalize_tcp_cleanup(struct pf_state *);
 int	pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
 	    struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
 int	pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
 	    u_short *, struct tcphdr *, struct pf_state *,
 	    struct pf_state_peer *, struct pf_state_peer *, int *);
 u_int32_t
 	pf_state_expires(const struct pf_state *);
 void	pf_purge_expired_fragments(void);
 int	pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
 	    int);
 int	pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *);
 struct pf_state_key *pf_alloc_state_key(int);
 void	pfr_initialize(void);
 void	pfr_cleanup(void);
 int	pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
 void	pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
 	    u_int64_t, int, int, int);
 int	pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t);
 void	pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
 struct pfr_ktable *
 	pfr_attach_table(struct pf_ruleset *, char *);
 void	pfr_detach_table(struct pfr_ktable *);
 int	pfr_clr_tables(struct pfr_table *, int *, int);
 int	pfr_add_tables(struct pfr_table *, int, int *, int);
 int	pfr_del_tables(struct pfr_table *, int, int *, int);
 int	pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
 int	pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
 int	pfr_clr_tstats(struct pfr_table *, int, int *, int);
 int	pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
 int	pfr_clr_addrs(struct pfr_table *, int *, int);
 int	pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
 int	pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int);
 int	pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int);
 int	pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int *, int *, int *, int, u_int32_t);
 int	pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
 int	pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
 int	pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int);
 int	pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int);
 int	pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
 int	pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
 int	pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
 int	pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
 	    int *, u_int32_t, int);
 
 MALLOC_DECLARE(PFI_MTYPE);
 VNET_DECLARE(struct pfi_kif *,		 pfi_all);
 #define	V_pfi_all	 		 VNET(pfi_all)
 
 void		 pfi_initialize(void);
 void		 pfi_cleanup(void);
 void		 pfi_kif_ref(struct pfi_kif *);
 void		 pfi_kif_unref(struct pfi_kif *);
 struct pfi_kif	*pfi_kif_find(const char *);
 struct pfi_kif	*pfi_kif_attach(struct pfi_kif *, const char *);
 int		 pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
 void		 pfi_kif_purge(void);
 int		 pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
 		    sa_family_t);
 int		 pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
 void		 pfi_dynaddr_remove(struct pfi_dynaddr *);
 void		 pfi_dynaddr_copyout(struct pf_addr_wrap *);
 void		 pfi_update_status(const char *, struct pf_status *);
 void		 pfi_get_ifaces(const char *, struct pfi_kif *, int *);
 int		 pfi_set_flags(const char *, int);
 int		 pfi_clear_flags(const char *, int);
 
 int		 pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
 int		 pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
 void		 pf_qid2qname(u_int32_t, char *);
 
 VNET_DECLARE(struct pf_status,		 pf_status);
 #define	V_pf_status			 VNET(pf_status)
 
 /*
  * One entry of the per-vnet pf_limits[] array, which is sized
  * PF_LIMIT_MAX.
  * NOTE(review): presumably 'limit' is the cap applied to 'zone' --
  * confirm against the code that sets these fields.
  */
 struct pf_limit {
 	uma_zone_t	zone;	/* UMA zone handle */
 	u_int		limit;	/* numeric limit value */
 };
 VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
 #define	V_pf_limits VNET(pf_limits)
 
 #endif /* _KERNEL */
 
 #ifdef _KERNEL
 VNET_DECLARE(struct pf_anchor_global,		 pf_anchors);
 #define	V_pf_anchors				 VNET(pf_anchors)
 VNET_DECLARE(struct pf_anchor,			 pf_main_anchor);
 #define	V_pf_main_anchor			 VNET(pf_main_anchor)
 #define pf_main_ruleset	V_pf_main_anchor.ruleset
 #endif
 
 /* these ruleset functions can be linked into userland programs (pfctl) */
 int			 pf_get_ruleset_number(u_int8_t);
 void			 pf_init_ruleset(struct pf_ruleset *);
 int			 pf_anchor_setup(struct pf_rule *,
 			    const struct pf_ruleset *, const char *);
 int			 pf_anchor_copyout(const struct pf_ruleset *,
 			    const struct pf_rule *, struct pfioc_rule *);
 void			 pf_anchor_remove(struct pf_rule *);
 void			 pf_remove_if_empty_ruleset(struct pf_ruleset *);
 struct pf_ruleset	*pf_find_ruleset(const char *);
 struct pf_ruleset	*pf_find_or_create_ruleset(const char *);
 void			 pf_rs_initialize(void);
 
 /* The fingerprint functions can be linked into userland programs (tcpdump) */
 int	pf_osfp_add(struct pf_osfp_ioctl *);
 #ifdef _KERNEL
 struct pf_osfp_enlist *
 	pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
 	    const struct tcphdr *);
 #endif /* _KERNEL */
 void	pf_osfp_flush(void);
 int	pf_osfp_get(struct pf_osfp_ioctl *);
 int	pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
 
 #ifdef _KERNEL
 void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
 
 void			 pf_step_into_anchor(struct pf_anchor_stackframe *, int *,
 			    struct pf_ruleset **, int, struct pf_rule **,
 			    struct pf_rule **, int *);
 int			 pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *,
 			    struct pf_ruleset **, int, struct pf_rule **,
 			    struct pf_rule **, int *);
 
 int			 pf_map_addr(u_int8_t, struct pf_rule *,
 			    struct pf_addr *, struct pf_addr *,
 			    struct pf_addr *, struct pf_src_node **);
 struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
 			    int, int, struct pfi_kif *, struct pf_src_node **,
 			    struct pf_state_key **, struct pf_state_key **,
 			    struct pf_addr *, struct pf_addr *,
 			    uint16_t, uint16_t, struct pf_anchor_stackframe *);
 
 struct pf_state_key	*pf_state_key_setup(struct pf_pdesc *, struct pf_addr *,
 			    struct pf_addr *, u_int16_t, u_int16_t);
 struct pf_state_key	*pf_state_key_clone(struct pf_state_key *);
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_H_ */
Index: stable/10/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- stable/10/sys/netpfil/ipfw/ip_fw2.c	(revision 263085)
+++ stable/10/sys/netpfil/ipfw/ip_fw2.c	(revision 263086)
@@ -1,2804 +1,2805 @@
 /*-
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * The FreeBSD IP packet firewall, main file
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
 #ifndef INET
 #error "IPFIREWALL requires INET"
 #endif /* INET */
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
 #include <net/route.h>
-#include <net/pf_mtag.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
+
+#include <netpfil/pf/pf_mtag.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_carp.h>
 #include <netinet/pim.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/sctp.h>
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 /*
  * static variables followed by global ones.
  * All ipfw global variables are here.
  */
 
 /* ipfw_vnet_ready controls when we are open for business */
 static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
 #define	V_ipfw_vnet_ready	VNET(ipfw_vnet_ready)
 
 static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
 #define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)
 
 static VNET_DEFINE(int, fw_permit_single_frag6) = 1;
 #define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)
 
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 static int default_to_accept = 1;
 #else
 static int default_to_accept;
 #endif
 
 VNET_DEFINE(int, autoinc_step);
 VNET_DEFINE(int, fw_one_pass) = 1;
 
 VNET_DEFINE(unsigned int, fw_tables_max);
 /* Use 128 tables by default */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
 
 /*
  * Each rule belongs to one of 32 different sets (0..31).
  * The variable set_disable contains one bit per set.
  * If the bit is set, all rules in the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted individually.
  */
 VNET_DEFINE(u_int32_t, set_disable);
 #define	V_set_disable			VNET(set_disable)
 
 VNET_DEFINE(int, fw_verbose);
 /* counter for ipfw_log(NULL...) */
 VNET_DEFINE(u_int64_t, norule_counter);
 VNET_DEFINE(int, verbose_limit);
 
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
 VNET_DEFINE(int, ipfw_nat_ready) = 0;
 
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 #ifdef SYSCTL_NODE
 uint32_t dummy_def = IPFW_DEFAULT_RULE;
 static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
 
 SYSBEGIN(f3)
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
     CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
     "Rule number auto-increment step");
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
     "Log matches to ipfw rules");
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
     CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
     "Set upper limit of matches of ipfw rules logged");
 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
     &dummy_def, 0,
     "The default/max possible rule number.");
 SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
     CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
     "Maximum number of tables");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
     &default_to_accept, 0,
     "Make the default rule accept all packets.");
 TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
 TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
     CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
     CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
     "Deny packets with unknown IPv6 Extension Headers");
 SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
     CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0,
     "Permit single packet IPv6 fragments");
 #endif /* INET6 */
 
 SYSEND
 
 #endif /* SYSCTL_NODE */
 
 
 /*
  * Some macros used in the various matching options.
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
  * Other macros just cast void * into the appropriate type
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Test whether the type of ICMP header 'icmp' is selected in the bitmap
  * held in cmd->d[0] (one bit per ICMP type).
  *
  * A single mask word can only describe types 0..31, so the shift count
  * is bounded explicitly: ICMP_MAXTYPE alone does not guarantee a count
  * below the width of the mask, and shifting by the operand width or more
  * is undefined behaviour in C.  The unsigned constant also avoids
  * shifting a signed 1 into the sign bit for type 31.
  *
  * Returns 1 on match, 0 otherwise.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	return (type <= ICMP_MAXTYPE && type < 32 &&
 	    (cmd->d[0] & (1U << type)) != 0);
 }
 
 /*
  * TT is the set of ICMP types treated as queries, one bit per type:
  * echo, router solicitation, timestamp, information request and
  * address mask request.
  */
 #define TT	( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
     (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
 
 /*
  * Return 1 if 'icmp' carries one of the query types in TT, 0 otherwise.
  *
  * The shift count is bounded at 32 explicitly because ICMP_MAXTYPE alone
  * does not guarantee a count below the width of the mask; an oversized
  * shift is undefined behaviour and, where the count is silently wrapped,
  * would make a high-numbered type alias one of the query bits.
  */
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	return (type <= ICMP_MAXTYPE && type < 32 &&
 	    (TT & (1U << type)) != 0);
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 /*
  * Compare the option/flag bits found in a packet ('bits') with the masks
  * packed in cmd->arg1: the low byte lists bits that must be set, the
  * high byte lists bits that must be clear.
  *
  * Returns 1 when both requirements hold, 0 otherwise.
  */
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	/* Bits that are NOT present in the packet. */
 	const u_int8_t absent = (u_int8_t)~bits;
 	const u_char must_clear = (cmd->arg1 >> 8) & 0xff;
 
 	/* A bit we want set is absent. */
 	if (((cmd->arg1 & 0xff) & absent) != 0)
 		return 0;
 
 	/* Every bit we want clear has to be among the absent ones. */
 	if ((must_clear & absent) != must_clear)
 		return 0;
 
 	return 1;
 }
 
 /*
  * Walk the IPv4 options that follow the fixed header of 'ip', record
  * which of LSRR / SSRR / RR / TS are present, and hand the result to
  * flags_match() together with 'cmd'.
  *
  * Returns 0 straight away when an option has a non-positive length or
  * extends past the option area; otherwise returns flags_match()'s verdict.
  */
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	u_char *optp = (u_char *)(ip + 1);
 	int left = (ip->ip_hl << 2) - sizeof (struct ip);
 	int seen = 0;
 	int optlen;
 
 	while (left > 0) {
 		int opt = optp[IPOPT_OPTVAL];
 
 		if (opt == IPOPT_EOL)
 			break;
 
 		if (opt == IPOPT_NOP) {
 			/* NOP is a single byte without a length field */
 			optlen = 1;
 		} else {
 			optlen = optp[IPOPT_OLEN];
 			if (optlen <= 0 || optlen > left)
 				return 0; /* invalid or truncated */
 		}
 
 		if (opt == IPOPT_LSRR)
 			seen |= IP_FW_IPOPT_LSRR;
 		else if (opt == IPOPT_SSRR)
 			seen |= IP_FW_IPOPT_SSRR;
 		else if (opt == IPOPT_RR)
 			seen |= IP_FW_IPOPT_RR;
 		else if (opt == IPOPT_TS)
 			seen |= IP_FW_IPOPT_TS;
 
 		left -= optlen;
 		optp += optlen;
 	}
 	return (flags_match(cmd, seen));
 }
 
 /*
  * Walk the TCP options that follow the fixed header of 'tcp', record
  * which of MSS / window scale / SACK / timestamp are present, and hand
  * the result to flags_match() together with 'cmd'.
  *
  * Parsing stops at TCPOPT_EOL, at the end of the option area implied by
  * th_off, or at the first malformed option.  An option is malformed when
  * its length byte would lie outside the option area, when the length is
  * below 2 (kind + length), or when it claims more bytes than remain.
  * Such an option is not counted as present; the options seen before it
  * are still matched.
  */
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(tcp + 1);
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* the length byte itself must be inside the area */
 			if (x < 2)
 				break;
 			optlen = cp[1];
 			/* too short to be valid, or truncated */
 			if (optlen < 2 || optlen > x)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 /*
  * Match interface 'ifp' against the interface instruction 'cmd'.
  *
  * - no interface:               never matches;
  * - cmd->name empty:            match by IPv4 address configured on ifp
  *                               (only compiled in on FreeBSD);
  * - cmd->name starts with '\1': table lookup keyed by the interface name,
  *                               with cmd->p.glob passed as the table
  *                               argument and the value stored via 'tablearg';
  * - otherwise:                  match by name, as an fnmatch() pattern when
  *                               cmd->p.glob is set, else by strncmp().
  *
  * Returns non-zero on match, 0 otherwise.
  */
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg)
 {
 	if (ifp == NULL)	/* no iface with this packet, match fails */
 		return 0;
 
 	if (cmd->name[0] == '\0') {
 		/* match by IP address */
 #ifdef __FreeBSD__	/* and OSX too ? */
 		struct ifaddr *ifa;
 		int found = 0;
 
 		if_addr_rlock(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family == AF_INET &&
 			    cmd->p.ip.s_addr == ((struct sockaddr_in *)
 			    (ifa->ifa_addr))->sin_addr.s_addr) {
 				found = 1;	/* match */
 				break;
 			}
 		}
 		if_addr_runlock(ifp);
 		return(found);
 #else
 		return(0);
 #endif /* __FreeBSD__ */
 	}
 
 	/* match by name */
 	if (cmd->name[0] == '\1') /* use tablearg to match */
 		return ipfw_lookup_table_extended(chain, cmd->p.glob,
 			ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
 
 	if (cmd->p.glob)
 		return(fnmatch(cmd->name, ifp->if_xname, 0) == 0);
 
 	return(strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0);
 }
 
 /*
  * The verify_path function checks if a route to the src exists and
  * if it is reachable via ifp (when provided).
  * 
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of.
  * The 'versrcreach' option just checks that the source address is
  * reachable via any route (except default) in the routing table.
  * These two are a measure to block forged packets. This is also
  * commonly known as "anti-spoofing" or Unicast Reverse Path
  * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
  * is purposely reminiscent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implements the same functionality. But note that the syntax
  * is misleading, and the check may be performed on all IP packets
  * whether unicast, multicast, or broadcast.
  */
 static int
 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
 {
 #ifndef __FreeBSD__
 	return 0;
 #else
 	struct route ro;
 	struct sockaddr_in *sin;
 	int ok;
 
 	/* Build a lookup key for 'src' and ask for a route in 'fib'. */
 	bzero(&ro, sizeof(ro));
 	sin = (struct sockaddr_in *)&(ro.ro_dst);
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = src;
 	in_rtalloc_ign(&ro, 0, fib);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	if (ifp != NULL) {
 		/*
 		 * An interface was given: it has to be the one the route
 		 * points at.  rt->rt_ifa->ifa_ifp is used instead of
 		 * rt->rt_ifp in order to pass packets injected back by
 		 * if_simloop(): with useloopback == 1 a routing entry
 		 * (via lo0) for our own address may exist, so routing
 		 * asymmetry has to be handled.
 		 */
 		ok = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
 	} else if (satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
 		/* no ifp provided: the default route does not count */
 		ok = 0;
 	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		/* ... and neither does a blackhole/reject route */
 		ok = 0;
 	} else {
 		/* found valid route */
 		ok = 1;
 	}
 
 	RTFREE(ro.ro_rt);
 	return ok;
 #endif /* __FreeBSD__ */
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 /*
  * Test ICMPv6 'type' against the bitmap stored in cmd->d[]: word type/32,
  * bit type%32.  Types above ICMP6_MAXTYPE never match.
  * Returns 1 on match, 0 otherwise.
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	if (type > ICMP6_MAXTYPE)
 		return (0);
 
 	return ((cmd->d[type/32] & (1<<(type%32))) != 0);
 }
 
 /*
  * Return 1 if 'curr_flow' equals one of the flow ids stored in cmd->d[],
  * scanning indexes 0 through cmd->o.arg1 inclusive; return 0 otherwise.
  *
  * NOTE(review): the inclusive bound examines arg1 + 1 entries -- confirm
  * this agrees with how arg1 and d[] are filled in for this instruction.
  */
 static int
 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
 {
 	int idx = 0;
 
 	while (idx <= cmd->o.arg1) {
 		if (cmd->d[idx] == curr_flow)
 			return 1;
 		++idx;
 	}
 	return 0;
 }
 
 /* support for IP6_*_ME opcodes */
 static int
 search_ip6_addr_net (struct in6_addr * ip6_addr)
 {
 	struct ifnet *mdc;
 	struct ifaddr *mdc2;
 	struct in6_ifaddr *fdm;
 	struct in6_addr copia;
 
 	TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
 		if_addr_rlock(mdc);
 		TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
 			if (mdc2->ifa_addr->sa_family == AF_INET6) {
 				fdm = (struct in6_ifaddr *)mdc2;
 				copia = fdm->ia_addr.sin6_addr;
 				/* need for leaving scope_id in the sock_addr */
 				in6_clearscope(&copia);
 				if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
 					if_addr_runlock(mdc);
 					return 1;
 				}
 			}
 		}
 		if_addr_runlock(mdc);
 	}
 	return 0;
 }
 
 /*
  * IPv6 counterpart of verify_path(): look up a route to 'src' in 'fib'
  * and decide whether it makes the source address plausible.
  *
  * With 'ifp' given, the route must point at that interface.  Without it,
  * any route other than the default route or a reject/blackhole route is
  * accepted.  Returns 1 when the route is acceptable, 0 otherwise.
  */
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
 {
 	struct route_in6 ro;
 	struct sockaddr_in6 *sin6;
 	int ok;
 
 	bzero(&ro, sizeof(ro));
 	sin6 = (struct sockaddr_in6 * )&(ro.ro_dst);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(*sin6);
 	sin6->sin6_addr = *src;
 	in6_rtalloc_ign(&ro, 0, fib);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	if (ifp != NULL) {
 		/*
 		 * Compare with rt->rt_ifa->ifa_ifp rather than rt->rt_ifp,
 		 * to support the case of sending packets to an address of
 		 * our own (where the former interface is the first argument
 		 * of if_simloop() (=ifp), the latter is lo0).
 		 */
 		ok = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
 	} else if (IN6_IS_ADDR_UNSPECIFIED(
 	    &satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
 		/* no ifp provided: the default route does not count */
 		ok = 0;
 	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		/* ... and neither does a blackhole/reject route */
 		ok = 0;
 	} else {
 		/* found valid route */
 		ok = 1;
 	}
 
 	RTFREE(ro.ro_rt);
 	return ok;
 
 }
 
 /*
  * Return 1 if 'icmp6_type' is one of the ICMPv6 query types (echo
  * request, membership query, who-are-you request, FQDN query, node
  * information query), 0 otherwise.  Types above ICMP6_MAXTYPE never
  * qualify.
  */
 static int
 is_icmp6_query(int icmp6_type)
 {
 	if (icmp6_type > ICMP6_MAXTYPE)
 		return (0);
 
 	return (icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY);
 }
 
 /*
  * Reject an IPv6 packet; args->m is NULL on return.
  *
  * - code other than ICMP6_UNREACH_RST: the mbuf is passed to
  *   icmp6_error() as a destination-unreachable with that code;
  * - ICMP6_UNREACH_RST on a TCP packet that is not itself a RST: a reply
  *   built by ipfw_send_pkt() is sent with ip6_output(), then the
  *   original mbuf is freed;
  * - ICMP6_UNREACH_RST otherwise: the mbuf is simply freed.
  *
  * 'hlen' is the offset from 'ip6' to the TCP header.
  */
 static void
 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
 {
 	struct mbuf *m = args->m;
 
 	if (code != ICMP6_UNREACH_RST) {
 		/* Send an ICMPv6 unreach. */
 #if 0
 		/*
 		 * Unlike above, the mbufs need to line up with the ip6 hdr,
 		 * as the contents are read. We need to m_adj() the
 		 * needed amount.
 		 * The mbuf will however be thrown away so we can adjust it.
 		 * Remember we did an m_pullup on it already so we
 		 * can make some assumptions about contiguousness.
 		 */
 		if (args->L3offset)
 			m_adj(m, args->L3offset);
 #endif
 		icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
 	} else {
 		if (args->f_id.proto == IPPROTO_TCP) {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)((char *)ip6 + hlen);
 			/* never answer a RST with another RST */
 			if ((tcp->th_flags & TH_RST) == 0) {
 				struct mbuf *reply;
 
 				reply = ipfw_send_pkt(args->m, &(args->f_id),
 				    ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 				    tcp->th_flags | TH_RST);
 				if (reply != NULL)
 					ip6_output(reply, NULL, NULL, 0, NULL,
 					    NULL, NULL);
 			}
 		}
 		FREE_PKT(m);
 	}
 
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 
 /*
  * Send a reject message for an IPv4 packet, consuming the mbuf in
  * args->m (args->m is NULL on return).
  *
  * - code other than ICMP_REJECT_RST: an ICMP unreachable with that code
  *   is generated through icmp_error();
  * - ICMP_REJECT_RST on a TCP packet that is not itself a RST: a reply
  *   built by ipfw_send_pkt() is sent with ip_output(), then the original
  *   mbuf is freed;
  * - ICMP_REJECT_RST otherwise: the mbuf is simply freed.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
 {
 
 #if 0
 	/*
 	 * XXX When ip is not guaranteed to be at mtod() we will
 	 * need to account for this.
 	 * The mbuf will however be thrown away so we can adjust it.
 	 * Remember we did an m_pullup on it already so we
 	 * can make some assumptions about contiguousness.
 	 */
 	if (args->L3offset)
 		m_adj(m, args->L3offset);
 #endif
 	if (code != ICMP_REJECT_RST) {
 		/* Send an ICMP unreach */
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else {
 		if (args->f_id.proto == IPPROTO_TCP) {
 			struct tcphdr *const tcp =
 			    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 
 			/* never answer a RST with another RST */
 			if ((tcp->th_flags & TH_RST) == 0) {
 				struct mbuf *reply;
 
 				reply = ipfw_send_pkt(args->m, &(args->f_id),
 					ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 					tcp->th_flags | TH_RST);
 				if (reply != NULL)
 					ip_output(reply, NULL, NULL, 0, NULL,
 					    NULL);
 			}
 		}
 		FREE_PKT(args->m);
 	}
 	args->m = NULL;
 }
 
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
  * so we cache the results. ugid_lookupp is 0 if we have not
  * yet done a lookup, 1 if we succeeded, and -1 if we tried
  * and failed. The function always returns the match value.
  * We could actually spare the variable and use *uc, setting
  * it to '(void *)check_uidgid if we have no info, NULL if
  * we tried and failed, or any other value if successful.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
     struct ucred **uc)
 {
 #ifndef __FreeBSD__
 	/* XXX */
 	return cred_check(insn, proto, oif,
 	    dst_ip, dst_port, src_ip, src_port,
 	    (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
 #else  /* FreeBSD */
 	struct in_addr src_ip, dst_ip;
 	struct inpcbinfo *pi;
 	struct ipfw_flow_id *id;
 	struct inpcb *pcb, *inp;
 	struct ifnet *oif;
 	int lookupflags;
 	int match;
 
 	id = &args->f_id;
 	inp = args->inp;
 	oif = args->oif;
 
 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather than holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *ugid_lookupp == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			/* cache the credential and mark the lookup done */
 			*uc = crhold(inp->inp_cred);
 			*ugid_lookupp = 1;
 		} else
 			*ugid_lookupp = -1;
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*ugid_lookupp == -1)
 		return (0);
 	/* Only TCP and UDP have a PCB table to search; others never match. */
 	if (id->proto == IPPROTO_TCP) {
 		lookupflags = 0;
 		pi = &V_tcbinfo;
 	} else if (id->proto == IPPROTO_UDP) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_udbinfo;
 	} else
 		return 0;
 	lookupflags |= INPLOOKUP_RLOCKPCB;
 	match = 0;
 	if (*ugid_lookupp == 0) {
 		/*
 		 * No cached result yet: look the PCB up.  With oif == NULL
 		 * the packet's source comes first in the lookup; with an
 		 * outgoing interface the two endpoints are swapped.
 		 */
 		if (id->addr_type == 6) {
 #ifdef INET6
 			if (oif == NULL)
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->src_ip6, htons(id->src_port),
 				    &id->dst_ip6, htons(id->dst_port),
 				    lookupflags, oif, args->m);
 			else
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->dst_ip6, htons(id->dst_port),
 				    &id->src_ip6, htons(id->src_port),
 				    lookupflags, oif, args->m);
 #else
 			/* IPv6 flow on a kernel without INET6: cache failure */
 			*ugid_lookupp = -1;
 			return (0);
 #endif
 		} else {
 			/* f_id keeps addresses in host order; convert first */
 			src_ip.s_addr = htonl(id->src_ip);
 			dst_ip.s_addr = htonl(id->dst_ip);
 			if (oif == NULL)
 				pcb = in_pcblookup_mbuf(pi,
 				    src_ip, htons(id->src_port),
 				    dst_ip, htons(id->dst_port),
 				    lookupflags, oif, args->m);
 			else
 				pcb = in_pcblookup_mbuf(pi,
 				    dst_ip, htons(id->dst_port),
 				    src_ip, htons(id->src_port),
 				    lookupflags, oif, args->m);
 		}
 		if (pcb != NULL) {
 			/* the lookup returned the PCB read-locked */
 			INP_RLOCK_ASSERT(pcb);
 			*uc = crhold(pcb->inp_cred);
 			*ugid_lookupp = 1;
 			INP_RUNLOCK(pcb);
 		}
 		if (*ugid_lookupp == 0) {
 			/*
 			 * We tried and failed, set the variable to -1
 			 * so we will not try again on this packet.
 			 */
 			*ugid_lookupp = -1;
 			return (0);
 		}
 	}
 	/* *uc is valid here; compare it as the opcode requests. */
 	if (insn->o.opcode == O_UID)
 		match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID)
 		match = groupmember((gid_t)insn->d[0], *uc);
 	else if (insn->o.opcode == O_JAIL)
 		match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
 	return (match);
 #endif /* __FreeBSD__ */
 }
 
 /*
  * Record in args->rule the position of the rule that follows the one
  * matched at index 'slot' of chain->map[].
  *
  * The slot is exact and stored off by one, because 0 is used as a
  * marker.  rule_id is a guess (matched id + 1) that relies on ids being
  * assigned sequentially.  rulenum and chain_id are copied as they are.
  */
 static inline void
 set_match(struct ip_fw_args *args, int slot,
 	struct ip_fw_chain *chain)
 {
 	args->rule.rulenum = chain->map[slot]->rulenum;
 	args->rule.rule_id = chain->map[slot]->id + 1;
 	args->rule.slot = slot + 1; /* we use 0 as a marker */
 	args->rule.chain_id = chain->id;
 }
 
 /*
  * Resolve the target of a jump from rule 'f' to rule number 'num' and
  * return its index in the chain, caching the result in the rule itself.
  *
  * The cache reuses two fields of the rule (a horrible hack to avoid
  * changing the ABI): f->next_rule holds the cached position and
  * f->x_next holds the chain id the position was computed for.  The
  * cache is bypassed, and not updated, when 'num' is IP_FW_TABLEARG.
  *
  * Unless 'jump_backwards' is non-zero the target is forced to lie after
  * the current rule.
  */
 static int
 jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int f_pos;
 	int target;
 
 	/* Cache hit: same chain id as when the position was stored. */
 	if (num != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id) {
 		f_pos = (uintptr_t)f->next_rule;
 		return (f_pos);
 	}
 
 	target = IP_FW_ARG_TABLEARG(num);
 	/* make sure we do not jump backward */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 	f_pos = ipfw_find_rule(chain, target, 0);
 
 	/* update the cache */
 	if (num != IP_FW_TABLEARG) {
 		f->next_rule = (void *)(uintptr_t)f_pos;
 		f->x_next = (void *)(uintptr_t)chain->id;
 	}
 
 	return (f_pos);
 }
 
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, NULL for layer3 packet.
  *	args->L3offset	Number of bytes bypassed if we came from L2.
  *			e.g. often sizeof(eh)  ** NOTYET **
  *	args->oif	Outgoing interface, NULL if packet is incoming.
  *		The incoming interface is in the mbuf. (in)
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->next_hop6	IPv6 next hop we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->rule.info	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *		args->rule contains the matching rule,
  *		args->rule.info has additional information.
  *
  */
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 
 	/*
 	 * Local variables holding state while processing a packet:
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumptions on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 * **notyet**
 	 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is the beginning of the ip(4 or 6) header.
 	 *	Calculated by adding the L3offset to the start of data.
 	 *	(Until we start using L3offset, the packet is
 	 *	supposed to start with the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 #ifndef __FreeBSD__
 	struct bsd_ucred ucred_cache;
 #else
 	struct ucred *ucred_cache = NULL;
 #endif
 	int ucred_lookup = 0;
 
 	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
 	 *	(ether_output, ip_output).
 	 */
 	struct ifnet *oif = args->oif;
 
 	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
 	 *	or there is a single packet fragment (fragment header added
 	 *	needlessly).  We will treat a single packet fragment as if
 	 *	there was no fragment header (or log/block depending on the
 	 *	V_fw_permit_single_frag6 sysctl setting).
 	 */
 	u_short offset = 0;
 	u_short ip6f_mf = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IP (v4 or v6) packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	uint8_t proto;
 	uint16_t src_port = 0, dst_port = 0;	/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	uint16_t iplen=0;
 	int pktlen;
 	uint16_t	etype = 0;	/* Host order stored ether type */
 
 	/*
 	 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
 	 * 	MATCH_NONE when checked and not matched (q = NULL),
 	 *	MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
 	 */
 	int dyn_dir = MATCH_UNKNOWN;
 	ipfw_dyn_rule *q = NULL;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	uint8_t	icmp6_type = 0;
 	uint16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 
 	int is_ipv4 = 0;
 
 	int done = 0;		/* flag to exit the outer loop */
 
 	if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
 		return (IP_FW_PASS);	/* accept */
 
 	dst_ip.s_addr = 0;		/* make sure it is initialized */
 	src_ip.s_addr = 0;		/* make sure it is initialized */
 	pktlen = m->m_pkthdr.len;
 	args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
 	proto = args->f_id.proto = 0;	/* mark f_id invalid */
 		/* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
 
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define PULLUP_TO(_len, p, T)	PULLUP_LEN(_len, p, sizeof(T))
 #define PULLUP_LEN(_len, p, T)					\
 do {								\
 	int x = (_len) + T;					\
 	if ((m)->m_len < x) {					\
 		args->m = m = m_pullup(m, x);			\
 		if (m == NULL)					\
 			goto pullup_failed;			\
 	}							\
 	p = (mtod(m, char *) + (_len));				\
 } while (0)
 
 	/*
 	 * If we have an Ethernet header, save its type in host byte order.
 	 */
 	if (args->eh)
 		etype = ntohs(args->eh->ether_type);
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 		is_ipv6 = 1;
 		args->f_id.addr_type = 6;
 		hlen = sizeof(struct ip6_hdr);
 		proto = ip6->ip6_nxt;
 
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL && offset == 0) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				icmp6_type = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				PULLUP_TO(hlen, ulp, struct sctphdr);
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
 				case 0:
 					ext_hd |= EXT_RTHDR0;
 					break;
 				case 2:
 					ext_hd |= EXT_RTHDR2;
 					break;
 				default:
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Unknown "
 						    "Routing Header type(%d)\n",
 						    ((struct ip6_rthdr *)
 						    ulp)->ip6r_type);
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (V_fw_permit_single_frag6 == 0 &&
 				    offset == 0 && ip6f_mf == 0) {
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Invalid "
 						    "Fragment Header\n");
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.extra =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				/*
 				 * Packet ends here, and IPv6 header has
 				 * already been pulled up. If ip6e_len!=0
 				 * then octets must be ignored.
 				 */
 				ulp = ip; /* non-NULL to get out of loop. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			case IPPROTO_PIM:
 				/* XXX PIM header check? */
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, struct carp_header);
 				if (((struct carp_header *)ulp)->carp_version !=
 				    CARP_VERSION) 
 					return (IP_FW_DENY);
 				if (((struct carp_header *)ulp)->carp_type !=
 				    CARP_ADVERTISEMENT) 
 					return (IP_FW_DENY);
 				break;
 
 			case IPPROTO_IPV6:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip6_hdr);
 				break;
 
 			case IPPROTO_IPV4:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip);
 				break;
 
 			default:
 				if (V_fw_verbose)
 					printf("IPFW2: IPV6 - Unknown "
 					    "Extension Header(%d), ext_hd=%x\n",
 					     proto, ext_hd);
 				if (V_fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 			} /*switch */
 		}
 		ip = mtod(m, struct ip *);
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.src_ip6 = ip6->ip6_src;
 		args->f_id.dst_ip6 = ip6->ip6_dst;
 		args->f_id.src_ip = 0;
 		args->f_id.dst_ip = 0;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
 	    	is_ipv4 = 1;
 		hlen = ip->ip_hl << 2;
 		args->f_id.addr_type = 4;
 
 		/*
 		 * Collect parameters into local variables for faster matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		iplen = ntohs(ip->ip_len);
 		pktlen = iplen < pktlen ? iplen : pktlen;
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				PULLUP_TO(hlen, ulp, struct sctphdr);
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				//args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		}
 
 		ip = mtod(m, struct ip *);
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	}
 #undef PULLUP_TO
 	if (proto) { /* we may have port numbers, store them */
 		args->f_id.proto = proto;
 		args->f_id.src_port = src_port = ntohs(src_port);
 		args->f_id.dst_port = dst_port = ntohs(dst_port);
 	}
 
 	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
 		IPFW_PF_RUNLOCK(chain);
 		return (IP_FW_PASS);	/* accept */
 	}
 	if (args->rule.slot) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH, DIVERT/TEE...)
 		 * Validate the slot and continue from the next one
 		 * if still present, otherwise do a lookup.
 		 */
 		f_pos = (args->rule.chain_id == chain->id) ?
 		    args->rule.slot :
 		    ipfw_find_rule(chain, args->rule.rulenum,
 			args->rule.rule_id);
 	} else {
 		f_pos = 0;
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 * We have two nested loops and an inner switch. Sometimes we
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
 	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
 	 *		cmd points to the current microinstruction.
 	 *
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
 	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
 	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 		struct ip_fw *f;
 
 		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 /* check_body: */
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset != 0)
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 #else
 						    (void *)&ucred_cache);
 #endif
 				break;
 
 			case O_RECV:
 				match = iface_match(m->m_pkthdr.rcvif,
 				    (ipfw_insn_if *)cmd, chain, &tablearg);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_VIA:
 				match = iface_match(oif ? oif :
 				    m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_MACADDR2:
 				if (args->eh != NULL) {	/* have MAC header */
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->eh != NULL) {
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (etype >= p[0] &&
 						    etype <= p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->eh != NULL);
 				break;
 
 			case O_DIVERTED:
 			    {
 				/* For diverted packets, args->rule.info
 				 * contains the divert port (in host format)
 				 * reason and direction.
 				 */
 				uint32_t i = args->rule.info;
 				match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
 				    cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
 			    }
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_SRC_LOOKUP:
 			case O_IP_DST_LOOKUP:
 				if (is_ipv4) {
 				    uint32_t key =
 					(cmd->opcode == O_IP_DST_LOOKUP) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t v = 0;
 
 				    if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
 					/* generic lookup. The key must be
 					 * in 32bit big-endian format.
 					 */
 					v = ((ipfw_insn_u32 *)cmd)->d[1];
 					if (v == 0)
 					    key = dst_ip.s_addr;
 					else if (v == 1)
 					    key = src_ip.s_addr;
 					else if (v == 6) /* dscp */
 					    key = (ip->ip_tos >> 2) & 0x3f;
 					else if (offset != 0)
 					    break;
 					else if (proto != IPPROTO_TCP &&
 						proto != IPPROTO_UDP)
 					    break;
 					else if (v == 2)
 					    key = htonl(dst_port);
 					else if (v == 3)
 					    key = htonl(src_port);
 					else if (v == 4 || v == 5) {
 					    check_uidgid(
 						(ipfw_insn_u32 *)cmd,
 						args, &ucred_lookup,
 #ifdef __FreeBSD__
 						&ucred_cache);
 					    if (v == 4 /* O_UID */)
 						key = ucred_cache->cr_uid;
 					    else if (v == 5 /* O_JAIL */)
 						key = ucred_cache->cr_prison->pr_id;
 #else /* !__FreeBSD__ */
 						(void *)&ucred_cache);
 					    if (v ==4 /* O_UID */)
 						key = ucred_cache.uid;
 					    else if (v == 5 /* O_JAIL */)
 						key = ucred_cache.xid;
 #endif /* !__FreeBSD__ */
 					    key = htonl(key);
 					} else
 					    break;
 				    }
 				    match = ipfw_lookup_table(chain,
 					cmd->arg1, key, &v);
 				    if (!match)
 					break;
 				    if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 					match =
 					    ((ipfw_insn_u32 *)cmd)->d[0] == v;
 				    else
 					tablearg = v;
 				} else if (is_ipv6) {
 					uint32_t v = 0;
 					void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ?
 						&args->f_id.dst_ip6: &args->f_id.src_ip6;
 					match = ipfw_lookup_table_extended(chain,
 							cmd->arg1, pkey, &v,
 							IPFW_TABLE_CIDR);
 					if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 						match = ((ipfw_insn_u32 *)cmd)->d[0] == v;
 					if (match)
 						tablearg = v;
 				}
 				break;
 
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(src_ip, tif);
 					match = (tif != NULL);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_SRC_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
 #endif
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(dst_ip, tif);
 					match = (tif != NULL);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_DST_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
 #endif
 				break;
 
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
 				    && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = (is_ipv4 &&
 				    cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (is_ipv4) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = iplen;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_DSCP:
 			    {
 				uint32_t *p;
 				uint16_t x;
 
 				p = ((ipfw_insn_u32 *)cmd)->d;
 
 				if (is_ipv4)
 					x = ip->ip_tos >> 2;
 				else if (is_ipv6) {
 					uint8_t *v;
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					x = (*v & 0x0F) << 2;
 					v++;
 					x |= *v >> 6;
 				} else
 					break;
 
 				/* DSCP bitmask is stored as low_u32 high_u32 */
 				if (x > 32)
 					match = *(p + 1) & (1 << (x - 32));
 				else
 					match = *p & (1 << x);
 			    }
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    tcp = TCP(ulp);
 				    x = iplen -
 					((ip->ip_hl + tcp->th_off) << 2);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    tcpopts_match(TCP(ulp), cmd));
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    x = ntohs(TCP(ulp)->th_win);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* Otherwise we have ranges. */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i > 0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
 				struct m_tag *mtag;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				/*
 				 * ALTQ uses mbuf tags from another
 				 * packet filtering system - pf(4).
 				 * We allocate a tag in its format
 				 * and fill it in, pretending to be pf(4).
 				 */
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
 				mtag = m_tag_get(PACKET_TAG_PF,
 				    sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
 				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				m_tag_prepend(m, mtag);
 				at = (struct pf_mtag *)(mtag + 1);
 				at->qid = altq->qid;
 				at->hdr = ip;
 				break;
 			}
 
 			case O_LOG:
 				ipfw_log(f, hlen, args, m,
 				    oif, offset | ip6f_mf, tablearg, ip);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = ((oif != NULL) ||
 				    (m->m_pkthdr.rcvif == NULL) ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    m->m_pkthdr.rcvif, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, m->m_pkthdr.rcvif,
 				        args->f_id.fib)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) ||
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, NULL, args->f_id.fib)));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6),
 					        m->m_pkthdr.rcvif,
 						args->f_id.fib) :
 #endif
 					    verify_path(src_ip,
 					    	m->m_pkthdr.rcvif,
 					        args->f_id.fib);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 #ifdef IPSEC
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 #endif
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					int i = cmdlen - 1;
 					struct in6_addr p;
 					struct in6_addr *d =
 					    &((ipfw_insn_ip6 *)cmd)->addr6;
 
 					for (; !match && i > 0; d += 2,
 					    i -= F_INSN_SIZE(struct in6_addr)
 					    * 2) {
 						p = (cmd->opcode ==
 						    O_IP6_SRC_MASK) ?
 						    args->f_id.src_ip6:
 						    args->f_id.dst_ip6;
 						APPLY_MASK(&p, &d[1]);
 						match =
 						    IN6_ARE_ADDR_EQUAL(&d[0],
 						    &p);
 					}
 				}
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			case O_TAG: {
 				struct m_tag *mtag;
 				uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
 
 				/* Packet is already tagged with this tag? */
 				mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
 
 				/* We have `untag' action when F_NOT flag is
 				 * present. And we must remove this mtag from
 				 * mbuf and reset `match' to zero (`match' will
 				 * be inverted later).
 				 * Otherwise we should allocate new mtag and
 				 * push it into mbuf.
 				 */
 				if (cmd->len & F_NOT) { /* `untag' action */
 					if (mtag != NULL)
 						m_tag_delete(m, mtag);
 					match = 0;
 				} else {
 					if (mtag == NULL) {
 						mtag = m_tag_alloc( MTAG_IPFW,
 						    tag, 0, M_NOWAIT);
 						if (mtag != NULL)
 							m_tag_prepend(m, mtag);
 					}
 					match = 1;
 				}
 				break;
 			}
 
 			case O_FIB: /* try match the specified fib */
 				if (args->f_id.fib == cmd->arg1)
 					match = 1;
 				break;
 
 			case O_SOCKARG:	{
 				struct inpcb *inp = args->inp;
 				struct inpcbinfo *pi;
 				
 				if (is_ipv6) /* XXX can we remove this ? */
 					break;
 
 				if (proto == IPPROTO_TCP)
 					pi = &V_tcbinfo;
 				else if (proto == IPPROTO_UDP)
 					pi = &V_udbinfo;
 				else
 					break;
 
 				/*
 				 * XXXRW: so_user_cookie should almost
 				 * certainly be inp_user_cookie?
 				 */
 
 				/* For an incoming packet, look up the
 				 * inpcb using the src/dest ip/port tuple. */
 				if (inp == NULL) {
 					inp = in_pcblookup(pi, 
 						src_ip, htons(src_port),
 						dst_ip, htons(dst_port),
 						INPLOOKUP_RLOCKPCB, NULL);
 					if (inp != NULL) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 						INP_RUNLOCK(inp);
 					}
 				} else {
 					if (inp->inp_socket) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 					}
 				}
 				break;
 			}
 
 			case O_TAGGED: {
 				struct m_tag *mtag;
 				uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
 
 				if (cmdlen == 1) {
 					match = m_tag_locate(m, MTAG_IPFW,
 					    tag, NULL) != NULL;
 					break;
 				}
 
 				/* we have ranges */
 				for (mtag = m_tag_first(m);
 				    mtag != NULL && !match;
 				    mtag = m_tag_next(m, mtag)) {
 					uint16_t *p;
 					int i;
 
 					if (mtag->m_tag_cookie != MTAG_IPFW)
 						continue;
 
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for(; !match && i > 0; i--, p += 2)
 						match =
 						    mtag->m_tag_id >= p[0] &&
 						    mtag->m_tag_id <= p[1];
 				}
 				break;
 			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to set l=0, done=1)
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   (setting l=0), or to the SKIPTO target (setting
 			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule.
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped (set retval,
 			 *   break loops with l=0, done=1)
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found.
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes become NOPs.
 			 *   The jump to the next rule is done by setting
 			 *   l=0, cmdlen=0.
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (ipfw_install_state(f,
 				    (ipfw_insn_limit *)cmd, args, tablearg)) {
 					/* error or limit violation */
 					retval = IP_FW_DENY;
 					l = 0;	/* exit inner loop */
 					done = 1; /* exit outer loop */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_dir.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (dyn_dir == MATCH_UNKNOWN &&
 				    (q = ipfw_lookup_dyn_rule(&args->f_id,
 				     &dyn_dir, proto == IPPROTO_TCP ?
 					TCP(ulp) : NULL))
 					!= NULL) {
 					/*
 					 * Found dynamic entry, update stats
 					 * and jump to the 'action' part of
 					 * the parent rule by setting
 					 * f, cmd, l and clearing cmdlen.
 					 */
 					IPFW_INC_DYN_COUNTER(q, pktlen);
 					/* XXX we would like to have f_pos
 					 * readily accessible in the dynamic
 				         * rule, instead of having to
 					 * lookup q->rule.
 					 */
 					f = q->rule;
 					f_pos = ipfw_find_rule(chain,
 						f->rulenum, f->id);
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					ipfw_dyn_unlock(q);
 					cmdlen = 0;
 					match = 1;
 					break;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					l = 0;	/* exit inner loop */
 				match = 1;
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_PIPE:
 			case O_QUEUE:
 				set_match(args, f_pos, chain);
 				args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
 				if (cmd->opcode == O_PIPE)
 					args->rule.info |= IPFW_IS_PIPE;
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_DIVERT:
 			case O_TEE:
 				if (args->eh) /* not on layer 2 */
 				    break;
 				/* otherwise this is terminal */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				retval = (cmd->opcode == O_DIVERT) ?
 					IP_FW_DIVERT : IP_FW_TEE;
 				set_match(args, f_pos, chain);
 				args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
 				break;
 
 			case O_COUNT:
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_SKIPTO:
 			    IPFW_INC_RULE_COUNTER(f, pktlen);
 			    f_pos = jump_fast(chain, f, cmd->arg1, tablearg, 0);
 			    /*
 			     * Skip disabled rules, and re-enter
 			     * the inner loop with the correct
 			     * f_pos, f, l and cmd.
 			     * Also clear cmdlen and skip_or
 			     */
 			    for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				     (1 << chain->map[f_pos]->set));
 				    f_pos++)
 				;
 			    /* Re-enter the inner loop at the skipto rule. */
 			    f = chain->map[f_pos];
 			    l = f->cmd_len;
 			    cmd = f->cmd;
 			    match = 1;
 			    cmdlen = 0;
 			    skip_or = 0;
 			    continue;
 			    break;	/* not reached */
 
 			case O_CALLRETURN: {
 				/*
 				 * Implementation of `subroutine' call/return,
 				 * in the stack carried in an mbuf tag. This
 				 * is different from `skipto' in that any call
 				 * address is possible (`skipto' must prevent
 				 * backward jumps to avoid endless loops).
 				 * We have `return' action when F_NOT flag is
 				 * present. The `m_tag_id' field is used as
 				 * stack pointer.
 				 */
 				struct m_tag *mtag;
 				uint16_t jmpto, *stack;
 
 #define	IS_CALL		((cmd->len & F_NOT) == 0)
 #define	IS_RETURN	((cmd->len & F_NOT) != 0)
 				/*
 				 * Hand-rolled version of m_tag_locate() with
 				 * wildcard `type'.
 				 * If not already tagged, allocate new tag.
 				 */
 				mtag = m_tag_first(m);
 				while (mtag != NULL) {
 					if (mtag->m_tag_cookie ==
 					    MTAG_IPFW_CALL)
 						break;
 					mtag = m_tag_next(m, mtag);
 				}
 				if (mtag == NULL && IS_CALL) {
 					mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
 					    IPFW_CALLSTACK_SIZE *
 					    sizeof(uint16_t), M_NOWAIT);
 					if (mtag != NULL)
 						m_tag_prepend(m, mtag);
 				}
 
 				/*
 				 * On error both `call' and `return' just
 				 * continue with next rule.
 				 */
 				if (IS_RETURN && (mtag == NULL ||
 				    mtag->m_tag_id == 0)) {
 					l = 0;		/* exit inner loop */
 					break;
 				}
 				if (IS_CALL && (mtag == NULL ||
 				    mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
 					printf("ipfw: call stack error, "
 					    "go to next rule\n");
 					l = 0;		/* exit inner loop */
 					break;
 				}
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				stack = (uint16_t *)(mtag + 1);
 
 				/*
 				 * The `call' action may use cached f_pos
 				 * (in f->next_rule), whose version is written
 				 * in f->next_rule.
 				 * The `return' action, however, doesn't have
 				 * fixed jump address in cmd->arg1 and can't use
 				 * cache.
 				 */
 				if (IS_CALL) {
 					stack[mtag->m_tag_id] = f->rulenum;
 					mtag->m_tag_id++;
 			    		f_pos = jump_fast(chain, f, cmd->arg1,
 					    tablearg, 1);
 				} else {	/* `return' action */
 					mtag->m_tag_id--;
 					jmpto = stack[mtag->m_tag_id] + 1;
 					f_pos = ipfw_find_rule(chain, jmpto, 0);
 				}
 
 				/*
 				 * Skip disabled rules, and re-enter
 				 * the inner loop with the correct
 				 * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
 				for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				    (1 << chain->map[f_pos]->set)); f_pos++)
 					;
 				/* Re-enter the inner loop at the dest rule. */
 				f = chain->map[f_pos];
 				l = f->cmd_len;
 				cmd = f->cmd;
 				cmdlen = 0;
 				skip_or = 0;
 				continue;
 				break;	/* NOTREACHED */
 			}
 #undef IS_CALL
 #undef IS_RETURN
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1, iplen, ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(icmp6_type) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
 					send_reject6(
 					    args, cmd->arg1, hlen,
 					    (struct ip6_hdr *)ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_FORWARD_IP:
 				if (args->eh)	/* not valid on layer2 pkts */
 					break;
 				if (q == NULL || q->rule != f ||
 				    dyn_dir == MATCH_FORWARD) {
 				    struct sockaddr_in *sa;
 				    sa = &(((ipfw_insn_sa *)cmd)->sa);
 				    if (sa->sin_addr.s_addr == INADDR_ANY) {
 					bcopy(sa, &args->hopstore,
 							sizeof(*sa));
 					args->hopstore.sin_addr.s_addr =
 						    htonl(tablearg);
 					args->next_hop = &args->hopstore;
 				    } else {
 					args->next_hop = sa;
 				    }
 				}
 				retval = IP_FW_PASS;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 #ifdef INET6
 			case O_FORWARD_IP6:
 				if (args->eh)	/* not valid on layer2 pkts */
 					break;
 				if (q == NULL || q->rule != f ||
 				    dyn_dir == MATCH_FORWARD) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
 					args->next_hop6 = sin6;
 				}
 				retval = IP_FW_PASS;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 #endif
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				set_match(args, f_pos, chain);
 				args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_SETFIB: {
 				uint32_t fib;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				fib = IP_FW_ARG_TABLEARG(cmd->arg1);
 				if (fib >= rt_numfibs)
 					fib = 0;
 				M_SETFIB(m, fib);
 				args->f_id.fib = fib;
 				l = 0;		/* exit inner loop */
 				break;
 		        }
 
 			case O_SETDSCP: {
 				uint16_t code;
 
 				code = IP_FW_ARG_TABLEARG(cmd->arg1) & 0x3F;
 				l = 0;		/* exit inner loop */
 				if (is_ipv4) {
 					uint16_t a;
 
 					a = ip->ip_tos;
 					ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03);
 					a += ntohs(ip->ip_sum) - ip->ip_tos;
 					ip->ip_sum = htons(a);
 				} else if (is_ipv6) {
 					uint8_t *v;
 
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					*v = (*v & 0xF0) | (code >> 2);
 					v++;
 					*v = (*v & 0x3F) | ((code & 0x03) << 6);
 				} else
 					break;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				break;
 			}
 
 			case O_NAT:
  				if (!IPFW_NAT_LOADED) {
 				    retval = IP_FW_DENY;
 				} else {
 				    struct cfg_nat *t;
 				    int nat_id;
 
 				    set_match(args, f_pos, chain);
 				    /* Check if this is 'global' nat rule */
 				    if (cmd->arg1 == 0) {
 					    retval = ipfw_nat_ptr(args, NULL, m);
 					    l = 0;
 					    done = 1;
 					    break;
 				    }
 				    t = ((ipfw_insn_nat *)cmd)->nat;
 				    if (t == NULL) {
 					nat_id = IP_FW_ARG_TABLEARG(cmd->arg1);
 					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
 					    l = 0;	/* exit inner loop */
 					    done = 1;	/* exit outer loop */
 					    break;
 					}
 					if (cmd->arg1 != IP_FW_TABLEARG)
 					    ((ipfw_insn_nat *)cmd)->nat = t;
 				    }
 				    retval = ipfw_nat_ptr(args, t, m);
 				}
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_REASS: {
 				int ip_off;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				l = 0;	/* in any case exit inner loop */
 				ip_off = ntohs(ip->ip_off);
 
 				/* if not fragmented, go to next rule */
 				if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
 				    break;
 
 				args->m = m = ip_reass(m);
 
 				/*
 				 * do IP header checksum fixup.
 				 */
 				if (m == NULL) { /* fragment got swallowed */
 				    retval = IP_FW_DENY;
 				} else { /* good, packet complete */
 				    int hlen;
 
 				    ip = mtod(m, struct ip *);
 				    hlen = ip->ip_hl << 2;
 				    ip->ip_sum = 0;
 				    if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
 				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
 			}
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 			/*
 			 * if we get here with l=0, then match is irrelevant.
 			 */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner loop, scan opcodes */
 #undef PULLUP_LEN
 
 		if (done)
 			break;
 
 /* next_rule:; */	/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 
 	if (done) {
 		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
 		IPFW_INC_RULE_COUNTER(rule, pktlen);
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	}
 	IPFW_PF_RUNLOCK(chain);
 #ifdef __FreeBSD__
 	if (ucred_cache != NULL)
 		crfree(ucred_cache);
 #endif
 	return (retval);
 
 pullup_failed:
 	if (V_fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Set maximum number of tables that can be used in given VNET ipfw instance.
  */
 #ifdef SYSCTL_NODE
 static int
 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int requested;
 	int rc;
 
 	/* Hand the current per-VNET limit to the generic integer handler. */
 	requested = V_fw_tables_max;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 
 	/*
 	 * Resize only when the handler succeeded and the request carries
 	 * a new value; otherwise report the handler's result unchanged.
 	 */
 	if (rc == 0 && req->newptr != NULL)
 		rc = ipfw_resize_tables(&V_layer3_chain, requested);
 
 	return (rc);
 }
 #endif
 /*
  * Module and VNET glue
  */
 
 /*
  * Stuff that must be initialised only on boot or module load
  */
 static int
 ipfw_init(void)
 {
 	/* Compile-time state of the divert and NAT support. */
 #ifdef IPDIVERT
 	const char *divert_mode = "enabled";
 #else
 	const char *divert_mode = "loadable";
 #endif
 #ifdef IPFIREWALL_NAT
 	const char *nat_mode = "enabled";
 #else
 	const char *nat_mode = "loadable";
 #endif
 	const char *default_policy = default_to_accept ? "accept" : "deny";
 
 	/*
 	 * Banner, printed only the first time around when called from
 	 * the sysinit code.  The line is left open here and finished by
 	 * the logging status printed below.
 	 */
 	printf("ipfw2 "
 #ifdef INET6
 		"(+ipv6) "
 #endif
 		"initialized, divert %s, nat %s, "
 		"default to %s, logging ",
 		divert_mode, nat_mode, default_policy);
 
 	/*
 	 * Note: V_xxx variables can be accessed here but the vnet specific
 	 * initializer may not have been called yet for the VIMAGE case.
 	 * Tuneables will have been processed, so the values printed are
 	 * those of the default vnet.
 	 * XXX This should all be rationalized AFTER 8.0
 	 */
 	if (V_fw_verbose == 0) {
 		printf("disabled\n");
 	} else if (V_verbose_limit == 0) {
 		printf("unlimited\n");
 	} else {
 		printf("limited to %d packets/entry by default\n",
 		    V_verbose_limit);
 	}
 
 	/* Clamp the user-supplied table count to the supported maximum. */
 	if (default_fw_tables > IPFW_TABLES_MAX)
 		default_fw_tables = IPFW_TABLES_MAX;
 
 	ipfw_log_bpf(1); /* init */
 	return (0);
 }
 
 /*
  * Called for the removal of the last instance only on module unload.
  */
 static void
 ipfw_destroy(void)
 {
 
 	/*
 	 * Mirror of the ipfw_log_bpf(1) call made in ipfw_init(), then
 	 * announce the unload on the console.
 	 */
 	ipfw_log_bpf(0); /* uninit */
 	printf("IP firewall unloaded\n");
 }
 
 /*
  * Stuff that must be initialized for every instance
  * (including the first of course).
  */
 static int
 vnet_ipfw_init(const void *unused)
 {
 	int error;
 	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	/* Everything below operates on this instance's V_layer3_chain. */
 	chain = &V_layer3_chain;
 
 	/* First set up some values that are compile time options */
 	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
 	V_fw_deny_unknown_exthdrs = 1;
 #ifdef IPFIREWALL_VERBOSE
 	V_fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 #ifdef IPFIREWALL_NAT
 	LIST_INIT(&chain->nat);
 #endif
 
 	/*
 	 * Create the initial one-slot rule map and allocate zeroed
 	 * storage for the default rule (a bare struct ip_fw).
 	 */
 	chain->n_rules = 1;
 	chain->static_len = sizeof(struct ip_fw);
 	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO);
 	if (chain->map)
 		rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
 
 	/* Set initial number of tables */
 	V_fw_tables_max = default_fw_tables;
 	error = ipfw_init_tables(chain);
 	if (error) {
 		/*
 		 * Release both allocations made above.  ENOSPC is returned
 		 * whatever code ipfw_init_tables() reported.
 		 * NOTE(review): chain->map and chain->n_rules keep the
 		 * values assigned above after the free.
 		 */
 		printf("ipfw2: setting up tables failed\n");
 		free(chain->map, M_IPFW);
 		free(rule, M_IPFW);
 		return (ENOSPC);
 	}
 
 	/*
 	 * fill and insert the default rule: a single one-word instruction,
 	 * O_ACCEPT or O_DENY depending on default_to_accept, placed in the
 	 * reserved set and installed as map[0].
 	 */
 	rule->act_ofs = 0;
 	rule->rulenum = IPFW_DEFAULT_RULE;
 	rule->cmd_len = 1;
 	rule->set = RESVD_SET;
 	rule->cmd[0].len = 1;
 	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
 	chain->rules = chain->default_rule = chain->map[0] = rule;
 	chain->id = rule->id = 1;
 
 	IPFW_LOCK_INIT(chain);
 	ipfw_dyn_init(chain);
 
 	/* Mark this instance as ready before the hooks are attached. */
 	V_ipfw_vnet_ready = 1;		/* Open for business */
 
 	/*
 	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
 	 * Even if the latter two fail we still keep the module alive
 	 * because the sockopt and layer2 paths are still useful.
 	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
 	 * so we can ignore the exact return value and just set a flag.
 	 *
 	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
 	 * changes in the underlying (per-vnet) variables trigger
 	 * immediate hook()/unhook() calls.
 	 * In layer2 we have the same behaviour, except that V_ether_ipfw
 	 * is checked on each packet because there are no pfil hooks.
 	 *
 	 * NOTE(review): despite the remark above about ignoring the
 	 * return value, the result of ipfw_attach_hooks(1) is what this
 	 * function returns to its caller.
 	 */
 	V_ip_fw_ctl_ptr = ipfw_ctl;
 	error = ipfw_attach_hooks(1);
 	return (error);
 }
 
 /*
  * Called for the removal of each instance.
  */
 static int
 vnet_ipfw_uninit(const void *unused)
 {
 	struct ip_fw *reap, *rule;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	int i;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
 	 * disconnect from ipv4, ipv6, layer2 and sockopt.
 	 * Then grab, release and grab again the WLOCK so we make
 	 * sure the update is propagated and nobody will be in.
 	 * The UH write lock taken last stays held until the rule map
 	 * has been freed below.
 	 */
 	(void)ipfw_attach_hooks(0 /* detach */);
 	V_ip_fw_ctl_ptr = NULL;
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	IPFW_UH_WLOCK(chain);
 
 	IPFW_WLOCK(chain);
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 	IPFW_WUNLOCK(chain);
 
 	ipfw_destroy_tables(chain);
 	/*
 	 * Thread every rule in the map onto a singly linked list through
 	 * x_next (last map entry ends up at the head) so that the rules
 	 * can be handed to ipfw_reap_rules() once the locks are dropped.
 	 * NOTE(review): chain->map is dereferenced in this loop before
 	 * the NULL check that guards the free() below.
 	 */
 	reap = NULL;
 	IPFW_WLOCK(chain);
 	for (i = 0; i < chain->n_rules; i++) {
 		rule = chain->map[i];
 		rule->x_next = reap;
 		reap = rule;
 	}
 	if (chain->map)
 		free(chain->map, M_IPFW);
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	/* Both locks are released at this point; dispose of the rules. */
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	IPFW_LOCK_DESTROY(chain);
 	ipfw_dyn_uninit(1);	/* free the remaining parts */
 	return 0;
 }
 
 /*
  * Module event handler.
  * In general we have the choice of handling most of these events by the
  * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
  * use the SYSINIT handlers as they are more capable of expressing the
  * flow of control during module and vnet operations, so this is just
  * a skeleton. Note there is no SYSINIT equivalent of the module
  * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
  */
 static int
 ipfw_modevent(module_t mod, int type, void *unused)
 {
 
 	/*
 	 * Skeleton handler: every recognised event is accepted without
 	 * doing any work, anything else is reported as unsupported.
 	 */
 	switch (type) {
 	case MOD_LOAD:		/* module load, or boot if compiled in */
 	case MOD_QUIESCE:	/* before unload; may veto, never does */
 	case MOD_UNLOAD:	/* during unload */
 	case MOD_SHUTDOWN:	/* during system shutdown */
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Module description passed to DECLARE_MODULE() below.  The initializer
  * is positional; the field roles noted here are presumed from the values
  * and should be confirmed against the definition of moduledata_t.
  */
 static moduledata_t ipfwmod = {
 	/* module name */
 	"ipfw",
 	/* event handler defined above */
 	ipfw_modevent,
 	/* no extra data supplied */
 	0
 };
 
 /* Define startup order. */
 /*
  * All registrations below use the same subsystem and differ only in
  * their order value: the module event handler comes first, ipfw_init()
  * one step later and vnet_ipfw_init() one step after that.
  */
 #define	IPFW_SI_SUB_FIREWALL	SI_SUB_PROTO_IFATTACHDOMAIN
 #define	IPFW_MODEVENT_ORDER	(SI_ORDER_ANY - 255) /* On boot slot in here. */
 #define	IPFW_MODULE_ORDER	(IPFW_MODEVENT_ORDER + 1) /* A little later. */
 #define	IPFW_VNET_ORDER		(IPFW_MODEVENT_ORDER + 2) /* Later still. */
 
 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
 MODULE_VERSION(ipfw, 2);
 /* should declare some dependencies here */
 
 /*
  * Starting up. Done in order after ipfwmod() has been called.
  * VNET_SYSINIT is also called for each existing vnet and each new vnet.
  */
 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_init, NULL);
 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_init, NULL);
  
 /*
  * Closing up shop. These are done in REVERSE ORDER, but still
  * after ipfwmod() has been called. Not called on reboot.
  * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
  * or when the module is unloaded.
  */
 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_destroy, NULL);
 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_uninit, NULL);
 /* end of file */
Index: stable/10/sys/netpfil/pf/pf.c
===================================================================
--- stable/10/sys/netpfil/pf/pf.c	(revision 263085)
+++ stable/10/sys/netpfil/pf/pf.c	(revision 263086)
@@ -1,6392 +1,6391 @@
 /*-
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002 - 2008 Henning Brauer
  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Effort sponsored in part by the Defense Advanced Research Projects
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
  *	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_bpf.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/hash.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/md5.h>
 #include <sys/random.h>
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/radix_mpath.h>
 #include <net/vnet.h>
 
 #include <net/pfvar.h>
-#include <net/pf_mtag.h>
 #include <net/if_pflog.h>
 #include <net/if_pfsync.h>
 
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif /* INET6 */
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 /*
  * Debug printf: pass the parenthesised argument list `x' to printf only
  * when the configured debug level V_pf_status.debug is at least `n'.
  * The body is wrapped in do { } while (0) so that the macro acts as one
  * statement and cannot capture an `else' that follows its use.
  * Usage: DPFPRINTF(level, ("fmt", args...));
  */
 #define	DPFPRINTF(n, x)							\
 	do {								\
 		if (V_pf_status.debug >= (n))				\
 			printf x;					\
 	} while (0)
 
 /*
  * Global variables
  */
 
 /* state tables */
 /*
  * Two ALTQ queue lists plus "active"/"inactive" pointers.
  * NOTE(review): the pointers presumably select between the two
  * pf_altqs[] entries -- confirm against the code that assigns them.
  */
 VNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
 VNET_DEFINE(struct pf_palist,		 pf_pabuf);
 VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
 VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
 VNET_DEFINE(struct pf_status,		 pf_status);
 
 /* Ticket values and open flag for the ALTQ lists and pf_pabuf. */
 VNET_DEFINE(u_int32_t,			 ticket_altqs_active);
 VNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
 VNET_DEFINE(int,			 altqs_inactive_open);
 VNET_DEFINE(u_int32_t,			 ticket_pabuf);
 
 /*
  * MD5 context, 16-byte secret, init flag and ISS offset, each with a
  * V_ accessor defined through VNET().
  * NOTE(review): by name these feed pf_tcp_iss(), declared below -- confirm.
  */
 VNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
 #define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
 VNET_DEFINE(u_char,			 pf_tcp_secret[16]);
 #define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
 VNET_DEFINE(int,			 pf_tcp_secret_init);
 #define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
 VNET_DEFINE(int,			 pf_tcp_iss_off);
 #define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
 
 /*
  * Queue for pf_intr() sends.
  */
 static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
 /*
  * One queued send: list linkage, the mbuf, a type tag and a union that
  * holds either a route or a set of ICMP options.
  * NOTE(review): pfse_type presumably tells which union member is
  * meaningful (route for PFSE_IP/PFSE_IP6, icmpopts for the ICMP types) --
  * confirm against the code that fills and drains the queue.
  */
 struct pf_send_entry {
 	STAILQ_ENTRY(pf_send_entry)	pfse_next;
 	struct mbuf			*pfse_m;
 	enum {
 		PFSE_IP,
 		PFSE_IP6,
 		PFSE_ICMP,
 		PFSE_ICMP6,
 	}				pfse_type;
 	union {
 		struct route		ro;
 		struct {
 			int		type;
 			int		code;
 			int		mtu;
 		} icmpopts;
 	} u;
 /* Shorthand accessors for the union members. */
 #define	pfse_ro		u.ro
 #define	pfse_icmp_type	u.icmpopts.type
 #define	pfse_icmp_code	u.icmpopts.code
 #define	pfse_icmp_mtu	u.icmpopts.mtu
 };
 
 /* Head type and per-VNET instance of the send queue. */
 STAILQ_HEAD(pf_send_head, pf_send_entry);
 static VNET_DEFINE(struct pf_send_head, pf_sendqueue);
 #define	V_pf_sendqueue	VNET(pf_sendqueue)
 
 /*
  * A single mutex (not defined through VNET_DEFINE) with lock/unlock
  * wrappers; by name it guards the send queue.
  */
 static struct mtx pf_sendqueue_mtx;
 #define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
 #define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)
 
 /*
  * Queue for pf_overload_task() tasks.
  */
 /*
  * One queued overload record: list linkage, an address with its address
  * family, a direction byte and a rule pointer.
  * NOTE(review): presumably the offending source address and the rule
  * whose limit was exceeded -- confirm against pf_src_connlimit().
  */
 struct pf_overload_entry {
 	SLIST_ENTRY(pf_overload_entry)	next;
 	struct pf_addr  		addr;
 	sa_family_t			af;
 	uint8_t				dir;
 	struct pf_rule  		*rule;
 };
 
 /* Head type, per-VNET queue instance and the per-VNET task object. */
 SLIST_HEAD(pf_overload_head, pf_overload_entry);
 static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
 #define V_pf_overloadqueue	VNET(pf_overloadqueue)
 static VNET_DEFINE(struct task, pf_overloadtask);
 #define	V_pf_overloadtask	VNET(pf_overloadtask)
 
 /*
  * A single mutex (not defined through VNET_DEFINE) with lock/unlock
  * wrappers; by name it guards the overload queue.
  */
 static struct mtx pf_overloadqueue_mtx;
 #define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
 #define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)
 
 /*
  * Per-VNET queue of unlinked rules and a mutex.
  * NOTE(review): by name the mutex protects that queue -- confirm.
  */
 VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
 struct mtx pf_unlnkdrules_mtx;
 
 /* Per-VNET UMA zones; the first two are private to this file. */
 static VNET_DEFINE(uma_zone_t,	pf_sources_z);
 #define	V_pf_sources_z	VNET(pf_sources_z)
 static VNET_DEFINE(uma_zone_t,	pf_mtag_z);
 #define	V_pf_mtag_z	VNET(pf_mtag_z)
 VNET_DEFINE(uma_zone_t,	 pf_state_z);
 VNET_DEFINE(uma_zone_t,	 pf_state_key_z);
 
 /*
  * State id layout: one 64-bit slot per CPU in pf_stateid[].
  * PFID_CPUMASK covers the topmost PFID_CPUBITS bits of a uint64_t
  * (taking NBBY as the number of bits per byte) and PFID_MAXID is the
  * complement, i.e. all the remaining low bits.
  * NOTE(review): the top bits presumably carry the CPU number -- confirm
  * where the ids are generated.
  * The CTASSERT makes the build fail unless MAXCPU is smaller than
  * 2^PFID_CPUBITS.
  */
 VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
 #define	PFID_CPUBITS	8
 #define	PFID_CPUSHIFT	(sizeof(uint64_t) * NBBY - PFID_CPUBITS)
 #define	PFID_CPUMASK	((uint64_t)((1 << PFID_CPUBITS) - 1) <<	PFID_CPUSHIFT)
 #define	PFID_MAXID	(~PFID_CPUMASK)
 CTASSERT((1 << PFID_CPUBITS) > MAXCPU);
 
 static void		 pf_src_tree_remove_state(struct pf_state *);
 static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
 			    u_int32_t);
 static void		 pf_add_threshold(struct pf_threshold *);
 static int		 pf_check_threshold(struct pf_threshold *);
 
 static void		 pf_change_ap(struct pf_addr *, u_int16_t *,
 			    u_int16_t *, u_int16_t *, struct pf_addr *,
 			    u_int16_t, u_int8_t, sa_family_t);
 static int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
 			    struct tcphdr *, struct pf_state_peer *);
 static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, struct pf_addr *, u_int16_t,
 			    u_int16_t *, u_int16_t *, u_int16_t *,
 			    u_int16_t *, u_int8_t, sa_family_t);
 static void		 pf_send_tcp(struct mbuf *,
 			    const struct pf_rule *, sa_family_t,
 			    const struct pf_addr *, const struct pf_addr *,
 			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
 			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
 			    u_int16_t, struct ifnet *);
 static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
 			    sa_family_t, struct pf_rule *);
 static void		 pf_detach_state(struct pf_state *);
 static int		 pf_state_key_attach(struct pf_state_key *,
 			    struct pf_state_key *, struct pf_state *);
 static void		 pf_state_key_detach(struct pf_state *, int);
 static int		 pf_state_key_ctor(void *, int, void *, int);
 static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
 static int		 pf_test_rule(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **, struct inpcb *);
 static int		 pf_create_state(struct pf_rule *, struct pf_rule *,
 			    struct pf_rule *, struct pf_pdesc *,
 			    struct pf_src_node *, struct pf_state_key *,
 			    struct pf_state_key *, struct mbuf *, int,
 			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
 			    struct pf_state **, int, u_int16_t, u_int16_t,
 			    int);
 static int		 pf_test_fragment(struct pf_rule **, int,
 			    struct pfi_kif *, struct mbuf *, void *,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **);
 static int		 pf_tcp_track_full(struct pf_state_peer *,
 			    struct pf_state_peer *, struct pf_state **,
 			    struct pfi_kif *, struct mbuf *, int,
 			    struct pf_pdesc *, u_short *, int *);
 static int		 pf_tcp_track_sloppy(struct pf_state_peer *,
 			    struct pf_state_peer *, struct pf_state **,
 			    struct pf_pdesc *, u_short *);
 static int		 pf_test_state_tcp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 static int		 pf_test_state_udp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *);
 static int		 pf_test_state_icmp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 static int		 pf_test_state_other(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
 static u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 static u_int16_t	 pf_get_mss(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
 				int, u_int16_t);
 static void		 pf_set_rt_ifp(struct pf_state *,
 			    struct pf_addr *);
 static int		 pf_check_proto_cksum(struct mbuf *, int, int,
 			    u_int8_t, sa_family_t);
 static void		 pf_print_state_parts(struct pf_state *,
 			    struct pf_state_key *, struct pf_state_key *);
 static int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
 			    struct pf_addr_wrap *);
 static struct pf_state	*pf_find_state(struct pfi_kif *,
 			    struct pf_state_key_cmp *, u_int);
 static int		 pf_src_connlimit(struct pf_state **);
 static void		 pf_overload_task(void *c, int pending);
 static int		 pf_insert_src_node(struct pf_src_node **,
 			    struct pf_rule *, struct pf_addr *, sa_family_t);
 static u_int		 pf_purge_expired_states(u_int, int);
 static void		 pf_purge_unlinked_rules(void);
 static int		 pf_mtag_init(void *, int, int);
 static void		 pf_mtag_free(struct m_tag *);
 #ifdef INET
 static void		 pf_route(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 #endif /* INET */
 #ifdef INET6
 static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, u_int8_t);
 static void		 pf_route6(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 #endif /* INET6 */
 
 int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
 
 VNET_DECLARE(int, pf_end_threads);
 
 VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
 
 /*
  * Evaluates to 1 when the packet descriptor `pd' has a pf mbuf tag
  * attached and that tag has the PF_PACKET_LOOPED flag set, 0 otherwise.
  */
 #define	PACKET_LOOPED(pd)						\
 	((pd)->pf_mtag != NULL &&					\
 	 ((pd)->pf_mtag->flags & PF_PACKET_LOOPED) != 0)
 
 /*
  * Look up the state for key `k' on interface `i' in direction `d' and
  * store it in `s'.  This macro contains return statements and therefore
  * leaves the calling function:
  *   - with PF_DROP when no state is found;
  *   - with PF_PASS when the packet is marked as looped;
  *   - with PF_PASS for an outbound packet whose state rule is an
  *     outbound route-to or an inbound reply-to rule and whose state has
  *     a routing interface set that differs from `i'.
  * Otherwise execution continues after the macro with `s' set.
  */
 #define	STATE_LOOKUP(i, k, d, s, pd)					\
 	do {								\
 		(s) = pf_find_state((i), (k), (d));			\
 		if ((s) == NULL)					\
 			return (PF_DROP);				\
 		if (PACKET_LOOPED(pd))					\
 			return (PF_PASS);				\
 		if ((d) == PF_OUT) {					\
 			if (((s)->rule.ptr->rt == PF_ROUTETO &&		\
 			    (s)->rule.ptr->direction == PF_OUT) ||	\
 			    ((s)->rule.ptr->rt == PF_REPLYTO &&		\
 			    (s)->rule.ptr->direction == PF_IN)) {	\
 				if ((s)->rt_kif != NULL &&		\
 				    (s)->rt_kif != (i))			\
 					return (PF_PASS);		\
 			}						\
 		}							\
 	} while (0)
 
 /*
  * Interface to bind a state to: (k) when rule (r) has PFRULE_IFBOUND set,
  * otherwise V_pfi_all.
  *
  * The whole expansion is parenthesized so that the conditional operator
  * cannot be re-associated with operators surrounding the macro call.
  */
 #define	BOUND_IFACE(r, k) \
 	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
 
 /*
  * Account for a newly created state: bump the current and total state
  * counters of its rule and, when present, of its anchor and NAT rule.
  *
  * The argument is parenthesized at every use so that any pointer-valued
  * expression expands correctly; it is still evaluated more than once, so
  * it must not have side effects.
  */
 #define	STATE_INC_COUNTERS(s)						\
 	do {								\
 		counter_u64_add((s)->rule.ptr->states_cur, 1);		\
 		counter_u64_add((s)->rule.ptr->states_tot, 1);		\
 		if ((s)->anchor.ptr != NULL) {				\
 			counter_u64_add((s)->anchor.ptr->states_cur, 1);\
 			counter_u64_add((s)->anchor.ptr->states_tot, 1);\
 		}							\
 		if ((s)->nat_rule.ptr != NULL) {			\
 			counter_u64_add((s)->nat_rule.ptr->states_cur, 1);\
 			counter_u64_add((s)->nat_rule.ptr->states_tot, 1);\
 		}							\
 	} while (0)
 
 /*
  * Account for a state going away: decrement the current-state counter of
  * its NAT rule and anchor (when present) and of its rule.  Total counters
  * are left untouched.
  *
  * The argument is parenthesized at every use so that any pointer-valued
  * expression expands correctly; it is still evaluated more than once, so
  * it must not have side effects.
  */
 #define	STATE_DEC_COUNTERS(s)						\
 	do {								\
 		if ((s)->nat_rule.ptr != NULL)				\
 			counter_u64_add((s)->nat_rule.ptr->states_cur, -1);\
 		if ((s)->anchor.ptr != NULL)				\
 			counter_u64_add((s)->anchor.ptr->states_cur, -1);\
 		counter_u64_add((s)->rule.ptr->states_cur, -1);		\
 	} while (0)
 
 /* Malloc type used for the hash row arrays allocated in pf_initialize(). */
 static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
 /*
  * State key hash and state ID hash share one size/mask (pf_hashmask);
  * the source node hash has its own (pf_srchashmask).
  */
 VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
 VNET_DEFINE(struct pf_idhash *, pf_idhash);
 VNET_DEFINE(u_long, pf_hashmask);
 VNET_DEFINE(struct pf_srchash *, pf_srchash);
 VNET_DEFINE(u_long, pf_srchashmask);
 
 SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
 
 /*
  * Hash sizes; pf_initialize() fetches them from tunables and falls back
  * to defaults when they are zero or not a power of two.
  *
  * NOTE(review): both variables are u_long but are exported through
  * SYSCTL_VNET_UINT -- the widths differ on LP64; confirm this is intended.
  */
 VNET_DEFINE(u_long, pf_hashsize);
 #define	V_pf_hashsize	VNET(pf_hashsize)
 SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
     &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
 
 VNET_DEFINE(u_long, pf_srchashsize);
 #define	V_pf_srchashsize	VNET(pf_srchashsize)
 SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
     &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
 
 /* Cookie handed to swi_sched() by pf_send(). */
 VNET_DEFINE(void *, pf_swi_cookie);
 
 /* Seed for the Jenkins hashes; set from arc4random() in pf_initialize(). */
 VNET_DEFINE(uint32_t, pf_hashseed);
 #define	V_pf_hashseed	VNET(pf_hashseed)
 
 /*
  * Map a state key to its key hash row index.  Only the leading
  * sizeof(struct pf_state_key_cmp) bytes of the key are hashed (as 32-bit
  * words, seeded with V_pf_hashseed); the result is masked with
  * V_pf_hashmask.
  */
 static __inline uint32_t
 pf_hashkey(struct pf_state_key *sk)
 {
 
 	return (jenkins_hash32((uint32_t *)sk,
 	    sizeof(struct pf_state_key_cmp) / sizeof(uint32_t),
 	    V_pf_hashseed) & V_pf_hashmask);
 }
 
 /*
  * Map a source address to its source node hash row index.  The IPv4 or
  * IPv6 part of the address is hashed as 32-bit words, seeded with
  * V_pf_hashseed, and masked with V_pf_srchashmask.  Any other address
  * family panics.
  */
 static __inline uint32_t
 pf_hashsrc(struct pf_addr *addr, sa_family_t af)
 {
 	uint32_t *words;
 	size_t nwords;
 
 	switch (af) {
 	case AF_INET:
 		words = (uint32_t *)&addr->v4;
 		nwords = sizeof(addr->v4) / sizeof(uint32_t);
 		break;
 	case AF_INET6:
 		words = (uint32_t *)&addr->v6;
 		nwords = sizeof(addr->v6) / sizeof(uint32_t);
 		break;
 	default:
 		panic("%s: unknown address family %u", __func__, af);
 	}
 
 	return (jenkins_hash32(words, nwords, V_pf_hashseed) &
 	    V_pf_srchashmask);
 }
 
 #ifdef INET6
 /*
  * Copy an address according to its family: one 32-bit word for AF_INET
  * (only when INET is compiled in), four words for AF_INET6.  Any other
  * family leaves dst untouched.
  */
 void
 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
 {
 	int w;
 
 #ifdef INET
 	if (af == AF_INET) {
 		dst->addr32[0] = src->addr32[0];
 		return;
 	}
 #endif /* INET */
 	if (af == AF_INET6) {
 		for (w = 0; w < 4; w++)
 			dst->addr32[w] = src->addr32[w];
 	}
 }
 #endif /* INET6 */
 
 /*
  * Reset a rate threshold: empty counter stamped with the current uptime,
  * the limit stored pre-scaled by PF_THRESHOLD_MULT, and the averaging
  * window in seconds.
  */
 static void
 pf_init_threshold(struct pf_threshold *threshold,
     u_int32_t limit, u_int32_t seconds)
 {
 
 	threshold->count = 0;
 	threshold->last = time_uptime;
 	threshold->seconds = seconds;
 	threshold->limit = limit * PF_THRESHOLD_MULT;
 }
 
 /*
  * Record one event against a rate threshold.  The stored count is first
  * decayed in proportion to the time elapsed since the last event (or
  * cleared when a whole window has passed), then PF_THRESHOLD_MULT is
  * added for the new event.
  */
 static void
 pf_add_threshold(struct pf_threshold *threshold)
 {
 	u_int32_t now = time_uptime;
 	u_int32_t elapsed = now - threshold->last;
 
 	if (elapsed < threshold->seconds)
 		threshold->count -= threshold->count * elapsed /
 		    threshold->seconds;
 	else
 		threshold->count = 0;
 
 	threshold->last = now;
 	threshold->count += PF_THRESHOLD_MULT;
 }
 
 /* Non-zero when the accumulated count has gone past the scaled limit. */
 static int
 pf_check_threshold(struct pf_threshold *threshold)
 {
 
 	return (threshold->limit < threshold->count);
 }
 
 static int
 pf_src_connlimit(struct pf_state **state)
 {
 	struct pf_overload_entry *pfoe;
 	int bad = 0;
 
 	PF_STATE_LOCK_ASSERT(*state);
 
 	(*state)->src_node->conn++;
 	(*state)->src.tcp_est = 1;
 	pf_add_threshold(&(*state)->src_node->conn_rate);
 
 	if ((*state)->rule.ptr->max_src_conn &&
 	    (*state)->rule.ptr->max_src_conn <
 	    (*state)->src_node->conn) {
 		V_pf_status.lcounters[LCNT_SRCCONN]++;
 		bad++;
 	}
 
 	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
 	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
 		V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
 		bad++;
 	}
 
 	if (!bad)
 		return (0);
 
 	/* Kill this state. */
 	(*state)->timeout = PFTM_PURGE;
 	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
 
 	if ((*state)->rule.ptr->overload_tbl == NULL)
 		return (1);
 
 	/* Schedule overloading and flushing task. */
 	pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
 	if (pfoe == NULL)
 		return (1);	/* too bad :( */
 
 	bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
 	pfoe->af = (*state)->key[PF_SK_WIRE]->af;
 	pfoe->rule = (*state)->rule.ptr;
 	pfoe->dir = (*state)->direction;
 	PF_OVERLOADQ_LOCK();
 	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
 	PF_OVERLOADQ_UNLOCK();
 	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
 
 	return (1);
 }
 
 /*
  * Task handler for the overload queue filled by pf_src_connlimit().
  *
  * c is the queue head registered with TASK_INIT(); pending is unused.
  * The handler:
  *  1. takes over the whole pending list under the overload queue lock;
  *  2. inserts every queued address into its rule's overload table;
  *  3. frees the entries whose rule does not request flushing;
  *  4. for the remaining entries, walks the whole state ID hash and marks
  *     every matching state for purge;
  *  5. frees the remaining entries.
  */
 static void
 pf_overload_task(void *c, int pending)
 {
 	struct pf_overload_head queue;
 	struct pfr_addr p;
 	struct pf_overload_entry *pfoe, *pfoe1;
 	uint32_t killed = 0;
 
 	/* Steal the pending list so it can be processed unlocked. */
 	PF_OVERLOADQ_LOCK();
 	queue = *(struct pf_overload_head *)c;
 	SLIST_INIT((struct pf_overload_head *)c);
 	PF_OVERLOADQ_UNLOCK();
 
 	bzero(&p, sizeof(p));
 	SLIST_FOREACH(pfoe, &queue, next) {
 		V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("%s: blocking address ", __func__);
 			pf_print_host(&pfoe->addr, 0, pfoe->af);
 			printf("\n");
 		}
 
 		/* Build a host entry (full-length prefix) for the table. */
 		p.pfra_af = pfoe->af;
 		switch (pfoe->af) {
 #ifdef INET
 		case AF_INET:
 			p.pfra_net = 32;
 			p.pfra_ip4addr = pfoe->addr.v4;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			p.pfra_net = 128;
 			p.pfra_ip6addr = pfoe->addr.v6;
 			break;
 #endif
 		}
 
 		PF_RULES_WLOCK();
 		pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
 		PF_RULES_WUNLOCK();
 	}
 
 	/*
 	 * Remove those entries, that don't need flushing.
 	 */
 	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) {
 		if (pfoe->rule->flush == 0) {
 			SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
 			free(pfoe, M_PFTEMP);
 		} else
 			V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
 	}
 
 	/* If nothing to flush, return. */
 	if (SLIST_EMPTY(&queue))
 		return;
 
 	for (int i = 0; i <= V_pf_hashmask; i++) {
 		struct pf_idhash *ih = &V_pf_idhash[i];
 		struct pf_state_key *sk;
 		struct pf_state *s;
 
 		PF_HASHROW_LOCK(ih);
 		LIST_FOREACH(s, &ih->states, entry) {
 			sk = s->key[PF_SK_WIRE];
 			/*
 			 * A state matches an entry when the address family
 			 * agrees, the entry's rule either flushes globally
 			 * or is the rule that created the state, and the
 			 * offending address is the wire key's addr[1] (for
 			 * PF_OUT entries) or addr[0] (for PF_IN entries).
 			 */
 			SLIST_FOREACH(pfoe, &queue, next) {
 				if (sk->af == pfoe->af &&
 				    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
 				    pfoe->rule == s->rule.ptr) &&
 				    ((pfoe->dir == PF_OUT &&
 				    PF_AEQ(&pfoe->addr, &sk->addr[1],
 				    sk->af)) ||
 				    (pfoe->dir == PF_IN &&
 				    PF_AEQ(&pfoe->addr, &sk->addr[0],
 				    sk->af)))) {
 					s->timeout = PFTM_PURGE;
 					s->src.state = s->dst.state =
 					    TCPS_CLOSED;
 					killed++;
 				}
 			}
 		}
 		PF_HASHROW_UNLOCK(ih);
 	}
 	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
 		free(pfoe, M_PFTEMP);
 	/* Terminate the line, like the "blocking address" message above. */
 	if (V_pf_status.debug >= PF_DEBUG_MISC)
 		printf("%s: %u states killed\n", __func__, killed);
 }
 
 /*
  * Look up the source node tracking address src (family af) for rule.
  *
  * Returns the node, with the hash row unlocked, when one is found.
  * Returns NULL otherwise; in that case the hash row is left LOCKED when
  * returnlocked is non-zero, so that the caller can allocate and insert a
  * new node without racing against another insert.
  */
 struct pf_src_node *
 pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
 	int returnlocked)
 {
 	struct pf_srchash *row;
 	struct pf_src_node *node;
 
 	V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
 
 	row = &V_pf_srchash[pf_hashsrc(src, af)];
 	PF_HASHROW_LOCK(row);
 	LIST_FOREACH(node, &row->nodes, entry) {
 		if (node->rule.ptr != rule || node->af != af)
 			continue;
 		if (af == AF_INET &&
 		    node->addr.v4.s_addr == src->v4.s_addr)
 			break;
 		if (af == AF_INET6 &&
 		    bcmp(&node->addr, src, sizeof(*src)) == 0)
 			break;
 	}
 
 	/* Keep the row locked only for a miss with returnlocked set. */
 	if (!(node == NULL && returnlocked != 0))
 		PF_HASHROW_UNLOCK(row);
 
 	return (node);
 }
 
 /*
  * Make sure *sn refers to a source node for (rule, src, af).
  *
  * If *sn is NULL the node is looked up first.  An existing node is only
  * checked against the rule's max-src-states limit.  When no node exists
  * a new one is allocated (subject to max-src-nodes), initialized and
  * linked into the source hash; the hash row is expected to be locked at
  * that point and is unlocked before returning.
  *
  * Returns 0 on success, -1 when a limit is hit or allocation fails.
  */
 static int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
     struct pf_addr *src, sa_family_t af)
 {
 	struct pf_srchash *sh;
 
 	KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
 	    rule->rpool.opts & PF_POOL_STICKYADDR),
 	    ("%s for non-tracking rule %p", __func__, rule));
 
 	if (*sn == NULL)
 		*sn = pf_find_src_node(src, rule, af, 1);
 
 	if (*sn != NULL) {
 		/* Existing node: only the per-source state limit applies. */
 		if (rule->max_src_states != 0 &&
 		    (*sn)->states >= rule->max_src_states) {
 			V_pf_status.lcounters[LCNT_SRCSTATES]++;
 			return (-1);
 		}
 		return (0);
 	}
 
 	/* Lookup missed, so pf_find_src_node() left the row locked. */
 	sh = &V_pf_srchash[pf_hashsrc(src, af)];
 	PF_HASHROW_ASSERT(sh);
 
 	if (rule->max_src_nodes != 0 &&
 	    counter_u64_fetch(rule->src_nodes) >= rule->max_src_nodes)
 		V_pf_status.lcounters[LCNT_SRCNODES]++;
 	else
 		(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
 
 	if ((*sn) == NULL) {
 		PF_HASHROW_UNLOCK(sh);
 		return (-1);
 	}
 
 	pf_init_threshold(&(*sn)->conn_rate,
 	    rule->max_src_conn_rate.limit,
 	    rule->max_src_conn_rate.seconds);
 
 	(*sn)->af = af;
 	(*sn)->rule.ptr = rule;
 	PF_ACPY(&(*sn)->addr, src, af);
 	LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
 	(*sn)->creation = time_uptime;
 	(*sn)->ruletype = rule->action;
 	if ((*sn)->rule.ptr != NULL)
 		counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
 	PF_HASHROW_UNLOCK(sh);
 
 	V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
 	V_pf_status.src_nodes++;
 
 	return (0);
 }
 
 void
 pf_unlink_src_node_locked(struct pf_src_node *src)
 {
 #ifdef INVARIANTS
 	struct pf_srchash *sh;
 
 	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
 	PF_HASHROW_ASSERT(sh);
 #endif
 	LIST_REMOVE(src, entry);
 	if (src->rule.ptr)
 		counter_u64_add(src->rule.ptr->src_nodes, -1);
 	V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 	V_pf_status.src_nodes--;
 }
 
 /*
  * Unlocked-entry variant of pf_unlink_src_node_locked(): takes the
  * node's hash row lock around the unlink.
  */
 void
 pf_unlink_src_node(struct pf_src_node *src)
 {
 	struct pf_srchash *row =
 	    &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
 
 	PF_HASHROW_LOCK(row);
 	pf_unlink_src_node_locked(src);
 	PF_HASHROW_UNLOCK(row);
 }
 
 /*
  * Return a source node to its UMA zone.  The node must no longer be
  * referenced by any state (asserted).
  */
 static void
 pf_free_src_node(struct pf_src_node *sn)
 {
 
 	KASSERT(sn->states == 0, ("%s: %p has refs", __func__, sn));
 	uma_zfree(V_pf_sources_z, sn);
 }
 
 u_int
 pf_free_src_nodes(struct pf_src_node_list *head)
 {
 	struct pf_src_node *sn, *tmp;
 	u_int count = 0;
 
 	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
 		pf_free_src_node(sn);
 		count++;
 	}
 
 	return (count);
 }
 
 /*
  * Data storage structures initialization.
  *
  * Sets up, in order: the hash sizes and hash seed, the state and state
  * key zones with the key/ID hash row arrays, the source node zone with
  * its hash row array, the ALTQ lists, the mbuf tag zone, the send and
  * overload queues with their mutexes and task, and the unlinked rules
  * list.  pf_cleanup() undoes this.
  */
 void
 pf_initialize()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
 	struct pf_srchash	*sh;
 	u_int i;
 
 	/*
 	 * Hash sizes must be non-zero powers of two because row indexes
 	 * are obtained by masking with (size - 1).
 	 */
 	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize);
 	if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
 		V_pf_hashsize = PF_HASHSIZ;
 	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize);
 	if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
 		V_pf_srchashsize = PF_HASHSIZ / 4;
 
 	V_pf_hashseed = arc4random();
 
 	/* States and state keys storage. */
 	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
 	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
 	uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
 
 	V_pf_state_key_z = uma_zcreate("pf state keys",
 	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	/* The key hash and the ID hash have the same number of rows. */
 	V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash),
 	    M_PFHASH, M_WAITOK | M_ZERO);
 	V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash),
 	    M_PFHASH, M_WAITOK | M_ZERO);
 	V_pf_hashmask = V_pf_hashsize - 1;
 	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
 	    i++, kh++, ih++) {
 		/* Key rows are MTX_DUPOK: two of them may be held at once. */
 		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
 		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
 	}
 
 	/* Source nodes. */
 	V_pf_sources_z = uma_zcreate("pf source nodes",
 	    sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
 	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
 	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
 	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
 	V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash),
 	  M_PFHASH, M_WAITOK|M_ZERO);
 	V_pf_srchashmask = V_pf_srchashsize - 1;
 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
 		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
 
 	/* ALTQ */
 	TAILQ_INIT(&V_pf_altqs[0]);
 	TAILQ_INIT(&V_pf_altqs[1]);
 	TAILQ_INIT(&V_pf_pabuf);
 	V_pf_altqs_active = &V_pf_altqs[0];
 	V_pf_altqs_inactive = &V_pf_altqs[1];
 
 	/* Mbuf tags */
 	V_pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
 	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 	/* Send & overload+flush queues. */
 	STAILQ_INIT(&V_pf_sendqueue);
 	SLIST_INIT(&V_pf_overloadqueue);
 	/* The overload task receives the queue head as its argument. */
 	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
 	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
 	mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
 	    MTX_DEF);
 
 	/* Unlinked, but may be referenced rules. */
 	TAILQ_INIT(&V_pf_unlinked_rules);
 	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
 }
 
 /*
  * Tear down what pf_initialize() set up: destroy the hash row locks and
  * free the row arrays (all rows must already be empty, which is
  * asserted), drop anything still sitting on the send queue, destroy the
  * queue mutexes and finally the UMA zones.
  *
  * NOTE(review): entries still on the overload queue are not freed here;
  * presumably the queue is drained before this runs -- confirm.
  */
 void
 pf_cleanup()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
 	struct pf_srchash	*sh;
 	struct pf_send_entry	*pfse, *next;
 	u_int i;
 
 	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
 	    i++, kh++, ih++) {
 		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
 		    __func__));
 		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
 		    __func__));
 		mtx_destroy(&kh->lock);
 		mtx_destroy(&ih->lock);
 	}
 	free(V_pf_keyhash, M_PFHASH);
 	free(V_pf_idhash, M_PFHASH);
 
 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
 		KASSERT(LIST_EMPTY(&sh->nodes),
 		    ("%s: source node hash not empty", __func__));
 		mtx_destroy(&sh->lock);
 	}
 	free(V_pf_srchash, M_PFHASH);
 
 	/* Discard packets that were queued but never sent. */
 	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
 		m_freem(pfse->pfse_m);
 		free(pfse, M_PFTEMP);
 	}
 
 	mtx_destroy(&pf_sendqueue_mtx);
 	mtx_destroy(&pf_overloadqueue_mtx);
 	mtx_destroy(&pf_unlnkdrules_mtx);
 
 	uma_zdestroy(V_pf_mtag_z);
 	uma_zdestroy(V_pf_sources_z);
 	uma_zdestroy(V_pf_state_z);
 	uma_zdestroy(V_pf_state_key_z);
 }
 
 /*
  * UMA init routine for the pf mbuf tag zone: fills in the m_tag header
  * (cookie, id, payload length and free routine) of a fresh item.  The
  * size and how arguments are unused.  Always returns 0.
  */
 static int
 pf_mtag_init(void *mem, int size, int how)
 {
 	struct m_tag *tag = mem;
 
 	tag->m_tag_cookie = MTAG_ABI_COMPAT;
 	tag->m_tag_id = PACKET_TAG_PF;
 	tag->m_tag_len = sizeof(struct pf_mtag);
 	tag->m_tag_free = pf_mtag_free;
 
 	return (0);
 }
 
 /*
  * m_tag free routine installed by pf_mtag_init(): hands the tag back to
  * the pf mbuf tag zone.
  */
 static void
 pf_mtag_free(struct m_tag *t)
 {
 
 	uma_zfree(V_pf_mtag_z, t);
 }
 
 /*
  * Return the pf tag attached to mbuf m, attaching a zeroed one first if
  * the mbuf has none yet.  The pf_mtag payload lives directly behind the
  * m_tag header.  Returns NULL when a new tag cannot be allocated.
  */
 struct pf_mtag *
 pf_get_mtag(struct mbuf *m)
 {
 	struct m_tag *mtag;
 
 	mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
 	if (mtag == NULL) {
 		mtag = uma_zalloc(V_pf_mtag_z, M_NOWAIT);
 		if (mtag == NULL)
 			return (NULL);
 		bzero(mtag + 1, sizeof(struct pf_mtag));
 		m_tag_prepend(m, mtag);
 	}
 
 	return ((struct pf_mtag *)(mtag + 1));
 }
 
 /*
  * Attach state s to its wire key (skw) and stack key (sks); the two may
  * be the same object.
  *
  * For each key, an equal key already present in the key hash is reused
  * (and the passed-in key is freed); otherwise the passed-in key is
  * inserted.  If an existing state on the reused key has the same kif and
  * direction, the attach fails with EEXIST, unless that state is a TCP
  * state with both sides at or past FIN_WAIT_2, in which case it is
  * marked for purge and moved to the tail of the list.
  *
  * Returns 0 with the state locked (PF_STATE_LOCK) and the key rows
  * unlocked, or EEXIST with nothing locked.
  *
  * NOTE(review): when the wire-key attach collides and sks != skw, only
  * sk (== skw at that point) is freed here; what happens to sks on that
  * path is not visible in this function -- confirm the caller handles it.
  */
 static int
 pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
     struct pf_state *s)
 {
 	struct pf_keyhash	*khs, *khw, *kh;
 	struct pf_state_key	*sk, *cur;
 	struct pf_state		*si, *olds = NULL;
 	int idx;
 
 	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
 	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
 	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
 
 	/*
 	 * We need to lock hash slots of both keys. To avoid deadlock
 	 * we always lock the slot with lower address first. Unlock order
 	 * isn't important.
 	 *
 	 * We also need to lock ID hash slot before dropping key
 	 * locks. On success we return with ID hash slot locked.
 	 */
 
 	if (skw == sks) {
 		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
 		PF_HASHROW_LOCK(khs);
 	} else {
 		khs = &V_pf_keyhash[pf_hashkey(sks)];
 		khw = &V_pf_keyhash[pf_hashkey(skw)];
 		if (khs == khw) {
 			PF_HASHROW_LOCK(khs);
 		} else if (khs < khw) {
 			PF_HASHROW_LOCK(khs);
 			PF_HASHROW_LOCK(khw);
 		} else {
 			PF_HASHROW_LOCK(khw);
 			PF_HASHROW_LOCK(khs);
 		}
 	}
 
 /* Drop whichever key rows were locked above (one row or two). */
 #define	KEYS_UNLOCK()	do {			\
 	if (khs != khw) {			\
 		PF_HASHROW_UNLOCK(khs);		\
 		PF_HASHROW_UNLOCK(khw);		\
 	} else					\
 		PF_HASHROW_UNLOCK(khs);		\
 } while (0)
 
 	/*
 	 * First run: start with wire key.
 	 */
 	sk = skw;
 	kh = khw;
 	idx = PF_SK_WIRE;
 
 keyattach:
 	/* Is an equal key (same comparable prefix) already in this row? */
 	LIST_FOREACH(cur, &kh->keys, entry)
 		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 
 	if (cur != NULL) {
 		/* Key exists. Check for same kif, if none, add to key. */
 		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
 			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
 
 			PF_HASHROW_LOCK(ih);
 			if (si->kif == s->kif &&
 			    si->direction == s->direction) {
 				if (sk->proto == IPPROTO_TCP &&
 				    si->src.state >= TCPS_FIN_WAIT_2 &&
 				    si->dst.state >= TCPS_FIN_WAIT_2) {
 					/*
 					 * New state matches an old >FIN_WAIT_2
 					 * state. We can't drop key hash locks,
 					 * thus we can't unlink it properly.
 					 *
 					 * As a workaround we drop it into
 					 * TCPS_CLOSED state, schedule purge
 					 * ASAP and push it into the very end
 					 * of the slot TAILQ, so that it won't
 					 * conflict with our new state.
 					 */
 					si->src.state = si->dst.state =
 					    TCPS_CLOSED;
 					si->timeout = PFTM_PURGE;
 					olds = si;
 				} else {
 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
 						printf("pf: %s key attach "
 						    "failed on %s: ",
 						    (idx == PF_SK_WIRE) ?
 						    "wire" : "stack",
 						    s->kif->pfik_name);
 						pf_print_state_parts(s,
 						    (idx == PF_SK_WIRE) ?
 						    sk : NULL,
 						    (idx == PF_SK_STACK) ?
 						    sk : NULL);
 						printf(", existing: ");
 						pf_print_state_parts(si,
 						    (idx == PF_SK_WIRE) ?
 						    sk : NULL,
 						    (idx == PF_SK_STACK) ?
 						    sk : NULL);
 						printf("\n");
 					}
 					PF_HASHROW_UNLOCK(ih);
 					KEYS_UNLOCK();
 					uma_zfree(V_pf_state_key_z, sk);
 					/*
 					 * Failing on the second (stack) pass
 					 * means the wire key is already
 					 * attached; undo that.
 					 */
 					if (idx == PF_SK_STACK)
 						pf_detach_state(s);
 					return (EEXIST); /* collision! */
 				}
 			}
 			PF_HASHROW_UNLOCK(ih);
 		}
 		/* Reuse the existing key; ours is redundant. */
 		uma_zfree(V_pf_state_key_z, sk);
 		s->key[idx] = cur;
 	} else {
 		LIST_INSERT_HEAD(&kh->keys, sk, entry);
 		s->key[idx] = sk;
 	}
 
 stateattach:
 	/* List is sorted, if-bound states before floating. */
 	if (s->kif == V_pfi_all)
 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
 	else
 		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
 
 	/* Move a superseded old state behind everything else. */
 	if (olds) {
 		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
 		    key_list[idx]);
 		olds = NULL;
 	}
 
 	/*
 	 * Attach done. See how should we (or should not?)
 	 * attach a second key.
 	 */
 	if (sks == skw) {
 		/* One shared key: link the state on its stack list too. */
 		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
 		idx = PF_SK_STACK;
 		sks = NULL;
 		goto stateattach;
 	} else if (sks != NULL) {
 		/*
 		 * Continue attaching with stack key.
 		 */
 		sk = sks;
 		kh = khs;
 		idx = PF_SK_STACK;
 		sks = NULL;
 		goto keyattach;
 	}
 
 	/* Take the state lock before letting go of the key rows. */
 	PF_STATE_LOCK(s);
 	KEYS_UNLOCK();
 
 	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
 	    ("%s failure", __func__));
 
 	return (0);
 #undef	KEYS_UNLOCK
 }
 
 /*
  * Detach state s from whichever of its keys are still attached.
  *
  * The stack key is handled first under its own key hash row lock; if the
  * wire key is the very same key object it is detached under that same
  * lock.  Otherwise the wire key is detached afterwards under its own row
  * lock.  Key pointers are re-checked after each lock is taken.
  */
 static void
 pf_detach_state(struct pf_state *s)
 {
 	struct pf_state_key *sks = s->key[PF_SK_STACK];
 	struct pf_keyhash *kh;
 
 	if (sks != NULL) {
 		kh = &V_pf_keyhash[pf_hashkey(sks)];
 		PF_HASHROW_LOCK(kh);
 		if (s->key[PF_SK_STACK] != NULL)
 			pf_state_key_detach(s, PF_SK_STACK);
 		/*
 		 * If both point to same key, then we are done.
 		 */
 		if (sks == s->key[PF_SK_WIRE]) {
 			pf_state_key_detach(s, PF_SK_WIRE);
 			PF_HASHROW_UNLOCK(kh);
 			return;
 		}
 		PF_HASHROW_UNLOCK(kh);
 	}
 
 	if (s->key[PF_SK_WIRE] != NULL) {
 		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
 		PF_HASHROW_LOCK(kh);
 		if (s->key[PF_SK_WIRE] != NULL)
 			pf_state_key_detach(s, PF_SK_WIRE);
 		PF_HASHROW_UNLOCK(kh);
 	}
 }
 
 /*
  * Unlink state s from the idx list of its key and clear s->key[idx].
  * Once neither of the key's state lists has members left, the key is
  * removed from the key hash and freed.  The key's hash row must be
  * locked by the caller (asserted under INVARIANTS).
  */
 static void
 pf_state_key_detach(struct pf_state *s, int idx)
 {
 	struct pf_state_key *key = s->key[idx];
 #ifdef INVARIANTS
 	struct pf_keyhash *row = &V_pf_keyhash[pf_hashkey(key)];
 
 	PF_HASHROW_ASSERT(row);
 #endif
 
 	TAILQ_REMOVE(&key->states[idx], s, key_list[idx]);
 	s->key[idx] = NULL;
 
 	/* Still referenced from either list: keep the key. */
 	if (!TAILQ_EMPTY(&key->states[0]) || !TAILQ_EMPTY(&key->states[1]))
 		return;
 
 	LIST_REMOVE(key, entry);
 	uma_zfree(V_pf_state_key_z, key);
 }
 
 /*
  * UMA constructor for state keys: zeroes the comparable prefix of the
  * key (sizeof(struct pf_state_key_cmp) bytes) and initializes both state
  * lists.  The size, arg and flags arguments are unused.  Always
  * returns 0.
  */
 static int
 pf_state_key_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct pf_state_key *key;
 
 	key = (struct pf_state_key *)mem;
 	bzero(key, sizeof(struct pf_state_key_cmp));
 	TAILQ_INIT(&key->states[PF_SK_WIRE]);
 	TAILQ_INIT(&key->states[PF_SK_STACK]);
 
 	return (0);
 }
 
 /*
  * Allocate a state key and fill it in from the packet descriptor and the
  * given addresses and ports.  Source values go to slot pd->sidx and
  * destination values to slot pd->didx.  Returns NULL when the allocation
  * fails.
  */
 struct pf_state_key *
 pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
 	struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
 {
 	struct pf_state_key *key;
 
 	key = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 	if (key != NULL) {
 		PF_ACPY(&key->addr[pd->sidx], saddr, pd->af);
 		PF_ACPY(&key->addr[pd->didx], daddr, pd->af);
 		key->port[pd->sidx] = sport;
 		key->port[pd->didx] = dport;
 		key->proto = pd->proto;
 		key->af = pd->af;
 	}
 
 	return (key);
 }
 
 /*
  * Allocate a new state key carrying a copy of the comparable prefix
  * (sizeof(struct pf_state_key_cmp) bytes) of orig.  Returns NULL when
  * the allocation fails.
  */
 struct pf_state_key *
 pf_state_key_clone(struct pf_state_key *orig)
 {
 	struct pf_state_key *copy;
 
 	copy = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 	if (copy != NULL)
 		bcopy(orig, copy, sizeof(struct pf_state_key_cmp));
 
 	return (copy);
 }
 
 /*
  * Insert a new state into the state tables.
  *
  * Binds the state to kif, assigns a state ID (per-CPU counter combined
  * with the CPU number, stored big-endian) and creator ID if none is set
  * yet, attaches the state to its keys and links it into the ID hash.
  *
  * Returns 0 with the state's ID hash row locked, or EEXIST (nothing
  * locked) on a key or ID collision.
  */
 int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
     struct pf_state_key *sks, struct pf_state *s)
 {
 	struct pf_idhash *ih;
 	struct pf_state *cur;
 	int error;
 
 	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
 	    ("%s: sks not pristine", __func__));
 	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
 	    ("%s: skw not pristine", __func__));
 	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
 
 	s->kif = kif;
 
 	if (s->id == 0 && s->creatorid == 0) {
 		/* XXX: should be atomic, but probability of collision low */
 		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
 			V_pf_stateid[curcpu] = 1;
 		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
 		s->id = htobe64(s->id);
 		s->creatorid = V_pf_status.hostid;
 	}
 
 	/* Returns with ID locked on success. */
 	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
 		return (error);
 
 	ih = &V_pf_idhash[PF_IDHASH(s)];
 	PF_HASHROW_ASSERT(ih);
 	/* Refuse a second state with the same (id, creatorid) pair. */
 	LIST_FOREACH(cur, &ih->states, entry)
 		if (cur->id == s->id && cur->creatorid == s->creatorid)
 			break;
 
 	if (cur != NULL) {
 		PF_HASHROW_UNLOCK(ih);
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state ID collision: "
 			    "id: %016llx creatorid: %08x\n",
 			    (unsigned long long)be64toh(s->id),
 			    ntohl(s->creatorid));
 		}
 		/* Undo the key attach done above. */
 		pf_detach_state(s);
 		return (EEXIST);
 	}
 	LIST_INSERT_HEAD(&ih->states, s, entry);
 	/* One for keys, one for ID hash. */
 	refcount_init(&s->refs, 2);
 
 	V_pf_status.fcounters[FCNT_STATE_INSERT]++;
 	if (pfsync_insert_state_ptr != NULL)
 		pfsync_insert_state_ptr(s);
 
 	/* Returns locked. */
 	return (0);
 }
 
 /*
  * Find a state by its (id, creatorid) pair.
  *
  * On success the state is returned with its ID hash row still locked;
  * on a miss NULL is returned and the row is unlocked.
  */
 struct pf_state *
 pf_find_state_byid(uint64_t id, uint32_t creatorid)
 {
 	struct pf_idhash *row;
 	struct pf_state *st;
 
 	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
 	row = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))];
 
 	PF_HASHROW_LOCK(row);
 	LIST_FOREACH(st, &row->states, entry) {
 		if (st->id == id && st->creatorid == creatorid)
 			return (st);	/* row stays locked */
 	}
 	PF_HASHROW_UNLOCK(row);
 
 	return (NULL);
 }
 
 /*
  * Find state by key.
  * Returns with ID hash slot locked on success.
  *
  * The key is looked up in the key hash; PF_IN searches the key's wire
  * list, any other direction its stack list.  The first state that is
  * floating (kif == V_pfi_all) or bound to the given kif is taken.  If
  * that state's timeout is >= PFTM_MAX it is treated as gone and NULL is
  * returned without looking at further states.
  */
 static struct pf_state *
 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
 {
 	struct pf_keyhash	*kh;
 	struct pf_state_key	*sk;
 	struct pf_state		*s;
 	int idx;
 
 	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
 	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
 
 	PF_HASHROW_LOCK(kh);
 	LIST_FOREACH(sk, &kh->keys, entry)
 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 	if (sk == NULL) {
 		PF_HASHROW_UNLOCK(kh);
 		return (NULL);
 	}
 
 	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
 
 	/* List is sorted, if-bound states before floating ones. */
 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
 		if (s->kif == V_pfi_all || s->kif == kif) {
 			/* Lock the state before dropping the key row. */
 			PF_STATE_LOCK(s);
 			PF_HASHROW_UNLOCK(kh);
 			if (s->timeout >= PFTM_MAX) {
 				/*
 				 * State is either being processed by
 				 * pf_unlink_state() in an other thread, or
 				 * is scheduled for immediate expiry.
 				 */
 				PF_STATE_UNLOCK(s);
 				return (NULL);
 			}
 			return (s);
 		}
 	PF_HASHROW_UNLOCK(kh);
 
 	return (NULL);
 }
 
 /*
  * Find a state by key regardless of interface.
  *
  * dir selects which of the key's lists is searched: PF_IN the wire list,
  * PF_OUT the stack list, PF_INOUT the wire list followed by the stack
  * list; any other value panics.
  *
  * When more is NULL the first state found is returned right away.
  * Otherwise the first state found is returned after the whole search and
  * *more is incremented once for every additional state encountered (the
  * caller is expected to have initialized *more).
  *
  * The key hash row is unlocked before returning and, as far as this
  * function shows, the returned state is neither locked nor referenced.
  */
 struct pf_state *
 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
 {
 	struct pf_keyhash	*kh;
 	struct pf_state_key	*sk;
 	struct pf_state		*s, *ret = NULL;
 	int			 idx, inout = 0;
 
 	V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
 	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
 
 	PF_HASHROW_LOCK(kh);
 	LIST_FOREACH(sk, &kh->keys, entry)
 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 	if (sk == NULL) {
 		PF_HASHROW_UNLOCK(kh);
 		return (NULL);
 	}
 	switch (dir) {
 	case PF_IN:
 		idx = PF_SK_WIRE;
 		break;
 	case PF_OUT:
 		idx = PF_SK_STACK;
 		break;
 	case PF_INOUT:
 		idx = PF_SK_WIRE;
 		inout = 1;
 		break;
 	default:
 		panic("%s: dir %u", __func__, dir);
 	}
 second_run:
 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
 		if (more == NULL) {
 			PF_HASHROW_UNLOCK(kh);
 			return (s);
 		}
 
 		if (ret)
 			(*more)++;
 		else
 			ret = s;
 	}
 	/* PF_INOUT: repeat the scan once more on the stack list. */
 	if (inout == 1) {
 		inout = 0;
 		idx = PF_SK_STACK;
 		goto second_run;
 	}
 	PF_HASHROW_UNLOCK(kh);
 
 	return (ret);
 }
 
 /* END state table stuff */
 
 /*
  * Queue a send entry for deferred transmission: append it to the send
  * queue under the queue lock and schedule the software interrupt
  * identified by V_pf_swi_cookie.
  */
 static void
 pf_send(struct pf_send_entry *pfse)
 {
 
 	PF_SENDQ_LOCK();
 	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
 	PF_SENDQ_UNLOCK();
 	swi_sched(V_pf_swi_cookie, 0);
 }
 
 /*
  * Handler that drains the send queue filled by pf_send().
  *
  * v is the vnet to run in.  The whole queue is taken over under the
  * queue lock, then each entry is dispatched by type to ip_output(),
  * icmp_error(), ip6_output() or icmp6_error() and the entry is freed.
  * An entry type that is not compiled in (or unknown) panics.
  */
 void
 pf_intr(void *v)
 {
 	struct pf_send_head queue;
 	struct pf_send_entry *pfse, *next;
 
 	CURVNET_SET((struct vnet *)v);
 
 	/* Detach the pending entries so they can be sent unlocked. */
 	PF_SENDQ_LOCK();
 	queue = V_pf_sendqueue;
 	STAILQ_INIT(&V_pf_sendqueue);
 	PF_SENDQ_UNLOCK();
 
 	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
 		switch (pfse->pfse_type) {
 #ifdef INET
 		case PFSE_IP:
 			ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL);
 			break;
 		case PFSE_ICMP:
 			icmp_error(pfse->pfse_m, pfse->pfse_icmp_type,
 			    pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case PFSE_IP6:
 			ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL,
 			    NULL);
 			break;
 		case PFSE_ICMP6:
 			icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type,
 			    pfse->pfse_icmp_code, pfse->pfse_icmp_mtu);
 			break;
 #endif /* INET6 */
 		default:
 			panic("%s: unknown type", __func__);
 		}
 		free(pfse, M_PFTEMP);
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * Kernel thread that periodically expires pf objects.
  *
  * v is the vnet to run in.  Each iteration sleeps for up to hz / 10
  * ticks on the rules lock, then purges a slice of the state table.
  * Whenever the state scan wraps around to index 0, expired fragments,
  * source nodes, unlinked rules and kifs are purged as well.
  *
  * When V_pf_end_threads is set, everything is purged, the flag is
  * incremented to announce completion, sleepers are woken up and the
  * thread exits.
  */
 void
 pf_purge_thread(void *v)
 {
 	u_int idx = 0;
 
 	CURVNET_SET((struct vnet *)v);
 
 	for (;;) {
 		PF_RULES_RLOCK();
 		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
 
 		if (V_pf_end_threads) {
 			/*
 			 * To cleanse up all kifs and rules we need
 			 * two runs: first one clears reference flags,
 			 * then pf_purge_expired_states() doesn't
 			 * raise them, and then second run frees.
 			 */
 			PF_RULES_RUNLOCK();
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 
 			/*
 			 * Now purge everything.
 			 */
 			pf_purge_expired_states(0, V_pf_hashmask);
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes();
 
 			/*
 			 * Now all kifs & rules should be unreferenced,
 			 * thus should be successfully freed.
 			 */
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 
 			/*
 			 * Announce success and exit.
 			 */
 			PF_RULES_RLOCK();
 			V_pf_end_threads++;
 			PF_RULES_RUNLOCK();
 			wakeup(pf_purge_thread);
 			kproc_exit(0);
 		}
 		PF_RULES_RUNLOCK();
 
 		/*
 		 * Process 1/interval fraction of the state table every run.
 		 *
 		 * NOTE(review): the divisor is zero if the PFTM_INTERVAL
 		 * timeout is 0, and the quotient is 0 for small hash
 		 * tables; presumably both are prevented elsewhere --
 		 * confirm.
 		 */
 		idx = pf_purge_expired_states(idx, V_pf_hashmask /
 			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
 
 		/* Purge other expired types every PFTM_INTERVAL seconds. */
 		if (idx == 0) {
 			/*
 			 * Order is important:
 			 * - states and src nodes reference rules
 			 * - states and rules reference kifs
 			 */
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes();
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 		}
 	}
 	/* not reached */
 	CURVNET_RESTORE();
 }
 
 u_int32_t
 pf_state_expires(const struct pf_state *state)
 {
 	u_int32_t	timeout;
 	u_int32_t	start;
 	u_int32_t	end;
 	u_int32_t	states;
 
 	/* handle all PFTM_* > PFTM_MAX here */
 	if (state->timeout == PFTM_PURGE)
 		return (time_uptime);
 	KASSERT(state->timeout != PFTM_UNLINKED,
 	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
 	KASSERT((state->timeout < PFTM_MAX),
 	    ("pf_state_expires: timeout > PFTM_MAX"));
 	timeout = state->rule.ptr->timeout[state->timeout];
 	if (!timeout)
 		timeout = V_pf_default_rule.timeout[state->timeout];
 	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
 	if (start) {
 		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
 		states = counter_u64_fetch(state->rule.ptr->states_cur);
 	} else {
 		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
 		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
 		states = V_pf_status.states;
 	}
 	if (end && states > start && start < end) {
 		if (states < end)
 			return (state->expire + timeout * (end - states) /
 			    (end - start));
 		else
 			return (time_uptime);
 	}
 	return (state->expire + timeout);
 }
 
 void
 pf_purge_expired_src_nodes()
 {
 	struct pf_src_node_list	 freelist;
 	struct pf_srchash	*sh;
 	struct pf_src_node	*cur, *next;
 	int i;
 
 	LIST_INIT(&freelist);
 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
 	    PF_HASHROW_LOCK(sh);
 	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
 		if (cur->states == 0 && cur->expire <= time_uptime) {
 			pf_unlink_src_node_locked(cur);
 			LIST_INSERT_HEAD(&freelist, cur, entry);
 		} else if (cur->rule.ptr != NULL)
 			cur->rule.ptr->rule_flag |= PFRULE_REFS;
 	    PF_HASHROW_UNLOCK(sh);
 	}
 
 	pf_free_src_nodes(&freelist);
 }
 
 static void
 pf_src_tree_remove_state(struct pf_state *s)
 {
 	u_int32_t timeout;
 
 	if (s->src_node != NULL) {
 		if (s->src.tcp_est)
 			--s->src_node->conn;
 		if (--s->src_node->states == 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
 				timeout =
 				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
 			s->src_node->expire = time_uptime + timeout;
 		}
 	}
 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
 		if (--s->nat_src_node->states == 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
 				timeout =
 				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
 			s->nat_src_node->expire = time_uptime + timeout;
 		}
 	}
 	s->src_node = s->nat_src_node = NULL;
 }
 
 /*
  * Unlink and potentially free a state. Function may be
  * called with ID hash row locked, but always returns
  * unlocked, since it needs to go through key hash locking.
  *
  * flags: PF_ENTER_LOCKED means the caller already holds the state's ID
  * hash row lock; otherwise it is taken here.
  *
  * Returns 0 if the state is already being unlinked elsewhere, otherwise
  * whatever pf_release_state() returns for the final reference drop.
  */
 int
 pf_unlink_state(struct pf_state *s, u_int flags)
 {
 	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
 
 	if ((flags & PF_ENTER_LOCKED) == 0)
 		PF_HASHROW_LOCK(ih);
 	else
 		PF_HASHROW_ASSERT(ih);
 
 	if (s->timeout == PFTM_UNLINKED) {
 		/*
 		 * State is being processed
 		 * by pf_unlink_state() in
 		 * an other thread.
 		 */
 		PF_HASHROW_UNLOCK(ih);
 		return (0);	/* XXXGL: undefined actually */
 	}
 
 	/* A state still in PF_TCPS_PROXY_DST gets an RST|ACK sent. */
 	if (s->src.state == PF_TCPS_PROXY_DST) {
 		/* XXX wire key the right one? */
 		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
 		    &s->key[PF_SK_WIRE]->addr[1],
 		    &s->key[PF_SK_WIRE]->addr[0],
 		    s->key[PF_SK_WIRE]->port[1],
 		    s->key[PF_SK_WIRE]->port[0],
 		    s->src.seqhi, s->src.seqlo + 1,
 		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
 	}
 
 	LIST_REMOVE(s, entry);
 	pf_src_tree_remove_state(s);
 
 	if (pfsync_delete_state_ptr != NULL)
 		pfsync_delete_state_ptr(s);
 
 	STATE_DEC_COUNTERS(s);
 
 	/* From here on other threads see the state as being unlinked. */
 	s->timeout = PFTM_UNLINKED;
 
 	PF_HASHROW_UNLOCK(ih);
 
 	/* Drop the keys, then the two references set up at insert time. */
 	pf_detach_state(s);
 	refcount_release(&s->refs);
 
 	return (pf_release_state(s));
 }
 
 /*
  * Release the memory of a state.  The state must have no references
  * left and must already be marked PFTM_UNLINKED; both are asserted.
  * The state is returned to V_pf_state_z and the FCNT_STATE_REMOVALS
  * counter is incremented.
  */
 void
 pf_free_state(struct pf_state *cur)
 {
 
 	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
 	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
 	    cur->timeout));
 
 	/* Let the TCP normalization code clean up before the memory goes. */
 	pf_normalize_tcp_cleanup(cur);
 	uma_zfree(V_pf_state_z, cur);
 	V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
 }
 
 /*
  * Called only from pf_purge_thread(), thus serialized.
  *
  * Scans up to maxcheck rows of the ID hash starting at row i.  States
  * whose pf_state_expires() time has been reached are unlinked; for all
  * other states the rules and interfaces they use are flagged as
  * referenced (PFRULE_REFS / PFI_IFLAG_REFS).
  *
  * Returns the index of the row at which the next call should continue,
  * or 0 once the last row (V_pf_hashmask) has been processed.
  */
 static u_int
 pf_purge_expired_states(u_int i, int maxcheck)
 {
 	struct pf_idhash *ih;
 	struct pf_state *s;
 
 	/* Refresh the state count from the zone before scanning. */
 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 
 	/*
 	 * Go through hash and unlink states that expire now.
 	 */
 	while (maxcheck > 0) {
 
 		ih = &V_pf_idhash[i];
 relock:
 		PF_HASHROW_LOCK(ih);
 		LIST_FOREACH(s, &ih->states, entry) {
 			if (pf_state_expires(s) <= time_uptime) {
 				/*
 				 * pf_unlink_state() returns with the row
 				 * unlocked and has removed s from the list,
 				 * so take the lock again and rescan the row
 				 * from its head.
 				 */
 				V_pf_status.states -=
 				    pf_unlink_state(s, PF_ENTER_LOCKED);
 				goto relock;
 			}
 			/* Still alive: mark everything it uses as referenced. */
 			s->rule.ptr->rule_flag |= PFRULE_REFS;
 			if (s->nat_rule.ptr != NULL)
 				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
 			if (s->anchor.ptr != NULL)
 				s->anchor.ptr->rule_flag |= PFRULE_REFS;
 			s->kif->pfik_flags |= PFI_IFLAG_REFS;
 			if (s->rt_kif)
 				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
 		}
 		PF_HASHROW_UNLOCK(ih);
 
 		/* Return when we hit end of hash. */
 		if (++i > V_pf_hashmask) {
 			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 			return (0);
 		}
 
 		maxcheck--;
 	}
 
 	/* Budget used up: resynchronize the count and report where we stopped. */
 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 
 	return (i);
 }
 
 static void
 pf_purge_unlinked_rules()
 {
 	struct pf_rulequeue tmpq;
 	struct pf_rule *r, *r1;
 
 	/*
 	 * If we have overloading task pending, then we'd
 	 * better skip purging this time. There is a tiny
 	 * probability that overloading task references
 	 * an already unlinked rule.
 	 */
 	PF_OVERLOADQ_LOCK();
 	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
 		PF_OVERLOADQ_UNLOCK();
 		return;
 	}
 	PF_OVERLOADQ_UNLOCK();
 
 	/*
 	 * Do naive mark-and-sweep garbage collecting of old rules.
 	 * Reference flag is raised by pf_purge_expired_states()
 	 * and pf_purge_expired_src_nodes().
 	 *
 	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
 	 * use a temporary queue.
 	 */
 	TAILQ_INIT(&tmpq);
 	PF_UNLNKDRULES_LOCK();
 	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
 		if (!(r->rule_flag & PFRULE_REFS)) {
 			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
 			TAILQ_INSERT_TAIL(&tmpq, r, entries);
 		} else
 			r->rule_flag &= ~PFRULE_REFS;
 	}
 	PF_UNLNKDRULES_UNLOCK();
 
 	if (!TAILQ_EMPTY(&tmpq)) {
 		PF_RULES_WLOCK();
 		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
 			TAILQ_REMOVE(&tmpq, r, entries);
 			pf_free_rule(r);
 		}
 		PF_RULES_WUNLOCK();
 	}
 }
 
 /*
  * Print an address, and a port when p is non-zero, with printf().
  *
  * addr	address to print
  * p	port as stored (it is passed through ntohs() before printing);
  *	0 suppresses the port
  * af	address family; families other than the compiled-in AF_INET /
  *	AF_INET6 print nothing
  *
  * IPv4 is printed as dotted quad followed by ":port"; IPv6 as hex groups
  * with the longest run of zero groups collapsed, followed by "[port]".
  */
 void
 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t a = ntohl(addr->addr32[0]);
 		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
 		    (a>>8)&255, a&255);
 		if (p) {
 			p = ntohs(p);
 			printf(":%u", p);
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		u_int16_t b;
 		u_int8_t i, curstart, curend, maxstart, maxend;
 		/* 255 is the "no run" marker for all four indices. */
 		curstart = curend = maxstart = maxend = 255;
 		/*
 		 * Find the longest run of zero groups.  A run must be
 		 * strictly longer than the best so far to replace it, so a
 		 * single zero group (length difference 0) is never chosen.
 		 */
 		for (i = 0; i < 8; i++) {
 			if (!addr->addr16[i]) {
 				if (curstart == 255)
 					curstart = i;
 				curend = i;
 			} else {
 				if ((curend - curstart) >
 				    (maxend - maxstart)) {
 					maxstart = curstart;
 					maxend = curend;
 				}
 				curstart = curend = 255;
 			}
 		}
 		/* Account for a run that extends to the last group. */
 		if ((curend - curstart) >
 		    (maxend - maxstart)) {
 			maxstart = curstart;
 			maxend = curend;
 		}
 		for (i = 0; i < 8; i++) {
 			if (i >= maxstart && i <= maxend) {
 				/*
 				 * Inside the collapsed run: emit one ':' at
 				 * its end, plus a leading ':' when the run
 				 * starts the address.
 				 */
 				if (i == 0)
 					printf(":");
 				if (i == maxend)
 					printf(":");
 			} else {
 				b = ntohs(addr->addr16[i]);
 				printf("%x", b);
 				if (i < 7)
 					printf(":");
 			}
 		}
 		if (p) {
 			p = ntohs(p);
 			printf("[%u]", p);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 }
 
 /*
  * Print state s via pf_print_state_parts(), passing no key overrides.
  */
 void
 pf_print_state(struct pf_state *s)
 {
 	pf_print_state_parts(s, NULL, NULL);
 }
 
 /*
  * Print a description of a state and/or its keys with printf().
  *
  * s	state to describe; may be NULL, in which case only key data is
  *	printed
  * skwp	wire key to use instead of s->key[PF_SK_WIRE]; may be NULL
  * sksp	stack key to use instead of s->key[PF_SK_STACK]; may be NULL
  *
  * Output consists of the protocol, the direction (when s is given),
  * the wire and stack address pairs, and for a given state the per-peer
  * TCP sequence information and the peer state numbers.
  */
 static void
 pf_print_state_parts(struct pf_state *s,
     struct pf_state_key *skwp, struct pf_state_key *sksp)
 {
 	struct pf_state_key *skw, *sks;
 	u_int8_t proto, dir;
 
 	/* Do our best to fill these, but they're skipped if NULL */
 	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
 	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
 	proto = skw ? skw->proto : (sks ? sks->proto : 0);
 	dir = s ? s->direction : 0;
 
 	switch (proto) {
 	case IPPROTO_IPV4:
 		printf("IPv4");
 		break;
 	case IPPROTO_IPV6:
 		printf("IPv6");
 		break;
 	case IPPROTO_TCP:
 		printf("TCP");
 		break;
 	case IPPROTO_UDP:
 		printf("UDP");
 		break;
 	case IPPROTO_ICMP:
 		printf("ICMP");
 		break;
 	case IPPROTO_ICMPV6:
 		printf("ICMPv6");
 		break;
 	default:
 		/*
 		 * Print the value computed above rather than skw->proto:
 		 * skw is NULL when only a stack key is available.
 		 */
 		printf("%u", proto);
 		break;
 	}
 	switch (dir) {
 	case PF_IN:
 		printf(" in");
 		break;
 	case PF_OUT:
 		printf(" out");
 		break;
 	}
 	if (skw) {
 		printf(" wire: ");
 		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
 		printf(" ");
 		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
 	}
 	if (sks) {
 		printf(" stack: ");
 		/* A stack key identical to the wire key is shown as "-". */
 		if (sks != skw) {
 			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
 			printf(" ");
 			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
 		} else
 			printf("-");
 	}
 	if (s) {
 		if (proto == IPPROTO_TCP) {
 			printf(" [lo=%u high=%u win=%u modulator=%u",
 			    s->src.seqlo, s->src.seqhi,
 			    s->src.max_win, s->src.seqdiff);
 			/* wscale is shown only when both peers have one set. */
 			if (s->src.wscale && s->dst.wscale)
 				printf(" wscale=%u",
 				    s->src.wscale & PF_WSCALE_MASK);
 			printf("]");
 			printf(" [lo=%u high=%u win=%u modulator=%u",
 			    s->dst.seqlo, s->dst.seqhi,
 			    s->dst.max_win, s->dst.seqdiff);
 			if (s->src.wscale && s->dst.wscale)
 				printf(" wscale=%u",
 				    s->dst.wscale & PF_WSCALE_MASK);
 			printf("]");
 		}
 		printf(" %u:%u", s->src.state, s->dst.state);
 	}
 }
 
 /*
  * Print the TCP flags set in f as a string of one-letter codes, in the
  * fixed order F S R P A U E W, preceded by a single space when f is
  * non-zero.
  */
 void
 pf_print_flags(u_int8_t f)
 {
 	static const struct {
 		u_int8_t	 bit;
 		const char	*tag;
 	} names[] = {
 		{ TH_FIN,	"F" },
 		{ TH_SYN,	"S" },
 		{ TH_RST,	"R" },
 		{ TH_PUSH,	"P" },
 		{ TH_ACK,	"A" },
 		{ TH_URG,	"U" },
 		{ TH_ECE,	"E" },
 		{ TH_CWR,	"W" },
 	};
 	unsigned int k;
 
 	if (f)
 		printf(" ");
 	for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
 		if (f & names[k].bit)
 			printf("%s", names[k].tag);
 	}
 }
 
 /*
  * For skip criterion i, point the skip[i] pointer of every rule from
  * head[i] up to (but not including) cur at cur, leaving head[i] == cur.
  * Expands in place and relies on variables named "head" and "cur" being
  * in scope at the point of use.
  */
 #define	PF_SET_SKIP_STEPS(i)					\
 	do {							\
 		while (head[i] != cur) {			\
 			head[i]->skip[i].ptr = cur;		\
 			head[i] = TAILQ_NEXT(head[i], entries);	\
 		}						\
 	} while (0)
 
 /*
  * Compute the skip pointers for every rule in a rule queue.
  *
  * For each of the PF_SKIP_COUNT criteria, a rule's skip[] entry is set
  * to the next rule in the queue whose value for that criterion differs
  * from the rule before it (NULL when there is none up to the end of the
  * queue).
  *
  * The locals "head" and "cur" are used by PF_SET_SKIP_STEPS() by name
  * and must keep those names.
  */
 void
 pf_calc_skip_steps(struct pf_rulequeue *rules)
 {
 	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
 	int i;
 
 	cur = TAILQ_FIRST(rules);
 	prev = cur;
 	/* Every criterion's current run starts at the first rule. */
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		head[i] = cur;
 	while (cur != NULL) {
 
 		/*
 		 * Whenever cur differs from prev in a criterion, the run for
 		 * that criterion ends here: rules of the run skip to cur.
 		 */
 		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
 			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
 		if (cur->direction != prev->direction)
 			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
 		if (cur->af != prev->af)
 			PF_SET_SKIP_STEPS(PF_SKIP_AF);
 		if (cur->proto != prev->proto)
 			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
 		if (cur->src.neg != prev->src.neg ||
 		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
 		if (cur->src.port[0] != prev->src.port[0] ||
 		    cur->src.port[1] != prev->src.port[1] ||
 		    cur->src.port_op != prev->src.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
 		if (cur->dst.neg != prev->dst.neg ||
 		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
 		if (cur->dst.port[0] != prev->dst.port[0] ||
 		    cur->dst.port[1] != prev->dst.port[1] ||
 		    cur->dst.port_op != prev->dst.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
 
 		prev = cur;
 		cur = TAILQ_NEXT(cur, entries);
 	}
 	/* cur is NULL here: close all open runs with a NULL skip target. */
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		PF_SET_SKIP_STEPS(i);
 }
 
 /*
  * Compare two address wrappers.  Returns 1 when they differ and 0 when
  * they are considered equal.  Wrappers of different type always differ;
  * an unknown type is reported with printf() and treated as different.
  */
 static int
 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 {
 	if (aw1->type != aw2->type)
 		return (1);
 
 	switch (aw1->type) {
 	case PF_ADDR_ADDRMASK:
 	case PF_ADDR_RANGE:
 		/* Both the address and the mask have to agree. */
 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0) ||
 		    PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
 			return (1);
 		return (0);
 	case PF_ADDR_DYNIFTL:
 		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
 	case PF_ADDR_TABLE:
 		return (aw1->p.tbl != aw2->p.tbl);
 	case PF_ADDR_NOROUTE:
 	case PF_ADDR_URPFFAILED:
 		/* Nothing beyond the type to compare. */
 		return (0);
 	default:
 		printf("invalid address type: %d\n", aw1->type);
 		return (1);
 	}
 }
 
 /*
  * Incrementally adjust a 16-bit checksum for one 16-bit word of the
  * covered data changing from old to new.
  *
  * cksum	current checksum value
  * old	previous value of the changed word
  * new	new value of the changed word
  * udp	non-zero selects the UDP conventions: an input checksum of 0
  *	is returned unchanged as 0, and a computed result of 0 is
  *	returned as 0xFFFF
  *
  * Returns the adjusted checksum.
  */
 u_int16_t
 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
 {
 	u_int32_t	sum;
 
 	if (udp && cksum == 0)
 		return (0x0000);
 
 	sum = cksum + old - new;
 	/* Fold the upper half into the lower one and keep 16 bits. */
 	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;
 
 	if (udp && sum == 0)
 		return (0xFFFF);
 	return (sum);
 }
 
 static void
 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
 {
 	struct pf_addr	ao;
 	u_int16_t	po = *p;
 
 	PF_ACPY(&ao, a, af);
 	PF_ACPY(a, an, af);
 
 	*p = pn;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    ao.addr16[0], an->addr16[0], 0),
 		    ao.addr16[1], an->addr16[1], 0);
 		*p = pn;
 		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
 		    ao.addr16[1], an->addr16[1], u),
 		    po, pn, u);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
 		    ao.addr16[1], an->addr16[1], u),
 		    ao.addr16[2], an->addr16[2], u),
 		    ao.addr16[3], an->addr16[3], u),
 		    ao.addr16[4], an->addr16[4], u),
 		    ao.addr16[5], an->addr16[5], u),
 		    ao.addr16[6], an->addr16[6], u),
 		    ao.addr16[7], an->addr16[7], u),
 		    po, pn, u);
 		break;
 #endif /* INET6 */
 	}
 }
 
 
 /*
  * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
  *
  * a	location of the 32-bit value to overwrite with an
  * c	checksum to adjust for the change
  * an	new value
  * u	passed to pf_cksum_fixup() as its "udp" argument
  */
 void
 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
 {
 	u_int32_t	prev;
 	u_int16_t	sum;
 
 	memcpy(&prev, a, sizeof(prev));
 	memcpy(a, &an, sizeof(an));
 
 	/* Account for the upper and then the lower 16 bits of the value. */
 	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
 	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
 }
 
 #ifdef INET6
 /*
  * Replace the IPv6 address *a with *an and adjust checksum *c for the
  * change of all eight 16-bit words.  u is passed to pf_cksum_fixup()
  * as its "udp" argument.
  */
 static void
 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
 {
 	struct pf_addr	prev;
 	u_int16_t	sum;
 	int		w;
 
 	PF_ACPY(&prev, a, AF_INET6);
 	PF_ACPY(a, an, AF_INET6);
 
 	sum = *c;
 	for (w = 0; w < 8; w++)
 		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
 	*c = sum;
 }
 #endif /* INET6 */
 
 static void
 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
 {
 	struct pf_addr	oia, ooa;
 
 	PF_ACPY(&oia, ia, af);
 	if (oa)
 		PF_ACPY(&ooa, oa, af);
 
 	/* Change inner protocol port, fix inner protocol checksum. */
 	if (ip != NULL) {
 		u_int16_t	oip = *ip;
 		u_int32_t	opc;
 
 		if (pc != NULL)
 			opc = *pc;
 		*ip = np;
 		if (pc != NULL)
 			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
 		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
 		if (pc != NULL)
 			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
 	}
 	/* Change inner ip address, fix inner ip and icmp checksums. */
 	PF_ACPY(ia, na, af);
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t	 oh2c = *h2c;
 
 		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], u),
 		    oia.addr16[1], ia->addr16[1], u),
 		    oia.addr16[2], ia->addr16[2], u),
 		    oia.addr16[3], ia->addr16[3], u),
 		    oia.addr16[4], ia->addr16[4], u),
 		    oia.addr16[5], ia->addr16[5], u),
 		    oia.addr16[6], ia->addr16[6], u),
 		    oia.addr16[7], ia->addr16[7], u);
 		break;
 #endif /* INET6 */
 	}
 	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
 	if (oa) {
 		PF_ACPY(oa, na, af);
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
 			    ooa.addr16[0], oa->addr16[0], 0),
 			    ooa.addr16[1], oa->addr16[1], 0);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 			    pf_cksum_fixup(pf_cksum_fixup(*ic,
 			    ooa.addr16[0], oa->addr16[0], u),
 			    ooa.addr16[1], oa->addr16[1], u),
 			    ooa.addr16[2], oa->addr16[2], u),
 			    ooa.addr16[3], oa->addr16[3], u),
 			    ooa.addr16[4], oa->addr16[4], u),
 			    ooa.addr16[5], oa->addr16[5], u),
 			    ooa.addr16[6], oa->addr16[6], u),
 			    ooa.addr16[7], oa->addr16[7], u);
 			break;
 #endif /* INET6 */
 		}
 	}
 }
 
 
 /*
  * Need to modulate the sequence numbers in the TCP SACK option
  * (credits to Krzysztof Pfaff for report and patch)
  *
  * m	packet
  * off	offset of the TCP header within m
  * pd	packet descriptor; only pd->af is used here
  * th	TCP header; th_off gives the option length and th_sum is
  *	adjusted for every rewritten value
  * dst	peer whose seqdiff is subtracted from each SACK edge
  *
  * Returns 1 when at least one SACK option was processed and the options
  * were copied back into the mbuf, 0 otherwise.
  */
 static int
 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *dst)
 {
 	/* hlen counts option bytes still to parse; thoptlen keeps the total. */
 	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
 	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
 	int copyback = 0, i, olen;
 	struct sackblk sack;
 
 	/* Smallest useful SACK option: 2 header bytes plus one block. */
 #define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
 	if (hlen < TCPOLEN_SACKLEN ||
 	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
 		return 0;
 
 	/* Stop once the remainder cannot hold a SACK option anymore. */
 	while (hlen >= TCPOLEN_SACKLEN) {
 		olen = opt[1];
 		switch (*opt) {
 		case TCPOPT_EOL:	/* FALLTHROUGH */
 		case TCPOPT_NOP:
 			/* Single-byte options carry no length field. */
 			opt++;
 			hlen--;
 			break;
 		case TCPOPT_SACK:
 			/* Never walk past the end of the option area. */
 			if (olen > hlen)
 				olen = hlen;
 			if (olen >= TCPOLEN_SACKLEN) {
 				for (i = 2; i + TCPOLEN_SACK <= olen;
 				    i += TCPOLEN_SACK) {
 					/*
 					 * Work on an aligned copy of the
 					 * block, then store it back.
 					 */
 					memcpy(&sack, &opt[i], sizeof(sack));
 					pf_change_a(&sack.start, &th->th_sum,
 					    htonl(ntohl(sack.start) -
 					    dst->seqdiff), 0);
 					pf_change_a(&sack.end, &th->th_sum,
 					    htonl(ntohl(sack.end) -
 					    dst->seqdiff), 0);
 					memcpy(&opt[i], &sack, sizeof(sack));
 				}
 				copyback = 1;
 			}
 			/* FALLTHROUGH */
 		default:
 			/* A length below 2 would not advance; force progress. */
 			if (olen < 2)
 				olen = 2;
 			hlen -= olen;
 			opt += olen;
 		}
 	}
 
 	if (copyback)
 		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
 	return (copyback);
 }
 
 /*
  * Build a TCP segment without payload and queue it with pf_send().
  *
  * replyto	not referenced in this function
  * r		rule supplying the routing table id (and, with ALTQ, the
  *		queue id); may be NULL
  * af		address family; anything but the compiled-in AF_INET /
  *		AF_INET6 panics
  * saddr, daddr	source and destination address
  * sport, dport	ports, stored into the header as passed
  * seq, ack	sequence and acknowledgement numbers in host byte order
  * flags		TCP flags
  * win		window in host byte order
  * mss		when non-zero, a maximum segment size option with this
  *		value (host byte order) is appended
  * ttl		IPv4 TTL; 0 selects V_ip_defttl
  * tag		when non-zero, the mbuf is flagged M_SKIP_FIREWALL
  * rtag		stored as the pf mbuf tag's tag value
  * ifp		not referenced in this function
  *
  * Allocation failures are silent: nothing is sent and nothing is
  * reported to the caller.
  */
 static void
 pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
     const struct pf_addr *saddr, const struct pf_addr *daddr,
     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
     u_int16_t rtag, struct ifnet *ifp)
 {
 	struct pf_send_entry *pfse;
 	struct mbuf	*m;
 	int		 len, tlen;
 #ifdef INET
 	struct ip	*h = NULL;
 #endif /* INET */
 #ifdef INET6
 	struct ip6_hdr	*h6 = NULL;
 #endif /* INET6 */
 	struct tcphdr	*th;
 	char		*opt;
 	struct pf_mtag  *pf_mtag;
 
 	len = 0;
 	th = NULL;
 
 	/* maximum segment size tcp option */
 	tlen = sizeof(struct tcphdr);
 	if (mss)
 		tlen += 4;
 
 	/* len is the whole packet: IP header plus TCP header and option. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		len = sizeof(struct ip) + tlen;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		len = sizeof(struct ip6_hdr) + tlen;
 		break;
 #endif /* INET6 */
 	default:
 		panic("%s: unsupported af %d", __func__, af);
 	}
 
 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
 	if (pfse == NULL)
 		return;
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		free(pfse, M_PFTEMP);
 		return;
 	}
 #ifdef MAC
 	mac_netinet_firewall_send(m);
 #endif
 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
 		free(pfse, M_PFTEMP);
 		m_freem(m);
 		return;
 	}
 	if (tag)
 		m->m_flags |= M_SKIP_FIREWALL;
 	pf_mtag->tag = rtag;
 
 	if (r != NULL && r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (r != NULL && r->qid) {
 		pf_mtag->qid = r->qid;
 
 		/* add hints for ecn */
 		pf_mtag->hdr = mtod(m, struct ip *);
 	}
 #endif /* ALTQ */
 	/* Leave room for a link-layer header in front of the packet. */
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	bzero(m->m_data, len);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		h = mtod(m, struct ip *);
 
 		/* IP header fields included in the TCP checksum */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(tlen);
 		h->ip_src.s_addr = saddr->v4.s_addr;
 		h->ip_dst.s_addr = daddr->v4.s_addr;
 
 		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* IP header fields included in the TCP checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(tlen);
 		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
 		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
 
 		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
 		break;
 #endif /* INET6 */
 	}
 
 	/* TCP header */
 	th->th_sport = sport;
 	th->th_dport = dport;
 	th->th_seq = htonl(seq);
 	th->th_ack = htonl(ack);
 	th->th_off = tlen >> 2;
 	th->th_flags = flags;
 	th->th_win = htons(win);
 
 	if (mss) {
 		/* The option goes directly behind the fixed TCP header. */
 		opt = (char *)(th + 1);
 		opt[0] = TCPOPT_MAXSEG;
 		opt[1] = 4;
 		HTONS(mss);
 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
 	}
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		/*
 		 * The checksum is taken over the whole buffer while the IP
 		 * header holds only the fields set above and zeroes.
 		 */
 		/* TCP checksum */
 		th->th_sum = in_cksum(m, len);
 
 		/* Finish the IP header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
 		h->ip_len = htons(len);
 		h->ip_ttl = ttl ? ttl : V_ip_defttl;
 		h->ip_sum = 0;
 
 		pfse->pfse_type = PFSE_IP;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		/* TCP checksum */
 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
 		    sizeof(struct ip6_hdr), tlen);
 
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 
 		pfse->pfse_type = PFSE_IP6;
 		break;
 #endif /* INET6 */
 	}
 	/* Hand the finished packet to the send queue. */
 	pfse->pfse_m = m;
 	pf_send(pfse);
 }
 
 /*
  * Queue an ICMP (or ICMPv6) error for packet m with pf_send().
  *
  * m	packet that triggers the error; a copy is queued, m itself is
  *	not consumed
  * type	ICMP type stored in the queue entry
  * code	ICMP code stored in the queue entry
  * af	address family, selects PFSE_ICMP or PFSE_ICMP6
  * r	rule supplying the routing table id (and, with ALTQ, the
  *	queue id)
  *
  * Allocation failures are silent: nothing is queued and nothing is
  * reported to the caller.
  */
 static void
 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
     struct pf_rule *r)
 {
 	struct pf_send_entry *pfse;
 	struct mbuf *m0;
 	struct pf_mtag *pf_mtag;
 
 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
 	if (pfse == NULL)
 		return;
 
 	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
 		free(pfse, M_PFTEMP);
 		return;
 	}
 
 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
 		free(pfse, M_PFTEMP);
 		/* The copy is ours; release it so it is not leaked. */
 		m_freem(m0);
 		return;
 	}
 	/* XXX: revisit */
 	m0->m_flags |= M_SKIP_FIREWALL;
 
 	if (r->rtableid >= 0)
 		M_SETFIB(m0, r->rtableid);
 
 #ifdef ALTQ
 	if (r->qid) {
 		pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pf_mtag->hdr = mtod(m0, struct ip *);
 	}
 #endif /* ALTQ */
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		pfse->pfse_type = PFSE_ICMP;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		pfse->pfse_type = PFSE_ICMP6;
 		break;
 #endif /* INET6 */
 	}
 	pfse->pfse_m = m0;
 	pfse->pfse_icmp_type = type;
 	pfse->pfse_icmp_code = code;
 	pf_send(pfse);
 }
 
 /*
  * Compare addresses a and b under mask m.
  *
  * With n == 0 the result is 1 when the masked addresses are equal and 0
  * when they differ; with n != 0 the result is inverted.  For an address
  * family that is not compiled in the addresses count as not equal.
  */
 int
 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
     struct pf_addr *b, sa_family_t af)
 {
 	int	equal = 0;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		equal = ((a->addr32[0] & m->addr32[0]) ==
 		    (b->addr32[0] & m->addr32[0]));
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	w;
 
 		/* All four 32-bit words have to agree under the mask. */
 		equal = 1;
 		for (w = 0; w < 4; w++) {
 			if ((a->addr32[w] & m->addr32[w]) !=
 			    (b->addr32[w] & m->addr32[w])) {
 				equal = 0;
 				break;
 			}
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	/* n negates the outcome. */
 	if (n)
 		return (equal ? 0 : 1);
 	return (equal ? 1 : 0);
 }
 
 /*
  * Return 1 if b <= a <= e, otherwise return 0.
  *
  * b	first address of the range
  * e	last address of the range
  * a	address to test
  * af	address family; for a family that is not compiled in, 1 is
  *	returned without comparing anything
  *
  * The addresses are converted with ntohl() before they are compared.
  * They are kept in network byte order, so comparing the raw 32-bit
  * words would order them incorrectly on little-endian machines.
  */
 int
 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
     struct pf_addr *a, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
 		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
 			return (0);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	i;
 
 		/*
 		 * Compare word by word, most significant first; the first
 		 * word that differs decides.
 		 */
 		/* check a >= b */
 		for (i = 0; i < 4; ++i) {
 			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
 				break;
 			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
 				return (0);
 		}
 		/* check a <= e */
 		for (i = 0; i < 4; ++i) {
 			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
 				break;
 			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
 				return (0);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 	return (1);
 }
 
 /*
  * Evaluate comparison operator op for value p against the operands a1
  * and a2 (a2 is only used by the range operators).  Returns non-zero on
  * a match, 0 otherwise; an unknown operator never matches.
  */
 static int
 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
 {
 	int	hit = 0;
 
 	switch (op) {
 	case PF_OP_IRG:		/* strictly inside a1..a2 */
 		hit = ((p > a1) && (p < a2));
 		break;
 	case PF_OP_XRG:		/* strictly outside a1..a2 */
 		hit = ((p < a1) || (p > a2));
 		break;
 	case PF_OP_RRG:		/* inside a1..a2, bounds included */
 		hit = ((p >= a1) && (p <= a2));
 		break;
 	case PF_OP_EQ:
 		hit = (p == a1);
 		break;
 	case PF_OP_NE:
 		hit = (p != a1);
 		break;
 	case PF_OP_LT:
 		hit = (p < a1);
 		break;
 	case PF_OP_LE:
 		hit = (p <= a1);
 		break;
 	case PF_OP_GT:
 		hit = (p > a1);
 		break;
 	case PF_OP_GE:
 		hit = (p >= a1);
 		break;
 	}
 	return (hit);
 }
 
 /*
  * Port variant of pf_match(): a1, a2 and p are each converted with
  * ntohs() before they are compared.
  */
 int
 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
 {
 	u_int16_t lo = ntohs(a1);
 	u_int16_t hi = ntohs(a2);
 	u_int16_t port = ntohs(p);
 
 	return (pf_match(op, lo, hi, port));
 }
 
 /*
  * User id variant of pf_match().  When u is UID_MAX only the operators
  * PF_OP_EQ and PF_OP_NE are evaluated; every other operator yields 0.
  */
 static int
 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
 {
 	if (u != UID_MAX || op == PF_OP_EQ || op == PF_OP_NE)
 		return (pf_match(op, a1, a2, u));
 	return (0);
 }
 
 /*
  * Group id variant of pf_match().  When g is GID_MAX only the operators
  * PF_OP_EQ and PF_OP_NE are evaluated; every other operator yields 0.
  */
 static int
 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
 {
 	if (g != GID_MAX || op == PF_OP_EQ || op == PF_OP_NE)
 		return (pf_match(op, a1, a2, g));
 	return (0);
 }
 
 /*
  * Check rule r's tag condition against a packet tag.
  *
  * When *tag is -1 on entry it is first set to mtag.  The result is 1
  * when *tag equals r->match_tag, inverted when r->match_tag_not is set.
  * m is not referenced here.
  */
 int
 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag)
 {
 	if (*tag == -1)
 		*tag = mtag;
 
 	if (r->match_tag_not)
 		return (r->match_tag != *tag);
 	return (r->match_tag == *tag);
 }
 
 /*
  * Store tag in the pf mbuf tag of packet m, obtaining the mbuf tag with
  * pf_get_mtag() first when pd does not reference one yet.  tag must be
  * positive (asserted).
  *
  * Returns 0 on success or ENOMEM when no mbuf tag could be obtained.
  */
 int
 pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
 {
 
 	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
 
 	if (pd->pf_mtag == NULL) {
 		pd->pf_mtag = pf_get_mtag(m);
 		if (pd->pf_mtag == NULL)
 			return (ENOMEM);
 	}
 
 	pd->pf_mtag->tag = tag;
 
 	return (0);
 }
 
 /* Maximum anchor nesting depth: number of frames in an anchor stack. */
 #define	PF_ANCHOR_STACKSIZE	32
 /*
  * One frame of the anchor stack, pushed by pf_step_into_anchor().
  *
  * rs	ruleset that was being evaluated when the anchor was entered
  * r	the anchor rule; the lowest bit of this pointer doubles as a
  *	"matched" flag, so read it through the PF_ANCHOR_* macros
  * child	for a wildcard anchor, the child anchor currently being
  *	evaluated; NULL otherwise
  */
 struct pf_anchor_stackframe {
 	struct pf_ruleset	*rs;
 	struct pf_rule		*r;	/* XXX: + match bit */
 	struct pf_anchor	*child;
 };
 
 /*
  * XXX: We rely on malloc(9) returning pointer aligned addresses.
  */
 /* Bit borrowed from the rule pointer, and the mask of all such bits. */
 #define	PF_ANCHORSTACK_MATCH	0x00000001
 #define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)
 
 /*
  * PF_ANCHOR_MATCH(f)		non-zero when frame f has its match bit set
  * PF_ANCHOR_RULE(f)		the rule pointer of frame f with the flag
  *				bits masked off
  * PF_ANCHOR_SET_MATCH(f)	set the match bit of frame f
  */
 #define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
 #define	PF_ANCHOR_RULE(f)	(struct pf_rule *)			\
 				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
 #define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 			\
 				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
 } while (0)
 
 /*
  * Descend into the anchor referenced by rule *r.
  *
  * stack	anchor stack holding PF_ANCHOR_STACKSIZE frames
  * depth	current stack depth; incremented when a frame is pushed
  * rs	in: current ruleset; out: ruleset of the anchor (or of its
  *	first child for a wildcard anchor)
  * n	index of the rule set within the ruleset's rules[] array
  * r	in: the anchor rule; out: first active rule to evaluate next,
  *	NULL for a wildcard anchor without children, or simply the
  *	rule following the anchor rule when the stack is full
  * a	when non-NULL and entering from depth 0, receives the anchor rule
  * match	when non-NULL, reset to 0
  */
 void
 pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth,
     struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
     int *match)
 {
 	struct pf_anchor_stackframe	*frame;
 	struct pf_anchor_node		*kids;
 
 	PF_RULES_RASSERT();
 
 	if (match)
 		*match = 0;
 
 	/* Out of frames: complain and step over the anchor. */
 	if (*depth >= PF_ANCHOR_STACKSIZE) {
 		printf("%s: anchor stack overflow on %s\n",
 		    __func__, (*r)->anchor->name);
 		*r = TAILQ_NEXT(*r, entries);
 		return;
 	}
 	if (*depth == 0 && a != NULL)
 		*a = *r;
 
 	/* Push a frame remembering where we came from. */
 	frame = &stack[*depth];
 	(*depth)++;
 	frame->rs = *rs;
 	frame->r = *r;
 
 	if (!(*r)->anchor_wildcard) {
 		frame->child = NULL;
 		*rs = &(*r)->anchor->ruleset;
 	} else {
 		/* Wildcard: start with the first child anchor, if any. */
 		kids = &(*r)->anchor->children;
 		frame->child = RB_MIN(pf_anchor_node, kids);
 		if (frame->child == NULL) {
 			*r = NULL;
 			return;
 		}
 		*rs = &frame->child->ruleset;
 	}
 	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 }
 
 /*
  * Leave the anchor(s) whose rules have been exhausted and find the rule
  * at which evaluation continues.
  *
  * stack	anchor stack filled by pf_step_into_anchor()
  * depth	current stack depth; decremented for every frame popped
  * rs	out: ruleset to continue in
  * n	index of the rule set within the ruleset's rules[] array
  * r	out: next rule to evaluate; NULL when none is left
  * a	when non-NULL, set to NULL once depth drops to 0
  * match	when non-NULL, tells whether a rule matched inside the anchor;
  *	reset to 0 after being recorded for a wildcard anchor
  *
  * Returns the "quick" setting of the last anchor rule popped for which
  * a match was recorded, 0 otherwise.
  */
 int
 pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth,
     struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
     int *match)
 {
 	struct pf_anchor_stackframe	*f;
 	struct pf_rule *fr;
 	int quick = 0;
 
 	PF_RULES_RASSERT();
 
 	/* Keep unwinding until a rule is found or the stack is empty. */
 	do {
 		if (*depth <= 0)
 			break;
 		f = stack + *depth - 1;
 		/* f->r may carry the match bit; fr is the clean pointer. */
 		fr = PF_ANCHOR_RULE(f);
 		if (f->child != NULL) {
 			struct pf_anchor_node *parent;
 
 			/*
 			 * This block traverses through
 			 * a wildcard anchor.
 			 */
 			parent = &fr->anchor->children;
 			if (match != NULL && *match) {
 				/*
 				 * If any of "*" matched, then
 				 * "foo/ *" matched, mark frame
 				 * appropriately.
 				 */
 				PF_ANCHOR_SET_MATCH(f);
 				*match = 0;
 			}
 			f->child = RB_NEXT(pf_anchor_node, parent, f->child);
 			if (f->child != NULL) {
 				*rs = &f->child->ruleset;
 				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 				/*
 				 * An empty child ruleset: "continue" goes to
 				 * the loop condition, which is true, so the
 				 * next child is tried.
 				 */
 				if (*r == NULL)
 					continue;
 				else
 					break;
 			}
 		}
 		/* Pop the frame and resume after the anchor rule. */
 		(*depth)--;
 		if (*depth == 0 && a != NULL)
 			*a = NULL;
 		*rs = f->rs;
 		if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
 			quick = fr->quick;
 		*r = TAILQ_NEXT(fr, entries);
 	} while (*r == NULL);
 
 	return (quick);
 }
 
 #ifdef INET6
 /*
  * Combine two addresses under a mask: every bit set in rmask is taken
  * from raddr, every other bit from saddr.  The result is stored in
  * naddr.  One 32-bit word is processed for AF_INET, four for AF_INET6;
  * any other family leaves naddr untouched.
  */
 void
 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
 {
 	int	w, nwords;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		nwords = 1;
 		break;
 #endif /* INET */
 	case AF_INET6:
 		nwords = 4;
 		break;
 	default:
 		nwords = 0;
 		break;
 	}
 
 	for (w = 0; w < nwords; w++) {
 		naddr->addr32[w] = (raddr->addr32[w] & rmask->addr32[w]) |
 		    (~rmask->addr32[w] & saddr->addr32[w]);
 	}
 }
 
 /*
  * Increment an address by one, in place.  The address words are
  * converted with ntohl()/htonl() around the addition.  For AF_INET6 a
  * carry out of an all-ones word propagates into the next more
  * significant word.
  */
 void
 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
 {
 	int	w;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
 		break;
 #endif /* INET */
 	case AF_INET6:
 		/*
 		 * Clear trailing all-ones words (they wrap to zero), then
 		 * increment the first word that does not overflow, or word
 		 * 0 when every lower word wrapped.
 		 */
 		for (w = 3; w > 0 && addr->addr32[w] == 0xffffffff; w--)
 			addr->addr32[w] = 0;
 		addr->addr32[w] = htonl(ntohl(addr->addr32[w]) + 1);
 		break;
 	}
 }
 #endif /* INET6 */
 
 int
 pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
 {
 	struct pf_addr		*saddr, *daddr;
 	u_int16_t		 sport, dport;
 	struct inpcbinfo	*pi;
 	struct inpcb		*inp;
 
 	pd->lookup.uid = UID_MAX;
 	pd->lookup.gid = GID_MAX;
 
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		if (pd->hdr.tcp == NULL)
 			return (-1);
 		sport = pd->hdr.tcp->th_sport;
 		dport = pd->hdr.tcp->th_dport;
 		pi = &V_tcbinfo;
 		break;
 	case IPPROTO_UDP:
 		if (pd->hdr.udp == NULL)
 			return (-1);
 		sport = pd->hdr.udp->uh_sport;
 		dport = pd->hdr.udp->uh_dport;
 		pi = &V_udbinfo;
 		break;
 	default:
 		return (-1);
 	}
 	if (direction == PF_IN) {
 		saddr = pd->src;
 		daddr = pd->dst;
 	} else {
 		u_int16_t	p;
 
 		p = sport;
 		sport = dport;
 		dport = p;
 		saddr = pd->dst;
 		daddr = pd->src;
 	}
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
 		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
 		if (inp == NULL) {
 			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
 			   daddr->v4, dport, INPLOOKUP_WILDCARD |
 			   INPLOOKUP_RLOCKPCB, NULL, m);
 			if (inp == NULL)
 				return (-1);
 		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
 		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
 		if (inp == NULL) {
 			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
 			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, NULL, m);
 			if (inp == NULL)
 				return (-1);
 		}
 		break;
 #endif /* INET6 */
 
 	default:
 		return (-1);
 	}
 	INP_RLOCK_ASSERT(inp);
 	pd->lookup.uid = inp->inp_cred->cr_uid;
 	pd->lookup.gid = inp->inp_cred->cr_groups[0];
 	INP_RUNLOCK(inp);
 
 	return (1);
 }
 
 /*
  * Extract the window scale option from a TCP header.
  *
  * m	packet
  * off	offset of the TCP header within m
  * th_off	TCP data offset field (header length in 32-bit words)
  * af	address family, passed on to pf_pull_hdr()
  *
  * Returns the shift count, capped at TCP_MAX_WINSHIFT and or'ed with
  * PF_WSCALE_FLAG, of the last window scale option found.  Returns 0
  * when there are no options, the header cannot be pulled, or no window
  * scale option is present.
  */
 static u_int8_t
 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	u_int8_t	 buf[60];
 	u_int8_t	*cp, step;
 	u_int8_t	 shift = 0;
 	int		 remain;
 
 	remain = th_off << 2;		/* remain <= sizeof(buf) */
 	if (remain <= sizeof(struct tcphdr))
 		return (0);
 	if (!pf_pull_hdr(m, off, buf, remain, NULL, NULL, af))
 		return (0);
 
 	cp = buf + sizeof(struct tcphdr);
 	remain -= sizeof(struct tcphdr);
 	/* A window scale option is three bytes long. */
 	while (remain >= 3) {
 		if (*cp == TCPOPT_EOL || *cp == TCPOPT_NOP) {
 			/* Single-byte options. */
 			++cp;
 			--remain;
 			continue;
 		}
 		if (*cp == TCPOPT_WINDOW) {
 			shift = cp[2];
 			if (shift > TCP_MAX_WINSHIFT)
 				shift = TCP_MAX_WINSHIFT;
 			shift |= PF_WSCALE_FLAG;
 		}
 		/* Advance by the option length, but by at least two. */
 		step = cp[1];
 		if (step < 2)
 			step = 2;
 		remain -= step;
 		cp += step;
 	}
 	return (shift);
 }
 
 /*
  * Extract the maximum segment size option from a TCP header.
  *
  * m	packet
  * off	offset of the TCP header within m
  * th_off	TCP data offset field (header length in 32-bit words)
  * af	address family, passed on to pf_pull_hdr()
  *
  * Returns the value of the last MSS option found, in host byte order,
  * or V_tcp_mssdflt when options are present but none is an MSS option.
  * Returns 0 when there are no options or the header cannot be pulled.
  */
 static u_int16_t
 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	u_int8_t	 buf[60];
 	u_int8_t	*cp, step;
 	u_int16_t	 segsz = V_tcp_mssdflt;
 	int		 remain;
 
 	remain = th_off << 2;	/* remain <= sizeof(buf) */
 	if (remain <= sizeof(struct tcphdr))
 		return (0);
 	if (!pf_pull_hdr(m, off, buf, remain, NULL, NULL, af))
 		return (0);
 
 	cp = buf + sizeof(struct tcphdr);
 	remain -= sizeof(struct tcphdr);
 	while (remain >= TCPOLEN_MAXSEG) {
 		if (*cp == TCPOPT_EOL || *cp == TCPOPT_NOP) {
 			/* Single-byte options. */
 			++cp;
 			--remain;
 			continue;
 		}
 		if (*cp == TCPOPT_MAXSEG) {
 			/* The value may be unaligned; copy, then convert. */
 			bcopy((caddr_t)(cp + 2), (caddr_t)&segsz, 2);
 			NTOHS(segsz);
 		}
 		/* Advance by the option length, but by at least two. */
 		step = cp[1];
 		if (step < 2)
 			step = 2;
 		remain -= step;
 		cp += step;
 	}
 	return (segsz);
 }
 
 /*
  * Work out the maximum segment size to use towards addr.
  *
  * addr		destination address used for the route lookup
  * af		address family of addr
  * rtableid	routing table to look the route up in
  * offer		MSS offered by the peer; the result never exceeds it
  *
  * Starts from V_tcp_mssdflt.  When a route with an interface is found,
  * the interface MTU minus IP and TCP header sizes is used if that is
  * larger than the default.  The result is then limited to offer and
  * raised to at least 64.
  */
 static u_int16_t
 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
 {
 #ifdef INET
 	struct sockaddr_in	*dst;
 	struct route		 ro;
 #endif /* INET */
 #ifdef INET6
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro6;
 #endif /* INET6 */
 	struct rtentry		*rt = NULL;
 	int			 hlen = 0;
 	u_int16_t		 mss = V_tcp_mssdflt;
 
 	/* Look up a route to addr; hlen is the IP header size for af. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		hlen = sizeof(struct ip);
 		bzero(&ro, sizeof(ro));
 		dst = (struct sockaddr_in *)&ro.ro_dst;
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 		in_rtalloc_ign(&ro, 0, rtableid);
 		rt = ro.ro_rt;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		hlen = sizeof(struct ip6_hdr);
 		bzero(&ro6, sizeof(ro6));
 		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 		in6_rtalloc_ign(&ro6, 0, rtableid);
 		rt = ro6.ro_rt;
 		break;
 #endif /* INET6 */
 	}
 
 	/*
 	 * NOTE(review): the route is released only inside this branch, i.e.
 	 * only when rt->rt_ifp is set -- confirm that a route without an
 	 * interface can never be returned here.
 	 */
 	if (rt && rt->rt_ifp) {
 		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
 		mss = max(V_tcp_mssdflt, mss);
 		RTFREE(rt);
 	}
 	mss = min(mss, offer);
 	mss = max(mss, 64);		/* sanity - at least max opt space */
 	return (mss);
 }
 
 static void
 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
 {
 	struct pf_rule *r = s->rule.ptr;
 	struct pf_src_node *sn = NULL;
 
 	s->rt_kif = NULL;
 	if (!r->rt || r->rt == PF_FASTROUTE)
 		return;
 	switch (s->key[PF_SK_WIRE]->af) {
 #ifdef INET
 	case AF_INET:
 		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
 		s->rt_kif = r->rpool.cur->kif;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
 		s->rt_kif = r->rpool.cur->kif;
 		break;
 #endif /* INET6 */
 	}
 }
 
 static u_int32_t
 pf_tcp_iss(struct pf_pdesc *pd)
 {
 	MD5_CTX ctx;
 	u_int32_t digest[4];
 
 	if (V_pf_tcp_secret_init == 0) {
 		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
 		MD5Init(&V_pf_tcp_secret_ctx);
 		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
 		    sizeof(V_pf_tcp_secret));
 		V_pf_tcp_secret_init = 1;
 	}
 
 	ctx = V_pf_tcp_secret_ctx;
 
 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
 	if (pd->af == AF_INET6) {
 		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
 		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
 	} else {
 		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
 		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
 	}
 	MD5Final((u_char *)digest, &ctx);
 	V_pf_tcp_iss_off += 4096;
 #define	ISN_RANDOM_INCREMENT (4096 - 1)
 	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
 	    V_pf_tcp_iss_off);
 #undef	ISN_RANDOM_INCREMENT
 }
 
 static int
 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
     struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp)
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_addr		* const saddr = pd->src;
 	struct pf_addr		* const daddr = pd->dst;
 	sa_family_t		 af = pd->af;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	struct tcphdr		*th = pd->hdr.tcp;
 	struct pf_state_key	*sk = NULL, *nk = NULL;
 	u_short			 reason;
 	int			 rewrite = 0, hdrlen = 0;
 	int			 tag = -1, rtableid = -1;
 	int			 asd = 0;
 	int			 match = 0;
 	int			 state_icmp = 0;
 	u_int16_t		 sport = 0, dport = 0;
 	u_int16_t		 bproto_sum = 0, bip_sum = 0;
 	u_int8_t		 icmptype = 0, icmpcode = 0;
 	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
 
 	PF_RULES_RASSERT();
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		pd->lookup.uid = inp->inp_cred->cr_uid;
 		pd->lookup.gid = inp->inp_cred->cr_groups[0];
 		pd->lookup.done = 1;
 	}
 
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		sport = th->th_sport;
 		dport = th->th_dport;
 		hdrlen = sizeof(*th);
 		break;
 	case IPPROTO_UDP:
 		sport = pd->hdr.udp->uh_sport;
 		dport = pd->hdr.udp->uh_dport;
 		hdrlen = sizeof(*pd->hdr.udp);
 		break;
 #ifdef INET
 	case IPPROTO_ICMP:
 		if (pd->af != AF_INET)
 			break;
 		sport = dport = pd->hdr.icmp->icmp_id;
 		hdrlen = sizeof(*pd->hdr.icmp);
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpcode = pd->hdr.icmp->icmp_code;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		if (af != AF_INET6)
 			break;
 		sport = dport = pd->hdr.icmp6->icmp6_id;
 		hdrlen = sizeof(*pd->hdr.icmp6);
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpcode = pd->hdr.icmp6->icmp6_code;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	default:
 		sport = dport = hdrlen = 0;
 		break;
 	}
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/* check packet for BINAT/NAT/RDR */
 	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
 	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
 		KASSERT(sk != NULL, ("%s: null sk", __func__));
 		KASSERT(nk != NULL, ("%s: null nk", __func__));
 
 		if (pd->ip_sum)
 			bip_sum = *pd->ip_sum;
 
 		switch (pd->proto) {
 		case IPPROTO_TCP:
 			bproto_sum = th->th_sum;
 			pd->proto_sum = &th->th_sum;
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
 				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 0, af);
 				pd->sport = &th->th_sport;
 				sport = th->th_sport;
 			}
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
 				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->didx],
 				    nk->port[pd->didx], 0, af);
 				dport = th->th_dport;
 				pd->dport = &th->th_dport;
 			}
 			rewrite++;
 			break;
 		case IPPROTO_UDP:
 			bproto_sum = pd->hdr.udp->uh_sum;
 			pd->proto_sum = &pd->hdr.udp->uh_sum;
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
 				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 1, af);
 				sport = pd->hdr.udp->uh_sport;
 				pd->sport = &pd->hdr.udp->uh_sport;
 			}
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
 				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->didx],
 				    nk->port[pd->didx], 1, af);
 				dport = pd->hdr.udp->uh_dport;
 				pd->dport = &pd->hdr.udp->uh_dport;
 			}
 			rewrite++;
 			break;
 #ifdef INET
 		case IPPROTO_ICMP:
 			nk->port[0] = nk->port[1];
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
 				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
 				    nk->addr[pd->sidx].v4.s_addr, 0);
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
 				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
 				    nk->addr[pd->didx].v4.s_addr, 0);
 
 			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
 				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
 				    pd->hdr.icmp->icmp_cksum, sport,
 				    nk->port[1], 0);
 				pd->hdr.icmp->icmp_id = nk->port[1];
 				pd->sport = &pd->hdr.icmp->icmp_id;
 			}
 			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			nk->port[0] = nk->port[1];
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
 				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &nk->addr[pd->sidx], 0);
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
 				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &nk->addr[pd->didx], 0);
 			rewrite++;
 			break;
 #endif /* INET */
 		default:
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				if (PF_ANEQ(saddr,
 				    &nk->addr[pd->sidx], AF_INET))
 					pf_change_a(&saddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->sidx].v4.s_addr, 0);
 
 				if (PF_ANEQ(daddr,
 				    &nk->addr[pd->didx], AF_INET))
 					pf_change_a(&daddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->didx].v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				if (PF_ANEQ(saddr,
 				    &nk->addr[pd->sidx], AF_INET6))
 					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
 
 				if (PF_ANEQ(daddr,
 				    &nk->addr[pd->didx], AF_INET6))
 					PF_ACPY(saddr, &nk->addr[pd->didx], af);
 				break;
 #endif /* INET */
 			}
 			break;
 		}
 		if (nr->natpass)
 			r = NULL;
 		pd->nat_rule = nr;
 	}
 
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		/* tcp/udp only. port_op always 0 in other cases */
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 		    r->src.port[0], r->src.port[1], sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		/* tcp/udp only. port_op always 0 in other cases */
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 		    r->dst.port[0], r->dst.port[1], dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		/* icmp only. type always 0 in other cases */
 		else if (r->type && r->type != icmptype + 1)
 			r = TAILQ_NEXT(r, entries);
 		/* icmp only. type always 0 in other cases */
 		else if (r->code && r->code != icmpcode + 1)
 			r = TAILQ_NEXT(r, entries);
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		else if (pd->proto == IPPROTO_TCP &&
 		    (r->flagset & th->th_flags) != r->flags)
 			r = TAILQ_NEXT(r, entries);
 		/* tcp/udp only. uid.op always 0 in other cases */
 		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
 		    pf_socket_lookup(direction, pd, m), 1)) &&
 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
 		    pd->lookup.uid))
 			r = TAILQ_NEXT(r, entries);
 		/* tcp/udp only. gid.op always 0 in other cases */
 		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
 		    pf_socket_lookup(direction, pd, m), 1)) &&
 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
 		    pd->lookup.gid))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->prob &&
 		    r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->os_fingerprint != PF_OSFP_ANY &&
 		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
 		    pf_osfp_fingerprint(pd, m, off, th),
 		    r->os_fingerprint)))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(anchor_stack, &asd,
 				    &ruleset, PF_RULESET_FILTER, &r, &a,
 				    &match);
 		}
 		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
 		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (r->log || (nr != NULL && nr->log)) {
 		if (rewrite)
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a,
 		    ruleset, pd, 1);
 	}
 
 	if ((r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNRST) ||
 	    (r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
 			PF_ACPY(daddr, &sk->addr[pd->didx], af);
 			if (pd->sport)
 				*pd->sport = sk->port[pd->sidx];
 			if (pd->dport)
 				*pd->dport = sk->port[pd->didx];
 			if (pd->proto_sum)
 				*pd->proto_sum = bproto_sum;
 			if (pd->ip_sum)
 				*pd->ip_sum = bip_sum;
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		}
 		if (pd->proto == IPPROTO_TCP &&
 		    ((r->rule_flag & PFRULE_RETURNRST) ||
 		    (r->rule_flag & PFRULE_RETURN)) &&
 		    !(th->th_flags & TH_RST)) {
 			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
 			int		 len = 0;
 #ifdef INET
 			struct ip	*h4;
 #endif
 #ifdef INET6
 			struct ip6_hdr	*h6;
 #endif
 
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				h4 = mtod(m, struct ip *);
 				len = ntohs(h4->ip_len) - off;
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				h6 = mtod(m, struct ip6_hdr *);
 				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
 				break;
 #endif
 			}
 
 			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
 				REASON_SET(&reason, PFRES_PROTCKSUM);
 			else {
 				if (th->th_flags & TH_SYN)
 					ack++;
 				if (th->th_flags & TH_FIN)
 					ack++;
 				pf_send_tcp(m, r, af, pd->dst,
 				    pd->src, th->th_dport, th->th_sport,
 				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
 				    r->return_ttl, 1, 0, kif->pfik_ifp);
 			}
 		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
 		    r->return_icmp)
 			pf_send_icmp(m, r->return_icmp >> 8,
 			    r->return_icmp & 255, af, r);
 		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
 		    r->return_icmp6)
 			pf_send_icmp(m, r->return_icmp6 >> 8,
 			    r->return_icmp6 & 255, af, r);
 	}
 
 	if (r->action == PF_DROP)
 		goto cleanup;
 
 	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		goto cleanup;
 	}
 	if (rtableid >= 0)
 		M_SETFIB(m, rtableid);
 
 	if (!state_icmp && (r->keep_state || nr != NULL ||
 	    (pd->flags & PFDESC_TCP_NORM))) {
 		int action;
 		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
 		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
 		    hdrlen);
 		if (action != PF_PASS)
 			return (action);
 	} else {
 		if (sk != NULL)
 			uma_zfree(V_pf_state_key_z, sk);
 		if (nk != NULL)
 			uma_zfree(V_pf_state_key_z, nk);
 	}
 
 	/* copy back packet headers if we performed NAT operations */
 	if (rewrite)
 		m_copyback(m, off, hdrlen, pd->hdr.any);
 
 	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
 	    direction == PF_OUT &&
 	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
 		/*
 		 * We want the state created, but we dont
 		 * want to send this in case a partner
 		 * firewall has to know about it to allow
 		 * replies through it.
 		 */
 		return (PF_DEFER);
 
 	return (PF_PASS);
 
 cleanup:
 	if (sk != NULL)
 		uma_zfree(V_pf_state_key_z, sk);
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 	return (PF_DROP);
 }
 
 /*
  * Create and insert a state entry for a packet that matched rule r.
  *
  * nr is the translation rule (or NULL), a the anchor rule.  sk/nk are the
  * state keys prepared by the translation step; both are NULL when nr is
  * NULL, in which case a single key is built here and used for both sides.
  * nsn is the source node of the translation rule, if any.  bproto_sum and
  * bip_sum are the pre-translation checksums, used to undo NAT for
  * synproxy.  *rewrite is set when the header copy in pd was modified and
  * *sm receives the new state on success.
  *
  * Returns PF_PASS on success, PF_SYNPROXY_DROP when a synproxy SYN|ACK was
  * sent instead of passing the SYN, and PF_DROP on failure.
  *
  * Ownership: the caller hands sk/nk to this function and does not touch
  * them again after a non-PASS return.  Every failure path therefore
  * releases the keys while they are still owned here, i.e. until they are
  * passed to pf_state_insert().  After a failed pf_state_insert() the keys
  * are deliberately left alone, matching the pre-existing code.
  */
 static int
 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
     struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk,
     struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
     u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
     int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
 {
 	struct pf_state		*s = NULL;
 	struct pf_src_node	*sn = NULL;
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 mss = V_tcp_mssdflt;
 	u_short			 reason;
 
 	/* check maximums */
 	if (r->max_states &&
 	    (counter_u64_fetch(r->states_cur) >= r->max_states)) {
 		V_pf_status.lcounters[LCNT_STATES]++;
 		REASON_SET(&reason, PFRES_MAXSTATES);
 		/* Release the translation keys instead of leaking them. */
 		goto csfailed;
 	}
 	/* src node for filter rule */
 	if ((r->rule_flag & PFRULE_SRCTRACK ||
 	    r->rpool.opts & PF_POOL_STICKYADDR) &&
 	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
 		REASON_SET(&reason, PFRES_SRCLIMIT);
 		goto csfailed;
 	}
 	/* src node for translation rule */
 	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
 		REASON_SET(&reason, PFRES_SRCLIMIT);
 		goto csfailed;
 	}
 	s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO);
 	if (s == NULL) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		goto csfailed;
 	}
 	s->rule.ptr = r;
 	s->nat_rule.ptr = nr;
 	s->anchor.ptr = a;
 	STATE_INC_COUNTERS(s);
 	if (r->allow_opts)
 		s->state_flags |= PFSTATE_ALLOWOPTS;
 	if (r->rule_flag & PFRULE_STATESLOPPY)
 		s->state_flags |= PFSTATE_SLOPPY;
 	s->log = r->log & PF_LOG_ALL;
 	s->sync_state = PFSYNC_S_NONE;
 	if (nr != NULL)
 		s->log |= nr->log & PF_LOG_ALL;
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		s->src.seqlo = ntohl(th->th_seq);
 		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
 		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
 		    r->keep_state == PF_STATE_MODULATE) {
 			/* Generate sequence number modulator */
 			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
 			    0)
 				s->src.seqdiff = 1;
 			pf_change_a(&th->th_seq, &th->th_sum,
 			    htonl(s->src.seqlo + s->src.seqdiff), 0);
 			*rewrite = 1;
 		} else
 			s->src.seqdiff = 0;
 		if (th->th_flags & TH_SYN) {
 			s->src.seqhi++;
 			s->src.wscale = pf_get_wscale(m, off,
 			    th->th_off, pd->af);
 		}
 		s->src.max_win = MAX(ntohs(th->th_win), 1);
 		if (s->src.wscale & PF_WSCALE_MASK) {
 			/* Remove scale factor from initial window */
 			int win = s->src.max_win;
 			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
 			s->src.max_win = (win - 1) >>
 			    (s->src.wscale & PF_WSCALE_MASK);
 		}
 		if (th->th_flags & TH_FIN)
 			s->src.seqhi++;
 		s->dst.seqhi = 1;
 		s->dst.max_win = 1;
 		s->src.state = TCPS_SYN_SENT;
 		s->dst.state = TCPS_CLOSED;
 		s->timeout = PFTM_TCP_FIRST_PACKET;
 		break;
 	case IPPROTO_UDP:
 		s->src.state = PFUDPS_SINGLE;
 		s->dst.state = PFUDPS_NO_TRAFFIC;
 		s->timeout = PFTM_UDP_FIRST_PACKET;
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif
 		s->timeout = PFTM_ICMP_FIRST_PACKET;
 		break;
 	default:
 		s->src.state = PFOTHERS_SINGLE;
 		s->dst.state = PFOTHERS_NO_TRAFFIC;
 		s->timeout = PFTM_OTHER_FIRST_PACKET;
 	}
 
 	s->creation = time_uptime;
 	s->expire = time_uptime;
 
 	if (sn != NULL) {
 		s->src_node = sn;
 		s->src_node->states++;
 	}
 	if (nsn != NULL) {
 		/* XXX We only modify one side for now. */
 		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
 		s->nat_src_node = nsn;
 		s->nat_src_node->states++;
 	}
 	if (pd->proto == IPPROTO_TCP) {
 		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
 		    off, pd, th, &s->src, &s->dst)) {
 			REASON_SET(&reason, PFRES_MEMORY);
 			goto csfailed_state;
 		}
 		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
 		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
 		    &s->src, &s->dst, rewrite)) {
 			/* This really shouldn't happen!!! */
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_normalize_tcp_stateful failed on first pkt"));
 			pf_normalize_tcp_cleanup(s);
 			goto csfailed_state;
 		}
 	}
 	s->direction = pd->dir;
 
 	/*
 	 * sk/nk could already been setup by pf_get_translation().
 	 */
 	if (nr == NULL) {
 		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
 		    __func__, nr, sk, nk));
 		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
 		if (sk == NULL) {
 			/*
 			 * The state is already allocated and counted, so it
 			 * has to be torn down like on any other late
 			 * failure; the plain csfailed exit would leak it.
 			 */
 			if (pd->proto == IPPROTO_TCP)
 				pf_normalize_tcp_cleanup(s);
 			goto csfailed_state;
 		}
 		nk = sk;
 	} else
 		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
 		    __func__, nr, sk, nk));
 
 	/* Swap sk/nk for PF_OUT. */
 	if (pf_state_insert(BOUND_IFACE(r, kif),
 	    (pd->dir == PF_IN) ? sk : nk,
 	    (pd->dir == PF_IN) ? nk : sk, s)) {
 		/* Keys were handed to pf_state_insert(); only drop the state. */
 		if (pd->proto == IPPROTO_TCP)
 			pf_normalize_tcp_cleanup(s);
 		REASON_SET(&reason, PFRES_STATEINS);
 		pf_src_tree_remove_state(s);
 		STATE_DEC_COUNTERS(s);
 		uma_zfree(V_pf_state_z, s);
 		return (PF_DROP);
 	} else
 		*sm = s;
 
 	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
 	if (tag > 0)
 		s->tag = tag;
 	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
 	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
 		s->src.state = PF_TCPS_PROXY_SRC;
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			struct pf_state_key *skt = s->key[PF_SK_WIRE];
 			if (pd->dir == PF_OUT)
 				skt = s->key[PF_SK_STACK];
 			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
 			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
 			if (pd->sport)
 				*pd->sport = skt->port[pd->sidx];
 			if (pd->dport)
 				*pd->dport = skt->port[pd->didx];
 			if (pd->proto_sum)
 				*pd->proto_sum = bproto_sum;
 			if (pd->ip_sum)
 				*pd->ip_sum = bip_sum;
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		}
 		s->src.seqhi = htonl(arc4random());
 		/* Find mss option */
 		int rtid = M_GETFIB(m);
 		mss = pf_get_mss(m, off, th->th_off, pd->af);
 		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
 		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
 		s->src.mss = mss;
 		/* Answer the SYN ourselves; the original SYN is dropped. */
 		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
 		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
 		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
 		REASON_SET(&reason, PFRES_SYNPROXY);
 		return (PF_SYNPROXY_DROP);
 	}
 
 	return (PF_PASS);
 
 csfailed_state:
 	/*
 	 * Failure after the state was allocated but before the keys were
 	 * handed to pf_state_insert(): drop the state's source node
 	 * references and counters, free the state, then free any keys still
 	 * owned here (sk/nk are distinct or both NULL at this point).
 	 */
 	pf_src_tree_remove_state(s);
 	STATE_DEC_COUNTERS(s);
 	uma_zfree(V_pf_state_z, s);
 	if (sk != NULL)
 		uma_zfree(V_pf_state_key_z, sk);
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 	return (PF_DROP);
 
 csfailed:
 	/* Failure before any state was allocated. */
 	if (sk != NULL)
 		uma_zfree(V_pf_state_key_z, sk);
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 
 	/* Discard source nodes that are unreferenced and not yet expiring. */
 	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 		pf_unlink_src_node(sn);
 		pf_free_src_node(sn);
 	}
 
 	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
 		pf_unlink_src_node(nsn);
 		pf_free_src_node(nsn);
 	}
 
 	return (PF_DROP);
 }
 
 /*
  * Evaluate the active filter ruleset for a packet handled as a fragment.
  *
  * Only criteria that do not depend on the transport header are matched:
  * interface, direction, address family, protocol, addresses, TOS,
  * probability and tag.  Rules that specify an OS fingerprint, ports or TCP
  * flags (for TCP/UDP), or an ICMP type/code (for ICMP/ICMPv6) are skipped.
  *
  * rm, am and rsm are in/out: they receive the last (or first "quick")
  * matching rule, its anchor rule and its ruleset.  *rm is dereferenced
  * after the walk even when nothing matched, so the caller must have
  * initialised it (presumably to the default rule -- confirm against the
  * caller).  The parameter h is not used in this function.
  *
  * Returns PF_PASS when the selected rule's action is PF_PASS and tagging
  * succeeds, PF_DROP otherwise.
  */
 static int
 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
     struct pf_ruleset **rsm)
 {
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	sa_family_t		 af = pd->af;
 	u_short			 reason;
 	int			 tag = -1;
 	int			 asd = 0;
 	int			 match = 0;
 	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
 
 	PF_RULES_RASSERT();
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 	/*
 	 * Rule walk.  The first six tests jump via the precomputed skip
 	 * steps; the remaining tests advance to the next rule.
 	 */
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules with an OS fingerprint never match here. */
 		else if (r->os_fingerprint != PF_OSFP_ANY)
 			r = TAILQ_NEXT(r, entries);
 		/* Rules with port criteria never match here. */
 		else if (pd->proto == IPPROTO_UDP &&
 		    (r->src.port_op || r->dst.port_op))
 			r = TAILQ_NEXT(r, entries);
 		/* Likewise for TCP, which also excludes flag criteria. */
 		else if (pd->proto == IPPROTO_TCP &&
 		    (r->src.port_op || r->dst.port_op || r->flagset))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules with ICMP type/code criteria never match here. */
 		else if ((pd->proto == IPPROTO_ICMP ||
 		    pd->proto == IPPROTO_ICMPV6) &&
 		    (r->type || r->code))
 			r = TAILQ_NEXT(r, entries);
 		/* Probabilistic rule: compare against a draw in [1, UINT_MAX - 1]. */
 		else if (r->prob && r->prob <=
 		    (arc4random() % (UINT_MAX - 1) + 1))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			/* Rule matched. */
 			if (r->anchor == NULL) {
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				/* "quick" ends evaluation at the first match. */
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(anchor_stack, &asd,
 				    &ruleset, PF_RULESET_FILTER, &r, &a,
 				    &match);
 		}
 		/* End of a ruleset: pop back out of the anchor, if any. */
 		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
 		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	/* Continue with the winning rule (or the caller's initial *rm). */
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (r->log)
 		PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd,
 		    1);
 
 	/* Anything other than an explicit pass drops the packet. */
 	if (r->action != PF_PASS)
 		return (PF_DROP);
 
 	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	return (PF_PASS);
 }
 
 static int
 pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
 	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
 	struct pf_pdesc *pd, u_short *reason, int *copyback)
 {
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 win = ntohs(th->th_win);
 	u_int32_t		 ack, end, seq, orig_seq;
 	u_int8_t		 sws, dws;
 	int			 ackskew;
 
 	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
 		sws = src->wscale & PF_WSCALE_MASK;
 		dws = dst->wscale & PF_WSCALE_MASK;
 	} else
 		sws = dws = 0;
 
 	/*
 	 * Sequence tracking algorithm from Guido van Rooij's paper:
 	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
 	 *	tcp_filtering.ps
 	 */
 
 	orig_seq = seq = ntohl(th->th_seq);
 	if (src->seqlo == 0) {
 		/* First packet from this end. Set its state */
 
 		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
 		    src->scrub == NULL) {
 			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
 				REASON_SET(reason, PFRES_MEMORY);
 				return (PF_DROP);
 			}
 		}
 
 		/* Deferred generation of sequence number modulator */
 		if (dst->seqdiff && !src->seqdiff) {
 			/* use random iss for the TCP server */
 			while ((src->seqdiff = arc4random() - seq) == 0)
 				;
 			ack = ntohl(th->th_ack) - dst->seqdiff;
 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		} else {
 			ack = ntohl(th->th_ack);
 		}
 
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN) {
 			end++;
 			if (dst->wscale & PF_WSCALE_FLAG) {
 				src->wscale = pf_get_wscale(m, off, th->th_off,
 				    pd->af);
 				if (src->wscale & PF_WSCALE_FLAG) {
 					/* Remove scale factor from initial
 					 * window */
 					sws = src->wscale & PF_WSCALE_MASK;
 					win = ((u_int32_t)win + (1 << sws) - 1)
 					    >> sws;
 					dws = dst->wscale & PF_WSCALE_MASK;
 				} else {
 					/* fixup other window */
 					dst->max_win <<= dst->wscale &
 					    PF_WSCALE_MASK;
 					/* in case of a retrans SYN|ACK */
 					dst->wscale = 0;
 				}
 			}
 		}
 		if (th->th_flags & TH_FIN)
 			end++;
 
 		src->seqlo = seq;
 		if (src->state < TCPS_SYN_SENT)
 			src->state = TCPS_SYN_SENT;
 
 		/*
 		 * May need to slide the window (seqhi may have been set by
 		 * the crappy stack check or if we picked up the connection
 		 * after establishment)
 		 */
 		if (src->seqhi == 1 ||
 		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
 			src->seqhi = end + MAX(1, dst->max_win << dws);
 		if (win > src->max_win)
 			src->max_win = win;
 
 	} else {
 		ack = ntohl(th->th_ack) - dst->seqdiff;
 		if (src->seqdiff) {
 			/* Modulate sequence numbers */
 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		}
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN)
 			end++;
 		if (th->th_flags & TH_FIN)
 			end++;
 	}
 
 	if ((th->th_flags & TH_ACK) == 0) {
 		/* Let it pass through the ack skew check */
 		ack = dst->seqlo;
 	} else if ((ack == 0 &&
 	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
 	    /* broken tcp stacks do not set ack */
 	    (dst->state < TCPS_SYN_SENT)) {
 		/*
 		 * Many stacks (ours included) will set the ACK number in an
 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
 		 */
 		ack = dst->seqlo;
 	}
 
 	if (seq == end) {
 		/* Ease sequencing restrictions on no data packets */
 		seq = src->seqlo;
 		end = seq;
 	}
 
 	ackskew = dst->seqlo - ack;
 
 
 	/*
 	 * Need to demodulate the sequence numbers in any TCP SACK options
 	 * (Selective ACK). We could optionally validate the SACK values
 	 * against the current ACK window, either forwards or backwards, but
 	 * I'm not confident that SACK has been implemented properly
 	 * everywhere. It wouldn't surprise me if several stacks accidently
 	 * SACK too far backwards of previously ACKed data. There really aren't
 	 * any security implications of bad SACKing unless the target stack
 	 * doesn't validate the option length correctly. Someone trying to
 	 * spoof into a TCP connection won't bother blindly sending SACK
 	 * options anyway.
 	 */
 	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
 		if (pf_modulate_sack(m, off, pd, th, dst))
 			*copyback = 1;
 	}
 
 
 #define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
 	if (SEQ_GEQ(src->seqhi, end) &&
 	    /* Last octet inside other's window space */
 	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
 	    /* Retrans: not more than one window back */
 	    (ackskew >= -MAXACKWINDOW) &&
 	    /* Acking not more than one reassembled fragment backwards */
 	    (ackskew <= (MAXACKWINDOW << sws)) &&
 	    /* Acking not more than one window forward */
 	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
 	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
 	    (pd->flags & PFDESC_IP_REAS) == 0)) {
 	    /* Require an exact/+1 sequence match on resets when possible */
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 
 		/* update states */
 		if (th->th_flags & TH_SYN)
 			if (src->state < TCPS_SYN_SENT)
 				src->state = TCPS_SYN_SENT;
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_ACK) {
 			if (dst->state == TCPS_SYN_SENT) {
 				dst->state = TCPS_ESTABLISHED;
 				if (src->state == TCPS_ESTABLISHED &&
 				    (*state)->src_node != NULL &&
 				    pf_src_connlimit(state)) {
 					REASON_SET(reason, PFRES_SRCLIMIT);
 					return (PF_DROP);
 				}
 			} else if (dst->state == TCPS_CLOSING)
 				dst->state = TCPS_FIN_WAIT_2;
 		}
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* update expire time */
 		(*state)->expire = time_uptime;
 		if (src->state >= TCPS_FIN_WAIT_2 &&
 		    dst->state >= TCPS_FIN_WAIT_2)
 			(*state)->timeout = PFTM_TCP_CLOSED;
 		else if (src->state >= TCPS_CLOSING &&
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_FIN_WAIT;
 		else if (src->state < TCPS_ESTABLISHED ||
 		    dst->state < TCPS_ESTABLISHED)
 			(*state)->timeout = PFTM_TCP_OPENING;
 		else if (src->state >= TCPS_CLOSING ||
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_CLOSING;
 		else
 			(*state)->timeout = PFTM_TCP_ESTABLISHED;
 
 		/* Fall through to PASS packet */
 
 	} else if ((dst->state < TCPS_SYN_SENT ||
 		dst->state >= TCPS_FIN_WAIT_2 ||
 		src->state >= TCPS_FIN_WAIT_2) &&
 	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
 	    /* Within a window forward of the originating packet */
 	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
 	    /* Within a window backward of the originating packet */
 
 		/*
 		 * This currently handles three situations:
 		 *  1) Stupid stacks will shotgun SYNs before their peer
 		 *     replies.
 		 *  2) When PF catches an already established stream (the
 		 *     firewall rebooted, the state table was flushed, routes
 		 *     changed...)
 		 *  3) Packets get funky immediately after the connection
 		 *     closes (this should catch Solaris spurious ACK|FINs
 		 *     that web servers like to spew after a close)
 		 *
 		 * This must be a little more careful than the above code
 		 * since packet floods will also be caught here. We don't
 		 * update the TTL here to mitigate the damage of a packet
 		 * flood and so the same code can handle awkward establishment
 		 * and a loosened connection close.
 		 * In the establishment case, a correct peer response will
 		 * validate the connection, go through the normal state code
 		 * and keep updating the state TTL.
 		 */
 
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: loose state match: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
 			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1],
 			    pd->dir == PF_IN ? "in" : "out",
 			    pd->dir == (*state)->direction ? "fwd" : "rev");
 		}
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 		/*
 		 * Cannot set dst->seqhi here since this could be a shotgunned
 		 * SYN and not an already established connection.
 		 */
 
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* Fall through to PASS packet */
 
 	} else {
 		if ((*state)->dst.state == TCPS_SYN_SENT &&
 		    (*state)->src.state == TCPS_SYN_SENT) {
 			/* Send RST for state mismatches during handshake */
 			if (!(th->th_flags & TH_RST))
 				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 				    pd->dst, pd->src, th->th_dport,
 				    th->th_sport, ntohl(th->th_ack), 0,
 				    TH_RST, 0, 0,
 				    (*state)->rule.ptr->return_ttl, 1, 0,
 				    kif->pfik_ifp);
 			src->seqlo = 0;
 			src->seqhi = 1;
 			src->max_win = 1;
 		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: BAD state: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu dir=%s,%s\n",
 			    seq, orig_seq, ack, pd->p_len, ackskew,
 			    (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1],
 			    pd->dir == PF_IN ? "in" : "out",
 			    pd->dir == (*state)->direction ? "fwd" : "rev");
 			printf("pf: State failure on: %c %c %c %c | %c %c\n",
 			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
 			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
 			    ' ': '2',
 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
 			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
 			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
 			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
 		}
 		REASON_SET(reason, PFRES_BADSTATE);
 		return (PF_DROP);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Sloppy TCP state tracking: advance the per-peer connection states from
  * the TCP flags of the packet alone (no sequence number or window checks
  * are made here) and select the matching state timeout.
  *
  * src/dst are the state peers for the sender and receiver of this packet.
  * Returns PF_DROP, with PFRES_SRCLIMIT recorded through REASON_SET, when a
  * transition to ESTABLISHED makes pf_src_connlimit() report a violation;
  * otherwise returns PF_PASS.
  */
 static int
 pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
 	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
 {
 	struct tcphdr	*tcp = pd->hdr.tcp;
 	int		 flags = tcp->th_flags;
 	int		 check_limit = 0;
 	int		 tmo;
 
 	/* The sender's own SYN/FIN may only move its state forward. */
 	if ((flags & TH_SYN) && src->state < TCPS_SYN_SENT)
 		src->state = TCPS_SYN_SENT;
 	if ((flags & TH_FIN) && src->state < TCPS_CLOSING)
 		src->state = TCPS_CLOSING;
 
 	if (flags & TH_ACK) {
 		if (dst->state == TCPS_SYN_SENT) {
 			/* ACK while the receiver sits in SYN_SENT. */
 			dst->state = TCPS_ESTABLISHED;
 			check_limit = (src->state == TCPS_ESTABLISHED);
 		} else if (dst->state == TCPS_CLOSING) {
 			dst->state = TCPS_FIN_WAIT_2;
 		} else if (src->state == TCPS_SYN_SENT &&
 		    dst->state < TCPS_SYN_SENT) {
 			/*
 			 * Special sloppy case: only one half of the
 			 * connection is visible.  An ACK following the
 			 * initial SYN, with no packet ever seen from the
 			 * destination, establishes the connection.
 			 */
 			src->state = TCPS_ESTABLISHED;
 			dst->state = TCPS_ESTABLISHED;
 			check_limit = 1;
 		} else if (src->state == TCPS_CLOSING &&
 		    dst->state == TCPS_ESTABLISHED && dst->seqlo == 0) {
 			/*
 			 * Closing of a half connection, where the full
 			 * bidirectional FIN/ACK+ACK exchange is not seen.
 			 */
 			dst->state = TCPS_CLOSING;
 		}
 
 		/* Source-node connection limits apply once established. */
 		if (check_limit && (*state)->src_node != NULL &&
 		    pf_src_connlimit(state)) {
 			REASON_SET(reason, PFRES_SRCLIMIT);
 			return (PF_DROP);
 		}
 	}
 
 	if (flags & TH_RST) {
 		src->state = TCPS_TIME_WAIT;
 		dst->state = TCPS_TIME_WAIT;
 	}
 
 	/* Refresh the expiry and pick the timeout class for the new states. */
 	(*state)->expire = time_uptime;
 	if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2)
 		tmo = PFTM_TCP_CLOSED;
 	else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING)
 		tmo = PFTM_TCP_FIN_WAIT;
 	else if (src->state < TCPS_ESTABLISHED ||
 	    dst->state < TCPS_ESTABLISHED)
 		tmo = PFTM_TCP_OPENING;
 	else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING)
 		tmo = PFTM_TCP_CLOSING;
 	else
 		tmo = PFTM_TCP_ESTABLISHED;
 	(*state)->timeout = tmo;
 
 	return (PF_PASS);
 }
 
 /*
  * Match a TCP packet against the state table and process it:
  *  1. build the lookup key from the packet addresses/ports and find the
  *     state (STATE_LOOKUP);
  *  2. if the state is still in one of the synproxy stages
  *     (PF_TCPS_PROXY_SRC / PF_TCPS_PROXY_DST), run that stage;
  *  3. handle a fresh SYN arriving on a state whose both peers are at or
  *     past FIN_WAIT_2 by unlinking the old state;
  *  4. track the connection with pf_tcp_track_sloppy() or
  *     pf_tcp_track_full(), depending on PFSTATE_SLOPPY;
  *  5. rewrite addresses/ports when the wire and stack keys differ and copy
  *     the modified TCP header back into the mbuf.
  *
  * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP; *reason is set through
  * REASON_SET on the drop paths below.
  * NOTE(review): STATE_LOOKUP is a macro defined outside this view; it
  * presumably returns from this function when no usable state is found --
  * confirm against its definition.
  */
 static int
 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
     u_short *reason)
 {
 	struct pf_state_key_cmp	 key;
 	struct tcphdr		*th = pd->hdr.tcp;
 	int			 copyback = 0;
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key	*sk;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = IPPROTO_TCP;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = th->th_sport;
 		key.port[1] = th->th_dport;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = th->th_sport;
 		key.port[0] = th->th_dport;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	/*
 	 * src is the peer that sent this packet, dst the peer receiving it;
 	 * they are swapped relative to the state when the packet travels
 	 * against the direction the state was created in.
 	 */
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	sk = (*state)->key[pd->didx];
 
 	/*
 	 * Synproxy stage 1: pf answers the handshake on the state's source
 	 * side itself.  Only packets in the state's own direction are
 	 * accepted here.
 	 */
 	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
 		if (direction != (*state)->direction) {
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 		if (th->th_flags & TH_SYN) {
 			/* A SYN must carry the recorded initial sequence. */
 			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 			/* Reply with SYN|ACK; the SYN itself is consumed. */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 			    pd->src, th->th_dport, th->th_sport,
 			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
 			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (!(th->th_flags & TH_ACK) ||
 		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 			/* Not the ACK that completes our SYN|ACK. */
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else if ((*state)->src_node != NULL &&
 		    pf_src_connlimit(state)) {
 			REASON_SET(reason, PFRES_SRCLIMIT);
 			return (PF_DROP);
 		} else
 			(*state)->src.state = PF_TCPS_PROXY_DST;
 	}
 	/*
 	 * Synproxy stage 2: open the connection towards the state's
 	 * destination side.  Note this is entered in the same call when
 	 * stage 1 has just advanced src.state to PF_TCPS_PROXY_DST.
 	 */
 	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
 		if (direction == (*state)->direction) {
 			/* Only the handshake-completing ACK is accepted. */
 			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
 			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
 			/* dst.seqhi == 1 means no sequence was chosen yet. */
 			if ((*state)->dst.seqhi == 1)
 				(*state)->dst.seqhi = htonl(arc4random());
 			/* Send a SYN towards the other side. */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
 			    sk->port[pd->sidx], sk->port[pd->didx],
 			    (*state)->dst.seqhi, 0, TH_SYN, 0,
 			    (*state)->src.mss, 0, 0, (*state)->tag, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
 		    (TH_SYN|TH_ACK)) ||
 		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
 			/* Reverse direction must be the SYN|ACK to our SYN. */
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else {
 			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
 			(*state)->dst.seqlo = ntohl(th->th_seq);
 			/* ACK the SYN|ACK back to its sender ... */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 			    pd->src, th->th_dport, th->th_sport,
 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
 			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
 			    (*state)->tag, NULL);
 			/* ... and send an ACK towards the other side. */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
 			    sk->port[pd->sidx], sk->port[pd->didx],
 			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
 			    TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL);
 			/*
 			 * Record the offsets between the two handshakes'
 			 * sequence numbers, then reset the windows and mark
 			 * both peers established.  The seqdiff values must be
 			 * computed before seqhi is overwritten below.
 			 */
 			(*state)->src.seqdiff = (*state)->dst.seqhi -
 			    (*state)->src.seqlo;
 			(*state)->dst.seqdiff = (*state)->src.seqhi -
 			    (*state)->dst.seqlo;
 			(*state)->src.seqhi = (*state)->src.seqlo +
 			    (*state)->dst.max_win;
 			(*state)->dst.seqhi = (*state)->dst.seqlo +
 			    (*state)->src.max_win;
 			(*state)->src.wscale = (*state)->dst.wscale = 0;
 			(*state)->src.state = (*state)->dst.state =
 			    TCPS_ESTABLISHED;
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 	}
 
 	/*
 	 * A pure SYN on a state whose both peers are at or beyond FIN_WAIT_2:
 	 * the old state is unlinked and this packet dropped.
 	 */
 	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
 	    dst->state >= TCPS_FIN_WAIT_2 &&
 	    src->state >= TCPS_FIN_WAIT_2) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state reuse ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf("\n");
 		}
 		/* XXX make sure it's the same direction ?? */
 		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
 		pf_unlink_state(*state, PF_ENTER_LOCKED);
 		*state = NULL;
 		return (PF_DROP);
 	}
 
 	/* Connection tracking proper: sloppy or full sequence tracking. */
 	if ((*state)->state_flags & PFSTATE_SLOPPY) {
 		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
 			return (PF_DROP);
 	} else {
 		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
 		    &copyback) == PF_DROP)
 			return (PF_DROP);
 	}
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != th->th_sport)
 			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
 			    &th->th_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 0, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != th->th_dport)
 			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
 			    &th->th_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 0, pd->af);
 		copyback = 1;
 	}
 
 	/* Copyback sequence modulation or stateful scrub changes if needed */
 	if (copyback)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 }
 
 static int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
 {
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key_cmp	 key;
 	struct udphdr		*uh = pd->hdr.udp;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = IPPROTO_UDP;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = uh->uh_sport;
 		key.port[1] = uh->uh_dport;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = uh->uh_sport;
 		key.port[0] = uh->uh_dport;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	/* update states */
 	if (src->state < PFUDPS_SINGLE)
 		src->state = PFUDPS_SINGLE;
 	if (dst->state == PFUDPS_SINGLE)
 		dst->state = PFUDPS_MULTIPLE;
 
 	/* update expire time */
 	(*state)->expire = time_uptime;
 	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
 		(*state)->timeout = PFTM_UDP_MULTIPLE;
 	else
 		(*state)->timeout = PFTM_UDP_SINGLE;
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != uh->uh_sport)
 			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 1, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != uh->uh_dport)
 			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 1, pd->af);
 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Match an ICMP / ICMPv6 packet against the state table.
  *
  * Two kinds of message are distinguished by ICMP type:
  *  - query/reply messages: an ICMP state is looked up, keyed by the
  *    packet addresses and the ICMP id; the state's expiry is refreshed and
  *    addresses (and, for IPv4, the id) are translated if needed;
  *  - error messages (unreachable, time exceeded, ...): the IP header and
  *    the start of the protocol header quoted inside the error are pulled
  *    from the mbuf, the state of that quoted packet is looked up, and the
  *    quoted addresses/ports are translated if needed.
  *
  * Returns PF_PASS or PF_DROP; drop paths that know a cause record it
  * through REASON_SET.  STATE_LOOKUP (a macro defined elsewhere) may return
  * from this function on its own.
  */
 static int
 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
 	u_int16_t	 icmpid = 0, *icmpsum;
 	u_int8_t	 icmptype;
 	int		 state_icmp = 0;
 	struct pf_state_key_cmp key;
 
 	bzero(&key, sizeof(key));
 	/* Fetch type/id/checksum and classify the message as error or not. */
 	switch (pd->proto) {
 #ifdef INET
 	case IPPROTO_ICMP:
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpid = pd->hdr.icmp->icmp_id;
 		icmpsum = &pd->hdr.icmp->icmp_cksum;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpid = pd->hdr.icmp6->icmp6_id;
 		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	}
 
 	if (!state_icmp) {
 
 		/*
 		 * ICMP query/reply message not related to a TCP/UDP packet.
 		 * Search for an ICMP state.
 		 */
 		key.af = pd->af;
 		key.proto = pd->proto;
 		key.port[0] = key.port[1] = icmpid;
 		if (direction == PF_IN)	{	/* wire side, straight */
 			PF_ACPY(&key.addr[0], pd->src, key.af);
 			PF_ACPY(&key.addr[1], pd->dst, key.af);
 		} else {			/* stack side, reverse */
 			PF_ACPY(&key.addr[1], pd->src, key.af);
 			PF_ACPY(&key.addr[0], pd->dst, key.af);
 		}
 
 		STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 		(*state)->expire = time_uptime;
 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
 
 		/* translate source/destination address, if necessary */
 		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 			struct pf_state_key *nk = (*state)->key[pd->didx];
 
 			switch (pd->af) {
 #ifdef INET
 			case AF_INET:
 				if (PF_ANEQ(pd->src,
 				    &nk->addr[pd->sidx], AF_INET))
 					pf_change_a(&saddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->sidx].v4.s_addr, 0);
 
 				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
 				    AF_INET))
 					pf_change_a(&daddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->didx].v4.s_addr, 0);
 
 				/* rewrite the ICMP id and fix the checksum */
 				if (nk->port[0] !=
 				    pd->hdr.icmp->icmp_id) {
 					pd->hdr.icmp->icmp_cksum =
 					    pf_cksum_fixup(
 					    pd->hdr.icmp->icmp_cksum, icmpid,
 					    nk->port[pd->sidx], 0);
 					pd->hdr.icmp->icmp_id =
 					    nk->port[pd->sidx];
 				}
 
 				m_copyback(m, off, ICMP_MINLEN,
 				    (caddr_t )pd->hdr.icmp);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				if (PF_ANEQ(pd->src,
 				    &nk->addr[pd->sidx], AF_INET6))
 					pf_change_a6(saddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &nk->addr[pd->sidx], 0);
 
 				if (PF_ANEQ(pd->dst,
 				    &nk->addr[pd->didx], AF_INET6))
 					pf_change_a6(daddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &nk->addr[pd->didx], 0);
 
 				m_copyback(m, off, sizeof(struct icmp6_hdr),
 				    (caddr_t )pd->hdr.icmp6);
 				break;
 #endif /* INET6 */
 			}
 		}
 		return (PF_PASS);
 
 	} else {
 		/*
 		 * ICMP error message in response to a TCP/UDP packet.
 		 * Extract the inner TCP/UDP header and search for that state.
 		 */
 
 		struct pf_pdesc	pd2;
 #ifdef INET
 		struct ip	h2;
 #endif /* INET */
 #ifdef INET6
 		struct ip6_hdr	h2_6;
 		int		terminal = 0;
 #endif /* INET6 */
 		int		ipoff2 = 0;
 		int		off2 = 0;
 
 		bzero(&pd2, sizeof pd2);
 		pd2.af = pd->af;
 		/* Payload packet is from the opposite direction. */
 		pd2.sidx = (direction == PF_IN) ? 1 : 0;
 		pd2.didx = (direction == PF_IN) ? 0 : 1;
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* offset of h2 in mbuf chain */
 			ipoff2 = off + ICMP_MINLEN;
 
 			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip)\n"));
 				return (PF_DROP);
 			}
 			/*
 			 * ICMP error messages don't refer to non-first
 			 * fragments
 			 */
 			if (h2.ip_off & htons(IP_OFFMASK)) {
 				REASON_SET(reason, PFRES_FRAG);
 				return (PF_DROP);
 			}
 
 			/* offset of protocol header that follows h2 */
 			off2 = ipoff2 + (h2.ip_hl << 2);
 
 			pd2.proto = h2.ip_p;
 			pd2.src = (struct pf_addr *)&h2.ip_src;
 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
 			pd2.ip_sum = &h2.ip_sum;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			ipoff2 = off + sizeof(struct icmp6_hdr);
 
 			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip6)\n"));
 				return (PF_DROP);
 			}
 			pd2.proto = h2_6.ip6_nxt;
 			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
 			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
 			pd2.ip_sum = NULL;
 			off2 = ipoff2 + sizeof(h2_6);
 			/* walk the extension header chain of the quoted packet */
 			do {
 				switch (pd2.proto) {
 				case IPPROTO_FRAGMENT:
 					/*
 					 * ICMPv6 error messages for
 					 * non-first fragments
 					 */
 					REASON_SET(reason, PFRES_FRAG);
 					return (PF_DROP);
 				case IPPROTO_AH:
 				case IPPROTO_HOPOPTS:
 				case IPPROTO_ROUTING:
 				case IPPROTO_DSTOPTS: {
 					/* get next header and header length */
 					struct ip6_ext opt6;
 
 					if (!pf_pull_hdr(m, off2, &opt6,
 					    sizeof(opt6), NULL, reason,
 					    pd2.af)) {
 						DPFPRINTF(PF_DEBUG_MISC,
 						    ("pf: ICMPv6 short opt\n"));
 						return (PF_DROP);
 					}
 					if (pd2.proto == IPPROTO_AH)
 						off2 += (opt6.ip6e_len + 2) * 4;
 					else
 						off2 += (opt6.ip6e_len + 1) * 8;
 					pd2.proto = opt6.ip6e_nxt;
 					/* goto the next header */
 					break;
 				}
 				default:
 					terminal++;
 					break;
 				}
 			} while (!terminal);
 			break;
 #endif /* INET6 */
 		}
 
 		/* Dispatch on the protocol of the quoted packet. */
 		switch (pd2.proto) {
 		case IPPROTO_TCP: {
 			struct tcphdr		 th;
 			u_int32_t		 seq;
 			struct pf_state_peer	*src, *dst;
 			u_int8_t		 dws;
 			int			 copyback = 0;
 
 			/*
 			 * Only the first 8 bytes of the TCP header can be
 			 * expected. Don't access any TCP header fields after
 			 * th_seq, an ackskew test is not possible.
 			 */
 			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
 			    pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(tcp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_TCP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[pd2.sidx] = th.th_sport;
 			key.port[pd2.didx] = th.th_dport;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/*
 			 * The quoted packet travelled opposite to the ICMP
 			 * error, hence the peers are swapped relative to
 			 * the usual direction test.
 			 */
 			if (direction == (*state)->direction) {
 				src = &(*state)->dst;
 				dst = &(*state)->src;
 			} else {
 				src = &(*state)->src;
 				dst = &(*state)->dst;
 			}
 
 			if (src->wscale && dst->wscale)
 				dws = dst->wscale & PF_WSCALE_MASK;
 			else
 				dws = 0;
 
 			/* Demodulate sequence number */
 			seq = ntohl(th.th_seq) - src->seqdiff;
 			if (src->seqdiff) {
 				pf_change_a(&th.th_seq, icmpsum,
 				    htonl(seq), 0);
 				copyback = 1;
 			}
 
 			/* Unless sloppy, the quoted seq must fit the window. */
 			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
 			    (!SEQ_GEQ(src->seqhi, seq) ||
 			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
 					printf("pf: BAD ICMP %d:%d ",
 					    icmptype, pd->hdr.icmp->icmp_code);
 					pf_print_host(pd->src, 0, pd->af);
 					printf(" -> ");
 					pf_print_host(pd->dst, 0, pd->af);
 					printf(" state: ");
 					pf_print_state(*state);
 					printf(" seq=%u\n", seq);
 				}
 				REASON_SET(reason, PFRES_BADSTATE);
 				return (PF_DROP);
 			} else {
 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
 					printf("pf: OK ICMP %d:%d ",
 					    icmptype, pd->hdr.icmp->icmp_code);
 					pf_print_host(pd->src, 0, pd->af);
 					printf(" -> ");
 					pf_print_host(pd->dst, 0, pd->af);
 					printf(" state: ");
 					pf_print_state(*state);
 					printf(" seq=%u\n", seq);
 				}
 			}
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != th.th_sport)
 					pf_change_icmp(pd2.src, &th.th_sport,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != th.th_dport)
 					pf_change_icmp(pd2.dst, &th.th_dport,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				copyback = 1;
 			}
 
 			/* write back ICMP header, quoted IP header and TCP bytes */
 			if (copyback) {
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t )pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2),
 					    (caddr_t )&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, 8, (caddr_t)&th);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 		case IPPROTO_UDP: {
 			struct udphdr		uh;
 
 			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(udp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_UDP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[pd2.sidx] = uh.uh_sport;
 			key.port[pd2.didx] = uh.uh_dport;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != uh.uh_sport)
 					pf_change_icmp(pd2.src, &uh.uh_sport,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != uh.uh_dport)
 					pf_change_icmp(pd2.dst, &uh.uh_dport,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t )pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
 			}
 			return (PF_PASS);
 			break;
 		}
 #ifdef INET
 		case IPPROTO_ICMP: {
 			struct icmp		iih;
 
 			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(icmp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = iih.icmp_id;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != iih.icmp_id)
 					pf_change_icmp(pd2.src, &iih.icmp_id,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != iih.icmp_id)
 					pf_change_icmp(pd2.dst, &iih.icmp_id,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 
 				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
 				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
 			}
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET */
 #ifdef INET6
 		case IPPROTO_ICMPV6: {
 			struct icmp6_hdr	iih;
 
 			if (!pf_pull_hdr(m, off2, &iih,
 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(icmp6)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMPV6;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = iih.icmp6_id;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != iih.icmp6_id)
 					pf_change_icmp(pd2.src, &iih.icmp6_id,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != iih.icmp6_id)
 					pf_change_icmp(pd2.dst, &iih.icmp6_id,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 
 				m_copyback(m, off, sizeof(struct icmp6_hdr),
 				    (caddr_t)pd->hdr.icmp6);
 				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
 				m_copyback(m, off2, sizeof(struct icmp6_hdr),
 				    (caddr_t)&iih);
 			}
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET6 */
 		default: {
 			/* any other quoted protocol: keyed without ports */
 			key.af = pd2.af;
 			key.proto = pd2.proto;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = 0;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af))
 					pf_change_icmp(pd2.src, NULL, daddr,
 					    &nk->addr[pd2.sidx], 0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				/*
 				 * The quoted destination is the address that
 				 * was compared, so it is the one to rewrite
 				 * (as in the TCP/UDP/ICMP cases above).
 				 */
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af))
 					pf_change_icmp(pd2.dst, NULL,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx], 0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 			}
 			return (PF_PASS);
 			break;
 		}
 		}
 	}
 }
 
 /*
  * Match a packet against the state table using a key made only of address
  * family, protocol and addresses (both ports are zero), advance the two
  * peers' pseudo-states (PFOTHERS_SINGLE / PFOTHERS_MULTIPLE), refresh the
  * state's expiry and timeout, and rewrite the packet addresses when the
  * state's wire and stack keys differ.
  *
  * Returns PF_PASS on the paths visible here; STATE_LOOKUP (a macro defined
  * elsewhere) may return from this function on its own.
  */
 static int
 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, struct pf_pdesc *pd)
 {
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key_cmp	 key;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = pd->proto;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = key.port[1] = 0;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = key.port[0] = 0;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	/* src is the peer sending this packet, dst the one receiving it */
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	/* update states */
 	if (src->state < PFOTHERS_SINGLE)
 		src->state = PFOTHERS_SINGLE;
 	if (dst->state == PFOTHERS_SINGLE)
 		dst->state = PFOTHERS_MULTIPLE;
 
 	/* update expire time */
 	(*state)->expire = time_uptime;
 	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
 		(*state)->timeout = PFTM_OTHER_MULTIPLE;
 	else
 		(*state)->timeout = PFTM_OTHER_SINGLE;
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		KASSERT(nk, ("%s: nk is null", __func__));
 		KASSERT(pd, ("%s: pd is null", __func__));
 		KASSERT(pd->src, ("%s: pd->src is null", __func__));
 		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* IPv4: patch the address and the IP header checksum */
 			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
 				pf_change_a(&pd->src->v4.s_addr,
 				    pd->ip_sum,
 				    nk->addr[pd->sidx].v4.s_addr,
 				    0);
 
 			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
 				pf_change_a(&pd->dst->v4.s_addr,
 				    pd->ip_sum,
 				    nk->addr[pd->didx].v4.s_addr,
 				    0);
 
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			/*
 			 * Compare as AF_INET6 so that the whole 128-bit
 			 * address is examined; an AF_INET comparison would
 			 * only look at the first 32 bits and could skip a
 			 * needed translation.
 			 */
 			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
 				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
 
 			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
 				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
 
 			break;
 #endif /* INET6 */
 		}
 	}
 	return (PF_PASS);
 }
 
 /*
  * off is measured from the start of the mbuf chain.
  * The IP/IPv6 header is read with mtod(), so it must sit at the front
  * of the first mbuf.
  */
 void *
 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
     u_short *actionp, u_short *reasonp, sa_family_t af)
 {
 	/*
 	 * Validate that "len" bytes at "off" exist according to both the
 	 * mbuf packet header and the IP/IPv6 header, then copy them to p.
 	 * On failure NULL is returned and the action/reason outputs are
 	 * filled in through ACTION_SET/REASON_SET.
 	 */
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		struct ip	*ip4 = mtod(m, struct ip *);
 		u_int16_t	 fragoff;
 
 		fragoff = (ntohs(ip4->ip_off) & IP_OFFMASK) << 3;
 		if (fragoff != 0) {
 			/*
 			 * Non-first fragment: pass it when the wanted header
 			 * lies entirely before this fragment's offset, drop
 			 * it otherwise.
 			 */
 			if (fragoff < len) {
 				ACTION_SET(actionp, PF_DROP);
 				REASON_SET(reasonp, PFRES_FRAG);
 			} else {
 				ACTION_SET(actionp, PF_PASS);
 			}
 			return (NULL);
 		}
 		if (m->m_pkthdr.len < off + len ||
 		    ntohs(ip4->ip_len) < off + len) {
 			ACTION_SET(actionp, PF_DROP);
 			REASON_SET(reasonp, PFRES_SHORT);
 			return (NULL);
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		struct ip6_hdr	*ip6 = mtod(m, struct ip6_hdr *);
 
 		if (m->m_pkthdr.len < off + len ||
 		    (ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr)) <
 		    (unsigned)(off + len)) {
 			ACTION_SET(actionp, PF_DROP);
 			REASON_SET(reasonp, PFRES_SHORT);
 			return (NULL);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	/* Header is fully present: hand the caller a contiguous copy. */
 	m_copydata(m, off, len, p);
 	return (p);
 }
 
 /*
  * Decide whether "addr" is routable.
  *
  * With kif == NULL this is a plain "is there a route" check.  With a kif
  * it is a uRPF-style check: the route found for addr must point at the
  * interface behind kif (all multipath siblings are tried when
  * RADIX_MPATH is enabled and the table is multipath capable).
  *
  * Returns 1 when the check passes and 0 otherwise; an address family
  * other than AF_INET/AF_INET6 yields 0.  IPv6 addresses with an embedded
  * scope and packets on IFT_ENC interfaces are accepted without a lookup.
  */
 int
 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
     int rtableid)
 {
 #ifdef RADIX_MPATH
 	struct radix_node_head	*rnh;
 #endif
 	struct sockaddr_in	*dst;
 	int			 ret = 1;
 	int			 check_mpath;
 #ifdef INET6
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro;
 #else
 	struct route		 ro;
 #endif
 	struct radix_node	*rn;
 	struct rtentry		*rt;
 	struct ifnet		*ifp;
 
 	check_mpath = 0;
 #ifdef RADIX_MPATH
 	/* XXX: stick to table 0 for now */
 	rnh = rt_tables_get_rnh(0, af);
 	if (rnh != NULL && rn_mpath_capable(rnh))
 		check_mpath = 1;
 #endif
 	bzero(&ro, sizeof(ro));
 	switch (af) {
 	case AF_INET:
 		dst = satosin(&ro.ro_dst);
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * Skip check for addresses with embedded interface scope,
 		 * as they would always match anyway.
 		 */
 		if (IN6_IS_SCOPE_EMBED(&addr->v6))
 			goto out;
 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 
 	/*
 	 * Skip checks for ipsec interfaces.  pfik_ifp is tested first
 	 * because this function itself treats a kif without an ifnet as
 	 * possible further down.
 	 */
 	if (kif != NULL && kif->pfik_ifp != NULL &&
 	    kif->pfik_ifp->if_type == IFT_ENC)
 		goto out;
 
 	switch (af) {
 #ifdef INET6
 	case AF_INET6:
 		in6_rtalloc_ign(&ro, 0, rtableid);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
 		break;
 #endif
 	default:
 		rtalloc_ign((struct route *)&ro, 0);	/* No/default FIB. */
 		break;
 	}
 
 	if (ro.ro_rt != NULL) {
 		/* No interface given, this is a no-route check */
 		if (kif == NULL)
 			goto out;
 
 		if (kif->pfik_ifp == NULL) {
 			ret = 0;
 			goto out;
 		}
 
 		/* Perform uRPF check if passed input interface */
 		ret = 0;
 		rn = (struct radix_node *)ro.ro_rt;
 		do {
 			rt = (struct rtentry *)rn;
 			ifp = rt->rt_ifp;
 
 			if (kif->pfik_ifp == ifp)
 				ret = 1;
 #ifdef RADIX_MPATH
 			rn = rn_mpath_next(rn);
 #endif
 		} while (check_mpath == 1 && rn != NULL && ret == 0);
 	} else
 		ret = 0;
 out:
 	/* Drop the route reference taken by the lookup, if any. */
 	if (ro.ro_rt != NULL)
 		RTFREE(ro.ro_rt);
 	return (ret);
 }
 
 #ifdef INET
 static void
 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0, *m1;
 	struct sockaddr_in	dst;
 	struct ip		*ip;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 	int			 error = 0;
 	uint16_t		 ip_len, ip_off;
 
 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
 	    __func__));
 
 	if ((pd->pf_mtag == NULL &&
 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
 	    pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 	} else {
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 		m0 = *m;
 	}
 
 	ip = mtod(m0, struct ip *);
 
 	bzero(&dst, sizeof(dst));
 	dst.sin_family = AF_INET;
 	dst.sin_len = sizeof(dst);
 	dst.sin_addr = ip->ip_dst;
 
 	if (r->rt == PF_FASTROUTE) {
 		struct rtentry *rt;
 
 		if (s)
 			PF_STATE_UNLOCK(s);
 		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
 		if (rt == NULL) {
 			KMOD_IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 
 		ifp = rt->rt_ifp;
 		rt->rt_rmx.rmx_pksent++;
 
 		if (rt->rt_flags & RTF_GATEWAY)
 			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
 		RTFREE_LOCKED(rt);
 	} else {
 		if (TAILQ_EMPTY(&r->rpool.list)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
 			goto bad_locked;
 		}
 		if (s == NULL) {
 			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
 			    &naddr, NULL, &sn);
 			if (!PF_AZERO(&naddr, AF_INET))
 				dst.sin_addr.s_addr = naddr.v4.s_addr;
 			ifp = r->rpool.cur->kif ?
 			    r->rpool.cur->kif->pfik_ifp : NULL;
 		} else {
 			if (!PF_AZERO(&s->rt_addr, AF_INET))
 				dst.sin_addr.s_addr =
 				    s->rt_addr.v4.s_addr;
 			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 			PF_STATE_UNLOCK(s);
 		}
 	}
 	if (ifp == NULL)
 		goto bad;
 
 	if (oifp != ifp) {
 		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 		if (m0->m_len < sizeof(struct ip)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
 			goto bad;
 		}
 		ip = mtod(m0, struct ip *);
 	}
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		m0->m_flags |= M_SKIP_FIREWALL;
 
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
 	m0->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= ifp->if_mtu ||
 	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
 	    ((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
 		ip->ip_sum = 0;
 		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
 			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
 		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		KMOD_IPSTAT_INC(ips_cantfrag);
 		if (r->rt != PF_DUPTO) {
 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
 			    ifp->if_mtu);
 			goto done;
 		} else
 			goto bad;
 	}
 
 	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 
 	for (; m0; m0 = m1) {
 		m1 = m0->m_nextpkt;
 		m0->m_nextpkt = NULL;
 		if (error == 0) {
 			m_clrprotoflags(m0);
 			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		} else
 			m_freem(m0);
 	}
 
 	if (error == 0)
 		KMOD_IPSTAT_INC(ips_fragmented);
 
 done:
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	return;
 
 bad_locked:
 	if (s)
 		PF_STATE_UNLOCK(s);
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Apply a routing option of rule r (r->rt) to the IPv6 packet *m.
  *
  * For PF_DUPTO a copy of the packet is sent and *m is left untouched;
  * for the other options the packet itself is handed off (or freed on
  * error) and *m is set to NULL.  The function returns early, leaving *m
  * alone, when the duplicate cannot be allocated or when the rule
  * direction does not call for routing this packet.
  *
  * If a state s is passed in, it is unlocked on every path before
  * returning.
  */
 static void
 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0;
 	struct sockaddr_in6	dst;
 	struct ip6_hdr		*ip6;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 
 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
 	    __func__));
 
 	/* Need an mtag for loop detection; give up after too many hops. */
 	if ((pd->pf_mtag == NULL &&
 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
 	    pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 	} else {
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 		m0 = *m;
 	}
 
 	ip6 = mtod(m0, struct ip6_hdr *);
 
 	bzero(&dst, sizeof(dst));
 	dst.sin6_family = AF_INET6;
 	dst.sin6_len = sizeof(dst);
 	dst.sin6_addr = ip6->ip6_dst;
 
 	/* Cheat. XXX why only in the v6 case??? */
 	if (r->rt == PF_FASTROUTE) {
 		if (s)
 			PF_STATE_UNLOCK(s);
 		m0->m_flags |= M_SKIP_FIREWALL;
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		/*
 		 * m0 is *m here and has been handed to ip6_output();
 		 * clear the caller's pointer so it is not used again.
 		 */
 		*m = NULL;
 		return;
 	}
 
 	if (TAILQ_EMPTY(&r->rpool.list)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
 		goto bad_locked;
 	}
 	if (s == NULL) {
 		/* Stateless: pick next hop from the rule's pool. */
 		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
 		    &naddr, NULL, &sn);
 		if (!PF_AZERO(&naddr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &naddr, AF_INET6);
 		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
 	} else {
 		/* Stateful: reuse the next hop saved in the state. */
 		if (!PF_AZERO(&s->rt_addr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &s->rt_addr, AF_INET6);
 		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 	}
 
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	if (ifp == NULL)
 		goto bad;
 
 	/* Leaving through a different interface: filter outbound there. */
 	if (oifp != ifp) {
 		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 		if (m0->m_len < sizeof(struct ip6_hdr)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
 			    __func__));
 			goto bad;
 		}
 		ip6 = mtod(m0, struct ip6_hdr *);
 	}
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		m0->m_flags |= M_SKIP_FIREWALL;
 
 	/*
 	 * If the packet is too large for the outgoing interface,
 	 * send back an icmp6 error.
 	 */
 	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
 		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
 		nd6_output(ifp, ifp, m0, &dst, NULL);
 	else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
 		if (r->rt != PF_DUPTO)
 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
 		else
 			goto bad;
 	}
 
 done:
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	return;
 
 bad_locked:
 	if (s)
 		PF_STATE_UNLOCK(s);
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET6 */
 
 /*
  * FreeBSD supports cksum offloads for the following drivers.
  *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
  *   ti(4), txp(4), xl(4)
  *
  * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
  *  network driver performed cksum including pseudo header, need to verify
  *   csum_data
  * CSUM_DATA_VALID :
  *  network driver performed cksum, needs additional pseudo header
  *  cksum computation with partial csum_data (i.e. lack of H/W support for
  *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
  *
  * After validating the cksum of the packet, set both flags CSUM_DATA_VALID
  * and CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in the
  * upper TCP/UDP layer.
  * Also, set csum_data to 0xffff to force cksum validation.
  */
 static int
 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
 {
 	struct ip *iph;
 	u_int16_t sum = 0;
 	int hw_verified = 0;
 
 	/* Reject offsets/lengths that cannot describe a real payload. */
 	if (off < sizeof(struct ip) || len < sizeof(struct udphdr) ||
 	    m->m_pkthdr.len < off + len)
 		return (1);
 
 	switch (p) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
 		/* Use the checksum computed by the NIC when it is available. */
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
 				sum = m->m_pkthdr.csum_data;
 			} else {
 				/* Hardware skipped the pseudo header; add it. */
 				iph = mtod(m, struct ip *);
 				sum = in_pseudo(iph->ip_src.s_addr,
 				    iph->ip_dst.s_addr,
 				    htonl((u_short)len +
 				    m->m_pkthdr.csum_data + p));
 			}
 			sum ^= 0xffff;
 			++hw_verified;
 		}
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif /* INET6 */
 		break;
 	default:
 		return (1);
 	}
 
 	/* No usable hardware result: compute the checksum in software. */
 	if (!hw_verified) {
 		switch (af) {
 		case AF_INET:
 			if (p != IPPROTO_ICMP) {
 				if (m->m_len < sizeof(struct ip))
 					return (1);
 				sum = in4_cksum(m, p, off, len);
 			} else {
 				if (m->m_len < off)
 					return (1);
 				/* Temporarily hide the IP header from in_cksum. */
 				m->m_data += off;
 				m->m_len -= off;
 				sum = in_cksum(m, len);
 				m->m_data -= off;
 				m->m_len += off;
 			}
 			break;
 #ifdef INET6
 		case AF_INET6:
 			if (m->m_len < sizeof(struct ip6_hdr))
 				return (1);
 			sum = in6_cksum(m, p, off, len);
 			break;
 #endif /* INET6 */
 		default:
 			return (1);
 		}
 	}
 
 	if (sum == 0) {
 		/* Good checksum: mark TCP/UDP so upper layers skip the work. */
 		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
 			m->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 		return (0);
 	}
 
 	/* Bad checksum: bump the per-protocol error counter. */
 	switch (p) {
 	case IPPROTO_TCP:
 		KMOD_TCPSTAT_INC(tcps_rcvbadsum);
 		break;
 	case IPPROTO_UDP:
 		KMOD_UDPSTAT_INC(udps_badsum);
 		break;
 #ifdef INET
 	case IPPROTO_ICMP:
 		KMOD_ICMPSTAT_INC(icps_checksum);
 		break;
 #endif
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		KMOD_ICMP6STAT_INC(icp6s_checksum);
 		break;
 #endif /* INET6 */
 	}
 	return (1);
 }
 
 
 #ifdef INET
 /*
  * Filter one IPv4 packet travelling in direction dir on interface ifp.
  *
  * The packet is normalized, matched against an existing state or the
  * ruleset, optionally tagged, queued (ALTQ), diverted or routed, and
  * accounted.  Returns the resulting action (PF_PASS, PF_DROP, ...).
  * *m0 may be replaced by normalization and is set to NULL whenever the
  * packet has been consumed here (divert, synproxy, defer, routing).
  * A state that was found or created is unlocked before returning.
  */
 int
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0;
 	struct ip		*h = NULL;
 	struct m_tag		*ipfwtag;
 	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, dirndx, pqid = 0;
 
 	M_ASSERTPKTHDR(m);
 
 	if (!V_pf_status.running)
 		return (PF_PASS);
 
 	memset(&pd, 0, sizeof(pd));
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 
 	if (kif == NULL) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
 		return (PF_PASS);
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return (PF_PASS);
 
 	pd.pf_mtag = pf_find_mtag(m);
 
 	PF_RULES_RLOCK();
 
 	/* Packets coming back from divert carry an ipfw rule tag. */
 	if (ip_divert_ptr != NULL &&
 	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
 		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
 		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
 			if (pd.pf_mtag == NULL &&
 			    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 				action = PF_DROP;
 				goto done;
 			}
 			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
 			m_tag_delete(m, ipfwtag);
 		}
 		if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
 			m->m_flags |= M_FASTFWD_OURS;
 			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
 		}
 	} else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		/* We do IP header normalization and packet reassembly here */
 		action = PF_DROP;
 		goto done;
 	}
 	m = *m0;	/* pf_normalize messes with m0 */
 	h = mtod(m, struct ip *);
 
 	off = h->ip_hl << 2;
 	if (off < (int)sizeof(struct ip)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_SHORT);
 		log = 1;
 		goto done;
 	}
 
 	pd.src = (struct pf_addr *)&h->ip_src;
 	pd.dst = (struct pf_addr *)&h->ip_dst;
 	pd.sport = pd.dport = NULL;
 	pd.ip_sum = &h->ip_sum;
 	pd.proto_sum = NULL;
 	pd.proto = h->ip_p;
 	pd.dir = dir;
 	pd.sidx = (dir == PF_IN) ? 0 : 1;
 	pd.didx = (dir == PF_IN) ? 1 : 0;
 	pd.af = AF_INET;
 	pd.tos = h->ip_tos;
 	pd.tot_len = ntohs(h->ip_len);
 
 	/* handle fragments that didn't get reassembled by normalization */
 	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		action = pf_test_fragment(&r, dir, kif, m, h,
 		    &pd, &a, &ruleset);
 		goto done;
 	}
 
 	/*
 	 * Per-protocol handling: try an existing state first and fall back
 	 * to rule evaluation only when no state was found.
 	 */
 	switch (h->ip_p) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		/* Empty ACKs are candidates for the priority queue. */
 		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
 			pqid = 1;
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_ICMP: {
 		struct icmp	ih;
 
 		pd.hdr.icmp = &ih;
 		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 #ifdef INET6
 	case IPPROTO_ICMPV6: {
 		action = PF_DROP;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
 		goto done;
 	}
 #endif
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, m, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 done:
 	PF_RULES_RUNLOCK();
 	/* h may still be NULL here, but only on paths where action != PASS. */
 	if (action == PF_PASS && h->ip_hl > 5 &&
 	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = 1;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with ip options\n"));
 	}
 
 	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_MEMORY);
 	}
 	if (r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		if (pd.pf_mtag == NULL &&
 		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 		} else {
 			/* Only touch the mtag when we actually have one. */
 			if (pqid || (pd.tos & IPTOS_LOWDELAY))
 				pd.pf_mtag->qid = r->pqid;
 			else
 				pd.pf_mtag->qid = r->qid;
 			/* add hints for ecn */
 			pd.pf_mtag->hdr = h;
 		}
 	}
 #endif /* ALTQ */
 
 	/*
 	 * connections redirected to loopback should not match sockets
 	 * bound specifically to loopback due to security implications,
 	 * see tcp_input() and in_pcblookup_listen().
 	 */
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 		m->m_flags |= M_SKIP_FIREWALL;
 
 	if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
 	    !PACKET_LOOPED(&pd)) {
 
 		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
 		    sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
 		if (ipfwtag != NULL) {
 			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
 			    ntohs(r->divert.port);
 			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
 
 			if (s)
 				PF_STATE_UNLOCK(s);
 
 			m_tag_prepend(m, ipfwtag);
 			if (m->m_flags & M_FASTFWD_OURS) {
 				if (pd.pf_mtag == NULL &&
 				    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 					action = PF_DROP;
 					REASON_SET(&reason, PFRES_MEMORY);
 					log = 1;
 					DPFPRINTF(PF_DEBUG_MISC,
 					    ("pf: failed to allocate tag\n"));
 				} else {
 					/*
 					 * Remember the flag in the mtag;
 					 * skipped when no mtag could be
 					 * allocated.
 					 */
 					pd.pf_mtag->flags |=
 					    PF_FASTFWD_OURS_PRESENT;
 					m->m_flags &= ~M_FASTFWD_OURS;
 				}
 			}
 			ip_divert_ptr(*m0, dir ==  PF_IN ? DIR_IN : DIR_OUT);
 			*m0 = NULL;
 
 			return (action);
 		} else {
 			/* XXX: ipfw has the same behaviour! */
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 			log = 1;
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("pf: failed to allocate divert tag\n"));
 		}
 	}
 
 	if (log) {
 		struct pf_rule *lr;
 
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
 		    (s == NULL));
 	}
 
 	/* Interface counters: [af 0 = IPv4][out?][blocked?]. */
 	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			/* State counters are relative to the state's direction. */
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL && r == &V_pf_default_rule)
 			tr = nr;
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl,
 			    (s == NULL) ? pd.src :
 			    &s->key[(s->direction == PF_IN)]->
 				addr[(s->direction == PF_OUT)],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl,
 			    (s == NULL) ? pd.dst :
 			    &s->key[(s->direction == PF_IN)]->
 				addr[(s->direction == PF_IN)],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->dst.neg);
 	}
 
 	switch (action) {
 	case PF_SYNPROXY_DROP:
 		m_freem(*m0);
 		/* FALLTHROUGH */
 	case PF_DEFER:
 		*m0 = NULL;
 		action = PF_PASS;
 		break;
 	default:
 		/* pf_route() returns unlocked. */
 		if (r->rt) {
 			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
 			return (action);
 		}
 		break;
 	}
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	return (action);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Filter one IPv6 packet travelling in direction dir on interface ifp.
  *
  * The packet is normalized, its extension header chain is walked to
  * find the upper-layer protocol, and it is then matched against an
  * existing state or the ruleset, optionally tagged, queued (ALTQ) or
  * routed, and accounted.  Returns the resulting action (PF_PASS,
  * PF_DROP, ...).  *m0 may be replaced by normalization and is set to
  * NULL when the packet has been consumed here (synproxy, defer,
  * routing).  A state that was found or created is unlocked before
  * returning.
  */
 int
 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0, *n = NULL;
 	struct ip6_hdr		*h = NULL;
 	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, terminal = 0, dirndx, rh_cnt = 0;
 
 	M_ASSERTPKTHDR(m);
 
 	if (!V_pf_status.running)
 		return (PF_PASS);
 
 	memset(&pd, 0, sizeof(pd));
 	pd.pf_mtag = pf_find_mtag(m);
 
 	/* Packets generated by pf itself are not filtered again. */
 	if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED)
 		return (PF_PASS);
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 	if (kif == NULL) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
 		return (PF_PASS);
 
 	PF_RULES_RLOCK();
 
 	/* We do IP header normalization and packet reassembly here */
 	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		action = PF_DROP;
 		goto done;
 	}
 	m = *m0;	/* pf_normalize messes with m0 */
 	h = mtod(m, struct ip6_hdr *);
 
 #if 1
 	/*
 	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
 	 * will do something bad, so drop the packet for now.
 	 */
 	if (htons(h->ip6_plen) == 0) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
 		goto done;
 	}
 #endif
 
 	pd.src = (struct pf_addr *)&h->ip6_src;
 	pd.dst = (struct pf_addr *)&h->ip6_dst;
 	pd.sport = pd.dport = NULL;
 	pd.ip_sum = NULL;
 	pd.proto_sum = NULL;
 	pd.dir = dir;
 	pd.sidx = (dir == PF_IN) ? 0 : 1;
 	pd.didx = (dir == PF_IN) ? 1 : 0;
 	pd.af = AF_INET6;
 	pd.tos = 0;
 	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
 
 	/*
 	 * Walk the extension header chain until a header that is not an
 	 * extension header handled below is reached; "off" ends up at that
 	 * header and pd.proto holds its protocol number.
 	 */
 	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
 	pd.proto = h->ip6_nxt;
 	do {
 		switch (pd.proto) {
 		case IPPROTO_FRAGMENT:
 			action = pf_test_fragment(&r, dir, kif, m, h,
 			    &pd, &a, &ruleset);
 			if (action == PF_DROP)
 				REASON_SET(&reason, PFRES_FRAG);
 			goto done;
 		case IPPROTO_ROUTING: {
 			struct ip6_rthdr rthdr;
 
 			if (rh_cnt++) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 more than one rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
 			    &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_SHORT);
 				log = 1;
 				goto done;
 			}
 			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 rthdr0\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			/* FALLTHROUGH */
 		}
 		case IPPROTO_AH:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_DSTOPTS: {
 			/* get next header and header length */
 			struct ip6_ext	opt6;
 
 			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
 			    NULL, &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short opt\n"));
 				action = PF_DROP;
 				log = 1;
 				goto done;
 			}
 			/* AH counts length in 4-byte units, the rest in 8. */
 			if (pd.proto == IPPROTO_AH)
 				off += (opt6.ip6e_len + 2) * 4;
 			else
 				off += (opt6.ip6e_len + 1) * 8;
 			pd.proto = opt6.ip6e_nxt;
 			/* goto the next header */
 			break;
 		}
 		default:
 			terminal++;
 			break;
 		}
 	} while (!terminal);
 
 	/* if there's no routing header, use unmodified mbuf for checksumming */
 	if (!n)
 		n = m;
 
 	/*
 	 * Per-protocol handling: try an existing state first and fall back
 	 * to rule evaluation only when no state was found.
 	 */
 	switch (pd.proto) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_ICMP: {
 		action = PF_DROP;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
 		goto done;
 	}
 
 	case IPPROTO_ICMPV6: {
 		struct icmp6_hdr	ih;
 
 		pd.hdr.icmp6 = &ih;
 		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif,
 		    m, off, h, &pd, &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, m, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 done:
 	PF_RULES_RUNLOCK();
 	/* n is either NULL or m in this function, so this frees nothing. */
 	if (n != m) {
 		m_freem(n);
 		n = NULL;
 	}
 
 	/* handle dangerous IPv6 extension headers. */
 	if (action == PF_PASS && rh_cnt &&
 	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = 1;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with dangerous v6 headers\n"));
 	}
 
 	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_MEMORY);
 	}
 	if (r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		if (pd.pf_mtag == NULL &&
 		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 		} else {
 			/* Only touch the mtag when we actually have one. */
 			if (pd.tos & IPTOS_LOWDELAY)
 				pd.pf_mtag->qid = r->pqid;
 			else
 				pd.pf_mtag->qid = r->qid;
 			/* add hints for ecn */
 			pd.pf_mtag->hdr = h;
 		}
 	}
 #endif /* ALTQ */
 
 	/* Same loopback-redirect precaution as in the IPv4 path. */
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
 		m->m_flags |= M_SKIP_FIREWALL;
 
 	/* XXX: Anybody working on it?! */
 	if (r->divert.port)
 		printf("pf: divert(9) is not supported for IPv6\n");
 
 	if (log) {
 		struct pf_rule *lr;
 
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
 		    &pd, (s == NULL));
 	}
 
 	/* Interface counters: [af 1 = IPv6][out?][blocked?]. */
 	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			/* State counters are relative to the state's direction. */
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL && r == &V_pf_default_rule)
 			tr = nr;
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl,
 			    (s == NULL) ? pd.src :
 			    &s->key[(s->direction == PF_IN)]->addr[0],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl,
 			    (s == NULL) ? pd.dst :
 			    &s->key[(s->direction == PF_IN)]->addr[1],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->dst.neg);
 	}
 
 	switch (action) {
 	case PF_SYNPROXY_DROP:
 		m_freem(*m0);
 		/* FALLTHROUGH */
 	case PF_DEFER:
 		*m0 = NULL;
 		action = PF_PASS;
 		break;
 	default:
 		/* pf_route6() returns unlocked. */
 		if (r->rt) {
 			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
 			return (action);
 		}
 		break;
 	}
 
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	return (action);
 }
 #endif /* INET6 */
Index: stable/10/sys/netpfil/pf/pf.h
===================================================================
--- stable/10/sys/netpfil/pf/pf.h	(nonexistent)
+++ stable/10/sys/netpfil/pf/pf.h	(revision 263086)
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	$OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ *	$FreeBSD$
+ */
+
+#ifndef	_NET_PF_H_
+#define	_NET_PF_H_
+
+#define	PF_TCPS_PROXY_SRC	((TCP_NSTATES)+0)
+#define	PF_TCPS_PROXY_DST	((TCP_NSTATES)+1)
+
+#define	PF_MD5_DIGEST_LENGTH	16
+#ifdef MD5_DIGEST_LENGTH
+#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH
+#error
+#endif
+#endif
+
+enum	{ PF_INOUT, PF_IN, PF_OUT };
+enum	{ PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
+	  PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER };
+enum	{ PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
+	  PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
+enum	{ PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
+	  PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG };
+enum	{ PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY };
+enum	{ PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
+	  PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
+	  PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
+enum	{ PF_GET_NONE, PF_GET_CLR_CNTR };
+enum	{ PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH };
+
+/*
+ * Note about PFTM_*: real indices into pf_rule.timeout[] come before
+ * PFTM_MAX, special cases afterwards. See pf_state_expires().
+ */
+enum	{ PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
+	  PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED,
+	  PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE,
+	  PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY,
+	  PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
+	  PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL,
+	  PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE,
+	  PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED,
+	  PFTM_UNTIL_PACKET };
+
+/* PFTM default values; products are parenthesized to expand as one operand */
+#define PFTM_TCP_FIRST_PACKET_VAL	120	/* First TCP packet */
+#define PFTM_TCP_OPENING_VAL		30	/* No response yet */
+#define PFTM_TCP_ESTABLISHED_VAL	(24*60*60) /* Established */
+#define PFTM_TCP_CLOSING_VAL		(15 * 60) /* Half closed */
+#define PFTM_TCP_FIN_WAIT_VAL		45	/* Got both FINs */
+#define PFTM_TCP_CLOSED_VAL		90	/* Got a RST */
+#define PFTM_UDP_FIRST_PACKET_VAL	60	/* First UDP packet */
+#define PFTM_UDP_SINGLE_VAL		30	/* Unidirectional */
+#define PFTM_UDP_MULTIPLE_VAL		60	/* Bidirectional */
+#define PFTM_ICMP_FIRST_PACKET_VAL	20	/* First ICMP packet */
+#define PFTM_ICMP_ERROR_REPLY_VAL	10	/* Got error response */
+#define PFTM_OTHER_FIRST_PACKET_VAL	60	/* First packet */
+#define PFTM_OTHER_SINGLE_VAL		30	/* Unidirectional */
+#define PFTM_OTHER_MULTIPLE_VAL		60	/* Bidirectional */
+#define PFTM_FRAG_VAL			30	/* Fragment expire */
+#define PFTM_INTERVAL_VAL		10	/* Expire interval */
+#define PFTM_SRC_NODE_VAL		0	/* Source tracking */
+#define PFTM_TS_DIFF_VAL		30	/* Allowed TS diff */
+
+enum	{ PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
+enum	{ PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
+	  PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
+#define PF_POOL_IDMASK		0x0f
+enum	{ PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
+	  PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
+enum	{ PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
+	  PF_ADDR_TABLE, PF_ADDR_URPFFAILED,
+	  PF_ADDR_RANGE };
+#define PF_POOL_TYPEMASK	0x0f
+#define PF_POOL_STICKYADDR	0x20
+#define	PF_WSCALE_FLAG		0x80
+#define	PF_WSCALE_MASK		0x0f
+
+#define	PF_LOG			0x01
+#define	PF_LOG_ALL		0x02
+#define	PF_LOG_SOCKET_LOOKUP	0x04
+
+/* Reason codes for passing/dropping a packet */
+#define PFRES_MATCH	0		/* Explicit match of a rule */
+#define PFRES_BADOFF	1		/* Bad offset for pull_hdr */
+#define PFRES_FRAG	2		/* Dropping following fragment */
+#define PFRES_SHORT	3		/* Dropping short packet */
+#define PFRES_NORM	4		/* Dropping by normalizer */
+#define PFRES_MEMORY	5		/* Dropped due to lacking mem */
+#define PFRES_TS	6		/* Bad TCP Timestamp (RFC1323) */
+#define PFRES_CONGEST	7		/* Congestion (of ipintrq) */
+#define PFRES_IPOPTIONS 8		/* IP option */
+#define PFRES_PROTCKSUM 9		/* Protocol checksum invalid */
+#define PFRES_BADSTATE	10		/* State mismatch */
+#define PFRES_STATEINS	11		/* State insertion failure */
+#define PFRES_MAXSTATES	12		/* State limit */
+#define PFRES_SRCLIMIT	13		/* Source node/conn limit */
+#define PFRES_SYNPROXY	14		/* SYN proxy */
+#define PFRES_MAX	15		/* total+1 */
+
+#define PFRES_NAMES { \
+	"match", \
+	"bad-offset", \
+	"fragment", \
+	"short", \
+	"normalize", \
+	"memory", \
+	"bad-timestamp", \
+	"congestion", \
+	"ip-option", \
+	"proto-cksum", \
+	"state-mismatch", \
+	"state-insert", \
+	"state-limit", \
+	"src-limit", \
+	"synproxy", \
+	NULL \
+}
+
+#define	PF_TABLE_NAME_SIZE	32
+#define	PF_QNAME_SIZE		64
+
+#endif	/* _NET_PF_H_ */

Property changes on: stable/10/sys/netpfil/pf/pf.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/10/sys/netpfil/pf/pf_altq.h
===================================================================
--- stable/10/sys/netpfil/pf/pf_altq.h	(nonexistent)
+++ stable/10/sys/netpfil/pf/pf_altq.h	(revision 263086)
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	$OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ *	$FreeBSD$
+ */
+
+#ifndef	_NET_PF_ALTQ_H_
+#define	_NET_PF_ALTQ_H_
+
+struct cbq_opts {
+	u_int		minburst;
+	u_int		maxburst;
+	u_int		pktsize;
+	u_int		maxpktsize;
+	u_int		ns_per_byte;
+	u_int		maxidle;
+	int		minidle;
+	u_int		offtime;
+	int		flags;
+};
+
+struct priq_opts {
+	int		flags;
+};
+
+struct hfsc_opts {
+	/* real-time service curve */
+	u_int		rtsc_m1;	/* slope of the 1st segment in bps */
+	u_int		rtsc_d;		/* the x-projection of m1 in msec */
+	u_int		rtsc_m2;	/* slope of the 2nd segment in bps */
+	/* link-sharing service curve */
+	u_int		lssc_m1;
+	u_int		lssc_d;
+	u_int		lssc_m2;
+	/* upper-limit service curve */
+	u_int		ulsc_m1;
+	u_int		ulsc_d;
+	u_int		ulsc_m2;
+	int		flags;
+};
+
+struct pf_altq {
+	char			 ifname[IFNAMSIZ];
+
+	void			*altq_disc;	/* discipline-specific state */
+	TAILQ_ENTRY(pf_altq)	 entries;
+
+	/* scheduler spec */
+	uint8_t			 scheduler;	/* scheduler type */
+	uint16_t		 tbrsize;	/* tokenbucket regulator size */
+	uint32_t		 ifbandwidth;	/* interface bandwidth */
+
+	/* queue spec */
+	char			 qname[PF_QNAME_SIZE];	/* queue name */
+	char			 parent[PF_QNAME_SIZE];	/* parent name */
+	uint32_t		 parent_qid;	/* parent queue id */
+	uint32_t		 bandwidth;	/* queue bandwidth */
+	uint8_t			 priority;	/* priority */
+	uint8_t			 local_flags;	/* dynamic interface */
+#define	PFALTQ_FLAG_IF_REMOVED		0x01
+
+	uint16_t		 qlimit;	/* queue size limit */
+	uint16_t		 flags;		/* misc flags */
+	union {
+		struct cbq_opts		 cbq_opts;
+		struct priq_opts	 priq_opts;
+		struct hfsc_opts	 hfsc_opts;
+	} pq_u;
+
+	uint32_t		 qid;		/* return value */
+};
+
+#endif	/* _NET_PF_ALTQ_H_ */

Property changes on: stable/10/sys/netpfil/pf/pf_altq.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/10/sys/netpfil/pf/pf_lb.c
===================================================================
--- stable/10/sys/netpfil/pf/pf_lb.c	(revision 263085)
+++ stable/10/sys/netpfil/pf/pf_lb.c	(revision 263086)
@@ -1,669 +1,668 @@
 /*-
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002 - 2008 Henning Brauer
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Effort sponsored in part by the Defense Advanced Research Projects
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_pf.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
-#include <net/pf_mtag.h>
 
 #define DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x
 
 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
 			    struct pf_poolhashkey *, sa_family_t);
 static struct pf_rule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
 			    int, int, struct pfi_kif *,
 			    struct pf_addr *, u_int16_t, struct pf_addr *,
 			    uint16_t, int, struct pf_anchor_stackframe *);
 static int pf_get_sport(sa_family_t, uint8_t, struct pf_rule *,
     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
     uint16_t *, uint16_t, uint16_t, struct pf_src_node **);
 
 #define mix(a,b,c) \
 	do {					\
 		a -= b; a -= c; a ^= (c >> 13);	\
 		b -= c; b -= a; b ^= (a << 8);	\
 		c -= a; c -= b; c ^= (b >> 13);	\
 		a -= b; a -= c; a ^= (c >> 12);	\
 		b -= c; b -= a; b ^= (a << 16);	\
 		c -= a; c -= b; c ^= (b >> 5);	\
 		a -= b; a -= c; a ^= (c >> 3);	\
 		b -= c; b -= a; b ^= (a << 10);	\
 		c -= a; c -= b; c ^= (b >> 15);	\
 	} while (0)
 
 /*
  * hash function based on bridge_hash in if_bridge.c
  */
 static void
 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
     struct pf_poolhashkey *key, sa_family_t af)
 {
 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		a += inaddr->addr32[0];
 		b += key->key32[1];
 		mix(a, b, c);
 		hash->addr32[0] = c + key->key32[2];
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		a += inaddr->addr32[0];
 		b += inaddr->addr32[2];
 		mix(a, b, c);
 		hash->addr32[0] = c;
 		a += inaddr->addr32[1];
 		b += inaddr->addr32[3];
 		c += key->key32[1];
 		mix(a, b, c);
 		hash->addr32[1] = c;
 		a += inaddr->addr32[2];
 		b += inaddr->addr32[1];
 		c += key->key32[2];
 		mix(a, b, c);
 		hash->addr32[2] = c;
 		a += inaddr->addr32[3];
 		b += inaddr->addr32[0];
 		c += key->key32[3];
 		mix(a, b, c);
 		hash->addr32[3] = c;
 		break;
 #endif /* INET6 */
 	}
 }
 
 static struct pf_rule *
 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
     struct pf_addr *daddr, uint16_t dport, int rs_num,
     struct pf_anchor_stackframe *anchor_stack)
 {
 	struct pf_rule		*r, *rm = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	int			 tag = -1;
 	int			 rtableid = -1;
 	int			 asd = 0;
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
 	while (r && rm == NULL) {
 		struct pf_rule_addr	*src = NULL, *dst = NULL;
 		struct pf_addr_wrap	*xdst = NULL;
 
 		if (r->action == PF_BINAT && direction == PF_IN) {
 			src = &r->dst;
 			if (r->rpool.cur != NULL)
 				xdst = &r->rpool.cur->addr;
 		} else {
 			src = &r->src;
 			dst = &r->dst;
 		}
 
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != pd->af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
 		    src->neg, kif, M_GETFIB(m)))
 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
 			    PF_SKIP_DST_ADDR].ptr;
 		else if (src->port_op && !pf_match_port(src->port_op,
 		    src->port[0], src->port[1], sport))
 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
 			    PF_SKIP_DST_PORT].ptr;
 		else if (dst != NULL &&
 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
 		    M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
 		    0, NULL, M_GETFIB(m)))
 			r = TAILQ_NEXT(r, entries);
 		else if (dst != NULL && dst->port_op &&
 		    !pf_match_port(dst->port_op, dst->port[0],
 		    dst->port[1], dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
 		    off, pd->hdr.tcp), r->os_fingerprint)))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				rm = r;
 			} else
 				pf_step_into_anchor(anchor_stack, &asd,
 				    &ruleset, rs_num, &r, NULL, NULL);
 		}
 		if (r == NULL)
 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
 			    rs_num, &r, NULL, NULL);
 	}
 
 	if (tag > 0 && pf_tag_packet(m, pd, tag))
 		return (NULL);
 	if (rtableid >= 0)
 		M_SETFIB(m, rtableid);
 
 	if (rm != NULL && (rm->action == PF_NONAT ||
 	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
 		return (NULL);
 	return (rm);
 }
 
 static int
 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
     uint16_t high, struct pf_src_node **sn)
 {
 	struct pf_state_key_cmp	key;
 	struct pf_addr		init_addr;
 	uint16_t		cut;
 
 	bzero(&init_addr, sizeof(init_addr));
 	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
 		return (1);
 
 	if (proto == IPPROTO_ICMP) {
 		low = 1;
 		high = 65535;
 	}
 
 	bzero(&key, sizeof(key));
 	key.af = af;
 	key.proto = proto;
 	key.port[0] = dport;
 	PF_ACPY(&key.addr[0], daddr, key.af);
 
 	do {
 		PF_ACPY(&key.addr[1], naddr, key.af);
 
 		/*
 		 * port search; start random, step;
 		 * similar 2 portloop in in_pcbbind
 		 */
 		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
 			/*
 			 * XXX bug: icmp states don't use the id on both sides.
 			 * (traceroute -I through nat)
 			 */
 			key.port[1] = sport;
 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
 				*nport = sport;
 				return (0);
 			}
 		} else if (low == high) {
 			key.port[1] = htons(low);
 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
 				*nport = htons(low);
 				return (0);
 			}
 		} else {
 			uint16_t tmp;
 
 			if (low > high) {
 				tmp = low;
 				low = high;
 				high = tmp;
 			}
 			/* low < high */
 			cut = htonl(arc4random()) % (1 + high - low) + low;
 			/* low <= cut <= high */
 			for (tmp = cut; tmp <= high; ++(tmp)) {
 				key.port[1] = htons(tmp);
 				if (pf_find_state_all(&key, PF_IN, NULL) ==
 				    NULL) {
 					*nport = htons(tmp);
 					return (0);
 				}
 			}
 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
 				key.port[1] = htons(tmp);
 				if (pf_find_state_all(&key, PF_IN, NULL) ==
 				    NULL) {
 					*nport = htons(tmp);
 					return (0);
 				}
 			}
 		}
 
 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
 		case PF_POOL_RANDOM:
 		case PF_POOL_ROUNDROBIN:
 			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
 				return (1);
 			break;
 		case PF_POOL_NONE:
 		case PF_POOL_SRCHASH:
 		case PF_POOL_BITMASK:
 		default:
 			return (1);
 		}
 	} while (! PF_AEQ(&init_addr, naddr, af) );
 	return (1);					/* none available */
 }
 
 int
 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
 {
 	struct pf_pool		*rpool = &r->rpool;
 	struct pf_addr		*raddr = NULL, *rmask = NULL;
 
 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
 		*sn = pf_find_src_node(saddr, r, af, 0);
 		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
 			PF_ACPY(naddr, &(*sn)->raddr, af);
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pf_map_addr: src tracking maps ");
 				pf_print_host(saddr, 0, af);
 				printf(" to ");
 				pf_print_host(naddr, 0, af);
 				printf("\n");
 			}
 			return (0);
 		}
 	}
 
 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
 		return (1);
 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
 			    PF_POOL_ROUNDROBIN)
 				return (1);
 			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
 			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
 			    PF_POOL_ROUNDROBIN)
 				return (1);
 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
 			break;
 #endif /* INET6 */
 		}
 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
 			return (1); /* unsupported */
 	} else {
 		raddr = &rpool->cur->addr.v.a.addr;
 		rmask = &rpool->cur->addr.v.a.mask;
 	}
 
 	switch (rpool->opts & PF_POOL_TYPEMASK) {
 	case PF_POOL_NONE:
 		PF_ACPY(naddr, raddr, af);
 		break;
 	case PF_POOL_BITMASK:
 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
 		break;
 	case PF_POOL_RANDOM:
 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				rpool->counter.addr32[0] = htonl(arc4random());
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				if (rmask->addr32[3] != 0xffffffff)
 					rpool->counter.addr32[3] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[2] != 0xffffffff)
 					rpool->counter.addr32[2] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[1] != 0xffffffff)
 					rpool->counter.addr32[1] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[0] != 0xffffffff)
 					rpool->counter.addr32[0] =
 					    htonl(arc4random());
 				break;
 #endif /* INET6 */
 			}
 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
 			PF_ACPY(init_addr, naddr, af);
 
 		} else {
 			PF_AINC(&rpool->counter, af);
 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
 		}
 		break;
 	case PF_POOL_SRCHASH:
 	    {
 		unsigned char hash[16];
 
 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
 		break;
 	    }
 	case PF_POOL_ROUNDROBIN:
 	    {
 		struct pf_pooladdr *acur = rpool->cur;
 
 		/*
 		 * XXXGL: in the round-robin case we need to store
 		 * the round-robin machine state in the rule, thus
 		 * forwarding thread needs to modify rule.
 		 *
 		 * This is done w/o locking, because performance is assumed
 		 * more important than round-robin precision.
 		 *
 		 * In the simpliest case we just update the "rpool->cur"
 		 * pointer. However, if pool contains tables or dynamic
 		 * addresses, then "tblidx" is also used to store machine
 		 * state. Since "tblidx" is int, concurrent access to it can't
 		 * lead to inconsistence, only to lost of precision.
 		 *
 		 * Things get worse, if table contains not hosts, but
 		 * prefixes. In this case counter also stores machine state,
 		 * and for IPv6 address, counter can't be updated atomically.
 		 * Probably, using round-robin on a table containing IPv6
 		 * prefixes (or even IPv4) would cause a panic.
 		 */
 
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
 			    &rpool->tblidx, &rpool->counter, af))
 				goto get_addr;
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
 			    &rpool->tblidx, &rpool->counter, af))
 				goto get_addr;
 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
 			goto get_addr;
 
 	try_next:
 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
 			rpool->cur = TAILQ_FIRST(&rpool->list);
 		else
 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			rpool->tblidx = -1;
 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
 			    &rpool->tblidx, &rpool->counter, af)) {
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
 				return (1);
 			}
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 			rpool->tblidx = -1;
 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
 			    &rpool->tblidx, &rpool->counter, af)) {
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
 				return (1);
 			}
 		} else {
 			raddr = &rpool->cur->addr.v.a.addr;
 			rmask = &rpool->cur->addr.v.a.mask;
 			PF_ACPY(&rpool->counter, raddr, af);
 		}
 
 	get_addr:
 		PF_ACPY(naddr, &rpool->counter, af);
 		if (init_addr != NULL && PF_AZERO(init_addr, af))
 			PF_ACPY(init_addr, naddr, af);
 		PF_AINC(&rpool->counter, af);
 		break;
 	    }
 	}
 	if (*sn != NULL)
 		PF_ACPY(&(*sn)->raddr, naddr, af);
 
 	if (V_pf_status.debug >= PF_DEBUG_MISC &&
 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
 		printf("pf_map_addr: selected address ");
 		pf_print_host(naddr, 0, af);
 		printf("\n");
 	}
 
 	return (0);
 }
 
 struct pf_rule *
 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
     struct pfi_kif *kif, struct pf_src_node **sn,
     struct pf_state_key **skp, struct pf_state_key **nkp,
     struct pf_addr *saddr, struct pf_addr *daddr,
     uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack)
 {
 	struct pf_rule	*r = NULL;
 	struct pf_addr	*naddr;
 	uint16_t	*nport;
 
 	PF_RULES_RASSERT();
 	KASSERT(*skp == NULL, ("*skp not NULL"));
 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
 
 	if (direction == PF_OUT) {
 		r = pf_match_translation(pd, m, off, direction, kif, saddr,
 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
 		if (r == NULL)
 			r = pf_match_translation(pd, m, off, direction, kif,
 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
 			    anchor_stack);
 	} else {
 		r = pf_match_translation(pd, m, off, direction, kif, saddr,
 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
 		if (r == NULL)
 			r = pf_match_translation(pd, m, off, direction, kif,
 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
 			    anchor_stack);
 	}
 
 	if (r == NULL)
 		return (NULL);
 
 	switch (r->action) {
 	case PF_NONAT:
 	case PF_NOBINAT:
 	case PF_NORDR:
 		return (NULL);
 	}
 
 	*skp = pf_state_key_setup(pd, saddr, daddr, sport, dport);
 	if (*skp == NULL)
 		return (NULL);
 	*nkp = pf_state_key_clone(*skp);
 	if (*nkp == NULL) {
-		uma_zfree(V_pf_state_key_z, skp);
+		uma_zfree(V_pf_state_key_z, *skp);
 		*skp = NULL;
 		return (NULL);
 	}
 
 	/* XXX We only modify one side for now. */
 	naddr = &(*nkp)->addr[1];
 	nport = &(*nkp)->port[1];
 
 	switch (r->action) {
 	case PF_NAT:
 		if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, daddr,
 		    dport, naddr, nport, r->rpool.proxy_port[0],
 		    r->rpool.proxy_port[1], sn)) {
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
 			goto notrans;
 		}
 		break;
 	case PF_BINAT:
 		switch (direction) {
 		case PF_OUT:
 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
 				switch (pd->af) {
 #ifdef INET
 				case AF_INET:
 					if (r->rpool.cur->addr.p.dyn->
 					    pfid_acnt4 < 1)
 						goto notrans;
 					PF_POOLMASK(naddr,
 					    &r->rpool.cur->addr.p.dyn->
 					    pfid_addr4,
 					    &r->rpool.cur->addr.p.dyn->
 					    pfid_mask4, saddr, AF_INET);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					if (r->rpool.cur->addr.p.dyn->
 					    pfid_acnt6 < 1)
 						goto notrans;
 					PF_POOLMASK(naddr,
 					    &r->rpool.cur->addr.p.dyn->
 					    pfid_addr6,
 					    &r->rpool.cur->addr.p.dyn->
 					    pfid_mask6, saddr, AF_INET6);
 					break;
 #endif /* INET6 */
 				}
 			} else
 				PF_POOLMASK(naddr,
 				    &r->rpool.cur->addr.v.a.addr,
 				    &r->rpool.cur->addr.v.a.mask, saddr,
 				    pd->af);
 			break;
 		case PF_IN:
 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
 				switch (pd->af) {
 #ifdef INET
 				case AF_INET:
 					if (r->src.addr.p.dyn-> pfid_acnt4 < 1)
 						goto notrans;
 					PF_POOLMASK(naddr,
 					    &r->src.addr.p.dyn->pfid_addr4,
 					    &r->src.addr.p.dyn->pfid_mask4,
 					    daddr, AF_INET);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					if (r->src.addr.p.dyn->pfid_acnt6 < 1)
 						goto notrans;
 					PF_POOLMASK(naddr,
 					    &r->src.addr.p.dyn->pfid_addr6,
 					    &r->src.addr.p.dyn->pfid_mask6,
 					    daddr, AF_INET6);
 					break;
 #endif /* INET6 */
 				}
 			} else
 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
 				    &r->src.addr.v.a.mask, daddr, pd->af);
 			break;
 		}
 		break;
 	case PF_RDR: {
 		if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
 			goto notrans;
 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
 			    daddr, pd->af);
 
 		if (r->rpool.proxy_port[1]) {
 			uint32_t	tmp_nport;
 
 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
 			    1)) + r->rpool.proxy_port[0];
 
 			/* Wrap around if necessary. */
 			if (tmp_nport > 65535)
 				tmp_nport -= 65535;
 			*nport = htons((uint16_t)tmp_nport);
 		} else if (r->rpool.proxy_port[0])
 			*nport = htons(r->rpool.proxy_port[0]);
 		break;
 	}
 	default:
 		panic("%s: unknown action %u", __func__, r->action);
 	}
 
 	/* Return success only if translation really happened. */
 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp)))
 		return (r);
 
 notrans:
 	uma_zfree(V_pf_state_key_z, *nkp);
 	uma_zfree(V_pf_state_key_z, *skp);
 	*skp = *nkp = NULL;
 	*sn = NULL;
 
 	return (NULL);
 }
Index: stable/10/sys/netpfil/pf/pf_mtag.h
===================================================================
--- stable/10/sys/netpfil/pf/pf_mtag.h	(nonexistent)
+++ stable/10/sys/netpfil/pf/pf_mtag.h	(revision 263086)
@@ -0,0 +1,62 @@
+/*	$FreeBSD$	*/
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NET_PF_MTAG_H_
+#define _NET_PF_MTAG_H_
+
+#ifdef _KERNEL
+
+#define	PF_TAG_GENERATED		0x01
+#define	PF_TAG_FRAGCACHE		0x02
+#define	PF_TAG_TRANSLATE_LOCALHOST	0x04
+#define	PF_PACKET_LOOPED		0x08
+#define	PF_FASTFWD_OURS_PRESENT		0x10
+
+struct pf_mtag {
+	void		*hdr;		/* saved hdr pos in mbuf, for ECN */
+	u_int32_t	 qid;		/* queue id */
+	u_int16_t	 tag;		/* tag id */
+	u_int8_t	 flags;		/* NOTE(review): likely the 0x01-0x10 bits above; confirm */
+	u_int8_t	 routed;
+};
+
+/*
+ * Return the pf_mtag stored right behind m's PACKET_TAG_PF tag, or NULL.
+ */
+static __inline struct pf_mtag *
+pf_find_mtag(struct mbuf *m)
+{
+	struct m_tag	*hdr = m_tag_find(m, PACKET_TAG_PF, NULL);
+
+	return (hdr == NULL ? NULL : (struct pf_mtag *)(hdr + 1));
+}
+#endif /* _KERNEL */
+#endif /* _NET_PF_MTAG_H_ */

Property changes on: stable/10/sys/netpfil/pf/pf_mtag.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: stable/10/sys/netpfil/pf/pf_norm.c
===================================================================
--- stable/10/sys/netpfil/pf/pf_norm.c	(revision 263085)
+++ stable/10/sys/netpfil/pf/pf_norm.c	(revision 263086)
@@ -1,2000 +1,1999 @@
 /*-
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
-#include <net/pf_mtag.h>
 #include <net/if_pflog.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif /* INET6 */
 
 /*
  * One entry on a pf_fragment's fr_queue.  The union is interpreted
  * according to the owning descriptor: pf_reassemble() uses _frag (a
  * buffered fragment and its IP header), pf_fragcache() uses _cache (a
  * byte range that has already been seen).
  */
 struct pf_frent {
 	LIST_ENTRY(pf_frent) fr_next;
 	union {
 		struct {
 			struct ip *_fr_ip;
 			struct mbuf *_fr_m;
 		} _frag;
 		struct {
 			uint16_t _fr_off;
 			uint16_t _fr_end;
 		} _cache;
 	} _u;
 };
 /* Shorthand accessors for the union members above. */
 #define	fr_ip	_u._frag._fr_ip
 #define	fr_m	_u._frag._fr_m
 #define	fr_off	_u._cache._fr_off
 #define	fr_end	_u._cache._fr_end
 
 /*
  * Descriptor for one fragmented IPv4 datagram, keyed by
  * (fr_id, fr_p, fr_src, fr_dst) -- see pf_frag_compare().  It lives in
  * one red-black tree and one LRU queue at a time; PFFRAG_NOBUFFER selects
  * the cache tree/queue instead of the reassembly tree/queue.
  */
 struct pf_fragment {
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	struct in_addr	fr_src;
 	struct in_addr	fr_dst;
 	u_int8_t	fr_p;		/* protocol of this fragment */
 	u_int8_t	fr_flags;	/* status flags */
 #define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
 #define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
 #define PFFRAG_DROP	0x0004		/* Drop all fragments */
 /* True when the descriptor buffers mbufs, i.e. PFFRAG_NOBUFFER is clear. */
 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
 	u_int16_t	fr_id;		/* fragment id for reassemble */
 	u_int16_t	fr_max;		/* fragment data max */
 	u_int32_t	fr_timeout;	/* time_uptime of the last update */
 	LIST_HEAD(, pf_frent) fr_queue;	/* pf_frent entries of this datagram */
 };
 
 /*
  * Mutex for the fragment trees and queues declared below.  The helpers in
  * this file that walk or modify them check ownership via PF_FRAG_ASSERT().
  */
 static struct mtx pf_frag_mtx;
 #define PF_FRAG_LOCK()		mtx_lock(&pf_frag_mtx)
 #define PF_FRAG_UNLOCK()	mtx_unlock(&pf_frag_mtx)
 #define PF_FRAG_ASSERT()	mtx_assert(&pf_frag_mtx, MA_OWNED)
 
 VNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */
 
 /* UMA zones backing struct pf_frent and struct pf_fragment. */
 static VNET_DEFINE(uma_zone_t, pf_frent_z);
 #define	V_pf_frent_z	VNET(pf_frent_z)
 static VNET_DEFINE(uma_zone_t, pf_frag_z);
 #define	V_pf_frag_z	VNET(pf_frag_z)
 
 /*
  * Two queue/tree pairs: one for buffered reassembly descriptors, one for
  * the non-buffering cache.  Lookups move a descriptor to the queue head,
  * so the tail holds the least recently touched one.
  */
 TAILQ_HEAD(pf_fragqueue, pf_fragment);
 TAILQ_HEAD(pf_cachequeue, pf_fragment);
 static VNET_DEFINE(struct pf_fragqueue,	pf_fragqueue);
 #define	V_pf_fragqueue			VNET(pf_fragqueue)
 static VNET_DEFINE(struct pf_cachequeue,	pf_cachequeue);
 #define	V_pf_cachequeue			VNET(pf_cachequeue)
 RB_HEAD(pf_frag_tree, pf_fragment);
 static VNET_DEFINE(struct pf_frag_tree,	pf_frag_tree);
 #define	V_pf_frag_tree			VNET(pf_frag_tree)
 static VNET_DEFINE(struct pf_frag_tree,	pf_cache_tree);
 #define	V_pf_cache_tree			VNET(pf_cache_tree)
 static int		 pf_frag_compare(struct pf_fragment *,
 			    struct pf_fragment *);
 static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 /* Private prototypes */
 static void		 pf_free_fragment(struct pf_fragment *);
 static void		 pf_remove_fragment(struct pf_fragment *);
 static int		 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
 			    struct tcphdr *, int, sa_family_t);
 #ifdef INET
 static void		 pf_ip2key(struct pf_fragment *, struct ip *);
 static void		 pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
 			    u_int8_t);
 static void		 pf_flush_fragments(void);
 static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
 static struct mbuf	*pf_reassemble(struct mbuf **, struct pf_fragment **,
 			    struct pf_frent *, int);
 static struct mbuf	*pf_fragcache(struct mbuf **, struct ip*,
 			    struct pf_fragment **, int, int, int *);
 #endif /* INET */
 #ifdef INET6
 static void		 pf_scrub_ip6(struct mbuf **, u_int8_t);
 #endif
 /*
  * Debug printf, active once V_pf_status.debug reaches PF_DEBUG_MISC.
  * The argument is a complete, parenthesised printf argument list, e.g.
  * DPFPRINTF(("fmt %d\n", v)); output is prefixed with the calling
  * function's name.
  */
 #define	DPFPRINTF(x) do {				\
 	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
 		printf("%s: ", __func__);		\
 		printf x ;				\
 	}						\
 } while(0)
 
 /*
  * Prepare the normalization state: the fragment mutex, the two LRU
  * queues, the UMA zones and the PF_LIMIT_FRAGS limit on frent entries.
  */
 void
 pf_normalize_init(void)
 {
 
 	/* Lock and queue heads do not depend on anything else. */
 	mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF);
 	TAILQ_INIT(&V_pf_fragqueue);
 	TAILQ_INIT(&V_pf_cachequeue);
 
 	/* One zone per object type allocated by this file. */
 	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
 	    sizeof(struct pf_state_scrub),  NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 	/* Cap the frent zone and publish it as the fragment limit. */
 	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
 	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");
 	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
 	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
 }
 
 /*
  * Undo pf_normalize_init(): destroy the three UMA zones and then the
  * fragment mutex.  Descriptors still linked into the trees or queues are
  * not freed individually here.
  */
 void
 pf_normalize_cleanup(void)
 {
 
 	uma_zdestroy(V_pf_state_scrub_z);
 	uma_zdestroy(V_pf_frent_z);
 	uma_zdestroy(V_pf_frag_z);
 
 	mtx_destroy(&pf_frag_mtx);
 }
 
 /*
  * Ordering function for the fragment red-black trees.  Keys compare by
  * fragment id, then protocol, then source and destination address.
  * Returns <0, 0 or >0.
  */
 static int
 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 {
 	int	delta;
 
 	/* Narrow fields promote to int, so plain subtraction is safe. */
 	delta = a->fr_id - b->fr_id;
 	if (delta != 0)
 		return (delta);
 	delta = a->fr_p - b->fr_p;
 	if (delta != 0)
 		return (delta);
 
 	/* Addresses are compared, not subtracted. */
 	if (a->fr_src.s_addr != b->fr_src.s_addr)
 		return (a->fr_src.s_addr < b->fr_src.s_addr ? -1 : 1);
 	if (a->fr_dst.s_addr != b->fr_dst.s_addr)
 		return (a->fr_dst.s_addr < b->fr_dst.s_addr ? -1 : 1);
 	return (0);
 }
 
 /*
  * Free every reassembly and cache descriptor that has not been touched
  * for timeout[PFTM_FRAG] seconds.  Both queues keep the most recently
  * touched descriptor at the head, so each scan starts at the tail and
  * stops at the first descriptor that is still fresh.
  */
 void
 pf_purge_expired_fragments(void)
 {
 	struct pf_fragment	*frag;
 	u_int32_t		 now = time_uptime;
 	u_int32_t		 timeout = V_pf_default_rule.timeout[PFTM_FRAG];
 	/*
 	 * Clamp at zero: while the uptime is still below the timeout the
 	 * unsigned subtraction would wrap and make every descriptor look
 	 * expired.
 	 */
 	u_int32_t		 expire = (now > timeout) ? now - timeout : 0;
 
 	PF_FRAG_LOCK();
 	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
 		KASSERT((BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 	}
 
 	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
 		KASSERT((!BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 		/* The freed descriptor must no longer be the queue tail. */
 		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
 		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
 		    ("!(TAILQ_EMPTY() || TAILQ_LAST() != frag): %s",
 		    __FUNCTION__));
 	}
 	PF_FRAG_UNLOCK();
 }
 
 #ifdef INET
 /*
  * Make room for new fragments: evict the least recently touched
  * reassembly and cache descriptors until the frent zone has shrunk to
  * 9/10 of its current usage, or both queues are empty.
  */
 static void
 pf_flush_fragments(void)
 {
 	struct pf_fragment	*oldfrag, *oldcache;
 	int			 goal;
 
 	PF_FRAG_ASSERT();
 
 	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
 	DPFPRINTF(("trying to free %d frag entriess\n", goal));
 	for (;;) {
 		if (goal >= uma_zone_get_cur(V_pf_frent_z))
 			break;
 
 		/* Take one victim from the tail of each queue per round. */
 		oldfrag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
 		if (oldfrag != NULL)
 			pf_free_fragment(oldfrag);
 		oldcache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
 		if (oldcache != NULL)
 			pf_free_fragment(oldcache);
 
 		/* Nothing left to evict. */
 		if (oldfrag == NULL && oldcache == NULL)
 			break;
 	}
 }
 #endif /* INET */
 
 /*
  * Release a descriptor together with everything queued on it.  Buffered
  * descriptors also free the mbuf held by each entry; cache descriptors
  * hold only byte ranges.  The descriptor itself is released through
  * pf_remove_fragment().
  */
 static void
 pf_free_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent		*ent;
 
 	PF_FRAG_ASSERT();
 
 	/* Drain the entry list from the front. */
 	while ((ent = LIST_FIRST(&frag->fr_queue)) != NULL) {
 		LIST_REMOVE(ent, fr_next);
 
 		if (BUFFER_FRAGMENTS(frag)) {
 			m_freem(ent->fr_m);
 		} else {
 			/* Cached ranges must be ordered and disjoint. */
 			KASSERT((LIST_EMPTY(&frag->fr_queue) ||
 			    LIST_FIRST(&frag->fr_queue)->fr_off >
 			    ent->fr_end),
 			    ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
 			    " frent->fr_end): %s", __func__));
 		}
 
 		uma_zfree(V_pf_frent_z, ent);
 	}
 
 	pf_remove_fragment(frag);
 }
 
 #ifdef INET
 /*
  * Fill in the lookup fields of key (addresses, id, protocol) from an IPv4
  * header.  All other members of key are left untouched.
  */
 static void
 pf_ip2key(struct pf_fragment *key, struct ip *ip)
 {
 	key->fr_src.s_addr = ip->ip_src.s_addr;
 	key->fr_dst.s_addr = ip->ip_dst.s_addr;
 	key->fr_id = ip->ip_id;
 	key->fr_p = ip->ip_p;
 }
 
 /*
  * Look up the descriptor for ip's (id, protocol, src, dst) key in tree.
  * A hit refreshes the descriptor's timestamp and moves it to the head of
  * its LRU queue.  Returns NULL when no descriptor matches.
  */
 static struct pf_fragment *
 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
 {
 	struct pf_fragment	 lookup;
 	struct pf_fragment	*found;
 
 	PF_FRAG_ASSERT();
 
 	pf_ip2key(&lookup, ip);
 	found = RB_FIND(pf_frag_tree, tree, &lookup);
 	if (found == NULL)
 		return (NULL);
 
 	/* XXX Are we sure we want to update the timeout? */
 	found->fr_timeout = time_uptime;
 	if (!BUFFER_FRAGMENTS(found)) {
 		TAILQ_REMOVE(&V_pf_cachequeue, found, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, found, frag_next);
 	} else {
 		TAILQ_REMOVE(&V_pf_fragqueue, found, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, found, frag_next);
 	}
 
 	return (found);
 }
 #endif /* INET */
 
 /*
  * Unlink a descriptor from its tree and LRU queue and return it to the
  * zone.  Entries still linked on frag->fr_queue are not touched here.
  */
 static void
 pf_remove_fragment(struct pf_fragment *frag)
 {
 
 	PF_FRAG_ASSERT();
 
 	/* PFFRAG_NOBUFFER decides which tree/queue pair holds it. */
 	if (!BUFFER_FRAGMENTS(frag)) {
 		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
 		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
 	} else {
 		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
 	}
 	uma_zfree(V_pf_frag_z, frag);
 }
 
 #ifdef INET
 #define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
 /*
  * Queue one IPv4 fragment for full reassembly.
  *
  * m0    - mbuf holding the fragment; its IP header is stripped here.
  * frag  - in: descriptor found by the caller, or NULL to create one;
  *         out: set to NULL once the datagram is complete or discarded
  *         as too big.
  * frent - entry already pointing at the fragment's header and mbuf
  *         (fr_ip/fr_m); linked into the descriptor or freed on drop.
  * mff   - non-zero when the fragment has "more fragments" set.
  *
  * Returns the reassembled mbuf chain once every byte up to fr_max is
  * present, otherwise NULL (fragment queued, or dropped and freed).
  * Overlaps are resolved in favour of data that arrived first for the
  * leading part of the new fragment, and in favour of the new fragment
  * against fragments that start later.
  */
 static struct mbuf *
 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
     struct pf_frent *frent, int mff)
 {
 	struct mbuf	*m = *m0, *m2;
 	struct pf_frent	*frea, *next;
 	struct pf_frent	*frep = NULL;
 	struct ip	*ip = frent->fr_ip;
 	int		 hlen = ip->ip_hl << 2;
 	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
 	u_int16_t	 max = ip_len + off;
 
 	PF_FRAG_ASSERT();
 	KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
 	    ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 
 	/* Strip off ip header */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/* Create a new reassembly queue for this packet */
 	if (*frag == NULL) {
 		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (*frag == NULL) {
 			/* Evict old descriptors and retry once. */
 			pf_flush_fragments();
 			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (*frag == NULL)
 				goto drop_fragment;
 		}
 
 		(*frag)->fr_flags = 0;
 		(*frag)->fr_max = 0;
 		(*frag)->fr_src = frent->fr_ip->ip_src;
 		(*frag)->fr_dst = frent->fr_ip->ip_dst;
 		(*frag)->fr_p = frent->fr_ip->ip_p;
 		(*frag)->fr_id = frent->fr_ip->ip_id;
 		(*frag)->fr_timeout = time_uptime;
 		LIST_INIT(&(*frag)->fr_queue);
 
 		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
 
 		/* We do not have a previous fragment */
 		frep = NULL;
 		goto insert;
 	}
 
 	/*
 	 * Find a fragment after the current one:
 	 *  - off contains the real shifted offset.
 	 */
 	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
 		if (FR_IP_OFF(frea) > off)
 			break;
 		frep = frea;
 	}
 
 	KASSERT((frep != NULL || frea != NULL),
 	    ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
 
 	/* The predecessor reaches into this fragment: trim our front. */
 	if (frep != NULL &&
 	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
 	    4 > off)
 	{
 		u_int16_t	precut;
 
 		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
 		    frep->fr_ip->ip_hl * 4 - off;
 		/* Nothing new in this fragment at all. */
 		if (precut >= ip_len)
 			goto drop_fragment;
 		m_adj(frent->fr_m, precut);
 		DPFPRINTF(("overlap -%d\n", precut));
 		/* Enforce 8 byte boundaries */
 		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
 		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 		ip_len -= precut;
 		ip->ip_len = htons(ip_len);
 	}
 
 	/* This fragment reaches into its successors: trim or drop them. */
 	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
 	    frea = next)
 	{
 		u_int16_t	aftercut;
 
 		aftercut = ip_len + off - FR_IP_OFF(frea);
 		DPFPRINTF(("adjust overlap %d\n", aftercut));
 		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
 		    * 4)
 		{
 			frea->fr_ip->ip_len =
 			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
 			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
 			    (aftercut >> 3));
 			m_adj(frea->fr_m, aftercut);
 			break;
 		}
 
 		/* This fragment is completely overlapped, lose it */
 		next = LIST_NEXT(frea, fr_next);
 		m_freem(frea->fr_m);
 		LIST_REMOVE(frea, fr_next);
 		uma_zfree(V_pf_frent_z, frea);
 	}
 
  insert:
 	/* Update maximum data size */
 	if ((*frag)->fr_max < max)
 		(*frag)->fr_max = max;
 	/* This is the last segment */
 	if (!mff)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	if (frep == NULL)
 		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
 	else
 		LIST_INSERT_AFTER(frep, frent, fr_next);
 
 	/* Check if we are completely reassembled */
 	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
 		return (NULL);
 
 	/* Check if we have all the data */
 	off = 0;
 	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
 		next = LIST_NEXT(frep, fr_next);
 
 		/* off is the running end of the contiguous data so far. */
 		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
 		if (off < (*frag)->fr_max &&
 		    (next == NULL || FR_IP_OFF(next) != off))
 		{
 			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
 			    off, next == NULL ? -1 : FR_IP_OFF(next),
 			    (*frag)->fr_max));
 			return (NULL);
 		}
 	}
 	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
 	if (off < (*frag)->fr_max)
 		return (NULL);
 
 	/* We have all the data */
 	frent = LIST_FIRST(&(*frag)->fr_queue);
 	KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
 	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
 		DPFPRINTF(("drop: too big: %d\n", off));
 		/* Frees every queued fragment, including the current one. */
 		pf_free_fragment(*frag);
 		*frag = NULL;
 		return (NULL);
 	}
 	next = LIST_NEXT(frent, fr_next);
 
 	/* Magic from ip_input */
 	ip = frent->fr_ip;
 	m = frent->fr_m;
 	m2 = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, m2);
 	uma_zfree(V_pf_frent_z, frent);
 	/* Append the remaining fragments to the first one, in queue order. */
 	for (frent = next; frent != NULL; frent = next) {
 		next = LIST_NEXT(frent, fr_next);
 
 		m2 = frent->fr_m;
 		uma_zfree(V_pf_frent_z, frent);
 		m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
 		m_cat(m, m2);
 	}
 
 	/* Fold the summed checksum data back into 16 bits. */
 	while (m->m_pkthdr.csum_data & 0xffff0000)
 		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
 		    (m->m_pkthdr.csum_data >> 16);
 	ip->ip_src = (*frag)->fr_src;
 	ip->ip_dst = (*frag)->fr_dst;
 
 	/* Remove from fragment queue */
 	pf_remove_fragment(*frag);
 	*frag = NULL;
 
 	/* Re-expose the IP header of the first fragment. */
 	hlen = ip->ip_hl << 2;
 	ip->ip_len = htons(off + hlen);
 	m->m_len += hlen;
 	m->m_data -= hlen;
 
 	/* some debugging cruft by sklower, below, will go away soon */
 	/* XXX this should be done elsewhere */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (m2 = m; m2; m2 = m2->m_next)
 			plen += m2->m_len;
 		m->m_pkthdr.len = plen;
 	}
 
 	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
 	return (m);
 
  drop_fragment:
 	/* Oops - fail safe - drop packet */
 	uma_zfree(V_pf_frent_z, frent);
 	m_freem(m);
 	return (NULL);
 }
 
 /*
  * Non-buffering fragment handling: rather than holding fragments, keep a
  * sorted list of the byte ranges (fr_off..fr_end) already seen for the
  * datagram and let each fragment through once, trimmed of any bytes that
  * were seen before.
  *
  * m0    - mbuf holding the fragment; updated when a new head mbuf is
  *         built while trimming the front.
  * h     - IPv4 header of the fragment.
  * frag  - in: cache descriptor found by the caller, or NULL to create
  *         one; out: set to NULL when the whole datagram has been seen.
  * mff   - non-zero when the fragment has "more fragments" set.
  * drop  - non-zero: an overlapping fragment is dropped and the whole
  *         descriptor is marked PFFRAG_DROP instead of being trimmed.
  * nomem - set to 1 when the fragment is discarded for lack of memory.
  *
  * Returns the (possibly trimmed) mbuf to pass on, or NULL after the
  * fragment has been freed.
  */
 static struct mbuf *
 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
     int drop, int *nomem)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frp, *fra, *cur = NULL;
 	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
 	u_int16_t		 off = ntohs(h->ip_off) << 3;
 	u_int16_t		 max = ip_len + off;
 	int			 hosed = 0;
 
 	PF_FRAG_ASSERT();
 	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
 	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 
 	/* Create a new range queue for this packet */
 	if (*frag == NULL) {
 		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (*frag == NULL) {
 			/* Evict old descriptors and retry once. */
 			pf_flush_fragments();
 			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (*frag == NULL)
 				goto no_mem;
 		}
 
 		/* Get an entry for the queue */
 		cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 		if (cur == NULL) {
 			uma_zfree(V_pf_frag_z, *frag);
 			*frag = NULL;
 			goto no_mem;
 		}
 
 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
 		(*frag)->fr_max = 0;
 		(*frag)->fr_src = h->ip_src;
 		(*frag)->fr_dst = h->ip_dst;
 		(*frag)->fr_p = h->ip_p;
 		(*frag)->fr_id = h->ip_id;
 		(*frag)->fr_timeout = time_uptime;
 
 		cur->fr_off = off;
 		cur->fr_end = max;
 		LIST_INIT(&(*frag)->fr_queue);
 		LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 
 		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
 
 		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
 
 		goto pass;
 	}
 
 	/*
 	 * Find a fragment after the current one:
 	 *  - off contains the real shifted offset.
 	 */
 	frp = NULL;
 	LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
 		if (fra->fr_off > off)
 			break;
 		frp = fra;
 	}
 
 	KASSERT((frp != NULL || fra != NULL),
 	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
 
 	/* Relate the new range to the cached range that starts at or before it. */
 	if (frp != NULL) {
 		int	precut;
 
 		precut = frp->fr_end - off;
 		if (precut >= ip_len) {
 			/* Fragment is entirely a duplicate */
 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
 			goto drop_fragment;
 		}
 		if (precut == 0) {
 			/* They are adjacent.  Fixup cache entry */
 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
 			frp->fr_end = max;
 		} else if (precut > 0) {
 			/* The first part of this payload overlaps with a
 			 * fragment that has already been passed.
 			 * Need to trim off the first part of the payload.
 			 * But to do so easily, we need to create another
 			 * mbuf to throw the original header into.
 			 */
 
 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
 			    max));
 
 			off += precut;
 			max -= precut;
 			/* Update the previous frag to encompass this one */
 			frp->fr_end = max;
 
 			if (!drop) {
 				/* XXX Optimization opportunity
 				 * This is a very heavy way to trim the payload.
 				 * we could do it much faster by diddling mbuf
 				 * internals but that would be even less legible
 				 * than this mbuf magic.  For my next trick,
 				 * I'll pull a rabbit out of my laptop.
 				 */
 				*m0 = m_dup(m, M_NOWAIT);
 				if (*m0 == NULL)
 					goto no_mem;
 				/* From KAME Project : We have missed this! */
 				/* Negative length: trims the copy from its tail down to the header. */
 				m_adj(*m0, (h->ip_hl << 2) -
 				    (*m0)->m_pkthdr.len);
 
 				KASSERT(((*m0)->m_next == NULL),
 				    ("(*m0)->m_next != NULL: %s",
 				    __FUNCTION__));
 				/* Strip header plus overlap from the original, then append it. */
 				m_adj(m, precut + (h->ip_hl << 2));
 				m_cat(*m0, m);
 				m = *m0;
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 
 
 				h = mtod(m, struct ip *);
 
 				KASSERT(((int)m->m_len ==
 				    ntohs(h->ip_len) - precut),
 				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
 				    __FUNCTION__));
 				h->ip_off = htons(ntohs(h->ip_off) +
 				    (precut >> 3));
 				h->ip_len = htons(ntohs(h->ip_len) - precut);
 			} else {
 				hosed++;
 			}
 		} else {
 			/* There is a gap between fragments */
 
 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
 			    max));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fr_off = off;
 			cur->fr_end = max;
 			LIST_INSERT_AFTER(frp, cur, fr_next);
 		}
 	}
 
 	/* Relate the new range to the first cached range that starts after it. */
 	if (fra != NULL) {
 		int	aftercut;
 		int	merge = 0;
 
 		aftercut = max - fra->fr_off;
 		if (aftercut == 0) {
 			/* Adjacent fragments */
 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
 			fra->fr_off = off;
 			merge = 1;
 		} else if (aftercut > 0) {
 			/* Need to chop off the tail of this fragment */
 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 			    h->ip_id, aftercut, off, max, fra->fr_off,
 			    fra->fr_end));
 			fra->fr_off = off;
 			max -= aftercut;
 
 			merge = 1;
 
 			if (!drop) {
 				m_adj(m, -aftercut);
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 				h = mtod(m, struct ip *);
 				KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
 				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
 				    __FUNCTION__));
 				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
 			} else {
 				hosed++;
 			}
 		} else if (frp == NULL) {
 			/* There is a gap between fragments */
 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
 			    h->ip_id, -aftercut, off, max, fra->fr_off,
 			    fra->fr_end));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fr_off = off;
 			cur->fr_end = max;
 			LIST_INSERT_BEFORE(fra, cur, fr_next);
 		}
 
 
 		/* Need to glue together two separate fragment descriptors */
 		if (merge) {
 			if (cur && fra->fr_off <= cur->fr_end) {
 				/* Need to merge in a previous 'cur' */
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, cur->fr_off, cur->fr_end, off,
 				    max, fra->fr_off, fra->fr_end));
 				fra->fr_off = cur->fr_off;
 				LIST_REMOVE(cur, fr_next);
 				uma_zfree(V_pf_frent_z, cur);
 				cur = NULL;
 
 			} else if (frp && fra->fr_off <= frp->fr_end) {
 				/* Need to merge in a modified 'frp' */
 				KASSERT((cur == NULL), ("cur != NULL: %s",
 				    __FUNCTION__));
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, frp->fr_off, frp->fr_end, off,
 				    max, fra->fr_off, fra->fr_end));
 				fra->fr_off = frp->fr_off;
 				LIST_REMOVE(frp, fr_next);
 				uma_zfree(V_pf_frent_z, frp);
 				frp = NULL;
 
 			}
 		}
 	}
 
 	if (hosed) {
 		/*
 		 * We must keep tracking the overall fragment even when
 		 * we're going to drop it anyway so that we know when to
 		 * free the overall descriptor.  Thus we drop the frag late.
 		 */
 		goto drop_fragment;
 	}
 
 
  pass:
 	/* Update maximum data size */
 	if ((*frag)->fr_max < max)
 		(*frag)->fr_max = max;
 
 	/* This is the last segment */
 	if (!mff)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	/* Check if we are completely reassembled */
 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
 	    LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 &&
 	    LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) {
 		/* Remove from fragment queue */
 		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
 		    (*frag)->fr_max));
 		pf_free_fragment(*frag);
 		*frag = NULL;
 	}
 
 	return (m);
 
  no_mem:
 	*nomem = 1;
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	m_freem(m);
 	return (NULL);
 
  drop_fragment:
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	if (drop) {
 		/* This fragment has been deemed bad.  Don't reass */
 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
 			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
 			    h->ip_id));
 		(*frag)->fr_flags |= PFFRAG_DROP;
 	}
 
 	m_freem(m);
 	return (NULL);
 }
 
 /*
  * Apply the first matching scrub rule to an IPv4 packet.
  *
  * m0     - packet; may be replaced by a reassembled or re-built chain,
  *          or set to NULL when the fragment was queued or freed.
  * dir    - PF_IN or PF_OUT.
  * kif    - interface the packet is seen on.
  * reason - set to a PFRES_* value on the drop paths that go through the
  *          labels at the end of the function.
  * pd     - packet descriptor; pf_mtag and flags may be updated.
  *
  * Returns PF_PASS when no scrub rule applies or the packet passed
  * normalization, PF_DROP otherwise.  A non-fragment only has its header
  * scrubbed; a fragment is either fully reassembled (default) or run
  * through the non-buffering cache (fragment crop / drop-ovl rules).
  */
 int
 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
     struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag = NULL;
 	struct ip		*h = mtod(m, struct ip *);
 	int			 mff = (ntohs(h->ip_off) & IP_MF);
 	int			 hlen = h->ip_hl << 2;
 	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
 	u_int16_t		 max;
 	int			 ip_len;
 	int			 tag = -1;
 
 	PF_RULES_RASSERT();
 
 	/* Walk the scrub ruleset, using the skip steps where a field mismatches. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != h->ip_p)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (hlen < (int)sizeof(struct ip))
 		goto drop;
 
 	if (hlen > ntohs(h->ip_len))
 		goto drop;
 
 	/* Clear IP_DF if the rule uses the no-df option */
 	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* We will need other tests here */
 	if (!fragoff && !mff)
 		goto no_fragment;
 
 	/* We're dealing with a fragment now. Don't allow fragments
 	 * with IP_DF to enter the cache. If the flag was cleared by
 	 * no-df above, fine. Otherwise drop it.
 	 */
 	if (h->ip_off & htons(IP_DF)) {
 		DPFPRINTF(("IP_DF\n"));
 		goto bad;
 	}
 
 	ip_len = ntohs(h->ip_len) - hlen;
 
 	/* All fragments are 8 byte aligned */
 	if (mff && (ip_len & 0x7)) {
 		DPFPRINTF(("mff and %d\n", ip_len));
 		goto bad;
 	}
 
 	/* Respect maximum length */
 	if (fragoff + ip_len > IP_MAXPACKET) {
 		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
 		goto bad;
 	}
 	max = fragoff + ip_len;
 
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
 
 		/* Fully buffer all of the fragments */
 		PF_FRAG_LOCK();
 		frag = pf_find_fragment(h, &V_pf_frag_tree);
 
 		/* Check if we saw the last fragment already */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max)
 			goto bad;
 
 		/* Get an entry for the fragment queue */
 		frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 		if (frent == NULL) {
 			PF_FRAG_UNLOCK();
 			REASON_SET(reason, PFRES_MEMORY);
 			return (PF_DROP);
 		}
 		frent->fr_ip = h;
 		frent->fr_m = m;
 
 		/* Might return a completely reassembled mbuf, or NULL */
 		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
 		*m0 = m = pf_reassemble(m0, &frag, frent, mff);
 		PF_FRAG_UNLOCK();
 
 		/* Fragment was queued or freed; nothing to pass on yet. */
 		if (m == NULL)
 			return (PF_DROP);
 
 		/* use mtag from concatenated mbuf chain */
 		pd->pf_mtag = pf_find_mtag(m);
 #ifdef DIAGNOSTIC
 		if (pd->pf_mtag == NULL) {
 			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
 				m_freem(m);
 				/* Do not hand the freed mbuf to the logger. */
 				*m0 = m = NULL;
 				goto no_mem;
 			}
 		}
 #endif
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 
 		h = mtod(m, struct ip *);
 	} else {
 		/* non-buffering fragment cache (drops or masks overlaps) */
 		int	nomem = 0;
 
 		/* pd->pf_mtag may be NULL (see the rule walk above). */
 		if (dir == PF_OUT && pd->pf_mtag != NULL &&
 		    pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
 			/*
 			 * Already passed the fragment cache in the
 			 * input direction.  If we continued, it would
 			 * appear to be a dup and would be dropped.
 			 */
 			goto fragment_pass;
 		}
 
 		PF_FRAG_LOCK();
 		frag = pf_find_fragment(h, &V_pf_cache_tree);
 
 		/* Check if we saw the last fragment already */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max) {
 			if (r->rule_flag & PFRULE_FRAGDROP)
 				frag->fr_flags |= PFFRAG_DROP;
 			goto bad;
 		}
 
 		*m0 = m = pf_fragcache(m0, h, &frag, mff,
 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
 		PF_FRAG_UNLOCK();
 		if (m == NULL) {
 			if (nomem)
 				goto no_mem;
 			goto drop;
 		}
 
 		/* use mtag from copied and trimmed mbuf chain */
 		pd->pf_mtag = pf_find_mtag(m);
 #ifdef DIAGNOSTIC
 		if (pd->pf_mtag == NULL) {
 			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
 				m_freem(m);
 				/* Do not hand the freed mbuf to the logger. */
 				*m0 = m = NULL;
 				goto no_mem;
 			}
 		}
 #endif
 		if (dir == PF_IN) {
 			/*
 			 * The flag lives in the pf mtag; make sure one
 			 * exists before writing to it.
 			 */
 			if (pd->pf_mtag == NULL &&
 			    (pd->pf_mtag = pf_get_mtag(m)) == NULL) {
 				m_freem(m);
 				*m0 = m = NULL;
 				goto no_mem;
 			}
 			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
 		}
 
 		/*
 		 * NOTE(review): frag is read here after PF_FRAG_UNLOCK();
 		 * confirm nothing can free the descriptor in between.
 		 */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 		goto fragment_pass;
 	}
 
  no_fragment:
 	/* At this point, only IP_DF is allowed in ip_off */
 	if (h->ip_off & ~htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* not missing a return here */
 
  fragment_pass:
 	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
 
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
 		pd->flags |= PFDESC_IP_REAS;
 	return (PF_PASS);
 
  no_mem:
 	/* m may be NULL here when the fragment has already been freed. */
 	REASON_SET(reason, PFRES_MEMORY);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  bad:
 	DPFPRINTF(("dropping bad fragment\n"));
 
 	/*
 	 * Free associated fragments.  Every jump here made while holding
 	 * the fragment lock has frag != NULL, and every jump made without
 	 * the lock has frag == NULL, so this test also decides the unlock.
 	 */
 	if (frag != NULL) {
 		pf_free_fragment(frag);
 		PF_FRAG_UNLOCK();
 	}
 
 	REASON_SET(reason, PFRES_FRAG);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 
 	return (PF_DROP);
 }
 #endif
 
 #ifdef INET6
 /*
  * Run an IPv6 packet through the scrub ruleset.
  *
  * The active PF_RULESET_SCRUB rules are walked until one matches the
  * packet's interface, direction, address family and source/destination
  * addresses.  If no rule matches, or the matching rule is PF_NOSCRUB, the
  * packet is passed untouched.  Otherwise the rule's counters are updated,
  * the extension header chain is walked and sanity-checked (including the
  * jumbo payload hop-by-hop option), the payload length is validated and
  * pf_scrub_ip6() is applied with the rule's min_ttl.
  *
  * A packet carrying a fragment header takes the "fragment" path, which
  * only validates the fragment offset; no reassembly is performed here.
  *
  * Returns PF_PASS, or PF_DROP with *reason set to PFRES_SHORT, PFRES_NORM
  * or PFRES_FRAG.  Dropped packets are logged when the rule has log set.
  */
 int
 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
     u_short *reason, struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
 	int			 off;
 	struct ip6_ext		 ext;
 	struct ip6_opt		 opt;
 	struct ip6_opt_jumbo	 jumbo;
 	struct ip6_frag		 frag;
 	u_int32_t		 jumbolen = 0, plen;
 	u_int16_t		 fragoff = 0;
 	int			 optend;
 	int			 ooff;
 	u_int8_t		 proto;
 	int			 terminal;
 
 	PF_RULES_RASSERT();
 
 	/*
 	 * Find the first matching scrub rule.  On a mismatch the rule's
 	 * skip pointer for the failing criterion selects the next candidate.
 	 */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET6)
 			r = r->skip[PF_SKIP_AF].ptr;
 #if 0 /* header chain! */
 		else if (r->proto && r->proto != h->ip6_nxt)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 #endif
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip6_src, AF_INET6,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		/* Counter slot 1 is used for PF_OUT, slot 0 otherwise. */
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
 		goto drop;
 
 	/* Walk the extension header chain starting after the fixed header. */
 	off = sizeof(struct ip6_hdr);
 	proto = h->ip6_nxt;
 	terminal = 0;
 	do {
 		switch (proto) {
 		case IPPROTO_FRAGMENT:
 			goto fragment;
 			break;
 		case IPPROTO_AH:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			/*
 			 * The AH length field is scaled as (len + 2) * 4
 			 * bytes; the other headers as (len + 1) * 8 bytes.
 			 */
 			if (proto == IPPROTO_AH)
 				off += (ext.ip6e_len + 2) * 4;
 			else
 				off += (ext.ip6e_len + 1) * 8;
 			proto = ext.ip6e_nxt;
 			break;
 		case IPPROTO_HOPOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			/* Options live between ooff and optend. */
 			optend = off + (ext.ip6e_len + 1) * 8;
 			ooff = off + sizeof(ext);
 			do {
 				/* Peek at the type byte only: Pad1 has no length. */
 				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
 				    sizeof(opt.ip6o_type), NULL, NULL,
 				    AF_INET6))
 					goto shortpkt;
 				if (opt.ip6o_type == IP6OPT_PAD1) {
 					ooff++;
 					continue;
 				}
 				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
 				    NULL, NULL, AF_INET6))
 					goto shortpkt;
 				/* The option must fit inside the header. */
 				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
 					goto drop;
 				switch (opt.ip6o_type) {
 				case IP6OPT_JUMBO:
 					/*
 					 * A jumbo option is only accepted when
 					 * the fixed header's payload length is
 					 * zero, the jumbo length exceeds
 					 * IPV6_MAXPACKET and it matches the
 					 * mbuf packet length exactly.
 					 */
 					if (h->ip6_plen != 0)
 						goto drop;
 					if (!pf_pull_hdr(m, ooff, &jumbo,
 					    sizeof(jumbo), NULL, NULL,
 					    AF_INET6))
 						goto shortpkt;
 					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
 					    sizeof(jumbolen));
 					jumbolen = ntohl(jumbolen);
 					if (jumbolen <= IPV6_MAXPACKET)
 						goto drop;
 					if (sizeof(struct ip6_hdr) + jumbolen !=
 					    m->m_pkthdr.len)
 						goto drop;
 					break;
 				default:
 					break;
 				}
 				ooff += sizeof(opt) + opt.ip6o_len;
 			} while (ooff < optend);
 
 			off = optend;
 			proto = ext.ip6e_nxt;
 			break;
 		default:
 			/* Any other next-header value ends the walk. */
 			terminal = 1;
 			break;
 		}
 	} while (!terminal);
 
 	/* jumbo payload option must be present, or plen > 0 */
 	if (ntohs(h->ip6_plen) == 0)
 		plen = jumbolen;
 	else
 		plen = ntohs(h->ip6_plen);
 	if (plen == 0)
 		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
 	pf_scrub_ip6(&m, r->min_ttl);
 
 	return (PF_PASS);
 
  fragment:
 	/* Fragments need a real payload length and must not be jumbograms. */
 	if (ntohs(h->ip6_plen) == 0 || jumbolen)
 		goto drop;
 	plen = ntohs(h->ip6_plen);
 
 	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
 		goto shortpkt;
 	/* Reject fragments whose end would lie beyond IPV6_MAXPACKET. */
 	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
 	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
 		goto badfrag;
 
 	/* do something about it */
 	/* remember to set pd->flags |= PFDESC_IP_REAS */
 	return (PF_PASS);
 
  shortpkt:
 	REASON_SET(reason, PFRES_SHORT);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  badfrag:
 	REASON_SET(reason, PFRES_FRAG);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 #endif /* INET6 */
 
 /*
  * Normalize the TCP header of a packet against the scrub ruleset.
  *
  * The active PF_RULESET_SCRUB rules are walked until one matches the
  * packet's interface, direction, address family, protocol, addresses,
  * ports and (optionally) OS fingerprint.  If no rule matches, or the
  * matching rule is PF_NOSCRUB, the packet is passed untouched.
  *
  * With a matching rule the function:
  *  - sets PFDESC_TCP_NORM in pd->flags when the rule asks for TCP
  *    reassembly,
  *  - drops packets with illegal flag combinations or a too-small data
  *    offset,
  *  - clears FIN on SYN packets, the reserved bits, and a stray urgent
  *    pointer, fixing up th_sum incrementally,
  *  - lets pf_normalize_tcpopt() process the options when max_mss is set,
  *  - copies the modified header back into the mbuf.
  *
  * Works for both address families (af is taken from pd->af).
  * Returns PF_PASS or PF_DROP.  The ipoff and h arguments are not used.
  */
 int
 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
     int off, void *h, struct pf_pdesc *pd)
 {
 	struct pf_rule	*r, *rm = NULL;
 	struct tcphdr	*th = pd->hdr.tcp;
 	int		 rewrite = 0;
 	u_short		 reason;
 	u_int8_t	 flags;
 	sa_family_t	 af = pd->af;
 
 	PF_RULES_RASSERT();
 
 	/*
 	 * Find the first matching scrub rule.  On a mismatch the rule's
 	 * skip pointer for the failing criterion selects the next candidate;
 	 * an OS fingerprint mismatch simply moves to the next rule.
 	 */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 			    r->src.port[0], r->src.port[1], th->th_sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 			    r->dst.port[0], r->dst.port[1], th->th_dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
 			    pf_osfp_fingerprint(pd, m, off, th),
 			    r->os_fingerprint))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			rm = r;
 			break;
 		}
 	}
 
 	if (rm == NULL || rm->action == PF_NOSCRUB)
 		return (PF_PASS);
 
 	/* From here on only the matched rule (rm) is referenced. */
 	rm->packets[dir == PF_OUT]++;
 	rm->bytes[dir == PF_OUT] += pd->tot_len;
 
 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
 		pd->flags |= PFDESC_TCP_NORM;
 
 	flags = th->th_flags;
 	if (flags & TH_SYN) {
 		/* Illegal packet */
 		if (flags & TH_RST)
 			goto tcp_drop;
 
 		if (flags & TH_FIN)
 			flags &= ~TH_FIN;
 	} else {
 		/* Illegal packet */
 		if (!(flags & (TH_ACK|TH_RST)))
 			goto tcp_drop;
 	}
 
 	if (!(flags & TH_ACK)) {
 		/* These flags are only valid if ACK is set */
 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
 			goto tcp_drop;
 	}
 
 	/* Check for illegal header length */
 	if (th->th_off < (sizeof(struct tcphdr) >> 2))
 		goto tcp_drop;
 
 	/* If flags changed, or reserved data set, then adjust */
 	if (flags != th->th_flags || th->th_x2 != 0) {
 		u_int16_t	ov, nv;
 
 		/*
 		 * Snapshot the 16-bit word following th_ack before and
 		 * after the change so the checksum can be patched.
 		 */
 		ov = *(u_int16_t *)(&th->th_ack + 1);
 		th->th_flags = flags;
 		th->th_x2 = 0;
 		nv = *(u_int16_t *)(&th->th_ack + 1);
 
 		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
 		rewrite = 1;
 	}
 
 	/* Remove urgent pointer, if TH_URG is not set */
 	if (!(flags & TH_URG) && th->th_urp) {
 		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
 		th->th_urp = 0;
 		rewrite = 1;
 	}
 
 	/* Process options */
 	if (rm->max_mss && pf_normalize_tcpopt(rm, m, th, off, pd->af))
 		rewrite = 1;
 
 	/* copy back packet headers if we sanitized */
 	if (rewrite)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 
  tcp_drop:
 	REASON_SET(&reason, PFRES_NORM);
 	/*
 	 * Log with the packet's own address family: this function handles
 	 * IPv6 TCP as well, so a hard-coded AF_INET would mislabel it.
 	 */
 	if (rm != NULL && rm->log)
 		PFLOG_PACKET(kif, m, af, dir, reason, rm, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 
 /*
  * Allocate the scrub state for one peer of a TCP state and seed it from
  * the first packet seen.
  *
  * src->scrub must be NULL on entry (asserted).  The scrub record is
  * zero-allocated without sleeping and its pfss_ttl is taken from the
  * packet's IP TTL / IPv6 hop limit.  When the packet is a SYN carrying a
  * timestamp option, timestamp tracking is enabled: PFSS_TIMESTAMP is set,
  * a random modulator is chosen and the initial tsval/tsecr are recorded.
  *
  * Returns 1 if the allocation failed, 0 otherwise.  dst is not used.
  */
 int
 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
 {
 	u_int8_t	 optbuf[60];
 	u_int8_t	*cur;
 	u_int32_t	 ts_val, ts_ecr;
 	int		 remaining, step;
 
 	KASSERT((src->scrub == NULL),
 	    ("pf_normalize_tcp_init: src->scrub != NULL"));
 
 	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
 	if (src->scrub == NULL)
 		return (1);
 
 	/* Record the TTL / hop limit of this first packet. */
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET:
 		src->scrub->pfss_ttl = mtod(m, struct ip *)->ip_ttl;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		src->scrub->pfss_ttl = mtod(m, struct ip6_hdr *)->ip6_hlim;
 		break;
 #endif /* INET6 */
 	}
 
 	/*
 	 * Everything below is only started on the opening packet of a
 	 * connection, and must set an enabled bit in pfss_flags.
 	 */
 	if ((th->th_flags & TH_SYN) == 0)
 		return (0);
 
 	/* No options, no scrub state, or header not retrievable: done. */
 	if (th->th_off <= (sizeof(struct tcphdr) >> 2) || src->scrub == NULL ||
 	    !pf_pull_hdr(m, off, optbuf, th->th_off << 2, NULL, NULL, pd->af))
 		return (0);
 
 	/* Walk the TCP options looking for a timestamp. */
 	cur = optbuf + sizeof(struct tcphdr);
 	remaining = (th->th_off << 2) - sizeof(struct tcphdr);
 	while (remaining >= TCPOLEN_TIMESTAMP) {
 		/* Single-byte options. */
 		if (*cur == TCPOPT_EOL || *cur == TCPOPT_NOP) {
 			cur++;
 			remaining--;
 			continue;
 		}
 
 		if (*cur == TCPOPT_TIMESTAMP &&
 		    cur[1] >= TCPOLEN_TIMESTAMP) {
 			src->scrub->pfss_flags |= PFSS_TIMESTAMP;
 			src->scrub->pfss_ts_mod = htonl(arc4random());
 
 			/* note PFSS_PAWS not set yet */
 			memcpy(&ts_val, &cur[2], sizeof(u_int32_t));
 			memcpy(&ts_ecr, &cur[6], sizeof(u_int32_t));
 			src->scrub->pfss_tsval0 = ntohl(ts_val);
 			src->scrub->pfss_tsval = ntohl(ts_val);
 			src->scrub->pfss_tsecr = ntohl(ts_ecr);
 			getmicrouptime(&src->scrub->pfss_last);
 		}
 
 		/* Step over the option; never advance by less than 2. */
 		step = MAX(cur[1], 2);
 		remaining -= step;
 		cur += step;
 	}
 
 	return (0);
 }
 
 void
 pf_normalize_tcp_cleanup(struct pf_state *state)
 {
 	if (state->src.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
 	if (state->dst.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);
 
 	/* Someday... flush the TCP segment reassembly descriptors. */
 }
 
 /*
  * Per-packet TCP normalization for a packet that belongs to an existing
  * state.
  *
  * src and dst are the two peers of the state (presumably the sender and
  * the receiver of this packet -- confirm against the caller); at least one
  * of them must have scrub state attached (asserted).
  *
  * The function:
  *  - rewrites the packet's IP TTL / IPv6 hop limit to the largest value
  *    seen so far on src,
  *  - when either peer tracks timestamps, modulates the timestamp option
  *    (adds src's pfss_ts_mod to tsval, subtracts dst's from tsecr), copies
  *    the options back and sets *writeback,
  *  - drops packets carrying more than one timestamp option,
  *  - switches PAWS off for peers that have been idle too long,
  *  - validates tsval/tsecr against the tracked window when PAWS is active
  *    on both peers,
  *  - records whether data packets carry timestamps and updates the
  *    tracked PAWS values.
  *
  * Returns 0 when the packet is acceptable, or PF_DROP with *reason set to
  * PFRES_TS.
  */
 int
 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
     u_short *reason, struct tcphdr *th, struct pf_state *state,
     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
 {
 	struct timeval uptime;
 	u_int32_t tsval, tsecr;
 	u_int tsval_from_last;
 	u_int8_t hdr[60];
 	u_int8_t *opt;
 	int copyback = 0;
 	int got_ts = 0;
 
 	KASSERT((src->scrub || dst->scrub),
 	    ("%s: src->scrub && dst->scrub!", __func__));
 
 	/*
 	 * Enforce the minimum TTL seen for this connection.  Negate a common
 	 * technique to evade an intrusion detection system and confuse
 	 * firewall state code.
 	 */
 	/*
 	 * NOTE(review): the code below tracks the largest TTL seen on src
 	 * and raises every packet's TTL to that value.
 	 */
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET: {
 		if (src->scrub) {
 			struct ip *h = mtod(m, struct ip *);
 			if (h->ip_ttl > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip_ttl;
 			h->ip_ttl = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		if (src->scrub) {
 			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
 			if (h->ip6_hlim > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip6_hlim;
 			h->ip6_hlim = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	/*
 	 * Only parse the options when the header has any and at least one
 	 * peer has timestamp tracking enabled.
 	 */
 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
 		/* Diddle with TCP options */
 		int hlen;
 		opt = hdr + sizeof(struct tcphdr);
 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
 		while (hlen >= TCPOLEN_TIMESTAMP) {
 			switch (*opt) {
 			case TCPOPT_EOL:	/* FALLTHROUGH */
 			case TCPOPT_NOP:
 				opt++;
 				hlen--;
 				break;
 			case TCPOPT_TIMESTAMP:
 				/* Modulate the timestamps.  Can be used for
 				 * NAT detection, OS uptime determination or
 				 * reboot detection.
 				 */
 
 				if (got_ts) {
 					/* Huh?  Multiple timestamps!? */
 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
 						DPFPRINTF(("multiple TS??"));
 						pf_print_state(state);
 						printf("\n");
 					}
 					REASON_SET(reason, PFRES_TS);
 					return (PF_DROP);
 				}
 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
 					memcpy(&tsval, &opt[2],
 					    sizeof(u_int32_t));
 					if (tsval && src->scrub &&
 					    (src->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsval = ntohl(tsval);
 						pf_change_a(&opt[2],
 						    &th->th_sum,
 						    htonl(tsval +
 						    src->scrub->pfss_ts_mod),
 						    0);
 						copyback = 1;
 					}
 
 					/* Modulate TS reply iff valid (!0) */
 					memcpy(&tsecr, &opt[6],
 					    sizeof(u_int32_t));
 					if (tsecr && dst->scrub &&
 					    (dst->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsecr = ntohl(tsecr)
 						    - dst->scrub->pfss_ts_mod;
 						pf_change_a(&opt[6],
 						    &th->th_sum, htonl(tsecr),
 						    0);
 						copyback = 1;
 					}
 					got_ts = 1;
 				}
 				/* FALLTHROUGH */
 			default:
 				/* Step over the option; never by less than 2. */
 				hlen -= MAX(opt[1], 2);
 				opt += MAX(opt[1], 2);
 				break;
 			}
 		}
 		if (copyback) {
 			/* Copyback the options, caller copys back header */
 			*writeback = 1;
 			m_copyback(m, off + sizeof(struct tcphdr),
 			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
 			    sizeof(struct tcphdr));
 		}
 	}
 
 
 	/*
 	 * Must invalidate PAWS checks on connections idle for too long.
 	 * The fastest allowed timestamp clock is 1ms.  That turns out to
 	 * be about 24 days before it wraps.  XXX Right now our lowerbound
 	 * TS echo check only works for the first 12 days of a connection
 	 * when the TS has exhausted half its 32bit space
 	 */
 #define TS_MAX_IDLE	(24*24*60*60)
 #define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
 
 	getmicrouptime(&uptime);
 	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
 	    time_uptime - state->creation > TS_MAX_CONN))  {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("src idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
 	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("dst idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 
 	if (got_ts && src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Validate that the timestamps are "in-window".
 		 * RFC1323 describes TCP Timestamp options that allow
 		 * measurement of RTT (round trip time) and PAWS
 		 * (protection against wrapped sequence numbers).  PAWS
 		 * gives us a set of rules for rejecting packets on
 		 * long fat pipes (packets that were somehow delayed
 		 * in transit longer than the time it took to send the
 		 * full TCP sequence space of 4Gb).  We can use these
 		 * rules and infer a few others that will let us treat
 		 * the 32bit timestamp and the 32bit echoed timestamp
 		 * as sequence numbers to prevent a blind attacker from
 		 * inserting packets into a connection.
 		 *
 		 * RFC1323 tells us:
 		 *  - The timestamp on this packet must be greater than
 		 *    or equal to the last value echoed by the other
 		 *    endpoint.  The RFC says those will be discarded
 		 *    since it is a dup that has already been acked.
 		 *    This gives us a lowerbound on the timestamp.
 		 *        timestamp >= other last echoed timestamp
 		 *  - The timestamp will be less than or equal to
 		 *    the last timestamp plus the time between the
 		 *    last packet and now.  The RFC defines the max
 		 *    clock rate as 1ms.  We will allow clocks to be
 		 *    up to 10% fast and will allow a total difference
 		 *    of 30 seconds due to a route change.  And this
 		 *    gives us an upperbound on the timestamp.
 		 *        timestamp <= last timestamp + max ticks
 		 *    We have to be careful here.  Windows will send an
 		 *    initial timestamp of zero and then initialize it
 		 *    to a random value after the 3whs; presumably to
 		 *    avoid a DoS by having to call an expensive RNG
 		 *    during a SYN flood.  Proof MS has at least one
 		 *    good security geek.
 		 *
 		 *  - The TCP timestamp option must also echo the other
 		 *    endpoints timestamp.  The timestamp echoed is the
 		 *    one carried on the earliest unacknowledged segment
 		 *    on the left edge of the sequence window.  The RFC
 		 *    states that the host will reject any echoed
 		 *    timestamps that were larger than any ever sent.
 		 *    This gives us an upperbound on the TS echo.
 		 *        tsecr <= largest_tsval
 		 *  - The lowerbound on the TS echo is a little more
 		 *    tricky to determine.  The other endpoint's echoed
 		 *    values will not decrease.  But there may be
 		 *    network conditions that re-order packets and
 		 *    cause our view of them to decrease.  For now the
 		 *    only lowerbound we can safely determine is that
 		 *    the TS echo will never be less than the original
 		 *    TS.  XXX There is probably a better lowerbound.
 		 *    Remove TS_MAX_CONN with better lowerbound check.
 		 *        tsecr >= other original TS
 		 *
 		 * It is also important to note that the fastest
 		 * timestamp clock of 1ms will wrap its 32bit space in
 		 * 24 days.  So we just disable TS checking after 24
 		 * days of idle time.  We actually must use a 12d
 		 * connection limit until we can come up with a better
 		 * lowerbound to the TS echo check.
 		 */
 		struct timeval delta_ts;
 		int ts_fudge;
 
 
 		/*
 		 * PFTM_TS_DIFF is how many seconds of leeway to allow
 		 * a host's timestamp.  This can happen if the previous
 		 * packet got delayed in transit for much longer than
 		 * this packet.
 		 */
 		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
 			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
 
 		/* Calculate max ticks since the last timestamp */
 #define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
 #define TS_MICROSECS	1000000		/* microseconds per second */
 		delta_ts = uptime;
 		timevalsub(&delta_ts, &src->scrub->pfss_last);
 		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
 		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
 
 		/* The window is only enforced once both peers are established. */
 		if ((src->state >= TCPS_ESTABLISHED &&
 		    dst->state >= TCPS_ESTABLISHED) &&
 		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
 		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
 		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
 		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
 			/* Bad RFC1323 implementation or an insertion attack.
 			 *
 			 * - Solaris 2.6 and 2.7 are known to send another ACK
 			 *   after the FIN,FIN|ACK,ACK closing that carries
 			 *   an old timestamp.
 			 */
 
 			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
 			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
 			    SEQ_GT(tsval, src->scrub->pfss_tsval +
 			    tsval_from_last) ? '1' : ' ',
 			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
 			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
 			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
 			    "idle: %jus %lums\n",
 			    tsval, tsecr, tsval_from_last,
 			    (uintmax_t)delta_ts.tv_sec,
 			    delta_ts.tv_usec / 1000));
 			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
 			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
 			    "\n", dst->scrub->pfss_tsval,
 			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 
 		/* XXX I'd really like to require tsecr but it's optional */
 
 	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
 	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
 	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
 	    src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Didn't send a timestamp.  Timestamps aren't really useful
 		 * when:
 		 *  - connection opening or closing (often not even sent).
 		 *    but we must not let an attacker to put a FIN on a
 		 *    data packet to sneak it through our ESTABLISHED check.
 		 *  - on a TCP reset.  RFC suggests not even looking at TS.
 		 *  - on an empty ACK.  The TS will not be echoed so it will
 		 *    probably not help keep the RTT calculation in sync and
 		 *    there isn't as much danger when the sequence numbers
 		 *    got wrapped.  So some stacks don't include TS on empty
 		 *    ACKs :-(
 		 *
 		 * To minimize the disruption to mostly RFC1323 conformant
 		 * stacks, we will only require timestamps on data packets.
 		 *
 		 * And what do ya know, we cannot require timestamps on data
 		 * packets.  There appear to be devices that do legitimate
 		 * TCP connection hijacking.  There are HTTP devices that allow
 		 * a 3whs (with timestamps) and then buffer the HTTP request.
 		 * If the intermediate device has the HTTP response cache, it
 		 * will spoof the response but not bother timestamping its
 		 * packets.  So we can look for the presence of a timestamp in
 		 * the first data packet and if there, require it in all future
 		 * packets.
 		 */
 
 		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
 			/*
 			 * Hey!  Someone tried to sneak a packet in.  Or the
 			 * stack changed its RFC1323 behavior?!?!
 			 */
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				DPFPRINTF(("Did not receive expected RFC1323 "
 				    "timestamp\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 	}
 
 
 	/*
 	 * We will note if a host sends his data packets with or without
 	 * timestamps.  And require all data packets to contain a timestamp
 	 * if the first does.  PAWS implicitly requires that all data packets be
 	 * timestamped.  But I think there are middle-man devices that hijack
 	 * TCP streams immediately after the 3whs and don't timestamp their
 	 * packets (seen in a WWW accelerator or cache).
 	 */
 	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
 	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
 		if (got_ts)
 			src->scrub->pfss_flags |= PFSS_DATA_TS;
 		else {
 			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
 			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
 			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
 				/* Don't warn if other host rejected RFC1323 */
 				DPFPRINTF(("Broken RFC1323 stack did not "
 				    "timestamp data packet. Disabled PAWS "
 				    "security.\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 		}
 	}
 
 
 	/*
 	 * Update PAWS values
 	 */
 	/* Only for peers with timestamps enabled that have not idled out. */
 	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
 	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
 		getmicrouptime(&src->scrub->pfss_last);
 		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
 		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 			src->scrub->pfss_tsval = tsval;
 
 		if (tsecr) {
 			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
 			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_tsecr = tsecr;
 
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
 			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
 			    src->scrub->pfss_tsval0 == 0)) {
 				/* tsval0 MUST be the lowest timestamp */
 				src->scrub->pfss_tsval0 = tsval;
 			}
 
 			/* Only fully initialized after a TS gets echoed */
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_flags |= PFSS_PAWS;
 		}
 	}
 
 	/* I have a dream....  TCP segment reassembly.... */
 	return (0);
 }
 
 /*
  * Clamp the TCP maximum segment size option to the rule's max_mss.
  *
  * The option bytes following the fixed TCP header are pulled into a local
  * buffer and walked.  Every MSS option whose value exceeds r->max_mss is
  * rewritten to r->max_mss, with th->th_sum patched incrementally.  If
  * anything changed, the option bytes are copied back into the mbuf; the
  * caller is expected to copy back the TCP header itself (th_sum lives
  * there).
  *
  * Returns 1 if an option was rewritten, 0 otherwise (including when the
  * options could not be pulled from the mbuf).
  */
 static int
 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
     int off, sa_family_t af)
 {
 	u_int16_t	 mss;
 	int		 thoff;
 	int		 opt, cnt, optlen = 0;
 	int		 rewrite = 0;
 	u_char		 opts[TCP_MAXOLEN];
 	u_char		*optp = opts;
 
 	thoff = th->th_off << 2;
 	cnt = thoff - sizeof(struct tcphdr);
 
 	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
 	    NULL, NULL, af))
 		return (rewrite);
 
 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
 		opt = optp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* Need a length byte, and it must stay in bounds. */
 			if (cnt < 2)
 				break;
 			optlen = optp[1];
 			if (optlen < 2 || optlen > cnt)
 				break;
 		}
 		switch (opt) {
 		case TCPOPT_MAXSEG:
 			/*
 			 * A truncated MSS option carries no complete value;
 			 * touching optp[2..3] would run past the option
 			 * (and possibly past opts[]).  Leave it alone.
 			 */
 			if (optlen < TCPOLEN_MAXSEG)
 				break;
 			/* Options are byte-aligned: avoid a misaligned load. */
 			memcpy(&mss, optp + 2, sizeof(mss));
 			if (ntohs(mss) > r->max_mss) {
 				th->th_sum = pf_cksum_fixup(th->th_sum,
 				    mss, htons(r->max_mss), 0);
 				mss = htons(r->max_mss);
 				memcpy(optp + 2, &mss, sizeof(mss));
 				rewrite = 1;
 			}
 			break;
 		default:
 			break;
 		}
 	}
 
 	if (rewrite)
 		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
 
 	return (rewrite);
 }
 
 #ifdef INET
 /*
  * Apply the IPv4 header scrub options of a rule to a packet.
  *
  * flags is the rule's rule_flag word, min_ttl the minimum TTL to enforce
  * (0 disables it) and tos the TOS value to set when PFRULE_SET_TOS is
  * present.  Depending on flags the function clears IP_DF, raises the TTL,
  * overwrites the TOS byte and replaces the IP id of non-fragments.  The
  * header checksum is patched incrementally after every change.
  */
 static void
 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
 {
 	struct mbuf		*m = *m0;
 	struct ip		*h = mtod(m, struct ip *);
 
 	/* Clear IP_DF if no-df was requested */
 	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* Enforce a minimum ttl, may cause endless packet loops */
 	if (min_ttl && h->ip_ttl < min_ttl) {
 		u_int16_t	ov, nv;
 
 		/*
 		 * The TTL is one byte of a 16-bit header word.  Patch the
 		 * checksum with that whole word before and after the change
 		 * (as done for the TOS byte below); passing the bare TTL
 		 * value is only correct on little-endian hosts.
 		 */
 		ov = *(u_int16_t *)&h->ip_ttl;
 		h->ip_ttl = min_ttl;
 		nv = *(u_int16_t *)&h->ip_ttl;
 
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
 	}
 
 	/* Enforce tos */
 	if (flags & PFRULE_SET_TOS) {
 		u_int16_t	ov, nv;
 
 		/* First 16-bit header word, before and after the change. */
 		ov = *(u_int16_t *)h;
 		h->ip_tos = tos;
 		nv = *(u_int16_t *)h;
 
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
 	}
 
 	/* random-id, but not for fragments */
 	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
 		u_int16_t ip_id = h->ip_id;
 
 		h->ip_id = ip_randomid();
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
 	}
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Raise the IPv6 hop limit of a packet to min_ttl when it is lower.
  * A min_ttl of 0 disables the check.
  */
 static void
 pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
 {
 	struct ip6_hdr		*ip6 = mtod(*m0, struct ip6_hdr *);
 
 	/* Nothing to do without a floor or when already at/above it. */
 	if (min_ttl == 0 || ip6->ip6_hlim >= min_ttl)
 		return;
 
 	/* Enforce a minimum ttl, may cause endless packet loops */
 	ip6->ip6_hlim = min_ttl;
 }
 #endif
Index: stable/10/usr.bin/kdump/Makefile
===================================================================
--- stable/10/usr.bin/kdump/Makefile	(revision 263085)
+++ stable/10/usr.bin/kdump/Makefile	(revision 263086)
@@ -1,40 +1,46 @@
 #	@(#)Makefile	8.1 (Berkeley) 6/6/93
 # $FreeBSD$
 
+.include <bsd.own.mk>
+
 .if (${MACHINE_ARCH} == "amd64")
 SFX=		32
 .endif
 
 .PATH: ${.CURDIR}/../ktrace
 
 PROG=		kdump
 SRCS=		kdump_subr.c kdump.c ioctl.c subr.c
 DPSRCS=		kdump_subr.h 
 CFLAGS+=	-I${.CURDIR}/../ktrace -I${.CURDIR} -I${.CURDIR}/../.. -I.
+
+.if ${MK_PF} != "no"
+CFLAGS+=-DPF
+.endif
 
 .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"
 SRCS+=		linux_syscalls.c
 .endif
 
 NO_WERROR?=	YES
 
 CLEANFILES=	ioctl.c kdump_subr.c kdump_subr.h linux_syscalls.c
 
 ioctl.c: mkioctls
 	env MACHINE=${MACHINE} CPP="${CPP}" \
 	    sh ${.CURDIR}/mkioctls print ${DESTDIR}/usr/include > ${.TARGET}
 
 kdump_subr.h: mksubr
 	sh ${.CURDIR}/mksubr ${DESTDIR}/usr/include | \
 	    sed -n 's/^\([a-z].*)\)$$/void \1;/p' >${.TARGET}
 
 kdump_subr.c: mksubr kdump_subr.h
 	sh ${.CURDIR}/mksubr ${DESTDIR}/usr/include >${.TARGET}
 
 linux_syscalls.c:
 	/bin/sh ${.CURDIR}/../../sys/kern/makesyscalls.sh \
 	    ${.CURDIR}/../../sys/${MACHINE_ARCH}/linux${SFX}/syscalls.master ${.CURDIR}/linux_syscalls.conf
 	echo "int nlinux_syscalls = sizeof(linux_syscallnames) / sizeof(linux_syscallnames[0]);" \
 	    >> linux_syscalls.c
 
 .include <bsd.prog.mk>
Index: stable/10/usr.bin/kdump/mkioctls
===================================================================
--- stable/10/usr.bin/kdump/mkioctls	(revision 263085)
+++ stable/10/usr.bin/kdump/mkioctls	(revision 263086)
@@ -1,115 +1,119 @@
 #!/bin/sh
 #
 # $FreeBSD$
 #
 # When editing this script, keep in mind that truss also uses it.
 #
 
 set -e
 
 if [ $# -ne 2 -o \( $1 != "print" -a $1 != "return" \) ]; then
 	echo "usage: sh $0 print|return include-dir"
 	exit 1
 fi
 
 style="$1"
 includedir="$2"
 
 LC_ALL=C; export LC_ALL
 
 # Build a list of headers that have ioctls in them.
 # XXX should we use an ANSI cpp?
 ioctl_includes=$(
 	cd $includedir
-	find -H -s * -name '*.h' | grep -v '.*disk.*\.h' | \
+	find -H -s * -name '*.h' | \
+	egrep -v '(.*disk.*|net/pfvar|net/if_pfsync)\.h' | \
 		xargs egrep -l \
 '^#[ 	]*define[ 	]+[A-Za-z_][A-Za-z0-9_]*[ 	]+_IO[^a-z0-9_]' |
 		awk '{printf("#include <%s>\\n", $1)}'
 )
 
 : ${MACHINE=$(uname -m)}
 case "${MACHINE}" in
 *pc98*)
 	ioctl_includes="$ioctl_includes#include <sys/diskpc98.h>\\n"
 	;;
 *)
 	ioctl_includes="$ioctl_includes#include <sys/diskmbr.h>\\n"
 	;;
 esac
 
 awk -v x="$ioctl_includes" 'BEGIN {print x}' |
 	$CPP -I$1 -dM -DCOMPAT_43TTY - |
 	awk -v ioctl_includes="$ioctl_includes" -v style="$style" '
 BEGIN {
 	print "/* XXX obnoxious prerequisites. */"
 	print "#define COMPAT_43"
 	print "#define COMPAT_43TTY"
 	print "#include <sys/param.h>"
 	print "#include <sys/devicestat.h>"
 	print "#include <sys/disklabel.h>"
 	print "#include <sys/socket.h>"
 	print "#include <sys/time.h>"
 	print "#include <sys/tty.h>"
 	print "#include <bsm/audit.h>"
 	print "#include <net/ethernet.h>"
 	print "#include <net/if.h>"
 	print "#include <net/if_var.h>"
+	print "#ifdef PF"
 	print "#include <net/pfvar.h>"
+	print "#include <net/if_pfsync.h>"
+	print "#endif"
 	print "#include <net/route.h>"
 	print "#include <netinet/in.h>"
 	print "#include <netinet/ip_mroute.h>"
 	print "#include <netinet6/in6_var.h>"
 	print "#include <netinet6/nd6.h>"
 	print "#include <netinet6/ip6_mroute.h>"
 	print "#include <stdio.h>"
 	print "#include <cam/cam.h>"
 	print "#include <stddef.h>"
 	print "#include <stdint.h>"
 	print ""
 	print ioctl_includes
 	print ""
 	if (style == "print") {
 		print "void ioctlname(unsigned long val, int decimal);"
 		print ""
 		print "void"
 		print "ioctlname(unsigned long val, int decimal)"
 	} else {
 		print "const char *ioctlname(unsigned long val);"
 		print ""
 		print "const char *"
 		print "ioctlname(unsigned long val)"
 	}
 	print "{"
 	print "\tconst char *str = NULL;"
 	print ""
 }
 
 /^#[ 	]*define[ 	]+[A-Za-z_][A-Za-z0-9_]*[ 	]+_IO/ {
 
 	# find where the name starts
 	for (i = 1; i <= NF; i++)
 		if ($i ~ /define/)
 			break;
 	++i;
 	#
 	printf("\t");
 	if (n++ > 0)
 		printf("else ");
 	printf("if (val == %s)\n", $i);
 	printf("\t\tstr = \"%s\";\n", $i);
 }
 END {
 	print ""
 	if (style == "print") {
 		print "\tif (str != NULL)"
 		print "\t\tprintf(\"%s\", str);"
 		print "\telse if (decimal)"
 		print "\t\tprintf(\"%lu\", val);"
 		print "\telse"
 		print "\t\tprintf(\"%#lx\", val);"
 	} else {
 		print "\treturn (str);"
 	}
 	print "}"
 }
 '
Index: stable/10/usr.bin/netstat/Makefile
===================================================================
--- stable/10/usr.bin/netstat/Makefile	(revision 263085)
+++ stable/10/usr.bin/netstat/Makefile	(revision 263086)
@@ -1,49 +1,53 @@
 #	@(#)Makefile	8.1 (Berkeley) 6/12/93
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 PROG=	netstat
 SRCS=	if.c inet.c main.c mbuf.c mroute.c netisr.c route.c \
 	unix.c atalk.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c \
 	flowtable.c
 
 WARNS?=	3
 CFLAGS+=-fno-strict-aliasing
 
 CFLAGS+=-DIPSEC
 CFLAGS+=-DSCTP
 
 .if ${MK_INET_SUPPORT} != "no"
 CFLAGS+=-DINET
 .endif
 
 .if ${MK_INET6_SUPPORT} != "no"
 SRCS+=	inet6.c
 CFLAGS+=-DINET6
 .endif
 
 .if ${MK_OFED} != "no"
 CFLAGS+=-DSDP
 .endif
 
+.if ${MK_PF} != "no"
+CFLAGS+=-DPF
+.endif
+
 BINGRP=	kmem
 BINMODE=2555
 DPADD=	${LIBKVM} ${LIBMEMSTAT} ${LIBUTIL}
 LDADD=	-lkvm -lmemstat -lutil
 
 .if ${MK_NETGRAPH_SUPPORT} != "no"
 SRCS+=	netgraph.c
 DPADD+=	${LIBNETGRAPH}
 LDADD+=	-lnetgraph
 CFLAGS+=-DNETGRAPH
 .endif
 
 .if ${MK_IPX_SUPPORT} != "no"
 SRCS+=	ipx.c
 DPADD+=	${LIBIPX}
 LDADD+=	-lipx
 CFLAGS+=-DIPX
 .endif
 
 .include <bsd.prog.mk>
Index: stable/10/usr.bin/netstat/if.c
===================================================================
--- stable/10/usr.bin/netstat/if.c	(revision 263085)
+++ stable/10/usr.bin/netstat/if.c	(revision 263086)
@@ -1,590 +1,594 @@
 /*-
  * Copyright (c) 2013 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 1983, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)if.c	8.3 (Berkeley) 4/28/95";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/ethernet.h>
-#include <net/pfvar.h>
-#include <net/if_pfsync.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netipx/ipx.h>
 #include <netipx/ipx_if.h>
 #include <arpa/inet.h>
+#ifdef PF
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+#endif
 
 #include <err.h>
 #include <errno.h>
 #include <ifaddrs.h>
 #include <libutil.h>
 #ifdef INET6
 #include <netdb.h>
 #endif
 #include <signal.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysexits.h>
 #include <unistd.h>
 
 #include "netstat.h"
 
 static void sidewaysintpr(int);
 
 #ifdef INET6
 static char addr_buf[NI_MAXHOST];		/* for getnameinfo() */
 #endif
 
+#ifdef PF
 static const char* pfsyncacts[] = {
 	/* PFSYNC_ACT_CLR */		"clear all request",
 	/* PFSYNC_ACT_INS */		"state insert",
 	/* PFSYNC_ACT_INS_ACK */	"state inserted ack",
 	/* PFSYNC_ACT_UPD */		"state update",
 	/* PFSYNC_ACT_UPD_C */		"compressed state update",
 	/* PFSYNC_ACT_UPD_REQ */	"uncompressed state request",
 	/* PFSYNC_ACT_DEL */		"state delete",
 	/* PFSYNC_ACT_DEL_C */		"compressed state delete",
 	/* PFSYNC_ACT_INS_F */		"fragment insert",
 	/* PFSYNC_ACT_DEL_F */		"fragment delete",
 	/* PFSYNC_ACT_BUS */		"bulk update mark",
 	/* PFSYNC_ACT_TDB */		"TDB replay counter update",
 	/* PFSYNC_ACT_EOF */		"end of frame mark",
 };
 
 static void
 pfsync_acts_stats(const char *fmt, uint64_t *a)
 {
 	int i;
 
 	for (i = 0; i < PFSYNC_ACT_MAX; i++, a++)
 		if (*a || sflag <= 1)
 			printf(fmt, *a, pfsyncacts[i], plural(*a));
 }
 
 /*
  * Dump pfsync statistics structure.
  */
 void
 pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct pfsyncstats pfsyncstat, zerostat;
 	size_t len = sizeof(struct pfsyncstats);
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.pfsync.stats", &pfsyncstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			if (errno != ENOENT)
 				warn("sysctl: net.pfsync.stats");
 			return;
 		}
 	} else
 		kread(off, &pfsyncstat, len);
 
 	printf("%s:\n", name);
 
 #define	p(f, m) if (pfsyncstat.f || sflag <= 1) \
 	printf(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f))
 
 	p(pfsyncs_ipackets, "\t%ju packet%s received (IPv4)\n");
 	p(pfsyncs_ipackets6, "\t%ju packet%s received (IPv6)\n");
 	pfsync_acts_stats("\t    %ju %s%s received\n",
 	    &pfsyncstat.pfsyncs_iacts[0]);
 	p(pfsyncs_badif, "\t\t%ju packet%s discarded for bad interface\n");
 	p(pfsyncs_badttl, "\t\t%ju packet%s discarded for bad ttl\n");
 	p(pfsyncs_hdrops, "\t\t%ju packet%s shorter than header\n");
 	p(pfsyncs_badver, "\t\t%ju packet%s discarded for bad version\n");
 	p(pfsyncs_badauth, "\t\t%ju packet%s discarded for bad HMAC\n");
 	p(pfsyncs_badact,"\t\t%ju packet%s discarded for bad action\n");
 	p(pfsyncs_badlen, "\t\t%ju packet%s discarded for short packet\n");
 	p(pfsyncs_badval, "\t\t%ju state%s discarded for bad values\n");
 	p(pfsyncs_stale, "\t\t%ju stale state%s\n");
 	p(pfsyncs_badstate, "\t\t%ju failed state lookup/insert%s\n");
 	p(pfsyncs_opackets, "\t%ju packet%s sent (IPv4)\n");
 	p(pfsyncs_opackets6, "\t%ju packet%s sent (IPv6)\n");
 	pfsync_acts_stats("\t    %ju %s%s sent\n",
 	    &pfsyncstat.pfsyncs_oacts[0]);
 	p(pfsyncs_onomem, "\t\t%ju failure%s due to mbuf memory error\n");
 	p(pfsyncs_oerrors, "\t\t%ju send error%s\n");
 #undef p
 }
+#endif /* PF */
 
 /*
  * Display a formatted value, or a '-' in the same space.
  */
 static void
 show_stat(const char *fmt, int width, u_long value, short showvalue)
 {
 	const char *lsep, *rsep;
 	char newfmt[32];
 
 	lsep = "";
 	if (strncmp(fmt, "LS", 2) == 0) {
 		lsep = " ";
 		fmt += 2;
 	}
 	rsep = " ";
 	if (strncmp(fmt, "NRS", 3) == 0) {
 		rsep = "";
 		fmt += 3;
 	}
 	if (showvalue == 0) {
 		/* Print just dash. */
 		sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep);
 		printf(newfmt, "-");
 		return;
 	}
 
 	if (hflag) {
 		char buf[5];
 
 		/* Format in human readable form. */
 		humanize_number(buf, sizeof(buf), (int64_t)value, "",
 		    HN_AUTOSCALE, HN_NOSPACE | HN_DECIMAL);
 		sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep);
 		printf(newfmt, buf);
 	} else {
 		/* Construct the format string. */
 		sprintf(newfmt, "%s%%%d%s%s", lsep, width, fmt, rsep);
 		printf(newfmt, value);
 	}
 }
 
 /*
  * Find next multiaddr for a given interface name.
  */
 static struct ifmaddrs *
 next_ifma(struct ifmaddrs *ifma, const char *name, const sa_family_t family)
 {
 
 	for(; ifma != NULL; ifma = ifma->ifma_next) {
 		struct sockaddr_dl *sdl;
 
 		sdl = (struct sockaddr_dl *)ifma->ifma_name;
 		if (ifma->ifma_addr->sa_family == family &&
 		    strcmp(sdl->sdl_data, name) == 0)
 			break;
 	}
 
 	return (ifma);
 }
 
 /*
  * Print a description of the network interfaces.
  */
 void
 intpr(int interval, void (*pfunc)(char *))
 {
 	struct ifaddrs *ifap, *ifa;
 	struct ifmaddrs *ifmap, *ifma;
 	
 	if (interval)
 		return sidewaysintpr(interval);
 
 	if (getifaddrs(&ifap) != 0)
 		err(EX_OSERR, "getifaddrs");
 	if (aflag && getifmaddrs(&ifmap) != 0)
 		err(EX_OSERR, "getifmaddrs");
 
 	if (!pfunc) {
 		if (Wflag)
 			printf("%-7.7s", "Name");
 		else
 			printf("%-5.5s", "Name");
 		printf(" %5.5s %-13.13s %-17.17s %8.8s %5.5s %5.5s",
 		    "Mtu", "Network", "Address", "Ipkts", "Ierrs", "Idrop");
 		if (bflag)
 			printf(" %10.10s","Ibytes");
 		printf(" %8.8s %5.5s", "Opkts", "Oerrs");
 		if (bflag)
 			printf(" %10.10s","Obytes");
 		printf(" %5s", "Coll");
 		if (dflag)
 			printf(" %s", "Drop");
 		putchar('\n');
 	}
 
 	for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
 		bool network = false, link = false;
 
 		if (interface != NULL && strcmp(ifa->ifa_name, interface) != 0)
 			continue;
 
 		if (pfunc) {
 			char *name;
 
 			name = ifa->ifa_name;
 			(*pfunc)(name);
 
 			/*
 			 * Skip all ifaddrs belonging to same interface.
 			 */
 			while(ifa->ifa_next != NULL &&
 			    (strcmp(ifa->ifa_next->ifa_name, name) == 0)) {
 				ifa = ifa->ifa_next;
 			}
 			continue;
 		}
 
 		if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af)
 			continue;
 
 		if (Wflag)
 			printf("%-7.7s", ifa->ifa_name);
 		else
 			printf("%-5.5s", ifa->ifa_name);
 
 #define IFA_MTU(ifa)	(((struct if_data *)(ifa)->ifa_data)->ifi_mtu)
 		show_stat("lu", 6, IFA_MTU(ifa), IFA_MTU(ifa));
 #undef IFA_MTU
 
 		switch (ifa->ifa_addr->sa_family) {
 		case AF_UNSPEC:
 			printf("%-13.13s ", "none");
 			printf("%-15.15s ", "none");
 			break;
 		case AF_INET:
 		    {
 			struct sockaddr_in *sin, *mask;
 
 			sin = (struct sockaddr_in *)ifa->ifa_addr;
 			mask = (struct sockaddr_in *)ifa->ifa_netmask;
 			printf("%-13.13s ", netname(sin->sin_addr.s_addr,
 			    mask->sin_addr.s_addr));
 			printf("%-17.17s ",
 			    routename(sin->sin_addr.s_addr));
 
 			network = true;
 			break;
 		    }
 #ifdef INET6
 		case AF_INET6:
 		    {
 			struct sockaddr_in6 *sin6, *mask;
 
 			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 			mask = (struct sockaddr_in6 *)ifa->ifa_netmask;
 
 			printf("%-13.13s ", netname6(sin6, &mask->sin6_addr));
 			getnameinfo(ifa->ifa_addr, ifa->ifa_addr->sa_len,
 			    addr_buf, sizeof(addr_buf), 0, 0, NI_NUMERICHOST);
 			printf("%-17.17s ", addr_buf);
 
 			network = 1;
 			break;
 	            }
 #endif /* INET6 */
 		case AF_IPX:
 		    {
 			struct sockaddr_ipx *sipx;
 			u_long net;
 			char netnum[10];
 
 			sipx = (struct sockaddr_ipx *)ifa->ifa_addr;
 			*(union ipx_net *) &net = sipx->sipx_addr.x_net;
 
 			sprintf(netnum, "%lx", (u_long)ntohl(net));
 			printf("ipx:%-8s  ", netnum);
 			printf("%-17s ", ipx_phost((struct sockaddr *)sipx));
 
 			network = 1;
 			break;
 		    }
 		case AF_APPLETALK:
 			printf("atalk:%-12.12s ",
 			    atalk_print(ifa->ifa_addr, 0x10));
 			printf("%-11.11s  ",
 			    atalk_print(ifa->ifa_addr, 0x0b));
 			break;
 		case AF_LINK:
 		    {
 			struct sockaddr_dl *sdl;
 			char *cp, linknum[10];
 			int n, m;
 
 			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 			cp = (char *)LLADDR(sdl);
 			n = sdl->sdl_alen;
 			sprintf(linknum, "<Link#%d>", sdl->sdl_index);
 			m = printf("%-13.13s ", linknum);
 
 			while ((--n >= 0) && (m < 30))
 				m += printf("%02x%c", *cp++ & 0xff,
 					    n > 0 ? ':' : ' ');
 			m = 32 - m;
 			while (m-- > 0)
 				putchar(' ');
 
 			link = 1;
 			break;
 		    }
 		}
 
 #define	IFA_STAT(s)	(((struct if_data *)ifa->ifa_data)->ifi_ ## s)
 		show_stat("lu", 8, IFA_STAT(ipackets), link|network);
 		show_stat("lu", 5, IFA_STAT(ierrors), link);
 		show_stat("lu", 5, IFA_STAT(iqdrops), link);
 		if (bflag)
 			show_stat("lu", 10, IFA_STAT(ibytes), link|network);
 		show_stat("lu", 8, IFA_STAT(opackets), link|network);
 		show_stat("lu", 5, IFA_STAT(oerrors), link);
 		if (bflag)
 			show_stat("lu", 10, IFA_STAT(obytes), link|network);
 		show_stat("NRSlu", 5, IFA_STAT(collisions), link);
 		/* XXXGL: output queue drops */
 		putchar('\n');
 
 		if (!aflag)
 			continue;
 
 		/*
 		 * Print family's multicast addresses.
 		 */
 		for (ifma = next_ifma(ifmap, ifa->ifa_name,
 		     ifa->ifa_addr->sa_family);
 		     ifma != NULL;
 		     ifma = next_ifma(ifma, ifa->ifa_name,
 		     ifa->ifa_addr->sa_family)) {
 			const char *fmt = NULL;
 
 			switch (ifma->ifma_addr->sa_family) {
 			case AF_INET:
 			    {
 				struct sockaddr_in *sin;
 
 				sin = (struct sockaddr_in *)ifma->ifma_addr;
 				fmt = routename(sin->sin_addr.s_addr);
 				break;
 			    }
 #ifdef INET6
 			case AF_INET6:
 
 				/* in6_fillscopeid(&msa.in6); */
 				getnameinfo(ifma->ifma_addr,
 				    ifma->ifma_addr->sa_len, addr_buf,
 				    sizeof(addr_buf), 0, 0, NI_NUMERICHOST);
 				printf("%*s %s\n",
 				    Wflag ? 27 : 25, "", addr_buf);
 				break;
 #endif /* INET6 */
 			case AF_LINK:
 			    {
 				struct sockaddr_dl *sdl;
 
 				sdl = (struct sockaddr_dl *)ifma->ifma_addr;
 				switch (sdl->sdl_type) {
 				case IFT_ETHER:
 				case IFT_FDDI:
 					fmt = ether_ntoa(
 					    (struct ether_addr *)LLADDR(sdl));
 					break;
 				}
 				break;
 			    }
 			}
 
 			if (fmt) {
 				printf("%*s %-17.17s",
 				    Wflag ? 27 : 25, "", fmt);
 				if (ifma->ifma_addr->sa_family == AF_LINK) {
 					printf(" %8lu", IFA_STAT(imcasts));
 					printf("%*s", bflag ? 17 : 6, "");
 					printf(" %8lu", IFA_STAT(omcasts));
 				}
 				putchar('\n');
 			}
 
 			ifma = ifma->ifma_next;
 		}
 	}
 
 	freeifaddrs(ifap);
 	if (aflag)
 		freeifmaddrs(ifmap);
 }
 
 struct iftot {
 	u_long	ift_ip;			/* input packets */
 	u_long	ift_ie;			/* input errors */
 	u_long	ift_id;			/* input drops */
 	u_long	ift_op;			/* output packets */
 	u_long	ift_oe;			/* output errors */
 	u_long	ift_co;			/* collisions */
 	u_long	ift_ib;			/* input bytes */
 	u_long	ift_ob;			/* output bytes */
 };
 
 /*
  * Obtain stats for interface(s).
  */
 static void
 fill_iftot(struct iftot *st)
 {
 	struct ifaddrs *ifap, *ifa;
 	bool found = false;
 
 	if (getifaddrs(&ifap) != 0)
 		err(EX_OSERR, "getifaddrs");
 
 	bzero(st, sizeof(*st));
 
 	for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
 		if (ifa->ifa_addr->sa_family != AF_LINK)
 			continue;
 		if (interface) {
 			if (strcmp(ifa->ifa_name, interface) == 0)
 				found = true;
 			else
 				continue;
 		}
 
 		st->ift_ip += IFA_STAT(ipackets);
 		st->ift_ie += IFA_STAT(ierrors);
 		st->ift_id += IFA_STAT(iqdrops);
 		st->ift_ib += IFA_STAT(ibytes);
 		st->ift_op += IFA_STAT(opackets);
 		st->ift_oe += IFA_STAT(oerrors);
 		st->ift_ob += IFA_STAT(obytes);
  		st->ift_co += IFA_STAT(collisions);
 	}
 
 	if (interface && found == false)
 		err(EX_DATAERR, "interface %s not found", interface);
 
 	freeifaddrs(ifap);
 }
 
 /*
  * Set a flag to indicate that a signal from the periodic itimer has been
  * caught.
  */
 static sig_atomic_t signalled;
 static void
 catchalarm(int signo __unused)
 {
 	signalled = true;
 }
 
 /*
  * Print a running summary of interface statistics.
  * Repeat display every interval seconds, showing statistics
  * collected over that interval.  Assumes that interval is non-zero.
  * First line printed at top of screen is always cumulative.
  */
 static void
 sidewaysintpr(int interval)
 {
 	struct iftot ift[2], *new, *old;
 	struct itimerval interval_it;
 	int oldmask, line;
 
 	new = &ift[0];
 	old = &ift[1];
 	fill_iftot(old);
 
 	(void)signal(SIGALRM, catchalarm);
 	signalled = false;
 	interval_it.it_interval.tv_sec = interval;
 	interval_it.it_interval.tv_usec = 0;
 	interval_it.it_value = interval_it.it_interval;
 	setitimer(ITIMER_REAL, &interval_it, NULL);
 
 banner:
 	printf("%17s %14s %16s", "input",
 	    interface != NULL ? interface : "(Total)", "output");
 	putchar('\n');
 	printf("%10s %5s %5s %10s %10s %5s %10s %5s",
 	    "packets", "errs", "idrops", "bytes", "packets", "errs", "bytes",
 	    "colls");
 	if (dflag)
 		printf(" %5.5s", "drops");
 	putchar('\n');
 	fflush(stdout);
 	line = 0;
 
 loop:
 	if ((noutputs != 0) && (--noutputs == 0))
 		exit(0);
 	oldmask = sigblock(sigmask(SIGALRM));
 	while (!signalled)
 		sigpause(0);
 	signalled = false;
 	sigsetmask(oldmask);
 	line++;
 
 	fill_iftot(new);
 
 	show_stat("lu", 10, new->ift_ip - old->ift_ip, 1);
 	show_stat("lu", 5, new->ift_ie - old->ift_ie, 1);
 	show_stat("lu", 5, new->ift_id - old->ift_id, 1);
 	show_stat("lu", 10, new->ift_ib - old->ift_ib, 1);
 	show_stat("lu", 10, new->ift_op - old->ift_op, 1);
 	show_stat("lu", 5, new->ift_oe - old->ift_oe, 1);
 	show_stat("lu", 10, new->ift_ob - old->ift_ob, 1);
 	show_stat("NRSlu", 5, new->ift_co - old->ift_co, 1);
 	/* XXXGL: output queue drops */
 	putchar('\n');
 	fflush(stdout);
 
 	if (new == &ift[0]) {
 		new = &ift[1];
 		old = &ift[0];
 	} else {
 		new = &ift[0];
 		old = &ift[1];
 	}
 
 	if (line == 21)
 		goto banner;
 	else
 		goto loop;
 
 	/* NOTREACHED */
 }
Index: stable/10/usr.bin/netstat/main.c
===================================================================
--- stable/10/usr.bin/netstat/main.c	(revision 263085)
+++ stable/10/usr.bin/netstat/main.c	(revision 263086)
@@ -1,846 +1,848 @@
 /*-
  * Copyright (c) 1983, 1988, 1993
  *	Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 char const copyright[] =
 "@(#) Copyright (c) 1983, 1988, 1993\n\
 	Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)main.c	8.4 (Berkeley) 3/1/94";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/file.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 
 #include <netinet/in.h>
 
 #ifdef NETGRAPH
 #include <netgraph/ng_socket.h>
 #endif
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <kvm.h>
 #include <limits.h>
 #include <netdb.h>
 #include <nlist.h>
 #include <paths.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "netstat.h"
 
 static struct nlist nl[] = {
 #define	N_IFNET		0
 	{ .n_name = "_ifnet" },		/* XXXGL: can be deleted */
 #define	N_RTSTAT	1
 	{ .n_name = "_rtstat" },
 #define	N_RTREE		2
 	{ .n_name = "_rt_tables"},
 #define	N_MRTSTAT	3
 	{ .n_name = "_mrtstat" },
 #define	N_MFCHASHTBL	4
 	{ .n_name = "_mfchashtbl" },
 #define	N_VIFTABLE	5
 	{ .n_name = "_viftable" },
 #define	N_IPX		6
 	{ .n_name = "_ipxpcb_list"},
 #define	N_IPXSTAT	7
 	{ .n_name = "_ipxstat"},
 #define	N_SPXSTAT	8
 	{ .n_name = "_spx_istat"},
 #define	N_DDPSTAT	9
 	{ .n_name = "_ddpstat"},
 #define	N_DDPCB		10
 	{ .n_name = "_ddpcb"},
 #define	N_NGSOCKS	11
 	{ .n_name = "_ngsocklist"},
 #define	N_IP6STAT	12
 	{ .n_name = "_ip6stat" },
 #define	N_ICMP6STAT	13
 	{ .n_name = "_icmp6stat" },
 #define	N_IPSECSTAT	14
 	{ .n_name = "_ipsec4stat" },
 #define	N_IPSEC6STAT	15
 	{ .n_name = "_ipsec6stat" },
 #define	N_PIM6STAT	16
 	{ .n_name = "_pim6stat" },
 #define	N_MRT6STAT	17
 	{ .n_name = "_mrt6stat" },
 #define	N_MF6CTABLE	18
 	{ .n_name = "_mf6ctable" },
 #define	N_MIF6TABLE	19
 	{ .n_name = "_mif6table" },
 #define	N_PFKEYSTAT	20
 	{ .n_name = "_pfkeystat" },
 #define	N_RTTRASH	21
 	{ .n_name = "_rttrash" },
 #define	N_CARPSTAT	22
 	{ .n_name = "_carpstats" },
 #define	N_PFSYNCSTAT	23
 	{ .n_name = "_pfsyncstats" },
 #define	N_AHSTAT	24
 	{ .n_name = "_ahstat" },
 #define	N_ESPSTAT	25
 	{ .n_name = "_espstat" },
 #define	N_IPCOMPSTAT	26
 	{ .n_name = "_ipcompstat" },
 #define	N_TCPSTAT	27
 	{ .n_name = "_tcpstat" },
 #define	N_UDPSTAT	28
 	{ .n_name = "_udpstat" },
 #define	N_IPSTAT	29
 	{ .n_name = "_ipstat" },
 #define	N_ICMPSTAT	30
 	{ .n_name = "_icmpstat" },
 #define	N_IGMPSTAT	31
 	{ .n_name = "_igmpstat" },
 #define	N_PIMSTAT	32
 	{ .n_name = "_pimstat" },
 #define	N_TCBINFO	33
 	{ .n_name = "_tcbinfo" },
 #define	N_UDBINFO	34
 	{ .n_name = "_udbinfo" },
 #define	N_DIVCBINFO	35
 	{ .n_name = "_divcbinfo" },
 #define	N_RIPCBINFO	36
 	{ .n_name = "_ripcbinfo" },
 #define	N_UNP_COUNT	37
 	{ .n_name = "_unp_count" },
 #define	N_UNP_GENCNT	38
 	{ .n_name = "_unp_gencnt" },
 #define	N_UNP_DHEAD	39
 	{ .n_name = "_unp_dhead" },
 #define	N_UNP_SHEAD	40
 	{ .n_name = "_unp_shead" },
 #define	N_RIP6STAT	41
 	{ .n_name = "_rip6stat" },
 #define	N_SCTPSTAT	42
 	{ .n_name = "_sctpstat" },
 #define	N_MFCTABLESIZE	43
 	{ .n_name = "_mfctablesize" },
 #define	N_ARPSTAT       44
 	{ .n_name = "_arpstat" },
 #define	N_UNP_SPHEAD	45
 	{ .n_name = "unp_sphead" },
 #define	N_SFSTAT	46
 	{ .n_name = "_sfstat"},
 	{ .n_name = NULL },
 };
 
 struct protox {
 	int	pr_index;		/* index into nlist of cb head */
 	int	pr_sindex;		/* index into nlist of stat block */
 	u_char	pr_wanted;		/* 1 if wanted, 0 otherwise */
 	void	(*pr_cblocks)(u_long, const char *, int, int);
 					/* control blocks printing routine */
 	void	(*pr_stats)(u_long, const char *, int, int);
 					/* statistics printing routine */
 	void	(*pr_istats)(char *);	/* per/if statistics printing routine */
 	const char	*pr_name;		/* well-known name */
 	int	pr_usesysctl;		/* non-zero if we use sysctl, not kvm */
 	int	pr_protocol;
 } protox[] = {
 	{ N_TCBINFO,	N_TCPSTAT,	1,	protopr,
 	  tcp_stats,	NULL,		"tcp",	1,	IPPROTO_TCP },
 	{ N_UDBINFO,	N_UDPSTAT,	1,	protopr,
 	  udp_stats,	NULL,		"udp",	1,	IPPROTO_UDP },
 #ifdef SCTP
 	{ -1,		N_SCTPSTAT,	1,	sctp_protopr,
 	  sctp_stats,	NULL,		"sctp",	1,	IPPROTO_SCTP },
 #endif
 #ifdef SDP
 	{ -1,		-1,		1,	protopr,
 	 NULL,		NULL,		"sdp",	1,	IPPROTO_TCP },
 #endif
 	{ N_DIVCBINFO,	-1,		1,	protopr,
 	  NULL,		NULL,		"divert", 1,	IPPROTO_DIVERT },
 	{ N_RIPCBINFO,	N_IPSTAT,	1,	protopr,
 	  ip_stats,	NULL,		"ip",	1,	IPPROTO_RAW },
 	{ N_RIPCBINFO,	N_ICMPSTAT,	1,	protopr,
 	  icmp_stats,	NULL,		"icmp",	1,	IPPROTO_ICMP },
 	{ N_RIPCBINFO,	N_IGMPSTAT,	1,	protopr,
 	  igmp_stats,	NULL,		"igmp",	1,	IPPROTO_IGMP },
 #ifdef IPSEC
 	{ -1,		N_IPSECSTAT,	1,	NULL,	/* keep as compat */
 	  ipsec_stats,	NULL,		"ipsec", 0,	0},
 	{ -1,		N_AHSTAT,	1,	NULL,
 	  ah_stats,	NULL,		"ah",	0,	0},
 	{ -1,		N_ESPSTAT,	1,	NULL,
 	  esp_stats,	NULL,		"esp",	0,	0},
 	{ -1,		N_IPCOMPSTAT,	1,	NULL,
 	  ipcomp_stats,	NULL,		"ipcomp", 0,	0},
 #endif
 	{ N_RIPCBINFO,	N_PIMSTAT,	1,	protopr,
 	  pim_stats,	NULL,		"pim",	1,	IPPROTO_PIM },
 	{ -1,		N_CARPSTAT,	1,	NULL,
 	  carp_stats,	NULL,		"carp",	1,	0 },
+#ifdef PF
 	{ -1,		N_PFSYNCSTAT,	1,	NULL,
 	  pfsync_stats,	NULL,		"pfsync", 1,	0 },
+#endif
 	{ -1,		N_ARPSTAT,	1,	NULL,
 	  arp_stats,	NULL,		"arp", 1,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		NULL,	0,	0 }
 };
 
 #ifdef INET6
 struct protox ip6protox[] = {
 	{ N_TCBINFO,	N_TCPSTAT,	1,	protopr,
 	  tcp_stats,	NULL,		"tcp",	1,	IPPROTO_TCP },
 	{ N_UDBINFO,	N_UDPSTAT,	1,	protopr,
 	  udp_stats,	NULL,		"udp",	1,	IPPROTO_UDP },
 	{ N_RIPCBINFO,	N_IP6STAT,	1,	protopr,
 	  ip6_stats,	ip6_ifstats,	"ip6",	1,	IPPROTO_RAW },
 	{ N_RIPCBINFO,	N_ICMP6STAT,	1,	protopr,
 	  icmp6_stats,	icmp6_ifstats,	"icmp6", 1,	IPPROTO_ICMPV6 },
 #ifdef SDP
 	{ -1,		-1,		1,	protopr,
 	 NULL,		NULL,		"sdp",	1,	IPPROTO_TCP },
 #endif
 #ifdef IPSEC
 	{ -1,		N_IPSEC6STAT,	1,	NULL,
 	  ipsec_stats,	NULL,		"ipsec6", 0,	0 },
 #endif
 #ifdef notyet
 	{ -1,		N_PIM6STAT,	1,	NULL,
 	  pim6_stats,	NULL,		"pim6",	1,	0 },
 #endif
 	{ -1,		N_RIP6STAT,	1,	NULL,
 	  rip6_stats,	NULL,		"rip6",	1,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		NULL,	0,	0 }
 };
 #endif /*INET6*/
 
 #ifdef IPSEC
 struct protox pfkeyprotox[] = {
 	{ -1,		N_PFKEYSTAT,	1,	NULL,
 	  pfkey_stats,	NULL,		"pfkey", 0,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		NULL,	0,	0 }
 };
 #endif
 
 struct protox atalkprotox[] = {
 	{ N_DDPCB,	N_DDPSTAT,	1,	atalkprotopr,
 	  ddp_stats,	NULL,		"ddp",	0,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		NULL,	0,	0 }
 };
 #ifdef NETGRAPH
 struct protox netgraphprotox[] = {
 	{ N_NGSOCKS,	-1,		1,	netgraphprotopr,
 	  NULL,		NULL,		"ctrl",	0,	0 },
 	{ N_NGSOCKS,	-1,		1,	netgraphprotopr,
 	  NULL,		NULL,		"data",	0,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		NULL,	0,	0 }
 };
 #endif
 #ifdef IPX
 struct protox ipxprotox[] = {
 	{ N_IPX,	N_IPXSTAT,	1,	ipxprotopr,
 	  ipx_stats,	NULL,		"ipx",	0,	0 },
 	{ N_IPX,	N_SPXSTAT,	1,	ipxprotopr,
 	  spx_stats,	NULL,		"spx",	0,	0 },
 	{ -1,		-1,		0,	NULL,
 	  NULL,		NULL,		0,	0,	0 }
 };
 #endif
 
 struct protox *protoprotox[] = {
 					 protox,
 #ifdef INET6
 					 ip6protox,
 #endif
 #ifdef IPSEC
 					 pfkeyprotox,
 #endif
 #ifdef IPX
 					 ipxprotox,
 #endif
 					 atalkprotox, NULL };
 
 static void printproto(struct protox *, const char *);
 static void usage(void);
 static struct protox *name2protox(const char *);
 static struct protox *knownname(const char *);
 
 static kvm_t *kvmd;
 static char *nlistf = NULL, *memf = NULL;
 
 int	Aflag;		/* show addresses of protocol control block */
 int	aflag;		/* show all sockets (including servers) */
 int	Bflag;		/* show information about bpf consumers */
 int	bflag;		/* show i/f total bytes in/out */
 int	dflag;		/* show i/f dropped packets */
 int	gflag;		/* show group (multicast) routing or stats */
 int	hflag;		/* show counters in human readable format */
 int	iflag;		/* show interfaces */
 int	Lflag;		/* show size of listen queues */
 int	mflag;		/* show memory stats */
 int	noutputs = 0;	/* how much outputs before we exit */
 int	numeric_addr;	/* show addresses numerically */
 int	numeric_port;	/* show ports numerically */
 static int pflag;	/* show given protocol */
 int	Qflag;		/* show netisr information */
 int	rflag;		/* show routing tables (or routing stats) */
 int	sflag;		/* show protocol statistics */
 int	Wflag;		/* wide display */
 int	Tflag;		/* TCP Information */
 int	xflag;		/* extra information, includes all socket buffer info */
 int	zflag;		/* zero stats */
 
 int	interval;	/* repeat interval for i/f stats */
 
 char	*interface;	/* desired i/f for stats, or NULL for all i/fs */
 int	unit;		/* unit number for above */
 
 int	af;		/* address family */
 int	live;		/* true if we are examining a live system */
 
 int
 main(int argc, char *argv[])
 {
 	struct protox *tp = NULL;  /* for printing cblocks & stats */
 	int ch;
 	int fib = -1;
 	char *endptr;
 
 	af = AF_UNSPEC;
 
 	while ((ch = getopt(argc, argv, "AaBbdF:f:ghI:iLlM:mN:np:Qq:rSTsuWw:xz"))
 	    != -1)
 		switch(ch) {
 		case 'A':
 			Aflag = 1;
 			break;
 		case 'a':
 			aflag = 1;
 			break;
 		case 'B':
 			Bflag = 1;
 			break;
 		case 'b':
 			bflag = 1;
 			break;
 		case 'd':
 			dflag = 1;
 			break;
 		case 'F':
 			fib = strtol(optarg, &endptr, 0);
 			if (*endptr != '\0' ||
 			    (fib == 0 && (errno == EINVAL || errno == ERANGE)))
 				errx(1, "%s: invalid fib", optarg);
 			break;
 		case 'f':
 			if (strcmp(optarg, "ipx") == 0)
 				af = AF_IPX;
 			else if (strcmp(optarg, "inet") == 0)
 				af = AF_INET;
 #ifdef INET6
 			else if (strcmp(optarg, "inet6") == 0)
 				af = AF_INET6;
 #endif
 #ifdef IPSEC
 			else if (strcmp(optarg, "pfkey") == 0)
 				af = PF_KEY;
 #endif
 			else if (strcmp(optarg, "unix") == 0)
 				af = AF_UNIX;
 			else if (strcmp(optarg, "atalk") == 0)
 				af = AF_APPLETALK;
 #ifdef NETGRAPH
 			else if (strcmp(optarg, "ng") == 0
 			    || strcmp(optarg, "netgraph") == 0)
 				af = AF_NETGRAPH;
 #endif
 			else if (strcmp(optarg, "link") == 0)
 				af = AF_LINK;
 			else {
 				errx(1, "%s: unknown address family", optarg);
 			}
 			break;
 		case 'g':
 			gflag = 1;
 			break;
 		case 'h':
 			hflag = 1;
 			break;
 		case 'I': {
 			char *cp;
 
 			iflag = 1;
 			for (cp = interface = optarg; isalpha(*cp); cp++)
 				continue;
 			unit = atoi(cp);
 			break;
 		}
 		case 'i':
 			iflag = 1;
 			break;
 		case 'L':
 			Lflag = 1;
 			break;
 		case 'M':
 			memf = optarg;
 			break;
 		case 'm':
 			mflag = 1;
 			break;
 		case 'N':
 			nlistf = optarg;
 			break;
 		case 'n':
 			numeric_addr = numeric_port = 1;
 			break;
 		case 'p':
 			if ((tp = name2protox(optarg)) == NULL) {
 				errx(1,
 				     "%s: unknown or uninstrumented protocol",
 				     optarg);
 			}
 			pflag = 1;
 			break;
 		case 'Q':
 			Qflag = 1;
 			break;
 		case 'q':
 			noutputs = atoi(optarg);
 			if (noutputs != 0)
 				noutputs++;
 			break;
 		case 'r':
 			rflag = 1;
 			break;
 		case 's':
 			++sflag;
 			break;
 		case 'S':
 			numeric_addr = 1;
 			break;
 		case 'u':
 			af = AF_UNIX;
 			break;
 		case 'W':
 		case 'l':
 			Wflag = 1;
 			break;
 		case 'w':
 			interval = atoi(optarg);
 			iflag = 1;
 			break;
 		case 'T':
 			Tflag = 1;
 			break;
 		case 'x':
 			xflag = 1;
 			break;
 		case 'z':
 			zflag = 1;
 			break;
 		case '?':
 		default:
 			usage();
 		}
 	argv += optind;
 	argc -= optind;
 
 #define	BACKWARD_COMPATIBILITY
 #ifdef	BACKWARD_COMPATIBILITY
 	if (*argv) {
 		if (isdigit(**argv)) {
 			interval = atoi(*argv);
 			if (interval <= 0)
 				usage();
 			++argv;
 			iflag = 1;
 		}
 		if (*argv) {
 			nlistf = *argv;
 			if (*++argv)
 				memf = *argv;
 		}
 	}
 #endif
 
 	/*
 	 * Discard setgid privileges if not the running kernel so that bad
 	 * guys can't print interesting stuff from kernel memory.
 	 */
 	live = (nlistf == NULL && memf == NULL);
 	if (!live)
 		setgid(getgid());
 
 	if (xflag && Tflag) 
 		errx(1, "-x and -T are incompatible, pick one.");
 
 	if (Bflag) {
 		if (!live)
 			usage();
 		bpf_stats(interface);
 		exit(0);
 	}
 	if (mflag) {
 		if (!live) {
 			if (kread(0, NULL, 0) == 0)
 				mbpr(kvmd, nl[N_SFSTAT].n_value);
 		} else
 			mbpr(NULL, 0);
 		exit(0);
 	}
 	if (Qflag) {
 		if (!live) {
 			if (kread(0, NULL, 0) == 0)
 				netisr_stats(kvmd);
 		} else
 			netisr_stats(NULL);
 		exit(0);
 	}
 #if 0
 	/*
 	 * Keep file descriptors open to avoid overhead
 	 * of open/close on each call to get* routines.
 	 */
 	sethostent(1);
 	setnetent(1);
 #else
 	/*
 	 * This does not make sense any more with DNS being default over
 	 * the files.  Doing a setXXXXent(1) causes a tcp connection to be
 	 * used for the queries, which is slower.
 	 */
 #endif
 	kread(0, NULL, 0);
 	if (iflag && !sflag) {
 		intpr(interval, NULL);
 		exit(0);
 	}
 	if (rflag) {
 		if (sflag) {
 			rt_stats(nl[N_RTSTAT].n_value, nl[N_RTTRASH].n_value);
 			flowtable_stats();
 		} else
 			routepr(nl[N_RTREE].n_value, fib);
 		exit(0);
 	}
 	if (gflag) {
 		if (sflag) {
 			if (af == AF_INET || af == AF_UNSPEC)
 				mrt_stats(nl[N_MRTSTAT].n_value);
 #ifdef INET6
 			if (af == AF_INET6 || af == AF_UNSPEC)
 				mrt6_stats(nl[N_MRT6STAT].n_value);
 #endif
 		} else {
 			if (af == AF_INET || af == AF_UNSPEC)
 				mroutepr(nl[N_MFCHASHTBL].n_value,
 					 nl[N_MFCTABLESIZE].n_value,
 					 nl[N_VIFTABLE].n_value);
 #ifdef INET6
 			if (af == AF_INET6 || af == AF_UNSPEC)
 				mroute6pr(nl[N_MF6CTABLE].n_value,
 					  nl[N_MIF6TABLE].n_value);
 #endif
 		}
 		exit(0);
 	}
 
 	if (tp) {
 		printproto(tp, tp->pr_name);
 		exit(0);
 	}
 	if (af == AF_INET || af == AF_UNSPEC)
 		for (tp = protox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 #ifdef INET6
 	if (af == AF_INET6 || af == AF_UNSPEC)
 		for (tp = ip6protox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 #endif /*INET6*/
 #ifdef IPSEC
 	if (af == PF_KEY || af == AF_UNSPEC)
 		for (tp = pfkeyprotox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 #endif /*IPSEC*/
 #ifdef IPX
 	if (af == AF_IPX || af == AF_UNSPEC) {
 		for (tp = ipxprotox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 	}
 #endif /* IPX */
 	if (af == AF_APPLETALK || af == AF_UNSPEC)
 		for (tp = atalkprotox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 #ifdef NETGRAPH
 	if (af == AF_NETGRAPH || af == AF_UNSPEC)
 		for (tp = netgraphprotox; tp->pr_name; tp++)
 			printproto(tp, tp->pr_name);
 #endif /* NETGRAPH */
 	if ((af == AF_UNIX || af == AF_UNSPEC) && !sflag)
 		unixpr(nl[N_UNP_COUNT].n_value, nl[N_UNP_GENCNT].n_value,
 		    nl[N_UNP_DHEAD].n_value, nl[N_UNP_SHEAD].n_value,
 		    nl[N_UNP_SPHEAD].n_value);
 	exit(0);
 }
 
 /*
  * Print out protocol statistics or control blocks (per sflag).
  * If the interface was not specifically requested, and the symbol
  * is not in the namelist, ignore this one.
  */
 static void
 printproto(struct protox *tp, const char *name)
 {
 	void (*pr)(u_long, const char *, int, int);
 	u_long off;
 
 	if (sflag) {
 		if (iflag) {
 			if (tp->pr_istats)
 				intpr(interval, tp->pr_istats);
 			else if (pflag)
 				printf("%s: no per-interface stats routine\n",
 				    tp->pr_name);
 			return;
 		} else {
 			pr = tp->pr_stats;
 			if (!pr) {
 				if (pflag)
 					printf("%s: no stats routine\n",
 					    tp->pr_name);
 				return;
 			}
 			if (tp->pr_usesysctl && live)
 				off = 0;
 			else if (tp->pr_sindex < 0) {
 				if (pflag)
 					printf(
 				    "%s: stats routine doesn't work on cores\n",
 					    tp->pr_name);
 				return;
 			} else
 				off = nl[tp->pr_sindex].n_value;
 		}
 	} else {
 		pr = tp->pr_cblocks;
 		if (!pr) {
 			if (pflag)
 				printf("%s: no PCB routine\n", tp->pr_name);
 			return;
 		}
 		if (tp->pr_usesysctl && live)
 			off = 0;
 		else if (tp->pr_index < 0) {
 			if (pflag)
 				printf(
 				    "%s: PCB routine doesn't work on cores\n",
 				    tp->pr_name);
 			return;
 		} else
 			off = nl[tp->pr_index].n_value;
 	}
 	if (pr != NULL && (off || (live && tp->pr_usesysctl) ||
 	    af != AF_UNSPEC))
 		(*pr)(off, name, af, tp->pr_protocol);
 }
 
 /*
  * Open the kvm descriptor kvmd and resolve the namelist nl, once.
  *
  * Returns 0 when kvmd is usable (including when it was already open)
  * and -1, after a warning, when kvm_openfiles() fails.  Failure to
  * reset the group id or to resolve the namelist terminates the program.
  */
 static int
 kvmd_init(void)
 {
 	char errbuf[_POSIX2_LINE_MAX];
 
 	if (kvmd != NULL)
 		return (0);
 
 	kvmd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf);
 
 	/*
 	 * Reset the effective group id to the real one whether or not the
 	 * open succeeded (presumably dropping the set-group-ID privilege
 	 * that was only needed for the open -- confirm against how the
 	 * binary is installed).  setgid(2) can fail, and continuing would
 	 * mean running with a group id we meant to give up, so a failure
 	 * is fatal rather than ignored.
 	 */
 	if (setgid(getgid()) != 0)
 		err(1, "setgid");
 
 	if (kvmd == NULL) {
 		warnx("kvm not available: %s", errbuf);
 		return (-1);
 	}
 
 	if (kvm_nlist(kvmd, nl) < 0) {
 		if (nlistf)
 			errx(1, "%s: kvm_nlist: %s", nlistf,
 			     kvm_geterr(kvmd));
 		else
 			errx(1, "kvm_nlist: %s", kvm_geterr(kvmd));
 	}
 
 	/* A zero type on the first entry is treated as "nothing resolved". */
 	if (nl[0].n_type == 0) {
 		if (nlistf)
 			errx(1, "%s: no namelist", nlistf);
 		else
 			errx(1, "no namelist");
 	}
 
 	return (0);
 }
 
 /*
  * Copy size bytes of kernel memory at addr into buf.
  *
  * The kvm descriptor is initialised on first use.  A NULL buf requests
  * that initialisation only and reads nothing.  Returns 0 on success and
  * -1 on failure; a failed or short read is reported with warnx().
  */
 int
 kread(u_long addr, void *buf, size_t size)
 {
 	ssize_t nread;
 
 	if (kvmd_init() < 0)
 		return (-1);
 
 	/* Initialisation-only call. */
 	if (buf == NULL)
 		return (0);
 
 	nread = kvm_read(kvmd, addr, buf, size);
 	if (nread == (ssize_t)size)
 		return (0);
 
 	warnx("%s", kvm_geterr(kvmd));
 	return (-1);
 }
 
 /*
  * Read an array of N counters in kernel memory into array of N uint64_t's.
  *
  * size is the length of buf in bytes and must be a whole number of
  * uint64_t elements.  The raw words are first read from addr with
  * kread(); each word is then passed to kvm_counter_u64_fetch() and
  * replaced by the value it returns (presumably the word is a counter
  * handle and the result its current total -- see kvm_counter_u64_fetch(3)).
  *
  * Returns 0 on success and -1 on bad arguments or a failed read.
  */
 int
 kread_counters(u_long addr, void *buf, size_t size)
 {
 	uint64_t *c = buf;
 	size_t i, ncounters;
 
 	/*
 	 * A size that is not a multiple of the element size used to make
 	 * the byte countdown wrap around and run off the end of buf, and a
 	 * NULL buf with a non-zero size was dereferenced because kread()
 	 * accepts NULL.  Refuse both up front.
 	 */
 	if (size % sizeof(*c) != 0 || (buf == NULL && size != 0)) {
 		warnx("kread_counters: invalid buffer (size %zu)", size);
 		return (-1);
 	}
 
 	/* kread() also takes care of initialising the kvm descriptor. */
 	if (kread(addr, buf, size) < 0)
 		return (-1);
 
 	ncounters = size / sizeof(*c);
 	for (i = 0; i < ncounters; i++)
 		c[i] = kvm_counter_u64_fetch(kvmd, c[i]);
 	return (0);
 }
 
 /*
  * Suffix for a regular plural: "" when n is exactly 1, "s" otherwise
  * (zero included).
  */
 const char *
 plural(uintmax_t n)
 {
 	if (n == 1)
 		return ("");
 	return ("s");
 }
 
 /*
  * Suffix for words that take "es" in the plural: "" when n is exactly 1,
  * "es" otherwise (zero included).
  */
 const char *
 plurales(uintmax_t n)
 {
 	if (n == 1)
 		return ("");
 	return ("es");
 }
 
 /*
  * Ending for words that change "y" to "ies" in the plural: "y" when n is
  * exactly 1, "ies" otherwise (zero included).
  */
 const char *
 pluralies(uintmax_t n)
 {
 	if (n == 1)
 		return ("y");
 	return ("ies");
 }
 
 /*
  * Look name up among the "well-known" protocol names.
  *
  * protoprotox is walked as a NULL-terminated array of tables, and each
  * table as an array ending at the entry whose pr_name is NULL.  Returns
  * the first entry whose pr_name equals name, or NULL if there is none.
  */
 static struct protox *
 knownname(const char *name)
 {
 	struct protox **table = protoprotox;
 	struct protox *entry;
 
 	while (*table != NULL) {
 		for (entry = *table; entry->pr_name != NULL; entry++) {
 			if (strcmp(entry->pr_name, name) == 0)
 				return (entry);
 		}
 		table++;
 	}
 	return (NULL);
 }
 
 /*
  * Find the protox corresponding to name.
  *
  * The "well-known" names are tried first.  If that fails, the protocols
  * database is scanned for an entry listing name as an alias, and the
  * official name of the first such entry is looked up instead.  Returns
  * NULL when neither step yields an entry.
  */
 static struct protox *
 name2protox(const char *name)
 {
 	struct protox *match;
 	struct protoent *ent;
 	char **ap;			/* cursor over ent->p_aliases */
 
 	match = knownname(name);
 	if (match != NULL)
 		return (match);
 
 	setprotoent(1);			/* make protocol lookup cheaper */
 	while ((ent = getprotoent()) != NULL) {
 		/*
 		 * Only aliases are compared: a hit on the official name
 		 * could not succeed, because knownname(name) already failed.
 		 */
 		for (ap = ent->p_aliases; *ap != NULL; ap++) {
 			if (strcmp(name, *ap) == 0) {
 				/* First alias hit decides, found or not. */
 				match = knownname(ent->p_name);
 				goto done;
 			}
 		}
 	}
 done:
 	endprotoent();
 	return (match);
 }
 
 /*
  * Print the command synopsis to stderr and exit with status 1.
  *
  * The format consumes twelve strings, one per invocation form.  Where
  * two string literals appear back to back without a comma they are
  * concatenated into one argument, the embedded "\n" providing the
  * continuation line of that form.
  */
 static void
 usage(void)
 {
 	(void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
 "usage: netstat [-AaLnSTWx] [-f protocol_family | -p protocol]\n"
 "               [-M core] [-N system]",
 "       netstat -i | -I interface [-abdhnW] [-f address_family]\n"
 "               [-M core] [-N system]",
 "       netstat -w wait [-I interface] [-d] [-M core] [-N system] [-q howmany]",
 "       netstat -s [-s] [-z] [-f protocol_family | -p protocol]\n"
 "               [-M core] [-N system]",
 "       netstat -i | -I interface -s [-f protocol_family | -p protocol]\n"
 "               [-M core] [-N system]",
 "       netstat -m [-M core] [-N system]",
 "       netstat -B [-I interface]",
 "       netstat -r [-AanW] [-f address_family] [-M core] [-N system]",
 "       netstat -rs [-s] [-M core] [-N system]",
 "       netstat -g [-W] [-f address_family] [-M core] [-N system]",
 "       netstat -gs [-s] [-f address_family] [-M core] [-N system]",
 "       netstat -Q");
 	exit(1);
 }
Index: stable/10/usr.sbin/bsnmpd/modules/Makefile
===================================================================
--- stable/10/usr.sbin/bsnmpd/modules/Makefile	(revision 263085)
+++ stable/10/usr.sbin/bsnmpd/modules/Makefile	(revision 263086)
@@ -1,29 +1,32 @@
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 .PATH: ${.CURDIR}/../../../contrib/bsnmp/snmpd
 
 .if ${MK_ATM} != "no"
 _snmp_atm= snmp_atm
 .endif
 
 SUBDIR=	${_snmp_atm} \
 	snmp_bridge \
 	snmp_hast \
 	snmp_hostres \
 	snmp_mibII \
-	snmp_pf \
 	snmp_target \
 	snmp_usm \
 	snmp_vacm \
 	snmp_wlan
+
+.if ${MK_PF} != "no"
+SUBDIR+=snmp_pf
+.endif
 
 .if ${MK_NETGRAPH_SUPPORT} != "no"
 SUBDIR+=snmp_netgraph
 .endif
 
 INCS=	snmpmod.h
 INCSDIR= ${INCLUDEDIR}/bsnmp
 
 .include <bsd.prog.mk>
Index: stable/10/usr.sbin/tcpdump/tcpdump/Makefile
===================================================================
--- stable/10/usr.sbin/tcpdump/tcpdump/Makefile	(revision 263085)
+++ stable/10/usr.sbin/tcpdump/tcpdump/Makefile	(revision 263086)
@@ -1,183 +1,183 @@
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 TCPDUMP_DISTDIR?= ${.CURDIR}/../../../contrib/tcpdump
 .PATH: ${TCPDUMP_DISTDIR}
 
 PROG=	tcpdump
 
 SRCS=	addrtoname.c \
 	af.c \
 	checksum.c \
 	cpack.c \
 	gmpls.c \
 	gmt2local.c \
 	in_cksum.c \
 	ipproto.c \
 	l2vpn.c \
 	machdep.c \
 	nlpid.c \
 	oui.c \
 	parsenfsfh.c \
 	print-802_11.c \
 	print-802_15_4.c \
 	print-ah.c \
 	print-aodv.c \
 	print-ap1394.c \
 	print-arcnet.c \
 	print-arp.c \
 	print-ascii.c \
 	print-atalk.c \
 	print-atm.c \
 	print-beep.c \
 	print-bfd.c \
 	print-bgp.c \
 	print-bootp.c \
 	print-bt.c \
 	print-carp.c \
 	print-cdp.c \
 	print-cfm.c \
 	print-chdlc.c \
 	print-cip.c \
 	print-cnfp.c \
 	print-dccp.c \
 	print-decnet.c \
 	print-domain.c \
 	print-dtp.c \
 	print-dvmrp.c \
 	print-eap.c \
 	print-egp.c \
 	print-eigrp.c \
 	print-enc.c \
 	print-esp.c \
 	print-ether.c \
 	print-fddi.c \
 	print-forces.c \
 	print-fr.c \
 	print-gre.c \
 	print-hsrp.c \
 	print-icmp.c \
 	print-igmp.c \
 	print-igrp.c \
 	print-ip.c \
 	print-ipcomp.c \
 	print-ipfc.c \
 	print-ipnet.c \
 	print-ipx.c \
 	print-isakmp.c \
 	print-isoclns.c \
 	print-juniper.c \
 	print-krb.c \
 	print-l2tp.c \
 	print-lane.c \
 	print-ldp.c \
 	print-llc.c \
 	print-lldp.c \
 	print-lmp.c \
 	print-lspping.c \
 	print-lwapp.c \
 	print-lwres.c \
 	print-mobile.c \
 	print-mpcp.c \
 	print-mpls.c \
 	print-msdp.c \
 	print-msnlb.c \
 	print-nfs.c \
 	print-ntp.c \
 	print-null.c \
 	print-olsr.c \
 	print-ospf.c \
 	print-otv.c \
-	print-pfsync.c \
 	print-pgm.c \
 	print-pim.c \
 	print-ppi.c \
 	print-ppp.c \
 	print-pppoe.c \
 	print-pptp.c \
 	print-radius.c \
 	print-raw.c \
 	print-rip.c \
 	print-rpki-rtr.c \
 	print-rrcp.c \
 	print-rsvp.c \
 	print-rx.c \
 	print-sctp.c \
 	print-sflow.c \
 	print-sip.c \
 	print-sl.c \
 	print-sll.c \
 	print-slow.c \
 	print-smb.c \
 	print-snmp.c \
 	print-stp.c \
 	print-sunatm.c \
 	print-sunrpc.c \
 	print-symantec.c \
 	print-syslog.c \
 	print-tcp.c \
 	print-telnet.c \
 	print-tftp.c \
 	print-timed.c \
 	print-tipc.c \
 	print-token.c \
 	print-udld.c \
 	print-udp.c \
 	print-vjc.c \
 	print-vqp.c \
 	print-vrrp.c \
 	print-vtp.c \
 	print-vxlan.c \
 	print-wb.c \
 	print-zephyr.c \
 	print-zeromq.c \
 	setsignal.c \
 	signature.c \
 	smbutil.c \
 	tcpdump.c \
 	util.c \
 	version.c
 CLEANFILES+=	version.c
 
 CFLAGS+= -I${.CURDIR} -I${TCPDUMP_DISTDIR}
 CFLAGS+= -DHAVE_CONFIG_H
 CFLAGS+= -D_U_="__attribute__((unused))"
 
 .if ${MK_INET6_SUPPORT} != "no"
 SRCS+=	print-babel.c \
 	print-dhcp6.c \
 	print-frag6.c \
 	print-icmp6.c \
 	print-ip6.c \
 	print-ip6opts.c \
 	print-mobility.c \
 	print-ospf6.c \
 	print-ripng.c \
 	print-rt6.c
 CFLAGS+=	-DINET6
 .endif
 .if ${MACHINE_CPUARCH} != "i386"
 CFLAGS+=	-DLBL_ALIGN
 .endif
 
 DPADD=	${LIBL} ${LIBPCAP}
 LDADD=	-ll -lpcap
 .if ${MK_OPENSSL} != "no" && !defined(RELEASE_CRUNCH)
 DPADD+= ${LIBCRYPTO}
 LDADD+= -lcrypto
 CFLAGS+= -I${DESTDIR}/usr/include/openssl
 CFLAGS+= -DHAVE_LIBCRYPTO -DHAVE_OPENSSL_EVP_H
 .endif
 
 .if ${MK_PF} != "no"
-SRCS+=	print-pflog.c
+SRCS+=	print-pflog.c \
+	print-pfsync.c
 CFLAGS+= -DHAVE_NET_PFVAR_H
 .endif
 
 version.c: ${TCPDUMP_DISTDIR}/VERSION
 	rm -f version.c ; \
 	    sed 's/.*/char version[] = "&";/' ${TCPDUMP_DISTDIR}/VERSION \
 		> version.c
 
 .include <bsd.prog.mk>
Index: stable/10
===================================================================
--- stable/10	(revision 263085)
+++ stable/10	(revision 263086)

Property changes on: stable/10
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r257186,257215,257349,259736,261797