Index: head/sys/contrib/ipfilter/netinet/fil.c =================================================================== --- head/sys/contrib/ipfilter/netinet/fil.c (revision 305823) +++ head/sys/contrib/ipfilter/netinet/fil.c (revision 305824) @@ -1,10285 +1,10285 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * Copyright 2008 Sun Microsystems. * * $Id$ * */ #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #include #include #include #include #if defined(_KERNEL) && defined(__FreeBSD_version) && \ (__FreeBSD_version >= 220000) # if (__FreeBSD_version >= 400000) # if !defined(IPFILTER_LKM) # include "opt_inet6.h" # endif # if (__FreeBSD_version == 400019) # define CSUM_DELAY_DATA # endif # endif # include #else # include #endif #if (defined(__SVR4) || defined(__svr4__)) && defined(sun) # include #endif #if !defined(_AIX51) # include #endif #if defined(_KERNEL) # include # include #else # include # include # include # include # include # define _KERNEL # ifdef __OpenBSD__ struct file; # endif # include # undef _KERNEL #endif #if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \ !defined(linux) # include #else # if !defined(linux) # include # endif # if (SOLARIS2 < 5) && defined(sun) # include # endif #endif #ifdef __hpux # define _NET_ROUTE_INCLUDED #endif #if !defined(linux) # include #endif #include #include #ifdef sun # include #endif #include #include #include #if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ # include # include #endif #include #if (!defined(__sgi) && !defined(AIX)) || defined(_KERNEL) # include # include #endif #ifdef __hpux # undef _NET_ROUTE_INCLUDED #endif #ifdef __osf__ # undef _RADIX_H_ #endif #include "netinet/ip_compat.h" #ifdef USE_INET6 # include # if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) # include # endif #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #ifdef IPFILTER_SCAN # include "netinet/ip_scan.h" #endif #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_pool.h" #include "netinet/ip_htable.h" #ifdef IPFILTER_COMPILED # include "netinet/ip_rules.h" #endif #if defined(IPFILTER_BPF) && defined(_KERNEL) # include #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) # include #endif #include "netinet/ipl.h" #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) # include extern struct callout ipf_slowtimer_ch; #endif #if defined(__OpenBSD__) # include extern struct timeout ipf_slowtimer_ch; #endif /* END OF INCLUDES */ #if !defined(lint) static const char sccsid[] = "@(#)fil.c 1.36 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$FreeBSD$"; /* static const char rcsid[] = "@(#)$Id: fil.c,v 2.243.2.125 2007/10/10 09:27:20 darrenr Exp $"; */ #endif #ifndef _KERNEL # include "ipf.h" # include "ipt.h" extern int opts; extern int blockreason; #endif /* _KERNEL */ #define LBUMP(x) softc->x++ #define LBUMPD(x, y) do { softc->x.y++; DT(y); } while (0) static INLINE int ipf_check_ipf __P((fr_info_t *, frentry_t *, int)); static u_32_t ipf_checkcipso __P((fr_info_t *, u_char *, int)); static u_32_t ipf_checkripso __P((u_char *)); static u_32_t ipf_decaps __P((fr_info_t *, u_32_t, int)); #ifdef IPFILTER_LOG static frentry_t *ipf_dolog __P((fr_info_t *, u_32_t *)); #endif static int 
ipf_flushlist __P((ipf_main_softc_t *, int *, frentry_t **)); static int ipf_flush_groups __P((ipf_main_softc_t *, frgroup_t **, int)); static ipfunc_t ipf_findfunc __P((ipfunc_t)); static void *ipf_findlookup __P((ipf_main_softc_t *, int, frentry_t *, i6addr_t *, i6addr_t *)); static frentry_t *ipf_firewall __P((fr_info_t *, u_32_t *)); static int ipf_fr_matcharray __P((fr_info_t *, int *)); static int ipf_frruleiter __P((ipf_main_softc_t *, void *, int, void *)); static void ipf_funcfini __P((ipf_main_softc_t *, frentry_t *)); static int ipf_funcinit __P((ipf_main_softc_t *, frentry_t *)); static int ipf_geniter __P((ipf_main_softc_t *, ipftoken_t *, ipfgeniter_t *)); static void ipf_getstat __P((ipf_main_softc_t *, struct friostat *, int)); static int ipf_group_flush __P((ipf_main_softc_t *, frgroup_t *)); static void ipf_group_free __P((frgroup_t *)); static int ipf_grpmapfini __P((struct ipf_main_softc_s *, frentry_t *)); static int ipf_grpmapinit __P((struct ipf_main_softc_s *, frentry_t *)); static frentry_t *ipf_nextrule __P((ipf_main_softc_t *, int, int, frentry_t *, int)); static int ipf_portcheck __P((frpcmp_t *, u_32_t)); static INLINE int ipf_pr_ah __P((fr_info_t *)); static INLINE void ipf_pr_esp __P((fr_info_t *)); static INLINE void ipf_pr_gre __P((fr_info_t *)); static INLINE void ipf_pr_udp __P((fr_info_t *)); static INLINE void ipf_pr_tcp __P((fr_info_t *)); static INLINE void ipf_pr_icmp __P((fr_info_t *)); static INLINE void ipf_pr_ipv4hdr __P((fr_info_t *)); static INLINE void ipf_pr_short __P((fr_info_t *, int)); static INLINE int ipf_pr_tcpcommon __P((fr_info_t *)); static INLINE int ipf_pr_udpcommon __P((fr_info_t *)); static void ipf_rule_delete __P((ipf_main_softc_t *, frentry_t *f, int, int)); static void ipf_rule_expire_insert __P((ipf_main_softc_t *, frentry_t *, int)); static int ipf_synclist __P((ipf_main_softc_t *, frentry_t *, void *)); static void ipf_token_flush __P((ipf_main_softc_t *)); static void ipf_token_unlink __P((ipf_main_softc_t *, ipftoken_t *)); static ipftuneable_t *ipf_tune_findbyname __P((ipftuneable_t *, const char *)); static ipftuneable_t *ipf_tune_findbycookie __P((ipftuneable_t **, void *, void **)); static int ipf_updateipid __P((fr_info_t *)); static int ipf_settimeout __P((struct ipf_main_softc_s *, struct ipftuneable *, ipftuneval_t *)); #if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \ !defined(__FreeBSD__)) || \ FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \ OPENBSD_LT_REV(200006) static int ppsratecheck(struct timeval *, int *, int); #endif /* * bit values for identifying presence of individual IP options * All of these tables should be ordered by increasing key value on the left * hand side to allow for binary searching of the array and include a trailer * with a 0 for the bitmask for linear searches to easily find the end with. 
*/ static const struct optlist ipopts[20] = { { IPOPT_NOP, 0x000001 }, { IPOPT_RR, 0x000002 }, { IPOPT_ZSU, 0x000004 }, { IPOPT_MTUP, 0x000008 }, { IPOPT_MTUR, 0x000010 }, { IPOPT_ENCODE, 0x000020 }, { IPOPT_TS, 0x000040 }, { IPOPT_TR, 0x000080 }, { IPOPT_SECURITY, 0x000100 }, { IPOPT_LSRR, 0x000200 }, { IPOPT_E_SEC, 0x000400 }, { IPOPT_CIPSO, 0x000800 }, { IPOPT_SATID, 0x001000 }, { IPOPT_SSRR, 0x002000 }, { IPOPT_ADDEXT, 0x004000 }, { IPOPT_VISA, 0x008000 }, { IPOPT_IMITD, 0x010000 }, { IPOPT_EIP, 0x020000 }, { IPOPT_FINN, 0x040000 }, { 0, 0x000000 } }; #ifdef USE_INET6 static const struct optlist ip6exthdr[] = { { IPPROTO_HOPOPTS, 0x000001 }, { IPPROTO_IPV6, 0x000002 }, { IPPROTO_ROUTING, 0x000004 }, { IPPROTO_FRAGMENT, 0x000008 }, { IPPROTO_ESP, 0x000010 }, { IPPROTO_AH, 0x000020 }, { IPPROTO_NONE, 0x000040 }, { IPPROTO_DSTOPTS, 0x000080 }, { IPPROTO_MOBILITY, 0x000100 }, { 0, 0 } }; #endif /* * bit values for identifying presence of individual IP security options */ static const struct optlist secopt[8] = { { IPSO_CLASS_RES4, 0x01 }, { IPSO_CLASS_TOPS, 0x02 }, { IPSO_CLASS_SECR, 0x04 }, { IPSO_CLASS_RES3, 0x08 }, { IPSO_CLASS_CONF, 0x10 }, { IPSO_CLASS_UNCL, 0x20 }, { IPSO_CLASS_RES2, 0x40 }, { IPSO_CLASS_RES1, 0x80 } }; char ipfilter_version[] = IPL_VERSION; int ipf_features = 0 #ifdef IPFILTER_LKM | IPF_FEAT_LKM #endif #ifdef IPFILTER_LOG | IPF_FEAT_LOG #endif | IPF_FEAT_LOOKUP #ifdef IPFILTER_BPF | IPF_FEAT_BPF #endif #ifdef IPFILTER_COMPILED | IPF_FEAT_COMPILED #endif #ifdef IPFILTER_CKSUM | IPF_FEAT_CKSUM #endif | IPF_FEAT_SYNC #ifdef IPFILTER_SCAN | IPF_FEAT_SCAN #endif #ifdef USE_INET6 | IPF_FEAT_IPV6 #endif ; /* * Table of functions available for use with call rules. */ static ipfunc_resolve_t ipf_availfuncs[] = { { "srcgrpmap", ipf_srcgrpmap, ipf_grpmapinit, ipf_grpmapfini }, { "dstgrpmap", ipf_dstgrpmap, ipf_grpmapinit, ipf_grpmapfini }, { "", NULL, NULL, NULL } }; static ipftuneable_t ipf_main_tuneables[] = { { { (void *)offsetof(struct ipf_main_softc_s, ipf_flags) }, "ipf_flags", 0, 0xffffffff, stsizeof(ipf_main_softc_t, ipf_flags), 0, NULL, NULL }, { { (void *)offsetof(struct ipf_main_softc_s, ipf_active) }, "active", 0, 0, stsizeof(ipf_main_softc_t, ipf_active), IPFT_RDONLY, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_control_forwarding) }, "control_forwarding", 0, 1, stsizeof(ipf_main_softc_t, ipf_control_forwarding), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_update_ipid) }, "update_ipid", 0, 1, stsizeof(ipf_main_softc_t, ipf_update_ipid), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_chksrc) }, "chksrc", 0, 1, stsizeof(ipf_main_softc_t, ipf_chksrc), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_minttl) }, "min_ttl", 0, 1, stsizeof(ipf_main_softc_t, ipf_minttl), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_icmpminfragmtu) }, "icmp_minfragmtu", 0, 1, stsizeof(ipf_main_softc_t, ipf_icmpminfragmtu), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_pass) }, "default_pass", 0, 0xffffffff, stsizeof(ipf_main_softc_t, ipf_pass), 0, NULL, NULL }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcpidletimeout) }, "tcp_idle_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcpidletimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcpclosewait) }, "tcp_close_wait", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcpclosewait), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcplastack) }, "tcp_last_ack", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, 
ipf_tcplastack), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcptimeout) }, "tcp_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcptimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcpsynsent) }, "tcp_syn_sent", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcpsynsent), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcpsynrecv) }, "tcp_syn_received", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcpsynrecv), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcpclosed) }, "tcp_closed", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcpclosed), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcphalfclosed) }, "tcp_half_closed", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcphalfclosed), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_tcptimewait) }, "tcp_time_wait", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_tcptimewait), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_udptimeout) }, "udp_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_udptimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_udpacktimeout) }, "udp_ack_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_udpacktimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_icmptimeout) }, "icmp_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_icmptimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_icmpacktimeout) }, "icmp_ack_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_icmpacktimeout), 0, NULL, ipf_settimeout }, { { (void *)offsetof(ipf_main_softc_t, ipf_iptimeout) }, "ip_timeout", 1, 0x7fffffff, stsizeof(ipf_main_softc_t, ipf_iptimeout), 0, NULL, ipf_settimeout }, #if defined(INSTANCES) && defined(_KERNEL) { { (void *)offsetof(ipf_main_softc_t, ipf_get_loopback) }, "intercept_loopback", 0, 1, stsizeof(ipf_main_softc_t, ipf_get_loopback), 0, NULL, ipf_set_loopback }, #endif { { 0 }, NULL, 0, 0, 0, 0, NULL, NULL } }; /* * The next section of code is a a collection of small routines that set * fields in the fr_info_t structure passed based on properties of the * current packet. There are different routines for the same protocol * for each of IPv4 and IPv6. Adding a new protocol, for which there * will "special" inspection for setup, is now more easily done by adding * a new routine and expanding the ipf_pr_ipinit*() function rather than by * adding more code to a growing switch statement. 
 */
#ifdef USE_INET6
static	INLINE int	ipf_pr_ah6 __P((fr_info_t *));
static	INLINE void	ipf_pr_esp6 __P((fr_info_t *));
static	INLINE void	ipf_pr_gre6 __P((fr_info_t *));
static	INLINE void	ipf_pr_udp6 __P((fr_info_t *));
static	INLINE void	ipf_pr_tcp6 __P((fr_info_t *));
static	INLINE void	ipf_pr_icmp6 __P((fr_info_t *));
static	INLINE void	ipf_pr_ipv6hdr __P((fr_info_t *));
static	INLINE void	ipf_pr_short6 __P((fr_info_t *, int));
static	INLINE int	ipf_pr_hopopts6 __P((fr_info_t *));
static	INLINE int	ipf_pr_mobility6 __P((fr_info_t *));
static	INLINE int	ipf_pr_routing6 __P((fr_info_t *));
static	INLINE int	ipf_pr_dstopts6 __P((fr_info_t *));
static	INLINE int	ipf_pr_fragment6 __P((fr_info_t *));
static	INLINE struct ip6_ext *ipf_pr_ipv6exthdr __P((fr_info_t *, int, int));


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_short6                                                */
/* Returns:     void                                                         */
/* Parameters:  fin(I)  - pointer to packet information                     */
/*              xmin(I) - minimum header size                                */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function enforces the 'is a packet too short to be legit' rule      */
/* for IPv6 and marks the packet with FI_SHORT if so.  See the comment for  */
/* ipf_pr_short() for more details.                                         */
/* ------------------------------------------------------------------------ */
static INLINE void
ipf_pr_short6(fin, xmin)
	fr_info_t *fin;
	int xmin;
{

	if (fin->fin_dlen < xmin)
		fin->fin_flx |= FI_SHORT;
}
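/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): how the short-packet rule above behaves.  The values are
 * hypothetical.  sizeof(struct icmp6_hdr) is 8, so an IPv6 packet whose
 * payload length after the IPv6 header (fin_dlen) is only 4 bytes gets
 * flagged FI_SHORT and can later be matched by "short" rules.
 */
#if 0
static void
example_short6(fr_info_t *fin)
{
	fin->fin_dlen = 4;		/* hypothetical payload length */
	ipf_pr_short6(fin, sizeof(struct icmp6_hdr));	/* xmin == 8 */
	/* fin->fin_flx now has FI_SHORT set because 4 < 8 */
}
#endif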
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_ipv6hdr                                               */
/* Returns:     void                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* Copy values from the IPv6 header into the fr_info_t struct and call the  */
/* per-protocol analyzer if it exists.  In validating the packet, a         */
/* protocol analyzer may pullup or free the packet itself so we need to be  */
/* vigilant of that possibility arising.                                    */
/* ------------------------------------------------------------------------ */
static INLINE void
ipf_pr_ipv6hdr(fin)
	fr_info_t *fin;
{
	ip6_t *ip6 = (ip6_t *)fin->fin_ip;
	int p, go = 1, i, hdrcount;
	fr_ip_t *fi = &fin->fin_fi;

	fin->fin_off = 0;

	fi->fi_tos = 0;
	fi->fi_optmsk = 0;
	fi->fi_secmsk = 0;
	fi->fi_auth = 0;

	p = ip6->ip6_nxt;
	fin->fin_crc = p;
	fi->fi_ttl = ip6->ip6_hlim;
	fi->fi_src.in6 = ip6->ip6_src;
	fin->fin_crc += fi->fi_src.i6[0];
	fin->fin_crc += fi->fi_src.i6[1];
	fin->fin_crc += fi->fi_src.i6[2];
	fin->fin_crc += fi->fi_src.i6[3];
	fi->fi_dst.in6 = ip6->ip6_dst;
	fin->fin_crc += fi->fi_dst.i6[0];
	fin->fin_crc += fi->fi_dst.i6[1];
	fin->fin_crc += fi->fi_dst.i6[2];
	fin->fin_crc += fi->fi_dst.i6[3];
	fin->fin_id = 0;
	if (IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
		fin->fin_flx |= FI_MULTICAST|FI_MBCAST;

	hdrcount = 0;
	while (go && !(fin->fin_flx & FI_SHORT)) {
		switch (p)
		{
		case IPPROTO_UDP :
			ipf_pr_udp6(fin);
			go = 0;
			break;

		case IPPROTO_TCP :
			ipf_pr_tcp6(fin);
			go = 0;
			break;

		case IPPROTO_ICMPV6 :
			ipf_pr_icmp6(fin);
			go = 0;
			break;

		case IPPROTO_GRE :
			ipf_pr_gre6(fin);
			go = 0;
			break;

		case IPPROTO_HOPOPTS :
			p = ipf_pr_hopopts6(fin);
			break;

		case IPPROTO_MOBILITY :
			p = ipf_pr_mobility6(fin);
			break;

		case IPPROTO_DSTOPTS :
			p = ipf_pr_dstopts6(fin);
			break;

		case IPPROTO_ROUTING :
			p = ipf_pr_routing6(fin);
			break;

		case IPPROTO_AH :
			p = ipf_pr_ah6(fin);
			break;

		case IPPROTO_ESP :
			ipf_pr_esp6(fin);
			go = 0;
			break;

		case IPPROTO_IPV6 :
			for (i = 0; ip6exthdr[i].ol_bit != 0; i++)
				if (ip6exthdr[i].ol_val == p) {
					fin->fin_flx |= ip6exthdr[i].ol_bit;
					break;
				}
			go = 0;
			break;

		case IPPROTO_NONE :
			go = 0;
			break;

		case IPPROTO_FRAGMENT :
			p = ipf_pr_fragment6(fin);
			/*
			 * Given that the only fragments we want to let
			 * through (where fin_off != 0) are those where the
			 * non-first fragments only have data, we can safely
			 * stop looking at headers if this is a non-leading
			 * fragment.
			 */
			if (fin->fin_off != 0)
				go = 0;
			break;

		default :
			go = 0;
			break;
		}
		hdrcount++;

		/*
		 * It is important to note that at this point, for the
		 * extension headers (go != 0), the entire header may not
		 * have been pulled up when the code gets to this point.
		 * This is only done for "go != 0" because the other header
		 * handlers will all pullup their complete header.  The
		 * other indicator of an incomplete packet is that this
		 * was just an extension header.
		 */
		if ((go != 0) && (p != IPPROTO_NONE) &&
		    (ipf_pr_pullup(fin, 0) == -1)) {
			p = IPPROTO_NONE;
			break;
		}
	}

	/*
	 * Some of the above functions, like ipf_pr_esp6(), can call
	 * ipf_pullup and destroy whatever packet was here.  The caller of
	 * this function expects us to return if there is a problem with
	 * ipf_pullup.
	 */
	if (fin->fin_m == NULL) {
		ipf_main_softc_t *softc = fin->fin_main_soft;

		LBUMPD(ipf_stats[fin->fin_out], fr_v6_bad);
		return;
	}

	fi->fi_p = p;

	/*
	 * IPv6 fragment case 1 - see comment for ipf_pr_fragment6().
	 * "go != 0" implies the above loop hasn't arrived at a layer 4
	 * header.
	 */
	if ((go != 0) && (fin->fin_flx & FI_FRAG) && (fin->fin_off == 0)) {
		ipf_main_softc_t *softc = fin->fin_main_soft;

		fin->fin_flx |= FI_BAD;
		DT2(ipf_fi_bad_ipv6_frag_1, fr_info_t *, fin, int, go);
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_badfrag);
		LBUMP(ipf_stats[fin->fin_out].fr_v6_bad);
	}
}
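/*
 * Worked example (editorial addition, not part of the original source):
 * how the "fragment case 1" test above can fire.  Consider a first
 * fragment whose header chain is truncated before any layer 4 header:
 * ipf_pr_fragment6() sets FI_FRAG with fin_off == 0, a later extension
 * header handler marks the packet FI_SHORT, and the loop exits with go
 * still non-zero because no layer 4 handler ever ran.  That combination
 * matches cases 1 and 2 of the ipf_pr_fragment6() comment below, so the
 * packet is flagged FI_BAD and fr_v6_badfrag is bumped.
 */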
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_ipv6exthdr                                            */
/* Returns:     struct ip6_ext * - pointer to the start of the next header  */
/*                                 or NULL if there is a problem.            */
/* Parameters:  fin(I)      - pointer to packet information                 */
/*              multiple(I) - flag indicating yes/no if multiple occurrences*/
/*                            of this extension header are allowed.         */
/*              proto(I)    - protocol number for this extension header     */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function embodies a number of common checks that all IPv6 extension */
/* headers must be subjected to.  For example, making sure the packet is    */
/* big enough to contain it, checking if it is repeated and setting a flag  */
/* to indicate its presence.                                                 */
/* ------------------------------------------------------------------------ */
static INLINE struct ip6_ext *
ipf_pr_ipv6exthdr(fin, multiple, proto)
	fr_info_t *fin;
	int multiple, proto;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	struct ip6_ext *hdr;
	u_short shift;
	int i;

	fin->fin_flx |= FI_V6EXTHDR;

				/* 8 is default length of extension hdr */
	if ((fin->fin_dlen - 8) < 0) {
		fin->fin_flx |= FI_SHORT;
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_short);
		return NULL;
	}

	if (ipf_pr_pullup(fin, 8) == -1) {
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_pullup);
		return NULL;
	}

	hdr = fin->fin_dp;
	switch (proto)
	{
	case IPPROTO_FRAGMENT :
		shift = 8;
		break;
	default :
		shift = 8 + (hdr->ip6e_len << 3);
		break;
	}

	if (shift > fin->fin_dlen) {	/* Nasty extension header length? */
		fin->fin_flx |= FI_BAD;
		DT3(ipf_fi_bad_pr_ipv6exthdr_len, fr_info_t *, fin,
		    u_short, shift, u_short, fin->fin_dlen);
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_hlen);
		return NULL;
	}

	fin->fin_dp = (char *)fin->fin_dp + shift;
	fin->fin_dlen -= shift;

	/*
	 * If we have seen a fragment header, do not set any flags to indicate
	 * the presence of this extension header as it has no impact on the
	 * end result until after it has been defragmented.
	 */
	if (fin->fin_flx & FI_FRAG)
		return hdr;

	for (i = 0; ip6exthdr[i].ol_bit != 0; i++)
		if (ip6exthdr[i].ol_val == proto) {
			/*
			 * Most IPv6 extension headers are only allowed once.
			 */
			if ((multiple == 0) &&
			    ((fin->fin_optmsk & ip6exthdr[i].ol_bit) != 0)) {
				fin->fin_flx |= FI_BAD;
				DT2(ipf_fi_bad_ipv6exthdr_once, fr_info_t *,
				    fin, u_int,
				    (fin->fin_optmsk & ip6exthdr[i].ol_bit));
			} else
				fin->fin_optmsk |= ip6exthdr[i].ol_bit;
			break;
		}

	return hdr;
}
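/*
 * Worked example (editorial addition, not part of the original source):
 * ip6e_len counts 8-octet units beyond the first 8 octets of the
 * extension header, so the "shift" computed above is
 * 8 + (ip6e_len << 3) bytes.  A destination options header with
 * ip6e_len == 2 therefore spans 8 + 16 = 24 bytes, and fin_dp is moved
 * 24 bytes forward.  The fragment header is always exactly 8 bytes,
 * hence the IPPROTO_FRAGMENT special case.
 */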
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_hopopts6                                              */
/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function checks the hop-by-hop options extension header             */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_hopopts6(fin)
	fr_info_t *fin;
{
	struct ip6_ext *hdr;

	hdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS);
	if (hdr == NULL)
		return IPPROTO_NONE;
	return hdr->ip6e_nxt;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_mobility6                                             */
/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function checks the IPv6 mobility extension header                  */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_mobility6(fin)
	fr_info_t *fin;
{
	struct ip6_ext *hdr;

	hdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_MOBILITY);
	if (hdr == NULL)
		return IPPROTO_NONE;
	return hdr->ip6e_nxt;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_routing6                                              */
/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function checks the routing extension header                        */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_routing6(fin)
	fr_info_t *fin;
{
	struct ip6_routing *hdr;

	hdr = (struct ip6_routing *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_ROUTING);
	if (hdr == NULL)
		return IPPROTO_NONE;

	switch (hdr->ip6r_type)
	{
	case 0 :
		/*
		 * Nasty extension header length?
		 */
		if (((hdr->ip6r_len >> 1) < hdr->ip6r_segleft) ||
		    (hdr->ip6r_segleft && (hdr->ip6r_len & 1))) {
			ipf_main_softc_t *softc = fin->fin_main_soft;

			fin->fin_flx |= FI_BAD;
			DT1(ipf_fi_bad_routing6, fr_info_t *, fin);
			LBUMPD(ipf_stats[fin->fin_out], fr_v6_rh_bad);
			return IPPROTO_NONE;
		}
		break;
	default :
		break;
	}

	return hdr->ip6r_nxt;
}
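/*
 * Worked example (editorial addition, not part of the original source):
 * for a type 0 routing header, ip6r_len counts 8-octet units and each
 * IPv6 address consumes two of them, so ip6r_len >> 1 is the number of
 * addresses actually present.  With ip6r_len == 4 the header carries
 * two addresses; ip6r_segleft == 3 would then fail the
 * (ip6r_len >> 1) < ip6r_segleft test above.  Likewise, an odd
 * ip6r_len with segments left cannot describe whole addresses.  Both
 * cases are marked FI_BAD.
 */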
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_fragment6                                             */
/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* Examine the IPv6 fragment header and extract fragment offset information.*/
/*                                                                           */
/* Fragments in IPv6 are extraordinarily difficult to deal with - much more */
/* so than in IPv4.  There are 5 cases of fragments with IPv6 that all      */
/* packets with a fragment header can fit into.  They are as follows:       */
/*                                                                           */
/* 1.  [IPv6][0-n EH][FH][0-n EH]  (no L4HDR present)                       */
/* 2.  [IPV6][0-n EH][FH][0-n EH][L4HDR part]  (short)                      */
/* 3.  [IPV6][0-n EH][FH][L4HDR part][0-n data]  (short)                    */
/* 4.  [IPV6][0-n EH][FH][0-n EH][L4HDR][0-n data]                          */
/* 5.  [IPV6][0-n EH][FH][data]                                             */
/*                                                                           */
/* IPV6 = IPv6 header, FH = Fragment Header,                                */
/* 0-n EH = 0 or more extension headers, 0-n data = 0 or more bytes of data */
/*                                                                           */
/* Packets that match 1, 2, 3 will be dropped as the only reasonable        */
/* scenario in which they happen is in extreme circumstances that are most  */
/* likely to be an indication of an attack rather than normal traffic.      */
/* A type 3 packet may be sent by an attacker after a type 4 packet.  There */
/* are two rules that can be used to guard against type 3 packets: L4       */
/* headers must always be in a packet that has the offset field set to 0    */
/* and no packet is allowed to overlay one where offset = 0.                */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_fragment6(fin)
	fr_info_t *fin;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	struct ip6_frag *frag;

	fin->fin_flx |= FI_FRAG;

	frag = (struct ip6_frag *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_FRAGMENT);
	if (frag == NULL) {
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_frag_bad);
		return IPPROTO_NONE;
	}

	if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0) {
		/*
		 * Any fragment that isn't the last fragment must have its
		 * length as a multiple of 8.
		 */
		if ((fin->fin_plen & 7) != 0) {
			fin->fin_flx |= FI_BAD;
			DT2(ipf_fi_bad_frag_not_8, fr_info_t *, fin, u_int,
			    (fin->fin_plen & 7));
		}
	}

	fin->fin_fraghdr = frag;
	fin->fin_id = frag->ip6f_ident;
	fin->fin_off = ntohs(frag->ip6f_offlg & IP6F_OFF_MASK);
	if (fin->fin_off != 0)
		fin->fin_flx |= FI_FRAGBODY;

	/*
	 * Jumbograms aren't handled, so the max. length is 64k
	 */
	if ((fin->fin_off << 3) + fin->fin_dlen > 65535) {
		fin->fin_flx |= FI_BAD;
		DT2(ipf_fi_bad_jumbogram, fr_info_t *, fin, u_int,
		    ((fin->fin_off << 3) + fin->fin_dlen));
	}

	/*
	 * We don't know where the transport layer header (or whatever comes
	 * next) is, as it could be behind destination options (amongst
	 * others), so return the fragment header as the type of packet this
	 * is.  Note that this effectively disables the fragment cache for
	 * > 1 protocol at a time.
	 */
	return frag->ip6f_nxt;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_dstopts6                                              */
/* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This function checks the destination options extension header            */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_dstopts6(fin)
	fr_info_t *fin;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	struct ip6_ext *hdr;

	hdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_DSTOPTS);
	if (hdr == NULL) {
		LBUMPD(ipf_stats[fin->fin_out], fr_v6_dst_bad);
		return IPPROTO_NONE;
	}
	return hdr->ip6e_nxt;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_icmp6                                                 */
/* Returns:     void                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv6 Only                                                                 */
/* This routine is mainly concerned with determining the minimum valid size */
/* for an ICMPv6 packet.
*/ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_icmp6(fin) fr_info_t *fin; { int minicmpsz = sizeof(struct icmp6_hdr); struct icmp6_hdr *icmp6; if (ipf_pr_pullup(fin, ICMP6ERR_MINPKTLEN - sizeof(ip6_t)) == -1) { ipf_main_softc_t *softc = fin->fin_main_soft; LBUMPD(ipf_stats[fin->fin_out], fr_v6_icmp6_pullup); return; } if (fin->fin_dlen > 1) { ip6_t *ip6; icmp6 = fin->fin_dp; fin->fin_data[0] = *(u_short *)icmp6; if ((icmp6->icmp6_type & ICMP6_INFOMSG_MASK) != 0) fin->fin_flx |= FI_ICMPQUERY; switch (icmp6->icmp6_type) { case ICMP6_ECHO_REPLY : case ICMP6_ECHO_REQUEST : if (fin->fin_dlen >= 6) fin->fin_data[1] = icmp6->icmp6_id; minicmpsz = ICMP6ERR_MINPKTLEN - sizeof(ip6_t); break; case ICMP6_DST_UNREACH : case ICMP6_PACKET_TOO_BIG : case ICMP6_TIME_EXCEEDED : case ICMP6_PARAM_PROB : fin->fin_flx |= FI_ICMPERR; minicmpsz = ICMP6ERR_IPICMPHLEN - sizeof(ip6_t); if (fin->fin_plen < ICMP6ERR_IPICMPHLEN) break; if (M_LEN(fin->fin_m) < fin->fin_plen) { if (ipf_coalesce(fin) != 1) return; } if (ipf_pr_pullup(fin, ICMP6ERR_MINPKTLEN) == -1) return; /* * If the destination of this packet doesn't match the * source of the original packet then this packet is * not correct. */ icmp6 = fin->fin_dp; ip6 = (ip6_t *)((char *)icmp6 + ICMPERR_ICMPHLEN); if (IP6_NEQ(&fin->fin_fi.fi_dst, (i6addr_t *)&ip6->ip6_src)) { fin->fin_flx |= FI_BAD; DT1(ipf_fi_bad_icmp6, fr_info_t *, fin); } break; default : break; } } ipf_pr_short6(fin, minicmpsz); if ((fin->fin_flx & (FI_SHORT|FI_BAD)) == 0) { u_char p = fin->fin_p; fin->fin_p = IPPROTO_ICMPV6; ipf_checkv6sum(fin); fin->fin_p = p; } } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_udp6 */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv6 Only */ /* Analyse the packet for IPv6/UDP properties. */ /* Is not expected to be called for fragmented packets. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_udp6(fin) fr_info_t *fin; { if (ipf_pr_udpcommon(fin) == 0) { u_char p = fin->fin_p; fin->fin_p = IPPROTO_UDP; ipf_checkv6sum(fin); fin->fin_p = p; } } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_tcp6 */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv6 Only */ /* Analyse the packet for IPv6/TCP properties. */ /* Is not expected to be called for fragmented packets. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_tcp6(fin) fr_info_t *fin; { if (ipf_pr_tcpcommon(fin) == 0) { u_char p = fin->fin_p; fin->fin_p = IPPROTO_TCP; ipf_checkv6sum(fin); fin->fin_p = p; } } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_esp6 */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv6 Only */ /* Analyse the packet for ESP properties. */ /* The minimum length is taken to be the SPI (32bits) plus a tail (32bits) */ /* even though the newer ESP packets must also have a sequence number that */ /* is 32bits as well, it is not possible(?) to determine the version from a */ /* simple packet header. 
*/ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_esp6(fin) fr_info_t *fin; { if ((fin->fin_off == 0) && (ipf_pr_pullup(fin, 8) == -1)) { ipf_main_softc_t *softc = fin->fin_main_soft; LBUMPD(ipf_stats[fin->fin_out], fr_v6_esp_pullup); return; } } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_ah6 */ /* Returns: int - value of the next header or IPPROTO_NONE if error */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv6 Only */ /* Analyse the packet for AH properties. */ /* The minimum length is taken to be the combination of all fields in the */ /* header being present and no authentication data (null algorithm used.) */ /* ------------------------------------------------------------------------ */ static INLINE int ipf_pr_ah6(fin) fr_info_t *fin; { authhdr_t *ah; fin->fin_flx |= FI_AH; ah = (authhdr_t *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS); if (ah == NULL) { ipf_main_softc_t *softc = fin->fin_main_soft; LBUMPD(ipf_stats[fin->fin_out], fr_v6_ah_bad); return IPPROTO_NONE; } ipf_pr_short6(fin, sizeof(*ah)); /* * No need for another pullup, ipf_pr_ipv6exthdr() will pullup * enough data to satisfy ah_next (the very first one.) */ return ah->ah_next; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_gre6 */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Analyse the packet for GRE properties. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_gre6(fin) fr_info_t *fin; { grehdr_t *gre; if (ipf_pr_pullup(fin, sizeof(grehdr_t)) == -1) { ipf_main_softc_t *softc = fin->fin_main_soft; LBUMPD(ipf_stats[fin->fin_out], fr_v6_gre_pullup); return; } gre = fin->fin_dp; if (GRE_REV(gre->gr_flags) == 1) fin->fin_data[0] = gre->gr_call; } #endif /* USE_INET6 */ /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_pullup */ /* Returns: int - 0 == pullup succeeded, -1 == failure */ /* Parameters: fin(I) - pointer to packet information */ /* plen(I) - length (excluding L3 header) to pullup */ /* */ /* Short inline function to cut down on code duplication to perform a call */ /* to ipf_pullup to ensure there is the required amount of data, */ /* consecutively in the packet buffer. */ /* */ /* This function pulls up 'extra' data at the location of fin_dp. fin_dp */ /* points to the first byte after the complete layer 3 header, which will */ /* include all of the known extension headers for IPv6 or options for IPv4. */ /* */ /* Since fr_pullup() expects the total length of bytes to be pulled up, it */ /* is necessary to add those we can already assume to be pulled up (fin_dp */ /* - fin_ip) to what is passed through. 
 */
/* ------------------------------------------------------------------------ */
int
ipf_pr_pullup(fin, plen)
	fr_info_t *fin;
	int plen;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;

	if (fin->fin_m != NULL) {
		if (fin->fin_dp != NULL)
			plen += (char *)fin->fin_dp -
				((char *)fin->fin_ip + fin->fin_hlen);
		plen += fin->fin_hlen;
		if (M_LEN(fin->fin_m) < plen + fin->fin_ipoff) {
#if defined(_KERNEL)
			if (ipf_pullup(fin->fin_m, fin, plen) == NULL) {
				DT(ipf_pullup_fail);
				LBUMP(ipf_stats[fin->fin_out].fr_pull[1]);
				return -1;
			}
			LBUMP(ipf_stats[fin->fin_out].fr_pull[0]);
#else
			LBUMP(ipf_stats[fin->fin_out].fr_pull[1]);
			/*
			 * Fake ipf_pullup failing
			 */
			fin->fin_reason = FRB_PULLUP;
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			return -1;
#endif
		}
	}
	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_short                                                 */
/* Returns:     void                                                         */
/* Parameters:  fin(I)  - pointer to packet information                     */
/*              xmin(I) - minimum header size                                */
/*                                                                           */
/* Check if a packet is "short" as defined by xmin.  The rule we are        */
/* applying here is that the packet must not be fragmented within the layer */
/* 4 header.  That is, it must not be a fragment that has its offset set to */
/* start within the layer 4 header (hdrmin) or if it is at offset 0, the    */
/* entire layer 4 header must be present (min).                             */
/* ------------------------------------------------------------------------ */
static INLINE void
ipf_pr_short(fin, xmin)
	fr_info_t *fin;
	int xmin;
{

	if (fin->fin_off == 0) {
		if (fin->fin_dlen < xmin)
			fin->fin_flx |= FI_SHORT;
	} else if (fin->fin_off < xmin) {
		fin->fin_flx |= FI_SHORT;
	}
}
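/*
 * Worked example (editorial addition, not part of the original source):
 * the arithmetic in ipf_pr_pullup() above.  Suppose a hypothetical IPv4
 * packet with a 20 byte header (fin_hlen == 20) whose fin_dp has
 * already been advanced 8 bytes past the header (as ipf_pr_ah() does
 * when it skips the authentication header).  A call of
 * ipf_pr_pullup(fin, 8) then asks ipf_pullup() for 8 + 8 + 20 == 36
 * contiguous bytes: the new bytes wanted, the bytes already consumed
 * in front of fin_dp, and the layer 3 header itself.
 */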
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_icmp                                                  */
/* Returns:     void                                                         */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* IPv4 Only                                                                 */
/* Do a sanity check on the packet for ICMP (v4).  In nearly all cases,     */
/* except extremely bad packets, both type and code will be present.        */
/* The expected minimum size of an ICMP packet is very much dependent on    */
/* the type of it.                                                          */
/*                                                                           */
/* XXX - other ICMP sanity checks?                                          */
/* ------------------------------------------------------------------------ */
static INLINE void
ipf_pr_icmp(fin)
	fr_info_t *fin;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	int minicmpsz = sizeof(struct icmp);
	icmphdr_t *icmp;
	ip_t *oip;

	ipf_pr_short(fin, ICMPERR_ICMPHLEN);

	if (fin->fin_off != 0) {
		LBUMPD(ipf_stats[fin->fin_out], fr_v4_icmp_frag);
		return;
	}

	if (ipf_pr_pullup(fin, ICMPERR_ICMPHLEN) == -1) {
		LBUMPD(ipf_stats[fin->fin_out], fr_v4_icmp_pullup);
		return;
	}

	icmp = fin->fin_dp;

	fin->fin_data[0] = *(u_short *)icmp;
	fin->fin_data[1] = icmp->icmp_id;

	switch (icmp->icmp_type)
	{
	case ICMP_ECHOREPLY :
	case ICMP_ECHO :
	/* Router discovery messages - RFC 1256 */
	case ICMP_ROUTERADVERT :
	case ICMP_ROUTERSOLICIT :
		fin->fin_flx |= FI_ICMPQUERY;
		minicmpsz = ICMP_MINLEN;
		break;
	/*
	 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
	 * 3 * timestamp(3 * 4)
	 */
	case ICMP_TSTAMP :
	case ICMP_TSTAMPREPLY :
		fin->fin_flx |= FI_ICMPQUERY;
		minicmpsz = 20;
		break;
	/*
	 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
	 * mask(4)
	 */
	case ICMP_IREQ :
	case ICMP_IREQREPLY :
	case ICMP_MASKREQ :
	case ICMP_MASKREPLY :
		fin->fin_flx |= FI_ICMPQUERY;
		minicmpsz = 12;
		break;
	/*
	 * type(1) + code(1) + cksum(2) + id(2) seq(2) + ip(20+)
	 */
	case ICMP_UNREACH :
#ifdef icmp_nextmtu
		if (icmp->icmp_code == ICMP_UNREACH_NEEDFRAG) {
			if (icmp->icmp_nextmtu < softc->ipf_icmpminfragmtu) {
				fin->fin_flx |= FI_BAD;
				DT3(ipf_fi_bad_icmp_nextmtu, fr_info_t *, fin,
				    u_int, icmp->icmp_nextmtu, u_int,
				    softc->ipf_icmpminfragmtu);
			}
		}
#endif
	case ICMP_SOURCEQUENCH :
	case ICMP_REDIRECT :
	case ICMP_TIMXCEED :
	case ICMP_PARAMPROB :
		fin->fin_flx |= FI_ICMPERR;
		if (ipf_coalesce(fin) != 1) {
			LBUMPD(ipf_stats[fin->fin_out], fr_icmp_coalesce);
			return;
		}

		/*
		 * ICMP error packets should not be generated for IP
		 * packets that are a fragment that isn't the first
		 * fragment.
		 */
		oip = (ip_t *)((char *)fin->fin_dp + ICMPERR_ICMPHLEN);
		if ((ntohs(oip->ip_off) & IP_OFFMASK) != 0) {
			fin->fin_flx |= FI_BAD;
			DT2(ipf_fi_bad_icmp_err, fr_info_t, fin, u_int,
			    (ntohs(oip->ip_off) & IP_OFFMASK));
		}

		/*
		 * If the destination of this packet doesn't match the
		 * source of the original packet then this packet is
		 * not correct.
		 */
		if (oip->ip_src.s_addr != fin->fin_daddr) {
			fin->fin_flx |= FI_BAD;
			DT1(ipf_fi_bad_src_ne_dst, fr_info_t *, fin);
		}
		break;
	default :
		break;
	}

	ipf_pr_short(fin, minicmpsz);

	ipf_checkv4sum(fin);
}
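/*
 * Worked example (editorial addition, not part of the original source):
 * the ICMP error checks above use the IP header quoted after the 8 byte
 * ICMP header.  For a hypothetical ICMP_UNREACH delivered to 192.0.2.1
 * (fin_daddr), the quoted oip->ip_src must also be 192.0.2.1, since
 * errors go back to the sender of the offending packet; any other value
 * earns FI_BAD.  A quoted header with a non-zero fragment offset is
 * also bad, because errors are never generated for non-first fragments.
 */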
/* ------------------------------------------------------------------------ */
/* Function:    ipf_pr_tcpcommon                                             */
/* Returns:     int    - 0 = header ok, 1 = bad packet, -1 = buffer error   */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                           */
/* TCP header sanity checking.  Look for bad combinations of TCP flags,     */
/* and make some checks with how they interact with other fields.           */
/* If compiled with IPFILTER_CKSUM, check to see if the TCP checksum is     */
/* valid and mark the packet as bad if not.                                 */
/* ------------------------------------------------------------------------ */
static INLINE int
ipf_pr_tcpcommon(fin)
	fr_info_t *fin;
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	int flags, tlen;
	tcphdr_t *tcp;

	fin->fin_flx |= FI_TCPUDP;
	if (fin->fin_off != 0) {
		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_frag);
		return 0;
	}

	if (ipf_pr_pullup(fin, sizeof(*tcp)) == -1) {
		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_pullup);
		return -1;
	}

	tcp = fin->fin_dp;
	if (fin->fin_dlen > 3) {
		fin->fin_sport = ntohs(tcp->th_sport);
		fin->fin_dport = ntohs(tcp->th_dport);
	}

	if ((fin->fin_flx & FI_SHORT) != 0) {
		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_short);
		return 1;
	}

	/*
	 * Use of the TCP data offset *must* result in a value that is at
	 * least the same size as the TCP header.
	 */
	tlen = TCP_OFF(tcp) << 2;
	if (tlen < sizeof(tcphdr_t)) {
		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_small);
		fin->fin_flx |= FI_BAD;
		DT3(ipf_fi_bad_tlen, fr_info_t, fin, u_int, tlen, u_int,
		    sizeof(tcphdr_t));
		return 1;
	}

	flags = tcp->th_flags;
	fin->fin_tcpf = tcp->th_flags;

	/*
	 * If the urgent flag is set, then the urgent pointer must
	 * also be set and vice versa.  Good TCP packets do not have
	 * just one of these set.
	 */
	if ((flags & TH_URG) != 0 && (tcp->th_urp == 0)) {
		fin->fin_flx |= FI_BAD;
		DT3(ipf_fi_bad_th_urg, fr_info_t*, fin, u_int,
		    (flags & TH_URG), u_int, tcp->th_urp);
#if 0
	} else if ((flags & TH_URG) == 0 && (tcp->th_urp != 0)) {
		/*
		 * Ignore this case (#if 0) as it shows up in "real"
		 * traffic with bogus values in the urgent pointer field.
		 */
		fin->fin_flx |= FI_BAD;
		DT3(ipf_fi_bad_th_urg0, fr_info_t *, fin, u_int,
		    (flags & TH_URG), u_int, tcp->th_urp);
#endif
	} else if (((flags & (TH_SYN|TH_FIN)) != 0) &&
		   ((flags & (TH_RST|TH_ACK)) == TH_RST)) {
		/* TH_FIN|TH_RST|TH_ACK seems to appear "naturally" */
		fin->fin_flx |= FI_BAD;
		DT1(ipf_fi_bad_th_fin_rst_ack, fr_info_t, fin);
#if 1
	} else if (((flags & TH_SYN) != 0) &&
		   ((flags & (TH_URG|TH_PUSH)) != 0)) {
		/*
		 * SYN with URG and PUSH set is not for normal TCP but it is
		 * possible(?) with T/TCP...but who uses T/TCP?
		 */
		fin->fin_flx |= FI_BAD;
		DT1(ipf_fi_bad_th_syn_urg_psh, fr_info_t *, fin);
#endif
	} else if (!(flags & TH_ACK)) {
		/*
		 * If the ack bit isn't set, then either the SYN or
		 * RST bit must be set.  If the SYN bit is set, then
		 * we expect the ACK field to be 0.  If the ACK is
		 * not set and if URG, PSH or FIN are set, consider
		 * that to indicate a bad TCP packet.
		 */
		if ((flags == TH_SYN) && (tcp->th_ack != 0)) {
			/*
			 * Cisco PIX sets the ACK field to a random value.
			 * In light of this, do not set FI_BAD until a patch
			 * is available from Cisco to ensure that
			 * interoperability between existing systems is
			 * achieved.
			 */
			/*fin->fin_flx |= FI_BAD*/;
			/*DT1(ipf_fi_bad_th_syn_ack, fr_info_t *, fin);*/
		} else if (!(flags & (TH_RST|TH_SYN))) {
			fin->fin_flx |= FI_BAD;
			DT1(ipf_fi_bad_th_rst_syn, fr_info_t *, fin);
		} else if ((flags & (TH_URG|TH_PUSH|TH_FIN)) != 0) {
			fin->fin_flx |= FI_BAD;
			DT1(ipf_fi_bad_th_urg_push_fin, fr_info_t *, fin);
		}
	}
	if (fin->fin_flx & FI_BAD) {
		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_bad_flags);
		return 1;
	}

	/*
	 * At this point, it's not exactly clear what is to be gained by
	 * marking up which TCP options are and are not present.  The one we
	 * are most interested in is the TCP window scale.  This is only in
	 * a SYN packet [RFC1323] so we don't need this here...?
	 * Now if we were to analyse the header for passive fingerprinting,
	 * then that might add some weight to adding this...
*/ if (tlen == sizeof(tcphdr_t)) { return 0; } if (ipf_pr_pullup(fin, tlen) == -1) { LBUMPD(ipf_stats[fin->fin_out], fr_tcp_pullup); return -1; } #if 0 tcp = fin->fin_dp; ip = fin->fin_ip; s = (u_char *)(tcp + 1); off = IP_HL(ip) << 2; # ifdef _KERNEL if (fin->fin_mp != NULL) { mb_t *m = *fin->fin_mp; if (off + tlen > M_LEN(m)) return; } # endif for (tlen -= (int)sizeof(*tcp); tlen > 0; ) { opt = *s; if (opt == '\0') break; else if (opt == TCPOPT_NOP) ol = 1; else { if (tlen < 2) break; ol = (int)*(s + 1); if (ol < 2 || ol > tlen) break; } for (i = 9, mv = 4; mv >= 0; ) { op = ipopts + i; if (opt == (u_char)op->ol_val) { optmsk |= op->ol_bit; break; } } tlen -= ol; s += ol; } #endif /* 0 */ return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_udpcommon */ /* Returns: int - 0 = header ok, 1 = bad packet */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Extract the UDP source and destination ports, if present. If compiled */ /* with IPFILTER_CKSUM, check to see if the UDP checksum is valid. */ /* ------------------------------------------------------------------------ */ static INLINE int ipf_pr_udpcommon(fin) fr_info_t *fin; { udphdr_t *udp; fin->fin_flx |= FI_TCPUDP; if (!fin->fin_off && (fin->fin_dlen > 3)) { if (ipf_pr_pullup(fin, sizeof(*udp)) == -1) { ipf_main_softc_t *softc = fin->fin_main_soft; fin->fin_flx |= FI_SHORT; LBUMPD(ipf_stats[fin->fin_out], fr_udp_pullup); return 1; } udp = fin->fin_dp; fin->fin_sport = ntohs(udp->uh_sport); fin->fin_dport = ntohs(udp->uh_dport); } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_tcp */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv4 Only */ /* Analyse the packet for IPv4/TCP properties. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_tcp(fin) fr_info_t *fin; { ipf_pr_short(fin, sizeof(tcphdr_t)); if (ipf_pr_tcpcommon(fin) == 0) ipf_checkv4sum(fin); } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_udp */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv4 Only */ /* Analyse the packet for IPv4/UDP properties. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_udp(fin) fr_info_t *fin; { ipf_pr_short(fin, sizeof(udphdr_t)); if (ipf_pr_udpcommon(fin) == 0) ipf_checkv4sum(fin); } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_esp */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Analyse the packet for ESP properties. */ /* The minimum length is taken to be the SPI (32bits) plus a tail (32bits) */ /* even though the newer ESP packets must also have a sequence number that */ /* is 32bits as well, it is not possible(?) to determine the version from a */ /* simple packet header. 
*/ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_esp(fin) fr_info_t *fin; { if (fin->fin_off == 0) { ipf_pr_short(fin, 8); if (ipf_pr_pullup(fin, 8) == -1) { ipf_main_softc_t *softc = fin->fin_main_soft; LBUMPD(ipf_stats[fin->fin_out], fr_v4_esp_pullup); } } } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_ah */ /* Returns: int - value of the next header or IPPROTO_NONE if error */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Analyse the packet for AH properties. */ /* The minimum length is taken to be the combination of all fields in the */ /* header being present and no authentication data (null algorithm used.) */ /* ------------------------------------------------------------------------ */ static INLINE int ipf_pr_ah(fin) fr_info_t *fin; { ipf_main_softc_t *softc = fin->fin_main_soft; authhdr_t *ah; int len; fin->fin_flx |= FI_AH; ipf_pr_short(fin, sizeof(*ah)); if (((fin->fin_flx & FI_SHORT) != 0) || (fin->fin_off != 0)) { LBUMPD(ipf_stats[fin->fin_out], fr_v4_ah_bad); return IPPROTO_NONE; } if (ipf_pr_pullup(fin, sizeof(*ah)) == -1) { DT(fr_v4_ah_pullup_1); LBUMP(ipf_stats[fin->fin_out].fr_v4_ah_pullup); return IPPROTO_NONE; } ah = (authhdr_t *)fin->fin_dp; len = (ah->ah_plen + 2) << 2; ipf_pr_short(fin, len); if (ipf_pr_pullup(fin, len) == -1) { DT(fr_v4_ah_pullup_2); LBUMP(ipf_stats[fin->fin_out].fr_v4_ah_pullup); return IPPROTO_NONE; } /* * Adjust fin_dp and fin_dlen for skipping over the authentication * header. */ fin->fin_dp = (char *)fin->fin_dp + len; fin->fin_dlen -= len; return ah->ah_next; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_gre */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Analyse the packet for GRE properties. */ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_gre(fin) fr_info_t *fin; { ipf_main_softc_t *softc = fin->fin_main_soft; grehdr_t *gre; ipf_pr_short(fin, sizeof(grehdr_t)); if (fin->fin_off != 0) { LBUMPD(ipf_stats[fin->fin_out], fr_v4_gre_frag); return; } if (ipf_pr_pullup(fin, sizeof(grehdr_t)) == -1) { LBUMPD(ipf_stats[fin->fin_out], fr_v4_gre_pullup); return; } gre = fin->fin_dp; if (GRE_REV(gre->gr_flags) == 1) fin->fin_data[0] = gre->gr_call; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pr_ipv4hdr */ /* Returns: void */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* IPv4 Only */ /* Analyze the IPv4 header and set fields in the fr_info_t structure. */ /* Check all options present and flag their presence if any exist. 
*/ /* ------------------------------------------------------------------------ */ static INLINE void ipf_pr_ipv4hdr(fin) fr_info_t *fin; { u_short optmsk = 0, secmsk = 0, auth = 0; int hlen, ol, mv, p, i; const struct optlist *op; u_char *s, opt; u_short off; fr_ip_t *fi; ip_t *ip; fi = &fin->fin_fi; hlen = fin->fin_hlen; ip = fin->fin_ip; p = ip->ip_p; fi->fi_p = p; fin->fin_crc = p; fi->fi_tos = ip->ip_tos; fin->fin_id = ip->ip_id; off = ntohs(ip->ip_off); /* Get both TTL and protocol */ fi->fi_p = ip->ip_p; fi->fi_ttl = ip->ip_ttl; /* Zero out bits not used in IPv6 address */ fi->fi_src.i6[1] = 0; fi->fi_src.i6[2] = 0; fi->fi_src.i6[3] = 0; fi->fi_dst.i6[1] = 0; fi->fi_dst.i6[2] = 0; fi->fi_dst.i6[3] = 0; fi->fi_saddr = ip->ip_src.s_addr; fin->fin_crc += fi->fi_saddr; fi->fi_daddr = ip->ip_dst.s_addr; fin->fin_crc += fi->fi_daddr; if (IN_CLASSD(ntohl(fi->fi_daddr))) fin->fin_flx |= FI_MULTICAST|FI_MBCAST; /* * set packet attribute flags based on the offset and * calculate the byte offset that it represents. */ off &= IP_MF|IP_OFFMASK; if (off != 0) { int morefrag = off & IP_MF; fi->fi_flx |= FI_FRAG; off &= IP_OFFMASK; if (off != 0) { fin->fin_flx |= FI_FRAGBODY; off <<= 3; if ((off + fin->fin_dlen > 65535) || (fin->fin_dlen == 0) || ((morefrag != 0) && ((fin->fin_dlen & 7) != 0))) { /* * The length of the packet, starting at its * offset cannot exceed 65535 (0xffff) as the * length of an IP packet is only 16 bits. * * Any fragment that isn't the last fragment * must have a length greater than 0 and it * must be an even multiple of 8. */ fi->fi_flx |= FI_BAD; DT1(ipf_fi_bad_fragbody_gt_65535, fr_info_t *, fin); } } } fin->fin_off = off; /* * Call per-protocol setup and checking */ if (p == IPPROTO_AH) { /* * Treat AH differently because we expect there to be another * layer 4 header after it. */ p = ipf_pr_ah(fin); } switch (p) { case IPPROTO_UDP : ipf_pr_udp(fin); break; case IPPROTO_TCP : ipf_pr_tcp(fin); break; case IPPROTO_ICMP : ipf_pr_icmp(fin); break; case IPPROTO_ESP : ipf_pr_esp(fin); break; case IPPROTO_GRE : ipf_pr_gre(fin); break; } ip = fin->fin_ip; if (ip == NULL) return; /* * If it is a standard IP header (no options), set the flag fields * which relate to options to 0. */ if (hlen == sizeof(*ip)) { fi->fi_optmsk = 0; fi->fi_secmsk = 0; fi->fi_auth = 0; return; } /* * So the IP header has some IP options attached. Walk the entire * list of options present with this packet and set flags to indicate * which ones are here and which ones are not. For the somewhat out * of date and obscure security classification options, set a flag to * represent which classification is present. 
*/ fi->fi_flx |= FI_OPTIONS; for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) { opt = *s; if (opt == '\0') break; else if (opt == IPOPT_NOP) ol = 1; else { if (hlen < 2) break; ol = (int)*(s + 1); if (ol < 2 || ol > hlen) break; } for (i = 9, mv = 4; mv >= 0; ) { op = ipopts + i; if ((opt == (u_char)op->ol_val) && (ol > 4)) { u_32_t doi; switch (opt) { case IPOPT_SECURITY : if (optmsk & op->ol_bit) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_ipopt_security, fr_info_t *, fin, u_short, (optmsk & op->ol_bit)); } else { doi = ipf_checkripso(s); secmsk = doi >> 16; auth = doi & 0xffff; } break; case IPOPT_CIPSO : if (optmsk & op->ol_bit) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_ipopt_cipso, fr_info_t *, fin, u_short, (optmsk & op->ol_bit)); } else { doi = ipf_checkcipso(fin, s, ol); secmsk = doi >> 16; auth = doi & 0xffff; } break; } optmsk |= op->ol_bit; } if (opt < op->ol_val) i -= mv; else i += mv; mv--; } hlen -= ol; s += ol; } /* * */ if (auth && !(auth & 0x0100)) auth &= 0xff00; fi->fi_optmsk = optmsk; fi->fi_secmsk = secmsk; fi->fi_auth = auth; } /* ------------------------------------------------------------------------ */ /* Function: ipf_checkripso */ /* Returns: void */ /* Parameters: s(I) - pointer to start of RIPSO option */ /* */ /* ------------------------------------------------------------------------ */ static u_32_t ipf_checkripso(s) u_char *s; { const struct optlist *sp; u_short secmsk = 0, auth = 0; u_char sec; int j, m; sec = *(s + 2); /* classification */ for (j = 3, m = 2; m >= 0; ) { sp = secopt + j; if (sec == sp->ol_val) { secmsk |= sp->ol_bit; auth = *(s + 3); auth *= 256; auth += *(s + 4); break; } if (sec < sp->ol_val) j -= m; else j += m; m--; } return (secmsk << 16) | auth; } /* ------------------------------------------------------------------------ */ /* Function: ipf_checkcipso */ /* Returns: u_32_t - 0 = failure, else the doi from the header */ /* Parameters: fin(IO) - pointer to packet information */ /* s(I) - pointer to start of CIPSO option */ /* ol(I) - length of CIPSO option field */ /* */ /* This function returns the domain of integrity (DOI) field from the CIPSO */ /* header and returns that whilst also storing the highest sensitivity */ /* value found in the fr_info_t structure. */ /* */ /* No attempt is made to extract the category bitmaps as these are defined */ /* by the user (rather than the protocol) and can be rather numerous on the */ /* end nodes. */ /* ------------------------------------------------------------------------ */ static u_32_t ipf_checkcipso(fin, s, ol) fr_info_t *fin; u_char *s; int ol; { ipf_main_softc_t *softc = fin->fin_main_soft; fr_ip_t *fi; u_32_t doi; u_char *t, tag, tlen, sensitivity; int len; if (ol < 6 || ol > 40) { LBUMPD(ipf_stats[fin->fin_out], fr_v4_cipso_bad); fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_ol, fr_info_t *, fin, u_int, ol); return 0; } fi = &fin->fin_fi; fi->fi_sensitivity = 0; /* * The DOI field MUST be there. */ bcopy(s + 2, &doi, sizeof(doi)); t = (u_char *)s + 6; for (len = ol - 6; len >= 2; len -= tlen, t+= tlen) { tag = *t; tlen = *(t + 1); if (tlen > len || tlen < 4 || tlen > 34) { LBUMPD(ipf_stats[fin->fin_out], fr_v4_cipso_tlen); fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tlen, fr_info_t *, fin, u_int, tlen); return 0; } sensitivity = 0; /* * Tag numbers 0, 1, 2, 5 are laid out in the CIPSO Internet * draft (16 July 1992) that has expired. 
*/ if (tag == 0) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tag, fr_info_t *, fin, u_int, tag); continue; } else if (tag == 1) { if (*(t + 2) != 0) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tag1_t2, fr_info_t *, fin, u_int, (*t + 2)); continue; } sensitivity = *(t + 3); /* Category bitmap for categories 0-239 */ } else if (tag == 4) { if (*(t + 2) != 0) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tag4_t2, fr_info_t *, fin, u_int, (*t + 2)); continue; } sensitivity = *(t + 3); /* Enumerated categories, 16bits each, upto 15 */ } else if (tag == 5) { if (*(t + 2) != 0) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tag5_t2, fr_info_t *, fin, u_int, (*t + 2)); continue; } sensitivity = *(t + 3); /* Range of categories (2*16bits), up to 7 pairs */ } else if (tag > 127) { /* Custom defined DOI */ ; } else { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkcipso_tag127, fr_info_t *, fin, u_int, tag); continue; } if (sensitivity > fi->fi_sensitivity) fi->fi_sensitivity = sensitivity; } return doi; } /* ------------------------------------------------------------------------ */ /* Function: ipf_makefrip */ /* Returns: int - 0 == packet ok, -1 == packet freed */ /* Parameters: hlen(I) - length of IP packet header */ /* ip(I) - pointer to the IP header */ /* fin(IO) - pointer to packet information */ /* */ /* Compact the IP header into a structure which contains just the info. */ /* which is useful for comparing IP headers with and store this information */ /* in the fr_info_t structure pointer to by fin. At present, it is assumed */ /* this function will be called with either an IPv4 or IPv6 packet. */ /* ------------------------------------------------------------------------ */ int ipf_makefrip(hlen, ip, fin) int hlen; ip_t *ip; fr_info_t *fin; { ipf_main_softc_t *softc = fin->fin_main_soft; int v; fin->fin_depth = 0; fin->fin_hlen = (u_short)hlen; fin->fin_ip = ip; fin->fin_rule = 0xffffffff; fin->fin_group[0] = -1; fin->fin_group[1] = '\0'; fin->fin_dp = (char *)ip + hlen; v = fin->fin_v; if (v == 4) { fin->fin_plen = ntohs(ip->ip_len); fin->fin_dlen = fin->fin_plen - hlen; ipf_pr_ipv4hdr(fin); #ifdef USE_INET6 } else if (v == 6) { fin->fin_plen = ntohs(((ip6_t *)ip)->ip6_plen); fin->fin_dlen = fin->fin_plen; fin->fin_plen += hlen; ipf_pr_ipv6hdr(fin); #endif } if (fin->fin_ip == NULL) { LBUMP(ipf_stats[fin->fin_out].fr_ip_freed); return -1; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_portcheck */ /* Returns: int - 1 == port matched, 0 == port match failed */ /* Parameters: frp(I) - pointer to port check `expression' */ /* pop(I) - port number to evaluate */ /* */ /* Perform a comparison of a port number against some other(s), using a */ /* structure with compare information stored in it. */ /* ------------------------------------------------------------------------ */ static INLINE int ipf_portcheck(frp, pop) frpcmp_t *frp; u_32_t pop; { int err = 1; u_32_t po; po = frp->frp_port; /* * Do opposite test to that required and continue if that succeeds. 
	 */
	switch (frp->frp_cmp)
	{
	case FR_EQUAL :
		if (pop != po) /* EQUAL */
			err = 0;
		break;
	case FR_NEQUAL :
		if (pop == po) /* NOTEQUAL */
			err = 0;
		break;
	case FR_LESST :
		if (pop >= po) /* LESSTHAN */
			err = 0;
		break;
	case FR_GREATERT :
		if (pop <= po) /* GREATERTHAN */
			err = 0;
		break;
	case FR_LESSTE :
		if (pop > po) /* LT or EQ */
			err = 0;
		break;
	case FR_GREATERTE :
		if (pop < po) /* GT or EQ */
			err = 0;
		break;
	case FR_OUTRANGE :
		if (pop >= po && pop <= frp->frp_top) /* Out of range */
			err = 0;
		break;
	case FR_INRANGE :
		if (pop <= po || pop >= frp->frp_top) /* In range */
			err = 0;
		break;
	case FR_INCRANGE :
		if (pop < po || pop > frp->frp_top) /* Inclusive range */
			err = 0;
		break;
	default :
		break;
	}
	return err;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_tcpudpchk                                                */
/* Returns:     int - 1 == protocol matched, 0 == check failed              */
/* Parameters:  fda(I) - pointer to packet information                      */
/*              ft(I)  - pointer to structure with comparison data          */
/*                                                                           */
/* Compares the current packet (assuming it is TCP/UDP) information with a  */
/* structure containing information that we want to match against.          */
/* ------------------------------------------------------------------------ */
int
ipf_tcpudpchk(fi, ft)
	fr_ip_t *fi;
	frtuc_t *ft;
{
	int err = 1;

	/*
	 * Both ports should *always* be in the first fragment.
	 * So far, I cannot find any cases where they can not be.
	 *
	 * compare destination ports
	 */
	if (ft->ftu_dcmp)
		err = ipf_portcheck(&ft->ftu_dst, fi->fi_ports[1]);

	/*
	 * compare source ports
	 */
	if (err && ft->ftu_scmp)
		err = ipf_portcheck(&ft->ftu_src, fi->fi_ports[0]);

	/*
	 * If we don't have all the TCP/UDP header, then how can we
	 * expect to do any sort of match on it ?  If we were looking for
	 * TCP flags, then NO match.  If not, then match (which should
	 * satisfy the "short" class too).
	 */
	if (err && (fi->fi_p == IPPROTO_TCP)) {
		if (fi->fi_flx & FI_SHORT)
			return !(ft->ftu_tcpf | ft->ftu_tcpfm);
		/*
		 * Match the flags ?  If not, abort this match.
		 */
		if (ft->ftu_tcpfm &&
		    ft->ftu_tcpf != (fi->fi_tcpf & ft->ftu_tcpfm)) {
			FR_DEBUG(("f. %#x & %#x != %#x\n", fi->fi_tcpf,
				 ft->ftu_tcpfm, ft->ftu_tcpf));
			err = 0;
		}
	}
	return err;
}
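/*
 * Worked example (editorial addition, not part of the original source):
 * ipf_portcheck() above tests the opposite of each comparison and
 * clears err on success, so err == 1 means "matched".  With
 * hypothetical bounds frp_port == 1024 and frp_top == 2048:
 * FR_OUTRANGE matches port 80 (outside [1024,2048]) but not 1500,
 * FR_INRANGE matches 1500 but neither endpoint, and FR_INCRANGE
 * matches 1024, 1500 and 2048 alike.
 */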
%#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); if (i != 0) return 1; lip++, lm++, ld++; /* * Unrolled loops (4 each, for 32 bits) for address checks. */ /* * Check the source address. */ if (fr->fr_satype == FRI_LOOKUP) { i = (*fr->fr_srcfunc)(fin->fin_main_soft, fr->fr_srcptr, fi->fi_v, lip, fin->fin_plen); if (i == -1) return 1; lip += 3; lm += 3; ld += 3; } else { i = ((*lip & *lm) != *ld); FR_DEBUG(("2a. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); if (fi->fi_v == 6) { lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("2b. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("2c. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("2d. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); } else { lip += 3; lm += 3; ld += 3; } } i ^= (fr->fr_flags & FR_NOTSRCIP) >> 6; if (i != 0) return 1; /* * Check the destination address. */ lip++, lm++, ld++; if (fr->fr_datype == FRI_LOOKUP) { i = (*fr->fr_dstfunc)(fin->fin_main_soft, fr->fr_dstptr, fi->fi_v, lip, fin->fin_plen); if (i == -1) return 1; lip += 3; lm += 3; ld += 3; } else { i = ((*lip & *lm) != *ld); FR_DEBUG(("3a. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); if (fi->fi_v == 6) { lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("3b. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("3c. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); lip++, lm++, ld++; i |= ((*lip & *lm) != *ld); FR_DEBUG(("3d. %#08x & %#08x != %#08x\n", ntohl(*lip), ntohl(*lm), ntohl(*ld))); } else { lip += 3; lm += 3; ld += 3; } } i ^= (fr->fr_flags & FR_NOTDSTIP) >> 7; if (i != 0) return 1; /* * IP addresses matched. The next 32 bits contain: * mask of old IP header security & authentication bits. */ lip++, lm++, ld++; i = (*ld - (*lip & *lm)); FR_DEBUG(("4. %#08x & %#08x != %#08x\n", *lip, *lm, *ld)); /* * Next we have 32 bits of packet flags. */ lip++, lm++, ld++; i |= (*ld - (*lip & *lm)); FR_DEBUG(("5. %#08x & %#08x != %#08x\n", *lip, *lm, *ld)); if (i == 0) { /* * If a fragment, then only the first has what we're * looking for here... */ if (portcmp) { if (!ipf_tcpudpchk(&fin->fin_fi, &fr->fr_tuc)) i = 1; } else { if (fr->fr_dcmp || fr->fr_scmp || fr->fr_tcpf || fr->fr_tcpfm) i = 1; if (fr->fr_icmpm || fr->fr_icmp) { if (((fi->fi_p != IPPROTO_ICMP) && (fi->fi_p != IPPROTO_ICMPV6)) || fin->fin_off || (fin->fin_dlen < 2)) i = 1; else if ((fin->fin_data[0] & fr->fr_icmpm) != fr->fr_icmp) { FR_DEBUG(("i. %#x & %#x != %#x\n", fin->fin_data[0], fr->fr_icmpm, fr->fr_icmp)); i = 1; } } } } return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_scanlist */ /* Returns: int - result flags of scanning filter list */ /* Parameters: fin(I) - pointer to packet information */ /* pass(I) - default result to return for filtering */ /* */ /* Check the input/output list of rules for a match to the current packet. */ /* If a match is found, the value of fr_flags from the rule becomes the */ /* return value and fin->fin_fr points to the matched rule. */ /* */ /* This function may be called recursively up to 16 times (built-in limit.) */ /* When unwinding, it should finish up with fin_depth as 0. 
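As an illustration, with a hypothetical ruleset of the form "block in all head 100", "pass in on em0 all group 100 head 200", "pass in proto tcp all group 200", each matching head rule makes ipf_scanlist descend one level into its group, so fin_depth counts the group nesting and the limit of 16 bounds it.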
*/ /* */ /* Could be per interface, but this gets real nasty when you don't have, */ /* or can't easily change, the kernel source code. */ /* ------------------------------------------------------------------------ */ int ipf_scanlist(fin, pass) fr_info_t *fin; u_32_t pass; { ipf_main_softc_t *softc = fin->fin_main_soft; int rulen, portcmp, off, skip; struct frentry *fr, *fnext; u_32_t passt, passo; /* * Do not allow nesting deeper than 16 levels. */ if (fin->fin_depth >= 16) return pass; fr = fin->fin_fr; /* * If there are no rules in this list, return now. */ if (fr == NULL) return pass; skip = 0; portcmp = 0; fin->fin_depth++; fin->fin_fr = NULL; off = fin->fin_off; if ((fin->fin_flx & FI_TCPUDP) && (fin->fin_dlen > 3) && !off) portcmp = 1; for (rulen = 0; fr; fr = fnext, rulen++) { fnext = fr->fr_next; if (skip != 0) { FR_VERBOSE(("SKIP %d (%#x)\n", skip, fr->fr_flags)); skip--; continue; } /* * In all checks below, a null (zero) value in the * filter structure is taken to mean a wildcard. * * check that we are working for the right interface */ #ifdef _KERNEL if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp) continue; #else if (opts & (OPT_VERBOSE|OPT_DEBUG)) printf("\n"); FR_VERBOSE(("%c", FR_ISSKIP(pass) ? 's' : FR_ISPASS(pass) ? 'p' : FR_ISACCOUNT(pass) ? 'A' : FR_ISAUTH(pass) ? 'a' : (pass & FR_NOMATCH) ? 'n' :'b')); if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp) continue; FR_VERBOSE((":i")); #endif switch (fr->fr_type) { case FR_T_IPF : case FR_T_IPF_BUILTIN : if (ipf_check_ipf(fin, fr, portcmp)) continue; break; #if defined(IPFILTER_BPF) case FR_T_BPFOPC : case FR_T_BPFOPC_BUILTIN : { u_char *mc; int wlen; if (*fin->fin_mp == NULL) continue; if (fin->fin_family != fr->fr_family) continue; mc = (u_char *)fin->fin_m; wlen = fin->fin_dlen + fin->fin_hlen; if (!bpf_filter(fr->fr_data, mc, wlen, 0)) continue; break; } #endif case FR_T_CALLFUNC_BUILTIN : { frentry_t *f; f = (*fr->fr_func)(fin, &pass); if (f != NULL) fr = f; else continue; break; } case FR_T_IPFEXPR : case FR_T_IPFEXPR_BUILTIN : if (fin->fin_family != fr->fr_family) continue; if (ipf_fr_matcharray(fin, fr->fr_data) == 0) continue; break; default : break; } if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { if (fin->fin_nattag == NULL) continue; if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag) == 0) continue; } FR_VERBOSE(("=%d/%d.%d *", fr->fr_grhead, fr->fr_group, rulen)); passt = fr->fr_flags; /* * If the rule is a "call now" rule, then call the function * in the rule, if it exists and use the results from that. * If the function pointer is bad, just make like we ignore * it, except for increasing the hit counter. */ if ((passt & FR_CALLNOW) != 0) { frentry_t *frs; ATOMIC_INC64(fr->fr_hits); if ((fr->fr_func == NULL) || (fr->fr_func == (ipfunc_t)-1)) continue; frs = fin->fin_fr; fin->fin_fr = fr; fr = (*fr->fr_func)(fin, &passt); if (fr == NULL) { fin->fin_fr = frs; continue; } passt = fr->fr_flags; } fin->fin_fr = fr; #ifdef IPFILTER_LOG /* * Just log this packet... 
*/ if ((passt & FR_LOGMASK) == FR_LOG) { if (ipf_log_pkt(fin, passt) == -1) { if (passt & FR_LOGORBLOCK) { DT(frb_logfail); passt &= ~FR_CMDMASK; passt |= FR_BLOCK|FR_QUICK; fin->fin_reason = FRB_LOGFAIL; } } } #endif /* IPFILTER_LOG */ MUTEX_ENTER(&fr->fr_lock); fr->fr_bytes += (U_QUAD_T)fin->fin_plen; fr->fr_hits++; MUTEX_EXIT(&fr->fr_lock); fin->fin_rule = rulen; passo = pass; if (FR_ISSKIP(passt)) { skip = fr->fr_arg; continue; } else if (((passt & FR_LOGMASK) != FR_LOG) && ((passt & FR_LOGMASK) != FR_DECAPSULATE)) { pass = passt; } if (passt & (FR_RETICMP|FR_FAKEICMP)) fin->fin_icode = fr->fr_icode; if (fr->fr_group != -1) { (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group), strlen(FR_NAME(fr, fr_group))); } else { fin->fin_group[0] = '\0'; } FR_DEBUG(("pass %#x/%#x/%x\n", passo, pass, passt)); if (fr->fr_grphead != NULL) { fin->fin_fr = fr->fr_grphead->fg_start; FR_VERBOSE(("group %s\n", FR_NAME(fr, fr_grhead))); if (FR_ISDECAPS(passt)) passt = ipf_decaps(fin, pass, fr->fr_icode); else passt = ipf_scanlist(fin, pass); if (fin->fin_fr == NULL) { fin->fin_rule = rulen; if (fr->fr_group != -1) (void) strncpy(fin->fin_group, fr->fr_names + fr->fr_group, strlen(fr->fr_names + fr->fr_group)); fin->fin_fr = fr; passt = pass; } pass = passt; } if (pass & FR_QUICK) { /* * Finally, if we've asked to track state for this * packet, set it up. Add state for "quick" rules * here so that if the action fails we can consider * the rule to "not match" and keep on processing * filter rules. */ if ((pass & FR_KEEPSTATE) && !FR_ISAUTH(pass) && !(fin->fin_flx & FI_STATE)) { int out = fin->fin_out; fin->fin_fr = fr; if (ipf_state_add(softc, fin, NULL, 0) == 0) { LBUMPD(ipf_stats[out], fr_ads); } else { LBUMPD(ipf_stats[out], fr_bads); pass = passo; continue; } } break; } } fin->fin_depth--; return pass; } /* ------------------------------------------------------------------------ */ /* Function: ipf_acctpkt */ /* Returns: frentry_t* - always returns NULL */ /* Parameters: fin(I) - pointer to packet information */ /* passp(IO) - pointer to current/new filter decision (unused) */ /* */ /* Checks a packet against accounting rules, if there are any for the given */ /* IP protocol version. */ /* */ /* N.B.: this function returns NULL to match the prototype used by other */ /* functions called from the IPFilter "mainline" in ipf_check(). */ /* ------------------------------------------------------------------------ */ frentry_t * ipf_acctpkt(fin, passp) fr_info_t *fin; u_32_t *passp; { ipf_main_softc_t *softc = fin->fin_main_soft; char group[FR_GROUPLEN]; frentry_t *fr, *frsave; u_32_t pass, rulen; passp = passp; fr = softc->ipf_acct[fin->fin_out][softc->ipf_active]; if (fr != NULL) { frsave = fin->fin_fr; bcopy(fin->fin_group, group, FR_GROUPLEN); rulen = fin->fin_rule; fin->fin_fr = fr; pass = ipf_scanlist(fin, FR_NOMATCH); if (FR_ISACCOUNT(pass)) { LBUMPD(ipf_stats[0], fr_acct); } fin->fin_fr = frsave; bcopy(group, fin->fin_group, FR_GROUPLEN); fin->fin_rule = rulen; } return NULL; } /* ------------------------------------------------------------------------ */ /* Function: ipf_firewall */ /* Returns: frentry_t* - returns pointer to matched rule, if no matches */ /* were found, returns NULL. */ /* Parameters: fin(I) - pointer to packet information */ /* passp(IO) - pointer to current/new filter decision (unused) */ /* */ /* Applies an appropriate set of firewall rules to the packet, to see if */ /* there are any matches. The first check is to see if a match can be seen */ /* in the cache. 
If not, then search an appropriate list of rules. Once a */ /* matching rule is found, take any appropriate actions as defined by the */ /* rule - except logging. */ /* ------------------------------------------------------------------------ */ static frentry_t * ipf_firewall(fin, passp) fr_info_t *fin; u_32_t *passp; { ipf_main_softc_t *softc = fin->fin_main_soft; frentry_t *fr; u_32_t pass; int out; out = fin->fin_out; pass = *passp; /* * This rule cache will only affect packets that are not being * statefully filtered. */ fin->fin_fr = softc->ipf_rules[out][softc->ipf_active]; if (fin->fin_fr != NULL) pass = ipf_scanlist(fin, softc->ipf_pass); if ((pass & FR_NOMATCH)) { LBUMPD(ipf_stats[out], fr_nom); } fr = fin->fin_fr; /* * Apply packets per second rate-limiting to a rule as required. */ if ((fr != NULL) && (fr->fr_pps != 0) && !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) { DT2(frb_ppsrate, fr_info_t *, fin, frentry_t *, fr); pass &= ~(FR_CMDMASK|FR_RETICMP|FR_RETRST); pass |= FR_BLOCK; LBUMPD(ipf_stats[out], fr_ppshit); fin->fin_reason = FRB_PPSRATE; } /* * If we fail to add a packet to the authorization queue, then we * drop the packet later. However, if it was added then pretend * we've dropped it already. */ if (FR_ISAUTH(pass)) { if (ipf_auth_new(fin->fin_m, fin) != 0) { DT1(frb_authnew, fr_info_t *, fin); fin->fin_m = *fin->fin_mp = NULL; fin->fin_reason = FRB_AUTHNEW; fin->fin_error = 0; } else { IPFERROR(1); fin->fin_error = ENOSPC; } } if ((fr != NULL) && (fr->fr_func != NULL) && (fr->fr_func != (ipfunc_t)-1) && !(pass & FR_CALLNOW)) (void) (*fr->fr_func)(fin, &pass); /* * If a rule is a pre-auth rule, check again in the list of rules * loaded for authenticated use. It does not particularly matter * if this search fails because a "preauth" result, from a rule, * is treated as "not a pass", hence the packet is blocked. */ if (FR_ISPREAUTH(pass)) { pass = ipf_auth_pre_scanlist(softc, fin, pass); } /* * If the rule has "keep frag" and the packet is actually a fragment, * then create a fragment state entry. */ if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) { if (fin->fin_flx & FI_FRAG) { if (ipf_frag_new(softc, fin, pass) == -1) { LBUMP(ipf_stats[out].fr_bnfr); } else { LBUMP(ipf_stats[out].fr_nfr); } } else { LBUMP(ipf_stats[out].fr_cfr); } } fr = fin->fin_fr; *passp = pass; return fr; } /* ------------------------------------------------------------------------ */ /* Function: ipf_check */ /* Returns: int - 0 == packet allowed through, */ /* User space: */ /* -1 == packet blocked */ /* 1 == packet not matched */ /* -2 == requires authentication */ /* Kernel: */ /* > 0 == filter error # for packet */ /* Parameters: ip(I) - pointer to start of IPv4/6 packet */ /* hlen(I) - length of header */ /* ifp(I) - pointer to interface this packet is on */ /* out(I) - 0 == packet going in, 1 == packet going out */ /* mp(IO) - pointer to caller's buffer pointer that holds this */ /* IP packet. */ /* Solaris & HP-UX ONLY : */ /* qpi(I) - pointer to STREAMS queue information for this */ /* interface & direction. */ /* */ /* ipf_check() is the master function for all IPFilter packet processing. */ /* It orchestrates: Network Address Translation (NAT), checking for packet */ /* authorisation (or pre-authorisation), presence of related state info., */ /* generating log entries, IP packet accounting, routing of packets as */ /* directed by firewall rules and of course whether or not to allow the */ /* packet to be further processed by the kernel. 
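In rough order, an inbound packet below goes through NAT (ipf_nat_checkin), the authentication table (ipf_auth_check), fragment and state table lookups (ipf_frag_known, ipf_state_check) and only then rule scanning via ipf_firewall; outbound packets are accounted and NAT-translated only after a pass result, which the code below mirrors.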
*/ /* */ /* For packets blocked, the contents of "mp" will be NULL'd and the buffer */ /* freed. Packets passed may be returned with the pointer pointed to */ /* by "mp" changed to a new buffer. */ /* ------------------------------------------------------------------------ */ int ipf_check(ctx, ip, hlen, ifp, out #if defined(_KERNEL) && defined(MENTAT) , qif, mp) void *qif; #else , mp) #endif mb_t **mp; ip_t *ip; int hlen; void *ifp; int out; void *ctx; { /* * The above really sucks, but short of writing a diff */ ipf_main_softc_t *softc = ctx; fr_info_t frinfo; fr_info_t *fin = &frinfo; u_32_t pass = softc->ipf_pass; frentry_t *fr = NULL; int v = IP_V(ip); mb_t *mc = NULL; mb_t *m; /* * The first part of ipf_check() deals with making sure that what goes * into the filtering engine makes some sense. Information about * the packet is distilled, collected into a fr_info_t structure and * then an attempt is made to ensure the buffer the packet is in is big * enough to hold all the required packet headers. */ #ifdef _KERNEL # ifdef MENTAT qpktinfo_t *qpi = qif; # ifdef __sparc if ((u_int)ip & 0x3) return 2; # endif # else SPL_INT(s); # endif if (softc->ipf_running <= 0) { return 0; } bzero((char *)fin, sizeof(*fin)); # ifdef MENTAT if (qpi->qpi_flags & QF_BROADCAST) fin->fin_flx |= FI_MBCAST|FI_BROADCAST; if (qpi->qpi_flags & QF_MULTICAST) fin->fin_flx |= FI_MBCAST|FI_MULTICAST; m = qpi->qpi_m; fin->fin_qfm = m; fin->fin_qpi = qpi; # else /* MENTAT */ m = *mp; # if defined(M_MCAST) if ((m->m_flags & M_MCAST) != 0) fin->fin_flx |= FI_MBCAST|FI_MULTICAST; # endif # if defined(M_MLOOP) if ((m->m_flags & M_MLOOP) != 0) fin->fin_flx |= FI_MBCAST|FI_MULTICAST; # endif # if defined(M_BCAST) if ((m->m_flags & M_BCAST) != 0) fin->fin_flx |= FI_MBCAST|FI_BROADCAST; # endif # ifdef M_CANFASTFWD /* * XXX For now, IP Filter and fast-forwarding of cached flows * XXX are mutually exclusive. Eventually, IP Filter should * XXX get a "can-fast-forward" filter rule. */ m->m_flags &= ~M_CANFASTFWD; # endif /* M_CANFASTFWD */ # if defined(CSUM_DELAY_DATA) && (!defined(__FreeBSD_version) || \ (__FreeBSD_version < 501108)) /* * disable delayed checksums. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } # endif /* CSUM_DELAY_DATA */ # endif /* MENTAT */ #else bzero((char *)fin, sizeof(*fin)); m = *mp; # if defined(M_MCAST) if ((m->m_flags & M_MCAST) != 0) fin->fin_flx |= FI_MBCAST|FI_MULTICAST; # endif # if defined(M_MLOOP) if ((m->m_flags & M_MLOOP) != 0) fin->fin_flx |= FI_MBCAST|FI_MULTICAST; # endif # if defined(M_BCAST) if ((m->m_flags & M_BCAST) != 0) fin->fin_flx |= FI_MBCAST|FI_BROADCAST; # endif #endif /* _KERNEL */ fin->fin_v = v; fin->fin_m = m; fin->fin_ip = ip; fin->fin_mp = mp; fin->fin_out = out; fin->fin_ifp = ifp; fin->fin_error = ENETUNREACH; fin->fin_hlen = (u_short)hlen; fin->fin_dp = (char *)ip + hlen; fin->fin_main_soft = softc; fin->fin_ipoff = (char *)ip - MTOD(m, char *); SPL_NET(s); #ifdef USE_INET6 if (v == 6) { LBUMP(ipf_stats[out].fr_ipv6); /* * Jumbo grams are quite likely too big for internal buffer * structures to handle comfortably, for now, so just drop * them. 
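* A jumbogram announces itself with ip6_plen == 0 plus a hop-by-hop * Jumbo Payload option (RFC 2675), which is why the test below keys on * a zero payload length.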
*/ if (((ip6_t *)ip)->ip6_plen == 0) { DT1(frb_jumbo, ip6_t *, (ip6_t *)ip); pass = FR_BLOCK|FR_NOMATCH; fin->fin_reason = FRB_JUMBO; goto finished; } fin->fin_family = AF_INET6; } else #endif { fin->fin_family = AF_INET; } if (ipf_makefrip(hlen, ip, fin) == -1) { DT1(frb_makefrip, fr_info_t *, fin); pass = FR_BLOCK|FR_NOMATCH; fin->fin_reason = FRB_MAKEFRIP; goto finished; } /* * For at least IPv6 packets, if a m_pullup() fails then this pointer * becomes NULL and so we have no packet to free. */ if (*fin->fin_mp == NULL) goto finished; if (!out) { if (v == 4) { if (softc->ipf_chksrc && !ipf_verifysrc(fin)) { LBUMPD(ipf_stats[0], fr_v4_badsrc); fin->fin_flx |= FI_BADSRC; } if (fin->fin_ip->ip_ttl < softc->ipf_minttl) { LBUMPD(ipf_stats[0], fr_v4_badttl); fin->fin_flx |= FI_LOWTTL; } } #ifdef USE_INET6 else if (v == 6) { if (((ip6_t *)ip)->ip6_hlim < softc->ipf_minttl) { LBUMPD(ipf_stats[0], fr_v6_badttl); fin->fin_flx |= FI_LOWTTL; } } #endif } if (fin->fin_flx & FI_SHORT) { LBUMPD(ipf_stats[out], fr_short); } READ_ENTER(&softc->ipf_mutex); if (!out) { switch (fin->fin_v) { case 4 : if (ipf_nat_checkin(fin, &pass) == -1) { goto filterdone; } break; #ifdef USE_INET6 case 6 : if (ipf_nat6_checkin(fin, &pass) == -1) { goto filterdone; } break; #endif default : break; } } /* * Check auth now. * If a packet is found in the auth table, then skip checking * the access lists for permission but we do need to consider * the result as if it were from the ACL's. In addition, being * found in the auth table means it has been seen before, so do * not pass it through accounting (again), lest it be counted twice. */ fr = ipf_auth_check(fin, &pass); if (!out && (fr == NULL)) (void) ipf_acctpkt(fin, NULL); if (fr == NULL) { if ((fin->fin_flx & FI_FRAG) != 0) fr = ipf_frag_known(fin, &pass); if (fr == NULL) fr = ipf_state_check(fin, &pass); } if ((pass & FR_NOMATCH) || (fr == NULL)) fr = ipf_firewall(fin, &pass); /* * If we've asked to track state for this packet, set it up. * Here rather than ipf_firewall because ipf_checkauth may decide * to return a packet for "keep state" */ if ((pass & FR_KEEPSTATE) && (fin->fin_m != NULL) && !(fin->fin_flx & FI_STATE)) { if (ipf_state_add(softc, fin, NULL, 0) == 0) { LBUMP(ipf_stats[out].fr_ads); } else { LBUMP(ipf_stats[out].fr_bads); if (FR_ISPASS(pass)) { DT(frb_stateadd); pass &= ~FR_CMDMASK; pass |= FR_BLOCK; fin->fin_reason = FRB_STATEADD; } } } fin->fin_fr = fr; if ((fr != NULL) && !(fin->fin_flx & FI_STATE)) { fin->fin_dif = &fr->fr_dif; fin->fin_tif = &fr->fr_tifs[fin->fin_rev]; } /* * Only count/translate packets which will be passed on, out the * interface. */ if (out && FR_ISPASS(pass)) { (void) ipf_acctpkt(fin, NULL); switch (fin->fin_v) { case 4 : if (ipf_nat_checkout(fin, &pass) == -1) { ; } else if ((softc->ipf_update_ipid != 0) && (v == 4)) { if (ipf_updateipid(fin) == -1) { DT(frb_updateipid); LBUMP(ipf_stats[1].fr_ipud); pass &= ~FR_CMDMASK; pass |= FR_BLOCK; fin->fin_reason = FRB_UPDATEIPID; } else { LBUMP(ipf_stats[0].fr_ipud); } } break; #ifdef USE_INET6 case 6 : (void) ipf_nat6_checkout(fin, &pass); break; #endif default : break; } } filterdone: #ifdef IPFILTER_LOG if ((softc->ipf_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { (void) ipf_dolog(fin, &pass); } #endif /* * The FI_STATE flag is cleared here so that calling ipf_state_check * will work when called from inside of fr_fastroute. Although * there is a similar flag, FI_NATED, for NAT, it does not have the same * impact on code execution. 
*/ fin->fin_flx &= ~FI_STATE; #if defined(FASTROUTE_RECURSION) /* * Up the reference on fr_lock and exit ipf_mutex. The generation of * a packet below can sometimes cause a recursive call into IPFilter. * On those platforms where that does happen, we need to hang onto * the filter rule just in case someone decides to remove or flush it * in the meantime. */ if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } RWLOCK_EXIT(&softc->ipf_mutex); #endif if ((pass & FR_RETMASK) != 0) { /* * Should we return an ICMP packet to indicate error * status passing through the packet filter ? * WARNING: ICMP error packets AND TCP RST packets should * ONLY be sent in response to incoming packets. Sending * them in response to outbound packets can result in a * panic on some operating systems. */ if (!out) { if (pass & FR_RETICMP) { int dst; if ((pass & FR_RETMASK) == FR_FAKEICMP) dst = 1; else dst = 0; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, dst); LBUMP(ipf_stats[0].fr_ret); } else if (((pass & FR_RETMASK) == FR_RETRST) && !(fin->fin_flx & FI_SHORT)) { if (((fin->fin_flx & FI_OOW) != 0) || (ipf_send_reset(fin) == 0)) { LBUMP(ipf_stats[1].fr_ret); } } /* * When using return-* with auth rules, the auth code * takes over disposing of this packet. */ if (FR_ISAUTH(pass) && (fin->fin_m != NULL)) { DT1(frb_authcapture, fr_info_t *, fin); fin->fin_m = *fin->fin_mp = NULL; fin->fin_reason = FRB_AUTHCAPTURE; m = NULL; } } else { if (pass & FR_RETRST) { fin->fin_error = ECONNRESET; } } } /* * After the above so that ICMP unreachables and TCP RSTs get * created properly. */ if (FR_ISBLOCK(pass) && (fin->fin_flx & FI_NEWNAT)) ipf_nat_uncreate(fin); /* * If we didn't drop off the bottom of the list of rules (and thus * the 'current' rule fr is not NULL), then we may have some extra * instructions about what to do with a packet. * Once we're finished return to our caller, freeing the packet if * we are dropping it. */ if (fr != NULL) { frdest_t *fdp; /* * Generate a duplicated packet first because ipf_fastroute * can lead to fin_m being free'd... not good. 
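* This is the path a dup-to rule relies on: the copy in mc is * fastrouted to the duplicate destination first, leaving the original * buffer intact for normal disposition. A hypothetical rule such as * "pass in on em0 dup-to em1 from any to any" would exercise it.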
*/ fdp = fin->fin_dif; if ((fdp != NULL) && (fdp->fd_ptr != NULL) && (fdp->fd_ptr != (void *)-1)) { - mc = M_COPY(fin->fin_m); + mc = M_COPYM(fin->fin_m); if (mc != NULL) ipf_fastroute(mc, &mc, fin, fdp); } fdp = fin->fin_tif; if (!out && (pass & FR_FASTROUTE)) { /* * For fastroute rule, no destination interface defined * so pass NULL as the frdest_t parameter */ (void) ipf_fastroute(fin->fin_m, mp, fin, NULL); m = *mp = NULL; } else if ((fdp != NULL) && (fdp->fd_ptr != NULL) && (fdp->fd_ptr != (struct ifnet *)-1)) { /* this is for to rules: */ ipf_fastroute(fin->fin_m, mp, fin, fdp); m = *mp = NULL; } #if defined(FASTROUTE_RECURSION) (void) ipf_derefrule(softc, &fr); #endif } #if !defined(FASTROUTE_RECURSION) RWLOCK_EXIT(&softc->ipf_mutex); #endif finished: if (!FR_ISPASS(pass)) { LBUMP(ipf_stats[out].fr_block); if (*mp != NULL) { #ifdef _KERNEL FREE_MB_T(*mp); #endif m = *mp = NULL; } } else { LBUMP(ipf_stats[out].fr_pass); #if defined(_KERNEL) && defined(__sgi) if ((fin->fin_hbuf != NULL) && (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { COPYBACK(fin->fin_m, 0, fin->fin_plen, fin->fin_hbuf); } #endif } SPL_X(s); #ifdef _KERNEL if (FR_ISPASS(pass)) return 0; LBUMP(ipf_stats[out].fr_blocked[fin->fin_reason]); return fin->fin_error; #else /* _KERNEL */ if (*mp != NULL) (*mp)->mb_ifp = fin->fin_ifp; blockreason = fin->fin_reason; FR_VERBOSE(("fin_flx %#x pass %#x ", fin->fin_flx, pass)); /*if ((pass & FR_CMDMASK) == (softc->ipf_pass & FR_CMDMASK))*/ if ((pass & FR_NOMATCH) != 0) return 1; if ((pass & FR_RETMASK) != 0) switch (pass & FR_RETMASK) { case FR_RETRST : return 3; case FR_RETICMP : return 4; case FR_FAKEICMP : return 5; } switch (pass & FR_CMDMASK) { case FR_PASS : return 0; case FR_BLOCK : return -1; case FR_AUTH : return -2; case FR_ACCOUNT : return -3; case FR_PREAUTH : return -4; } return 2; #endif /* _KERNEL */ } #ifdef IPFILTER_LOG /* ------------------------------------------------------------------------ */ /* Function: ipf_dolog */ /* Returns: frentry_t* - returns contents of fin_fr (no change made) */ /* Parameters: fin(I) - pointer to packet information */ /* passp(IO) - pointer to current/new filter decision (unused) */ /* */ /* Checks flags set to see how a packet should be logged, if it is to be */ /* logged. Adjust statistics based on its success or not. */ /* ------------------------------------------------------------------------ */ frentry_t * ipf_dolog(fin, passp) fr_info_t *fin; u_32_t *passp; { ipf_main_softc_t *softc = fin->fin_main_soft; u_32_t pass; int out; out = fin->fin_out; pass = *passp; if ((softc->ipf_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { pass |= FF_LOGNOMATCH; LBUMPD(ipf_stats[out], fr_npkl); goto logit; } else if (((pass & FR_LOGMASK) == FR_LOGP) || (FR_ISPASS(pass) && (softc->ipf_flags & FF_LOGPASS))) { if ((pass & FR_LOGMASK) != FR_LOGP) pass |= FF_LOGPASS; LBUMPD(ipf_stats[out], fr_ppkl); goto logit; } else if (((pass & FR_LOGMASK) == FR_LOGB) || (FR_ISBLOCK(pass) && (softc->ipf_flags & FF_LOGBLOCK))) { if ((pass & FR_LOGMASK) != FR_LOGB) pass |= FF_LOGBLOCK; LBUMPD(ipf_stats[out], fr_bpkl); logit: if (ipf_log_pkt(fin, pass) == -1) { /* * If the "or-block" option has been used then * block the packet if we failed to log it. 
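* In rule syntax this is the log option spelled along the lines of * "pass in log or-block ..." (illustrative); with FR_LOGORBLOCK set, a * failed ipf_log_pkt turns the decision into a block so the packet * cannot slip through unlogged.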
*/ if ((pass & FR_LOGORBLOCK) && FR_ISPASS(pass)) { DT1(frb_logfail2, u_int, pass); pass &= ~FR_CMDMASK; pass |= FR_BLOCK; fin->fin_reason = FRB_LOGFAIL2; } } *passp = pass; } return fin->fin_fr; } #endif /* IPFILTER_LOG */ /* ------------------------------------------------------------------------ */ /* Function: ipf_cksum */ /* Returns: u_short - IP header checksum */ /* Parameters: addr(I) - pointer to start of buffer to checksum */ /* len(I) - length of buffer in bytes */ /* */ /* Calculate the one's complement 16 bit checksum of the buffer passed. */ /* */ /* N.B.: addr should be 16-bit aligned. */ /* ------------------------------------------------------------------------ */ u_short ipf_cksum(addr, len) u_short *addr; int len; { u_32_t sum = 0; for (sum = 0; len > 1; len -= 2) sum += *addr++; /* mop up an odd byte, if necessary */ if (len == 1) sum += *(u_char *)addr; /* * add back carry outs from top 16 bits to low 16 bits */ sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ sum += (sum >> 16); /* add carry */ return (u_short)(~sum); } /* ------------------------------------------------------------------------ */ /* Function: fr_cksum */ /* Returns: u_short - layer 4 checksum */ /* Parameters: fin(I) - pointer to packet information */ /* ip(I) - pointer to IP header */ /* l4proto(I) - protocol to calculate checksum for */ /* l4hdr(I) - pointer to layer 4 header */ /* */ /* Calculates the layer 4 checksum for the packet held in "m", using the */ /* data in the IP header "ip" to seed it. */ /* */ /* NB: This function assumes we've pullup'd enough for all of the IP header */ /* and the TCP header. We also assume that data blocks aren't allocated in */ /* odd sizes. */ /* */ /* Expects ip_len and ip_off to be in network byte order when called. */ /* ------------------------------------------------------------------------ */ u_short fr_cksum(fin, ip, l4proto, l4hdr) fr_info_t *fin; ip_t *ip; int l4proto; void *l4hdr; { u_short *sp, slen, sumsave, *csump; u_int sum, sum2; int hlen; int off; #ifdef USE_INET6 ip6_t *ip6; #endif csump = NULL; sumsave = 0; sp = NULL; slen = 0; hlen = 0; sum = 0; sum = htons((u_short)l4proto); /* * Add up IP Header portion */ #ifdef USE_INET6 if (IP_V(ip) == 4) { #endif hlen = IP_HL(ip) << 2; off = hlen; sp = (u_short *)&ip->ip_src; sum += *sp++; /* ip_src */ sum += *sp++; sum += *sp++; /* ip_dst */ sum += *sp++; #ifdef USE_INET6 } else if (IP_V(ip) == 6) { ip6 = (ip6_t *)ip; hlen = sizeof(*ip6); off = ((char *)fin->fin_dp - (char *)fin->fin_ip); sp = (u_short *)&ip6->ip6_src; sum += *sp++; /* ip6_src */ sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; /* This needs to be routing header aware. 
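With a type 0 routing header present, the pseudo-header must be computed over the final destination, i.e. the last address in the routing header, rather than the ip6_dst field summed below (RFC 2460, section 8.1).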
*/ sum += *sp++; /* ip6_dst */ sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; sum += *sp++; } else { return 0xffff; } #endif slen = fin->fin_plen - off; sum += htons(slen); switch (l4proto) { case IPPROTO_UDP : csump = &((udphdr_t *)l4hdr)->uh_sum; break; case IPPROTO_TCP : csump = &((tcphdr_t *)l4hdr)->th_sum; break; case IPPROTO_ICMP : csump = &((icmphdr_t *)l4hdr)->icmp_cksum; sum = 0; /* Pseudo-checksum is not included */ break; #ifdef USE_INET6 case IPPROTO_ICMPV6 : csump = &((struct icmp6_hdr *)l4hdr)->icmp6_cksum; break; #endif default : break; } if (csump != NULL) { sumsave = *csump; *csump = 0; } sum2 = ipf_pcksum(fin, off, sum); if (csump != NULL) *csump = sumsave; return sum2; } /* ------------------------------------------------------------------------ */ /* Function: ipf_findgroup */ /* Returns: frgroup_t * - NULL = group not found, else pointer to group */ /* Parameters: softc(I) - pointer to soft context main structure */ /* group(I) - group name to search for */ /* unit(I) - device to which this group belongs */ /* set(I) - which set of rules (active/inactive) this is */ /* fgpp(O) - pointer to place to store pointer to the pointer */ /* to where to add the next (last) group or where */ /* to delete group from. */ /* */ /* Search amongst the defined groups for a particular group name. */ /* ------------------------------------------------------------------------ */ frgroup_t * ipf_findgroup(softc, group, unit, set, fgpp) ipf_main_softc_t *softc; char *group; minor_t unit; int set; frgroup_t ***fgpp; { frgroup_t *fg, **fgp; /* * Which list of groups to search in is dependent on which list of * rules are being operated on. */ fgp = &softc->ipf_groups[unit][set]; while ((fg = *fgp) != NULL) { if (strncmp(group, fg->fg_name, FR_GROUPLEN) == 0) break; else fgp = &fg->fg_next; } if (fgpp != NULL) *fgpp = fgp; return fg; } /* ------------------------------------------------------------------------ */ /* Function: ipf_group_add */ /* Returns: frgroup_t * - NULL == did not create group, */ /* != NULL == pointer to the group */ /* Parameters: softc(I) - pointer to soft context main structure */ /* group(I) - group name to add */ /* head(I) - rule pointer that is using this as the head */ /* flags(I) - rule flags which describe the type of rule it is */ /* unit(I) - device to which this group will belong to */ /* set(I) - which set of rules (active/inactive) this is */ /* Write Locks: ipf_mutex */ /* */ /* Add a new group head, or if it already exists, increase the reference */ /* count to it. 
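For example, the first rule to name group 100 creates the frgroup_t with fg_ref set to 1; each later rule naming the same group just increments fg_ref, and the group is only freed once ipf_group_del brings the count back to zero and fg_start is empty.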
*/ /* ------------------------------------------------------------------------ */ frgroup_t * ipf_group_add(softc, group, head, flags, unit, set) ipf_main_softc_t *softc; char *group; void *head; u_32_t flags; minor_t unit; int set; { frgroup_t *fg, **fgp; u_32_t gflags; if (group == NULL) return NULL; if (unit == IPL_LOGIPF && *group == '\0') return NULL; fgp = NULL; gflags = flags & FR_INOUT; fg = ipf_findgroup(softc, group, unit, set, &fgp); if (fg != NULL) { if (fg->fg_head == NULL && head != NULL) fg->fg_head = head; if (fg->fg_flags == 0) fg->fg_flags = gflags; else if (gflags != fg->fg_flags) return NULL; fg->fg_ref++; return fg; } KMALLOC(fg, frgroup_t *); if (fg != NULL) { fg->fg_head = head; fg->fg_start = NULL; fg->fg_next = *fgp; bcopy(group, fg->fg_name, strlen(group) + 1); fg->fg_flags = gflags; fg->fg_ref = 1; fg->fg_set = &softc->ipf_groups[unit][set]; *fgp = fg; } return fg; } /* ------------------------------------------------------------------------ */ /* Function: ipf_group_del */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* group(I) - pointer to the group to deference */ /* fr(I) - filter rule from which group is referenced */ /* Write Locks: ipf_mutex */ /* */ /* This function is called whenever a reference to a group is to be dropped */ /* and thus its reference count needs to be lowered and the group free'd if */ /* the reference count reaches zero. Passing in fr is really for the sole */ /* purpose of knowing when the head rule is being deleted. */ /* ------------------------------------------------------------------------ */ void ipf_group_del(softc, group, fr) ipf_main_softc_t *softc; frgroup_t *group; frentry_t *fr; { if (group->fg_head == fr) group->fg_head = NULL; group->fg_ref--; if ((group->fg_ref == 0) && (group->fg_start == NULL)) ipf_group_free(group); } /* ------------------------------------------------------------------------ */ /* Function: ipf_group_free */ /* Returns: Nil */ /* Parameters: group(I) - pointer to filter rule group */ /* */ /* Remove the group from the list of groups and free it. */ /* ------------------------------------------------------------------------ */ static void ipf_group_free(group) frgroup_t *group; { frgroup_t **gp; for (gp = group->fg_set; *gp != NULL; gp = &(*gp)->fg_next) { if (*gp == group) { *gp = group->fg_next; break; } } KFREE(group); } /* ------------------------------------------------------------------------ */ /* Function: ipf_group_flush */ /* Returns: int - number of rules flushed from group */ /* Parameters: softc(I) - pointer to soft context main structure */ /* Parameters: group(I) - pointer to filter rule group */ /* */ /* Remove all of the rules that currently are listed under the given group. */ /* ------------------------------------------------------------------------ */ static int ipf_group_flush(softc, group) ipf_main_softc_t *softc; frgroup_t *group; { int gone = 0; (void) ipf_flushlist(softc, &gone, &group->fg_start); return gone; } /* ------------------------------------------------------------------------ */ /* Function: ipf_getrulen */ /* Returns: frentry_t * - NULL == not found, else pointer to rule n */ /* Parameters: softc(I) - pointer to soft context main structure */ /* Parameters: unit(I) - device for which to count the rule's number */ /* flags(I) - which set of rules to find the rule in */ /* group(I) - group name */ /* n(I) - rule number to find */ /* */ /* Find rule # n in group # g and return a pointer to it. 
Return NULL if */ /* group # g doesn't exist or there are fewer than n rules in the group. */ /* ------------------------------------------------------------------------ */ frentry_t * ipf_getrulen(softc, unit, group, n) ipf_main_softc_t *softc; int unit; char *group; u_32_t n; { frentry_t *fr; frgroup_t *fg; fg = ipf_findgroup(softc, group, unit, softc->ipf_active, NULL); if (fg == NULL) return NULL; for (fr = fg->fg_start; fr && n; fr = fr->fr_next, n--) ; if (n != 0) return NULL; return fr; } /* ------------------------------------------------------------------------ */ /* Function: ipf_flushlist */ /* Returns: int - >= 0 - number of flushed rules */ /* Parameters: softc(I) - pointer to soft context main structure */ /* nfreedp(O) - pointer to int where flush count is stored */ /* listp(I) - pointer to list to flush pointer */ /* Write Locks: ipf_mutex */ /* */ /* Recursively flush rules from the list, descending groups as they are */ /* encountered. If a rule is the head of a group and it has lost all its */ /* group members, then also delete the group reference. nfreedp is needed */ /* to store the accumulating count of rules removed, whereas the returned */ /* value is just the number removed from the current list. The latter is */ /* needed to correctly adjust reference counts on rules that define groups. */ /* */ /* NOTE: Rules not loaded from user space cannot be flushed. */ /* ------------------------------------------------------------------------ */ static int ipf_flushlist(softc, nfreedp, listp) ipf_main_softc_t *softc; int *nfreedp; frentry_t **listp; { int freed = 0; frentry_t *fp; while ((fp = *listp) != NULL) { if ((fp->fr_type & FR_T_BUILTIN) || !(fp->fr_flags & FR_COPIED)) { listp = &fp->fr_next; continue; } *listp = fp->fr_next; if (fp->fr_next != NULL) fp->fr_next->fr_pnext = fp->fr_pnext; fp->fr_pnext = NULL; if (fp->fr_grphead != NULL) { freed += ipf_group_flush(softc, fp->fr_grphead); fp->fr_names[fp->fr_grhead] = '\0'; } if (fp->fr_icmpgrp != NULL) { freed += ipf_group_flush(softc, fp->fr_icmpgrp); fp->fr_names[fp->fr_icmphead] = '\0'; } if (fp->fr_srctrack.ht_max_nodes) ipf_rb_ht_flush(&fp->fr_srctrack); fp->fr_next = NULL; ASSERT(fp->fr_ref > 0); if (ipf_derefrule(softc, &fp) == 0) freed++; } *nfreedp += freed; return freed; } /* ------------------------------------------------------------------------ */ /* Function: ipf_flush */ /* Returns: int - >= 0 - number of flushed rules */ /* Parameters: softc(I) - pointer to soft context main structure */ /* unit(I) - device for which to flush rules */ /* flags(I) - which set of rules to flush */ /* */ /* Calls flushlist() for all filter rules (accounting, firewall - both IPv4 */ /* and IPv6) as defined by the value of flags. 
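For instance, flushing only the outbound set (FR_OUTQUE, as a command like ipf -Fo would request, assuming the usual ipf(8) flag mapping) walks ipf_rules[1][set] and ipf_acct[1][set], while FR_INQUE covers the [0] lists and flushing everything sets both flags.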
*/ /* ------------------------------------------------------------------------ */ int ipf_flush(softc, unit, flags) ipf_main_softc_t *softc; minor_t unit; int flags; { int flushed = 0, set; WRITE_ENTER(&softc->ipf_mutex); set = softc->ipf_active; if ((flags & FR_INACTIVE) == FR_INACTIVE) set = 1 - set; if (flags & FR_OUTQUE) { ipf_flushlist(softc, &flushed, &softc->ipf_rules[1][set]); ipf_flushlist(softc, &flushed, &softc->ipf_acct[1][set]); } if (flags & FR_INQUE) { ipf_flushlist(softc, &flushed, &softc->ipf_rules[0][set]); ipf_flushlist(softc, &flushed, &softc->ipf_acct[0][set]); } flushed += ipf_flush_groups(softc, &softc->ipf_groups[unit][set], flags & (FR_INQUE|FR_OUTQUE)); RWLOCK_EXIT(&softc->ipf_mutex); if (unit == IPL_LOGIPF) { int tmp; tmp = ipf_flush(softc, IPL_LOGCOUNT, flags); if (tmp >= 0) flushed += tmp; } return flushed; } /* ------------------------------------------------------------------------ */ /* Function: ipf_flush_groups */ /* Returns: int - >= 0 - number of flushed rules */ /* Parameters: softc(I) - soft context pointer to work with */ /* grhead(I) - pointer to the start of the group list to flush */ /* flags(I) - which set of rules to flush */ /* */ /* Walk through all of the groups under the given group head and remove all */ /* of those that match the flags passed in. The for loop here is a bit more */ /* complicated than usual because the removal of a rule with ipf_derefrule */ /* may end up removing not only the structure pointed to by "fg" but also */ /* what is fg_next and fg_next after that. So if a filter rule is actually */ /* removed from the group then it is necessary to start again. */ /* ------------------------------------------------------------------------ */ static int ipf_flush_groups(softc, grhead, flags) ipf_main_softc_t *softc; frgroup_t **grhead; int flags; { frentry_t *fr, **frp; frgroup_t *fg, **fgp; int flushed = 0; int removed = 0; for (fgp = grhead; (fg = *fgp) != NULL; ) { while ((fg != NULL) && ((fg->fg_flags & flags) == 0)) fg = fg->fg_next; if (fg == NULL) break; removed = 0; frp = &fg->fg_start; while ((removed == 0) && ((fr = *frp) != NULL)) { if ((fr->fr_flags & flags) == 0) { frp = &fr->fr_next; } else { if (fr->fr_next != NULL) fr->fr_next->fr_pnext = fr->fr_pnext; *frp = fr->fr_next; fr->fr_pnext = NULL; fr->fr_next = NULL; (void) ipf_derefrule(softc, &fr); flushed++; removed++; } } if (removed == 0) fgp = &fg->fg_next; } return flushed; } /* ------------------------------------------------------------------------ */ /* Function: memstr */ /* Returns: char * - NULL if failed, != NULL pointer to matching bytes */ /* Parameters: src(I) - pointer to byte sequence to match */ /* dst(I) - pointer to byte sequence to search */ /* slen(I) - match length */ /* dlen(I) - length available to search in */ /* */ /* Search dst for a sequence of bytes matching those at src and extend for */ /* slen bytes. */ /* ------------------------------------------------------------------------ */ char * memstr(src, dst, slen, dlen) const char *src; char *dst; size_t slen, dlen; { char *s = NULL; while (dlen >= slen) { if (bcmp(src, dst, slen) == 0) { s = dst; break; } dst++; dlen--; } return s; } /* ------------------------------------------------------------------------ */ /* Function: ipf_fixskip */ /* Returns: Nil */ /* Parameters: listp(IO) - pointer to start of list with skip rule */ /* rp(I) - rule added/removed with skip in it. 
addremove(I) - adjustment (-1/+1) to make to skip count, */ /* depending on whether a rule was just added */ /* or removed. */ /* */ /* Adjust all the rules in a list which would have skip'd past the position */ /* where we are inserting to skip to the right place given the change. */ /* ------------------------------------------------------------------------ */ void ipf_fixskip(listp, rp, addremove) frentry_t **listp, *rp; int addremove; { int rules, rn; frentry_t *fp; rules = 0; for (fp = *listp; (fp != NULL) && (fp != rp); fp = fp->fr_next) rules++; if (!fp) return; for (rn = 0, fp = *listp; fp && (fp != rp); fp = fp->fr_next, rn++) if (FR_ISSKIP(fp->fr_flags) && (rn + fp->fr_arg >= rules)) fp->fr_arg += addremove; } #ifdef _KERNEL /* ------------------------------------------------------------------------ */ /* Function: count4bits */ /* Returns: int - >= 0 - number of consecutive bits in input */ /* Parameters: ip(I) - 32bit IP address */ /* */ /* IPv4 ONLY */ /* count consecutive 1's in bit mask. If the mask generated by counting */ /* consecutive 1's is different to that passed, return -1, else return # */ /* of bits. */ /* ------------------------------------------------------------------------ */ int count4bits(ip) u_32_t ip; { u_32_t ipn; int cnt = 0, i, j; ip = ipn = ntohl(ip); for (i = 32; i; i--, ipn *= 2) if (ipn & 0x80000000) cnt++; else break; ipn = 0; for (i = 32, j = cnt; i; i--, j--) { ipn *= 2; if (j > 0) ipn++; } if (ipn == ip) return cnt; return -1; } /* ------------------------------------------------------------------------ */ /* Function: count6bits */ /* Returns: int - >= 0 - number of consecutive bits in input */ /* Parameters: msk(I) - pointer to start of IPv6 bitmask */ /* */ /* IPv6 ONLY */ /* count consecutive 1's in bit mask. */ /* ------------------------------------------------------------------------ */ # ifdef USE_INET6 int count6bits(msk) u_32_t *msk; { int i = 0, k; u_32_t j; for (k = 3; k >= 0; k--) if (msk[k] == 0xffffffff) i += 32; else { for (j = msk[k]; j; j <<= 1) if (j & 0x80000000) i++; } return i; } # endif #endif /* _KERNEL */ /* ------------------------------------------------------------------------ */ /* Function: ipf_synclist */ /* Returns: int - 0 = no failures, else indication of first failure */ /* Parameters: fr(I) - start of filter list to sync interface names for */ /* ifp(I) - interface pointer for limiting sync lookups */ /* Write Locks: ipf_mutex */ /* */ /* Walk through a list of filter rules and resolve any interface names into */ /* pointers. Where dynamic addresses are used, also update the IP address */ /* used in the rule. The interface pointer is used to limit the lookups to */ /* a specific set of matching names if it is non-NULL. */ /* Errors can occur when resolving the destination name of to/dup-to fields */ /* when the name points to a pool and that pool does not exist. If this */ /* does happen then it is necessary to check if there are any lookup refs */ /* that need to be dropped before returning with an error. */ /* ------------------------------------------------------------------------ */ static int ipf_synclist(softc, fr, ifp) ipf_main_softc_t *softc; frentry_t *fr; void *ifp; { frentry_t *frt, *start = fr; frdest_t *fdp; char *name; int error; void *ifa; int v, i; error = 0; for (; fr; fr = fr->fr_next) { if (fr->fr_family == AF_INET) v = 4; else if (fr->fr_family == AF_INET6) v = 6; else v = 0; /* * Lookup all the interface names that are part of the rule. 
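* The loop bound of four below matches the fr_ifnames[]/fr_ifas[] * slots in a rule; each slot that is in use (index != -1) is resolved * from interface name to interface pointer here.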
*/ for (i = 0; i < 4; i++) { if ((ifp != NULL) && (fr->fr_ifas[i] != ifp)) continue; if (fr->fr_ifnames[i] == -1) continue; name = FR_NAME(fr, fr_ifnames[i]); fr->fr_ifas[i] = ipf_resolvenic(softc, name, v); } if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) { if (fr->fr_satype != FRI_NORMAL && fr->fr_satype != FRI_LOOKUP) { ifa = ipf_resolvenic(softc, fr->fr_names + fr->fr_sifpidx, v); ipf_ifpaddr(softc, v, fr->fr_satype, ifa, &fr->fr_src6, &fr->fr_smsk6); } if (fr->fr_datype != FRI_NORMAL && fr->fr_datype != FRI_LOOKUP) { ifa = ipf_resolvenic(softc, fr->fr_names + fr->fr_sifpidx, v); ipf_ifpaddr(softc, v, fr->fr_datype, ifa, &fr->fr_dst6, &fr->fr_dmsk6); } } fdp = &fr->fr_tifs[0]; if ((ifp == NULL) || (fdp->fd_ptr == ifp)) { error = ipf_resolvedest(softc, fr->fr_names, fdp, v); if (error != 0) goto unwind; } fdp = &fr->fr_tifs[1]; if ((ifp == NULL) || (fdp->fd_ptr == ifp)) { error = ipf_resolvedest(softc, fr->fr_names, fdp, v); if (error != 0) goto unwind; } fdp = &fr->fr_dif; if ((ifp == NULL) || (fdp->fd_ptr == ifp)) { error = ipf_resolvedest(softc, fr->fr_names, fdp, v); if (error != 0) goto unwind; } if (((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) && (fr->fr_satype == FRI_LOOKUP) && (fr->fr_srcptr == NULL)) { fr->fr_srcptr = ipf_lookup_res_num(softc, fr->fr_srctype, IPL_LOGIPF, fr->fr_srcnum, &fr->fr_srcfunc); } if (((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) && (fr->fr_datype == FRI_LOOKUP) && (fr->fr_dstptr == NULL)) { fr->fr_dstptr = ipf_lookup_res_num(softc, fr->fr_dsttype, IPL_LOGIPF, fr->fr_dstnum, &fr->fr_dstfunc); } } return 0; unwind: for (frt = start; frt != fr; frt = frt->fr_next) { if (((frt->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) && (frt->fr_satype == FRI_LOOKUP) && (frt->fr_srcptr != NULL)) ipf_lookup_deref(softc, frt->fr_srctype, frt->fr_srcptr); if (((frt->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) && (frt->fr_datype == FRI_LOOKUP) && (frt->fr_dstptr != NULL)) ipf_lookup_deref(softc, frt->fr_dsttype, frt->fr_dstptr); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_sync */ /* Returns: void */ /* Parameters: Nil */ /* */ /* ipf_sync() is called when we suspect that the interface list or */ /* information about interfaces (like IP#) has changed. Go through all */ /* filter rules, NAT entries and the state table and check if anything */ /* needs to be changed/updated. */ /* ------------------------------------------------------------------------ */ int ipf_sync(softc, ifp) ipf_main_softc_t *softc; void *ifp; { int i; # if !SOLARIS ipf_nat_sync(softc, ifp); ipf_state_sync(softc, ifp); ipf_lookup_sync(softc, ifp); # endif WRITE_ENTER(&softc->ipf_mutex); (void) ipf_synclist(softc, softc->ipf_acct[0][softc->ipf_active], ifp); (void) ipf_synclist(softc, softc->ipf_acct[1][softc->ipf_active], ifp); (void) ipf_synclist(softc, softc->ipf_rules[0][softc->ipf_active], ifp); (void) ipf_synclist(softc, softc->ipf_rules[1][softc->ipf_active], ifp); for (i = 0; i < IPL_LOGSIZE; i++) { frgroup_t *g; for (g = softc->ipf_groups[i][0]; g != NULL; g = g->fg_next) (void) ipf_synclist(softc, g->fg_start, ifp); for (g = softc->ipf_groups[i][1]; g != NULL; g = g->fg_next) (void) ipf_synclist(softc, g->fg_start, ifp); } RWLOCK_EXIT(&softc->ipf_mutex); return 0; } /* * In the functions below, bcopy() is called because the pointer being * copied _from_ in this instance is a pointer to a char buf (which could * end up being unaligned) and on the kernel's local stack. 
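* As an illustrative sketch: an ioctl handler whose data argument holds * the address of a user pointer can fetch the object it refers to with * copyinptr(softc, data, &obj, sizeof(obj)) - obj being a hypothetical * local - which reads the embedded pointer first and then copies the * object itself from where that pointer points.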
*/ /* ------------------------------------------------------------------------ */ /* Function: copyinptr */ /* Returns: int - 0 = success, else failure */ /* Parameters: src(I) - pointer to the source address */ /* dst(I) - destination address */ /* size(I) - number of bytes to copy */ /* */ /* Copy a block of data in from user space, given a pointer to the pointer */ /* to start copying from (src) and a pointer to where to store it (dst). */ /* NB: src - pointer to user space pointer, dst - kernel space pointer */ /* ------------------------------------------------------------------------ */ int copyinptr(softc, src, dst, size) ipf_main_softc_t *softc; void *src, *dst; size_t size; { caddr_t ca; int error; # if SOLARIS error = COPYIN(src, &ca, sizeof(ca)); if (error != 0) return error; # else bcopy(src, (caddr_t)&ca, sizeof(ca)); # endif error = COPYIN(ca, dst, size); if (error != 0) { IPFERROR(3); error = EFAULT; } return error; } /* ------------------------------------------------------------------------ */ /* Function: copyoutptr */ /* Returns: int - 0 = success, else failure */ /* Parameters: src(I) - pointer to the source address */ /* dst(I) - destination address */ /* size(I) - number of bytes to copy */ /* */ /* Copy a block of data out to user space, given a pointer to the data to */ /* copy from (src) and a pointer to the pointer at which to store it (dst). */ /* NB: src - kernel space pointer, dst - pointer to user space pointer. */ /* ------------------------------------------------------------------------ */ int copyoutptr(softc, src, dst, size) ipf_main_softc_t *softc; void *src, *dst; size_t size; { caddr_t ca; int error; bcopy(dst, (caddr_t)&ca, sizeof(ca)); error = COPYOUT(src, ca, size); if (error != 0) { IPFERROR(4); error = EFAULT; } return error; } #ifdef _KERNEL #endif /* ------------------------------------------------------------------------ */ /* Function: ipf_lock */ /* Returns: int - 0 = success, else error */ /* Parameters: data(I) - pointer to lock value to set */ /* lockp(O) - pointer to location to store old lock value */ /* */ /* Get the new value for the lock integer, set it and return the old value */ /* in *lockp. */ /* ------------------------------------------------------------------------ */ int ipf_lock(data, lockp) caddr_t data; int *lockp; { int arg, err; err = BCOPYIN(data, &arg, sizeof(arg)); if (err != 0) return EFAULT; err = BCOPYOUT(lockp, data, sizeof(*lockp)); if (err != 0) return EFAULT; *lockp = arg; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_getstat */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* fiop(I) - pointer to ipfilter stats structure */ /* rev(I) - version claimed by program doing ioctl */ /* */ /* Stores a copy of current pointers, counters, etc, in the friostat */ /* structure. */ /* If IPFILTER_COMPAT is compiled, we pretend to be whatever version the */ /* program is looking for. This ensures that validation of the version it */ /* expects will always succeed. Thus kernels with IPFILTER_COMPAT will */ /* allow older binaries to work but kernels without it will not. 
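For example, with IPFILTER_COMPAT a caller claiming rev 4010100 is answered with the string "IP Filter: v4.1.1", the three fields being (rev / 1000000) % 100, (rev / 10000) % 100 and (rev / 100) % 100.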
*/ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ static void ipf_getstat(softc, fiop, rev) ipf_main_softc_t *softc; friostat_t *fiop; int rev; { int i; bcopy((char *)softc->ipf_stats, (char *)fiop->f_st, sizeof(ipf_statistics_t) * 2); fiop->f_locks[IPL_LOGSTATE] = -1; fiop->f_locks[IPL_LOGNAT] = -1; fiop->f_locks[IPL_LOGIPF] = -1; fiop->f_locks[IPL_LOGAUTH] = -1; fiop->f_ipf[0][0] = softc->ipf_rules[0][0]; fiop->f_acct[0][0] = softc->ipf_acct[0][0]; fiop->f_ipf[0][1] = softc->ipf_rules[0][1]; fiop->f_acct[0][1] = softc->ipf_acct[0][1]; fiop->f_ipf[1][0] = softc->ipf_rules[1][0]; fiop->f_acct[1][0] = softc->ipf_acct[1][0]; fiop->f_ipf[1][1] = softc->ipf_rules[1][1]; fiop->f_acct[1][1] = softc->ipf_acct[1][1]; fiop->f_ticks = softc->ipf_ticks; fiop->f_active = softc->ipf_active; fiop->f_froute[0] = softc->ipf_frouteok[0]; fiop->f_froute[1] = softc->ipf_frouteok[1]; fiop->f_rb_no_mem = softc->ipf_rb_no_mem; fiop->f_rb_node_max = softc->ipf_rb_node_max; fiop->f_running = softc->ipf_running; for (i = 0; i < IPL_LOGSIZE; i++) { fiop->f_groups[i][0] = softc->ipf_groups[i][0]; fiop->f_groups[i][1] = softc->ipf_groups[i][1]; } #ifdef IPFILTER_LOG fiop->f_log_ok = ipf_log_logok(softc, IPL_LOGIPF); fiop->f_log_fail = ipf_log_failures(softc, IPL_LOGIPF); fiop->f_logging = 1; #else fiop->f_log_ok = 0; fiop->f_log_fail = 0; fiop->f_logging = 0; #endif fiop->f_defpass = softc->ipf_pass; fiop->f_features = ipf_features; #ifdef IPFILTER_COMPAT sprintf(fiop->f_version, "IP Filter: v%d.%d.%d", (rev / 1000000) % 100, (rev / 10000) % 100, (rev / 100) % 100); #else rev = rev; (void) strncpy(fiop->f_version, ipfilter_version, sizeof(fiop->f_version)); #endif } #ifdef USE_INET6 int icmptoicmp6types[ICMP_MAXTYPE+1] = { ICMP6_ECHO_REPLY, /* 0: ICMP_ECHOREPLY */ -1, /* 1: UNUSED */ -1, /* 2: UNUSED */ ICMP6_DST_UNREACH, /* 3: ICMP_UNREACH */ -1, /* 4: ICMP_SOURCEQUENCH */ ND_REDIRECT, /* 5: ICMP_REDIRECT */ -1, /* 6: UNUSED */ -1, /* 7: UNUSED */ ICMP6_ECHO_REQUEST, /* 8: ICMP_ECHO */ -1, /* 9: UNUSED */ -1, /* 10: UNUSED */ ICMP6_TIME_EXCEEDED, /* 11: ICMP_TIMXCEED */ ICMP6_PARAM_PROB, /* 12: ICMP_PARAMPROB */ -1, /* 13: ICMP_TSTAMP */ -1, /* 14: ICMP_TSTAMPREPLY */ -1, /* 15: ICMP_IREQ */ -1, /* 16: ICMP_IREQREPLY */ -1, /* 17: ICMP_MASKREQ */ -1, /* 18: ICMP_MASKREPLY */ }; int icmptoicmp6unreach[ICMP_MAX_UNREACH] = { ICMP6_DST_UNREACH_ADDR, /* 0: ICMP_UNREACH_NET */ ICMP6_DST_UNREACH_ADDR, /* 1: ICMP_UNREACH_HOST */ -1, /* 2: ICMP_UNREACH_PROTOCOL */ ICMP6_DST_UNREACH_NOPORT, /* 3: ICMP_UNREACH_PORT */ -1, /* 4: ICMP_UNREACH_NEEDFRAG */ ICMP6_DST_UNREACH_NOTNEIGHBOR, /* 5: ICMP_UNREACH_SRCFAIL */ ICMP6_DST_UNREACH_ADDR, /* 6: ICMP_UNREACH_NET_UNKNOWN */ ICMP6_DST_UNREACH_ADDR, /* 7: ICMP_UNREACH_HOST_UNKNOWN */ -1, /* 8: ICMP_UNREACH_ISOLATED */ ICMP6_DST_UNREACH_ADMIN, /* 9: ICMP_UNREACH_NET_PROHIB */ ICMP6_DST_UNREACH_ADMIN, /* 10: ICMP_UNREACH_HOST_PROHIB */ -1, /* 11: ICMP_UNREACH_TOSNET */ -1, /* 12: ICMP_UNREACH_TOSHOST */ ICMP6_DST_UNREACH_ADMIN, /* 13: ICMP_UNREACH_ADMIN_PROHIBIT */ }; int icmpreplytype6[ICMP6_MAXTYPE + 1]; #endif int icmpreplytype4[ICMP_MAXTYPE + 1]; /* ------------------------------------------------------------------------ */ /* Function: ipf_matchicmpqueryreply */ /* Returns: int - 1 if "icmp" is a valid reply to "ic" else 0. 
*/ /* Parameters: v(I) - IP protocol version (4 or 6) */ /* ic(I) - ICMP information */ /* icmp(I) - ICMP packet header */ /* rev(I) - direction (0 = forward/1 = reverse) of packet */ /* */ /* Check if the ICMP packet defined by the header pointed to by icmp is a */ /* reply to one as described by what's in ic. If it is a match, return 1, */ /* else return 0 for no match. E.g. an outbound echo request (type 8) is */ /* matched on its return trip by an echo reply (type 0) carrying the same */ /* icmp_id. */ /* ------------------------------------------------------------------------ */ int ipf_matchicmpqueryreply(v, ic, icmp, rev) int v; icmpinfo_t *ic; icmphdr_t *icmp; int rev; { int ictype; ictype = ic->ici_type; if (v == 4) { /* * If we matched its type on the way in, then when going out * it will still be the same type. */ if ((!rev && (icmp->icmp_type == ictype)) || (rev && (icmpreplytype4[ictype] == icmp->icmp_type))) { if (icmp->icmp_type != ICMP_ECHOREPLY) return 1; if (icmp->icmp_id == ic->ici_id) return 1; } } #ifdef USE_INET6 else if (v == 6) { if ((!rev && (icmp->icmp_type == ictype)) || (rev && (icmpreplytype6[ictype] == icmp->icmp_type))) { if (icmp->icmp_type != ICMP6_ECHO_REPLY) return 1; if (icmp->icmp_id == ic->ici_id) return 1; } } #endif return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_rule_compare */ /* Parameters: fr1(I) - first rule structure to compare */ /* fr2(I) - second rule structure to compare */ /* Returns: int - 0 == rules are the same, else mismatch */ /* */ /* Compare two rules and return 0 if they match or a number indicating */ /* which of the individual checks failed. */ /* ------------------------------------------------------------------------ */ static int ipf_rule_compare(frentry_t *fr1, frentry_t *fr2) { if (fr1->fr_cksum != fr2->fr_cksum) return 1; if (fr1->fr_size != fr2->fr_size) return 2; if (fr1->fr_dsize != fr2->fr_dsize) return 3; if (bcmp((char *)&fr1->fr_func, (char *)&fr2->fr_func, fr1->fr_size - offsetof(struct frentry, fr_func)) != 0) return 4; if (fr1->fr_data && !fr2->fr_data) return 5; if (!fr1->fr_data && fr2->fr_data) return 6; if (fr1->fr_data) { if (bcmp(fr1->fr_caddr, fr2->fr_caddr, fr1->fr_dsize)) return 7; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: frrequest */ /* Returns: int - 0 == success, > 0 == errno value */ /* Parameters: unit(I) - device for which this is for */ /* req(I) - ioctl command (SIOC*) */ /* data(I) - pointer to ioctl data */ /* set(I) - 1 or 0 (filter set) */ /* makecopy(I) - flag indicating whether data points to a rule */ /* in kernel space & hence doesn't need copying. */ /* */ /* This function handles all the requests which operate on the list of */ /* filter rules. This includes addition, deletion and insertion. It is */ /* also responsible for creating groups when a "head" rule is loaded. */ /* Interface names are resolved here and other sanity checks are made on */ /* the content of the rule structure being loaded. If a rule has user */ /* defined timeouts then make sure they are created and initialised before */ /* exiting. 
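The request codes below collapse to three modes: SIOCADAFR, SIOCADIFR, SIOCINAFR and SIOCINIFR all add or insert a rule (addrem 0), SIOCRMAFR and SIOCRMIFR remove one (addrem 1), and SIOCZRLST is treated as its own mode (addrem 2).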
int
frrequest(softc, unit, req, data, set, makecopy)
    ipf_main_softc_t *softc;
    int unit;
    ioctlcmd_t req;
    int set, makecopy;
    caddr_t data;
{
    int error = 0, in, family, addrem, need_free = 0;
    frentry_t frd, *fp, *f, **fprev, **ftail;
    void *ptr, *uptr, *cptr;
    u_int *p, *pp;
    frgroup_t *fg;
    char *group;

    ptr = NULL;
    cptr = NULL;
    fg = NULL;
    fp = &frd;
    if (makecopy != 0) {
        bzero(fp, sizeof(frd));
        error = ipf_inobj(softc, data, NULL, fp, IPFOBJ_FRENTRY);
        if (error) {
            return error;
        }
        if ((fp->fr_type & FR_T_BUILTIN) != 0) {
            IPFERROR(6);
            return EINVAL;
        }
        KMALLOCS(f, frentry_t *, fp->fr_size);
        if (f == NULL) {
            IPFERROR(131);
            return ENOMEM;
        }
        bzero(f, fp->fr_size);
        error = ipf_inobjsz(softc, data, f, IPFOBJ_FRENTRY,
                            fp->fr_size);
        if (error) {
            KFREES(f, fp->fr_size);
            return error;
        }

        fp = f;
        f = NULL;
        fp->fr_next = NULL;
        fp->fr_dnext = NULL;
        fp->fr_pnext = NULL;
        fp->fr_pdnext = NULL;
        fp->fr_grp = NULL;
        fp->fr_grphead = NULL;
        fp->fr_icmpgrp = NULL;
        fp->fr_isc = (void *)-1;
        fp->fr_ptr = NULL;
        fp->fr_ref = 0;
        fp->fr_flags |= FR_COPIED;
    } else {
        fp = (frentry_t *)data;
        if ((fp->fr_type & FR_T_BUILTIN) == 0) {
            IPFERROR(7);
            return EINVAL;
        }
        fp->fr_flags &= ~FR_COPIED;
    }

    if (((fp->fr_dsize == 0) && (fp->fr_data != NULL)) ||
        ((fp->fr_dsize != 0) && (fp->fr_data == NULL))) {
        IPFERROR(8);
        error = EINVAL;
        goto donenolock;
    }

    family = fp->fr_family;
    uptr = fp->fr_data;

    if (req == (ioctlcmd_t)SIOCINAFR || req == (ioctlcmd_t)SIOCINIFR ||
        req == (ioctlcmd_t)SIOCADAFR || req == (ioctlcmd_t)SIOCADIFR)
        addrem = 0;
    else if (req == (ioctlcmd_t)SIOCRMAFR || req == (ioctlcmd_t)SIOCRMIFR)
        addrem = 1;
    else if (req == (ioctlcmd_t)SIOCZRLST)
        addrem = 2;
    else {
        IPFERROR(9);
        error = EINVAL;
        goto donenolock;
    }

    /*
     * Only filter rules for IPv4 or IPv6 are accepted.
     */
    if (family == AF_INET) {
        /*EMPTY*/;
#ifdef USE_INET6
    } else if (family == AF_INET6) {
        /*EMPTY*/;
#endif
    } else if (family != 0) {
        IPFERROR(10);
        error = EINVAL;
        goto donenolock;
    }

    /*
     * If the rule is being loaded from user space, i.e. we had to copy it
     * into kernel space, then do not trust the function pointer in the
     * rule.
     */
    if ((makecopy == 1) && (fp->fr_func != NULL)) {
        if (ipf_findfunc(fp->fr_func) == NULL) {
            IPFERROR(11);
            error = ESRCH;
            goto donenolock;
        }

        if (addrem == 0) {
            error = ipf_funcinit(softc, fp);
            if (error != 0)
                goto donenolock;
        }
    }
    if ((fp->fr_flags & FR_CALLNOW) &&
        ((fp->fr_func == NULL) || (fp->fr_func == (ipfunc_t)-1))) {
        IPFERROR(142);
        error = ESRCH;
        goto donenolock;
    }
    if (((fp->fr_flags & FR_CMDMASK) == FR_CALL) &&
        ((fp->fr_func == NULL) || (fp->fr_func == (ipfunc_t)-1))) {
        IPFERROR(143);
        error = ESRCH;
        goto donenolock;
    }

    ptr = NULL;
    cptr = NULL;

    if (FR_ISACCOUNT(fp->fr_flags))
        unit = IPL_LOGCOUNT;

    /*
     * Check that each group name in the rule has a start index that
     * is valid.
     */
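    /*
     * Group names live in the rule's fr_names string table and are
     * referenced by index, so each index must fall within fr_namelen.
     * A name of "0" is treated as meaning "no group" and is cleared.
     */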
    if (fp->fr_icmphead != -1) {
        if ((fp->fr_icmphead < 0) ||
            (fp->fr_icmphead >= fp->fr_namelen)) {
            IPFERROR(136);
            error = EINVAL;
            goto donenolock;
        }
        if (!strcmp(FR_NAME(fp, fr_icmphead), "0"))
            fp->fr_names[fp->fr_icmphead] = '\0';
    }

    if (fp->fr_grhead != -1) {
        if ((fp->fr_grhead < 0) ||
            (fp->fr_grhead >= fp->fr_namelen)) {
            IPFERROR(137);
            error = EINVAL;
            goto donenolock;
        }
        if (!strcmp(FR_NAME(fp, fr_grhead), "0"))
            fp->fr_names[fp->fr_grhead] = '\0';
    }

    if (fp->fr_group != -1) {
        if ((fp->fr_group < 0) ||
            (fp->fr_group >= fp->fr_namelen)) {
            IPFERROR(138);
            error = EINVAL;
            goto donenolock;
        }
        if ((req != (int)SIOCZRLST) && (fp->fr_group != -1)) {
            /*
             * Allow loading rules that are in groups to cause
             * them to be created if they don't already exist.
             */
            group = FR_NAME(fp, fr_group);
            if (addrem == 0) {
                fg = ipf_group_add(softc, group, NULL,
                                   fp->fr_flags, unit, set);
                fp->fr_grp = fg;
            } else {
                fg = ipf_findgroup(softc, group, unit,
                                   set, NULL);
                if (fg == NULL) {
                    IPFERROR(12);
                    error = ESRCH;
                    goto donenolock;
                }
            }

            if (fg->fg_flags == 0) {
                fg->fg_flags = fp->fr_flags & FR_INOUT;
            } else if (fg->fg_flags != (fp->fr_flags & FR_INOUT)) {
                IPFERROR(13);
                error = ESRCH;
                goto donenolock;
            }
        }
    } else {
        /*
         * If a rule is going to be part of a group then it does
         * not matter whether it is an in or out rule, but if it
         * isn't in a group, then it does...
         */
        if ((fp->fr_flags & (FR_INQUE|FR_OUTQUE)) == 0) {
            IPFERROR(14);
            error = EINVAL;
            goto donenolock;
        }
    }

    in = (fp->fr_flags & FR_INQUE) ? 0 : 1;

    /*
     * Work out which rule list this change is being applied to.
     */
    ftail = NULL;
    fprev = NULL;
    if (unit == IPL_LOGAUTH) {
        if ((fp->fr_tifs[0].fd_ptr != NULL) ||
            (fp->fr_tifs[1].fd_ptr != NULL) ||
            (fp->fr_dif.fd_ptr != NULL) ||
            (fp->fr_flags & FR_FASTROUTE)) {
            softc->ipf_interror = 145;
            error = EINVAL;
            goto donenolock;
        }
        fprev = ipf_auth_rulehead(softc);
    } else {
        if (FR_ISACCOUNT(fp->fr_flags))
            fprev = &softc->ipf_acct[in][set];
        else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0)
            fprev = &softc->ipf_rules[in][set];
    }
    if (fprev == NULL) {
        IPFERROR(15);
        error = ESRCH;
        goto donenolock;
    }

    if (fg != NULL)
        fprev = &fg->fg_start;

    /*
     * Copy in extra data for the rule.
     */
    if (fp->fr_dsize != 0) {
        if (makecopy != 0) {
            KMALLOCS(ptr, void *, fp->fr_dsize);
            if (ptr == NULL) {
                IPFERROR(16);
                error = ENOMEM;
                goto donenolock;
            }

            /*
             * The bcopy case is for when the data is appended
             * to the rule by ipf_in_compat().
             */
            if (uptr >= (void *)fp &&
                uptr < (void *)((char *)fp + fp->fr_size)) {
                bcopy(uptr, ptr, fp->fr_dsize);
                error = 0;
            } else {
                error = COPYIN(uptr, ptr, fp->fr_dsize);
                if (error != 0) {
                    IPFERROR(17);
                    error = EFAULT;
                    goto donenolock;
                }
            }
        } else {
            ptr = uptr;
        }
        fp->fr_data = ptr;
    } else {
        fp->fr_data = NULL;
    }

    /*
     * Perform per-rule type sanity checks of their members.
     * All code after this needs to be aware that allocated memory
     * may need to be free'd before exiting.
     */
    switch (fp->fr_type & ~FR_T_BUILTIN)
    {
#if defined(IPFILTER_BPF)
    case FR_T_BPFOPC :
        if (fp->fr_dsize == 0) {
            IPFERROR(19);
            error = EINVAL;
            break;
        }
        if (!bpf_validate(ptr, fp->fr_dsize/sizeof(struct bpf_insn))) {
            IPFERROR(20);
            error = EINVAL;
            break;
        }
        break;
#endif
    case FR_T_IPF :
        /*
         * Preparation for the error case at the bottom of this
         * function.
         */
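        /*
         * Clearing the lookup pointers first means the cleanup code
         * at "donenolock" only calls ipf_lookup_deref() on pointers
         * that were actually resolved by ipf_findlookup() below.
         */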
        if (fp->fr_datype == FRI_LOOKUP)
            fp->fr_dstptr = NULL;
        if (fp->fr_satype == FRI_LOOKUP)
            fp->fr_srcptr = NULL;

        if (fp->fr_dsize != sizeof(fripf_t)) {
            IPFERROR(21);
            error = EINVAL;
            break;
        }

        /*
         * Allowing a rule with both "keep state" and "with oow" is
         * pointless because adding a state entry to the table will
         * fail with the out of window (oow) flag set.
         */
        if ((fp->fr_flags & FR_KEEPSTATE) && (fp->fr_flx & FI_OOW)) {
            IPFERROR(22);
            error = EINVAL;
            break;
        }

        switch (fp->fr_satype)
        {
        case FRI_BROADCAST :
        case FRI_DYNAMIC :
        case FRI_NETWORK :
        case FRI_NETMASKED :
        case FRI_PEERADDR :
            if (fp->fr_sifpidx < 0) {
                IPFERROR(23);
                error = EINVAL;
            }
            break;
        case FRI_LOOKUP :
            fp->fr_srcptr = ipf_findlookup(softc, unit, fp,
                                           &fp->fr_src6,
                                           &fp->fr_smsk6);
            if (fp->fr_srcfunc == NULL) {
                IPFERROR(132);
                error = ESRCH;
                break;
            }
            break;
        case FRI_NORMAL :
            break;
        default :
            IPFERROR(133);
            error = EINVAL;
            break;
        }
        if (error != 0)
            break;

        switch (fp->fr_datype)
        {
        case FRI_BROADCAST :
        case FRI_DYNAMIC :
        case FRI_NETWORK :
        case FRI_NETMASKED :
        case FRI_PEERADDR :
            if (fp->fr_difpidx < 0) {
                IPFERROR(24);
                error = EINVAL;
            }
            break;
        case FRI_LOOKUP :
            fp->fr_dstptr = ipf_findlookup(softc, unit, fp,
                                           &fp->fr_dst6,
                                           &fp->fr_dmsk6);
            if (fp->fr_dstfunc == NULL) {
                IPFERROR(134);
                error = ESRCH;
            }
            break;
        case FRI_NORMAL :
            break;
        default :
            IPFERROR(135);
            error = EINVAL;
        }
        break;

    case FR_T_NONE :
    case FR_T_CALLFUNC :
    case FR_T_COMPIPF :
        break;

    case FR_T_IPFEXPR :
        if (ipf_matcharray_verify(fp->fr_data, fp->fr_dsize) == -1) {
            IPFERROR(25);
            error = EINVAL;
        }
        break;

    default :
        IPFERROR(26);
        error = EINVAL;
        break;
    }
    if (error != 0)
        goto donenolock;

    if (fp->fr_tif.fd_name != -1) {
        if ((fp->fr_tif.fd_name < 0) ||
            (fp->fr_tif.fd_name >= fp->fr_namelen)) {
            IPFERROR(139);
            error = EINVAL;
            goto donenolock;
        }
    }

    if (fp->fr_dif.fd_name != -1) {
        if ((fp->fr_dif.fd_name < 0) ||
            (fp->fr_dif.fd_name >= fp->fr_namelen)) {
            IPFERROR(140);
            error = EINVAL;
            goto donenolock;
        }
    }

    if (fp->fr_rif.fd_name != -1) {
        if ((fp->fr_rif.fd_name < 0) ||
            (fp->fr_rif.fd_name >= fp->fr_namelen)) {
            IPFERROR(141);
            error = EINVAL;
            goto donenolock;
        }
    }

    /*
     * Lookup all the interface names that are part of the rule.
     */
    error = ipf_synclist(softc, fp, NULL);
    if (error != 0)
        goto donenolock;
    fp->fr_statecnt = 0;
    if (fp->fr_srctrack.ht_max_nodes != 0)
        ipf_rb_ht_init(&fp->fr_srctrack);

    /*
     * Look for an existing matching filter rule, but don't include the
     * next or interface pointer in the comparison (fr_next, fr_ifa).
     * This eliminates identical rules from being loaded twice. Checksum
     * the constant part of the filter rule to make comparisons quicker
     * (meaning no pointers are included).
     */
    for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_func, pp = &fp->fr_cksum;
         p < pp; p++)
        fp->fr_cksum += *p;
    pp = (u_int *)(fp->fr_caddr + fp->fr_dsize);
    for (p = (u_int *)fp->fr_data; p < pp; p++)
        fp->fr_cksum += *p;

    WRITE_ENTER(&softc->ipf_mutex);

    /*
     * Now that the filter rule lists are locked, we can walk the
     * chain of them without fear.
     */
    ftail = fprev;
    for (f = *ftail; (f = *ftail) != NULL; ftail = &f->fr_next) {
        if (fp->fr_collect <= f->fr_collect) {
            ftail = fprev;
            f = NULL;
            break;
        }
        fprev = ftail;
    }

    for (; (f = *ftail) != NULL; ftail = &f->fr_next) {
        if (ipf_rule_compare(fp, f) == 0)
            break;
    }

    /*
     * If zero'ing statistics, copy current to caller and zero.
     */
    if (addrem == 2) {
        if (f == NULL) {
            IPFERROR(27);
            error = ESRCH;
        } else {
            /*
             * Copy and reduce lock because of impending copyout.
             * Well we should, but if we do then the atomicity of
             * this call and the correctness of fr_hits and
             * fr_bytes cannot be guaranteed. As it is, this code
             * only resets them to 0 if they are successfully
             * copied out into user space.
             */
            bcopy((char *)f, (char *)fp, f->fr_size);
            /* MUTEX_DOWNGRADE(&softc->ipf_mutex); */

            /*
             * When we copy this rule back out, set the data
             * pointer to be what it was in user space.
             */
            fp->fr_data = uptr;
            error = ipf_outobj(softc, data, fp, IPFOBJ_FRENTRY);

            if (error == 0) {
                if ((f->fr_dsize != 0) && (uptr != NULL))
                    error = COPYOUT(f->fr_data, uptr,
                                    f->fr_dsize);
                if (error != 0) {
                    IPFERROR(28);
                    error = EFAULT;
                }
                if (error == 0) {
                    f->fr_hits = 0;
                    f->fr_bytes = 0;
                }
            }
        }

        if (makecopy != 0) {
            if (ptr != NULL) {
                KFREES(ptr, fp->fr_dsize);
            }
            KFREES(fp, fp->fr_size);
        }
        RWLOCK_EXIT(&softc->ipf_mutex);
        return error;
    }

    if (!f) {
        /*
         * At the end of this, ftail must point to the place where the
         * new rule is to be saved/inserted/added.
         * For SIOCAD*FR, this should be the last rule in the group of
         * rules that have equal fr_collect fields.
         * For SIOCIN*FR, ...
         */
        if (req == (ioctlcmd_t)SIOCADAFR ||
            req == (ioctlcmd_t)SIOCADIFR) {

            for (ftail = fprev; (f = *ftail) != NULL; ) {
                if (f->fr_collect > fp->fr_collect)
                    break;
                ftail = &f->fr_next;
                fprev = ftail;
            }
            ftail = fprev;
            f = NULL;
            ptr = NULL;
        } else if (req == (ioctlcmd_t)SIOCINAFR ||
                   req == (ioctlcmd_t)SIOCINIFR) {
            while ((f = *fprev) != NULL) {
                if (f->fr_collect >= fp->fr_collect)
                    break;
                fprev = &f->fr_next;
            }
            ftail = fprev;
            if (fp->fr_hits != 0) {
                while (fp->fr_hits && (f = *ftail)) {
                    if (f->fr_collect != fp->fr_collect)
                        break;
                    fprev = ftail;
                    ftail = &f->fr_next;
                    fp->fr_hits--;
                }
            }
            f = NULL;
            ptr = NULL;
        }
    }

    /*
     * Request to remove a rule.
     */
    if (addrem == 1) {
        if (!f) {
            IPFERROR(29);
            error = ESRCH;
        } else {
            /*
             * Do not allow activity from user space to interfere
             * with rules not loaded that way.
             */
            if ((makecopy == 1) && !(f->fr_flags & FR_COPIED)) {
                IPFERROR(30);
                error = EPERM;
                goto done;
            }

            /*
             * Return EBUSY if the rule is being referenced by
             * something else (e.g. state information).
             */
            if (f->fr_ref > 1) {
                IPFERROR(31);
                error = EBUSY;
                goto done;
            }
#ifdef IPFILTER_SCAN
            if (f->fr_isctag != -1 &&
                (f->fr_isc != (struct ipscan *)-1))
                ipf_scan_detachfr(f);
#endif

            if (unit == IPL_LOGAUTH) {
                error = ipf_auth_precmd(softc, req, f, ftail);
                goto done;
            }

            ipf_rule_delete(softc, f, unit, set);

            need_free = makecopy;
        }
    } else {
        /*
         * Not removing, so we must be adding/inserting a rule.
         */
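        /*
         * A rule identical to one already loaded is rejected with
         * EEXIST rather than being linked in a second time.
         */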
        if (f != NULL) {
            IPFERROR(32);
            error = EEXIST;
            goto done;
        }

        if (unit == IPL_LOGAUTH) {
            error = ipf_auth_precmd(softc, req, fp, ftail);
            goto done;
        }

        MUTEX_NUKE(&fp->fr_lock);
        MUTEX_INIT(&fp->fr_lock, "filter rule lock");
        if (fp->fr_die != 0)
            ipf_rule_expire_insert(softc, fp, set);

        fp->fr_hits = 0;
        if (makecopy != 0)
            fp->fr_ref = 1;
        fp->fr_pnext = ftail;
        fp->fr_next = *ftail;
        if (fp->fr_next != NULL)
            fp->fr_next->fr_pnext = &fp->fr_next;
        *ftail = fp;
        if (addrem == 0)
            ipf_fixskip(ftail, fp, 1);

        fp->fr_icmpgrp = NULL;
        if (fp->fr_icmphead != -1) {
            group = FR_NAME(fp, fr_icmphead);
            fg = ipf_group_add(softc, group, fp, 0, unit, set);
            fp->fr_icmpgrp = fg;
        }

        fp->fr_grphead = NULL;
        if (fp->fr_grhead != -1) {
            group = FR_NAME(fp, fr_grhead);
            fg = ipf_group_add(softc, group, fp, fp->fr_flags,
                               unit, set);
            fp->fr_grphead = fg;
        }
    }
done:
    RWLOCK_EXIT(&softc->ipf_mutex);
donenolock:
    if (need_free || (error != 0)) {
        if ((fp->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) {
            if ((fp->fr_satype == FRI_LOOKUP) &&
                (fp->fr_srcptr != NULL))
                ipf_lookup_deref(softc, fp->fr_srctype,
                                 fp->fr_srcptr);
            if ((fp->fr_datype == FRI_LOOKUP) &&
                (fp->fr_dstptr != NULL))
                ipf_lookup_deref(softc, fp->fr_dsttype,
                                 fp->fr_dstptr);
        }
        if (fp->fr_grp != NULL) {
            WRITE_ENTER(&softc->ipf_mutex);
            ipf_group_del(softc, fp->fr_grp, fp);
            RWLOCK_EXIT(&softc->ipf_mutex);
        }
        if ((ptr != NULL) && (makecopy != 0)) {
            KFREES(ptr, fp->fr_dsize);
        }
        KFREES(fp, fp->fr_size);
    }
    return (error);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_rule_delete */
/* Returns: Nil */
/* Parameters: softc(I) - pointer to soft context main structure */
/* f(I) - pointer to the rule being deleted */
/* unit(I) - device for which this is for */
/* set(I) - 1 or 0 (filter set) */
/* */
/* This function attempts to do what it can to delete a filter rule: remove */
/* it from any linked lists and remove any groups it is responsible for. */
/* But in the end, removing a rule can only drop the reference count - we */
/* must use that as the guide for whether or not it can be freed. */
/* ------------------------------------------------------------------------ */
static void
ipf_rule_delete(softc, f, unit, set)
    ipf_main_softc_t *softc;
    frentry_t *f;
    int unit, set;
{

    /*
     * If fr_pdnext is set, then the rule is on the expire list, so
     * remove it from there.
     */
    if (f->fr_pdnext != NULL) {
        *f->fr_pdnext = f->fr_dnext;
        if (f->fr_dnext != NULL)
            f->fr_dnext->fr_pdnext = f->fr_pdnext;
        f->fr_pdnext = NULL;
        f->fr_dnext = NULL;
    }

    ipf_fixskip(f->fr_pnext, f, -1);
    if (f->fr_pnext != NULL)
        *f->fr_pnext = f->fr_next;
    if (f->fr_next != NULL)
        f->fr_next->fr_pnext = f->fr_pnext;
    f->fr_pnext = NULL;
    f->fr_next = NULL;

    (void) ipf_derefrule(softc, &f);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_rule_expire_insert */
/* Returns: Nil */
/* Parameters: softc(I) - pointer to soft context main structure */
/* f(I) - pointer to rule to be added to expire list */
/* set(I) - 1 or 0 (filter set) */
/* */
/* If the new rule has a given expiration time, insert it into the list of */
/* expiring rules with the ones to be removed first added to the front of */
/* the list. The insertion is O(n) but it is kept sorted for quick scans at */
/* expiration interval checks. */
/* ------------------------------------------------------------------------ */
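/*
 * Editor's sketch (generic types, deliberately not compiled): both
 * ipf_rule_delete() above and ipf_rule_expire_insert() below rely on the
 * convention that an entry's "pnext" style field stores the address of
 * whatever pointer points at it, allowing O(1) unlinking with no walk to
 * find the predecessor.
 */
#if 0
struct node {
    struct node *next;
    struct node **pnext;    /* address of the pointer pointing at us */
};

static void
node_unlink(struct node *n)
{
    *n->pnext = n->next;    /* predecessor now skips this node */
    if (n->next != NULL)
        n->next->pnext = n->pnext;
    n->next = NULL;
    n->pnext = NULL;
}
#endif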
static void
ipf_rule_expire_insert(softc, f, set)
    ipf_main_softc_t *softc;
    frentry_t *f;
    int set;
{
    frentry_t *fr;

    f->fr_die = softc->ipf_ticks + IPF_TTLVAL(f->fr_die);
    for (fr = softc->ipf_rule_explist[set]; fr != NULL;
         fr = fr->fr_dnext) {
        if (f->fr_die < fr->fr_die)
            break;
        if (fr->fr_dnext == NULL) {
            /*
             * We've got to the last rule and everything
             * wanted to be expired before this new node,
             * so we have to tack it on the end...
             */
            fr->fr_dnext = f;
            f->fr_pdnext = &fr->fr_dnext;
            fr = NULL;
            break;
        }
    }

    if (softc->ipf_rule_explist[set] == NULL) {
        softc->ipf_rule_explist[set] = f;
        f->fr_pdnext = &softc->ipf_rule_explist[set];
    } else if (fr != NULL) {
        f->fr_dnext = fr;
        f->fr_pdnext = fr->fr_pdnext;
        fr->fr_pdnext = &f->fr_dnext;
    }
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_findlookup */
/* Returns: NULL = failure, else success */
/* Parameters: softc(I) - pointer to soft context main structure */
/* unit(I) - ipf device we want to find match for */
/* fr(I) - rule for which the lookup is being done */
/* addrp(I) - pointer to lookup information in address struct */
/* maskp(O) - pointer to lookup information for storage */
/* */
/* When using pools and hash tables to store addresses for matching in */
/* rules, it is necessary to resolve both the object referred to by the */
/* name or address (and return that pointer) and also provide the means by */
/* which to determine if an address belongs to that object to make the */
/* packet matching quicker. */
/* ------------------------------------------------------------------------ */
static void *
ipf_findlookup(softc, unit, fr, addrp, maskp)
    ipf_main_softc_t *softc;
    int unit;
    frentry_t *fr;
    i6addr_t *addrp, *maskp;
{
    void *ptr = NULL;

    switch (addrp->iplookupsubtype)
    {
    case 0 :
        ptr = ipf_lookup_res_num(softc, unit, addrp->iplookuptype,
                                 addrp->iplookupnum,
                                 &maskp->iplookupfunc);
        break;
    case 1 :
        if (addrp->iplookupname < 0)
            break;
        if (addrp->iplookupname >= fr->fr_namelen)
            break;
        ptr = ipf_lookup_res_name(softc, unit, addrp->iplookuptype,
                                  fr->fr_names + addrp->iplookupname,
                                  &maskp->iplookupfunc);
        break;
    default :
        break;
    }

    return ptr;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_funcinit */
/* Returns: int - 0 == success, else ESRCH: cannot resolve rule details */
/* Parameters: softc(I) - pointer to soft context main structure */
/* fr(I) - pointer to filter rule */
/* */
/* If a rule is a call rule, then check if the function it points to needs */
/* an init function to be called now the rule has been loaded. */
/* ------------------------------------------------------------------------ */
static int
ipf_funcinit(softc, fr)
    ipf_main_softc_t *softc;
    frentry_t *fr;
{
    ipfunc_resolve_t *ft;
    int err;

    IPFERROR(34);
    err = ESRCH;

    for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++)
        if (ft->ipfu_addr == fr->fr_func) {
            err = 0;
            if (ft->ipfu_init != NULL)
                err = (*ft->ipfu_init)(softc, fr);
            break;
        }
    return err;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_funcfini */
/* Returns: Nil */
/* Parameters: softc(I) - pointer to soft context main structure */
/* fr(I) - pointer to filter rule */
/* */
/* For a given filter rule, call the matching "fini" function if the rule */
/* is using a known function that would have resulted in the "init" being */
/* called for earlier. */
/* ------------------------------------------------------------------------ */
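/*
 * Illustrative only (deliberately not compiled): the rough shape of an
 * ipf_availfuncs-style entry tying a callable filter function to optional
 * init/fini hooks. The entry name, the helper functions and the use of
 * designated initialisers here are all assumptions made for the example;
 * the real table is defined elsewhere in ipfilter.
 */
#if 0
static ipfunc_resolve_t example_entry = {
    .ipfu_name = "example",
    .ipfu_addr = example_func,  /* called when the rule matches */
    .ipfu_init = example_init,  /* run by ipf_funcinit() on load */
    .ipfu_fini = example_fini   /* run by ipf_funcfini() on unload */
};
#endif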
static void
ipf_funcfini(softc, fr)
    ipf_main_softc_t *softc;
    frentry_t *fr;
{
    ipfunc_resolve_t *ft;

    for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++)
        if (ft->ipfu_addr == fr->fr_func) {
            if (ft->ipfu_fini != NULL)
                (void) (*ft->ipfu_fini)(softc, fr);
            break;
        }
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_findfunc */
/* Returns: ipfunc_t - pointer to function if found, else NULL */
/* Parameters: funcptr(I) - function pointer to lookup */
/* */
/* Look for a function in the table of known functions. */
/* ------------------------------------------------------------------------ */
static ipfunc_t
ipf_findfunc(funcptr)
    ipfunc_t funcptr;
{
    ipfunc_resolve_t *ft;

    for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++)
        if (ft->ipfu_addr == funcptr)
            return funcptr;
    return NULL;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_resolvefunc */
/* Returns: int - 0 == success, else error */
/* Parameters: data(IO) - ioctl data pointer to ipfunc_resolve_t struct */
/* */
/* Copy in an ipfunc_resolve_t structure and then fill in the missing */
/* field. This will either be the function name (if the pointer is set) or */
/* the function pointer if the name is set. When found, fill in the other */
/* one so that the entire, complete, structure can be copied back to user */
/* space. */
/* ------------------------------------------------------------------------ */
int
ipf_resolvefunc(softc, data)
    ipf_main_softc_t *softc;
    void *data;
{
    ipfunc_resolve_t res, *ft;
    int error;

    error = BCOPYIN(data, &res, sizeof(res));
    if (error != 0) {
        IPFERROR(123);
        return EFAULT;
    }

    if (res.ipfu_addr == NULL && res.ipfu_name[0] != '\0') {
        for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++)
            if (strncmp(res.ipfu_name, ft->ipfu_name,
                        sizeof(res.ipfu_name)) == 0) {
                res.ipfu_addr = ft->ipfu_addr;
                res.ipfu_init = ft->ipfu_init;
                if (COPYOUT(&res, data, sizeof(res)) != 0) {
                    IPFERROR(35);
                    return EFAULT;
                }
                return 0;
            }
    }
    if (res.ipfu_addr != NULL && res.ipfu_name[0] == '\0') {
        for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++)
            if (ft->ipfu_addr == res.ipfu_addr) {
                (void) strncpy(res.ipfu_name, ft->ipfu_name,
                               sizeof(res.ipfu_name));
                res.ipfu_init = ft->ipfu_init;
                if (COPYOUT(&res, data, sizeof(res)) != 0) {
                    IPFERROR(36);
                    return EFAULT;
                }
                return 0;
            }
    }
    IPFERROR(37);
    return ESRCH;
}


#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \
     !defined(__FreeBSD__)) || \
    FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \
    OPENBSD_LT_REV(200006)
/*
 * From: NetBSD
 * ppsratecheck(): packets (or events) per second limitation.
 */
int
ppsratecheck(lasttime, curpps, maxpps)
    struct timeval *lasttime;
    int *curpps;
    int maxpps;     /* maximum pps allowed */
{
    struct timeval tv, delta;
    int rv;

    GETKTIME(&tv);

    delta.tv_sec = tv.tv_sec - lasttime->tv_sec;
    delta.tv_usec = tv.tv_usec - lasttime->tv_usec;
    if (delta.tv_usec < 0) {
        delta.tv_sec--;
        delta.tv_usec += 1000000;
    }

    /*
     * The check for 0,0 is so that the message will be seen at least
     * once. If more than one second has passed since the last update
     * of lasttime, reset the counter.
     *
     * We do increment *curpps even in the *curpps < maxpps case, as
     * some may try to use *curpps for stat purposes as well.
     */
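    /*
     * The return value is 1 if the caller may proceed (the limit has
     * not yet been hit this second, or maxpps is negative meaning "no
     * limit") and 0 if the event should be suppressed.
     */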
    if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
        delta.tv_sec >= 1) {
        *lasttime = tv;
        *curpps = 0;
        rv = 1;
    } else if (maxpps < 0)
        rv = 1;
    else if (*curpps < maxpps)
        rv = 1;
    else
        rv = 0;
    *curpps = *curpps + 1;

    return (rv);
}
#endif


/* ------------------------------------------------------------------------ */
/* Function: ipf_derefrule */
/* Returns: int - 0 == rule freed up, else rule not freed */
/* Parameters: softc(I) - pointer to soft context main structure */
/* frp(I) - pointer to filter rule pointer */
/* */
/* Decrement the reference counter to a rule by one. If it reaches zero, */
/* free it and any associated storage space being used by it. */
/* ------------------------------------------------------------------------ */
int
ipf_derefrule(softc, frp)
    ipf_main_softc_t *softc;
    frentry_t **frp;
{
    frentry_t *fr;
    frdest_t *fdp;

    fr = *frp;
    *frp = NULL;

    MUTEX_ENTER(&fr->fr_lock);
    fr->fr_ref--;
    if (fr->fr_ref == 0) {
        MUTEX_EXIT(&fr->fr_lock);
        MUTEX_DESTROY(&fr->fr_lock);

        ipf_funcfini(softc, fr);

        fdp = &fr->fr_tif;
        if (fdp->fd_type == FRD_DSTLIST)
            ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);

        fdp = &fr->fr_rif;
        if (fdp->fd_type == FRD_DSTLIST)
            ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);

        fdp = &fr->fr_dif;
        if (fdp->fd_type == FRD_DSTLIST)
            ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);

        if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF &&
            fr->fr_satype == FRI_LOOKUP)
            ipf_lookup_deref(softc, fr->fr_srctype, fr->fr_srcptr);
        if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF &&
            fr->fr_datype == FRI_LOOKUP)
            ipf_lookup_deref(softc, fr->fr_dsttype, fr->fr_dstptr);

        if (fr->fr_grp != NULL)
            ipf_group_del(softc, fr->fr_grp, fr);

        if (fr->fr_grphead != NULL)
            ipf_group_del(softc, fr->fr_grphead, fr);

        if (fr->fr_icmpgrp != NULL)
            ipf_group_del(softc, fr->fr_icmpgrp, fr);

        if ((fr->fr_flags & FR_COPIED) != 0) {
            if (fr->fr_dsize) {
                KFREES(fr->fr_data, fr->fr_dsize);
            }
            KFREES(fr, fr->fr_size);
            return 0;
        }
        return 1;
    } else {
        MUTEX_EXIT(&fr->fr_lock);
    }
    return -1;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_grpmapinit */
/* Returns: int - 0 == success, else ESRCH because table entry not found */
/* Parameters: fr(I) - pointer to rule to find hash table for */
/* */
/* Looks for group hash table fr_arg and stores a pointer to it in fr_ptr. */
/* fr_ptr is later used by ipf_srcgrpmap and ipf_dstgrpmap. */
/* ------------------------------------------------------------------------ */
static int
ipf_grpmapinit(softc, fr)
    ipf_main_softc_t *softc;
    frentry_t *fr;
{
    char name[FR_GROUPLEN];
    iphtable_t *iph;

#if defined(SNPRINTF) && defined(_KERNEL)
    SNPRINTF(name, sizeof(name), "%d", fr->fr_arg);
#else
    (void) sprintf(name, "%d", fr->fr_arg);
#endif
    iph = ipf_lookup_find_htable(softc, IPL_LOGIPF, name);
    if (iph == NULL) {
        IPFERROR(38);
        return ESRCH;
    }
    if ((iph->iph_flags & FR_INOUT) != (fr->fr_flags & FR_INOUT)) {
        IPFERROR(39);
        return ESRCH;
    }
    iph->iph_ref++;
    fr->fr_ptr = iph;
    return 0;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_grpmapfini */
/* Returns: int - 0 == success, else ESRCH because table entry not found */
/* Parameters: softc(I) - pointer to soft context main structure */
/* fr(I) - pointer to rule to release hash table for */
/* */
/* For rules that have had ipf_grpmapinit called, ipf_lookup_deref needs to */
/* be called to undo what ipf_grpmapinit caused to be done. */
/* ------------------------------------------------------------------------ */
static int
ipf_grpmapfini(softc, fr)
    ipf_main_softc_t *softc;
    frentry_t *fr;
{
    iphtable_t *iph;

    iph = fr->fr_ptr;
    if (iph != NULL)
        ipf_lookup_deref(softc, IPLT_HASH, iph);
    return 0;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_srcgrpmap */
/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */
/* Parameters: fin(I) - pointer to packet information */
/* passp(IO) - pointer to current/new filter decision (unused) */
/* */
/* Look for a rule group head in a hash table, using the source address as */
/* the key, and descend into that group and continue matching rules against */
/* the packet. */
/* ------------------------------------------------------------------------ */
frentry_t *
ipf_srcgrpmap(fin, passp)
    fr_info_t *fin;
    u_32_t *passp;
{
    frgroup_t *fg;
    void *rval;

    rval = ipf_iphmfindgroup(fin->fin_main_soft, fin->fin_fr->fr_ptr,
                             &fin->fin_src);
    if (rval == NULL)
        return NULL;

    fg = rval;
    fin->fin_fr = fg->fg_start;
    (void) ipf_scanlist(fin, *passp);
    return fin->fin_fr;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_dstgrpmap */
/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */
/* Parameters: fin(I) - pointer to packet information */
/* passp(IO) - pointer to current/new filter decision (unused) */
/* */
/* Look for a rule group head in a hash table, using the destination */
/* address as the key, and descend into that group and continue matching */
/* rules against the packet. */
/* ------------------------------------------------------------------------ */
frentry_t *
ipf_dstgrpmap(fin, passp)
    fr_info_t *fin;
    u_32_t *passp;
{
    frgroup_t *fg;
    void *rval;

    rval = ipf_iphmfindgroup(fin->fin_main_soft, fin->fin_fr->fr_ptr,
                             &fin->fin_dst);
    if (rval == NULL)
        return NULL;

    fg = rval;
    fin->fin_fr = fg->fg_start;
    (void) ipf_scanlist(fin, *passp);
    return fin->fin_fr;
}


/*
 * Queue functions
 * ===============
 * These functions manage objects on queues for efficient timeouts. There
 * are a number of system defined queues as well as user defined timeouts.
 * It is expected that a lock is held in the domain in which the queue
 * belongs (i.e. either state or NAT) when calling any of these functions;
 * that lock prevents ipf_freetimeoutqueue() from being called at the same
 * time as any other.
 */


/* ------------------------------------------------------------------------ */
/* Function: ipf_addtimeoutqueue */
/* Returns: ipftq_t * - NULL if malloc fails, else pointer to */
/* timeout queue with given interval. */
/* Parameters: parent(I) - pointer to pointer to parent node of this list */
/* of interface queues. */
/* seconds(I) - timeout value in seconds for this queue. */
/* */
/* This routine first looks for a timeout queue that matches the interval */
/* being requested. If it finds one, increments the reference counter and */
/* returns a pointer to it. If none are found, it allocates a new one and */
/* inserts it at the top of the list. */
/* */
/* Locking. */
/* It is assumed that the caller of this function has an appropriate lock */
/* held (exclusively) in the domain that encompasses 'parent'. */
/* ------------------------------------------------------------------------ */
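/*
 * Usage sketch (hypothetical, deliberately not compiled): acquire a two
 * minute queue while holding the appropriate domain lock, then later drop
 * the reference under ifq_lock, as the documentation for
 * ipf_deletetimeoutqueue() below requires.
 */
#if 0
    ipftq_t *tq;

    tq = ipf_addtimeoutqueue(softc, &head, 120);    /* 120 second TTL */
    if (tq != NULL) {
        /* ... attach entries with ipf_queueappend() ... */
        MUTEX_ENTER(&tq->ifq_lock);
        (void) ipf_deletetimeoutqueue(tq);          /* drop our reference */
        MUTEX_EXIT(&tq->ifq_lock);
    }
#endif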
ipftq_t *
ipf_addtimeoutqueue(softc, parent, seconds)
    ipf_main_softc_t *softc;
    ipftq_t **parent;
    u_int seconds;
{
    ipftq_t *ifq;
    u_int period;

    period = seconds * IPF_HZ_DIVIDE;

    MUTEX_ENTER(&softc->ipf_timeoutlock);
    for (ifq = *parent; ifq != NULL; ifq = ifq->ifq_next) {
        if (ifq->ifq_ttl == period) {
            /*
             * Reset the delete flag, if set, so the structure
             * gets reused rather than freed and reallocated.
             */
            MUTEX_ENTER(&ifq->ifq_lock);
            ifq->ifq_flags &= ~IFQF_DELETE;
            ifq->ifq_ref++;
            MUTEX_EXIT(&ifq->ifq_lock);
            MUTEX_EXIT(&softc->ipf_timeoutlock);

            return ifq;
        }
    }

    KMALLOC(ifq, ipftq_t *);
    if (ifq != NULL) {
        MUTEX_NUKE(&ifq->ifq_lock);
        IPFTQ_INIT(ifq, period, "ipftq mutex");
        ifq->ifq_next = *parent;
        ifq->ifq_pnext = parent;
        ifq->ifq_flags = IFQF_USER;
        ifq->ifq_ref++;
        *parent = ifq;
        softc->ipf_userifqs++;
    }
    MUTEX_EXIT(&softc->ipf_timeoutlock);
    return ifq;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_deletetimeoutqueue */
/* Returns: int - new reference count value of the timeout queue */
/* Parameters: ifq(I) - timeout queue which is losing a reference. */
/* Locks: ifq->ifq_lock */
/* */
/* This routine must be called when we're discarding a pointer to a timeout */
/* queue object, taking care of the reference counter. */
/* */
/* Now that this just sets a DELETE flag, it requires the expire code to */
/* check the list of user defined timeout queues and call the free function */
/* below to stop memory leaking. It is done this way because the locking */
/* may not be sufficient to safely do a free when this function is called. */
/* ------------------------------------------------------------------------ */
int
ipf_deletetimeoutqueue(ifq)
    ipftq_t *ifq;
{

    ifq->ifq_ref--;
    if ((ifq->ifq_ref == 0) && ((ifq->ifq_flags & IFQF_USER) != 0)) {
        ifq->ifq_flags |= IFQF_DELETE;
    }

    return ifq->ifq_ref;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_freetimeoutqueue */
/* Parameters: ifq(I) - timeout queue which is losing a reference. */
/* Returns: Nil */
/* */
/* Locking: */
/* It is assumed that the caller of this function has an appropriate lock */
/* held (exclusively) in the domain that encompasses the caller's "domain". */
/* The ifq_lock for this structure should not be held. */
/* */
/* Remove a user defined timeout queue from the list of queues it is in and */
/* tidy up after this is done. */
/* ------------------------------------------------------------------------ */
void
ipf_freetimeoutqueue(softc, ifq)
    ipf_main_softc_t *softc;
    ipftq_t *ifq;
{

    if (((ifq->ifq_flags & IFQF_DELETE) == 0) || (ifq->ifq_ref != 0) ||
        ((ifq->ifq_flags & IFQF_USER) == 0)) {
        printf("ipf_freetimeoutqueue(%lx) flags 0x%x ttl %d ref %d\n",
               (u_long)ifq, ifq->ifq_flags, ifq->ifq_ttl,
               ifq->ifq_ref);
        return;
    }

    /*
     * Remove from its position in the list.
     */
    *ifq->ifq_pnext = ifq->ifq_next;
    if (ifq->ifq_next != NULL)
        ifq->ifq_next->ifq_pnext = ifq->ifq_pnext;
    ifq->ifq_next = NULL;
    ifq->ifq_pnext = NULL;

    MUTEX_DESTROY(&ifq->ifq_lock);
    ATOMIC_DEC(softc->ipf_userifqs);
    KFREE(ifq);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_deletequeueentry */
/* Returns: Nil */
/* Parameters: tqe(I) - timeout queue entry to delete */
/* */
/* Remove a tail queue entry from its queue and make it an orphan. */
/* ipf_deletetimeoutqueue is called to make sure the reference count on the */
/* queue is correct. We can't, however, call ipf_freetimeoutqueue because */
/* the correct lock(s) may not be held that would make it safe to do so. */
/* ------------------------------------------------------------------------ */
void
ipf_deletequeueentry(tqe)
    ipftqent_t *tqe;
{
    ipftq_t *ifq;

    ifq = tqe->tqe_ifq;

    MUTEX_ENTER(&ifq->ifq_lock);

    if (tqe->tqe_pnext != NULL) {
        *tqe->tqe_pnext = tqe->tqe_next;
        if (tqe->tqe_next != NULL)
            tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
        else    /* we must be the tail anyway */
            ifq->ifq_tail = tqe->tqe_pnext;

        tqe->tqe_pnext = NULL;
        tqe->tqe_ifq = NULL;
    }

    (void) ipf_deletetimeoutqueue(ifq);
    ASSERT(ifq->ifq_ref > 0);

    MUTEX_EXIT(&ifq->ifq_lock);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_queuefront */
/* Returns: Nil */
/* Parameters: tqe(I) - pointer to timeout queue entry */
/* */
/* Move a queue entry to the front of the queue, if it isn't already there. */
/* ------------------------------------------------------------------------ */
void
ipf_queuefront(tqe)
    ipftqent_t *tqe;
{
    ipftq_t *ifq;

    ifq = tqe->tqe_ifq;
    if (ifq == NULL)
        return;

    MUTEX_ENTER(&ifq->ifq_lock);
    if (ifq->ifq_head != tqe) {
        *tqe->tqe_pnext = tqe->tqe_next;
        if (tqe->tqe_next)
            tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
        else
            ifq->ifq_tail = tqe->tqe_pnext;

        tqe->tqe_next = ifq->ifq_head;
        ifq->ifq_head->tqe_pnext = &tqe->tqe_next;
        ifq->ifq_head = tqe;
        tqe->tqe_pnext = &ifq->ifq_head;
    }
    MUTEX_EXIT(&ifq->ifq_lock);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_queueback */
/* Returns: Nil */
/* Parameters: ticks(I) - ipf tick time to use with this call */
/* tqe(I) - pointer to timeout queue entry */
/* */
/* Move a queue entry to the back of the queue, if it isn't already there. */
/* We use ticks to calculate the expiration and mark for when we last */
/* touched the structure. */
/* ------------------------------------------------------------------------ */
void
ipf_queueback(ticks, tqe)
    u_long ticks;
    ipftqent_t *tqe;
{
    ipftq_t *ifq;

    ifq = tqe->tqe_ifq;
    if (ifq == NULL)
        return;
    tqe->tqe_die = ticks + ifq->ifq_ttl;
    tqe->tqe_touched = ticks;

    MUTEX_ENTER(&ifq->ifq_lock);
    if (tqe->tqe_next != NULL) {        /* at the end already ? */
        /*
         * Remove from list
         */
        *tqe->tqe_pnext = tqe->tqe_next;
        tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;

        /*
         * Make it the last entry.
         */
        tqe->tqe_next = NULL;
        tqe->tqe_pnext = ifq->ifq_tail;
        *ifq->ifq_tail = tqe;
        ifq->ifq_tail = &tqe->tqe_next;
    }
    MUTEX_EXIT(&ifq->ifq_lock);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_queueappend */
/* Returns: Nil */
/* Parameters: ticks(I) - ipf tick time to use with this call */
/* tqe(I) - pointer to timeout queue entry */
/* ifq(I) - pointer to timeout queue */
/* parent(I) - owning object pointer */
/* */
/* Add a new item to this queue and put it on the very end. */
/* We use ticks to calculate the expiration and mark for when we last */
/* touched the structure. */
/* ------------------------------------------------------------------------ */
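/*
 * Editor's sketch (generic types, deliberately not compiled): the ifq_tail
 * convention used below keeps a pointer to the final "next" field of the
 * queue, so an append patches that single link and never walks the list.
 * An empty queue starts with tail = &head.
 */
#if 0
struct ent { struct ent *next; };
struct q { struct ent *head; struct ent **tail; };

static void
q_append(struct q *q, struct ent *e)
{
    e->next = NULL;
    *q->tail = e;           /* patch the old tail link */
    q->tail = &e->next;     /* our next field is the new tail link */
}
#endif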
void
ipf_queueappend(ticks, tqe, ifq, parent)
    u_long ticks;
    ipftqent_t *tqe;
    ipftq_t *ifq;
    void *parent;
{

    MUTEX_ENTER(&ifq->ifq_lock);
    tqe->tqe_parent = parent;
    tqe->tqe_pnext = ifq->ifq_tail;
    *ifq->ifq_tail = tqe;
    ifq->ifq_tail = &tqe->tqe_next;
    tqe->tqe_next = NULL;
    tqe->tqe_ifq = ifq;
    tqe->tqe_die = ticks + ifq->ifq_ttl;
    tqe->tqe_touched = ticks;
    ifq->ifq_ref++;
    MUTEX_EXIT(&ifq->ifq_lock);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_movequeue */
/* Returns: Nil */
/* Parameters: tq(I) - pointer to timeout queue information */
/* oifp(I) - old timeout queue entry was on */
/* nifp(I) - new timeout queue to put entry on */
/* */
/* Move a queue entry from one timeout queue to another timeout queue. */
/* If it notices that the current entry is already last and does not need */
/* to change queues, it simply returns. */
/* ------------------------------------------------------------------------ */
void
ipf_movequeue(ticks, tqe, oifq, nifq)
    u_long ticks;
    ipftqent_t *tqe;
    ipftq_t *oifq, *nifq;
{

    /*
     * If the queue hasn't changed and we last touched this entry at the
     * same ipf time, then we're not going to achieve anything by either
     * changing the ttl or moving it on the queue.
     */
    if (oifq == nifq && tqe->tqe_touched == ticks)
        return;

    /*
     * For any of this to be outside the lock, there is a risk that two
     * packets entering simultaneously, with one changing to a different
     * queue and one not, could end up with things in a bizarre state.
     */
    MUTEX_ENTER(&oifq->ifq_lock);

    tqe->tqe_touched = ticks;
    tqe->tqe_die = ticks + nifq->ifq_ttl;
    /*
     * Is the operation here going to be a no-op ?
     */
    if (oifq == nifq) {
        if ((tqe->tqe_next == NULL) ||
            (tqe->tqe_next->tqe_die == tqe->tqe_die)) {
            MUTEX_EXIT(&oifq->ifq_lock);
            return;
        }
    }

    /*
     * Remove from the old queue
     */
    *tqe->tqe_pnext = tqe->tqe_next;
    if (tqe->tqe_next)
        tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
    else
        oifq->ifq_tail = tqe->tqe_pnext;
    tqe->tqe_next = NULL;

    /*
     * If we're moving from one queue to another, release the
     * lock on the old queue and get a lock on the new queue.
     * For user defined queues, if we're moving off it, call
     * delete in case it can now be freed.
     */
    if (oifq != nifq) {
        tqe->tqe_ifq = NULL;

        (void) ipf_deletetimeoutqueue(oifq);

        MUTEX_EXIT(&oifq->ifq_lock);

        MUTEX_ENTER(&nifq->ifq_lock);

        tqe->tqe_ifq = nifq;
        nifq->ifq_ref++;
    }

    /*
     * Add to the bottom of the new queue
     */
    tqe->tqe_pnext = nifq->ifq_tail;
    *nifq->ifq_tail = tqe;
    nifq->ifq_tail = &tqe->tqe_next;
    MUTEX_EXIT(&nifq->ifq_lock);
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_updateipid */
/* Returns: int - 0 == success, -1 == error (packet should be dropped) */
/* Parameters: fin(I) - pointer to packet information */
/* */
/* When we are doing NAT, change the IP id of every packet to represent a */
/* single sequence of packets coming from the host, hiding any host */
/* specific sequencing that might otherwise be revealed. If the packet is */
/* a fragment, then store the 'new' IPid in the fragment cache and look up */
/* the fragment cache for non-leading fragments. If a non-leading fragment */
/* has no match in the cache, return an error. */
/* ------------------------------------------------------------------------ */
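/*
 * Editor's sketch of the adjustment performed below (deliberately not
 * compiled): when a 16 bit header field changes from "old" to "new", the
 * one's complement checksum can be patched incrementally, in the style of
 * RFC 1624, instead of being recomputed over the whole header. Values
 * here are in host byte order.
 */
#if 0
static u_short
cksum_adjust(u_short cksum, u_short old, u_short new)
{
    u_32_t sum;

    sum = (~cksum & 0xffff) + (~old & 0xffff) + new;
    sum = (sum >> 16) + (sum & 0xffff);     /* fold the carries back in */
    sum = (sum >> 16) + (sum & 0xffff);
    return ~(u_short)sum & 0xffff;
}
#endif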
static int
ipf_updateipid(fin)
    fr_info_t *fin;
{
    u_short id, ido, sums;
    u_32_t sumd, sum;
    ip_t *ip;

    ip = fin->fin_ip;
    ido = ntohs(ip->ip_id);
    if (fin->fin_off != 0) {
        sum = ipf_frag_ipidknown(fin);
        if (sum == 0xffffffff)
            return -1;
        sum &= 0xffff;
        id = (u_short)sum;
        ip->ip_id = htons(id);
    } else {
        ip_fillid(ip);
        id = ntohs(ip->ip_id);
        if ((fin->fin_flx & FI_FRAG) != 0)
            (void) ipf_frag_ipidnew(fin, (u_32_t)id);
    }

    if (id == ido)
        return 0;

    CALC_SUMD(ido, id, sumd);   /* DESTRUCTIVE MACRO! id,ido change */
    sum = (~ntohs(ip->ip_sum)) & 0xffff;
    sum += sumd;
    sum = (sum >> 16) + (sum & 0xffff);
    sum = (sum >> 16) + (sum & 0xffff);
    sums = ~(u_short)sum;
    ip->ip_sum = htons(sums);
    return 0;
}


#ifdef NEED_FRGETIFNAME
/* ------------------------------------------------------------------------ */
/* Function: ipf_getifname */
/* Returns: char * - pointer to interface name */
/* Parameters: ifp(I) - pointer to network interface */
/* buffer(O) - pointer to where to store interface name */
/* */
/* Constructs an interface name in the buffer passed. The buffer passed is */
/* expected to be at least LIFNAMSIZ in bytes big. If buffer is passed in */
/* as a NULL pointer then return a pointer to a static array. */
/* ------------------------------------------------------------------------ */
char *
ipf_getifname(ifp, buffer)
    struct ifnet *ifp;
    char *buffer;
{
    static char namebuf[LIFNAMSIZ];
# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
     defined(__sgi) || defined(linux) || defined(_AIX51) || \
     (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
    int unit, space;
    char temp[20];
    char *s;
# endif

    if (buffer == NULL)
        buffer = namebuf;
    (void) strncpy(buffer, ifp->if_name, LIFNAMSIZ);
    buffer[LIFNAMSIZ - 1] = '\0';
# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
     defined(__sgi) || defined(_AIX51) || \
     (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
    for (s = buffer; *s; s++)
        ;
    unit = ifp->if_unit;
    space = LIFNAMSIZ - (s - buffer);
    if ((space > 0) && (unit >= 0)) {
#  if defined(SNPRINTF) && defined(_KERNEL)
        SNPRINTF(temp, sizeof(temp), "%d", unit);
#  else
        (void) sprintf(temp, "%d", unit);
#  endif
        (void) strncpy(s, temp, space);
    }
# endif
    return buffer;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function: ipf_ioctlswitch */
/* Returns: int - -1 continue processing, else ioctl return value */
/* Parameters: unit(I) - device unit opened */
/* data(I) - pointer to ioctl data */
/* cmd(I) - ioctl command */
/* mode(I) - mode value */
/* uid(I) - uid making the ioctl call */
/* ctx(I) - pointer to context data */
/* */
/* Based on the value of unit, call the appropriate ioctl handler or return */
/* EIO if ipfilter is not running. Also checks if write perms are req'd */
/* for the device in order to execute the ioctl. A special case is made */
/* for SIOCIPFINTERROR so that the same code isn't required in every */
/* handler. The context data pointer is passed through as this is used as */
/* the key for locating a matching token for continued access for walking */
/* lists, etc. */
/* ------------------------------------------------------------------------ */
int
ipf_ioctlswitch(softc, unit, data, cmd, mode, uid, ctx)
    ipf_main_softc_t *softc;
    int unit, mode, uid;
    ioctlcmd_t cmd;
    void *data, *ctx;
{
    int error = 0;

    switch (cmd)
    {
    case SIOCIPFINTERROR :
        error = BCOPYOUT(&softc->ipf_interror, data,
                         sizeof(softc->ipf_interror));
        if (error != 0) {
            IPFERROR(40);
            error = EFAULT;
        }
        return error;
    default :
        break;
    }

    switch (unit)
    {
    case IPL_LOGIPF :
        error = ipf_ipf_ioctl(softc, data, cmd, mode, uid, ctx);
        break;
    case IPL_LOGNAT :
        if (softc->ipf_running > 0) {
            error = ipf_nat_ioctl(softc, data, cmd, mode,
                                  uid, ctx);
        } else {
            IPFERROR(42);
            error = EIO;
        }
        break;
    case IPL_LOGSTATE :
        if (softc->ipf_running > 0) {
            error = ipf_state_ioctl(softc, data, cmd, mode,
                                    uid, ctx);
        } else {
            IPFERROR(43);
            error = EIO;
        }
        break;
    case IPL_LOGAUTH :
        if (softc->ipf_running > 0) {
            error = ipf_auth_ioctl(softc, data, cmd, mode,
                                   uid, ctx);
        } else {
            IPFERROR(44);
            error = EIO;
        }
        break;
    case IPL_LOGSYNC :
        if (softc->ipf_running > 0) {
            error = ipf_sync_ioctl(softc, data, cmd, mode,
                                   uid, ctx);
        } else {
            error = EIO;
            IPFERROR(45);
        }
        break;
    case IPL_LOGSCAN :
#ifdef IPFILTER_SCAN
        if (softc->ipf_running > 0)
            error = ipf_scan_ioctl(softc, data, cmd, mode,
                                   uid, ctx);
        else
#endif
        {
            error = EIO;
            IPFERROR(46);
        }
        break;
    case IPL_LOGLOOKUP :
        if (softc->ipf_running > 0) {
            error = ipf_lookup_ioctl(softc, data, cmd, mode,
                                     uid, ctx);
        } else {
            error = EIO;
            IPFERROR(47);
        }
        break;
    default :
        IPFERROR(48);
        error = EIO;
        break;
    }

    return error;
}


/*
 * This array defines the expected size of objects coming into the kernel
 * for the various recognised object types. The first column is flags (see
 * below), 2nd column is current size, 3rd column is the version number of
 * when the current size became current.
 * Flags:
 * 1 = minimum size, not absolute size
 */
static int ipf_objbytes[IPFOBJ_COUNT][3] = {
    { 1,    sizeof(struct frentry),         5010000 },  /* 0 */
    { 1,    sizeof(struct friostat),        5010000 },
    { 0,    sizeof(struct fr_info),         5010000 },
    { 0,    sizeof(struct ipf_authstat),    4010100 },
    { 0,    sizeof(struct ipfrstat),        5010000 },
    { 1,    sizeof(struct ipnat),           5010000 },  /* 5 */
    { 0,    sizeof(struct natstat),         5010000 },
    { 0,    sizeof(struct ipstate_save),    5010000 },
    { 1,    sizeof(struct nat_save),        5010000 },
    { 0,    sizeof(struct natlookup),       5010000 },
    { 1,    sizeof(struct ipstate),         5010000 },  /* 10 */
    { 0,    sizeof(struct ips_stat),        5010000 },
    { 0,    sizeof(struct frauth),          5010000 },
    { 0,    sizeof(struct ipftune),         4010100 },
    { 0,    sizeof(struct nat),             5010000 },
    { 0,    sizeof(struct ipfruleiter),     4011400 },  /* 15 */
    { 0,    sizeof(struct ipfgeniter),      4011400 },
    { 0,    sizeof(struct ipftable),        4011400 },
    { 0,    sizeof(struct ipflookupiter),   4011400 },
    { 0,    sizeof(struct ipftq) * IPF_TCP_NSTATES },
    { 1,    0,                              0       },  /* IPFEXPR */
    { 0,    0,                              0       },  /* PROXYCTL */
    { 0,    sizeof (struct fripf),          5010000 }
};


/* ------------------------------------------------------------------------ */
/* Function: ipf_inobj */
/* Returns: int - 0 = success, else failure */
/* Parameters: softc(I) - soft context pointer to work with */
/* data(I) - pointer to ioctl data */
/* objp(O) - where to store ipfobj structure */
/* ptr(I) - pointer to data to copy out */
/* type(I) - type of structure being moved */
/* */
/* Copy in the contents of what the ipfobj_t points to. In the future, we */
/* can add things to check for version numbers, sizes, etc, to make it */
/* backward compatible at the ABI for user land. */
/* If objp is not NULL then we assume that the caller wants to see what is */
/* in the ipfobj_t structure being copied in. As an example, this can tell */
/* the caller what version of ipfilter the ioctl program was written to. */
/* ------------------------------------------------------------------------ */
int
ipf_inobj(softc, data, objp, ptr, type)
    ipf_main_softc_t *softc;
    void *data;
    ipfobj_t *objp;
    void *ptr;
    int type;
{
    ipfobj_t obj;
    int error;
    int size;

    if ((type < 0) || (type >= IPFOBJ_COUNT)) {
        IPFERROR(49);
        return EINVAL;
    }

    if (objp == NULL)
        objp = &obj;
    error = BCOPYIN(data, objp, sizeof(*objp));
    if (error != 0) {
        IPFERROR(124);
        return EFAULT;
    }

    if (objp->ipfo_type != type) {
        IPFERROR(50);
        return EINVAL;
    }

    if (objp->ipfo_rev >= ipf_objbytes[type][2]) {
        if ((ipf_objbytes[type][0] & 1) != 0) {
            if (objp->ipfo_size < ipf_objbytes[type][1]) {
                IPFERROR(51);
                return EINVAL;
            }
            size = ipf_objbytes[type][1];
        } else if (objp->ipfo_size == ipf_objbytes[type][1]) {
            size = objp->ipfo_size;
        } else {
            IPFERROR(52);
            return EINVAL;
        }
        error = COPYIN(objp->ipfo_ptr, ptr, size);
        if (error != 0) {
            IPFERROR(55);
            error = EFAULT;
        }
    } else {
#ifdef IPFILTER_COMPAT
        error = ipf_in_compat(softc, objp, ptr, 0);
#else
        IPFERROR(54);
        error = EINVAL;
#endif
    }
    return error;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_inobjsz */
/* Returns: int - 0 = success, else failure */
/* Parameters: softc(I) - soft context pointer to work with */
/* data(I) - pointer to ioctl data */
/* ptr(I) - pointer to store real data in */
/* type(I) - type of structure being moved */
/* sz(I) - size of data to copy */
/* */
/* As per ipf_inobj, except the size of the object to copy in is passed in */
/* but it must not be smaller than the size defined for the type and the */
/* type must allow for varied sized objects. The extra requirement here is */
/* that sz must match the size of the object being passed in - this is */
/* neither possible nor required in ipf_inobj(). */
/* ------------------------------------------------------------------------ */
int
ipf_inobjsz(softc, data, ptr, type, sz)
    ipf_main_softc_t *softc;
    void *data;
    void *ptr;
    int type, sz;
{
    ipfobj_t obj;
    int error;

    if ((type < 0) || (type >= IPFOBJ_COUNT)) {
        IPFERROR(56);
        return EINVAL;
    }

    error = BCOPYIN(data, &obj, sizeof(obj));
    if (error != 0) {
        IPFERROR(125);
        return EFAULT;
    }

    if (obj.ipfo_type != type) {
        IPFERROR(58);
        return EINVAL;
    }

    if (obj.ipfo_rev >= ipf_objbytes[type][2]) {
        if (((ipf_objbytes[type][0] & 1) == 0) ||
            (sz < ipf_objbytes[type][1])) {
            IPFERROR(57);
            return EINVAL;
        }
        error = COPYIN(obj.ipfo_ptr, ptr, sz);
        if (error != 0) {
            IPFERROR(61);
            error = EFAULT;
        }
    } else {
#ifdef IPFILTER_COMPAT
        error = ipf_in_compat(softc, &obj, ptr, sz);
#else
        IPFERROR(60);
        error = EINVAL;
#endif
    }
    return error;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_outobjsz */
/* Returns: int - 0 = success, else failure */
/* Parameters: data(I) - pointer to ioctl data */
/* ptr(I) - pointer to store real data in */
/* type(I) - type of structure being moved */
/* sz(I) - size of data to copy */
/* */
/* As per ipf_outobj, except the size of the object to copy out is passed */
/* in but it must not be smaller than the size defined for the type and the */
/* type must allow for varied sized objects. The extra requirement here is */
/* that sz must match the size of the object being passed in - this is */
/* neither possible nor required in ipf_outobj(). */
/* ------------------------------------------------------------------------ */
int
ipf_outobjsz(softc, data, ptr, type, sz)
    ipf_main_softc_t *softc;
    void *data;
    void *ptr;
    int type, sz;
{
    ipfobj_t obj;
    int error;

    if ((type < 0) || (type >= IPFOBJ_COUNT)) {
        IPFERROR(62);
        return EINVAL;
    }

    error = BCOPYIN(data, &obj, sizeof(obj));
    if (error != 0) {
        IPFERROR(127);
        return EFAULT;
    }

    if (obj.ipfo_type != type) {
        IPFERROR(63);
        return EINVAL;
    }

    if (obj.ipfo_rev >= ipf_objbytes[type][2]) {
        if (((ipf_objbytes[type][0] & 1) == 0) ||
            (sz < ipf_objbytes[type][1])) {
            IPFERROR(146);
            return EINVAL;
        }
        error = COPYOUT(ptr, obj.ipfo_ptr, sz);
        if (error != 0) {
            IPFERROR(66);
            error = EFAULT;
        }
    } else {
#ifdef IPFILTER_COMPAT
        error = ipf_out_compat(softc, &obj, ptr);
#else
        IPFERROR(65);
        error = EINVAL;
#endif
    }
    return error;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_outobj */
/* Returns: int - 0 = success, else failure */
/* Parameters: data(I) - pointer to ioctl data */
/* ptr(I) - pointer to data to copy out */
/* type(I) - type of structure being moved */
/* */
/* Copy out the contents of what ptr points to, to where the ipfobj_t */
/* points. In the future, we can add things to check for version numbers, */
/* sizes, etc, to make it backward compatible at the ABI for user land. */
/* ------------------------------------------------------------------------ */
int
ipf_outobj(softc, data, ptr, type)
    ipf_main_softc_t *softc;
    void *data;
    void *ptr;
    int type;
{
    ipfobj_t obj;
    int error;

    if ((type < 0) || (type >= IPFOBJ_COUNT)) {
        IPFERROR(67);
        return EINVAL;
    }

    error = BCOPYIN(data, &obj, sizeof(obj));
    if (error != 0) {
        IPFERROR(126);
        return EFAULT;
    }

    if (obj.ipfo_type != type) {
        IPFERROR(68);
        return EINVAL;
    }

    if (obj.ipfo_rev >= ipf_objbytes[type][2]) {
        if ((ipf_objbytes[type][0] & 1) != 0) {
            if (obj.ipfo_size < ipf_objbytes[type][1]) {
                IPFERROR(69);
                return EINVAL;
            }
        } else if (obj.ipfo_size != ipf_objbytes[type][1]) {
            IPFERROR(70);
            return EINVAL;
        }

        error = COPYOUT(ptr, obj.ipfo_ptr, obj.ipfo_size);
        if (error != 0) {
            IPFERROR(73);
            error = EFAULT;
        }
    } else {
#ifdef IPFILTER_COMPAT
        error = ipf_out_compat(softc, &obj, ptr);
#else
        IPFERROR(72);
        error = EINVAL;
#endif
    }
    return error;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_outobjk */
/* Returns: int - 0 = success, else failure */
/* Parameters: obj(I) - pointer to data description structure */
/* ptr(I) - pointer to kernel data to copy out */
/* */
/* In the above functions, the ipfobj_t structure is copied into the */
/* kernel, telling ipfilter how to copy out data. In this instance, the */
/* ipfobj_t is already populated with information and now we just need to */
/* use it. There is no need for this function to have a "type" parameter */
/* as there is no point in validating information that comes from the */
/* kernel with itself. */
/* ------------------------------------------------------------------------ */
int
ipf_outobjk(softc, obj, ptr)
    ipf_main_softc_t *softc;
    ipfobj_t *obj;
    void *ptr;
{
    int type = obj->ipfo_type;
    int error;

    if ((type < 0) || (type >= IPFOBJ_COUNT)) {
        IPFERROR(147);
        return EINVAL;
    }

    if (obj->ipfo_rev >= ipf_objbytes[type][2]) {
        if ((ipf_objbytes[type][0] & 1) != 0) {
            if (obj->ipfo_size < ipf_objbytes[type][1]) {
                IPFERROR(148);
                return EINVAL;
            }
        } else if (obj->ipfo_size != ipf_objbytes[type][1]) {
            IPFERROR(149);
            return EINVAL;
        }

        error = COPYOUT(ptr, obj->ipfo_ptr, obj->ipfo_size);
        if (error != 0) {
            IPFERROR(150);
            error = EFAULT;
        }
    } else {
#ifdef IPFILTER_COMPAT
        error = ipf_out_compat(softc, obj, ptr);
#else
        IPFERROR(151);
        error = EINVAL;
#endif
    }
    return error;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_checkl4sum */
/* Returns: int - 0 = good, -1 = bad, 1 = cannot check */
/* Parameters: fin(I) - pointer to packet information */
/* */
/* If possible, calculate the layer 4 checksum for the packet. If this is */
/* not possible, return without indicating a failure or success but in a */
/* way that is distinguishable. This function should only be called by */
/* ipf_checkv6sum() for each platform. */
/* ------------------------------------------------------------------------ */
INLINE int
ipf_checkl4sum(fin)
    fr_info_t *fin;
{
    u_short sum, hdrsum, *csump;
    udphdr_t *udp;
    int dosum;

    /*
     * If the packet isn't a fragment, isn't too short and otherwise
     * isn't already considered "bad", then validate the checksum. If
     * this check fails then consider the packet to be "bad".
     */
    if ((fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) != 0)
        return 1;

    csump = NULL;
    hdrsum = 0;
    dosum = 0;
    sum = 0;

    switch (fin->fin_p)
    {
    case IPPROTO_TCP :
        csump = &((tcphdr_t *)fin->fin_dp)->th_sum;
        dosum = 1;
        break;

    case IPPROTO_UDP :
        udp = fin->fin_dp;
        if (udp->uh_sum != 0) {
            csump = &udp->uh_sum;
            dosum = 1;
        }
        break;

#ifdef USE_INET6
    case IPPROTO_ICMPV6 :
        csump = &((struct icmp6_hdr *)fin->fin_dp)->icmp6_cksum;
        dosum = 1;
        break;
#endif

    case IPPROTO_ICMP :
        csump = &((struct icmp *)fin->fin_dp)->icmp_cksum;
        dosum = 1;
        break;

    default :
        return 1;
        /*NOTREACHED*/
    }

    if (csump != NULL)
        hdrsum = *csump;

    if (dosum) {
        sum = fr_cksum(fin, fin->fin_ip, fin->fin_p, fin->fin_dp);
    }
#if !defined(_KERNEL)
    if (sum == hdrsum) {
        FR_DEBUG(("checkl4sum: %hx == %hx\n", sum, hdrsum));
    } else {
        FR_DEBUG(("checkl4sum: %hx != %hx\n", sum, hdrsum));
    }
#endif
    DT2(l4sums, u_short, hdrsum, u_short, sum);
    if (hdrsum == sum) {
        fin->fin_cksum = FI_CK_SUMOK;
        return 0;
    }
    fin->fin_cksum = FI_CK_BAD;
    return -1;
}


/* ------------------------------------------------------------------------ */
/* Function: ipf_ifpfillv4addr */
/* Returns: int - 0 = address update, -1 = address not updated */
/* Parameters: atype(I) - type of network address update to perform */
/* sin(I) - pointer to source of address information */
/* mask(I) - pointer to source of netmask information */
/* inp(I) - pointer to destination address store */
/* inpmask(I) - pointer to destination netmask store */
/* */
/* Given a type of network address update (atype) to perform, copy */
/* information from sin/mask into inp/inpmask. If inpmask is NULL then no */
/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */
/* which case the operation fails. For all values of atype other than */
/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */
/* value. */
/* ------------------------------------------------------------------------ */
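/*
 * Usage sketch (hypothetical values, deliberately not compiled): derive
 * the network address 192.0.2.0/24 from an interface address of 192.0.2.1.
 */
#if 0
    struct sockaddr_in sin, msk;
    struct in_addr addr, mask;

    sin.sin_addr.s_addr = htonl(0xc0000201);    /* 192.0.2.1 */
    msk.sin_addr.s_addr = htonl(0xffffff00);    /* 255.255.255.0 */
    if (ipf_ifpfillv4addr(FRI_NETWORK, &sin, &msk, &addr, &mask) == 0) {
        /* addr is now 192.0.2.0; mask is all 1s because the */
        /* atype used here is not FRI_NETMASKED. */
    }
#endif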
int
ipf_ifpfillv4addr(atype, sin, mask, inp, inpmask)
    int atype;
    struct sockaddr_in *sin, *mask;
    struct in_addr *inp, *inpmask;
{
    if (inpmask != NULL && atype != FRI_NETMASKED)
        inpmask->s_addr = 0xffffffff;

    if (atype == FRI_NETWORK || atype == FRI_NETMASKED) {
        if (atype == FRI_NETMASKED) {
            if (inpmask == NULL)
                return -1;
            inpmask->s_addr = mask->sin_addr.s_addr;
        }
        inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr;
    } else {
        inp->s_addr = sin->sin_addr.s_addr;
    }
    return 0;
}


#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function: ipf_ifpfillv6addr */
/* Returns: int - 0 = address update, -1 = address not updated */
/* Parameters: atype(I) - type of network address update to perform */
/* sin(I) - pointer to source of address information */
/* mask(I) - pointer to source of netmask information */
/* inp(I) - pointer to destination address store */
/* inpmask(I) - pointer to destination netmask store */
/* */
/* Given a type of network address update (atype) to perform, copy */
/* information from sin/mask into inp/inpmask. If inpmask is NULL then no */
/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */
/* which case the operation fails. For all values of atype other than */
/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */
/* value. */
/* ------------------------------------------------------------------------ */
int
ipf_ifpfillv6addr(atype, sin, mask, inp, inpmask)
    int atype;
    struct sockaddr_in6 *sin, *mask;
    i6addr_t *inp, *inpmask;
{
    i6addr_t *src, *and;

    src = (i6addr_t *)&sin->sin6_addr;
    and = (i6addr_t *)&mask->sin6_addr;

    if (inpmask != NULL && atype != FRI_NETMASKED) {
        inpmask->i6[0] = 0xffffffff;
        inpmask->i6[1] = 0xffffffff;
        inpmask->i6[2] = 0xffffffff;
        inpmask->i6[3] = 0xffffffff;
    }

    if (atype == FRI_NETWORK || atype == FRI_NETMASKED) {
        if (atype == FRI_NETMASKED) {
            if (inpmask == NULL)
                return -1;
            inpmask->i6[0] = and->i6[0];
            inpmask->i6[1] = and->i6[1];
            inpmask->i6[2] = and->i6[2];
            inpmask->i6[3] = and->i6[3];
        }

        inp->i6[0] = src->i6[0] & and->i6[0];
        inp->i6[1] = src->i6[1] & and->i6[1];
        inp->i6[2] = src->i6[2] & and->i6[2];
        inp->i6[3] = src->i6[3] & and->i6[3];
    } else {
        inp->i6[0] = src->i6[0];
        inp->i6[1] = src->i6[1];
        inp->i6[2] = src->i6[2];
        inp->i6[3] = src->i6[3];
    }
    return 0;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function: ipf_matchtag */
/* Returns: 0 == mismatch, 1 == match. */
/* Parameters: tag1(I) - pointer to first tag to compare */
/* tag2(I) - pointer to second tag to compare */
/* */
/* Returns true (non-zero) or false (0) if the two tag structures can be */
/* considered to be a match or not match, respectively. The tag is 16 */
/* bytes long (16 characters) but that is overlaid with 4 32bit ints so */
/* compare the ints instead, for speed. tag1 is the master of the */
/* comparison. This function should only be called with both tag1 and tag2 */
/* as non-NULL pointers. */
/* ------------------------------------------------------------------------ */
*/ /* ------------------------------------------------------------------------ */ int ipf_matchtag(tag1, tag2) ipftag_t *tag1, *tag2; { if (tag1 == tag2) return 1; if ((tag1->ipt_num[0] == 0) && (tag2->ipt_num[0] == 0)) return 1; if ((tag1->ipt_num[0] == tag2->ipt_num[0]) && (tag1->ipt_num[1] == tag2->ipt_num[1]) && (tag1->ipt_num[2] == tag2->ipt_num[2]) && (tag1->ipt_num[3] == tag2->ipt_num[3])) return 1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_coalesce */ /* Returns: 1 == success, -1 == failure, 0 == no change */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* Attempt to get all of the packet data into a single, contiguous buffer. */ /* If this call returns a failure then the buffers have also been freed. */ /* ------------------------------------------------------------------------ */ int ipf_coalesce(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_COALESCE) != 0) return 1; /* * If the mbuf pointers indicate that there is no mbuf to work with, * return but do not indicate success or failure. */ if (fin->fin_m == NULL || fin->fin_mp == NULL) return 0; #if defined(_KERNEL) if (ipf_pullup(fin->fin_m, fin, fin->fin_plen) == NULL) { ipf_main_softc_t *softc = fin->fin_main_soft; DT1(frb_coalesce, fr_info_t *, fin); LBUMP(ipf_stats[fin->fin_out].fr_badcoalesces); # ifdef MENTAT FREE_MB_T(*fin->fin_mp); # endif fin->fin_reason = FRB_COALESCE; *fin->fin_mp = NULL; fin->fin_m = NULL; return -1; } #else fin = fin; /* LINT */ #endif return 1; } /* * The following table lists all of the tunable variables that can be * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXT. The format of each row * in the table below is as follows: * * pointer to value, name of value, minimum, maximum, size of the value's * container, value attribute flags * * For convenience, IPFT_RDONLY means the value is read-only, IPFT_WRDISABLED * means the value can only be written to when IPFilter is loaded but disabled. * The obvious implication is if neither of these are set then the value can be * changed at any time without harm. */ /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_findbycookie */ /* Returns: NULL = search failed, else pointer to tune struct */ /* Parameters: cookie(I) - cookie value to search for amongst tuneables */ /* next(O) - pointer to place to store the cookie for the */ /* "next" tuneable, if it is desired. */ /* */ /* This function is used to walk through all of the existing tunables with */ /* successive calls. It searches the known tunables for the one which has */ /* a matching value for "cookie" - i.e. its address. When returning a match, */ /* the next one to be found may be returned inside next. */ /* ------------------------------------------------------------------------ */ static ipftuneable_t * ipf_tune_findbycookie(ptop, cookie, next) ipftuneable_t **ptop; void *cookie, **next; { ipftuneable_t *ta, **tap; for (ta = *ptop; ta->ipft_name != NULL; ta++) if (ta == cookie) { if (next != NULL) { /* * If the next entry in the array has a name * present, then return a pointer to it for * where to go next, else return a pointer to * the dynamic list as a key to search there * next. This facilitates a weak linking of * the two "lists" together.
*/ if ((ta + 1)->ipft_name != NULL) *next = ta + 1; else *next = ptop; } return ta; } for (tap = ptop; (ta = *tap) != NULL; tap = &ta->ipft_next) if (tap == cookie) { if (next != NULL) *next = &ta->ipft_next; return ta; } if (next != NULL) *next = NULL; return NULL; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_findbyname */ /* Returns: NULL = search failed, else pointer to tune struct */ /* Parameters: name(I) - name of the tuneable entry to find. */ /* */ /* Search the static array of tuneables and the list of dynamic tuneables */ /* for an entry with a matching name. If we can find one, return a pointer */ /* to the matching structure. */ /* ------------------------------------------------------------------------ */ static ipftuneable_t * ipf_tune_findbyname(top, name) ipftuneable_t *top; const char *name; { ipftuneable_t *ta; for (ta = top; ta != NULL; ta = ta->ipft_next) if (!strcmp(ta->ipft_name, name)) { return ta; } return NULL; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_add_array */ /* Returns: int - 0 == success, else failure */ /* Parameters: newtune - pointer to new tune array to add to tuneables */ /* */ /* Appends tune structures from the array passed in (newtune) to the end of */ /* the current list of "dynamic" tuneable parameters. */ /* If any entry to be added is already present (by name) then the operation */ /* is aborted - entries that have been added are removed before returning. */ /* An entry with no name (NULL) is used as the indication that the end of */ /* the array has been reached. */ /* ------------------------------------------------------------------------ */ int ipf_tune_add_array(softc, newtune) ipf_main_softc_t *softc; ipftuneable_t *newtune; { ipftuneable_t *nt, *dt; int error = 0; for (nt = newtune; nt->ipft_name != NULL; nt++) { error = ipf_tune_add(softc, nt); if (error != 0) { for (dt = newtune; dt != nt; dt++) { (void) ipf_tune_del(softc, dt); } break; } } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_array_link */ /* Returns: 0 == success, -1 == failure */ /* Parameters: softc(I) - soft context pointer to work with */ /* array(I) - pointer to an array of tuneables */ /* */ /* Given an array of tunables (array), append them to the current list of */ /* tuneables for this context (softc->ipf_tuners.) To properly prepare */ /* the array for being appended to the list, initialise all of the next */ /* pointers so we don't need to walk parts of it with ++ and others with */ /* next. The array is expected to have an entry with a NULL name as the */ /* terminator. Trying to add an array with no non-NULL names will return */ /* a failure. */ /* ------------------------------------------------------------------------ */ int ipf_tune_array_link(softc, array) ipf_main_softc_t *softc; ipftuneable_t *array; { ipftuneable_t *t, **p; t = array; if (t->ipft_name == NULL) return -1; for (; t[1].ipft_name != NULL; t++) t[0].ipft_next = &t[1]; t->ipft_next = NULL; /* * Since a pointer to the last entry isn't kept, we need to find it * each time we want to add new variables to the list.
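As a sketch of how the copy/link pair fits together (illustrative only: the "mymod" names below are hypothetical and not part of ipf):
*/
#if 0
	/*
	 * A module template whose ipftp_offset values are offsetof()s
	 * into struct mymod_softc, terminated by a NULL ipft_name.
	 */
	extern ipftuneable_t mymod_tunes_tmpl[];
	extern size_t mymod_tunes_size;
	struct mymod_softc *ms = mymod_get_softc();	/* hypothetical */
	ipftuneable_t *tunes;

	/* rebase the template onto this instance... */
	tunes = ipf_tune_array_copy(ms, mymod_tunes_size, mymod_tunes_tmpl);
	if (tunes != NULL)
		/* ...and append it to softc->ipf_tuners */
		(void) ipf_tune_array_link(softc, tunes);
#endif
/*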
*/ for (p = &softc->ipf_tuners; (t = *p) != NULL; p = &t->ipft_next) if (t->ipft_name == NULL) break; *p = array; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_array_unlink */ /* Returns: 0 == success, -1 == failure */ /* Parameters: softc(I) - soft context pointer to work with */ /* array(I) - pointer to an array of tuneables */ /* */ /* Given an array of tuneables previously appended with */ /* ipf_tune_array_link(), find it on the list and unhook it. */ /* ------------------------------------------------------------------------ */ int ipf_tune_array_unlink(softc, array) ipf_main_softc_t *softc; ipftuneable_t *array; { ipftuneable_t *t, **p; for (p = &softc->ipf_tuners; (t = *p) != NULL; p = &t->ipft_next) if (t == array) break; if (t == NULL) return -1; for (; t[1].ipft_name != NULL; t++) ; *p = t->ipft_next; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_array_copy */ /* Returns: NULL = failure, else pointer to new array */ /* Parameters: base(I) - pointer to structure base */ /* size(I) - size of the array at template */ /* template(I) - original array to copy */ /* */ /* Allocate memory for a new set of tuneable values and copy everything */ /* from template into the new region of memory. The new region is full of */ /* uninitialised pointers (ipft_next) so set them up. Now, ipftp_offset... */ /* */ /* NOTE: the following assumes that sizeof(long) == sizeof(void *) */ /* In the array template, ipftp_offset is the offset (in bytes) of the */ /* location of the tuneable value inside the structure pointed to by base. */ /* As ipftp_offset is a union over the pointers to the tuneable values, if */ /* we add base to the copy's ipftp_offset, copy ends up with a pointer in */ /* ipftp_void that points to the stored value. */ /* ------------------------------------------------------------------------ */ ipftuneable_t * ipf_tune_array_copy(base, size, template) void *base; size_t size; ipftuneable_t *template; { ipftuneable_t *copy; int i; KMALLOCS(copy, ipftuneable_t *, size); if (copy == NULL) { return NULL; } bcopy(template, copy, size); for (i = 0; copy[i].ipft_name; i++) { copy[i].ipft_una.ipftp_offset += (u_long)base; copy[i].ipft_next = copy + i + 1; } return copy; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_add */ /* Returns: int - 0 == success, else failure */ /* Parameters: newtune - pointer to new tune entry to add to tuneables */ /* */ /* Appends tune structures from the array passed in (newtune) to the end of */ /* the current list of "dynamic" tuneable parameters. Once added, the */ /* owner of the object is not expected to ever change "ipft_next". */ /* ------------------------------------------------------------------------ */ int ipf_tune_add(softc, newtune) ipf_main_softc_t *softc; ipftuneable_t *newtune; { ipftuneable_t *ta, **tap; ta = ipf_tune_findbyname(softc->ipf_tuners, newtune->ipft_name); if (ta != NULL) { IPFERROR(74); return EEXIST; } for (tap = &softc->ipf_tuners; *tap != NULL; tap = &(*tap)->ipft_next) ; newtune->ipft_next = NULL; *tap = newtune; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_del */ /* Returns: int - 0 == success, else failure */ /* Parameters: oldtune - pointer to tune entry to remove from the list of */ /* current dynamic tuneables */ /* */ /* Search for the tune structure, by pointer, in the list of those that are */ /* dynamically added at run time.
If found, adjust the list so that this */ /* structure is no longer part of it. */ /* ------------------------------------------------------------------------ */ int ipf_tune_del(softc, oldtune) ipf_main_softc_t *softc; ipftuneable_t *oldtune; { ipftuneable_t *ta, **tap; int error = 0; for (tap = &softc->ipf_tuners; (ta = *tap) != NULL; tap = &ta->ipft_next) { if (ta == oldtune) { *tap = oldtune->ipft_next; oldtune->ipft_next = NULL; break; } } if (ta == NULL) { error = ESRCH; IPFERROR(75); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_tune_del_array */ /* Returns: int - 0 == success, else failure */ /* Parameters: oldtune - pointer to tuneables array */ /* */ /* Remove each tuneable entry in the array from the list of "dynamic" */ /* tunables. If one entry should fail to be found, an error will be */ /* returned and no further ones removed. */ /* An entry with a NULL name is used as the indicator of the last entry in */ /* the array. */ /* ------------------------------------------------------------------------ */ int ipf_tune_del_array(softc, oldtune) ipf_main_softc_t *softc; ipftuneable_t *oldtune; { ipftuneable_t *ot; int error = 0; for (ot = oldtune; ot->ipft_name != NULL; ot++) { error = ipf_tune_del(softc, ot); if (error != 0) break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_ipftune */ /* Returns: int - 0 == success, else failure */ /* Parameters: cmd(I) - ioctl command number */ /* data(I) - pointer to ioctl data structure */ /* */ /* Implement handling of SIOCIPFGETNEXT, SIOCIPFGET and SIOCIPFSET. These */ /* three ioctls provide the means to access and control global variables */ /* within IPFilter, allowing (for example) timeouts and table sizes to be */ /* changed without rebooting, reloading or recompiling. The initialisation */ /* and 'destruction' routines of the various components of ipfilter are */ /* each responsible for handling their own values being too big. */ /* ------------------------------------------------------------------------ */ int ipf_ipftune(softc, cmd, data) ipf_main_softc_t *softc; ioctlcmd_t cmd; void *data; { ipftuneable_t *ta; ipftune_t tu; void *cookie; int error; error = ipf_inobj(softc, data, NULL, &tu, IPFOBJ_TUNEABLE); if (error != 0) return error; tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0'; cookie = tu.ipft_cookie; ta = NULL; switch (cmd) { case SIOCIPFGETNEXT : /* * If cookie is non-NULL, assume it to be a pointer to the last * entry we looked at, so find it (if possible) and return a * pointer to the next one after it. The last entry in * the table is a NULL entry, so when we get to it, set cookie * to NULL and return that, indicating end of list, whereas * if we come in with cookie set to NULL, we are starting anew * at the front of the list. */ if (cookie != NULL) { ta = ipf_tune_findbycookie(&softc->ipf_tuners, cookie, &tu.ipft_cookie); } else { ta = softc->ipf_tuners; tu.ipft_cookie = ta + 1; } if (ta != NULL) { /* * Entry found, but does the data pointed to by that * row fit in what we can return?
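(From user space the whole walk looks roughly like the sketch below - illustrative only: fd handling and error checks are omitted and the object revision is assumed to be IPFILTER_VERSION.)
*/
#if 0
	ipftune_t tu;
	ipfobj_t obj;

	bzero((char *)&tu, sizeof(tu));		/* NULL cookie: start of list */
	obj.ipfo_rev = IPFILTER_VERSION;
	obj.ipfo_type = IPFOBJ_TUNEABLE;
	obj.ipfo_size = sizeof(tu);
	obj.ipfo_ptr = (void *)&tu;
	/* the kernel hands back the next cookie in ipft_cookie each call */
	while (ioctl(fd, SIOCIPFGETNEXT, &obj) == 0 && tu.ipft_cookie != NULL)
		printf("%s = %lu\n", tu.ipft_name, (u_long)tu.ipft_vlong);
#endif
/*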
*/ if (ta->ipft_sz > sizeof(tu.ipft_un)) { IPFERROR(76); return EINVAL; } tu.ipft_vlong = 0; if (ta->ipft_sz == sizeof(u_long)) tu.ipft_vlong = *ta->ipft_plong; else if (ta->ipft_sz == sizeof(u_int)) tu.ipft_vint = *ta->ipft_pint; else if (ta->ipft_sz == sizeof(u_short)) tu.ipft_vshort = *ta->ipft_pshort; else if (ta->ipft_sz == sizeof(u_char)) tu.ipft_vchar = *ta->ipft_pchar; tu.ipft_sz = ta->ipft_sz; tu.ipft_min = ta->ipft_min; tu.ipft_max = ta->ipft_max; tu.ipft_flags = ta->ipft_flags; bcopy(ta->ipft_name, tu.ipft_name, MIN(sizeof(tu.ipft_name), strlen(ta->ipft_name) + 1)); } error = ipf_outobj(softc, data, &tu, IPFOBJ_TUNEABLE); break; case SIOCIPFGET : case SIOCIPFSET : /* * Search by name or by cookie value for a particular entry * in the tuning parameter table. */ IPFERROR(77); error = ESRCH; if (cookie != NULL) { ta = ipf_tune_findbycookie(&softc->ipf_tuners, cookie, NULL); if (ta != NULL) error = 0; } else if (tu.ipft_name[0] != '\0') { ta = ipf_tune_findbyname(softc->ipf_tuners, tu.ipft_name); if (ta != NULL) error = 0; } if (error != 0) break; if (cmd == (ioctlcmd_t)SIOCIPFGET) { /* * Fetch the tuning parameters for a particular value */ tu.ipft_vlong = 0; if (ta->ipft_sz == sizeof(u_long)) tu.ipft_vlong = *ta->ipft_plong; else if (ta->ipft_sz == sizeof(u_int)) tu.ipft_vint = *ta->ipft_pint; else if (ta->ipft_sz == sizeof(u_short)) tu.ipft_vshort = *ta->ipft_pshort; else if (ta->ipft_sz == sizeof(u_char)) tu.ipft_vchar = *ta->ipft_pchar; tu.ipft_cookie = ta; tu.ipft_sz = ta->ipft_sz; tu.ipft_min = ta->ipft_min; tu.ipft_max = ta->ipft_max; tu.ipft_flags = ta->ipft_flags; error = ipf_outobj(softc, data, &tu, IPFOBJ_TUNEABLE); } else if (cmd == (ioctlcmd_t)SIOCIPFSET) { /* * Set an internal parameter. The hard part here is * getting the new value safely and correctly into * the kernel (given we only know its size, not type.) */ u_long in; if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) && (softc->ipf_running > 0)) { IPFERROR(78); error = EBUSY; break; } in = tu.ipft_vlong; if (in < ta->ipft_min || in > ta->ipft_max) { IPFERROR(79); error = EINVAL; break; } if (ta->ipft_func != NULL) { SPL_INT(s); SPL_NET(s); error = (*ta->ipft_func)(softc, ta, &tu.ipft_un); SPL_X(s); } else if (ta->ipft_sz == sizeof(u_long)) { tu.ipft_vlong = *ta->ipft_plong; *ta->ipft_plong = in; } else if (ta->ipft_sz == sizeof(u_int)) { tu.ipft_vint = *ta->ipft_pint; *ta->ipft_pint = (u_int)(in & 0xffffffff); } else if (ta->ipft_sz == sizeof(u_short)) { tu.ipft_vshort = *ta->ipft_pshort; *ta->ipft_pshort = (u_short)(in & 0xffff); } else if (ta->ipft_sz == sizeof(u_char)) { tu.ipft_vchar = *ta->ipft_pchar; *ta->ipft_pchar = (u_char)(in & 0xff); } error = ipf_outobj(softc, data, &tu, IPFOBJ_TUNEABLE); } break; default : IPFERROR(80); error = EINVAL; break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_zerostats */ /* Returns: int - 0 = success, else failure */ /* Parameters: data(O) - pointer to pointer for copying data back to */ /* */ /* Copies the current statistics out to userspace and then zeroes the */ /* current ones in the kernel. The lock is only held across the bzero() as */ /* the copyout may result in paging (i.e. network activity).
*/ /* ------------------------------------------------------------------------ */ int ipf_zerostats(softc, data) ipf_main_softc_t *softc; caddr_t data; { friostat_t fio; ipfobj_t obj; int error; error = ipf_inobj(softc, data, &obj, &fio, IPFOBJ_IPFSTAT); if (error != 0) return error; ipf_getstat(softc, &fio, obj.ipfo_rev); error = ipf_outobj(softc, data, &fio, IPFOBJ_IPFSTAT); if (error != 0) return error; WRITE_ENTER(&softc->ipf_mutex); bzero(&softc->ipf_stats, sizeof(softc->ipf_stats)); RWLOCK_EXIT(&softc->ipf_mutex); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_resolvedest */ /* Returns: int - 0 == success, else error code */ /* Parameters: softc(I) - pointer to soft context main structure */ /* base(I) - where strings are stored */ /* fdp(IO) - pointer to destination information to resolve */ /* v(I) - IP protocol version to match */ /* */ /* Looks up an interface name in the frdest structure pointed to by fdp and */ /* if a matching name can be found for the particular IP protocol version */ /* then store the interface pointer in the frdest struct. If no match is */ /* found, then set the interface pointer to be -1 as NULL is considered to */ /* indicate there is no information at all in the structure. */ /* ------------------------------------------------------------------------ */ int ipf_resolvedest(softc, base, fdp, v) ipf_main_softc_t *softc; char *base; frdest_t *fdp; int v; { int errval = 0; void *ifp; ifp = NULL; if (fdp->fd_name != -1) { if (fdp->fd_type == FRD_DSTLIST) { ifp = ipf_lookup_res_name(softc, IPL_LOGIPF, IPLT_DSTLIST, base + fdp->fd_name, NULL); if (ifp == NULL) { IPFERROR(144); errval = ESRCH; } } else { ifp = GETIFP(base + fdp->fd_name, v); if (ifp == NULL) ifp = (void *)-1; } } fdp->fd_ptr = ifp; if ((ifp != NULL) && (ifp != (void *)-1)) { fdp->fd_local = ipf_deliverlocal(softc, v, ifp, &fdp->fd_ip6); } return errval; } /* ------------------------------------------------------------------------ */ /* Function: ipf_resolvenic */ /* Returns: void* - NULL = wildcard name, -1 = failed to find NIC, else */ /* pointer to interface structure for NIC */ /* Parameters: softc(I)- pointer to soft context main structure */ /* name(I) - complete interface name */ /* v(I) - IP protocol version */ /* */ /* Look for a network interface structure that firstly has a matching name */ /* to that passed in and that is also being used for that IP protocol */ /* version (necessary on some platforms where there are separate listings */ /* for both IPv4 and IPv6 on the same physical NIC). */ /* ------------------------------------------------------------------------ */ void * ipf_resolvenic(softc, name, v) ipf_main_softc_t *softc; char *name; int v; { void *nic; softc = softc; /* gcc -Wextra */ if (name[0] == '\0') return NULL; if ((name[1] == '\0') && ((name[0] == '-') || (name[0] == '*'))) { return NULL; } nic = GETIFP(name, v); if (nic == NULL) nic = (void *)-1; return nic; } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_expire */ /* Returns: None. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* This function is run every ipf tick to see if there are any tokens that */ /* have been held for too long and need to be freed up.
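Because tokens are always (re)inserted at the tail with ipt_die set to ipf_ticks + 20 (see ipf_token_find below), the list is ordered by expiry and the scan can stop at the first token that is still alive.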
*/ /* ------------------------------------------------------------------------ */ void ipf_token_expire(softc) ipf_main_softc_t *softc; { ipftoken_t *it; WRITE_ENTER(&softc->ipf_tokens); while ((it = softc->ipf_token_head) != NULL) { if (it->ipt_die > softc->ipf_ticks) break; ipf_token_deref(softc, it); } RWLOCK_EXIT(&softc->ipf_tokens); } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_flush */ /* Returns: None. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Loop through all of the existing tokens and call deref to see if they */ /* can be freed. Normally a function like this might just loop on */ /* ipf_token_head but there is a chance that a token might have a ref count */ /* of greater than one and in that case the reference would drop twice */ /* by code that is only entitled to drop it once. */ /* ------------------------------------------------------------------------ */ static void ipf_token_flush(softc) ipf_main_softc_t *softc; { ipftoken_t *it, *next; WRITE_ENTER(&softc->ipf_tokens); for (it = softc->ipf_token_head; it != NULL; it = next) { next = it->ipt_next; (void) ipf_token_deref(softc, it); } RWLOCK_EXIT(&softc->ipf_tokens); } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_del */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I)- pointer to soft context main structure */ /* type(I) - the token type to match */ /* uid(I) - uid owning the token */ /* ptr(I) - context pointer for the token */ /* */ /* This function looks for a token in the current list that matches up */ /* the fields (type, uid, ptr). If none is found, ESRCH is returned, else */ /* call ipf_token_deref() to remove it from the list. In the event that */ /* the token has a reference held elsewhere, setting ipt_complete to 2 */ /* enables debugging to distinguish between the two paths that ultimately */ /* lead to a token being deleted. */ /* ------------------------------------------------------------------------ */ int ipf_token_del(softc, type, uid, ptr) ipf_main_softc_t *softc; int type, uid; void *ptr; { ipftoken_t *it; int error; IPFERROR(82); error = ESRCH; WRITE_ENTER(&softc->ipf_tokens); for (it = softc->ipf_token_head; it != NULL; it = it->ipt_next) { if (ptr == it->ipt_ctx && type == it->ipt_type && uid == it->ipt_uid) { it->ipt_complete = 2; ipf_token_deref(softc, it); error = 0; break; } } RWLOCK_EXIT(&softc->ipf_tokens); return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_mark_complete */ /* Returns: None. */ /* Parameters: token(I) - pointer to token structure */ /* */ /* Mark a token as being ineligible for being found with ipf_token_find. */ /* ------------------------------------------------------------------------ */ void ipf_token_mark_complete(token) ipftoken_t *token; { if (token->ipt_complete == 0) token->ipt_complete = 1; } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_find */ /* Returns: ipftoken_t * - NULL if no memory, else pointer to token */ /* Parameters: softc(I)- pointer to soft context main structure */ /* type(I) - the token type to match */ /* uid(I) - uid owning the token */ /* ptr(I) - context pointer for the token */ /* */ /* This function looks for a live token in the list of current tokens that */ /* matches the tuple (type, uid, ptr). If one cannot be found then one is */ /* allocated.
If one is found then it is moved to the top of the list of */ /* currently active tokens. */ /* ------------------------------------------------------------------------ */ ipftoken_t * ipf_token_find(softc, type, uid, ptr) ipf_main_softc_t *softc; int type, uid; void *ptr; { ipftoken_t *it, *new; KMALLOC(new, ipftoken_t *); if (new != NULL) bzero((char *)new, sizeof(*new)); WRITE_ENTER(&softc->ipf_tokens); for (it = softc->ipf_token_head; it != NULL; it = it->ipt_next) { if ((ptr == it->ipt_ctx) && (type == it->ipt_type) && (uid == it->ipt_uid) && (it->ipt_complete < 2)) break; } if (it == NULL) { it = new; new = NULL; if (it == NULL) { RWLOCK_EXIT(&softc->ipf_tokens); return NULL; } it->ipt_ctx = ptr; it->ipt_uid = uid; it->ipt_type = type; it->ipt_ref = 1; } else { if (new != NULL) { KFREE(new); new = NULL; } if (it->ipt_complete > 0) it = NULL; else ipf_token_unlink(softc, it); } if (it != NULL) { it->ipt_pnext = softc->ipf_token_tail; *softc->ipf_token_tail = it; softc->ipf_token_tail = &it->ipt_next; it->ipt_next = NULL; it->ipt_ref++; it->ipt_die = softc->ipf_ticks + 20; } RWLOCK_EXIT(&softc->ipf_tokens); return it; } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_unlink */ /* Returns: None. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* token(I) - pointer to token structure */ /* Write Locks: ipf_tokens */ /* */ /* This function unlinks a token structure from the linked list of tokens */ /* that "own" it. The head pointer never needs to be explicitly adjusted */ /* but the tail does due to the linked list implementation. */ /* ------------------------------------------------------------------------ */ static void ipf_token_unlink(softc, token) ipf_main_softc_t *softc; ipftoken_t *token; { if (softc->ipf_token_tail == &token->ipt_next) softc->ipf_token_tail = token->ipt_pnext; *token->ipt_pnext = token->ipt_next; if (token->ipt_next != NULL) token->ipt_next->ipt_pnext = token->ipt_pnext; token->ipt_next = NULL; token->ipt_pnext = NULL; } /* ------------------------------------------------------------------------ */ /* Function: ipf_token_deref */ /* Returns: int - 0 == token freed, else reference count */ /* Parameters: softc(I) - pointer to soft context main structure */ /* token(I) - pointer to token structure */ /* Write Locks: ipf_tokens */ /* */ /* Drop the reference count on the token structure and if it drops to zero, */ /* call the dereference function for the token type because it is then */ /* possible to free the token data structure. 
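For example, a token freshly created by ipf_token_find comes back with ipt_ref == 2 - one reference for the token list, one for the caller - so the caller's matching deref leaves it on the list to be found again by the next ioctl or reaped later by ipf_token_expire.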
*/ /* ------------------------------------------------------------------------ */ int ipf_token_deref(softc, token) ipf_main_softc_t *softc; ipftoken_t *token; { void *data, **datap; ASSERT(token->ipt_ref > 0); token->ipt_ref--; if (token->ipt_ref > 0) return token->ipt_ref; data = token->ipt_data; datap = &data; if ((data != NULL) && (data != (void *)-1)) { switch (token->ipt_type) { case IPFGENITER_IPF : (void) ipf_derefrule(softc, (frentry_t **)datap); break; case IPFGENITER_IPNAT : WRITE_ENTER(&softc->ipf_nat); ipf_nat_rule_deref(softc, (ipnat_t **)datap); RWLOCK_EXIT(&softc->ipf_nat); break; case IPFGENITER_NAT : ipf_nat_deref(softc, (nat_t **)datap); break; case IPFGENITER_STATE : ipf_state_deref(softc, (ipstate_t **)datap); break; case IPFGENITER_FRAG : ipf_frag_pkt_deref(softc, (ipfr_t **)datap); break; case IPFGENITER_NATFRAG : ipf_frag_nat_deref(softc, (ipfr_t **)datap); break; case IPFGENITER_HOSTMAP : WRITE_ENTER(&softc->ipf_nat); ipf_nat_hostmapdel(softc, (hostmap_t **)datap); RWLOCK_EXIT(&softc->ipf_nat); break; default : ipf_lookup_iterderef(softc, token->ipt_type, data); break; } } ipf_token_unlink(softc, token); KFREE(token); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nextrule */ /* Returns: frentry_t * - NULL == no more rules, else pointer to next */ /* Parameters: softc(I) - pointer to soft context main structure */ /* fr(I) - pointer to filter rule */ /* out(I) - 1 == out rules, 0 == input rules */ /* */ /* Starting with "fr", find the next rule to visit. This includes visiting */ /* the list of rule groups if either fr is NULL (empty list) or it is the */ /* last rule in the list. When walking rule lists, it is either input or */ /* output rules that are returned, never both. */ /* ------------------------------------------------------------------------ */ static frentry_t * ipf_nextrule(softc, active, unit, fr, out) ipf_main_softc_t *softc; int active, unit; frentry_t *fr; int out; { frentry_t *next; frgroup_t *fg; if (fr != NULL && fr->fr_group != -1) { fg = ipf_findgroup(softc, fr->fr_names + fr->fr_group, unit, active, NULL); if (fg != NULL) fg = fg->fg_next; } else { fg = softc->ipf_groups[unit][active]; } while (fg != NULL) { next = fg->fg_start; while (next != NULL) { if (out) { if (next->fr_flags & FR_OUTQUE) return next; } else if (next->fr_flags & FR_INQUE) { return next; } next = next->fr_next; } if (next == NULL) fg = fg->fg_next; } return NULL; } /* ------------------------------------------------------------------------ */ /* Function: ipf_getnextrule */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I)- pointer to soft context main structure */ /* t(I) - pointer to destination information to resolve */ /* ptr(I) - pointer to ipfobj_t to copyin from user space */ /* */ /* This function's first job is to bring in the ipfruleiter_t structure via */ /* the ipfobj_t structure to determine what should be the next rule to */ /* return. Once the ipfruleiter_t has been brought in, it then tries to */ /* find the 'next rule'. This may include searching rule group lists or */ /* just be as simple as looking at the 'next' field in the rule structure. */ /* When we have found the rule to return, increase its reference count and */ /* if we used an existing rule to get here, decrease its reference count. 
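From user space, one step of the walk looks roughly like this sketch (illustrative only: fd and error handling are omitted, iri_active is a hypothetical set number, and real callers size the destination buffer for fr_size plus fr_dsize rather than a bare frentry_t):
*/
#if 0
	ipfruleiter_t it;
	ipfobj_t obj;
	frentry_t fr;

	bzero((char *)&it, sizeof(it));	/* empty iri_group: main list */
	it.iri_inout = 0;		/* input rules */
	it.iri_active = 1;
	it.iri_nrules = 1;
	it.iri_rule = &fr;
	obj.ipfo_rev = IPFILTER_VERSION;
	obj.ipfo_type = IPFOBJ_IPFITER;
	obj.ipfo_size = sizeof(it);
	obj.ipfo_ptr = (void *)&it;
	(void) ioctl(fd, SIOCIPFITER, &obj);	/* one rule copied out */
#endif
/*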
*/ /* ------------------------------------------------------------------------ */ int ipf_getnextrule(softc, t, ptr) ipf_main_softc_t *softc; ipftoken_t *t; void *ptr; { frentry_t *fr, *next, zero; ipfruleiter_t it; int error, out; frgroup_t *fg; ipfobj_t obj; int predict; char *dst; int unit; if (t == NULL || ptr == NULL) { IPFERROR(84); return EFAULT; } error = ipf_inobj(softc, ptr, &obj, &it, IPFOBJ_IPFITER); if (error != 0) return error; if ((it.iri_inout < 0) || (it.iri_inout > 3)) { IPFERROR(85); return EINVAL; } if ((it.iri_active != 0) && (it.iri_active != 1)) { IPFERROR(86); return EINVAL; } if (it.iri_nrules == 0) { IPFERROR(87); return ENOSPC; } if (it.iri_rule == NULL) { IPFERROR(88); return EFAULT; } fg = NULL; fr = t->ipt_data; if ((it.iri_inout & F_OUT) != 0) out = 1; else out = 0; if ((it.iri_inout & F_ACIN) != 0) unit = IPL_LOGCOUNT; else unit = IPL_LOGIPF; READ_ENTER(&softc->ipf_mutex); if (fr == NULL) { if (*it.iri_group == '\0') { if (unit == IPL_LOGCOUNT) { next = softc->ipf_acct[out][it.iri_active]; } else { next = softc->ipf_rules[out][it.iri_active]; } if (next == NULL) next = ipf_nextrule(softc, it.iri_active, unit, NULL, out); } else { fg = ipf_findgroup(softc, it.iri_group, unit, it.iri_active, NULL); if (fg != NULL) next = fg->fg_start; else next = NULL; } } else { next = fr->fr_next; if (next == NULL) next = ipf_nextrule(softc, it.iri_active, unit, fr, out); } if (next != NULL && next->fr_next != NULL) predict = 1; else if (ipf_nextrule(softc, it.iri_active, unit, next, out) != NULL) predict = 1; else predict = 0; if (fr != NULL) (void) ipf_derefrule(softc, &fr); obj.ipfo_type = IPFOBJ_FRENTRY; dst = (char *)it.iri_rule; if (next != NULL) { obj.ipfo_size = next->fr_size; MUTEX_ENTER(&next->fr_lock); next->fr_ref++; MUTEX_EXIT(&next->fr_lock); t->ipt_data = next; } else { obj.ipfo_size = sizeof(frentry_t); bzero(&zero, sizeof(zero)); next = &zero; t->ipt_data = NULL; } it.iri_rule = predict ? next : NULL; if (predict == 0) ipf_token_mark_complete(t); RWLOCK_EXIT(&softc->ipf_mutex); obj.ipfo_ptr = dst; error = ipf_outobjk(softc, &obj, next); if (error == 0 && t->ipt_data != NULL) { dst += obj.ipfo_size; if (next->fr_data != NULL) { ipfobj_t dobj; if (next->fr_type == FR_T_IPFEXPR) dobj.ipfo_type = IPFOBJ_IPFEXPR; else dobj.ipfo_type = IPFOBJ_FRIPF; dobj.ipfo_size = next->fr_dsize; dobj.ipfo_rev = obj.ipfo_rev; dobj.ipfo_ptr = dst; error = ipf_outobjk(softc, &dobj, next->fr_data); } } if ((fr != NULL) && (next == &zero)) (void) ipf_derefrule(softc, &fr); return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_frruleiter */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I)- pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* uid(I) - uid owning the token */ /* ptr(I) - context pointer for the token */ /* */ /* This function serves as a stepping stone between ipf_ipf_ioctl and */ /* ipf_getnextrule. Its role is to find the right token in the kernel for */ /* the process doing the ioctl and use that to ask for the next rule.
*/ /* ------------------------------------------------------------------------ */ static int ipf_frruleiter(softc, data, uid, ctx) ipf_main_softc_t *softc; void *data, *ctx; int uid; { ipftoken_t *token; ipfruleiter_t it; ipfobj_t obj; int error; token = ipf_token_find(softc, IPFGENITER_IPF, uid, ctx); if (token != NULL) { error = ipf_getnextrule(softc, token, data); WRITE_ENTER(&softc->ipf_tokens); ipf_token_deref(softc, token); RWLOCK_EXIT(&softc->ipf_tokens); } else { error = ipf_inobj(softc, data, &obj, &it, IPFOBJ_IPFITER); if (error != 0) return error; it.iri_rule = NULL; error = ipf_outobj(softc, data, &it, IPFOBJ_IPFITER); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_geniter */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* token(I) - pointer to ipftoken_t structure */ /* itp(I) - pointer to iterator data */ /* */ /* Decide which iterator function to call using information passed through */ /* the ipfgeniter_t structure at itp. */ /* ------------------------------------------------------------------------ */ static int ipf_geniter(softc, token, itp) ipf_main_softc_t *softc; ipftoken_t *token; ipfgeniter_t *itp; { int error; switch (itp->igi_type) { case IPFGENITER_FRAG : error = ipf_frag_pkt_next(softc, token, itp); break; default : IPFERROR(92); error = EINVAL; break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_genericiter */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I)- pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* uid(I) - uid owning the token */ /* ptr(I) - context pointer for the token */ /* */ /* Handle the SIOCGENITER ioctl for the ipfilter device. The primary role */ /* here is to find the right token for the calling process and use it to */ /* fetch the next entry for the iterator type requested. */ /* ------------------------------------------------------------------------ */ int ipf_genericiter(softc, data, uid, ctx) ipf_main_softc_t *softc; void *data, *ctx; int uid; { ipftoken_t *token; ipfgeniter_t iter; int error; error = ipf_inobj(softc, data, NULL, &iter, IPFOBJ_GENITER); if (error != 0) return error; token = ipf_token_find(softc, iter.igi_type, uid, ctx); if (token != NULL) { token->ipt_subtype = iter.igi_type; error = ipf_geniter(softc, token, &iter); WRITE_ENTER(&softc->ipf_tokens); ipf_token_deref(softc, token); RWLOCK_EXIT(&softc->ipf_tokens); } else { IPFERROR(93); error = ESRCH; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_ipf_ioctl */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I)- pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* cmd(I) - the ioctl command number */ /* mode(I) - mode flags for the ioctl */ /* uid(I) - uid owning the token */ /* ptr(I) - context pointer for the token */ /* */ /* This function handles all of the ioctl commands that are actually issued */ /* to the /dev/ipl device.
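For instance, turning the filter on from user space is just the following (sketch; fd handling omitted):
*/
#if 0
	int on = 1;

	/* SIOCFRENB: a non-zero int attaches ipf, zero detaches it */
	(void) ioctl(fd, SIOCFRENB, &on);
#endif
/*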
*/ /* ------------------------------------------------------------------------ */ int ipf_ipf_ioctl(softc, data, cmd, mode, uid, ctx) ipf_main_softc_t *softc; caddr_t data; ioctlcmd_t cmd; int mode, uid; void *ctx; { friostat_t fio; int error, tmp; ipfobj_t obj; SPL_INT(s); switch (cmd) { case SIOCFRENB : if (!(mode & FWRITE)) { IPFERROR(94); error = EPERM; } else { error = BCOPYIN(data, &tmp, sizeof(tmp)); if (error != 0) { IPFERROR(95); error = EFAULT; break; } WRITE_ENTER(&softc->ipf_global); if (tmp) { if (softc->ipf_running > 0) error = 0; else error = ipfattach(softc); if (error == 0) softc->ipf_running = 1; else (void) ipfdetach(softc); } else { if (softc->ipf_running == 1) error = ipfdetach(softc); else error = 0; if (error == 0) softc->ipf_running = -1; } RWLOCK_EXIT(&softc->ipf_global); } break; case SIOCIPFSET : if (!(mode & FWRITE)) { IPFERROR(96); error = EPERM; break; } /* FALLTHRU */ case SIOCIPFGETNEXT : case SIOCIPFGET : error = ipf_ipftune(softc, cmd, (void *)data); break; case SIOCSETFF : if (!(mode & FWRITE)) { IPFERROR(97); error = EPERM; } else { error = BCOPYIN(data, &softc->ipf_flags, sizeof(softc->ipf_flags)); if (error != 0) { IPFERROR(98); error = EFAULT; } } break; case SIOCGETFF : error = BCOPYOUT(&softc->ipf_flags, data, sizeof(softc->ipf_flags)); if (error != 0) { IPFERROR(99); error = EFAULT; } break; case SIOCFUNCL : error = ipf_resolvefunc(softc, (void *)data); break; case SIOCINAFR : case SIOCRMAFR : case SIOCADAFR : case SIOCZRLST : if (!(mode & FWRITE)) { IPFERROR(100); error = EPERM; } else { error = frrequest(softc, IPL_LOGIPF, cmd, (caddr_t)data, softc->ipf_active, 1); } break; case SIOCINIFR : case SIOCRMIFR : case SIOCADIFR : if (!(mode & FWRITE)) { IPFERROR(101); error = EPERM; } else { error = frrequest(softc, IPL_LOGIPF, cmd, (caddr_t)data, 1 - softc->ipf_active, 1); } break; case SIOCSWAPA : if (!(mode & FWRITE)) { IPFERROR(102); error = EPERM; } else { WRITE_ENTER(&softc->ipf_mutex); error = BCOPYOUT(&softc->ipf_active, data, sizeof(softc->ipf_active)); if (error != 0) { IPFERROR(103); error = EFAULT; } else { softc->ipf_active = 1 - softc->ipf_active; } RWLOCK_EXIT(&softc->ipf_mutex); } break; case SIOCGETFS : error = ipf_inobj(softc, (void *)data, &obj, &fio, IPFOBJ_IPFSTAT); if (error != 0) break; ipf_getstat(softc, &fio, obj.ipfo_rev); error = ipf_outobj(softc, (void *)data, &fio, IPFOBJ_IPFSTAT); break; case SIOCFRZST : if (!(mode & FWRITE)) { IPFERROR(104); error = EPERM; } else error = ipf_zerostats(softc, (caddr_t)data); break; case SIOCIPFFL : if (!(mode & FWRITE)) { IPFERROR(105); error = EPERM; } else { error = BCOPYIN(data, &tmp, sizeof(tmp)); if (!error) { tmp = ipf_flush(softc, IPL_LOGIPF, tmp); error = BCOPYOUT(&tmp, data, sizeof(tmp)); if (error != 0) { IPFERROR(106); error = EFAULT; } } else { IPFERROR(107); error = EFAULT; } } break; #ifdef USE_INET6 case SIOCIPFL6 : if (!(mode & FWRITE)) { IPFERROR(108); error = EPERM; } else { error = BCOPYIN(data, &tmp, sizeof(tmp)); if (!error) { tmp = ipf_flush(softc, IPL_LOGIPF, tmp); error = BCOPYOUT(&tmp, data, sizeof(tmp)); if (error != 0) { IPFERROR(109); error = EFAULT; } } else { IPFERROR(110); error = EFAULT; } } break; #endif case SIOCSTLCK : if (!(mode & FWRITE)) { IPFERROR(122); error = EPERM; } else { error = BCOPYIN(data, &tmp, sizeof(tmp)); if (error == 0) { ipf_state_setlock(softc->ipf_state_soft, tmp); ipf_nat_setlock(softc->ipf_nat_soft, tmp); ipf_frag_setlock(softc->ipf_frag_soft, tmp); ipf_auth_setlock(softc->ipf_auth_soft, tmp); } else { IPFERROR(111); error = EFAULT; 
} } break; #ifdef IPFILTER_LOG case SIOCIPFFB : if (!(mode & FWRITE)) { IPFERROR(112); error = EPERM; } else { tmp = ipf_log_clear(softc, IPL_LOGIPF); error = BCOPYOUT(&tmp, data, sizeof(tmp)); if (error) { IPFERROR(113); error = EFAULT; } } break; #endif /* IPFILTER_LOG */ case SIOCFRSYN : if (!(mode & FWRITE)) { IPFERROR(114); error = EPERM; } else { WRITE_ENTER(&softc->ipf_global); #if (defined(MENTAT) && defined(_KERNEL)) && !defined(INSTANCES) error = ipfsync(); #else ipf_sync(softc, NULL); error = 0; #endif RWLOCK_EXIT(&softc->ipf_global); } break; case SIOCGFRST : error = ipf_outobj(softc, (void *)data, ipf_frag_stats(softc->ipf_frag_soft), IPFOBJ_FRAGSTAT); break; #ifdef IPFILTER_LOG case FIONREAD : tmp = ipf_log_bytesused(softc, IPL_LOGIPF); error = BCOPYOUT(&tmp, data, sizeof(tmp)); break; #endif case SIOCIPFITER : SPL_SCHED(s); error = ipf_frruleiter(softc, data, uid, ctx); SPL_X(s); break; case SIOCGENITER : SPL_SCHED(s); error = ipf_genericiter(softc, data, uid, ctx); SPL_X(s); break; case SIOCIPFDELTOK : error = BCOPYIN(data, &tmp, sizeof(tmp)); if (error == 0) { SPL_SCHED(s); error = ipf_token_del(softc, tmp, uid, ctx); SPL_X(s); } break; default : IPFERROR(115); error = EINVAL; break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_decaps */ /* Returns: u_32_t - -1 == decapsulation failed, else bit mask of */ /* flags indicating packet filtering decision. */ /* Parameters: fin(I) - pointer to packet information */ /* pass(I) - current filtering result flags */ /* l5proto(I) - layer 5 protocol to decode UDP data as. */ /* */ /* This function is called for packets that are wrapped up in other packets, */ /* for example, an IP packet that is the entire data segment for another IP */ /* packet. If the basic constraints for this are satisfied, change the */ /* buffer to point to the start of the inner packet and start processing */ /* rules belonging to the head group this rule specifies. */ /* ------------------------------------------------------------------------ */ u_32_t ipf_decaps(fin, pass, l5proto) fr_info_t *fin; u_32_t pass; int l5proto; { fr_info_t fin2, *fino = NULL; int elen, hlen, nh; grehdr_t gre; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_COALESCE) == 0) if (ipf_coalesce(fin) == -1) goto cantdecaps; m = fin->fin_m; hlen = fin->fin_hlen; switch (fin->fin_p) { case IPPROTO_UDP : /* * In this case, the specific protocol being decapsulated * inside UDP frames comes from the rule. */ nh = fin->fin_fr->fr_icode; break; case IPPROTO_GRE : /* 47 */ bcopy(fin->fin_dp, (char *)&gre, sizeof(gre)); hlen += sizeof(grehdr_t); if (gre.gr_R|gre.gr_s) goto cantdecaps; if (gre.gr_C) hlen += 4; if (gre.gr_K) hlen += 4; if (gre.gr_S) hlen += 4; nh = IPPROTO_IP; /* * If the routing options flag is set, validate that it is * there and bounce over it. */ #if 0 /* This is really heavy weight and leaves lots of room for error, */ /* so for now, put it off and get the simple stuff right.
*/ if (gre.gr_R) { u_char off, len, *s; u_short af; int end; end = 0; s = fin->fin_dp; s += hlen; aplen = fin->fin_plen - hlen; while (aplen > 3) { af = (s[0] << 8) | s[1]; off = s[2]; len = s[3]; aplen -= 4; s += 4; if (af == 0 && len == 0) { end = 1; break; } if (aplen < len) break; s += len; aplen -= len; } if (end != 1) goto cantdecaps; hlen = s - (u_char *)fin->fin_dp; } #endif break; #ifdef IPPROTO_IPIP case IPPROTO_IPIP : /* 4 */ #endif nh = IPPROTO_IP; break; default : /* Includes ESP, AH is special for IPv4 */ goto cantdecaps; } switch (nh) { case IPPROTO_IP : case IPPROTO_IPV6 : break; default : goto cantdecaps; } bcopy((char *)fin, (char *)&fin2, sizeof(fin2)); fino = fin; fin = &fin2; elen = hlen; #if defined(MENTAT) && defined(_KERNEL) m->b_rptr += elen; #else m->m_data += elen; m->m_len -= elen; #endif fin->fin_plen -= elen; ip = (ip_t *)((char *)fin->fin_ip + elen); /* * Make sure we have at least enough data for the network layer * header. */ if (IP_V(ip) == 4) hlen = IP_HL(ip) << 2; #ifdef USE_INET6 else if (IP_V(ip) == 6) hlen = sizeof(ip6_t); #endif else goto cantdecaps2; if (fin->fin_plen < hlen) goto cantdecaps2; fin->fin_dp = (char *)ip + hlen; if (IP_V(ip) == 4) { /* * Perform IPv4 header checksum validation. */ if (ipf_cksum((u_short *)ip, hlen)) goto cantdecaps2; } if (ipf_makefrip(hlen, ip, fin) == -1) { cantdecaps2: if (m != NULL) { #if defined(MENTAT) && defined(_KERNEL) m->b_rptr -= elen; #else m->m_data -= elen; m->m_len += elen; #endif } cantdecaps: DT1(frb_decapfrip, fr_info_t *, fin); pass &= ~FR_CMDMASK; pass |= FR_BLOCK|FR_QUICK; fin->fin_reason = FRB_DECAPFRIP; return -1; } pass = ipf_scanlist(fin, pass); /* * Copy the packet filter "result" fields out of the fr_info_t struct * that is local to the decapsulation processing and back into the * one we were called with. */ fino->fin_flx = fin->fin_flx; fino->fin_rev = fin->fin_rev; fino->fin_icode = fin->fin_icode; fino->fin_rule = fin->fin_rule; (void) strncpy(fino->fin_group, fin->fin_group, FR_GROUPLEN); fino->fin_fr = fin->fin_fr; fino->fin_error = fin->fin_error; fino->fin_mp = fin->fin_mp; fino->fin_m = fin->fin_m; m = fin->fin_m; if (m != NULL) { #if defined(MENTAT) && defined(_KERNEL) m->b_rptr -= elen; #else m->m_data -= elen; m->m_len += elen; #endif } return pass; } /* ------------------------------------------------------------------------ */ /* Function: ipf_matcharray_load */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* objp(I) - ipfobj_t structure to load data into */ /* arrayptr(I) - pointer to location to store array pointer */ /* */ /* This function loads in a matching array through the ipfobj_t struct that */ /* describes it. Sanity checking and array size limitations are enforced */ /* in this function to prevent userspace from trying to load in something */ /* that is insanely big. Once the size of the array is known, the memory */ /* required is malloc'd and returned through changing *arrayptr. The */ /* contents of the array are verified before returning. Only in the event */ /* of a successful call is the caller required to free up the malloc area.
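As a concrete example of the size rules enforced here and in ipf_matcharray_verify below: the smallest legal array is six words long - array[0] itself holding the total word count, one four word operation and the IPF_EXP_END trailer - giving an ipfo_size of 6 and an in-kernel arraysize of 24 bytes.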
*/ /* ------------------------------------------------------------------------ */ int ipf_matcharray_load(softc, data, objp, arrayptr) ipf_main_softc_t *softc; caddr_t data; ipfobj_t *objp; int **arrayptr; { int arraysize, *array, error; *arrayptr = NULL; error = BCOPYIN(data, objp, sizeof(*objp)); if (error != 0) { IPFERROR(116); return EFAULT; } if (objp->ipfo_type != IPFOBJ_IPFEXPR) { IPFERROR(117); return EINVAL; } if (((objp->ipfo_size & 3) != 0) || (objp->ipfo_size == 0) || (objp->ipfo_size > 1024)) { IPFERROR(118); return EINVAL; } arraysize = objp->ipfo_size * sizeof(*array); KMALLOCS(array, int *, arraysize); if (array == NULL) { IPFERROR(119); return ENOMEM; } error = COPYIN(objp->ipfo_ptr, array, arraysize); if (error != 0) { KFREES(array, arraysize); IPFERROR(120); return EFAULT; } if (ipf_matcharray_verify(array, arraysize) != 0) { KFREES(array, arraysize); IPFERROR(121); return EINVAL; } *arrayptr = array; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_matcharray_verify */ /* Returns: int - 0 = valid array, -1 = invalid array */ /* Parameters: array(I) - pointer to matching array */ /* arraysize(I) - number of elements in the array */ /* */ /* Verify the contents of a matching array by stepping through each element */ /* in it. The actual commands in the array are not verified for */ /* correctness, only that all of the sizes are correctly within limits. */ /* ------------------------------------------------------------------------ */ int ipf_matcharray_verify(array, arraysize) int *array, arraysize; { int i, nelem, maxidx; ipfexp_t *e; nelem = arraysize / sizeof(*array); /* * Currently, it makes no sense to have an array less than 6 * elements long - the initial size at the front, a single operation * (minimum 4 in length) and a trailer, for a total of 6. */ if ((array[0] < 6) || (arraysize < 24) || (arraysize > 4096)) { return -1; } /* * Verify the size of data pointed to by array with how long * the array claims to be itself. */ if (array[0] * sizeof(*array) != arraysize) { return -1; } maxidx = nelem - 1; /* * The last opcode in this array should be an IPF_EXP_END. */ if (array[maxidx] != IPF_EXP_END) { return -1; } for (i = 1; i < maxidx; ) { e = (ipfexp_t *)(array + i); /* * The length of the bits to check must be at least 1 * (or else there is nothing to compare with!) and it * cannot exceed the length of the data present. */ if ((e->ipfe_size < 1 ) || (e->ipfe_size + i > maxidx)) { return -1; } i += e->ipfe_size; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_fr_matcharray */ /* Returns: int - 0 = match failed, else positive match */ /* Parameters: fin(I) - pointer to packet information */ /* array(I) - pointer to matching array */ /* */ /* This function is used to apply a matching array against a packet and */ /* return an indication of whether or not the packet successfully matches */ /* all of the commands in it. */ /* ------------------------------------------------------------------------ */ static int ipf_fr_matcharray(fin, array) fr_info_t *fin; int *array; { int i, n, *x, rv, p; ipfexp_t *e; rv = 0; n = array[0]; x = array + 1; for (; n > 0; x += 3 + x[3], rv = 0) { e = (ipfexp_t *)x; if (e->ipfe_cmd == IPF_EXP_END) break; n -= e->ipfe_size; /* * The upper 16 bits currently store the protocol value. * This is currently used with TCP and UDP port compares and * allows "tcp.port = 80" without requiring an explicit * "ip.pr = tcp" first.
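For example, an IPF_EXP_TCP_PORT opcode carries IPPROTO_TCP in those upper bits, so when the packet being examined is UDP the loop below breaks out with rv still 0 before any port words are compared.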
*/ p = e->ipfe_cmd >> 16; if ((p != 0) && (p != fin->fin_p)) break; switch (e->ipfe_cmd) { case IPF_EXP_IP_PR : for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= (fin->fin_p == e->ipfe_arg0[i]); } break; case IPF_EXP_IP_SRCADDR : if (fin->fin_v != 4) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= ((fin->fin_saddr & e->ipfe_arg0[i * 2 + 1]) == e->ipfe_arg0[i * 2]); } break; case IPF_EXP_IP_DSTADDR : if (fin->fin_v != 4) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= ((fin->fin_daddr & e->ipfe_arg0[i * 2 + 1]) == e->ipfe_arg0[i * 2]); } break; case IPF_EXP_IP_ADDR : if (fin->fin_v != 4) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= ((fin->fin_saddr & e->ipfe_arg0[i * 2 + 1]) == e->ipfe_arg0[i * 2]) || ((fin->fin_daddr & e->ipfe_arg0[i * 2 + 1]) == e->ipfe_arg0[i * 2]); } break; #ifdef USE_INET6 case IPF_EXP_IP6_SRCADDR : if (fin->fin_v != 6) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= IP6_MASKEQ(&fin->fin_src6, &e->ipfe_arg0[i * 8 + 4], &e->ipfe_arg0[i * 8]); } break; case IPF_EXP_IP6_DSTADDR : if (fin->fin_v != 6) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= IP6_MASKEQ(&fin->fin_dst6, &e->ipfe_arg0[i * 8 + 4], &e->ipfe_arg0[i * 8]); } break; case IPF_EXP_IP6_ADDR : if (fin->fin_v != 6) break; for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= IP6_MASKEQ(&fin->fin_src6, &e->ipfe_arg0[i * 8 + 4], &e->ipfe_arg0[i * 8]) || IP6_MASKEQ(&fin->fin_dst6, &e->ipfe_arg0[i * 8 + 4], &e->ipfe_arg0[i * 8]); } break; #endif case IPF_EXP_UDP_PORT : case IPF_EXP_TCP_PORT : for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= (fin->fin_sport == e->ipfe_arg0[i]) || (fin->fin_dport == e->ipfe_arg0[i]); } break; case IPF_EXP_UDP_SPORT : case IPF_EXP_TCP_SPORT : for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= (fin->fin_sport == e->ipfe_arg0[i]); } break; case IPF_EXP_UDP_DPORT : case IPF_EXP_TCP_DPORT : for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= (fin->fin_dport == e->ipfe_arg0[i]); } break; case IPF_EXP_TCP_FLAGS : for (i = 0; !rv && i < e->ipfe_narg; i++) { rv |= ((fin->fin_tcpf & e->ipfe_arg0[i * 2 + 1]) == e->ipfe_arg0[i * 2]); } break; } rv ^= e->ipfe_not; if (rv == 0) break; } return rv; } /* ------------------------------------------------------------------------ */ /* Function: ipf_queueflush */ /* Returns: int - number of entries flushed (0 = none) */ /* Parameters: softc(I) - pointer to soft context main structure */ /* deletefn(I) - function to call to delete entry */ /* ipfqs(I) - top of the list of ipf internal queues */ /* userqs(I) - top of the list of user defined timeouts */ /* */ /* This function gets called when the state/NAT hash tables fill up and we */ /* need to try a bit harder to free up some space. The algorithm used here */ /* is split into two parts but both halves have the same goal: to reduce the */ /* number of connections considered to be "active" to the low watermark. */ /* There are two steps in doing this: */ /* 1) Remove any TCP connections that are already considered to be "closed" */ /* but have not yet been removed from the state table. The two states */ /* TCPS_TIME_WAIT and TCPS_CLOSED are considered to be the perfect */ /* candidates for this style of removal. If freeing up entries in */ /* CLOSED or both CLOSED and TIME_WAIT brings us to the low watermark, */ /* we do not go on to step 2. */ /* */ /* 2) Look for the oldest entries on each timeout queue and free them if */ /* they are within the given window we are considering.
Where the */ /* window starts and the steps taken to increase its size depend upon */ /* how long ipf has been running (ipf_ticks.) Anything modified in the */ /* last 30 seconds is not touched. */ /* touched */ /* die ipf_ticks 30*1.5 1800*1.5 | 43200*1.5 */ /* | | | | | | */ /* future <--+----------+--------+-----------+-----+-----+-----------> past */ /* now \_int=30s_/ \_int=1hr_/ \_int=12hr */ /* */ /* Points to note: */ /* - tqe_die is the time, in the future, when entries die. */ /* - tqe_die - ipf_ticks is how long left the connection has to live in ipf */ /* ticks. */ /* - tqe_touched is when the entry was last used by NAT/state */ /* - the closer tqe_touched is to ipf_ticks, the further tqe_die will be */ /* from ipf_ticks for any given timeout queue and vice versa. */ /* - both tqe_die and tqe_touched increase over time */ /* - timeout queues are sorted with the highest value of tqe_die at the */ /* bottom and therefore the smallest values of each are at the top */ /* - the pointer passed in as ipfqs should point to an array of timeout */ /* queues representing each of the TCP states */ /* */ /* We start by setting up a maximum range to scan for things to remove, */ /* from iend (newest) to istart (oldest) in chunks of "interval". If nothing is */ /* found in that range, "interval" is adjusted (so long as it isn't 30) and */ /* we start again with a new value for "iend" and "istart". This is */ /* continued until we either finish the scan of 30 second intervals or the */ /* low water mark is reached. */ /* ------------------------------------------------------------------------ */ int ipf_queueflush(softc, deletefn, ipfqs, userqs, activep, size, low) ipf_main_softc_t *softc; ipftq_delete_fn_t deletefn; ipftq_t *ipfqs, *userqs; u_int *activep; int size, low; { u_long interval, istart, iend; ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; int removed = 0; for (tqn = ipfqs[IPF_TCPS_CLOSED].ifq_head; ((tqe = tqn) != NULL); ) { tqn = tqe->tqe_next; if ((*deletefn)(softc, tqe->tqe_parent) == 0) removed++; } if ((*activep * 100 / size) > low) { for (tqn = ipfqs[IPF_TCPS_TIME_WAIT].ifq_head; ((tqe = tqn) != NULL); ) { tqn = tqe->tqe_next; if ((*deletefn)(softc, tqe->tqe_parent) == 0) removed++; } } if ((*activep * 100 / size) <= low) { return removed; } /* * NOTE: Use of "* 15 / 10" is required here because if "* 1.5" is * used then the operations are upgraded to floating point * and kernels don't like floating point...
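For example, once ipf_ticks exceeds IPF_TTLVAL(43200 * 15 / 10) - that is, the filter has been running for more than 18 hours - the scan below starts four days back (istart = IPF_TTLVAL(86400 * 4)) and works towards the present in 12 hour (43200 second) chunks.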
*/ if (softc->ipf_ticks > IPF_TTLVAL(43200 * 15 / 10)) { istart = IPF_TTLVAL(86400 * 4); interval = IPF_TTLVAL(43200); } else if (softc->ipf_ticks > IPF_TTLVAL(1800 * 15 / 10)) { istart = IPF_TTLVAL(43200); interval = IPF_TTLVAL(1800); } else if (softc->ipf_ticks > IPF_TTLVAL(30 * 15 / 10)) { istart = IPF_TTLVAL(1800); interval = IPF_TTLVAL(30); } else { return 0; } if (istart > softc->ipf_ticks) { if (softc->ipf_ticks - interval < interval) istart = interval; else istart = (softc->ipf_ticks / interval) * interval; } iend = softc->ipf_ticks - interval; while ((*activep * 100 / size) > low) { u_long try; try = softc->ipf_ticks - istart; for (ifq = ipfqs; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { if (try < tqe->tqe_touched) break; tqn = tqe->tqe_next; if ((*deletefn)(softc, tqe->tqe_parent) == 0) removed++; } } for (ifq = userqs; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { if (try < tqe->tqe_touched) break; tqn = tqe->tqe_next; if ((*deletefn)(softc, tqe->tqe_parent) == 0) removed++; } } if (try >= iend) { if (interval == IPF_TTLVAL(43200)) { interval = IPF_TTLVAL(1800); } else if (interval == IPF_TTLVAL(1800)) { interval = IPF_TTLVAL(30); } else { break; } if (interval >= softc->ipf_ticks) break; iend = softc->ipf_ticks - interval; } istart -= interval; } return removed; } /* ------------------------------------------------------------------------ */ /* Function: ipf_deliverlocal */ /* Returns: int - 1 = local address, 0 = non-local address */ /* Parameters: softc(I) - pointer to soft context main structure */ /* ipversion(I) - IP protocol version (4 or 6) */ /* ifp(I) - network interface pointer */ /* ipaddr(I) - IPv4/6 destination address */ /* */ /* This function is used to determine if the address "ipaddr" belongs to */ /* the network interface represented by ifp. */ /* ------------------------------------------------------------------------ */ int ipf_deliverlocal(softc, ipversion, ifp, ipaddr) ipf_main_softc_t *softc; int ipversion; void *ifp; i6addr_t *ipaddr; { i6addr_t addr; int islocal = 0; if (ipversion == 4) { if (ipf_ifpaddr(softc, 4, FRI_NORMAL, ifp, &addr, NULL) == 0) { if (addr.in4.s_addr == ipaddr->in4.s_addr) islocal = 1; } #ifdef USE_INET6 } else if (ipversion == 6) { if (ipf_ifpaddr(softc, 6, FRI_NORMAL, ifp, &addr, NULL) == 0) { if (IP6_EQ(&addr, ipaddr)) islocal = 1; } #endif } return islocal; } /* ------------------------------------------------------------------------ */ /* Function: ipf_settimeout */ /* Returns: int - 0 = success, -1 = failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tuneable array entry */ /* p(I) - pointer to values passed in to apply */ /* */ /* This function is called to set the timeout values for each distinct */ /* queue timeout that is available. When called, it calls into both the */ /* state and NAT code, telling them to update their timeout queues. */ /* ------------------------------------------------------------------------ */ static int ipf_settimeout(softc, t, p) struct ipf_main_softc_s *softc; ipftuneable_t *t; ipftuneval_t *p; { /* * ipf_interror should be set by the functions called here, not * by this function - it's just a middle man.
*/ if (ipf_state_settimeout(softc, t, p) == -1) return -1; if (ipf_nat_settimeout(softc, t, p) == -1) return -1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_apply_timeout */ /* Returns: Nil */ /* Parameters: head(I) - pointer to timeout queue to update */ /* seconds(I) - new timeout value to apply, in seconds */ /* */ /* This function applies a timeout of "seconds" to the timeout queue that */ /* is pointed to by "head". All entries on this list have an expiration */ /* set to be the current tick value of ipf plus the ttl. Given that this */ /* function should only be called when the delta is non-zero, the task is */ /* to walk the entire list and apply the change. The sort order will not */ /* change. The only catch is that this is O(n) across the list, so if the */ /* queue has lots of entries (10s of thousands or 100s of thousands), it */ /* could take a relatively long time to work through them all. */ /* ------------------------------------------------------------------------ */ void ipf_apply_timeout(head, seconds) ipftq_t *head; u_int seconds; { u_int oldtimeout, newtimeout; ipftqent_t *tqe; int delta; MUTEX_ENTER(&head->ifq_lock); oldtimeout = head->ifq_ttl; newtimeout = IPF_TTLVAL(seconds); delta = oldtimeout - newtimeout; head->ifq_ttl = newtimeout; for (tqe = head->ifq_head; tqe != NULL; tqe = tqe->tqe_next) { tqe->tqe_die -= delta; } MUTEX_EXIT(&head->ifq_lock); } /* ------------------------------------------------------------------------ */ /* Function: ipf_settimeout_tcp */ /* Returns: int - 0 = successfully applied, -1 = failed */ /* Parameters: t(I) - pointer to tuneable to change */ /* p(I) - pointer to new timeout information */ /* tab(I) - pointer to table of TCP queues */ /* */ /* This function applies the new timeout (p) to the TCP tunable (t) and */ /* updates all of the entries on the relevant timeout queue by calling */ /* ipf_apply_timeout().
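For example (editorial note, hypothetical values): setting "tcp_idle_timeout" to 7200 updates the ESTABLISHED queue's ifq_ttl and re-stamps the tqe_die of every entry already sitting on that queue, preserving their relative order.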
*/ /* ------------------------------------------------------------------------ */ int ipf_settimeout_tcp(t, p, tab) ipftuneable_t *t; ipftuneval_t *p; ipftq_t *tab; { if (!strcmp(t->ipft_name, "tcp_idle_timeout") || !strcmp(t->ipft_name, "tcp_established")) { ipf_apply_timeout(&tab[IPF_TCPS_ESTABLISHED], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_close_wait")) { ipf_apply_timeout(&tab[IPF_TCPS_CLOSE_WAIT], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_last_ack")) { ipf_apply_timeout(&tab[IPF_TCPS_LAST_ACK], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_timeout")) { ipf_apply_timeout(&tab[IPF_TCPS_LISTEN], p->ipftu_int); ipf_apply_timeout(&tab[IPF_TCPS_HALF_ESTAB], p->ipftu_int); ipf_apply_timeout(&tab[IPF_TCPS_CLOSING], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_listen")) { ipf_apply_timeout(&tab[IPF_TCPS_LISTEN], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_half_established")) { ipf_apply_timeout(&tab[IPF_TCPS_HALF_ESTAB], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_closing")) { ipf_apply_timeout(&tab[IPF_TCPS_CLOSING], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_syn_received")) { ipf_apply_timeout(&tab[IPF_TCPS_SYN_RECEIVED], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_syn_sent")) { ipf_apply_timeout(&tab[IPF_TCPS_SYN_SENT], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_closed")) { ipf_apply_timeout(&tab[IPF_TCPS_CLOSED], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_half_closed")) { ipf_apply_timeout(&tab[IPF_TCPS_CLOSED], p->ipftu_int); } else if (!strcmp(t->ipft_name, "tcp_time_wait")) { ipf_apply_timeout(&tab[IPF_TCPS_TIME_WAIT], p->ipftu_int); } else { /* * ipf_interror isn't set here because it should be set * by whatever called this function. */ return -1; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_soft_create */ /* Returns: NULL = failure, else success */ /* Parameters: arg(I) - pointer to soft context structure if already allocd */ /* */ /* Create the foundation soft context structure. In circumstances where it */ /* is not required to dynamically allocate the context, a pointer can be */ /* passed in (rather than NULL) to a structure to be initialised. */ /* The main thing of interest is that a number of locks are initialised */ /* here instead of where they might be expected - in the relevant create */ /* function elsewhere. This is done because the current locking design has */ /* some areas where these locks are used outside of their module. */ /* Possibly the most important task done here is the setting of all */ /* the timeout values, allowing them to be changed before init(). */ /* ------------------------------------------------------------------------ */ void * ipf_main_soft_create(arg) void *arg; { ipf_main_softc_t *softc; if (arg == NULL) { KMALLOC(softc, ipf_main_softc_t *); if (softc == NULL) return NULL; } else { softc = arg; } bzero((char *)softc, sizeof(*softc)); /* * This serves as a flag as to whether or not the softc should be * free'd when _destroy is called. */ softc->ipf_dynamic_softc = (arg == NULL) ?
1 : 0; softc->ipf_tuners = ipf_tune_array_copy(softc, sizeof(ipf_main_tuneables), ipf_main_tuneables); if (softc->ipf_tuners == NULL) { ipf_main_soft_destroy(softc); return NULL; } MUTEX_INIT(&softc->ipf_rw, "ipf rw mutex"); MUTEX_INIT(&softc->ipf_timeoutlock, "ipf timeout lock"); RWLOCK_INIT(&softc->ipf_global, "ipf filter load/unload mutex"); RWLOCK_INIT(&softc->ipf_mutex, "ipf filter rwlock"); RWLOCK_INIT(&softc->ipf_tokens, "ipf token rwlock"); RWLOCK_INIT(&softc->ipf_state, "ipf state rwlock"); RWLOCK_INIT(&softc->ipf_nat, "ipf IP NAT rwlock"); RWLOCK_INIT(&softc->ipf_poolrw, "ipf pool rwlock"); RWLOCK_INIT(&softc->ipf_frag, "ipf frag rwlock"); softc->ipf_token_head = NULL; softc->ipf_token_tail = &softc->ipf_token_head; softc->ipf_tcpidletimeout = FIVE_DAYS; softc->ipf_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL); softc->ipf_tcplastack = IPF_TTLVAL(30); softc->ipf_tcptimewait = IPF_TTLVAL(2 * TCP_MSL); softc->ipf_tcptimeout = IPF_TTLVAL(2 * TCP_MSL); softc->ipf_tcpsynsent = IPF_TTLVAL(2 * TCP_MSL); softc->ipf_tcpsynrecv = IPF_TTLVAL(2 * TCP_MSL); softc->ipf_tcpclosed = IPF_TTLVAL(30); softc->ipf_tcphalfclosed = IPF_TTLVAL(2 * 3600); softc->ipf_udptimeout = IPF_TTLVAL(120); softc->ipf_udpacktimeout = IPF_TTLVAL(12); softc->ipf_icmptimeout = IPF_TTLVAL(60); softc->ipf_icmpacktimeout = IPF_TTLVAL(6); softc->ipf_iptimeout = IPF_TTLVAL(60); #if defined(IPFILTER_DEFAULT_BLOCK) softc->ipf_pass = FR_BLOCK|FR_NOMATCH; #else softc->ipf_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; #endif softc->ipf_minttl = 4; softc->ipf_icmpminfragmtu = 68; softc->ipf_flags = IPF_LOGGING; return softc; } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_soft_init */ /* Returns: 0 = success, -1 = failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* A null-op function that exists as a placeholder so that the flow in */ /* other functions is obvious. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ int ipf_main_soft_init(softc) ipf_main_softc_t *softc; { return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_soft_destroy */ /* Returns: void */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Undo everything that we did in ipf_main_soft_create. */ /* */ /* The most important check that needs to be made here is whether or not */ /* the structure was allocated by ipf_main_soft_create() by checking what */ /* value is stored in ipf_dynamic_softc. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ void ipf_main_soft_destroy(softc) ipf_main_softc_t *softc; { RW_DESTROY(&softc->ipf_frag); RW_DESTROY(&softc->ipf_poolrw); RW_DESTROY(&softc->ipf_nat); RW_DESTROY(&softc->ipf_state); RW_DESTROY(&softc->ipf_tokens); RW_DESTROY(&softc->ipf_mutex); RW_DESTROY(&softc->ipf_global); MUTEX_DESTROY(&softc->ipf_timeoutlock); MUTEX_DESTROY(&softc->ipf_rw); if (softc->ipf_tuners != NULL) { KFREES(softc->ipf_tuners, sizeof(ipf_main_tuneables)); } if (softc->ipf_dynamic_softc == 1) { KFREE(softc); } } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_soft_fini */ /* Returns: 0 = success, -1 = failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Clean out the rules which have been added since _init was last called, */ /* the only dynamic part of the mainline.
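(Editorial note: both the active and inactive (FR_INACTIVE) copies of the ipf and count rule lists are flushed below, so no rule loaded since then survives.)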
*/ /* ------------------------------------------------------------------------ */ int ipf_main_soft_fini(softc) ipf_main_softc_t *softc; { (void) ipf_flush(softc, IPL_LOGIPF, FR_INQUE|FR_OUTQUE|FR_INACTIVE); (void) ipf_flush(softc, IPL_LOGIPF, FR_INQUE|FR_OUTQUE); (void) ipf_flush(softc, IPL_LOGCOUNT, FR_INQUE|FR_OUTQUE|FR_INACTIVE); (void) ipf_flush(softc, IPL_LOGCOUNT, FR_INQUE|FR_OUTQUE); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_load */ /* Returns: 0 = success, -1 = failure */ /* Parameters: none */ /* */ /* Handle global initialisation that needs to be done for the base part of */ /* IPFilter. At present this just amounts to initialising some ICMP lookup */ /* arrays that get used by the state/NAT code. */ /* ------------------------------------------------------------------------ */ int ipf_main_load() { int i; /* fill icmp reply type table */ for (i = 0; i <= ICMP_MAXTYPE; i++) icmpreplytype4[i] = -1; icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; #ifdef USE_INET6 /* fill icmp reply type table */ for (i = 0; i <= ICMP6_MAXTYPE; i++) icmpreplytype6[i] = -1; icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; #endif return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_main_unload */ /* Returns: 0 = success, -1 = failure */ /* Parameters: none */ /* */ /* A null-op function that exists as a placeholder so that the flow in */ /* other functions is obvious. 
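(Editorial note: the icmpreplytype4/icmpreplytype6 tables filled in by ipf_main_load() above are plain static arrays - e.g. icmpreplytype4[ICMP_ECHO] == ICMP_ECHOREPLY pairs an echo request with its reply - so there is nothing for this function to undo.)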
*/ /* ------------------------------------------------------------------------ */ int ipf_main_unload() { return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_load_all */ /* Returns: 0 = success, -1 = failure */ /* Parameters: none */ /* */ /* Work through all of the subsystems inside IPFilter and call the load */ /* function for each in an order that won't lead to a crash :) */ /* ------------------------------------------------------------------------ */ int ipf_load_all() { if (ipf_main_load() == -1) return -1; if (ipf_state_main_load() == -1) return -1; if (ipf_nat_main_load() == -1) return -1; if (ipf_frag_main_load() == -1) return -1; if (ipf_auth_main_load() == -1) return -1; if (ipf_proxy_main_load() == -1) return -1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_unload_all */ /* Returns: 0 = success, -1 = failure */ /* Parameters: none */ /* */ /* Work through all of the subsystems inside IPFilter and call the unload */ /* function for each in an order that won't lead to a crash :) */ /* ------------------------------------------------------------------------ */ int ipf_unload_all() { if (ipf_proxy_main_unload() == -1) return -1; if (ipf_auth_main_unload() == -1) return -1; if (ipf_frag_main_unload() == -1) return -1; if (ipf_nat_main_unload() == -1) return -1; if (ipf_state_main_unload() == -1) return -1; if (ipf_main_unload() == -1) return -1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_create_all */ /* Returns: NULL = failure, else success */ /* Parameters: arg(I) - pointer to soft context main structure */ /* */ /* Work through all of the subsystems inside IPFilter and call the create */ /* function for each in an order that won't lead to a crash :) */ /* ------------------------------------------------------------------------ */ ipf_main_softc_t * ipf_create_all(arg) void *arg; { ipf_main_softc_t *softc; softc = ipf_main_soft_create(arg); if (softc == NULL) return NULL; #ifdef IPFILTER_LOG softc->ipf_log_soft = ipf_log_soft_create(softc); if (softc->ipf_log_soft == NULL) { ipf_destroy_all(softc); return NULL; } #endif softc->ipf_lookup_soft = ipf_lookup_soft_create(softc); if (softc->ipf_lookup_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_sync_soft = ipf_sync_soft_create(softc); if (softc->ipf_sync_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_state_soft = ipf_state_soft_create(softc); if (softc->ipf_state_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_nat_soft = ipf_nat_soft_create(softc); if (softc->ipf_nat_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_frag_soft = ipf_frag_soft_create(softc); if (softc->ipf_frag_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_auth_soft = ipf_auth_soft_create(softc); if (softc->ipf_auth_soft == NULL) { ipf_destroy_all(softc); return NULL; } softc->ipf_proxy_soft = ipf_proxy_soft_create(softc); if (softc->ipf_proxy_soft == NULL) { ipf_destroy_all(softc); return NULL; } return softc; } /* ------------------------------------------------------------------------ */ /* Function: ipf_destroy_all */ /* Returns: void */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Work through all of the subsystems inside IPFilter and call the destroy */ /* function for each in an order that won't lead to a crash :) */ /* */ /* Every one of these functions is 
expected to succeed, so there is no */ /* checking of return values. */ /* ------------------------------------------------------------------------ */ void ipf_destroy_all(softc) ipf_main_softc_t *softc; { if (softc->ipf_state_soft != NULL) { ipf_state_soft_destroy(softc, softc->ipf_state_soft); softc->ipf_state_soft = NULL; } if (softc->ipf_nat_soft != NULL) { ipf_nat_soft_destroy(softc, softc->ipf_nat_soft); softc->ipf_nat_soft = NULL; } if (softc->ipf_frag_soft != NULL) { ipf_frag_soft_destroy(softc, softc->ipf_frag_soft); softc->ipf_frag_soft = NULL; } if (softc->ipf_auth_soft != NULL) { ipf_auth_soft_destroy(softc, softc->ipf_auth_soft); softc->ipf_auth_soft = NULL; } if (softc->ipf_proxy_soft != NULL) { ipf_proxy_soft_destroy(softc, softc->ipf_proxy_soft); softc->ipf_proxy_soft = NULL; } if (softc->ipf_sync_soft != NULL) { ipf_sync_soft_destroy(softc, softc->ipf_sync_soft); softc->ipf_sync_soft = NULL; } if (softc->ipf_lookup_soft != NULL) { ipf_lookup_soft_destroy(softc, softc->ipf_lookup_soft); softc->ipf_lookup_soft = NULL; } #ifdef IPFILTER_LOG if (softc->ipf_log_soft != NULL) { ipf_log_soft_destroy(softc, softc->ipf_log_soft); softc->ipf_log_soft = NULL; } #endif ipf_main_soft_destroy(softc); } /* ------------------------------------------------------------------------ */ /* Function: ipf_init_all */ /* Returns: 0 = success, -1 = failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Work through all of the subsystems inside IPFilter and call the init */ /* function for each in an order that won't lead to a crash :) */ /* ------------------------------------------------------------------------ */ int ipf_init_all(softc) ipf_main_softc_t *softc; { if (ipf_main_soft_init(softc) == -1) return -1; #ifdef IPFILTER_LOG if (ipf_log_soft_init(softc, softc->ipf_log_soft) == -1) return -1; #endif if (ipf_lookup_soft_init(softc, softc->ipf_lookup_soft) == -1) return -1; if (ipf_sync_soft_init(softc, softc->ipf_sync_soft) == -1) return -1; if (ipf_state_soft_init(softc, softc->ipf_state_soft) == -1) return -1; if (ipf_nat_soft_init(softc, softc->ipf_nat_soft) == -1) return -1; if (ipf_frag_soft_init(softc, softc->ipf_frag_soft) == -1) return -1; if (ipf_auth_soft_init(softc, softc->ipf_auth_soft) == -1) return -1; if (ipf_proxy_soft_init(softc, softc->ipf_proxy_soft) == -1) return -1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_fini_all */ /* Returns: 0 = success, -1 = failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Work through all of the subsystems inside IPFilter and call the fini */ /* function for each in an order that won't lead to a crash :) */ /* ------------------------------------------------------------------------ */ int ipf_fini_all(softc) ipf_main_softc_t *softc; { ipf_token_flush(softc); if (ipf_proxy_soft_fini(softc, softc->ipf_proxy_soft) == -1) return -1; if (ipf_auth_soft_fini(softc, softc->ipf_auth_soft) == -1) return -1; if (ipf_frag_soft_fini(softc, softc->ipf_frag_soft) == -1) return -1; if (ipf_nat_soft_fini(softc, softc->ipf_nat_soft) == -1) return -1; if (ipf_state_soft_fini(softc, softc->ipf_state_soft) == -1) return -1; if (ipf_sync_soft_fini(softc, softc->ipf_sync_soft) == -1) return -1; if (ipf_lookup_soft_fini(softc, softc->ipf_lookup_soft) == -1) return -1; #ifdef IPFILTER_LOG if (ipf_log_soft_fini(softc, softc->ipf_log_soft) == -1) return -1; #endif if (ipf_main_soft_fini(softc) == -1) return -1; return 0; } /* 
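------------------------------------------------------------------------ */
/* EXAMPLE (editorial, not part of the original source): a minimal sketch  */
/* of the soft context lifecycle built from the functions above. The       */
/* IPF_EXAMPLE_ONLY guard is hypothetical and never defined by any build.  */
/* ------------------------------------------------------------------------ */
#ifdef IPF_EXAMPLE_ONLY
static int
ipf_example_lifecycle(void)
{
	ipf_main_softc_t *softc;

	/* Allocate the soft context and create every subsystem. */
	softc = ipf_create_all(NULL);
	if (softc == NULL)
		return -1;
	/* Initialise the subsystems; tear everything down on failure. */
	if (ipf_init_all(softc) == -1) {
		ipf_destroy_all(softc);
		return -1;
	}
	/* ... filter packets, calling ipf_slowtimer(softc) twice a second ... */
	/* Undo _init, then undo _create. */
	(void) ipf_fini_all(softc);
	ipf_destroy_all(softc);
	return 0;
}
#endif /* IPF_EXAMPLE_ONLY */
/*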
------------------------------------------------------------------------ */ /* Function: ipf_rule_expire */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* At present this function exists just to support temporary addition of */ /* firewall rules. Both inactive and active lists are scanned for items to */ /* purge, as by rights, the expiration is computed as soon as the rule is */ /* loaded in. */ /* ------------------------------------------------------------------------ */ void ipf_rule_expire(softc) ipf_main_softc_t *softc; { frentry_t *fr; if ((softc->ipf_rule_explist[0] == NULL) && (softc->ipf_rule_explist[1] == NULL)) return; WRITE_ENTER(&softc->ipf_mutex); while ((fr = softc->ipf_rule_explist[0]) != NULL) { /* * Because the list is kept sorted on insertion, the first * one that dies in the future means no more work to do. */ if (fr->fr_die > softc->ipf_ticks) break; ipf_rule_delete(softc, fr, IPL_LOGIPF, 0); } while ((fr = softc->ipf_rule_explist[1]) != NULL) { /* * Because the list is kept sorted on insertion, the first * one that dies in the future means no more work to do. */ if (fr->fr_die > softc->ipf_ticks) break; ipf_rule_delete(softc, fr, IPL_LOGIPF, 1); } RWLOCK_EXIT(&softc->ipf_mutex); } static int ipf_ht_node_cmp __P((struct host_node_s *, struct host_node_s *)); static void ipf_ht_node_make_key __P((host_track_t *, host_node_t *, int, i6addr_t *)); host_node_t RBI_ZERO(ipf_rb); RBI_CODE(ipf_rb, host_node_t, hn_entry, ipf_ht_node_cmp) /* ------------------------------------------------------------------------ */ /* Function: ipf_ht_node_cmp */ /* Returns: int - 0 == nodes are the same, .. */ /* Parameters: k1(I) - pointer to first key to compare */ /* k2(I) - pointer to second key to compare */ /* */ /* The "key" for the node is a combination of two fields: the address */ /* family and the address itself. */ /* */ /* Because we're not actually interpreting the address data, it isn't */ /* necessary to convert them to/from network/host byte order. The mask is */ /* just used to remove bits that aren't significant - it doesn't matter */ /* where they are, as long as they're always in the same place. */ /* */ /* As with IP6_EQ, comparing IPv6 addresses starts at the bottom because */ /* this is where individual ones will differ the most - but not true */ /* for /48's, etc.
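For example (editorial): two hosts in the same /64 share i6[0] and i6[1] and typically differ in i6[3], so the bottom-up comparison decides most lookups on the first word tested.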
*/ /* ------------------------------------------------------------------------ */ static int ipf_ht_node_cmp(k1, k2) struct host_node_s *k1, *k2; { int i; i = (k2->hn_addr.adf_family - k1->hn_addr.adf_family); if (i != 0) return i; if (k1->hn_addr.adf_family == AF_INET) return (k2->hn_addr.adf_addr.in4.s_addr - k1->hn_addr.adf_addr.in4.s_addr); i = k2->hn_addr.adf_addr.i6[3] - k1->hn_addr.adf_addr.i6[3]; if (i != 0) return i; i = k2->hn_addr.adf_addr.i6[2] - k1->hn_addr.adf_addr.i6[2]; if (i != 0) return i; i = k2->hn_addr.adf_addr.i6[1] - k1->hn_addr.adf_addr.i6[1]; if (i != 0) return i; i = k2->hn_addr.adf_addr.i6[0] - k1->hn_addr.adf_addr.i6[0]; return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_ht_node_make_key */ /* Returns: Nil */ /* parameters: htp(I) - pointer to address tracking structure */ /* key(I) - where to store masked address for lookup */ /* family(I) - protocol family of address */ /* addr(I) - pointer to network address */ /* */ /* Using the "netmask" (number of bits) stored in the parent host tracking */ /* struct, copy the address passed in into the key structure whilst masking */ /* out the bits that we don't want. */ /* */ /* Because the parser will set ht_netmask to 128 if there is no protocol */ /* specified (the parser doesn't know if it should be a v4 or v6 rule), we */ /* have to be wary of that and not allow 32-128 to happen. */ /* ------------------------------------------------------------------------ */ static void ipf_ht_node_make_key(htp, key, family, addr) host_track_t *htp; host_node_t *key; int family; i6addr_t *addr; { key->hn_addr.adf_family = family; if (family == AF_INET) { u_32_t mask; int bits; key->hn_addr.adf_len = sizeof(key->hn_addr.adf_addr.in4); bits = htp->ht_netmask; if (bits >= 32) { mask = 0xffffffff; } else { mask = htonl(0xffffffff << (32 - bits)); } key->hn_addr.adf_addr.in4.s_addr = addr->in4.s_addr & mask; #ifdef USE_INET6 } else { int bits = htp->ht_netmask; key->hn_addr.adf_len = sizeof(key->hn_addr.adf_addr.in6); if (bits > 96) { key->hn_addr.adf_addr.i6[3] = addr->i6[3] & htonl(0xffffffff << (128 - bits)); key->hn_addr.adf_addr.i6[2] = addr->i6[2]; key->hn_addr.adf_addr.i6[1] = addr->i6[1]; key->hn_addr.adf_addr.i6[0] = addr->i6[0]; } else if (bits > 64) { key->hn_addr.adf_addr.i6[3] = 0; key->hn_addr.adf_addr.i6[2] = addr->i6[2] & htonl(0xffffffff << (96 - bits)); key->hn_addr.adf_addr.i6[1] = addr->i6[1]; key->hn_addr.adf_addr.i6[0] = addr->i6[0]; } else if (bits > 32) { key->hn_addr.adf_addr.i6[3] = 0; key->hn_addr.adf_addr.i6[2] = 0; key->hn_addr.adf_addr.i6[1] = addr->i6[1] & htonl(0xffffffff << (64 - bits)); key->hn_addr.adf_addr.i6[0] = addr->i6[0]; } else { key->hn_addr.adf_addr.i6[3] = 0; key->hn_addr.adf_addr.i6[2] = 0; key->hn_addr.adf_addr.i6[1] = 0; key->hn_addr.adf_addr.i6[0] = addr->i6[0] & htonl(0xffffffff << (32 - bits)); } #endif } } /* ------------------------------------------------------------------------ */ /* Function: ipf_ht_node_add */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* htp(I) - pointer to address tracking structure */ /* family(I) - protocol family of address */ /* addr(I) - pointer to network address */ /* */ /* NOTE: THIS FUNCTION MUST BE CALLED WITH AN EXCLUSIVE LOCK THAT PREVENTS */ /* ipf_ht_node_del FROM RUNNING CONCURRENTLY ON THE SAME htp.
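For example (editorial, hypothetical addresses): with ht_netmask = 24, both 192.0.2.7 and 192.0.2.200 are masked by ipf_ht_node_make_key() to the key 192.0.2.0 and so share, and count against, the same node.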
*/ /* */ /* After preparing the key with the address information to find, look in */ /* the red-black tree to see if the address is known. A successful call to */ /* this function can mean one of two things: a new node was added to the */ /* tree or a matching node exists and we're able to bump up its activity. */ /* ------------------------------------------------------------------------ */ int ipf_ht_node_add(softc, htp, family, addr) ipf_main_softc_t *softc; host_track_t *htp; int family; i6addr_t *addr; { host_node_t *h; host_node_t k; ipf_ht_node_make_key(htp, &k, family, addr); h = RBI_SEARCH(ipf_rb, &htp->ht_root, &k); if (h == NULL) { if (htp->ht_cur_nodes >= htp->ht_max_nodes) return -1; KMALLOC(h, host_node_t *); if (h == NULL) { DT(ipf_rb_no_mem); LBUMP(ipf_rb_no_mem); return -1; } /* * If there was a macro to initialise the RB node then that * would get used here, but there isn't... */ bzero((char *)h, sizeof(*h)); h->hn_addr = k.hn_addr; h->hn_addr.adf_family = k.hn_addr.adf_family; RBI_INSERT(ipf_rb, &htp->ht_root, h); htp->ht_cur_nodes++; } else { if ((htp->ht_max_per_node != 0) && (h->hn_active >= htp->ht_max_per_node)) { DT(ipf_rb_node_max); LBUMP(ipf_rb_node_max); return -1; } } h->hn_active++; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_ht_node_del */ /* Returns: int - 0 == success, -1 == failure */ /* parameters: htp(I) - pointer to address tracking structure */ /* family(I) - protocol family of address */ /* addr(I) - pointer to network address */ /* */ /* NOTE: THIS FUNCTION MUST BE CALLED WITH AN EXCLUSIVE LOCK THAT PREVENTS */ /* ipf_ht_node_add FROM RUNNING CONCURRENTLY ON THE SAME htp. */ /* */ /* Try and find the address passed in amongst the leaves on this tree to */ /* be freed. If found then the active count for that node drops by */ /* one. If that count reaches 0, it is time to free it all up. */ /* ------------------------------------------------------------------------ */ int ipf_ht_node_del(htp, family, addr) host_track_t *htp; int family; i6addr_t *addr; { host_node_t *h; host_node_t k; ipf_ht_node_make_key(htp, &k, family, addr); h = RBI_SEARCH(ipf_rb, &htp->ht_root, &k); if (h == NULL) { return -1; } else { h->hn_active--; if (h->hn_active == 0) { (void) RBI_DELETE(ipf_rb, &htp->ht_root, h); htp->ht_cur_nodes--; KFREE(h); } } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_rb_ht_init */ /* Returns: Nil */ /* Parameters: head(I) - pointer to host tracking structure */ /* */ /* Initialise the host tracking structure to be ready for use above. */ /* ------------------------------------------------------------------------ */ void ipf_rb_ht_init(head) host_track_t *head; { RBI_INIT(ipf_rb, &head->ht_root); } /* ------------------------------------------------------------------------ */ /* Function: ipf_rb_ht_freenode */ /* Returns: Nil */ /* Parameters: node(I) - pointer to host node being freed */ /* arg(I) - additional argument from walk caller */ /* */ /* Free an actual host_node_t structure.
*/ /* ------------------------------------------------------------------------ */ void ipf_rb_ht_freenode(node, arg) host_node_t *node; void *arg; { KFREE(node); } /* ------------------------------------------------------------------------ */ /* Function: ipf_rb_ht_flush */ /* Returns: Nil */ /* Parameters: head(I) - pointer to host tracking structure */ /* */ /* Remove all of the nodes in the tree tracking hosts by calling a walker */ /* and free'ing each one. */ /* ------------------------------------------------------------------------ */ void ipf_rb_ht_flush(head) host_track_t *head; { RBI_WALK(ipf_rb, &head->ht_root, ipf_rb_ht_freenode, NULL); } /* ------------------------------------------------------------------------ */ /* Function: ipf_slowtimer */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to main ipf soft context structure */ /* */ /* Slowly expire held state for fragments. Timeouts are set in */ /* expectation of this being called twice per second. */ /* ------------------------------------------------------------------------ */ void ipf_slowtimer(softc) ipf_main_softc_t *softc; { ipf_token_expire(softc); ipf_frag_expire(softc); ipf_state_expire(softc); ipf_nat_expire(softc); ipf_auth_expire(softc); ipf_lookup_expire(softc); ipf_rule_expire(softc); ipf_sync_expire(softc); softc->ipf_ticks++; # if defined(__OpenBSD__) timeout_add(&ipf_slowtimer_ch, hz/2); # endif } /* ------------------------------------------------------------------------ */ /* Function: ipf_inet_mask_add */ /* Returns: Nil */ /* Parameters: bits(I) - number of bits set in the netmask */ /* mtab(I) - pointer to mask hash table structure */ /* */ /* When called, bits represents the mask of a new NAT rule that has just */ /* been added. This function inserts a bitmask into the array of masks to */ /* search when searching for a matching NAT rule for a packet. */ /* Prevention of duplicate masks is achieved by checking the use count for */ /* a given netmask. */ /* ------------------------------------------------------------------------ */ void ipf_inet_mask_add(bits, mtab) int bits; ipf_v4_masktab_t *mtab; { u_32_t mask; int i, j; mtab->imt4_masks[bits]++; if (mtab->imt4_masks[bits] > 1) return; if (bits == 0) mask = 0; else mask = 0xffffffff << (32 - bits); for (i = 0; i < 33; i++) { if (ntohl(mtab->imt4_active[i]) < mask) { for (j = 32; j > i; j--) mtab->imt4_active[j] = mtab->imt4_active[j - 1]; mtab->imt4_active[i] = htonl(mask); break; } } mtab->imt4_max++; } /* ------------------------------------------------------------------------ */ /* Function: ipf_inet_mask_del */ /* Returns: Nil */ /* Parameters: bits(I) - number of bits set in the netmask */ /* mtab(I) - pointer to mask hash table structure */ /* */ /* Remove the 32bit bitmask represented by "bits" from the collection of */ /* netmasks stored inside of mtab.
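For example (editorial): with /24 and /16 rules loaded, imt4_active[] holds 0xffffff00 ahead of 0xffff0000 (descending order), so a lookup tries the more specific mask first; deleting the last /24 rule shifts the remaining masks back up the array.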
*/ /* ------------------------------------------------------------------------ */ void ipf_inet_mask_del(bits, mtab) int bits; ipf_v4_masktab_t *mtab; { u_32_t mask; int i, j; mtab->imt4_masks[bits]--; if (mtab->imt4_masks[bits] > 0) return; if (bits == 0) mask = 0; else mask = htonl(0xffffffff << (32 - bits)); for (i = 0; i < 33; i++) { if (mtab->imt4_active[i] == mask) { for (j = i + 1; j < 33; j++) mtab->imt4_active[j - 1] = mtab->imt4_active[j]; break; } } mtab->imt4_max--; ASSERT(mtab->imt4_max >= 0); } #ifdef USE_INET6 /* ------------------------------------------------------------------------ */ /* Function: ipf_inet6_mask_add */ /* Returns: Nil */ /* Parameters: bits(I) - number of bits set in mask */ /* mask(I) - pointer to mask to add */ /* mtab(I) - pointer to mask hash table structure */ /* */ /* When called, "bits" represents the mask of an IPv6 NAT map rule that */ /* has just been added. This function inserts a bitmask into the array of */ /* masks to search when searching for a matching NAT rule for a packet. */ /* Prevention of duplicate masks is achieved by checking the use count for */ /* a given netmask. */ /* ------------------------------------------------------------------------ */ void ipf_inet6_mask_add(bits, mask, mtab) int bits; i6addr_t *mask; ipf_v6_masktab_t *mtab; { i6addr_t zero; int i, j; mtab->imt6_masks[bits]++; if (mtab->imt6_masks[bits] > 1) return; if (bits == 0) { mask = &zero; zero.i6[0] = 0; zero.i6[1] = 0; zero.i6[2] = 0; zero.i6[3] = 0; } for (i = 0; i < 129; i++) { if (IP6_LT(&mtab->imt6_active[i], mask)) { for (j = 128; j > i; j--) mtab->imt6_active[j] = mtab->imt6_active[j - 1]; mtab->imt6_active[i] = *mask; break; } } mtab->imt6_max++; } /* ------------------------------------------------------------------------ */ /* Function: ipf_inet6_mask_del */ /* Returns: Nil */ /* Parameters: bits(I) - number of bits set in mask */ /* mask(I) - pointer to mask to remove */ /* mtab(I) - pointer to mask hash table structure */ /* */ /* Remove the 128bit bitmask represented by "bits" from the collection of */ /* netmasks stored inside of mtab. */ /* ------------------------------------------------------------------------ */ void ipf_inet6_mask_del(bits, mask, mtab) int bits; i6addr_t *mask; ipf_v6_masktab_t *mtab; { i6addr_t zero; int i, j; mtab->imt6_masks[bits]--; if (mtab->imt6_masks[bits] > 0) return; if (bits == 0) mask = &zero; zero.i6[0] = 0; zero.i6[1] = 0; zero.i6[2] = 0; zero.i6[3] = 0; for (i = 0; i < 129; i++) { if (IP6_EQ(&mtab->imt6_active[i], mask)) { for (j = i + 1; j < 129; j++) { mtab->imt6_active[j - 1] = mtab->imt6_active[j]; if (IP6_EQ(&mtab->imt6_active[j - 1], &zero)) break; } break; } } mtab->imt6_max--; ASSERT(mtab->imt6_max >= 0); } #endif Index: head/sys/contrib/ipfilter/netinet/ip_compat.h =================================================================== --- head/sys/contrib/ipfilter/netinet/ip_compat.h (revision 305823) +++ head/sys/contrib/ipfilter/netinet/ip_compat.h (revision 305824) @@ -1,1497 +1,1497 @@ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing.
* * @(#)ip_compat.h 1.8 1/14/96 * $FreeBSD$ * Id: ip_compat.h,v 2.142.2.57 2007/10/10 09:51:42 darrenr Exp $ */ #ifndef __IP_COMPAT_H__ #define __IP_COMPAT_H__ #ifndef __P # ifdef __STDC__ # define __P(x) x # else # define __P(x) () # endif #endif #ifndef __STDC__ # undef const # define const #endif #if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__) # undef KERNEL # undef _KERNEL # undef __KERNEL__ # define KERNEL # define _KERNEL # define __KERNEL__ #endif #ifndef SOLARIS # if defined(sun) && (defined(__svr4__) || defined(__SVR4)) # define SOLARIS 1 # else # define SOLARIS 0 # endif #endif #if defined(__SVR4) || defined(__svr4__) || defined(__sgi) # define index strchr # if !defined(_KERNEL) # define bzero(a,b) memset(a,0,b) # define bcmp memcmp # define bcopy(a,b,c) memmove(b,a,c) # endif #endif #ifndef LIFNAMSIZ # ifdef IF_NAMESIZE # define LIFNAMSIZ IF_NAMESIZE # else # ifdef IFNAMSIZ # define LIFNAMSIZ IFNAMSIZ # else # define LIFNAMSIZ 16 # endif # endif #endif #if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) struct ether_addr { u_char ether_addr_octet[6]; }; #endif # ifdef __STDC__ # define IPL_EXTERN(ep) ipl##ep # else # define IPL_EXTERN(ep) ipl/**/ep # endif /* * This is a workaround for troubles on FreeBSD and OpenBSD. */ # ifndef _KERNEL # define ADD_KERNEL # define _KERNEL # define KERNEL # endif # include # ifdef ADD_KERNEL # undef _KERNEL # undef KERNEL # endif #define NETBSD_GE_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ >= (x))) #define NETBSD_GT_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ > (x))) #define NETBSD_LT_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ < (x))) #define FREEBSD_GE_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version >= (x))) #define FREEBSD_GT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version > (x))) #define FREEBSD_LT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version < (x))) #define BSDOS_GE_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION >= (x))) #define BSDOS_GT_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION > (x))) #define BSDOS_LT_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION < (x))) #define OPENBSD_GE_REV(x) (defined(OpenBSD) && (OpenBSD >= (x))) #define OPENBSD_GT_REV(x) (defined(OpenBSD) && (OpenBSD > (x))) #define OPENBSD_LT_REV(x) (defined(OpenBSD) && (OpenBSD < (x))) #define BSD_GE_YEAR(x) (defined(BSD) && (BSD >= (x))) #define BSD_GT_YEAR(x) (defined(BSD) && (BSD > (x))) #define BSD_LT_YEAR(x) (defined(BSD) && (BSD < (x))) /* ----------------------------------------------------------------------- */ /* F R E E B S D */ /* ----------------------------------------------------------------------- */ # define HAS_SYS_MD5_H 1 # if defined(_KERNEL) # include "opt_bpf.h" # include "opt_inet6.h" # if defined(INET6) && !defined(USE_INET6) # define USE_INET6 # endif # else # if !defined(USE_INET6) && !defined(NOINET6) # define USE_INET6 # endif # endif # if defined(_KERNEL) # include # define p_cred td_ucred # define p_uid td_ucred->cr_ruid /* * When #define'd, the 5.2.1 kernel panics when used with the ftp proxy. * There may be other, safe, kernels but this is not extensively tested yet. 
*/ # define HAVE_M_PULLDOWN # if !defined(IPFILTER_LKM) && (__FreeBSD_version >= 300000) # include "opt_ipfilter.h" # endif # define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) # define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) # define NETBSD_PF # else # include # endif /* _KERNEL */ # include # include # include # include # define KRWLOCK_FILL_SZ 56 # define KMUTEX_FILL_SZ 56 # include # define KMUTEX_T struct mtx # define KRWLOCK_T struct rwlock #ifdef _KERNEL # define READ_ENTER(x) rw_rlock(&(x)->ipf_lk) # define WRITE_ENTER(x) rw_wlock(&(x)->ipf_lk) # define MUTEX_DOWNGRADE(x) rw_downgrade(&(x)->ipf_lk) # define RWLOCK_INIT(x,y) rw_init(&(x)->ipf_lk, (y)) # define RW_DESTROY(x) rw_destroy(&(x)->ipf_lk) # define RWLOCK_EXIT(x) do { \ if (rw_wowned(&(x)->ipf_lk)) \ rw_wunlock(&(x)->ipf_lk); \ else \ rw_runlock(&(x)->ipf_lk); \ } while (0) # include # define GETKTIME(x) microtime((struct timeval *)x) # include # include # include # define USE_MUTEXES # define MUTEX_ENTER(x) mtx_lock(&(x)->ipf_lk) # define MUTEX_EXIT(x) mtx_unlock(&(x)->ipf_lk) # define MUTEX_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ MTX_DEF) # define MUTEX_DESTROY(x) mtx_destroy(&(x)->ipf_lk) # define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) /* * Whilst the sx(9) locks on FreeBSD have the right semantics and interface * for what we want to use them for, and testing shows that they work, * with a WITNESS kernel they generate LOR messages. */ # include # define ATOMIC_INC(x) { mtx_lock(&softc->ipf_rw.ipf_lk); (x)++; \ mtx_unlock(&softc->ipf_rw.ipf_lk); } # define ATOMIC_DEC(x) { mtx_lock(&softc->ipf_rw.ipf_lk); (x)--; \ mtx_unlock(&softc->ipf_rw.ipf_lk); } # define ATOMIC_INCL(x) atomic_add_long(&(x), 1) # define ATOMIC_INC64(x) ATOMIC_INC(x) # define ATOMIC_INC32(x) atomic_add_32((u_int *)&(x), 1) # define ATOMIC_DECL(x) atomic_add_long(&(x), -1) # define ATOMIC_DEC64(x) ATOMIC_DEC(x) # define ATOMIC_DEC32(x) atomic_add_32((u_int *)&(x), -1) # define SPL_X(x) ; # define SPL_NET(x) ; # define SPL_IMP(x) ; # define SPL_SCHED(x) ; # define GET_MINOR dev2unit # define MSGDSIZE(m) mbufchainlen(m) # define M_LEN(m) (m)->m_len # define M_ADJ(m,x) m_adj(m, x) -# define M_COPY(x) m_copy((x), 0, M_COPYALL) +# define M_COPYM(x) m_copym((x), 0, M_COPYALL, M_NOWAIT) # define M_DUP(m) m_dup(m, M_NOWAIT) # define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } typedef struct mbuf mb_t; #else /* !_KERNEL */ #ifndef _NET_IF_VAR_H_ /* * Userland emulation of struct ifnet. */ struct route; struct mbuf; struct ifnet { char if_xname[IFNAMSIZ]; TAILQ_HEAD(, ifaddr) if_addrlist; int (*if_output)(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); }; #endif /* _NET_IF_VAR_H_ */ #endif /* _KERNEL */ # define IFNAME(x) ((struct ifnet *)x)->if_xname # define COPYIFNAME(v, x, b) \ (void) strncpy(b, \ ((struct ifnet *)x)->if_xname, \ LIFNAMSIZ) typedef u_long ioctlcmd_t; typedef struct uio uio_t; typedef int minor_t; typedef u_int32_t u_32_t; # define U_32_T 1 /* ----------------------------------------------------------------------- */ /* G E N E R I C */ /* ----------------------------------------------------------------------- */ /* * For BSD kernels, if bpf is in the kernel, enable ipfilter to use bpf in * filter rules.
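 * (Editorial note: the three counts tested below are just different spellings, across the BSDs, of "bpf is configured into this kernel" - NBPF, DEV_BPF and NBPFILTER.)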
*/ #if !defined(IPFILTER_BPF) # if (defined(NBPF) && (NBPF > 0)) || (defined(DEV_BPF) && (DEV_BPF > 0)) || \ (defined(NBPFILTER) && (NBPFILTER > 0)) # define IPFILTER_BPF # endif #endif /* * Userland locking primitives */ #ifndef _KERNEL #if !defined(KMUTEX_FILL_SZ) # define KMUTEX_FILL_SZ 1 #endif #if !defined(KRWLOCK_FILL_SZ) # define KRWLOCK_FILL_SZ 1 #endif #endif typedef struct { char *eMm_owner; char *eMm_heldin; u_int eMm_magic; int eMm_held; int eMm_heldat; } eMmutex_t; typedef struct { char *eMrw_owner; char *eMrw_heldin; u_int eMrw_magic; short eMrw_read; short eMrw_write; int eMrw_heldat; } eMrwlock_t; typedef union { char _fill[KMUTEX_FILL_SZ]; #ifdef KMUTEX_T struct { KMUTEX_T ipf_slk; const char *ipf_lname; } ipf_lkun_s; #endif eMmutex_t ipf_emu; } ipfmutex_t; typedef union { char _fill[KRWLOCK_FILL_SZ]; #ifdef KRWLOCK_T struct { KRWLOCK_T ipf_slk; const char *ipf_lname; int ipf_sr; int ipf_sw; u_int ipf_magic; } ipf_lkun_s; #endif eMrwlock_t ipf_emu; } ipfrwlock_t; #define ipf_lk ipf_lkun_s.ipf_slk #define ipf_lname ipf_lkun_s.ipf_lname #define ipf_isr ipf_lkun_s.ipf_sr #define ipf_isw ipf_lkun_s.ipf_sw #define ipf_magic ipf_lkun_s.ipf_magic #if !defined(__GNUC__) || \ (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) # ifndef INLINE # define INLINE # endif #else # define INLINE __inline__ #endif #if defined(__FreeBSD_version) && defined(_KERNEL) CTASSERT(sizeof(ipfrwlock_t) == KRWLOCK_FILL_SZ); CTASSERT(sizeof(ipfmutex_t) == KMUTEX_FILL_SZ); #endif /* * In a non-kernel environment, there are a lot of macros that need to be * filled in to be null-ops or to point to some compatibility function, * somewhere in userland. */ #ifndef _KERNEL typedef struct mb_s { struct mb_s *mb_next; char *mb_data; void *mb_ifp; int mb_len; int mb_flags; u_long mb_buf[2048]; } mb_t; # undef m_next # define m_next mb_next # undef m_len # define m_len mb_len # undef m_flags # define m_flags mb_flags # undef m_data # define m_data mb_data # undef M_MCAST # define M_MCAST 0x01 # undef M_BCAST # define M_BCAST 0x02 # undef M_MBCAST # define M_MBCAST 0x04 # define MSGDSIZE(m) msgdsize(m) # define M_LEN(m) (m)->mb_len # define M_ADJ(m,x) (m)->mb_len += x -# define M_COPY(m) dupmbt(m) +# define M_COPYM(m) dupmbt(m) # define M_DUP(m) dupmbt(m) # define GETKTIME(x) gettimeofday((struct timeval *)(x), NULL) # define MTOD(m, t) ((t)(m)->mb_data) # define FREE_MB_T(m) freembt(m) # define ALLOC_MB_T(m,l) (m) = allocmbt(l) # define PREP_MB_T(f, m) do { \ (m)->mb_next = *(f)->fin_mp; \ *(fin)->fin_mp = (m); \ (f)->fin_m = (m); \ } while (0) # define SLEEP(x,y) 1; # define WAKEUP(x,y) ; # define POLLWAKEUP(y) ; # define IPF_PANIC(x,y) ; # define PANIC(x,y) ; # define SPL_SCHED(x) ; # define SPL_NET(x) ; # define SPL_IMP(x) ; # define SPL_X(x) ; # define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a)) # define KMALLOCS(a,b,c) (a) = (b)malloc(c) # define KFREE(x) free(x) # define KFREES(x,s) free(x) # define GETIFP(x, v) get_unit(x,v) # define GETIFMTU_4(x) 2048 # define GETIFMTU_6(x) 2048 # define COPYIN(a,b,c) bcopywrap((a), (b), (c)) # define COPYOUT(a,b,c) bcopywrap((a), (b), (c)) # define COPYDATA(m, o, l, b) bcopy(MTOD((mb_t *)m, char *) + (o), \ (b), (l)) # define COPYBACK(m, o, l, b) bcopy((b), \ MTOD((mb_t *)m, char *) + (o), \ (l)) # define UIOMOVE(a,b,c,d) ipfuiomove((caddr_t)a,b,c,d) extern void m_copydata __P((mb_t *, int, int, caddr_t)); extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); extern int bcopywrap __P((void *, void *, size_t)); extern mb_t *allocmbt __P((size_t)); extern 
mb_t *dupmbt __P((mb_t *)); extern void freembt __P((mb_t *)); # define MUTEX_DESTROY(x) eMmutex_destroy(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_ENTER(x) eMmutex_enter(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_EXIT(x) eMmutex_exit(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_INIT(x,y) eMmutex_init(&(x)->ipf_emu, y, \ __FILE__, __LINE__) # define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) # define MUTEX_DOWNGRADE(x) eMrwlock_downgrade(&(x)->ipf_emu, \ __FILE__, __LINE__) # define READ_ENTER(x) eMrwlock_read_enter(&(x)->ipf_emu, \ __FILE__, __LINE__) # define RWLOCK_INIT(x, y) eMrwlock_init(&(x)->ipf_emu, y) # define RWLOCK_EXIT(x) eMrwlock_exit(&(x)->ipf_emu) # define RW_DESTROY(x) eMrwlock_destroy(&(x)->ipf_emu) # define WRITE_ENTER(x) eMrwlock_write_enter(&(x)->ipf_emu, \ __FILE__, \ __LINE__) # define USE_MUTEXES 1 extern void eMmutex_destroy __P((eMmutex_t *, char *, int)); extern void eMmutex_enter __P((eMmutex_t *, char *, int)); extern void eMmutex_exit __P((eMmutex_t *, char *, int)); extern void eMmutex_init __P((eMmutex_t *, char *, char *, int)); extern void eMrwlock_destroy __P((eMrwlock_t *)); extern void eMrwlock_exit __P((eMrwlock_t *)); extern void eMrwlock_init __P((eMrwlock_t *, char *)); extern void eMrwlock_read_enter __P((eMrwlock_t *, char *, int)); extern void eMrwlock_write_enter __P((eMrwlock_t *, char *, int)); extern void eMrwlock_downgrade __P((eMrwlock_t *, char *, int)); #endif extern mb_t *allocmbt(size_t); #define MAX_IPV4HDR ((0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8) #ifndef IP_OFFMASK # define IP_OFFMASK 0x1fff #endif /* * On BSD's use quad_t as a guarantee for getting at least a 64bit sized * object. */ #if !defined(__amd64__) && BSD_GT_YEAR(199306) # define USE_QUAD_T # define U_QUAD_T unsigned long long # define QUAD_T long long #else /* BSD > 199306 */ # if !defined(U_QUAD_T) # define U_QUAD_T u_long # define QUAD_T long # endif #endif /* BSD > 199306 */ #ifdef USE_INET6 # if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ defined(__osf__) || defined(linux) # include # include # if defined(_KERNEL) && !defined(__osf__) # include # endif typedef struct ip6_hdr ip6_t; # endif #endif #ifndef MAX # define MAX(a,b) (((a) > (b)) ? 
(a) : (b)) #endif #if defined(_KERNEL) # if defined(MENTAT) && !defined(INSTANCES) # define COPYDATA mb_copydata # define COPYBACK mb_copyback # else # define COPYDATA m_copydata # define COPYBACK m_copyback # endif # if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \ defined(_BSDI_VERSION) # include # endif # if !defined(__FreeBSD__) || FREEBSD_GE_REV(300000) # if NETBSD_GE_REV(105180000) || OPENBSD_GE_REV(200111) # include # else # include extern vm_map_t kmem_map; # endif # include # else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # include # endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # ifdef IPFILTER_M_IPFILTER # include MALLOC_DECLARE(M_IPFILTER); # define _M_IPF M_IPFILTER # else /* IPFILTER_M_IPFILTER */ # ifdef M_PFIL # define _M_IPF M_PFIL # else # ifdef M_IPFILTER # define _M_IPF M_IPFILTER # else # define _M_IPF M_TEMP # endif /* M_IPFILTER */ # endif /* M_PFIL */ # endif /* IPFILTER_M_IPFILTER */ # if !defined(KMALLOC) # define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), _M_IPF, M_NOWAIT) # endif # if !defined(KMALLOCS) # define KMALLOCS(a, b, c) MALLOC((a), b, (c), _M_IPF, M_NOWAIT) # endif # if !defined(KFREE) # define KFREE(x) FREE((x), _M_IPF) # endif # if !defined(KFREES) # define KFREES(x,s) FREE((x), _M_IPF) # endif # define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,d) # define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0) # define WAKEUP(id,x) wakeup(id+x) # if !defined(POLLWAKEUP) # define POLLWAKEUP(x) selwakeup(softc->ipf_selwait+x) # endif # define GETIFP(n, v) ifunit(n) # define GETIFMTU_4(x) ((struct ifnet *)x)->if_mtu # define GETIFMTU_6(x) ((struct ifnet *)x)->if_mtu # if !defined(USE_MUTEXES) && !defined(SPL_NET) # define SPL_IMP(x) x = splimp() # define SPL_NET(x) x = splnet() # if !defined(SPL_SCHED) # define SPL_SCHED(x) x = splsched() # endif # define SPL_X(x) (void) splx(x) # endif /* !USE_MUTEXES */ # ifndef FREE_MB_T # define FREE_MB_T(m) m_freem(m) # endif # ifndef ALLOC_MB_T # ifdef MGETHDR # define ALLOC_MB_T(m,l) do { \ MGETHDR((m), M_NOWAIT, MT_HEADER); \ if ((m) != NULL) { \ (m)->m_len = (l); \ (m)->m_pkthdr.len = (l); \ } \ } while (0) # else # define ALLOC_MB_T(m,l) do { \ MGET((m), M_NOWAIT, MT_HEADER); \ if ((m) != NULL) { \ (m)->m_len = (l); \ (m)->m_pkthdr.len = (l); \ } \ } while (0) # endif # endif # ifndef PREP_MB_T # define PREP_MB_T(f, m) do { \ mb_t *_o = *(f)->fin_mp; \ (m)->m_next = _o; \ *(fin)->fin_mp = (m); \ if (_o->m_flags & M_PKTHDR) { \ (m)->m_pkthdr.len += \ _o->m_pkthdr.len; \ (m)->m_pkthdr.rcvif = \ _o->m_pkthdr.rcvif; \ } \ } while (0) # endif # ifndef M_DUP # ifdef M_COPYALL # define M_DUP(m) m_dup(m, 0, M_COPYALL, 0) # else # define M_DUP(m) m_dup(m) # endif # endif # ifndef MTOD # define MTOD(m,t) mtod(m,t) # endif # ifndef COPYIN # define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # endif # ifndef KMALLOC # define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ KMEM_NOSLEEP) # define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) # endif # ifndef GET_MINOR # define GET_MINOR(x) dev2unit(x) # endif # define PANIC(x,y) if (x) panic y #endif /* _KERNEL */ #if !defined(IFNAME) && !defined(_KERNEL) # define IFNAME(x) get_ifname((struct ifnet *)x) #endif #ifndef COPYIFNAME # define NEED_FRGETIFNAME extern char *ipf_getifname __P((struct ifnet *, char *)); # define COPYIFNAME(v, x, b) \ 
ipf_getifname((struct ifnet *)x, b) #endif #ifndef ASSERT # ifdef _KERNEL # define ASSERT(x) # else # define ASSERT(x) do { if (!(x)) abort(); } while (0) # endif #endif #ifndef BCOPYIN # define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) #endif /* * The ctype(3) posix definition, if used "safely" in code everywhere, * would mean all normal code that walks through strings needed casts. Yuck. */ #define ISALNUM(x) isalnum((u_char)(x)) #define ISALPHA(x) isalpha((u_char)(x)) #define ISDIGIT(x) isdigit((u_char)(x)) #define ISSPACE(x) isspace((u_char)(x)) #define ISUPPER(x) isupper((u_char)(x)) #define ISXDIGIT(x) isxdigit((u_char)(x)) #define ISLOWER(x) islower((u_char)(x)) #define TOUPPER(x) toupper((u_char)(x)) #define TOLOWER(x) tolower((u_char)(x)) /* * If mutexes aren't being used, turn all the mutex functions into null-ops. */ #if !defined(USE_MUTEXES) # define USE_SPL 1 # undef RW_DESTROY # undef MUTEX_INIT # undef MUTEX_NUKE # undef MUTEX_DESTROY # define MUTEX_ENTER(x) ; # define READ_ENTER(x) ; # define WRITE_ENTER(x) ; # define MUTEX_DOWNGRADE(x) ; # define RWLOCK_INIT(x, y) ; # define RWLOCK_EXIT(x) ; # define RW_DESTROY(x) ; # define MUTEX_EXIT(x) ; # define MUTEX_INIT(x,y) ; # define MUTEX_DESTROY(x) ; # define MUTEX_NUKE(x) ; #endif /* !USE_MUTEXES */ #ifndef ATOMIC_INC # define ATOMIC_INC(x) (x)++ # define ATOMIC_DEC(x) (x)-- #endif #if defined(USE_SPL) && defined(_KERNEL) # define SPL_INT(x) int x #else # define SPL_INT(x) #endif /* * If there are no atomic operations for bit sizes defined, define them to all * use a generic one that works for all sizes. */ #ifndef ATOMIC_INCL # define ATOMIC_INCL ATOMIC_INC # define ATOMIC_INC64 ATOMIC_INC # define ATOMIC_INC32 ATOMIC_INC # define ATOMIC_DECL ATOMIC_DEC # define ATOMIC_DEC64 ATOMIC_DEC # define ATOMIC_DEC32 ATOMIC_DEC #endif #ifndef HDR_T_PRIVATE typedef struct tcphdr tcphdr_t; typedef struct udphdr udphdr_t; #endif typedef struct icmp icmphdr_t; typedef struct ip ip_t; typedef struct ether_header ether_header_t; typedef struct tcpiphdr tcpiphdr_t; #ifndef FR_GROUPLEN # define FR_GROUPLEN 16 #endif #ifndef offsetof # define offsetof(t,m) (size_t)((&((t *)0L)->m)) #endif #ifndef stsizeof # define stsizeof(t,m) sizeof(((t *)0L)->m) #endif /* * This set of macros has been brought about because on Tru64 it is not * possible to easily assign or examine values in a structure that are * bit fields. */ #ifndef IP_V # define IP_V(x) (x)->ip_v #endif #ifndef IP_V_A # define IP_V_A(x,y) (x)->ip_v = (y) #endif #ifndef IP_HL # define IP_HL(x) (x)->ip_hl #endif #ifndef IP_HL_A # define IP_HL_A(x,y) (x)->ip_hl = ((y) & 0xf) #endif #ifndef TCP_X2 # define TCP_X2(x) (x)->th_x2 #endif #ifndef TCP_X2_A # define TCP_X2_A(x,y) (x)->th_x2 = (y) #endif #ifndef TCP_OFF # define TCP_OFF(x) (x)->th_off #endif #ifndef TCP_OFF_A # define TCP_OFF_A(x,y) (x)->th_off = (y) #endif #define IPMINLEN(i, h) ((i)->ip_len >= (IP_HL(i) * 4 + sizeof(struct h))) /* * XXX - This is one of those *awful* hacks which nobody likes */ #ifdef ultrix #define A_A #else #define A_A & #endif #define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|\ TH_ECN|TH_CWR) #if BSD_GE_YEAR(199306) && !defined(m_act) # define m_act m_nextpkt #endif /* * Security Options for Internet Protocol (IPSO) as defined in RFC 1108.
* * Basic Option * * 00000001 - (Reserved 4) * 00111101 - Top Secret * 01011010 - Secret * 10010110 - Confidential * 01100110 - (Reserved 3) * 11001100 - (Reserved 2) * 10101011 - Unclassified * 11110001 - (Reserved 1) */ #define IPSO_CLASS_RES4 0x01 #define IPSO_CLASS_TOPS 0x3d #define IPSO_CLASS_SECR 0x5a #define IPSO_CLASS_CONF 0x96 #define IPSO_CLASS_RES3 0x66 #define IPSO_CLASS_RES2 0xcc #define IPSO_CLASS_UNCL 0xab #define IPSO_CLASS_RES1 0xf1 #define IPSO_AUTH_GENSER 0x80 #define IPSO_AUTH_ESI 0x40 #define IPSO_AUTH_SCI 0x20 #define IPSO_AUTH_NSA 0x10 #define IPSO_AUTH_DOE 0x08 #define IPSO_AUTH_UN 0x06 #define IPSO_AUTH_FTE 0x01 /* * IP option #defines */ #undef IPOPT_RR #define IPOPT_RR 7 #undef IPOPT_ZSU #define IPOPT_ZSU 10 /* ZSU */ #undef IPOPT_MTUP #define IPOPT_MTUP 11 /* MTUP */ #undef IPOPT_MTUR #define IPOPT_MTUR 12 /* MTUR */ #undef IPOPT_ENCODE #define IPOPT_ENCODE 15 /* ENCODE */ #undef IPOPT_TS #define IPOPT_TS 68 #undef IPOPT_TR #define IPOPT_TR 82 /* TR */ #undef IPOPT_SECURITY #define IPOPT_SECURITY 130 #undef IPOPT_LSRR #define IPOPT_LSRR 131 #undef IPOPT_E_SEC #define IPOPT_E_SEC 133 /* E-SEC */ #undef IPOPT_CIPSO #define IPOPT_CIPSO 134 /* CIPSO */ #undef IPOPT_SATID #define IPOPT_SATID 136 #ifndef IPOPT_SID # define IPOPT_SID IPOPT_SATID #endif #undef IPOPT_SSRR #define IPOPT_SSRR 137 #undef IPOPT_ADDEXT #define IPOPT_ADDEXT 147 /* ADDEXT */ #undef IPOPT_VISA #define IPOPT_VISA 142 /* VISA */ #undef IPOPT_IMITD #define IPOPT_IMITD 144 /* IMITD */ #undef IPOPT_EIP #define IPOPT_EIP 145 /* EIP */ #undef IPOPT_RTRALRT #define IPOPT_RTRALRT 148 /* RTRALRT */ #undef IPOPT_SDB #define IPOPT_SDB 149 #undef IPOPT_NSAPA #define IPOPT_NSAPA 150 #undef IPOPT_DPS #define IPOPT_DPS 151 #undef IPOPT_UMP #define IPOPT_UMP 152 #undef IPOPT_FINN #define IPOPT_FINN 205 /* FINN */ #undef IPOPT_AH #define IPOPT_AH 256+IPPROTO_AH #ifndef TCPOPT_EOL # define TCPOPT_EOL 0 #endif #ifndef TCPOPT_NOP # define TCPOPT_NOP 1 #endif #ifndef TCPOPT_MAXSEG # define TCPOPT_MAXSEG 2 #endif #ifndef TCPOLEN_MAXSEG # define TCPOLEN_MAXSEG 4 #endif #ifndef TCPOPT_WINDOW # define TCPOPT_WINDOW 3 #endif #ifndef TCPOLEN_WINDOW # define TCPOLEN_WINDOW 3 #endif #ifndef TCPOPT_SACK_PERMITTED # define TCPOPT_SACK_PERMITTED 4 #endif #ifndef TCPOLEN_SACK_PERMITTED # define TCPOLEN_SACK_PERMITTED 2 #endif #ifndef TCPOPT_SACK # define TCPOPT_SACK 5 #endif #ifndef TCPOPT_TIMESTAMP # define TCPOPT_TIMESTAMP 8 #endif #ifndef ICMP_MINLEN # define ICMP_MINLEN 8 #endif #ifndef ICMP_ECHOREPLY # define ICMP_ECHOREPLY 0 #endif #ifndef ICMP_UNREACH # define ICMP_UNREACH 3 #endif #ifndef ICMP_UNREACH_NET # define ICMP_UNREACH_NET 0 #endif #ifndef ICMP_UNREACH_HOST # define ICMP_UNREACH_HOST 1 #endif #ifndef ICMP_UNREACH_PROTOCOL # define ICMP_UNREACH_PROTOCOL 2 #endif #ifndef ICMP_UNREACH_PORT # define ICMP_UNREACH_PORT 3 #endif #ifndef ICMP_UNREACH_NEEDFRAG # define ICMP_UNREACH_NEEDFRAG 4 #endif #ifndef ICMP_UNREACH_SRCFAIL # define ICMP_UNREACH_SRCFAIL 5 #endif #ifndef ICMP_UNREACH_NET_UNKNOWN # define ICMP_UNREACH_NET_UNKNOWN 6 #endif #ifndef ICMP_UNREACH_HOST_UNKNOWN # define ICMP_UNREACH_HOST_UNKNOWN 7 #endif #ifndef ICMP_UNREACH_ISOLATED # define ICMP_UNREACH_ISOLATED 8 #endif #ifndef ICMP_UNREACH_NET_PROHIB # define ICMP_UNREACH_NET_PROHIB 9 #endif #ifndef ICMP_UNREACH_HOST_PROHIB # define ICMP_UNREACH_HOST_PROHIB 10 #endif #ifndef ICMP_UNREACH_TOSNET # define ICMP_UNREACH_TOSNET 11 #endif #ifndef ICMP_UNREACH_TOSHOST # define ICMP_UNREACH_TOSHOST 12 #endif #ifndef ICMP_UNREACH_ADMIN_PROHIBIT # define 
ICMP_UNREACH_ADMIN_PROHIBIT 13 #endif #ifndef ICMP_UNREACH_FILTER # define ICMP_UNREACH_FILTER 13 #endif #ifndef ICMP_UNREACH_HOST_PRECEDENCE # define ICMP_UNREACH_HOST_PRECEDENCE 14 #endif #ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF # define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 #endif #ifndef ICMP_SOURCEQUENCH # define ICMP_SOURCEQUENCH 4 #endif #ifndef ICMP_REDIRECT_NET # define ICMP_REDIRECT_NET 0 #endif #ifndef ICMP_REDIRECT_HOST # define ICMP_REDIRECT_HOST 1 #endif #ifndef ICMP_REDIRECT_TOSNET # define ICMP_REDIRECT_TOSNET 2 #endif #ifndef ICMP_REDIRECT_TOSHOST # define ICMP_REDIRECT_TOSHOST 3 #endif #ifndef ICMP_ALTHOSTADDR # define ICMP_ALTHOSTADDR 6 #endif #ifndef ICMP_TIMXCEED # define ICMP_TIMXCEED 11 #endif #ifndef ICMP_TIMXCEED_INTRANS # define ICMP_TIMXCEED_INTRANS 0 #endif #ifndef ICMP_TIMXCEED_REASS # define ICMP_TIMXCEED_REASS 1 #endif #ifndef ICMP_PARAMPROB # define ICMP_PARAMPROB 12 #endif #ifndef ICMP_PARAMPROB_ERRATPTR # define ICMP_PARAMPROB_ERRATPTR 0 #endif #ifndef ICMP_PARAMPROB_OPTABSENT # define ICMP_PARAMPROB_OPTABSENT 1 #endif #ifndef ICMP_PARAMPROB_LENGTH # define ICMP_PARAMPROB_LENGTH 2 #endif #ifndef ICMP_TSTAMP # define ICMP_TSTAMP 13 #endif #ifndef ICMP_TSTAMPREPLY # define ICMP_TSTAMPREPLY 14 #endif #ifndef ICMP_IREQ # define ICMP_IREQ 15 #endif #ifndef ICMP_IREQREPLY # define ICMP_IREQREPLY 16 #endif #ifndef ICMP_MASKREQ # define ICMP_MASKREQ 17 #endif #ifndef ICMP_MASKREPLY # define ICMP_MASKREPLY 18 #endif #ifndef ICMP_TRACEROUTE # define ICMP_TRACEROUTE 30 #endif #ifndef ICMP_DATACONVERR # define ICMP_DATACONVERR 31 #endif #ifndef ICMP_MOBILE_REDIRECT # define ICMP_MOBILE_REDIRECT 32 #endif #ifndef ICMP_IPV6_WHEREAREYOU # define ICMP_IPV6_WHEREAREYOU 33 #endif #ifndef ICMP_IPV6_IAMHERE # define ICMP_IPV6_IAMHERE 34 #endif #ifndef ICMP_MOBILE_REGREQUEST # define ICMP_MOBILE_REGREQUEST 35 #endif #ifndef ICMP_MOBILE_REGREPLY # define ICMP_MOBILE_REGREPLY 36 #endif #ifndef ICMP_SKIP # define ICMP_SKIP 39 #endif #ifndef ICMP_PHOTURIS # define ICMP_PHOTURIS 40 #endif #ifndef ICMP_PHOTURIS_UNKNOWN_INDEX # define ICMP_PHOTURIS_UNKNOWN_INDEX 1 #endif #ifndef ICMP_PHOTURIS_AUTH_FAILED # define ICMP_PHOTURIS_AUTH_FAILED 2 #endif #ifndef ICMP_PHOTURIS_DECRYPT_FAILED # define ICMP_PHOTURIS_DECRYPT_FAILED 3 #endif #ifndef IPVERSION # define IPVERSION 4 #endif #ifndef IPOPT_MINOFF # define IPOPT_MINOFF 4 #endif #ifndef IPOPT_COPIED # define IPOPT_COPIED(x) ((x)&0x80) #endif #ifndef IPOPT_EOL # define IPOPT_EOL 0 #endif #ifndef IPOPT_NOP # define IPOPT_NOP 1 #endif #ifndef IP_MF # define IP_MF ((u_short)0x2000) #endif #ifndef ETHERTYPE_IP # define ETHERTYPE_IP ((u_short)0x0800) #endif #ifndef TH_FIN # define TH_FIN 0x01 #endif #ifndef TH_SYN # define TH_SYN 0x02 #endif #ifndef TH_RST # define TH_RST 0x04 #endif #ifndef TH_PUSH # define TH_PUSH 0x08 #endif #ifndef TH_ACK # define TH_ACK 0x10 #endif #ifndef TH_URG # define TH_URG 0x20 #endif #undef TH_ACKMASK #define TH_ACKMASK (TH_FIN|TH_SYN|TH_RST|TH_ACK) #ifndef IPOPT_EOL # define IPOPT_EOL 0 #endif #ifndef IPOPT_NOP # define IPOPT_NOP 1 #endif #ifndef IPOPT_RR # define IPOPT_RR 7 #endif #ifndef IPOPT_TS # define IPOPT_TS 68 #endif #ifndef IPOPT_SECURITY # define IPOPT_SECURITY 130 #endif #ifndef IPOPT_LSRR # define IPOPT_LSRR 131 #endif #ifndef IPOPT_SATID # define IPOPT_SATID 136 #endif #ifndef IPOPT_SSRR # define IPOPT_SSRR 137 #endif #ifndef IPOPT_SECUR_UNCLASS # define IPOPT_SECUR_UNCLASS ((u_short)0x0000) #endif #ifndef IPOPT_SECUR_CONFID # define IPOPT_SECUR_CONFID ((u_short)0xf135) #endif #ifndef IPOPT_SECUR_EFTO # 
define IPOPT_SECUR_EFTO ((u_short)0x789a) #endif #ifndef IPOPT_SECUR_MMMM # define IPOPT_SECUR_MMMM ((u_short)0xbc4d) #endif #ifndef IPOPT_SECUR_RESTR # define IPOPT_SECUR_RESTR ((u_short)0xaf13) #endif #ifndef IPOPT_SECUR_SECRET # define IPOPT_SECUR_SECRET ((u_short)0xd788) #endif #ifndef IPOPT_SECUR_TOPSECRET # define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) #endif #ifndef IPOPT_OLEN # define IPOPT_OLEN 1 #endif #ifndef IPPROTO_HOPOPTS # define IPPROTO_HOPOPTS 0 #endif #ifndef IPPROTO_IPIP # define IPPROTO_IPIP 4 #endif #ifndef IPPROTO_ENCAP # define IPPROTO_ENCAP 98 #endif #ifndef IPPROTO_IPV6 # define IPPROTO_IPV6 41 #endif #ifndef IPPROTO_ROUTING # define IPPROTO_ROUTING 43 #endif #ifndef IPPROTO_FRAGMENT # define IPPROTO_FRAGMENT 44 #endif #ifndef IPPROTO_GRE # define IPPROTO_GRE 47 /* GRE encaps RFC 1701 */ #endif #ifndef IPPROTO_ESP # define IPPROTO_ESP 50 #endif #ifndef IPPROTO_AH # define IPPROTO_AH 51 #endif #ifndef IPPROTO_ICMPV6 # define IPPROTO_ICMPV6 58 #endif #ifndef IPPROTO_NONE # define IPPROTO_NONE 59 #endif #ifndef IPPROTO_DSTOPTS # define IPPROTO_DSTOPTS 60 #endif #ifndef IPPROTO_MOBILITY # define IPPROTO_MOBILITY 135 #endif #ifndef ICMP_ROUTERADVERT # define ICMP_ROUTERADVERT 9 #endif #ifndef ICMP_ROUTERSOLICIT # define ICMP_ROUTERSOLICIT 10 #endif #ifndef ICMP6_DST_UNREACH # define ICMP6_DST_UNREACH 1 #endif #ifndef ICMP6_PACKET_TOO_BIG # define ICMP6_PACKET_TOO_BIG 2 #endif #ifndef ICMP6_TIME_EXCEEDED # define ICMP6_TIME_EXCEEDED 3 #endif #ifndef ICMP6_PARAM_PROB # define ICMP6_PARAM_PROB 4 #endif #ifndef ICMP6_ECHO_REQUEST # define ICMP6_ECHO_REQUEST 128 #endif #ifndef ICMP6_ECHO_REPLY # define ICMP6_ECHO_REPLY 129 #endif #ifndef ICMP6_MEMBERSHIP_QUERY # define ICMP6_MEMBERSHIP_QUERY 130 #endif #ifndef MLD6_LISTENER_QUERY # define MLD6_LISTENER_QUERY 130 #endif #ifndef ICMP6_MEMBERSHIP_REPORT # define ICMP6_MEMBERSHIP_REPORT 131 #endif #ifndef MLD6_LISTENER_REPORT # define MLD6_LISTENER_REPORT 131 #endif #ifndef ICMP6_MEMBERSHIP_REDUCTION # define ICMP6_MEMBERSHIP_REDUCTION 132 #endif #ifndef MLD6_LISTENER_DONE # define MLD6_LISTENER_DONE 132 #endif #ifndef ND_ROUTER_SOLICIT # define ND_ROUTER_SOLICIT 133 #endif #ifndef ND_ROUTER_ADVERT # define ND_ROUTER_ADVERT 134 #endif #ifndef ND_NEIGHBOR_SOLICIT # define ND_NEIGHBOR_SOLICIT 135 #endif #ifndef ND_NEIGHBOR_ADVERT # define ND_NEIGHBOR_ADVERT 136 #endif #ifndef ND_REDIRECT # define ND_REDIRECT 137 #endif #ifndef ICMP6_ROUTER_RENUMBERING # define ICMP6_ROUTER_RENUMBERING 138 #endif #ifndef ICMP6_WRUREQUEST # define ICMP6_WRUREQUEST 139 #endif #ifndef ICMP6_WRUREPLY # define ICMP6_WRUREPLY 140 #endif #ifndef ICMP6_FQDN_QUERY # define ICMP6_FQDN_QUERY 139 #endif #ifndef ICMP6_FQDN_REPLY # define ICMP6_FQDN_REPLY 140 #endif #ifndef ICMP6_NI_QUERY # define ICMP6_NI_QUERY 139 #endif #ifndef ICMP6_NI_REPLY # define ICMP6_NI_REPLY 140 #endif #ifndef MLD6_MTRACE_RESP # define MLD6_MTRACE_RESP 200 #endif #ifndef MLD6_MTRACE # define MLD6_MTRACE 201 #endif #ifndef ICMP6_HADISCOV_REQUEST # define ICMP6_HADISCOV_REQUEST 202 #endif #ifndef ICMP6_HADISCOV_REPLY # define ICMP6_HADISCOV_REPLY 203 #endif #ifndef ICMP6_MOBILEPREFIX_SOLICIT # define ICMP6_MOBILEPREFIX_SOLICIT 204 #endif #ifndef ICMP6_MOBILEPREFIX_ADVERT # define ICMP6_MOBILEPREFIX_ADVERT 205 #endif #ifndef ICMP6_MAXTYPE # define ICMP6_MAXTYPE 205 #endif #ifndef ICMP6_DST_UNREACH_NOROUTE # define ICMP6_DST_UNREACH_NOROUTE 0 #endif #ifndef ICMP6_DST_UNREACH_ADMIN # define ICMP6_DST_UNREACH_ADMIN 1 #endif #ifndef ICMP6_DST_UNREACH_NOTNEIGHBOR # define 
ICMP6_DST_UNREACH_NOTNEIGHBOR 2 #endif #ifndef ICMP6_DST_UNREACH_BEYONDSCOPE # define ICMP6_DST_UNREACH_BEYONDSCOPE 2 #endif #ifndef ICMP6_DST_UNREACH_ADDR # define ICMP6_DST_UNREACH_ADDR 3 #endif #ifndef ICMP6_DST_UNREACH_NOPORT # define ICMP6_DST_UNREACH_NOPORT 4 #endif #ifndef ICMP6_TIME_EXCEED_TRANSIT # define ICMP6_TIME_EXCEED_TRANSIT 0 #endif #ifndef ICMP6_TIME_EXCEED_REASSEMBLY # define ICMP6_TIME_EXCEED_REASSEMBLY 1 #endif #ifndef ICMP6_NI_SUCCESS # define ICMP6_NI_SUCCESS 0 #endif #ifndef ICMP6_NI_REFUSED # define ICMP6_NI_REFUSED 1 #endif #ifndef ICMP6_NI_UNKNOWN # define ICMP6_NI_UNKNOWN 2 #endif #ifndef ICMP6_ROUTER_RENUMBERING_COMMAND # define ICMP6_ROUTER_RENUMBERING_COMMAND 0 #endif #ifndef ICMP6_ROUTER_RENUMBERING_RESULT # define ICMP6_ROUTER_RENUMBERING_RESULT 1 #endif #ifndef ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET # define ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET 255 #endif #ifndef ICMP6_PARAMPROB_HEADER # define ICMP6_PARAMPROB_HEADER 0 #endif #ifndef ICMP6_PARAMPROB_NEXTHEADER # define ICMP6_PARAMPROB_NEXTHEADER 1 #endif #ifndef ICMP6_PARAMPROB_OPTION # define ICMP6_PARAMPROB_OPTION 2 #endif #ifndef ICMP6_NI_SUBJ_IPV6 # define ICMP6_NI_SUBJ_IPV6 0 #endif #ifndef ICMP6_NI_SUBJ_FQDN # define ICMP6_NI_SUBJ_FQDN 1 #endif #ifndef ICMP6_NI_SUBJ_IPV4 # define ICMP6_NI_SUBJ_IPV4 2 #endif #ifndef MLD_MTRACE_RESP # define MLD_MTRACE_RESP 200 #endif #ifndef MLD_MTRACE # define MLD_MTRACE 201 #endif #ifndef MLD6_MTRACE_RESP # define MLD6_MTRACE_RESP MLD_MTRACE_RESP #endif #ifndef MLD6_MTRACE # define MLD6_MTRACE MLD_MTRACE #endif #if !defined(IPV6_FLOWINFO_MASK) # if (BYTE_ORDER == BIG_ENDIAN) || defined(_BIG_ENDIAN) # define IPV6_FLOWINFO_MASK 0x0fffffff /* flow info (28 bits) */ # else # if (BYTE_ORDER == LITTLE_ENDIAN) || !defined(_BIG_ENDIAN) # define IPV6_FLOWINFO_MASK 0xffffff0f /* flow info (28 bits) */ # endif /* LITTLE_ENDIAN */ # endif #endif #if !defined(IPV6_FLOWLABEL_MASK) # if (BYTE_ORDER == BIG_ENDIAN) || defined(_BIG_ENDIAN) # define IPV6_FLOWLABEL_MASK 0x000fffff /* flow label (20 bits) */ # else # if (BYTE_ORDER == LITTLE_ENDIAN) || !defined(_BIG_ENDIAN) # define IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */ # endif /* LITTLE_ENDIAN */ # endif #endif /* * ECN is a new addition to TCP - RFC 2481 */ #ifndef TH_ECN # define TH_ECN 0x40 #endif #ifndef TH_CWR # define TH_CWR 0x80 #endif #define TH_ECNALL (TH_ECN|TH_CWR) /* * TCP States */ #define IPF_TCPS_LISTEN 0 /* listening for connection */ #define IPF_TCPS_SYN_SENT 1 /* active, have sent syn */ #define IPF_TCPS_SYN_RECEIVED 2 /* have sent and received syn */ #define IPF_TCPS_HALF_ESTAB 3 /* for connections not fully "up" */ /* states < IPF_TCPS_ESTABLISHED are those where connections not established */ #define IPF_TCPS_ESTABLISHED 4 /* established */ #define IPF_TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ /* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */ #define IPF_TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ #define IPF_TCPS_CLOSING 7 /* closed, xchd FIN; await FIN ACK */ #define IPF_TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ /* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */ #define IPF_TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ #define IPF_TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ #define IPF_TCPS_CLOSED 11 /* closed */ #define IPF_TCP_NSTATES 12 #define TCP_MSL 120 #undef ICMP_MAX_UNREACH #define ICMP_MAX_UNREACH 14 #undef ICMP_MAXTYPE #define ICMP_MAXTYPE 18 #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif #ifndef LOG_FTP # 
define LOG_FTP (11<<3) #endif #ifndef LOG_AUTHPRIV # define LOG_AUTHPRIV (10<<3) #endif #ifndef LOG_AUDIT # define LOG_AUDIT (13<<3) #endif #ifndef LOG_NTP # define LOG_NTP (12<<3) #endif #ifndef LOG_SECURITY # define LOG_SECURITY (13<<3) #endif #ifndef LOG_LFMT # define LOG_LFMT (14<<3) #endif #ifndef LOG_CONSOLE # define LOG_CONSOLE (14<<3) #endif /* * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data, * another IP header and then 64 bits of data, totalling 56. Of course, * the last 64 bits is dependent on that being available. */ #define ICMPERR_ICMPHLEN 8 #define ICMPERR_IPICMPHLEN (20 + 8) #define ICMPERR_MINPKTLEN (20 + 8 + 20) #define ICMPERR_MAXPKTLEN (20 + 8 + 20 + 8) #define ICMP6ERR_MINPKTLEN (40 + 8) #define ICMP6ERR_IPICMPHLEN (40 + 8 + 40) #ifndef MIN # define MIN(a,b) (((a)<(b))?(a):(b)) #endif #ifdef RESCUE # undef IPFILTER_BPF #endif #ifdef IPF_DEBUG # define DPRINT(x) printf x #else # define DPRINT(x) #endif #ifndef AF_INET6 # define AF_INET6 26 #endif #ifdef DTRACE_PROBE # ifdef _KERNEL # define DT(_n) DTRACE_PROBE(_n) # define DT1(_n,_a,_b) DTRACE_PROBE1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) DTRACE_PROBE2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) \ DTRACE_PROBE3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) \ DTRACE_PROBE4(_n,_a,_b,_c,_d,_e,_f,_g,_h) # else # define DT(_n) # define DT1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) # endif #else # define DT(_n) # define DT1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) #endif struct ip6_routing { u_char ip6r_nxt; /* next header */ u_char ip6r_len; /* length in units of 8 octets */ u_char ip6r_type; /* always zero */ u_char ip6r_segleft; /* segments left */ u_32_t ip6r_reserved; /* reserved field */ }; #endif /* __IP_COMPAT_H__ */ Index: head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c =================================================================== --- head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 305823) +++ head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 305824) @@ -1,1484 +1,1484 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ #if !defined(lint) static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$Id$"; #endif #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_inet6.h" #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_random_ip_id.h" #endif #include #include #include #include # include # include #include #include # include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #endif # include # include # include #if !defined(__hpux) # include #endif #include # include # include #include # include # include #include #include #include #include #include #include #include #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #else #define CURVNET_SET(arg) #define CURVNET_RESTORE() #define VNET_DEFINE(_t, _v) _t _v #define VNET_DECLARE(_t, _v) extern _t _v #define VNET(arg) arg #endif #if defined(__osf__) # include #endif #include #include #include #include "netinet/ip_compat.h" #ifdef USE_INET6 # include #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #ifdef IPFILTER_SCAN #include "netinet/ip_scan.h" #endif #include "netinet/ip_pool.h" # include #include #ifdef CSUM_DATA_VALID #include #endif extern int ip_optcopy __P((struct ip *, struct ip *)); # ifdef IPFILTER_M_IPFILTER MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures"); # endif static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); VNET_DEFINE(ipf_main_softc_t, ipfmain) = { .ipf_running = -2, }; #define V_ipfmain VNET(ipfmain) # include # if defined(NETBSD_PF) # include # endif /* NETBSD_PF */ static eventhandler_tag ipf_arrivetag, ipf_departtag; #if 0 /* * Disable the "cloner" event handler; we are getting interface * events before the firewall is fully initialized and also have no vnet * information, thus leading to uninitialised memory accesses. * In addition it is unclear why we need it in the first place. * If it turns out to be needed, we'll need a dedicated event handler * for it to deal with the ifc and the correct vnet. 
*/ static eventhandler_tag ipf_clonetag; #endif static void ipf_ifevent(void *arg, struct ifnet *ifp); static void ipf_ifevent(arg, ifp) void *arg; struct ifnet *ifp; { CURVNET_SET(ifp->if_vnet); if (V_ipfmain.ipf_running > 0) ipf_sync(&V_ipfmain, NULL); CURVNET_RESTORE(); } static int ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { struct ip *ip = mtod(*mp, struct ip *); int rv; /* * IPFilter expects everything in network byte order */ #if (__FreeBSD_version < 1000019) ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); #endif CURVNET_SET(ifp->if_vnet); rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); CURVNET_RESTORE(); #if (__FreeBSD_version < 1000019) if ((rv == 0) && (*mp != NULL)) { ip = mtod(*mp, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } #endif return rv; } # ifdef USE_INET6 # include static int ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { int error; CURVNET_SET(ifp->if_vnet); error = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp); CURVNET_RESTORE(); return (error); } # endif #if defined(IPFILTER_LKM) int ipf_identify(s) char *s; { if (strcmp(s, "ipl") == 0) return 1; return 0; } #endif /* IPFILTER_LKM */ static void ipf_timer_func(arg) void *arg; { ipf_main_softc_t *softc = arg; SPL_INT(s); SPL_NET(s); READ_ENTER(&softc->ipf_global); if (softc->ipf_running > 0) ipf_slowtimer(softc); if (softc->ipf_running == -1 || softc->ipf_running == 1) { #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); } RWLOCK_EXIT(&softc->ipf_global); SPL_X(s); } int ipfattach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif SPL_NET(s); if (softc->ipf_running > 0) { SPL_X(s); return EBUSY; } if (ipf_init_all(softc) < 0) { SPL_X(s); return EIO; } bzero((char *)V_ipfmain.ipf_selwait, sizeof(V_ipfmain.ipf_selwait)); softc->ipf_running = 1; if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); return 0; } /* * Disable the filter by removing the hooks from the IP input/output * stream. */ int ipfdetach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif if (softc->ipf_control_forwarding & 2) V_ipforwarding = 0; SPL_NET(s); #if 0 if (softc->ipf_slow_ch.callout != NULL) untimeout(ipf_timer_func, softc, softc->ipf_slow_ch); bzero(&softc->ipf_slow, sizeof(softc->ipf_slow)); #endif callout_drain(&softc->ipf_slow_ch); ipf_fini_all(softc); softc->ipf_running = -2; SPL_X(s); return 0; } /* * Filter ioctl interface. 
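 *
 * For illustration, a userland consumer reaches this entry point by
 * opening one of the ipf devices and issuing an ioctl. A minimal,
 * hedged sketch (SIOCGETFS, ipfobj_t and struct friostat as declared
 * in the ipfilter headers; error handling elided):
 *
 *	ipfobj_t obj;
 *	struct friostat fio;
 *	int fd = open("/dev/ipl", O_RDONLY);
 *
 *	bzero(&obj, sizeof(obj));
 *	obj.ipfo_rev = IPFILTER_VERSION;
 *	obj.ipfo_type = IPFOBJ_IPFSTAT;
 *	obj.ipfo_size = sizeof(fio);
 *	obj.ipfo_ptr = &fio;
 *	if (fd >= 0 && ioctl(fd, SIOCGETFS, &obj) == 0)
 *		... fio now holds the filter statistics ...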
*/ int ipfioctl(dev, cmd, data, mode , p) struct thread *p; # define p_cred td_ucred # define p_uid td_ucred->cr_ruid struct cdev *dev; ioctlcmd_t cmd; caddr_t data; int mode; { int error = 0, unit = 0; SPL_INT(s); CURVNET_SET(TD_TO_VNET(p)); #if (BSD >= 199306) if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) { V_ipfmain.ipf_interror = 130001; CURVNET_RESTORE(); return EPERM; } #endif unit = GET_MINOR(dev); if ((IPL_LOGMAX < unit) || (unit < 0)) { V_ipfmain.ipf_interror = 130002; CURVNET_RESTORE(); return ENXIO; } if (V_ipfmain.ipf_running <= 0) { if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) { V_ipfmain.ipf_interror = 130003; CURVNET_RESTORE(); return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF && cmd != SIOCIPFINTERROR) { V_ipfmain.ipf_interror = 130004; CURVNET_RESTORE(); return EIO; } } SPL_NET(s); error = ipf_ioctlswitch(&V_ipfmain, unit, data, cmd, mode, p->p_uid, p); CURVNET_RESTORE(); if (error != -1) { SPL_X(s); return error; } SPL_X(s); return error; } /* * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that * requires a large amount of setting up and isn't any more efficient. */ int ipf_send_reset(fin) fr_info_t *fin; { struct tcphdr *tcp, *tcp2; int tlen = 0, hlen; struct mbuf *m; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip; tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return -1; /* feedback loop */ if (ipf_checkl4sum(fin) == -1) return -1; tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); #ifdef USE_INET6 hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t); #else hlen = sizeof(ip_t); #endif #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; if (sizeof(*tcp2) + hlen > MLEN) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } } m->m_len = sizeof(*tcp2) + hlen; #if (BSD >= 199103) m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = (struct ifnet *)0; #endif ip = mtod(m, struct ip *); bzero((char *)ip, hlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; #endif tcp2 = (struct tcphdr *)((char *)ip + hlen); tcp2->th_sport = tcp->th_dport; tcp2->th_dport = tcp->th_sport; if (tcp->th_flags & TH_ACK) { tcp2->th_seq = tcp->th_ack; tcp2->th_flags = TH_RST; tcp2->th_ack = 0; } else { tcp2->th_seq = 0; tcp2->th_ack = ntohl(tcp->th_seq); tcp2->th_ack += tlen; tcp2->th_ack = htonl(tcp2->th_ack); tcp2->th_flags = TH_RST|TH_ACK; } TCP_X2_A(tcp2, 0); TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2); tcp2->th_win = tcp->th_win; tcp2->th_sum = 0; tcp2->th_urp = 0; #ifdef USE_INET6 if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_hlim = 0; ip6->ip6_src = fin->fin_dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*ip6), sizeof(*tcp2)); return ipf_send_ip(fin, m); } #endif ip->ip_p = IPPROTO_TCP; ip->ip_len = htons(sizeof(struct tcphdr)); ip->ip_src.s_addr = fin->fin_daddr; ip->ip_dst.s_addr = fin->fin_saddr; tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); ip->ip_len = htons(hlen + sizeof(*tcp2)); return ipf_send_ip(fin, m); } /* * ip_len must be in network byte order when called. 
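 *
 * A worked example of the sequence math in ipf_send_reset() above:
 * for an incoming SYN with th_seq == 1000 carrying no data, tlen is
 * 0 + 1 (SYN) + 0 (FIN) == 1, so the reply is RST|ACK with
 * th_ack == 1001. The ACK has to cover every sequence number the
 * offending segment consumed, including the SYN and FIN flags,
 * otherwise the peer will not treat the reset as acceptable.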
*/ static int ipf_send_ip(fin, m) fr_info_t *fin; mb_t *m; { fr_info_t fnew; ip_t *ip, *oip; int hlen; ip = mtod(m, ip_t *); bzero((char *)&fnew, sizeof(fnew)); fnew.fin_main_soft = fin->fin_main_soft; IP_V_A(ip, fin->fin_v); switch (fin->fin_v) { case 4 : oip = fin->fin_ip; hlen = sizeof(*oip); fnew.fin_v = 4; fnew.fin_p = ip->ip_p; fnew.fin_plen = ntohs(ip->ip_len); IP_HL_A(ip, sizeof(*oip) >> 2); ip->ip_tos = oip->ip_tos; ip->ip_id = fin->fin_ip->ip_id; #if defined(FreeBSD) && (__FreeBSD_version > 460000) ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0); #else ip->ip_off = 0; #endif ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; break; #ifdef USE_INET6 case 6 : { ip6_t *ip6 = (ip6_t *)ip; ip6->ip6_vfc = 0x60; ip6->ip6_hlim = IPDEFTTL; hlen = sizeof(*ip6); fnew.fin_p = ip6->ip6_nxt; fnew.fin_v = 6; fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; break; } #endif default : return EINVAL; } #ifdef IPSEC m->m_pkthdr.rcvif = NULL; #endif fnew.fin_ifp = fin->fin_ifp; fnew.fin_flx = FI_NOCKSUM; fnew.fin_m = m; fnew.fin_ip = ip; fnew.fin_mp = &m; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; (void) ipf_makefrip(hlen, ip, &fnew); return ipf_fastroute(m, &m, &fnew, NULL); } int ipf_send_icmp_err(type, fin, dst) int type; fr_info_t *fin; int dst; { int err, hlen, xtra, iclen, ohlen, avail, code; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip, *ip2; if ((type < 0) || (type >= ICMP_MAXTYPE)) return -1; code = fin->fin_icode; #ifdef USE_INET6 #if 0 /* XXX Fix an off by one error: s/>/>=/ was: if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */ #endif if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return -1; #endif if (ipf_checkl4sum(fin) == -1) return -1; #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; avail = MHLEN; xtra = 0; hlen = 0; ohlen = 0; dst4.s_addr = 0; ifp = fin->fin_ifp; if (fin->fin_v == 4) { if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT)) switch (ntohs(fin->fin_data[0]) >> 8) { case ICMP_ECHO : case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : break; default : FREE_MB_T(m); return 0; } if (dst == 0) { if (ipf_ifpaddr(&V_ipfmain, 4, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } dst4 = dst6.in4; } else dst4.s_addr = fin->fin_daddr; hlen = sizeof(ip_t); ohlen = fin->fin_hlen; iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; if (fin->fin_hlen < fin->fin_plen) xtra = MIN(fin->fin_dlen, 8); else xtra = 0; } #ifdef USE_INET6 else if (fin->fin_v == 6) { hlen = sizeof(ip6_t); ohlen = sizeof(ip6_t); iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; type = icmptoicmp6types[type]; if (type == ICMP6_DST_UNREACH) code = icmptoicmp6unreach[code]; if (iclen + max_linkhdr + fin->fin_plen > avail) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } avail = MCLBYTES; } xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr); xtra = MIN(xtra, IPV6_MMTU - iclen); if (dst == 0) { if (ipf_ifpaddr(&V_ipfmain, 6, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } } else dst6 = fin->fin_dst6; } #endif else { FREE_MB_T(m); return -1; } avail -= (max_linkhdr + iclen); if (avail < 0) { FREE_MB_T(m); return -1; } if (xtra > avail) xtra = avail; iclen += xtra; m->m_data += max_linkhdr; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = iclen; m->m_len = iclen; ip = mtod(m, ip_t *); icmp = (struct icmp 
*)((char *)ip + hlen); ip2 = (ip_t *)&icmp->icmp_ip; icmp->icmp_type = type; icmp->icmp_code = fin->fin_icode; icmp->icmp_cksum = 0; #ifdef icmp_nextmtu if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) { if (fin->fin_mtu != 0) { icmp->icmp_nextmtu = htons(fin->fin_mtu); } else if (ifp != NULL) { icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp)); } else { /* make up a number... */ icmp->icmp_nextmtu = htons(fin->fin_plen - 20); } } #endif bcopy((char *)fin->fin_ip, (char *)ip2, ohlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(iclen - hlen); ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 0; ip6->ip6_src = dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), iclen - hlen); } else #endif { ip->ip_p = IPPROTO_ICMP; ip->ip_src.s_addr = dst4.s_addr; ip->ip_dst.s_addr = fin->fin_saddr; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = ipf_cksum((u_short *)icmp, sizeof(*icmp) + 8); ip->ip_len = htons(iclen); ip->ip_p = IPPROTO_ICMP; } err = ipf_send_ip(fin, m); return err; } /* * m0 - pointer to mbuf where the IP packet starts * mpp - pointer to the mbuf pointer that is the start of the mbuf chain */ int ipf_fastroute(m0, mpp, fin, fdp) mb_t *m0, **mpp; fr_info_t *fin; frdest_t *fdp; { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; struct sockaddr_in dst; struct nhop4_extended nh4; int has_nhop = 0; u_long fibnum = 0; u_short ip_off; frdest_t node; frentry_t *fr; #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: * * If the mbuf we're about to send is not writable (because of * a cluster reference, for example) we'll need to make a copy * of it since this routine modifies the contents. * * If you have non-crappy network hardware that can transmit data * from the mbuf, rather than making a copy, this is gonna be a * problem. */ if (M_WRITABLE(m) == 0) { m0 = m_dup(m, M_NOWAIT); if (m0 != NULL) { FREE_MB_T(m); m = m0; *mpp = m; } else { error = ENOBUFS; FREE_MB_T(m); goto done; } } #endif #ifdef USE_INET6 if (fin->fin_v == 6) { /* * currently "to " and "to :ip#" are not supported * for IPv6 */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif hlen = fin->fin_hlen; ip = mtod(m0, struct ip *); ifp = NULL; /* * Route packet. */ bzero(&dst, sizeof (dst)); dst.sin_family = AF_INET; dst.sin_addr = ip->ip_dst; dst.sin_len = sizeof(dst); fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && (fdp->fd_type == FRD_DSTLIST)) { if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0) fdp = &node; } if (fdp != NULL) ifp = fdp->fd_ptr; else ifp = fin->fin_ifp; if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) { error = -2; goto bad; } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) dst.sin_addr = fdp->fd_ip; fibnum = M_GETFIB(m0); if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } has_nhop = 1; if (ifp == NULL) ifp = nh4.nh_ifp; if (nh4.nh_flags & NHF_GATEWAY) dst.sin_addr = nh4.nh_addr; /* * For input packets which are being "fastrouted", they won't * go back through output filtering and miss their chance to get * NAT'd and counted. 
Duplicated packets aren't considered to be * part of the normal packet stream, so do not NAT them or pass * them through stateful checking, etc. */ if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) { sifp = fin->fin_ifp; fin->fin_ifp = ifp; fin->fin_out = 1; (void) ipf_acctpkt(fin, NULL); fin->fin_fr = NULL; if (!fr || !(fr->fr_flags & FR_RETMASK)) { u_32_t pass; (void) ipf_state_check(fin, &pass); } switch (ipf_nat_checkout(fin, NULL)) { case 0 : break; case 1 : ip->ip_sum = 0; break; case -1 : error = -1; goto bad; break; } fin->fin_ifp = sifp; fin->fin_out = 0; } else ip->ip_sum = 0; /* * If small enough for interface, can just send directly. */ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { error = EMSGSIZE; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_act; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) { m = m0; error = ENOBUFS; goto bad; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); bcopy((char *)ip, (char *)mhip, sizeof(*ip)); if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); IP_HL_A(mhip, mhlen >> 2); } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ntohs(ip->ip_len)) len = ntohs(ip->ip_len) - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); *mnext = m; - m->m_next = m_copy(m0, off, len); + m->m_next = m_copym(m0, off, len, M_NOWAIT); if (m->m_next == 0) { error = ENOBUFS; /* ??? */ goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); mnext = &m->m_act; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). 
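	 *
	 * A worked example of the arithmetic used here, assuming a
	 * 1500 byte MTU and a 20 byte header on a 4000 byte packet:
	 * len = (1500 - 20) & ~7 = 1480, so the fragments carry bytes
	 * 20-1499, 1500-2979 and 2980-3999 of the original packet,
	 * with fragment offsets of 0, 185 and 370 respectively
	 * (ip_off counts in units of 8 octets) and IP_MF set on
	 * every fragment except the last.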
*/ m_adj(m0, hlen + firstlen - ip->ip_len); ip->ip_len = htons((u_short)(hlen + firstlen)); ip->ip_off = htons((u_short)IP_MF); ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_act; m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); else FREE_MB_T(m); } } done: if (!error) V_ipfmain.ipf_frouteok[0]++; else V_ipfmain.ipf_frouteok[1]++; if (has_nhop) fib4_free_nh_ext(fibnum, &nh4); return 0; bad: if (error == EMSGSIZE) { sifp = fin->fin_ifp; code = fin->fin_icode; fin->fin_icode = ICMP_UNREACH_NEEDFRAG; fin->fin_ifp = ifp; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1); fin->fin_ifp = sifp; fin->fin_icode = code; } FREE_MB_T(m); goto done; } int ipf_verifysrc(fin) fr_info_t *fin; { struct nhop4_basic nh4; if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0) return (0); return (fin->fin_ifp == nh4.nh_ifp); } /* * return the first IP Address associated with an interface */ int ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) ipf_main_softc_t *softc; int v, atype; void *ifptr; i6addr_t *inp, *inpmask; { #ifdef USE_INET6 struct in6_addr *inp6 = NULL; #endif struct sockaddr *sock, *mask; struct sockaddr_in *sin; struct ifaddr *ifa; struct ifnet *ifp; if ((ifptr == NULL) || (ifptr == (void *)-1)) return -1; sin = NULL; ifp = ifptr; if (v == 4) inp->in4.s_addr = 0; #ifdef USE_INET6 else if (v == 6) bzero((char *)inp, sizeof(*inp)); #endif ifa = TAILQ_FIRST(&ifp->if_addrhead); sock = ifa->ifa_addr; while (sock != NULL && ifa != NULL) { sin = (struct sockaddr_in *)sock; if ((v == 4) && (sin->sin_family == AF_INET)) break; #ifdef USE_INET6 if ((v == 6) && (sin->sin_family == AF_INET6)) { inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(inp6) && !IN6_IS_ADDR_LOOPBACK(inp6)) break; } #endif ifa = TAILQ_NEXT(ifa, ifa_link); if (ifa != NULL) sock = ifa->ifa_addr; } if (ifa == NULL || sin == NULL) return -1; mask = ifa->ifa_netmask; if (atype == FRI_BROADCAST) sock = ifa->ifa_broadaddr; else if (atype == FRI_PEERADDR) sock = ifa->ifa_dstaddr; if (sock == NULL) return -1; #ifdef USE_INET6 if (v == 6) { return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock, (struct sockaddr_in6 *)mask, inp, inpmask); } #endif return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock, (struct sockaddr_in *)mask, &inp->in4, &inpmask->in4); } u_32_t ipf_newisn(fin) fr_info_t *fin; { u_32_t newiss; newiss = arc4random(); return newiss; } INLINE int ipf_checkv4sum(fin) fr_info_t *fin; { #ifdef CSUM_DATA_VALID int manual = 0; u_short sum; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 
0 : -1; m = fin->fin_m; if (m == NULL) { manual = 1; goto skipauto; } ip = fin->fin_ip; if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) == CSUM_IP_CHECKED) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_csum_ip_checked, fr_info_t *, fin, u_int, m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)); return -1; } if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { /* Depending on the driver, UDP may have zero checksum */ if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) == 0) { udphdr_t *udp = fin->fin_dp; if (udp->uh_sum == 0) { /* * we're good no matter what the hardware * checksum flags and csum_data say (handling * of csum_data for zero UDP checksum is not * consistent across all drivers) */ fin->fin_cksum = 1; return 0; } } if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) sum = m->m_pkthdr.csum_data; else sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + fin->fin_dlen + fin->fin_p)); sum ^= 0xffff; if (sum != 0) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_sum, fr_info_t *, fin, u_int, sum); } else { fin->fin_cksum = FI_CK_SUMOK; return 0; } } else { if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) { fin->fin_cksum = FI_CK_L4FULL; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_TCP || m->m_pkthdr.csum_flags == CSUM_UDP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_IP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else { manual = 1; } } skipauto: if (manual != 0) { if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_manual, fr_info_t *, fin, u_int, manual); return -1; } } #else if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } #endif return 0; } #ifdef USE_INET6 INLINE int ipf_checkv6sum(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_NOCKSUM) != 0) { DT(ipf_checkv6sum_fi_nocksum); return 0; } if ((fin->fin_flx & FI_SHORT) != 0) { DT(ipf_checkv6sum_fi_short); return 1; } if (fin->fin_cksum != FI_CK_NEEDED) { DT(ipf_checkv6sum_fi_ck_needed); return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; } if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv6sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } return 0; } #endif /* USE_INET6 */ size_t mbufchainlen(m0) struct mbuf *m0; { size_t len; if ((m0->m_flags & M_PKTHDR) != 0) { len = m0->m_pkthdr.len; } else { struct mbuf *m; for (m = m0, len = 0; m != NULL; m = m->m_next) len += m->m_len; } return len; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pullup */ /* Returns: NULL == pullup failed, else pointer to protocol header */ /* Parameters: xmin(I)- pointer to buffer where data packet starts */ /* fin(I) - pointer to packet information */ /* len(I) - number of bytes to pullup */ /* */ /* Attempt to move at least len bytes (from the start of the buffer) into a */ /* single buffer for ease of access. Operating system native functions are */ /* used to manage buffers - if necessary. If the entire packet ends up in */ /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */ /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ /* and ONLY if the pullup succeeds. */ /* */ /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */ /* of buffers that starts at *fin->fin_mp. 
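A caller-side sketch may help (illustrative only, using just the names */
/* documented above; 'plen' stands for whatever length the caller needs):  */
/*                                                                          */
/*	if (ipf_pullup(fin->fin_m, fin, fin->fin_hlen + plen) == NULL)     */
/*		return -1;	.. chain freed, *fin->fin_mp is now NULL    */
/*                                                                          */
/* After a successful call, any pointer previously derived from fin_ip or  */
/* fin_dp must be re-loaded, since the data may have moved.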
*/ /* ------------------------------------------------------------------------ */ void * ipf_pullup(xmin, fin, len) mb_t *xmin; fr_info_t *fin; int len; { int dpoff, ipoff; mb_t *m = xmin; char *ip; if (m == NULL) return NULL; ip = (char *)fin->fin_ip; if ((fin->fin_flx & FI_COALESCE) != 0) return ip; ipoff = fin->fin_ipoff; if (fin->fin_dp != NULL) dpoff = (char *)fin->fin_dp - (char *)ip; else dpoff = 0; if (M_LEN(m) < len) { mb_t *n = *fin->fin_mp; /* * Assume that M_PKTHDR is set and just work with what is left * rather than check.. * Should not make any real difference, anyway. */ if (m != n) { /* * Record the mbuf that points to the mbuf that we're * about to go to work on so that we can update the * m_next appropriately later. */ for (; n->m_next != m; n = n->m_next) ; } else { n = NULL; } #ifdef MHLEN if (len > MHLEN) #else if (len > MLEN) #endif { #ifdef HAVE_M_PULLDOWN if (m_pulldown(m, 0, len, NULL) == NULL) m = NULL; #else FREE_MB_T(*fin->fin_mp); m = NULL; n = NULL; #endif } else { m = m_pullup(m, len); } if (n != NULL) n->m_next = m; if (m == NULL) { /* * When n is non-NULL, it indicates that m pointed to * a sub-chain (tail) of the mbuf and that the head * of this chain has not yet been free'd. */ if (n != NULL) { FREE_MB_T(*fin->fin_mp); } *fin->fin_mp = NULL; fin->fin_m = NULL; return NULL; } if (n == NULL) *fin->fin_mp = m; while (M_LEN(m) == 0) { m = m->m_next; } fin->fin_m = m; ip = MTOD(m, char *) + ipoff; fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; if (fin->fin_fraghdr != NULL) fin->fin_fraghdr = (char *)ip + ((char *)fin->fin_fraghdr - (char *)fin->fin_ip); } if (len == fin->fin_plen) fin->fin_flx |= FI_COALESCE; return ip; } int ipf_inject(fin, m) fr_info_t *fin; mb_t *m; { int error = 0; if (fin->fin_out == 0) { netisr_dispatch(NETISR_IP, m); } else { fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len); fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); } return error; } int ipf_pfil_unhook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif #ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (ph_inet != NULL) pfil_remove_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (ph_inet6 != NULL) pfil_remove_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif #endif return (0); } int ipf_pfil_hook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif # ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); # endif if (ph_inet == NULL # ifdef USE_INET6 && ph_inet6 == NULL # endif ) { return ENODEV; } if (ph_inet != NULL) pfil_add_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 if (ph_inet6 != NULL) pfil_add_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif # endif return (0); } void ipf_event_reg(void) { ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \ ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \ ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); #if 0 ipf_clonetag = 
EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \ NULL, EVENTHANDLER_PRI_ANY); #endif } void ipf_event_dereg(void) { if (ipf_arrivetag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag); } if (ipf_departtag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag); } #if 0 if (ipf_clonetag != NULL) { EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag); } #endif } u_32_t ipf_random() { return arc4random(); } u_int ipf_pcksum(fin, hlen, sum) fr_info_t *fin; int hlen; u_int sum; { struct mbuf *m; u_int sum2; int off; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; m->m_data += hlen; m->m_len -= hlen; sum2 = in_cksum(fin->fin_m, fin->fin_plen - off); m->m_len += hlen; m->m_data -= hlen; /* * Both sum and sum2 are partial sums, so combine them together. */ sum += ~sum2 & 0xffff; while (sum > 0xffff) sum = (sum & 0xffff) + (sum >> 16); sum2 = ~sum & 0xffff; return sum2; } Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 305823) +++ head/sys/kern/uipc_socket.c (revision 305824) @@ -1,3727 +1,3728 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2004 The FreeBSD Foundation * Copyright (c) 2004-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 */ /* * Comments on the socket life cycle: * * soalloc() sets up socket layer state for a socket, called only by * socreate() and sonewconn(). Socket layer private. * * sodealloc() tears down socket layer state for a socket, called only by * sofree() and sonewconn(). Socket layer private. * * pru_attach() associates protocol layer state with an allocated socket; * called only once, may fail, aborting socket allocation. This is called * from socreate() and sonewconn(). Socket layer private. * * pru_detach() disassociates protocol layer state from an attached socket, * and will be called exactly once for sockets in which pru_attach() has * been successfully called. 
If pru_attach() returned an error, * pru_detach() will not be called. Socket layer private. * * pru_abort() and pru_close() notify the protocol layer that the last * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pru_abort() also * detached protocol state from the socket state, but this is no longer the * case. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new * sockets. * * sonewconn() creates a socket and attaches protocol state. This is a * public interface that may be used by protocols to create new sockets when * a new connection is received and will be available for accept() on a * listen socket. * * soclose() destroys a socket after possibly waiting for it to disconnect. * This is a public interface that socket consumers should use to close and * release a socket when done with it. * * soabort() destroys a socket without waiting for it to disconnect (used * only for incoming connections that are already partially or fully * connected). This is used internally by the socket layer when clearing * listen socket queues (due to overflow or close on the listen socket), but * is also a public interface protocols may use to abort connections in * their incomplete listen queues should they no longer be required. Sockets * placed in completed connection listen queues should not be aborted for * reasons described in the comment above the soclose() implementation. This * is not a general purpose close routine, and except in the specific * circumstances described here, should not be used. * * sofree() will free a socket and its protocol state if all references on * the socket have been released, and is the public interface to attempt to * free a socket when a reference is removed. This is a socket layer private * interface. * * NOTE: In addition to socreate() and soclose(), which provide a single * socket reference to the consumer to be managed as required, there are two * calls to explicitly manage socket references, soref(), and sorele(). * Currently, these are generally required only when transitioning a socket * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. * * NOTE: With regard to VNETs the general rule is that callers do not set * curvnet. Exceptions to this rule include soabort(), sodisconnect(), * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() * and sorflush(), which are usually called from a pre-set VNET context. * sopoll() currently does not need a VNET context to be set. 
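 *
 * For illustration, the consumer-facing life cycle described above
 * reduces to the following sketch (error handling elided; socreate()
 * and soclose() as defined later in this file):
 *
 *	struct socket *so;
 *
 *	error = socreate(PF_INET, &so, SOCK_STREAM, 0, cred, td);
 *	if (error == 0) {
 *		... sobind() / soconnect() / sosend() / soreceive() ...
 *		error = soclose(so);
 *	}
 *
 * socreate() hands back the single consumer reference and soclose()
 * consumes it; sofree() then completes the teardown once any named
 * references (SS_PROTOREF, queue membership) have also drained.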
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #include #endif static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); fo_kqfilter_t soo_kqfilter; static struct filterops solisten_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_solisten, }; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, }; static struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]); #define V_socket_hhh VNET(socket_hhh) /* * Limit on the number of connections in the listen queue waiting * for accept(2). * NB: The original sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ static u_int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS) { int error; int val; val = somaxconn; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr ) return (error); /* * The purpose of the UINT_MAX / 3 limit, is so that the formula * 3 * so_qlimit / 2 * below, will not overflow. */ if (val < 1 || val > UINT_MAX / 3) return (EINVAL); somaxconn = val; return (0); } SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size"); SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size (compat)"); static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); /* * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ struct mtx accept_mtx; MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. */ static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); /* * General IPC sysctl name space, used by sockets and a variety of other IPC * types. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* * Initialize the socket subsystem and set up the socket * memory allocator. 
*/ static uma_zone_t socket_zone; int maxsockets; static void socket_zone_change(void *tag) { maxsockets = uma_zone_set_max(socket_zone, maxsockets); } static void socket_hhook_register(int subtype) { if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype, &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register hook\n", __func__); } static void socket_hhook_deregister(int subtype) { if (hhook_head_deregister(V_socket_hhh[subtype]) != 0) printf("%s: WARNING: unable to deregister hook\n", __func__); } static void socket_init(void *tag) { socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); maxsockets = uma_zone_set_max(socket_zone, maxsockets); uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); static void socket_vnet_init(const void *unused __unused) { int i; /* We expect a contiguous range */ for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_register(i); } VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_init, NULL); static void socket_vnet_uninit(const void *unused __unused) { int i; for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_deregister(i); } VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_uninit, NULL); /* * Initialise maxsockets. This SYSINIT must be run after * tunable_mbinit(). */ static void init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); /* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS) { int error, newmaxsockets; newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr) { if (newmaxsockets > maxsockets && newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; EVENTHANDLER_INVOKE(maxsockets_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets available"); /* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. Note that it * would probably be better to allocate socket and PCB at the same time, but * I'm not convinced that all the protocols can be easily modified to do * this. * * soalloc() returns a socket with a ref count of 0. 
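 *
 * Note that the allocations below deliberately use M_NOWAIT: soalloc()
 * is also reached via sonewconn() from protocol receive paths where
 * sleeping is not permitted, so a transient shortage simply fails the
 * allocation and the caller drops the new connection instead.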
*/ static struct socket * soalloc(struct vnet *vnet) { struct socket *so; so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); if (so == NULL) return (NULL); #ifdef MAC if (mac_socket_init(so, M_NOWAIT) != 0) { uma_zfree(socket_zone, so); return (NULL); } #endif if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) { uma_zfree(socket_zone, so); return (NULL); } SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); sx_init(&so->so_snd.sb_sx, "so_snd_sx"); sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); TAILQ_INIT(&so->so_snd.sb_aiojobq); TAILQ_INIT(&so->so_rcv.sb_aiojobq); TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so); TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so); #ifdef VIMAGE VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet = vnet; #endif /* We shouldn't need the so_global_mtx */ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) { /* Do we need more comprehensive error returns? */ uma_zfree(socket_zone, so); return (NULL); } mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE vnet->vnet_sockcnt++; #endif mtx_unlock(&so_global_mtx); return (so); } /* * Free the storage associated with a socket at the socket layer, tear down * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. */ static void sodealloc(struct socket *so) { KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); /* remove accept filter if one is present. */ if (so->so_accf != NULL) do_setopt_accept_filter(so, NULL); #ifdef MAC mac_socket_destroy(so); #endif hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE); crfree(so->so_cred); khelp_destroy_osd(&so->osd); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); uma_zfree(socket_zone, so); } /* * socreate returns a socket with a ref count of 1. The socket should be * closed with soclose(). */ int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td) { struct protosw *prp; struct socket *so; int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == NULL) { /* No support for domain. */ if (pffinddomain(dom) == NULL) return (EAFNOSUPPORT); /* No support for socket type. 
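 * The distinction between the errors returned here matters to
		 * userland: EAFNOSUPPORT means the whole domain is absent,
		 * EPROTOTYPE that the protocol exists but does not match
		 * the requested socket type, and EPROTONOSUPPORT that the
		 * specific protocol number is not known in this domain.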
*/ if (proto == 0 && type != 0) return (EPROTOTYPE); return (EPROTONOSUPPORT); } if (prp->pr_usrreqs->pru_attach == NULL || prp->pr_usrreqs->pru_attach == pru_attach_notsupp) return (EPROTONOSUPPORT); if (prison_check_af(cred, prp->pr_domain->dom_family) != 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(CRED_TO_VNET(cred)); if (so == NULL) return (ENOBUFS); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) so->so_fibnum = td->td_proc->p_fibnum; else so->so_fibnum = 0; so->so_proto = prp; #ifdef MAC mac_socket_create(cred, so); #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); so->so_count = 1; /* * Auto-sizing of socket buffers is managed by the protocols and * the appropriate flags must be set in the pru_attach function. */ CURVNET_SET(so->so_vnet); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); CURVNET_RESTORE(); if (error) { KASSERT(so->so_count == 1, ("socreate: so_count %d", so->so_count)); so->so_count = 0; sodealloc(so); return (error); } *aso = so; return (0); } #ifdef REGRESSION static int regression_sonewconn_earlytest = 1; SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, ®ression_sonewconn_earlytest, 0, "Perform early sonewconn limit test"); #endif /* * When an attempt at a new connection is noted on a socket which accepts * connections, sonewconn is called. If the connection is possible (subject * to space constraints, etc.) then we allocate a new structure, properly * linked into the data structure of the original socket, and return this. * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. * * Note: the ref count on the socket is 0 on return. 
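 *
 * The queue limit test below allows some headroom beyond the
 * configured backlog: with a listen(2) backlog of 128 (so_qlimit ==
 * 128), for example, new connections are refused once so_qlen
 * exceeds 3 * 128 / 2 == 192. This is also why sysctl_somaxconn()
 * above caps the tunable at UINT_MAX / 3, so that this very
 * multiplication cannot overflow.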
*/ struct socket * sonewconn(struct socket *head, int connstatus) { static struct timeval lastover; static struct timeval overinterval = { 60, 0 }; static int overcount; struct socket *so; int over; ACCEPT_LOCK(); over = (head->so_qlen > 3 * head->so_qlimit / 2); ACCEPT_UNLOCK(); #ifdef REGRESSION if (regression_sonewconn_earlytest && over) { #else if (over) { #endif overcount++; if (ratecheck(&lastover, &overinterval)) { log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " "%i already in queue awaiting acceptance " "(%d occurrences)\n", __func__, head->so_pcb, head->so_qlen, overcount); overcount = 0; } return (NULL); } VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } if ((head->so_options & SO_ACCEPTFILTER) != 0) connstatus = 0; so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; so->so_state |= connstatus; ACCEPT_LOCK(); /* * The accept socket may be tearing down but we just * won a race on the ACCEPT_LOCK. * However, if sctp_peeloff() is called on a 1-to-many * style socket, the SO_ACCEPTCONN doesn't need to be set. */ if (!(head->so_options & SO_ACCEPTCONN) && ((head->so_proto->pr_protocol != IPPROTO_SCTP) || (head->so_type != SOCK_SEQPACKET))) { SOCK_LOCK(so); so->so_head = NULL; sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */ return (NULL); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_qstate |= SQ_COMP; head->so_qlen++; } else { /* * Keep removing sockets from the head until there's room for * us to insert on the tail. In pre-locking revisions, this * was a simple if(), but as we could be racing with other * threads and soabort() requires dropping locks, we must * loop waiting for the condition to be true. 
*/ while (head->so_incqlen > head->so_qlimit) { struct socket *sp; sp = TAILQ_FIRST(&head->so_incomp); TAILQ_REMOVE(&head->so_incomp, sp, so_list); head->so_incqlen--; sp->so_qstate &= ~SQ_INCOMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_qstate |= SQ_INCOMP; head->so_incqlen++; } ACCEPT_UNLOCK(); if (connstatus) { sorwakeup(head); wakeup_one(&head->so_timeo); } return (so); } int sobind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); CURVNET_RESTORE(); return (error); } int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td); CURVNET_RESTORE(); return (error); } /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an * existing listen socket. The protocol will call back into the sockets * layer using solisten_proto_check() and solisten_proto() to check and set * socket-layer listen state. Call backs are used so that the protocol can * acquire both protocol and socket layer locks in whatever order is required * by the protocol. * * Protocol implementors are advised to hold the socket lock across the * socket-layer test and set to avoid races at the socket layer. */ int solisten(struct socket *so, int backlog, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); CURVNET_RESTORE(); return (error); } int solisten_proto_check(struct socket *so) { SOCK_LOCK_ASSERT(so); if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) return (EINVAL); return (0); } void solisten_proto(struct socket *so, int backlog) { SOCK_LOCK_ASSERT(so); if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; so->so_options |= SO_ACCEPTCONN; } /* * Evaluate the reference count and named references on a socket; if no * references remain, free it. This should be called whenever a reference is * released, such as in sorele(), but also when named reference flags are * cleared in socket or protocol code. * * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). * * - The socket has been closed by user space, if ever open (SS_NOFDREF). * * - The protocol does not have an outstanding strong reference on the socket * (SS_PROTOREF). * * - The socket is not in a completed connection queue, so a process has been * notified that it is present. If it is removed, the user process may * block in accept() despite select() saying the socket was ready. 
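 *
 * Returning to solisten_proto() above: its clamp of the requested
 * backlog to somaxconn is visible from userland through the
 * FreeBSD-specific SO_LISTENQLIMIT option handled in sogetopt() later
 * in this file. A minimal sketch, error handling omitted and "s"
 * assumed to be a bound socket:
 *
 *	#include <sys/socket.h>
 *	#include <limits.h>
 *	#include <stdio.h>
 *
 *	static void
 *	show_qlimit(int s)
 *	{
 *		int qlimit;
 *		socklen_t len = sizeof(qlimit);
 *
 *		listen(s, INT_MAX);		// request an absurd backlog
 *		getsockopt(s, SOL_SOCKET, SO_LISTENQLIMIT, &qlimit, &len);
 *		printf("so_qlimit clamped to %d\n", qlimit);
 *	}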
*/ void sofree(struct socket *so) { struct protosw *pr = so->so_proto; struct socket *head; ACCEPT_LOCK_ASSERT(); SOCK_LOCK_ASSERT(so); if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { SOCK_UNLOCK(so); ACCEPT_UNLOCK(); return; } head = so->so_head; if (head != NULL) { KASSERT((so->so_qstate & SQ_COMP) != 0 || (so->so_qstate & SQ_INCOMP) != 0, ("sofree: so_head != NULL, but neither SQ_COMP nor " "SQ_INCOMP")); KASSERT((so->so_qstate & SQ_COMP) == 0 || (so->so_qstate & SQ_INCOMP) == 0, ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_qstate &= ~SQ_INCOMP; so->so_head = NULL; } KASSERT((so->so_qstate & SQ_COMP) == 0 && (so->so_qstate & SQ_INCOMP) == 0, ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); if (so->so_options & SO_ACCEPTCONN) { KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated")); } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so); if (pr->pr_usrreqs->pru_detach != NULL) (*pr->pr_usrreqs->pru_detach)(so); /* * From this point on, we assume that no other references to this * socket exist anywhere else in the stack. Therefore, no locks need * to be acquired or held. * * We used to do a lot of socket buffer and socket locking here, as * well as invoke sorflush() and perform wakeups. The direct call to * dom_dispose() and sbrelease_internal() are an inlining of what was * necessary from sorflush(). * * Notice that the socket buffer and kqueue state are torn down * before calling pru_detach. This means that protocols should not * assume they can perform socket wakeups, etc, in their detach code. */ sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); seldrain(&so->so_snd.sb_sel); seldrain(&so->so_rcv.sb_sel); knlist_destroy(&so->so_rcv.sb_sel.si_note); knlist_destroy(&so->so_snd.sb_sel.si_note); sodealloc(so); } /* * Close a socket on last file table reference removal. Initiate disconnect * if connected. Free socket when disconnect complete. * * This function will sorele() the socket. Note that soclose() may be called * prior to the ref count reaching zero. The actual socket structure will * not be freed until the ref count reaches zero. */ int soclose(struct socket *so) { int error = 0; KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); CURVNET_SET(so->so_vnet); funsetown(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) { if (error == ENOTCONN) error = 0; goto drop; } } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); ACCEPT_LOCK(); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; /* * Prevent new additions to the accept queues due * to ACCEPT_LOCK races while we are draining them. 
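 *
 * The SO_LINGER tsleep() loop earlier in soclose() is what makes the
 * following userland sketch block in close() for up to five seconds
 * while unsent data drains (a hypothetical illustration, not part of
 * this file):
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static void
 *	close_lingering(int s)
 *	{
 *		struct linger l;
 *
 *		l.l_onoff = 1;
 *		l.l_linger = 5;		// seconds spent in the "soclos" sleep
 *		setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *		close(s);		// may block while the peer ACKs data
 *	}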
*/ so->so_options &= ~SO_ACCEPTCONN; while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { TAILQ_REMOVE(&so->so_incomp, sp, so_list); so->so_incqlen--; sp->so_qstate &= ~SQ_INCOMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { TAILQ_REMOVE(&so->so_comp, sp, so_list); so->so_qlen--; sp->so_qstate &= ~SQ_COMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } KASSERT((TAILQ_EMPTY(&so->so_comp)), ("%s: so_comp populated", __func__)); KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("%s: so_incomp populated", __func__)); } SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */ CURVNET_RESTORE(); return (error); } /* * soabort() is used to abruptly tear down a connection, such as when a * resource limit is reached (listen queue depth exceeded), or if a listen * socket is closed while there are sockets waiting to be accepted. * * This interface is tricky, because it is called on an unreferenced socket, * and must be called only by a thread that has actually removed the socket * from the listen queue it was on, or races with other threads are risked. * * This interface will call into the protocol code, so must not be called * with any socket locks held. Protocols do call it while holding their own * recursible protocol mutexes, but this is something that should be subject * to review in the future. */ void soabort(struct socket *so) { /* * In as much as is possible, assert that no references to this * socket are held. This is not quite the same as asserting that the * current thread is responsible for arranging for no references, but * is as close as we can get for now. */ KASSERT(so->so_count == 0, ("soabort: so_count")); KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); ACCEPT_LOCK(); SOCK_LOCK(so); sofree(so); } int soaccept(struct socket *so, struct sockaddr **nam) { int error; SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); CURVNET_RESTORE(); return (error); } int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (soconnectat(AT_FDCWD, so, nam, td)); } int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); CURVNET_SET(so->so_vnet); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. This allows * user to disconnect by connecting to, e.g., a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) { error = EISCONN; } else { /* * Prevent accumulated error from previous connection from * biting us. 
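 *
 * That reset of so_error is also the reason the classic non-blocking
 * connect() idiom reads the result exactly once via SO_ERROR (whose
 * getsockopt handler later in this file likewise clears it). A
 * minimal sketch; the destination address is assumed to be prepared
 * by the caller:
 *
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <poll.h>
 *
 *	static int
 *	nb_connect(int s, const struct sockaddr *sa, socklen_t salen)
 *	{
 *		struct pollfd pfd = { .fd = s, .events = POLLOUT };
 *		int err;
 *		socklen_t len = sizeof(err);
 *
 *		fcntl(s, F_SETFL, fcntl(s, F_GETFL) | O_NONBLOCK);
 *		if (connect(s, sa, salen) == 0)
 *			return (0);		// connected immediately
 *		if (errno != EINPROGRESS)
 *			return (-1);
 *		poll(&pfd, 1, -1);		// writable once soisconnected()
 *		getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len);
 *		errno = err;
 *		return (err == 0 ? 0 : -1);
 *	}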
*/ so->so_error = 0; if (fd == AT_FDCWD) { error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); } else { error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd, so, nam, td); } } CURVNET_RESTORE(); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int error; CURVNET_SET(so1->so_vnet); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); CURVNET_RESTORE(); return (error); } int sodisconnect(struct socket *so) { int error; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection-based * socket if it supports implied connect. Return ENOTCONN if * not connected and no address is supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto out; } } else if (addr == NULL) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; SOCKBUF_UNLOCK(&so->so_snd); goto out; } } /* * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a * problem and need fixing. */ space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; space -= clen; SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; } if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf chain. * If no data is to be copied in, a single empty mbuf * is returned. */ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); if (top == NULL) { error = EFAULT; /* only possible error */ goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); /* * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock * than with. */ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out * of date. 
We could have received a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must * rethink this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands this flag and * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. */ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. 
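 *
 * For atomic protocols the sb_hiwat test below is what a userland
 * sender observes as EMSGSIZE; a minimal, hypothetical sketch using a
 * datagram deliberately larger than any default send buffer:
 *
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	static void
 *	oversized_send(int udp_fd, const struct sockaddr *dst, socklen_t dlen)
 *	{
 *		static char big[1024 * 1024];	// exceeds the send high-water
 *
 *		if (sendto(udp_fd, big, sizeof(big), 0, dst, dlen) == -1 &&
 *		    errno == EMSGSIZE)
 *			printf("datagram exceeds so_snd.sb_hiwat\n");
 *	}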
*/ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; goto release; } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); space -= clen; do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf * chain. If resid is 0, which can happen * only if we have control to send, then * a single empty mbuf is returned. This * is a workaround to prevent protocol send * methods to panic. */ top = m_uiotombuf(uio, M_WAITOK, space, (atomic ? max_hdr : 0), (atomic ? M_PKTHDR : 0) | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date. We could have received * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We * could probably recheck again inside the locking * protection here, but there are probably other * places that this also happens. We must rethink * this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use * PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, control, flags, td); CURVNET_RESTORE(); return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. 
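 *
 * A minimal userland sketch of this non-inline out-of-band path; it
 * assumes a connected TCP pair with SO_OOBINLINE left disabled:
 *
 *	#include <sys/socket.h>
 *
 *	static void
 *	oob_roundtrip(int snd_fd, int rcv_fd)
 *	{
 *		char c;
 *
 *		send(snd_fd, "!", 1, MSG_OOB);	// peer runs sohasoutofband()
 *		recv(rcv_fd, &c, 1, MSG_OOB);	// serviced by this routine
 *	}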
*/ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); VNET_SO_ASSERT(so); m = m_get(M_WAITOK, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Following replacement or removal of the first mbuf on the first mbuf chain * of a socket buffer, push necessary state changes back into the socket * buffer so that other consumers see the values consistently. 'nextrecord' * is the callers locally stored value of the original value of * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. * NOTE: 'nextrecord' may be NULL. */ static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { SOCKBUF_LOCK_ASSERT(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect the new * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the * addition of a second clause that takes care of the case where * sb_mb has been updated, but remains the last record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. We depend on the way that * records are added to the sockbuf by sbappend. In particular, each record * (mbufs linked through m_next) must begin with an address if the protocol * so specifies, followed by an optional mbuf or mbufs containing ancillary * data, and then zero or more mbufs of data. In order to allow parallelism * between network receive and copying to user space, as well as avoid * sleeping with a mutex held, we release the socket buffer mutex during the * user space copy. Although the sockbuf is locked, new data may still be * appended, and thus we must maintain consistency of the sockbuf during that * time. * * The caller may receive the data as a single mbuf chain by supplying an * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. */ int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) && uio->uio_resid) { VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); restart: SOCKBUF_LOCK(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or * 2. 
MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && sbavail(&so->so_rcv) < uio->uio_resid) && sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !sbavail(&so->so_rcv), ("receive: m == %p sbavail == %u", m, sbavail(&so->so_rcv))); if (so->so_error) { if (m != NULL) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); goto release; } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { if (m == NULL) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } else goto dontblock; } for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENOTCONN; goto release; } if (uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; goto restart; } dontblock: /* * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before dropping the * socket buffer mutex, and re-reading them when picking it up. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. * * By holding the high-level sblock(), we prevent simultaneous * readers from pulling off the front of the socket buffer. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); orig_resid = 0; if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } } /* * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. If MSG_PEEK, we * just copy the data; if !MSG_PEEK, we call into the protocol to * perform externalization (or freeing if controlp == NULL). 
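 *
 * From userland these control mbufs surface as cmsgs from recvmsg().
 * A minimal sketch asking UDP for receive timestamps, which arrive as
 * one MT_CONTROL mbuf ahead of the data (error handling omitted):
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	static void
 *	recv_with_stamp(int udp_fd)
 *	{
 *		char data[2048], cbuf[CMSG_SPACE(sizeof(struct timeval))];
 *		struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *		struct msghdr msg;
 *		struct cmsghdr *cm;
 *		struct timeval tv;
 *		int on = 1;
 *
 *		setsockopt(udp_fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = cbuf;
 *		msg.msg_controllen = sizeof(cbuf);
 *		if (recvmsg(udp_fd, &msg, 0) == -1)
 *			return;
 *		for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *		    cm = CMSG_NXTHDR(&msg, cm))
 *			if (cm->cmsg_level == SOL_SOCKET &&
 *			    cm->cmsg_type == SCM_TIMESTAMP)
 *				memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
 *		// tv now holds the kernel's receive timestamp
 *	}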
*/ if (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { if (flags & MSG_PEEK) { if (controlp != NULL) { - *controlp = m_copy(m, 0, m->m_len); + *controlp = m_copym(m, 0, m->m_len, + M_NOWAIT); controlp = &(*controlp)->m_next; } m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); SOCKBUF_LOCK(&so->so_rcv); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { orig_resid = 0; while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; orig_resid = 0; } if (m != NULL) { if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m, ("soreceive: post-control, sb_mb!=m")); KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive: post-control, lastrecord!=m")); } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); if (so->so_rcv.sb_mb == NULL) { KASSERT(so->so_rcv.sb_lastrecord == NULL, ("soreceive: sb_lastrecord != NULL")); } } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * Now continue to read any data mbufs off of the head of the socket * buffer until the read request is satisfied. Note that 'type' is * used to store the type of any mbuf reads that have happened so far * such that soreceive() can stop reading if the type changes, which * causes soreceive() to return only one of regular data and inline * out-of-band data in a single socket receive operation. */ moff = 0; offset = 0; while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 && error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA, ("m->m_type == %d", m->m_type)); so->so_rcv.sb_state &= ~SBS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. Otherwise copy * them out via the uio, then free. Sockbuf must be * consistent here (points to current mbuf, it points to next * record) when we drop priority; we must note any additions * to the sockbuf when we block interrupts again. 
*/ if (mp == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); error = uiomove(mtod(m, char *) + moff, (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* * The MT_SONAME mbuf has already been removed * from the record, so it is necessary to * remove the data mbufs, if any, to preserve * the invariant in the case of PR_ADDR that * requires MT_SONAME mbufs at the head of * each record. */ if (m && pr->pr_flags & PR_ATOMIC && ((flags & MSG_PEEK) == 0)) (void)sbdroprecord_locked(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } else uio->uio_resid -= len; SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp != NULL) { m->m_nextpkt = NULL; *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp != NULL) { if (flags & MSG_DONTWAIT) { *mp = m_copym(m, 0, len, M_NOWAIT); if (*mp == NULL) { /* * m_copym() couldn't * allocate an mbuf. * Adjust uio_resid back * (it was adjusted * down by len bytes, * which we didn't end * up "copying" over). */ uio->uio_resid += len; break; } } else { SOCKBUF_UNLOCK(&so->so_rcv); *mp = m_copym(m, 0, len, M_WAITOK); SOCKBUF_LOCK(&so->so_rcv); } } sbcut_locked(&so->so_rcv, len); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_rcv.sb_state |= SBS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), we * must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return with a * short count but without error. Keep sockbuf locked * against other readers. */ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && nextrecord == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) break; /* * Notify the protocol that some data has been * drained before blocking. */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * We could receive some data while we were notifying * the protocol. Skip blocking in this case. */ if (so->so_rcv.sb_mb == NULL) { error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. 
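 *
 * The MSG_WAITALL loop above is what lets a stream reader request an
 * exact byte count in a single call; a short count then only means
 * EOF, a signal, or an error. Minimal sketch:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *
 *	static ssize_t
 *	read_exact(int s, void *buf, size_t len)
 *	{
 *		// blocks until len bytes, EOF, a signal, or an error
 *		return (recv(s, buf, len, MSG_WAITALL));
 *	}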
*/ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * If soreceive() is being done from the socket callback, * then don't need to generate ACK to peer to update window, * since ACK will be generated on return to TCP. */ if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto restart; } SOCKBUF_UNLOCK(&so->so_rcv); if (flagsp != NULL) *flagsp |= flags; release: sbunlock(&so->so_rcv); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (controlp != NULL) return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; /* Prevent other readers from entering the socket. */ error = sblock(sb, SBLOCKWAIT(flags)); if (error) goto out; SOCKBUF_LOCK(sb); /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(sb); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. 
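 *
 * Seen from userland, the non-blocking branches above reduce to the
 * familiar EAGAIN loop; a hypothetical sketch:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <errno.h>
 *	#include <poll.h>
 *
 *	static void
 *	drain_nonblock(int s, char *buf, size_t len)
 *	{
 *		struct pollfd pfd = { .fd = s, .events = POLLIN };
 *		ssize_t n;
 *
 *		for (;;) {
 *			n = recv(s, buf, len, MSG_DONTWAIT);
 *			if (n > 0)
 *				continue;	// consume and retry
 *			if (n == 0)
 *				break;		// SBS_CANTRCVMORE: peer closed
 *			if (errno == EAGAIN) {
 *				poll(&pfd, 1, -1);
 *				continue;
 *			}
 *			break;			// hard error
 *		}
 *	}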
*/ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { if (*mp0 == NULL) *mp0 = sb->sb_mb; else m_cat(*mp0, sb->sb_mb); for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } n->m_next = NULL; sb->sb_mb = m; sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= len; if (*mp0 != NULL) m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); sbunlock(sb); return (error); } /* * Optimized version of soreceive() for simple datagram cases from userspace. * Unlike in the stream case, we're able to drop a datagram if copyout() * fails, and because we handle datagrams atomically, we don't need to use a * sleep lock to prevent I/O interlacing. */ int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; int flags, error; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; /* * For any complicated cases, fall back to the full * soreceive_generic(). */ if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); /* * Enforce restrictions on use. */ KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, ("soreceive_dgram: wantrcvd")); KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, ("soreceive_dgram: SBS_RCVATMARK")); KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, ("soreceive_dgram: P_CONNREQUIRED")); /* * Loop blocking while waiting for a datagram. 
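 *
 * Because datagrams are received atomically, any tail that does not
 * fit the caller's buffer is dropped and MSG_TRUNC raised (see the
 * end of this function). A minimal userland sketch:
 *
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void
 *	recv_check_trunc(int udp_fd)
 *	{
 *		char small[16];
 *		struct iovec iov = { .iov_base = small, .iov_len = sizeof(small) };
 *		struct msghdr msg;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		if (recvmsg(udp_fd, &msg, 0) >= 0 &&
 *		    (msg.msg_flags & MSG_TRUNC))
 *			printf("datagram truncated to %zu bytes\n",
 *			    sizeof(small));
 *	}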
*/ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); return (0); } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); return (EWOULDBLOCK); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive_dgram: lastrecord != m")); } KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, ("soreceive_dgram: m_nextpkt != nextrecord")); /* * Pull 'm' and its chain off the front of the packet queue. */ so->so_rcv.sb_mb = NULL; sockbuf_pushsync(&so->so_rcv, nextrecord); /* * Walk 'm's chain and free that many bytes from the socket buffer. */ for (m2 = m; m2 != NULL; m2 = m2->m_next) sbfree(&so->so_rcv, m2); /* * Do a few last checks before we let go of the lock. */ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); m = m_free(m); } if (m == NULL) { /* XXXRW: Can this happen? */ return (0); } /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * protocol to perform externalization (or freeing if controlp == * NULL). In some cases there can be only MT_CONTROL mbufs without * MT_DATA mbufs. 
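 *
 * For unix-domain sockets the externalization step is unp_externalize()
 * turning in-kernel file pointers back into descriptors. A minimal
 * SCM_RIGHTS sender sketch (the receiver is symmetrical; error
 * handling omitted):
 *
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	static int
 *	send_fd(int s, int fd)
 *	{
 *		char cbuf[CMSG_SPACE(sizeof(int))];
 *		char byte = 0;
 *		struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
 *		struct msghdr msg;
 *		struct cmsghdr *cm;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = cbuf;
 *		msg.msg_controllen = sizeof(cbuf);
 *		cm = CMSG_FIRSTHDR(&msg);
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type = SCM_RIGHTS;
 *		cm->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd, sizeof(int));
 *		return (sendmsg(s, &msg, 0) == -1 ? -1 : 0);
 *	}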
*/ if (m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { m2 = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = m2; } while (m != NULL && m->m_type == MT_CONTROL); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("soreceive_dgram: !data")); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } if (m != NULL) { flags |= MSG_TRUNC; m_freem(m); } if (flagsp != NULL) *flagsp |= flags; return (0); } int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int error; CURVNET_SET(so->so_vnet); error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); CURVNET_RESTORE(); return (error); } int soshutdown(struct socket *so, int how) { struct protosw *pr = so->so_proto; int error; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) return (ENOTCONN); CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) (*pr->pr_usrreqs->pru_flush)(so, how); if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); wakeup(&so->so_timeo); CURVNET_RESTORE(); return (error); } wakeup(&so->so_timeo); CURVNET_RESTORE(); return (0); } void sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; struct socket aso; VNET_SO_ASSERT(so); /* * In order to avoid calling dom_dispose with the socket buffer mutex * held, and in order to generally avoid holding the lock for a long * time, we make a copy of the socket buffer and clear the original * (except locks, state). The new socket buffer copy won't have * initialized locks so we can only call routines that won't use or * assert those locks. * * Dislodge threads currently blocked in receive and wait to acquire * a lock against other simultaneous readers before clearing the * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptable waiting. */ socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); /* * Invalidate/clear most of the sockbuf structure, but leave selinfo * and mutex data unchanged. */ SOCKBUF_LOCK(sb); bzero(&aso, sizeof(aso)); aso.so_pcb = so->so_pcb; bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); sbunlock(sb); /* * Dispose of special rights and flush the copied socket. Don't call * any unsafe routines (that rely on locks being initialized) on aso. */ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(&aso); sbrelease_internal(&aso.so_rcv, so); } /* * Wrapper for Socket established helper hook. * Parameters: socket, context of the hook point, hook id. 
*/ static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id) { struct socket_hhook_data hhook_data = { .so = so, .hctx = hctx, .m = NULL, .status = 0 }; CURVNET_SET(so->so_vnet); HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd); CURVNET_RESTORE(); /* Ugly but needed, since hhooks return void for now */ return (hhook_data.status); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in an * additional variant to handle the case where the option value needs to be * some kind of integer, but not a specific size. In addition to their use * here, these functions are also called by the protocol-level pr_ctloutput() * routines. */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, but if we * don't get the minimum length the caller wants, we return EINVAL. * On success, sopt->sopt_valsize is set to however much we actually * retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_td != NULL) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return (0); } /* * Kernel version of setsockopt(2). * * XXX: optlen is size_t, not socklen_t */ int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen) { struct sockopt sopt; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = optval; sopt.sopt_valsize = optlen; sopt.sopt_td = NULL; return (sosetopt(so, &sopt)); } int sosetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; sbintime_t val; uint32_t val32; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) { error = (*so->so_proto->pr_ctloutput)(so, sopt); CURVNET_RESTORE(); return (error); } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = do_setopt_accept_filter(so, sopt); if (error) goto bad; break; case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; SOCK_LOCK(so); so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; SOCK_UNLOCK(so); break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; SOCK_LOCK(so); if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; SOCK_UNLOCK(so); break; case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; else so->so_fibnum = 0; break; case SO_USER_COOKIE: error = sooptcopyin(sopt, &val32, sizeof val32, sizeof val32); if (error) goto bad; so->so_user_cookie = val32; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for 
any of these options, * so disallow them. */ if (optval < 1) { error = EINVAL; goto bad; } switch (sopt->sopt_name) { case SO_SNDBUF: case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, (u_long)optval, so, curthread) == 0) { error = ENOBUFS; goto bad; } (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; break; /* * Make sure the low-water is never greater than the * high-water. */ case SO_SNDLOWAT: SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_lowat = (optval > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : optval; SOCKBUF_UNLOCK(&so->so_snd); break; case SO_RCVLOWAT: SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; SOCKBUF_UNLOCK(&so->so_rcv); break; } break; case SO_SNDTIMEO: case SO_RCVTIMEO: #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; error = sooptcopyin(sopt, &tv32, sizeof tv32, sizeof tv32); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); } else #endif error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } if (tv.tv_sec > INT32_MAX) val = SBT_MAX; else val = tvtosbt(tv); switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof extmac, sizeof extmac); if (error) goto bad; error = mac_setsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); #else error = EOPNOTSUPP; #endif break; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput != NULL) (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: CURVNET_RESTORE(); return (error); } /* * Helper routine for getsockopt. */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, possibly * truncated to fit in the user's buffer. Traditional behavior is * that we always tell the user precisely how much we copied, rather * than something useful like the total amount we had available for * her. Note that this interface is not idempotent; the entire * answer must be generated ahead of time. 
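 *
 * A userland sketch of that truncation contract: with a one-byte
 * buffer, getsockopt() still succeeds, copies out one byte, and
 * reports the shortened length instead of failing:
 *
 *	#include <sys/socket.h>
 *	#include <stdio.h>
 *
 *	static void
 *	truncated_get(int s)
 *	{
 *		char one;
 *		socklen_t len = sizeof(one);
 *
 *		if (getsockopt(s, SOL_SOCKET, SO_TYPE, &one, &len) == 0)
 *			printf("copied %u byte(s) of SO_TYPE\n",
 *			    (unsigned)len);
 *	}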
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != NULL) { if (sopt->sopt_td != NULL) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return (error); } int sogetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; CURVNET_RESTORE(); return (error); } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = do_getopt_accept_filter(so, sopt); break; case SO_LINGER: SOCK_LOCK(so); l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; SOCK_UNLOCK(so); error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_PROTOCOL: optval = so->so_proto->pr_protocol; goto integer; case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; so->so_error = 0; SOCK_UNLOCK(so); goto integer; case SO_SNDBUF: optval = so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; CP(tv, tv32, tv_sec); CP(tv, tv32, tv_usec); error = sooptcopyout(sopt, &tv32, sizeof tv32); } else #endif error = sooptcopyout(sopt, &tv, sizeof tv); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_PEERLABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_LISTENQLIMIT: optval = so->so_qlimit; goto integer; case SO_LISTENQLEN: optval = so->so_qlen; goto integer; case SO_LISTENINCQLEN: optval = so->so_incqlen; goto integer; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } } #ifdef MAC bad: #endif CURVNET_RESTORE(); return (error); } int soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) return ENOBUFS; if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size) { MGET(m, sopt->sopt_td ? 
M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { m_freem(*mp); return ENOBUFS; } if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); m_freem(*mp); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; m_prev->m_next = m; m_prev = m; } return (0); } int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; m = m->m_next; } if (m != NULL) /* the chain should have been allocated large enough at ip6_sooptmcopyin() */ panic("ip6_sooptmcopyin"); return (0); } int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; size_t valsize = 0; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; valsize += m->m_len; m = m->m_next; } if (m != NULL) { /* the user should have supplied a buffer large enough for the whole option */ m_freem(m0); return(EINVAL); } sopt->sopt_valsize = valsize; return (0); } /* * sohasoutofband(): protocol notifies socket layer of the arrival of new * out-of-band data, which will then notify socket consumers. */ void sohasoutofband(struct socket *so) { if (so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGURG, 0); selwakeuppri(&so->so_rcv.sb_sel, PSOCK); } int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { /* * We do not need to set or assert curvnet as long as everyone uses * sopoll_generic(). 
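 *
 * A userland view of the event mapping implemented by sopoll_generic()
 * below: POLLIN for readable data or a closed receive side, POLLPRI
 * while at or before the out-of-band mark, POLLHUP once both
 * directions are shut down. Sketch:
 *
 *	#include <poll.h>
 *	#include <stdio.h>
 *
 *	static void
 *	watch(int s)
 *	{
 *		struct pollfd pfd =
 *		    { .fd = s, .events = POLLIN | POLLPRI | POLLOUT };
 *
 *		if (poll(&pfd, 1, 1000) > 0) {
 *			if (pfd.revents & POLLIN)
 *				printf("readable\n");
 *			if (pfd.revents & POLLPRI)
 *				printf("at or before OOB mark\n");
 *			if (pfd.revents & POLLHUP)
 *				printf("both directions closed\n");
 *		}
 *	}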
*/ return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, td)); } int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { int revents = 0; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if ((events & POLLINIGNEOF) == 0) { if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { revents |= events & (POLLIN | POLLRDNORM); if (so->so_snd.sb_state & SBS_CANTSENDMORE) revents |= POLLHUP; } } if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(td, &so->so_rcv.sb_sel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &so->so_snd.sb_sel); so->so_snd.sb_flags |= SB_SEL; } } SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (revents); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) kn->kn_fop = &solisten_filtops; else kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; default: return (EINVAL); } SOCKBUF_LOCK(sb); knlist_add(&sb->sb_sel.si_note, kn, 1); sb->sb_flags |= SB_KNOTE; SOCKBUF_UNLOCK(sb); return (0); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job) { return EOPNOTSUPP; } int pru_attach_notsupp(struct socket *so, int proto, struct thread *td) { return EOPNOTSUPP; } int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return EOPNOTSUPP; } int pru_disconnect_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) { return EOPNOTSUPP; } int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { return EOPNOTSUPP; } int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count) { return (EOPNOTSUPP); } /* * This isn't really a ``null'' operation, but it's the default one and * doesn't do anything destructive. 
*/ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } int pru_shutdown_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { return EOPNOTSUPP; } int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { return EOPNOTSUPP; } int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td) { return EOPNOTSUPP; } static void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; SOCKBUF_LOCK(&so->so_rcv); knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1); if (knlist_empty(&so->so_rcv.sb_sel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; SOCKBUF_UNLOCK(&so->so_rcv); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) return 1; } else { if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return 1; } /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); } static void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; SOCKBUF_LOCK(&so->so_snd); knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1); if (knlist_empty(&so->so_snd.sb_sel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; SOCKBUF_UNLOCK(&so->so_snd); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbspace(&so->so_snd); hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); else return (kn->kn_data >= so->so_snd.sb_lowat); } /*ARGSUSED*/ static int filt_solisten(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; kn->kn_data = so->so_qlen; return (!TAILQ_EMPTY(&so->so_comp)); } int socheckuid(struct socket *so, uid_t uid) { if (so == NULL) return (EPERM); if (so->so_cred->cr_uid != uid) return (EPERM); return (0); } /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. */ /* * Procedures to manipulate state flags of socket and do appropriate wakeups. * * Normal sequence from the active (originating) side is that * soisconnecting() is called during processing of connect() call, resulting * in an eventual call to soisconnected() if/when the connection is * established. When the connection is torn down soisdisconnecting() is * called during processing of disconnect() call, and soisdisconnected() is * called when the connection to the peer is totally severed. 
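 *
 * In outline, for the active side:
 *
 *	connect()          -> soisconnecting()    (SS_ISCONNECTING set)
 *	connection made    -> soisconnected()     (SS_ISCONNECTED set,
 *	                                           sleepers/selectors woken)
 *	disconnect()       -> soisdisconnecting() (no further send/receive)
 *	connection severed -> soisdisconnected()  (send buffer dropped,
 *	                                           final wakeups delivered)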
The semantics * of these routines are such that connectionless protocols can call * soisconnected() and soisdisconnected() only, bypassing the in-progress * calls when setting up a ``connection'' takes no time. * * From the passive side, a socket is created with two queues of sockets: * so_incomp for connections in progress and so_comp for connections already * made and awaiting user acceptance. As a protocol is preparing incoming * connections, it creates a socket structure queued on so_incomp by calling * sonewconn(). When the connection is established, soisconnected() is * called, and transfers the socket structure to so_comp, making it available * to accept(). * * If a socket is closed with sockets on either so_incomp or so_comp, these * sockets are dropped. * * If higher-level protocols are implemented in the kernel, the wakeups done * here will sometimes cause software-interrupt process scheduling. */ void soisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } void soisconnected(struct socket *so) { struct socket *head; int ret; restart: ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; head = so->so_head; if (head != NULL && (so->so_qstate & SQ_INCOMP)) { if ((so->so_options & SO_ACCEPTFILTER) == 0) { SOCK_UNLOCK(so); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_qstate &= ~SQ_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); head->so_qlen++; so->so_qstate |= SQ_COMP; ACCEPT_UNLOCK(); sorwakeup(head); wakeup_one(&head->so_timeo); } else { ACCEPT_UNLOCK(); soupcall_set(so, SO_RCV, head->so_accf->so_accept_filter->accf_callback, head->so_accf->so_accept_filter_arg); so->so_options &= ~SO_ACCEPTFILTER; ret = head->so_accf->so_accept_filter->accf_callback(so, head->so_accf->so_accept_filter_arg, M_NOWAIT); if (ret == SU_ISCONNECTED) soupcall_clear(so, SO_RCV); SOCK_UNLOCK(so); if (ret == SU_ISCONNECTED) goto restart; } return; } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } void soisdisconnecting(struct socket *so) { /* * Note: This code assumes that SOCK_LOCK(so) and * SOCKBUF_LOCK(&so->so_rcv) are the same. */ SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); wakeup(&so->so_timeo); } void soisdisconnected(struct socket *so) { /* * Note: This code assumes that SOCK_LOCK(so) and * SOCKBUF_LOCK(&so->so_rcv) are the same. */ SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISDISCONNECTED; socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); socantsendmore_locked(so); wakeup(&so->so_timeo); } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags) { struct sockaddr *sa2; sa2 = malloc(sa->sa_len, M_SONAME, mflags); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Register per-socket buffer upcalls. 
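 *
 * A usage sketch (the callback and argument names are hypothetical): a
 * consumer that wants notification when data arrives would call
 *
 *	soupcall_set(so, SO_RCV, my_rcv_upcall, my_arg);
 *
 * and later remove it with soupcall_clear(so, SO_RCV).  Note that the
 * accept-filter code above invokes such callbacks with M_NOWAIT, so an
 * upcall should not assume it may sleep.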
*/ void soupcall_set(struct socket *so, int which, int (*func)(struct socket *, void *, int), void *arg) { struct sockbuf *sb; switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_set: bad which"); } SOCKBUF_LOCK_ASSERT(sb); #if 0 /* XXX: accf_http actually wants to do this on purpose. */ KASSERT(sb->sb_upcall == NULL, ("soupcall_set: overwriting upcall")); #endif sb->sb_upcall = func; sb->sb_upcallarg = arg; sb->sb_flags |= SB_UPCALL; } void soupcall_clear(struct socket *so, int which) { struct sockbuf *sb; switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_clear: bad which"); } SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear")); sb->sb_upcall = NULL; sb->sb_upcallarg = NULL; sb->sb_flags &= ~SB_UPCALL; } /* * Create an external-format (``xsocket'') structure using the information in * the kernel-format socket structure pointed to by so. This is done to * reduce the spew of irrelevant information over this interface, to isolate * user code from changes in the kernel structure, and potentially to provide * information-hiding if we decide that some of this information should be * hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_pgid = so->so_sigio ? 
so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_cred->cr_uid; } /* * Socket accessor functions to provide external consumers with * a safe interface to socket state * */ void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg) { TAILQ_FOREACH(so, &so->so_comp, so_list) func(so, arg); } struct sockbuf * so_sockbuf_rcv(struct socket *so) { return (&so->so_rcv); } struct sockbuf * so_sockbuf_snd(struct socket *so) { return (&so->so_snd); } int so_state_get(const struct socket *so) { return (so->so_state); } void so_state_set(struct socket *so, int val) { so->so_state = val; } int so_options_get(const struct socket *so) { return (so->so_options); } void so_options_set(struct socket *so, int val) { so->so_options = val; } int so_error_get(const struct socket *so) { return (so->so_error); } void so_error_set(struct socket *so, int val) { so->so_error = val; } int so_linger_get(const struct socket *so) { return (so->so_linger); } void so_linger_set(struct socket *so, int val) { so->so_linger = val; } struct protosw * so_protosw_get(const struct socket *so) { return (so->so_proto); } void so_protosw_set(struct socket *so, struct protosw *val) { so->so_proto = val; } void so_sorwakeup(struct socket *so) { sorwakeup(so); } void so_sowwakeup(struct socket *so) { sowwakeup(so); } void so_sorwakeup_locked(struct socket *so) { sorwakeup_locked(so); } void so_sowwakeup_locked(struct socket *so) { sowwakeup_locked(so); } void so_lock(struct socket *so) { SOCK_LOCK(so); } void so_unlock(struct socket *so) { SOCK_UNLOCK(so); } Index: head/sys/net/if_arcsubr.c =================================================================== --- head/sys/net/if_arcsubr.c (revision 305823) +++ head/sys/net/if_arcsubr.c (revision 305824) @@ -1,834 +1,834 @@ /* $NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1994, 1995 Ignatios Souvatzis * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: if_ethersubr.c,v 1.9 1994/06/29 06:36:11 cgd Exp * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #define ARCNET_ALLOW_BROKEN_ARP static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *); static int arc_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); u_int8_t arcbroadcastaddr = 0; #define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp)) #define senderr(e) { error = (e); goto bad;} #define SIN(s) ((const struct sockaddr_in *)(s)) /* * ARCnet output routine. * Encapsulate a packet of type family for the local net. * Assumes that ifp is actually pointer to arccom structure. */ int arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct arc_header *ah; int error; u_int8_t atype, adst; int loop_copy = 0; int isphds; #if defined(INET) || defined(INET6) int is_gw = 0; #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) return(ENETDOWN); /* m, m1 aren't initialized yet */ error = 0; #if defined(INET) || defined(INET6) if (ro != NULL) is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: /* * For now, use the simple IP addr -> ARCnet addr mapping */ if (m->m_flags & (M_BCAST|M_MCAST)) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else if (ifp->if_flags & IFF_NOARP) adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF; else { error = arpresolve(ifp, is_gw, m, dst, &adst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } atype = (ifp->if_flags & IFF_LINK0) ? ARCTYPE_IP_OLD : ARCTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ARCNET); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: atype = ARCTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: atype = ARCTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, &adst, ARC_ADDR_LEN); else bcopy(ar_tha(ah), &adst, ARC_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) != 0) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else { error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL, NULL); if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } atype = ARCTYPE_INET6; break; #endif case AF_UNSPEC: { const struct arc_header *ah; loop_copy = -1; ah = (const struct arc_header *)dst->sa_data; adst = ah->arc_dhost; atype = ah->arc_type; if (atype == ARCTYPE_ARP) { atype = (ifp->if_flags & IFF_LINK0) ? 
ARCTYPE_ARP_OLD: ARCTYPE_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP /* * XXX It's not clear per RFC826 if this is needed, but * "assigned numbers" say this is wrong. * However, e.g., AmiTCP 3.0Beta used it... we make this * switchable for emergency cases. Not perfect, but... */ if (ifp->if_flags & IFF_LINK2) mtod(m, struct arphdr *)->ar_pro = atype - 1; #endif } break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } isphds = arc_isphds(atype); M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); ah = mtod(m, struct arc_header *); ah->arc_type = atype; ah->arc_dhost = adst; ah->arc_shost = ARC_LLADDR(ifp); if (isphds) { ah->arc_flag = 0; ah->arc_seqid = 0; } if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { - struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); + struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); (void) if_simloop(ifp, n, dst->sa_family, ARC_HDRLEN); } else if (ah->arc_dhost == ah->arc_shost) { (void) if_simloop(ifp, m, dst->sa_family, ARC_HDRLEN); return (0); /* XXX */ } } BPF_MTAP(ifp, m); error = ifp->if_transmit(ifp, m); return (error); bad: if (m) m_freem(m); return (error); } void arc_frag_init(struct ifnet *ifp) { struct arccom *ac; ac = (struct arccom *)ifp->if_l2com; ac->curr_frag = 0; } struct mbuf * arc_frag_next(struct ifnet *ifp) { struct arccom *ac; struct mbuf *m; struct arc_header *ah; ac = (struct arccom *)ifp->if_l2com; if ((m = ac->curr_frag) == NULL) { int tfrags; /* dequeue new packet */ IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) return 0; ah = mtod(m, struct arc_header *); if (!arc_isphds(ah->arc_type)) return m; ++ac->ac_seqid; /* make the seqid unique */ tfrags = howmany(m->m_pkthdr.len, ARC_MAX_DATA); ac->fsflag = 2 * tfrags - 3; ac->sflag = 0; ac->rsflag = ac->fsflag; ac->arc_dhost = ah->arc_dhost; ac->arc_shost = ah->arc_shost; ac->arc_type = ah->arc_type; m_adj(m, ARC_HDRNEWLEN); ac->curr_frag = m; } /* split out next fragment and return it */ if (ac->sflag < ac->fsflag) { /* we CAN'T have short packets here */ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT); if (ac->curr_frag == 0) { m_freem(m); return 0; } M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == NULL) { m_freem(ac->curr_frag); ac->curr_frag = 0; return 0; } ah = mtod(m, struct arc_header *); ah->arc_flag = ac->rsflag; ah->arc_seqid = ac->ac_seqid; ac->sflag += 2; ac->rsflag = ac->sflag; } else if ((m->m_pkthdr.len >= ARC_MIN_FORBID_LEN - ARC_HDRNEWLEN + 2) && (m->m_pkthdr.len <= ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT); if (m == NULL) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = 0xFF; ah->arc_seqid = 0xFFFF; ah->arc_type2 = ac->arc_type; ah->arc_flag2 = ac->sflag; ah->arc_seqid2 = ac->ac_seqid; } else { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == NULL) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = ac->sflag; ah->arc_seqid = ac->ac_seqid; } ah->arc_dhost = ac->arc_dhost; ah->arc_shost = ac->arc_shost; ah->arc_type = ac->arc_type; return m; } /* * Defragmenter. Returns mbuf if last packet found, else * NULL. frees incoming mbuf as necessary. 
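 *
 * Worked example of the split-flag arithmetic used by arc_frag_next()
 * above and checked by this defragmenter: a packet split into
 * tfrags == 3 pieces goes out with arc_flag values 3 (the first
 * fragment, fsflag = 2*3 - 3, always odd), then 2, then 4 (the last
 * fragment carries sflag, which by then exceeds fsflag).  On receive,
 * maxflag is latched from the first fragment, each follow-on must
 * arrive with flag == lastseen + 2, and the packet is complete once
 * lastseen > maxflag.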
*/ static __inline struct mbuf * arc_defrag(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah, *ah1; struct arccom *ac; struct ac_frag *af; struct mbuf *m1; char *s; int newflen; u_char src,dst,typ; ac = (struct arccom *)ifp->if_l2com; if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); typ = ah->arc_type; if (!arc_isphds(typ)) return m; src = ah->arc_shost; dst = ah->arc_dhost; if (ah->arc_flag == 0xff) { m_adj(m, 4); if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); } af = &ac->ac_fragtab[src]; m1 = af->af_packet; s = "debug code error"; if (ah->arc_flag & 1) { /* * first fragment. We always initialize, which is * about the right thing to do, as we only want to * accept one fragmented packet per src at a time. */ if (m1 != NULL) m_freem(m1); af->af_packet = m; m1 = m; af->af_maxflag = ah->arc_flag; af->af_lastseen = 0; af->af_seqid = ah->arc_seqid; return NULL; /* notreached */ } else { /* check for unfragmented packet */ if (ah->arc_flag == 0) return m; /* do we have a first packet from that src? */ if (m1 == NULL) { s = "no first frag"; goto outofseq; } ah1 = mtod(m1, struct arc_header *); if (ah->arc_seqid != ah1->arc_seqid) { s = "seqid differs"; goto outofseq; } if (typ != ah1->arc_type) { s = "type differs"; goto outofseq; } if (dst != ah1->arc_dhost) { s = "dest host differs"; goto outofseq; } /* typ, seqid and dst are ok here. */ if (ah->arc_flag == af->af_lastseen) { m_freem(m); return NULL; } if (ah->arc_flag == af->af_lastseen + 2) { /* ok, this is next fragment */ af->af_lastseen = ah->arc_flag; m_adj(m,ARC_HDRNEWLEN); /* * m_cat might free the first mbuf (with pkthdr) * in 2nd chain; therefore: */ newflen = m->m_pkthdr.len; m_cat(m1,m); m1->m_pkthdr.len += newflen; /* is it the last one? */ if (af->af_lastseen > af->af_maxflag) { af->af_packet = NULL; return(m1); } else return NULL; } s = "other reason"; /* if all else fails, it is out of sequence, too */ } outofseq: if (m1) { m_freem(m1); af->af_packet = NULL; } if (m) m_freem(m); log(LOG_INFO,"%s: got out of seq. packet: %s\n", ifp->if_xname, s); return NULL; } /* * return 1 if Packet Header Definition Standard, else 0. * For now: old IP, old ARP aren't obviously. Lacking correct information, * we guess that besides new IP and new ARP also IPX and APPLETALK are PHDS. * (Apple and Novell corporations were involved, among others, in PHDS work). * Easiest is to assume that everybody else uses that, too. */ int arc_isphds(u_int8_t type) { return (type != ARCTYPE_IP_OLD && type != ARCTYPE_ARP_OLD && type != ARCTYPE_DIAGNOSE); } /* * Process a received Arcnet packet; * the packet is in the mbuf chain m with * the ARCnet header. */ void arc_input(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah; int isr; u_int8_t atype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } /* possibly defragment: */ m = arc_defrag(ifp, m); if (m == NULL) return; BPF_MTAP(ifp, m); ah = mtod(m, struct arc_header *); /* does this belong to us? 
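 * A frame is kept when the interface is promiscuous, or when the
 * destination is the ARCnet broadcast address (0) or our own one-byte
 * link-level address; anything else is dropped below.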
*/ if ((ifp->if_flags & IFF_PROMISC) == 0 && ah->arc_dhost != arcbroadcastaddr && ah->arc_dhost != ARC_LLADDR(ifp)) { m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if (ah->arc_dhost == arcbroadcastaddr) { m->m_flags |= M_BCAST|M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } atype = ah->arc_type; switch (atype) { #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); isr = NETISR_IP; break; case ARCTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRNEWLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; case ARCTYPE_ARP_OLD: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; #endif #ifdef INET6 case ARCTYPE_INET6: m_adj(m, ARC_HDRNEWLEN); isr = NETISR_IPV6; break; #endif default: m_freem(m); return; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } /* * Register (new) link level address. */ void arc_storelladdr(struct ifnet *ifp, u_int8_t lla) { ARC_LLADDR(ifp) = lla; } /* * Perform common duties while attaching to interface list */ void arc_ifattach(struct ifnet *ifp, u_int8_t lla) { struct ifaddr *ifa; struct sockaddr_dl *sdl; struct arccom *ac; if_attach(ifp); ifp->if_addrlen = 1; ifp->if_hdrlen = ARC_HDRLEN; ifp->if_mtu = 1500; ifp->if_resolvemulti = arc_resolvemulti; if (ifp->if_baudrate == 0) ifp->if_baudrate = 2500000; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ARCNET; sdl->sdl_alen = ifp->if_addrlen; if (ifp->if_flags & IFF_BROADCAST) ifp->if_flags |= IFF_MULTICAST|IFF_ALLMULTI; ac = (struct arccom *)ifp->if_l2com; ac->ac_seqid = (time_second) & 0xFFFF; /* try to make seqid unique */ if (lla == 0) { /* XXX this message isn't entirely clear, to me -- cgd */ log(LOG_ERR,"%s: link address 0 reserved for broadcasts. Please change it and ifconfig %s down up\n", ifp->if_xname, ifp->if_xname); } arc_storelladdr(ifp, lla); ifp->if_broadcastaddr = &arcbroadcastaddr; bpfattach(ifp, DLT_ARCNET, ARC_HDRLEN); } void arc_ifdetach(struct ifnet *ifp) { bpfdetach(ifp); if_detach(ifp); } int arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) &ifr->ifr_data; *(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) error = EAFNOSUPPORT; else { switch (ifr->ifr_addr.sa_family) { case AF_INET: case AF_INET6: error = 0; break; default: error = EAFNOSUPPORT; break; } } break; case SIOCSIFMTU: /* * Set the interface MTU. 
* mtu can't be larger than ARCMTU for RFC1051 * and can't be larger than ARC_PHDS_MTU */ if (((ifp->if_flags & IFF_LINK0) && ifr->ifr_mtu > ARCMTU) || ifr->ifr_mtu > ARC_PHDS_MAXMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; } return (error); } /* based on ether_resolvemulti() */ int arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; if (*LLADDR(sdl) != arcbroadcastaddr) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals"); static void* arc_alloc(u_char type, struct ifnet *ifp) { struct arccom *ac; ac = malloc(sizeof(struct arccom), M_ARCCOM, M_WAITOK | M_ZERO); ac->ac_ifp = ifp; return (ac); } static void arc_free(void *com, u_char type) { free(com, M_ARCCOM); } static int arc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free); break; case MOD_UNLOAD: if_deregister_com_alloc(IFT_ARCNET); break; default: return EOPNOTSUPP; } return (0); } static moduledata_t arc_mod = { "arcnet", arc_modevent, 0 }; DECLARE_MODULE(arcnet, arc_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(arcnet, 1); Index: head/sys/net/if_fddisubr.c =================================================================== --- head/sys/net/if_fddisubr.c (revision 305823) +++ head/sys/net/if_fddisubr.c (revision 305824) @@ -1,670 +1,670 @@ /*- * Copyright (c) 1995, 1996 * Matt Thomas . All rights reserved. * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #ifdef DECNET #include #endif #include static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int fddi_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void fddi_input(struct ifnet *ifp, struct mbuf *m); #define senderr(e) do { error = (e); goto bad; } while (0) /* * FDDI output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ static int fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t type; int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); #if defined(INET) || defined(INET6) if (ro != NULL) is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: { error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; } case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = -1; /* if this is for us, don't do it */ switch (ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN); else bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 
0 : error); type = htons(ETHERTYPE_IPV6); break; #endif /* INET6 */ case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN); /* FALLTHROUGH */ } case AF_UNSPEC: { const struct ether_header *eh; loop_copy = -1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN); if (*edst & 1) m->m_flags |= (M_BCAST|M_MCAST); type = eh->ether_type; break; } case AF_IMPLINK: { fh = mtod(m, struct fddi_header *); error = EPROTONOSUPPORT; switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) { case FDDIFC_LLC_ASYNC: { /* legal priorities are 0 through 7 */ if ((fh->fddi_fc & FDDIFC_Z) > 7) goto bad; break; } case FDDIFC_LLC_SYNC: { /* FDDIFC_Z bits reserved, must be zero */ if (fh->fddi_fc & FDDIFC_Z) goto bad; break; } case FDDIFC_SMT: { /* FDDIFC_Z bits must be non zero */ if ((fh->fddi_fc & FDDIFC_Z) == 0) goto bad; break; } default: { /* anything else is too dangerous */ goto bad; } } error = 0; if (fh->fddi_dhost[0] & 1) m->m_flags |= (M_BCAST|M_MCAST); goto queue_it; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } /* * Add LLC header. */ if (type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); fh = mtod(m, struct fddi_header *); fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4; bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN); queue_it: if (hdrcmplt) bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); else bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; - n = m_copy(m, 0, (int)M_COPYALL); + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); (void) if_simloop(ifp, n, dst->sa_family, FDDI_HDR_LEN); } else if (bcmp(fh->fddi_dhost, fh->fddi_shost, FDDI_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, FDDI_HDR_LEN); return (0); /* XXX */ } } error = (ifp->if_transmit)(ifp, m); if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * Process a received FDDI packet. */ static void fddi_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { int isr; struct llc *l; struct fddi_header *fh; /* * Do consistency checks to verify assumptions * made by code past this point. 
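 *
 * Concretely: the mbuf must carry a packet header (M_PKTHDR) naming
 * the receiving interface, and the chain is pulled up so that the full
 * FDDI header is contiguous before it is dereferenced.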
*/ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, FDDI_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } fh = mtod(m, struct fddi_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. */ if (fh->fddi_dhost[0] & 1) { if (bcmp(ifp->if_broadcastaddr, fh->fddi_dhost, FDDI_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef M_LINK0 /* * If this has a LLC priority of 0, then mark it so upper * layers have a hint that it really came via a FDDI/Ethernet * bridge. */ if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0) m->m_flags |= M_LINK0; #endif /* Strip off FDDI header. */ m_adj(m, FDDI_HDR_LEN); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif #ifdef DECNET case ETHERTYPE_DECNET: isr = NETISR_DECNET; break; #endif default: /* printf("fddi_input: unknown protocol 0x%x\n", type); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } default: /* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } /* * Perform common duties while attaching to interface list */ void fddi_ifattach(ifp, lla, bpf) struct ifnet *ifp; const u_int8_t *lla; int bpf; { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_type = IFT_FDDI; ifp->if_addrlen = FDDI_ADDR_LEN; ifp->if_hdrlen = 21; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_mtu = FDDIMTU; ifp->if_output = fddi_output; ifp->if_input = fddi_input; ifp->if_resolvemulti = fddi_resolvemulti; ifp->if_broadcastaddr = fddibroadcastaddr; ifp->if_baudrate = 100000000; #ifdef IFF_NOTRAILERS 
ifp->if_flags |= IFF_NOTRAILERS; #endif ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_FDDI; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN); return; } void fddi_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int fddi_ioctl (ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: /* before arpwhohas */ ifp->if_init(ifp->if_softc); arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, FDDI_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > FDDIMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; break; } return (error); } static int fddi_resolvemulti(ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) return (EADDRNOTAVAIL); *llsa = NULL; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t fddi_mod = { "fddi", /* module name */ NULL, /* event handler */ 0 /* extra data */ }; DECLARE_MODULE(fddi, fddi_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(fddi, 1); Index: head/sys/net/if_iso88025subr.c =================================================================== --- head/sys/net/if_iso88025subr.c (revision 305823) +++ head/sys/net/if_iso88025subr.c (revision 305824) @@ -1,697 +1,697 @@ /*- * Copyright (c) 1998, Larry Lile * All rights reserved. * * For latest sources and information on this driver, please * go to http://anarchy.stdio.com. * * Questions, comments or suggestions should be directed to * Larry Lile . 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ /* * * General ISO 802.5 (Token Ring) support routines * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #include static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **, struct sockaddr *); #define senderr(e) do { error = (e); goto bad; } while (0) /* * Perform common duties while attaching to interface list */ void iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf) { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifa = NULL; ifp->if_type = IFT_ISO88025; ifp->if_addrlen = ISO88025_ADDR_LEN; ifp->if_hdrlen = ISO88025_HDR_LEN; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_output = iso88025_output; ifp->if_input = iso88025_input; ifp->if_resolvemulti = iso88025_resolvemulti; ifp->if_broadcastaddr = iso88025_broadcastaddr; if (ifp->if_baudrate == 0) ifp->if_baudrate = TR_16MBPS; /* 16Mbit should be a safe default */ if (ifp->if_mtu == 0) ifp->if_mtu = ISO88025_DEFAULT_MTU; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ISO88025; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN); return; } /* * Perform common duties while detaching a Token Ring interface */ void iso88025_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif /* INET */ default: ifp->if_init(ifp->if_softc); break; } break; case 
SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, ISO88025_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ISO88025_MAX_MTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } /* * ISO88025 encapsulation */ int iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t snap_type = 0; int loop_copy = 0, error = 0, rif_len = 0; u_char edst[ISO88025_ADDR_LEN]; struct iso88025_header *th; struct iso88025_header gen_th; struct sockaddr_dl *sdl = NULL; struct rtentry *rt0 = NULL; int is_gw = 0; if (ro != NULL) is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? */ if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway)) if (SDL_ISO88025(sdl)->trld_rcf != 0) rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf); /* Generate a generic 802.5 header for the packet */ gen_th.ac = TR_AC; gen_th.fc = TR_LLC_FRAME; (void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp), ISO88025_ADDR_LEN); if (rif_len) { gen_th.iso88025_shost[0] |= TR_RII; if (rif_len > 2) { gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf; (void)memcpy((caddr_t)gen_th.rd, (caddr_t)SDL_ISO88025(sdl)->trld_route, rif_len - 2); } } switch (dst->sa_family) { #ifdef INET case AF_INET: error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE802); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: snap_type = ETHERTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: snap_type = ETHERTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN); else bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IPV6; break; #endif /* INET6 */ case AF_UNSPEC: { const struct iso88025_sockaddr_data *sd; /* * For AF_UNSPEC sockaddr.sa_data must contain all of the * mac information needed to send the packet. This allows * full mac, llc, and source routing function to be controlled. * llc and source routing information must already be in the * mbuf provided, ac/fc are set in sa_data. sockaddr.sa_data * should be an iso88025_sockaddr_data structure see iso88025.h */ loop_copy = -1; sd = (const struct iso88025_sockaddr_data *)dst->sa_data; gen_th.ac = sd->ac; gen_th.fc = sd->fc; (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN); (void)memcpy(gen_th.iso88025_shost, sd->ether_shost, ISO88025_ADDR_LEN); rif_len = 0; break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); break; } /* * Add LLC header. 
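 *
 * The encapsulation built below is the standard 8-byte
 * (LLC_SNAPFRAMELEN) LLC/SNAP header: DSAP = SSAP = 0xAA
 * (LLC_SNAP_LSAP), control = 0x03 (LLC_UI), a zero OUI, then the
 * 16-bit protocol type in network byte order.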
*/ if (snap_type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(snap_type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); th = mtod(m, struct iso88025_header *); bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN); /* Copy as much of the generic header as is needed into the mbuf */ memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; - n = m_copy(m, 0, (int)M_COPYALL); + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); (void) if_simloop(ifp, n, dst->sa_family, ISO88025_HDR_LEN); } else if (bcmp(th->iso88025_dhost, th->iso88025_shost, ETHER_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, ISO88025_HDR_LEN); return(0); /* XXX */ } } IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error); if (error) { printf("iso88025_output: packet dropped QFULL.\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * ISO 88025 de-encapsulation */ void iso88025_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { struct iso88025_header *th; struct llc *l; int isr; int mac_hdr_len; /* * Do consistency checks to verify assumptions * made by code past this point. */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, ISO88025_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } th = mtod(m, struct iso88025_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((th->iso88025_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost, ISO88025_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. 
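 *
 * A destination address whose first byte has the group bit set is
 * multicast; if it additionally matches iso88025_broadcastaddr exactly,
 * the frame is marked M_BCAST rather than M_MCAST.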
*/ if (th->iso88025_dhost[0] & 1) { if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost, ISO88025_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } mac_hdr_len = ISO88025_HDR_LEN; /* Check for source routing info */ if (th->iso88025_shost[0] & TR_RII) mac_hdr_len += TR_RCF_RIFLEN(th->rcf); /* Strip off ISO88025 header. */ m_adj(m, mac_hdr_len); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif /* INET */ #ifdef INET6 case ETHERTYPE_IPV6: th->iso88025_shost[0] &= ~(TR_RII); isr = NETISR_IPV6; break; #endif /* INET6 */ default: printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } #ifdef ISO case LLC_ISO_LSAP: switch (l->llc_control) { case LLC_UI: if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; case LLC_XID: case LLC_XID_P: if(m->m_len < ISO88025_ADDR_LEN) goto dropanyway; l->llc_window = 0; l->llc_fid = 9; l->llc_class = 1; l->llc_dsap = l->llc_ssap = 0; /* Fall through to */ case LLC_TEST: case LLC_TEST_P: { struct sockaddr sa; struct iso88025_sockaddr_data *th2; int i; u_char c; c = l->llc_dsap; if (th->iso88025_shost[0] & TR_RII) { /* XXX */ printf("iso88025_input: dropping source routed LLC_TEST\n"); goto dropanyway; } l->llc_dsap = l->llc_ssap; l->llc_ssap = c; if (m->m_flags & (M_BCAST | M_MCAST)) bcopy((caddr_t)IF_LLADDR(ifp), (caddr_t)th->iso88025_dhost, ISO88025_ADDR_LEN); sa.sa_family = AF_UNSPEC; sa.sa_len = sizeof(sa); th2 = (struct iso88025_sockaddr_data *)sa.sa_data; for (i = 0; i < ISO88025_ADDR_LEN; i++) { th2->ether_shost[i] = c = th->iso88025_dhost[i]; th2->ether_dhost[i] = th->iso88025_dhost[i] = th->iso88025_shost[i]; th->iso88025_shost[i] = c; } th2->ac = TR_AC; th2->fc = TR_LLC_FRAME; ifp->if_output(ifp, m, &sa, NULL); return; } default: printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } break; #endif /* ISO */ default: printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } static int iso88025_resolvemulti (ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. 
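 *
 * For AF_LINK the address is already link-level, so *llsa is returned
 * NULL; in the AF_INET/AF_INET6 cases below a sockaddr_dl is
 * initialized (link_init_sdl()) and the group address is mapped into
 * it with ETHER_MAP_IP_MULTICAST / ETHER_MAP_IPV6_MULTICAST.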
*/ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) { return (EADDRNOTAVAIL); } *llsa = NULL; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t iso88025_mod = { .name = "iso88025", }; DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(iso88025, 1); Index: head/sys/net/raw_usrreq.c =================================================================== --- head/sys/net/raw_usrreq.c (revision 305823) +++ head/sys/net/raw_usrreq.c (revision 305824) @@ -1,274 +1,274 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_usrreq.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF); /* * Initialize raw connection block q. */ void raw_init(void) { LIST_INIT(&V_rawcb_list); } /* * Raw protocol input routine. 
Find the socket associated with the packet(s)
 * and move them over.  If nothing exists for this packet, drop it.
 */
/*
 * Raw protocol interface.
 */
void
raw_input(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src)
{

	return (raw_input_ext(m0, proto, src, NULL));
}

void
raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
    raw_input_cb_fn cb)
{
	struct rawcb *rp;
	struct mbuf *m = m0;
	struct socket *last;

	last = NULL;
	mtx_lock(&rawcb_mtx);
	LIST_FOREACH(rp, &V_rawcb_list, list) {
		if (rp->rcb_proto.sp_family != proto->sp_family)
			continue;
		if (rp->rcb_proto.sp_protocol &&
		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
			continue;
		if (cb != NULL && (*cb)(m, proto, src, rp) != 0)
			continue;
		if (last) {
			struct mbuf *n;

-			n = m_copy(m, 0, (int)M_COPYALL);
+			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
			if (n) {
				if (sbappendaddr(&last->so_rcv, src,
				    n, (struct mbuf *)0) == 0)
					/* should notify about lost packet */
					m_freem(n);
				else
					sorwakeup(last);
			}
		}
		last = rp->rcb_socket;
	}
	if (last) {
		if (sbappendaddr(&last->so_rcv, src,
		    m, (struct mbuf *)0) == 0)
			m_freem(m);
		else
			sorwakeup(last);
	} else
		m_freem(m);
	mtx_unlock(&rawcb_mtx);
}

/*ARGSUSED*/
void
raw_ctlinput(int cmd, struct sockaddr *arg, void *dummy)
{

	if (cmd < 0 || cmd >= PRC_NCMDS)
		return;
	/* INCOMPLETE */
}

static void
raw_uabort(struct socket *so)
{

	KASSERT(sotorawcb(so) != NULL, ("raw_uabort: rp == NULL"));

	soisdisconnected(so);
}

static void
raw_uclose(struct socket *so)
{

	KASSERT(sotorawcb(so) != NULL, ("raw_uclose: rp == NULL"));

	soisdisconnected(so);
}

/* pru_accept is EOPNOTSUPP */

static int
raw_uattach(struct socket *so, int proto, struct thread *td)
{
	int error;

	/*
	 * Implementors of raw sockets will already have allocated the PCB,
	 * so it must be non-NULL here.
	 */
	KASSERT(sotorawcb(so) != NULL, ("raw_uattach: so_pcb == NULL"));

	if (td != NULL) {
		error = priv_check(td, PRIV_NET_RAW);
		if (error)
			return (error);
	}
	return (raw_attach(so, proto));
}

static int
raw_ubind(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (EINVAL);
}

static int
raw_uconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (EINVAL);
}

/* pru_connect2 is EOPNOTSUPP */
/* pru_control is EOPNOTSUPP */

static void
raw_udetach(struct socket *so)
{
	struct rawcb *rp = sotorawcb(so);

	KASSERT(rp != NULL, ("raw_udetach: rp == NULL"));

	raw_detach(rp);
}

static int
raw_udisconnect(struct socket *so)
{

	KASSERT(sotorawcb(so) != NULL, ("raw_udisconnect: rp == NULL"));

	return (ENOTCONN);
}

/* pru_listen is EOPNOTSUPP */

static int
raw_upeeraddr(struct socket *so, struct sockaddr **nam)
{

	KASSERT(sotorawcb(so) != NULL, ("raw_upeeraddr: rp == NULL"));

	return (ENOTCONN);
}

/* pru_rcvd is EOPNOTSUPP */
/* pru_rcvoob is EOPNOTSUPP */

static int
raw_usend(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct thread *td)
{

	KASSERT(sotorawcb(so) != NULL, ("raw_usend: rp == NULL"));

	if ((flags & PRUS_OOB) || (control && control->m_len)) {
		/* XXXRW: Should control also be freed here? */
		if (m != NULL)
			m_freem(m);
		return (EOPNOTSUPP);
	}

	/*
	 * For historical (bad?) reasons, we effectively ignore the address
	 * argument to sendto(2).  Perhaps we should return an error instead?
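 *
 * (On the m_copym() conversion above: the delivery loop hands the
 * original chain to the last matching socket and a fresh copy to every
 * earlier match, so exactly one reference is consumed per receiver and
 * nothing leaks when no socket matches.  A minimal userland sketch of
 * that ownership pattern; the types and helpers are illustrative, and
 * m_copym(m, 0, M_COPYALL, M_NOWAIT) is the kernel call this models:)
 *
 *	#include <stdlib.h>
 *	#include <string.h>
 *
 *	struct buf { size_t len; char data[256]; };
 *
 *	static struct buf *
 *	buf_copy(const struct buf *b)	// may fail, like M_NOWAIT
 *	{
 *		struct buf *n = malloc(sizeof(*n));
 *		if (n != NULL)
 *			memcpy(n, b, sizeof(*n));
 *		return (n);
 *	}
 *
 *	// Give copies to consumers 0..n-2; the last one owns 'b'.
 *	static void
 *	fanout(struct buf *b, void (*deliver)(struct buf *), int n)
 *	{
 *		for (int i = 0; i < n - 1; i++) {
 *			struct buf *c = buf_copy(b);
 *			if (c != NULL)	// on failure, skip rather than block
 *				deliver(c);
 *		}
 *		if (n > 0)
 *			deliver(b);	// hand off the original
 *		else
 *			free(b);	// no takers: drop it
 *	}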
*/ return ((*so->so_proto->pr_output)(m, so)); } /* pru_sense is null */ static int raw_ushutdown(struct socket *so) { KASSERT(sotorawcb(so) != NULL, ("raw_ushutdown: rp == NULL")); socantsendmore(so); return (0); } static int raw_usockaddr(struct socket *so, struct sockaddr **nam) { KASSERT(sotorawcb(so) != NULL, ("raw_usockaddr: rp == NULL")); return (EINVAL); } struct pr_usrreqs raw_usrreqs = { .pru_abort = raw_uabort, .pru_attach = raw_uattach, .pru_bind = raw_ubind, .pru_connect = raw_uconnect, .pru_detach = raw_udetach, .pru_disconnect = raw_udisconnect, .pru_peeraddr = raw_upeeraddr, .pru_send = raw_usend, .pru_shutdown = raw_ushutdown, .pru_sockaddr = raw_usockaddr, .pru_close = raw_uclose, }; Index: head/sys/netinet/ip_input.c =================================================================== --- head/sys/netinet/ip_input.c (revision 305823) +++ head/sys/netinet/ip_input.c (revision 305824) @@ -1,1367 +1,1367 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #endif /* IPSEC */ #include #include #include #ifdef CTASSERT CTASSERT(sizeof(struct ip) == 20); #endif /* IP reassembly functions are defined in ip_reass.c. 
*/ extern void ipreass_init(void); extern void ipreass_drain(void); extern void ipreass_slowtimo(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif struct rmlock in_ifaddr_lock; RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); VNET_DEFINE(int, rsvp_on); VNET_DEFINE(int, ipforwarding); SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ #define V_ipsendredirects VNET(ipsendredirects) SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table * and transmit implementation do not implement the Strong ES model, * setting this to 1 results in an odd hybrid. * * XXX - ip_checkinterface currently must be disabled if you use ipnat * to translate the destination address to another local interface. * * XXX - ip_checkinterface must be disabled if you add IP aliases * to the loopback interface instead of the interface where the * packets for those addresses are received. */ static VNET_DEFINE(int, ip_checkinterface); #define V_ip_checkinterface VNET(ip_checkinterface) SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", .nh_handler = ip_input, .nh_proto = NETISR_IP, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; #ifdef RSS /* * Directly dispatched frames are currently assumed * to have a flowid already calculated. * * It should likely have something that assert it * actually has valid flow details. */ static struct netisr_handler ip_direct_nh = { .nh_name = "ip_direct", .nh_handler = ip_direct_input, .nh_proto = NETISR_IP_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; #endif extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH VNET_DEFINE(int, ipstealth); SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipstealth), 0, "IP stealth mode, no TTL decrementation on forwarding"); #endif /* * IP statistics are stored in the "array" of counter(9)s. */ VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); VNET_PCPUSTAT_SYSINIT(ipstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipstat); #endif /* VIMAGE */ /* * Kernel module interface for updating ipstat. The argument is an index * into ipstat treated as an array. 
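 *
 * (The counters behind this interface are per-CPU counter(9)s: an
 * increment touches only the local CPU's slot and a read folds all
 * slots together.  A minimal userland sketch of the idea; NCPU and the
 * helpers below are illustrative, not kernel names:)
 *
 *	#include <stdint.h>
 *
 *	#define NCPU	4
 *	#define NSTATS	16
 *	static uint64_t stats[NCPU][NSTATS];
 *
 *	static void
 *	stat_inc(int cpu, int statnum)	// cheap, no shared cacheline
 *	{
 *		stats[cpu][statnum]++;
 *	}
 *
 *	static uint64_t
 *	stat_read(int statnum)		// fold all CPUs on demand
 *	{
 *		uint64_t sum = 0;
 *		for (int c = 0; c < NCPU; c++)
 *			sum += stats[c][statnum];
 *		return (sum);
 *	}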
*/ void kmod_ipstat_inc(int statnum) { counter_u64_add(VNET(ipstat)[statnum], 1); } void kmod_ipstat_dec(int statnum) { counter_u64_add(VNET(ipstat)[statnum], -1); } static int sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", "Maximum size of the IP input queue"); static int sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", "Number of packets dropped from the IP input queue"); #ifdef RSS static int sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", "Maximum size of the IP direct input queue"); static int sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_direct_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_direct_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", "Number of packets dropped from the IP direct input queue"); #endif /* RSS */ /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init(void) { struct protosw *pr; int i; TAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); /* Initialize IP reassembly queue. */ ipreass_init(); /* Initialize packet filter hooks. */ V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; V_inet_pfil_hook.ph_af = AF_INET; if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, &V_ipsec_hhh_out[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); /* Skip initialization of globals for non-default instances. 
*/ #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) { netisr_register_vnet(&ip_nh); #ifdef RSS netisr_register_vnet(&ip_direct_nh); #endif return; } #endif pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip_init: PF_INET not found"); /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; /* * Cycle through IP protocols and put them into the appropriate place * in ip_protox[]. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip_protox[pr->pr_protocol] = pr - inetsw; } netisr_register(&ip_nh); #ifdef RSS netisr_register(&ip_direct_nh); #endif } #ifdef VIMAGE static void ip_destroy(void *unused __unused) { struct ifnet *ifp; int error; #ifdef RSS netisr_unregister_vnet(&ip_direct_nh); #endif netisr_unregister_vnet(&ip_nh); if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil hook, " "error %d\n", __func__, error); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } /* Remove the IPv4 addresses from all interfaces. */ in_ifscrub_all(); /* Make sure the IPv4 routes are gone as well. */ IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) rt_flushifroutes_af(ifp, AF_INET); IFNET_RUNLOCK(); /* Destroy IP reassembly queue. */ ipreass_destroy(); /* Cleanup in_ifaddr hash table; should be empty. */ hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); } VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); #endif #ifdef RSS /* * IP direct input routine. * * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip_direct_input(struct mbuf *m) { struct ip *ip; int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; } #endif /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { struct ip *ip = NULL; struct in_ifaddr *ia = NULL; struct ifaddr *ifa; struct ifnet *ifp; int checkif, hlen = 0; uint16_t sum, ip_len; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ M_ASSERTPKTHDR(m); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; /* Set up some basics that will be used later. 
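 *
 * (On ip_init() above: every slot of ip_protox[] starts out pointing at
 * the raw-IP handler and only registered protocols are patched in, so
 * an unknown protocol number safely falls through to raw IP.  A minimal
 * userland sketch of that default-then-override dispatch table; the
 * handler names are illustrative:)
 *
 *	#include <stdio.h>
 *
 *	#define PROTO_MAX 256
 *	typedef void (*handler_t)(int proto);
 *
 *	static void raw_handler(int p) { printf("raw %d\n", p); }
 *	static void tcp_handler(int p) { printf("tcp\n"); }
 *	static handler_t protox[PROTO_MAX];
 *
 *	static void
 *	dispatch_init(void)
 *	{
 *		for (int i = 0; i < PROTO_MAX; i++)
 *			protox[i] = raw_handler;	// default
 *		protox[6] = tcp_handler;		// IPPROTO_TCP
 *	}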
 */
		ip = mtod(m, struct ip *);
		hlen = ip->ip_hl << 2;
		ip_len = ntohs(ip->ip_len);
		goto ours;
	}

	IPSTAT_INC(ips_total);

	if (m->m_pkthdr.len < sizeof(struct ip))
		goto tooshort;

	if (m->m_len < sizeof (struct ip) &&
	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
		IPSTAT_INC(ips_toosmall);
		return;
	}
	ip = mtod(m, struct ip *);

	if (ip->ip_v != IPVERSION) {
		IPSTAT_INC(ips_badvers);
		goto bad;
	}

	hlen = ip->ip_hl << 2;
	if (hlen < sizeof(struct ip)) {	/* minimum header length */
		IPSTAT_INC(ips_badhlen);
		goto bad;
	}
	if (hlen > m->m_len) {
		if ((m = m_pullup(m, hlen)) == NULL) {
			IPSTAT_INC(ips_badhlen);
			return;
		}
		ip = mtod(m, struct ip *);
	}

	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);

	/* 127/8 must not appear on wire - RFC1122 */
	ifp = m->m_pkthdr.rcvif;
	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
			IPSTAT_INC(ips_badaddr);
			goto bad;
		}
	}

	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
	} else {
		if (hlen == sizeof(struct ip)) {
			sum = in_cksum_hdr(ip);
		} else {
			sum = in_cksum(m, hlen);
		}
	}
	if (sum) {
		IPSTAT_INC(ips_badsum);
		goto bad;
	}

#ifdef ALTQ
	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
		/* packet is dropped by traffic conditioner */
		return;
#endif

	ip_len = ntohs(ip->ip_len);
	if (ip_len < hlen) {
		IPSTAT_INC(ips_badlen);
		goto bad;
	}

	/*
	 * Check that the amount of data in the buffers
	 * is at least as much as the IP header would have us expect.
	 * Trim mbufs if longer than we expect.
	 * Drop packet if shorter than we expect.
	 */
	if (m->m_pkthdr.len < ip_len) {
tooshort:
		IPSTAT_INC(ips_tooshort);
		goto bad;
	}
	if (m->m_pkthdr.len > ip_len) {
		if (m->m_len == m->m_pkthdr.len) {
			m->m_len = ip_len;
			m->m_pkthdr.len = ip_len;
		} else
			m_adj(m, ip_len - m->m_pkthdr.len);
	}

	/* Try to forward the packet, but if we fail continue */
#ifdef IPSEC
	/* For now we do not handle IPSEC in tryforward. */
	if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
	    (V_ipforwarding == 1))
		if (ip_tryforward(m) == NULL)
			return;
	/*
	 * Bypass packet filtering for packets previously handled by IPsec.
	 */
	if (ip_ipsec_filtertunnel(m))
		goto passin;
#else
	if (V_ipforwarding == 1)
		if (ip_tryforward(m) == NULL)
			return;
#endif /* IPSEC */

	/*
	 * Run through list of hooks for input packets.
	 *
	 * NB: Beware of the destination address changing (e.g.
	 *     by NAT rewriting).  When this happens, tell
	 *     ip_forward to do the right thing.
	 */

	/* Jump over all PFIL processing if hooks are not active. */
	if (!PFIL_HOOKED(&V_inet_pfil_hook))
		goto passin;

	odst = ip->ip_dst;
	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
		return;
	if (m == NULL)			/* consumed by filter */
		return;

	ip = mtod(m, struct ip *);
	dchg = (odst.s_addr != ip->ip_dst.s_addr);
	ifp = m->m_pkthdr.rcvif;

	if (m->m_flags & M_FASTFWD_OURS) {
		m->m_flags &= ~M_FASTFWD_OURS;
		goto ours;
	}
	if (m->m_flags & M_IP_NEXTHOP) {
		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
			/*
			 * Directly ship the packet on.  This allows
			 * forwarding packets originally destined to us
			 * to some other directly connected host.
			 */
			ip_forward(m, 1);
			return;
		}
	}
passin:

	/*
	 * Process options and, if not destined for us,
	 * ship it on.  ip_dooptions returns 1 when an
	 * error was detected (causing an icmp message
	 * to be sent and the original packet to be freed).
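 *
 * (Looking back at the checksum check above: the header checksum is the
 * standard 16-bit one's complement sum over the header, and a header
 * that checks out folds to zero.  A minimal self-contained verifier,
 * showing the same arithmetic as in_cksum_hdr() but not the kernel
 * routine itself:)
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	// Returns 0 when the IPv4 header (hlen bytes, hlen even,
 *	// 2-byte aligned) carries a valid checksum.
 *	static uint16_t
 *	ipv4_cksum(const void *hdr, size_t hlen)
 *	{
 *		const uint16_t *w = hdr;
 *		uint32_t sum = 0;
 *
 *		for (size_t i = 0; i < hlen / 2; i++)
 *			sum += w[i];
 *		sum = (sum >> 16) + (sum & 0xffff);	// fold carries
 *		sum += (sum >> 16);
 *		return ((uint16_t)~sum);
 *	}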
*/ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) return; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) goto ours; /* * Check our list of addresses, to see if the packet is for us. * If we don't have any addresses, assume any unicast packet * we receive might be for us (and let the upper layers deal * with it). */ if (TAILQ_EMPTY(&V_in_ifaddrhead) && (m->m_flags & (M_MCAST|M_BCAST)) == 0) goto ours; /* * Enable a consistency check between the destination address * and the arrival interface for a unicast packet (the RFC 1122 * strong ES model) if IP forwarding is disabled and the packet * is not locally generated and the packet is not subject to * 'ipfw fwd'. * * XXX - Checking also should be disabled if the destination * address is ipnat'ed to a different interface. * * XXX - Checking is incompatible with IP aliases added * to the loopback interface instead of the interface where * the packets are received. * * XXX - This is the case for carp vhost IPs as well so we * insert a workaround. If the packet got here, we already * checked with carp_iamatch() and carp_forus(). */ checkif = V_ip_checkinterface && (V_ipforwarding == 0) && ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && ifp->if_carp == NULL && (dchg == 0); /* * Check for exact addresses in the hash bucket. */ /* IN_IFADDR_RLOCK(); */ LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { /* * If the address matches, verify that the packet * arrived via the correct interface if checking is * enabled. */ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && (!checkif || ia->ia_ifp == ifp)) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); /* IN_IFADDR_RUNLOCK(); */ goto ours; } } /* IN_IFADDR_RUNLOCK(); */ /* * Check for broadcast addresses. * * Only accept broadcast packets that arrive via the matching * interface. Reception of forwarded directed broadcasts would * be handled via ip_forward() and ether_output() with the loopback * into the stack for SIMPLEX interfaces handled by ether_output(). */ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #endif } IF_ADDR_RUNLOCK(ifp); ia = NULL; } /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { if (V_ip_mrouter) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. 
*/ if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } /* * The process-level routing daemon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ if (ip->ip_p == IPPROTO_IGMP) goto ours; IPSTAT_INC(ips_forward); } /* * Assume the packet is for us, to avoid prematurely taking * a lock on the in_multi hash. Protocols must perform * their own filtering and update statistics accordingly. */ goto ours; } if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; /* * Not for us; forward if possible and desirable. */ if (V_ipforwarding == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); } else { ip_forward(m, dchg); } return; ours: #ifdef IPSTEALTH /* * IPSTEALTH: Process non-routing options only * if the packet is destined for us. */ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) return; #endif /* IPSTEALTH */ /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { /* XXXGL: shouldn't we save & set m_flags? */ m = ip_reass(m); if (m == NULL) return; ip = mtod(m, struct ip *); /* Get the header length of the reassembled packet */ hlen = ip->ip_hl << 2; } #ifdef IPSEC /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ if (ip_ipsec_input(m, ip->ip_p) != 0) goto bad; #endif /* IPSEC */ /* * Switch out to protocol's input routine. */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; bad: m_freem(m); } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. */ void ip_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_slowtimo(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } void ip_drain(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_drain(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * The protocol to be inserted into ip_protox[] must be already registered * in inetsw[], either statically or through pf_proto_register(). */ int ipproto_register(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ return (EEXIST); /* Find the protocol position in inetsw[] and set the index. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol == ipproto) { ip_protox[pr->pr_protocol] = pr - inetsw; return (0); } } return (EPROTONOSUPPORT); } int ipproto_unregister(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. 
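 *
 * (Registration and removal are symmetric edits of the same table: a
 * slot still holding the raw-IP index is free, which is the EEXIST
 * guard in ipproto_register() above, and unregistering just restores
 * that default, as done below.  A minimal userland sketch with
 * illustrative names:)
 *
 *	#include <errno.h>
 *
 *	#define PROTO_MAX 256
 *	static int protox[PROTO_MAX];	// handler indices
 *	static const int raw_idx = 0;	// default slot value
 *
 *	static int
 *	proto_register(int proto, int idx)
 *	{
 *		if (proto <= 0 || proto >= PROTO_MAX)
 *			return (EPROTONOSUPPORT);
 *		if (protox[proto] != raw_idx)
 *			return (EEXIST);	// already taken
 *		protox[proto] = idx;
 *		return (0);
 *	}
 *
 *	static int
 *	proto_unregister(int proto)
 *	{
 *		if (proto <= 0 || proto >= PROTO_MAX)
 *			return (EPROTONOSUPPORT);
 *		if (protox[proto] == raw_idx)
 *			return (ENOENT);	// was never registered
 *		protox[proto] = raw_idx;	// back to the default
 *		return (0);
 *	}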
 */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct mbuf *mcopy;
	struct sockaddr_in *sin;
	struct in_addr dest;
	struct route ro;
	int error, type = 0, code = 0, mtu = 0;

	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
#ifdef IPSEC
	if (ip_ipsec_fwd(m) != 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
#endif /* IPSEC */
#ifdef IPSTEALTH
	if (!V_ipstealth) {
#endif
		if (ip->ip_ttl <= IPTTLDEC) {
			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
			    0, 0);
			return;
		}
#ifdef IPSTEALTH
	}
#endif

	bzero(&ro, sizeof(ro));
	sin = (struct sockaddr_in *)&ro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = ip->ip_dst;
#ifdef RADIX_MPATH
	rtalloc_mpath_fib(&ro,
	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
	    M_GETFIB(m));
#else
	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
#endif
	if (ro.ro_rt != NULL) {
		ia = ifatoia(ro.ro_rt->rt_ifa);
		ifa_ref(&ia->ia_ifa);
	} else
		ia = NULL;
#ifndef IPSEC
	/*
	 * 'ia' may be NULL if there is no route for this destination.
	 * In case of IPsec, don't discard it just yet, but pass it to
	 * ip_output in case of outgoing IPsec policy.
	 */
	if (!srcrt && ia == NULL) {
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
		RO_RTFREE(&ro);
		return;
	}
#endif

	/*
	 * Save the IP header and at most 8 bytes of the payload,
	 * in case we need to generate an ICMP message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary.  The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
-	 * We don't use m_copy() because it might return a reference
+	 * We don't use m_copym() because it might return a reference
	 * to a shared cluster.  Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	mcopy = m_gethdr(M_NOWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
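 *
 * (On the copy saved above: an ICMP error only needs to quote the
 * offending IP header plus the first 8 payload bytes, per RFC 792.
 * A minimal sketch of that length computation; illustrative, not the
 * mbuf code itself:)
 *
 *	#include <stddef.h>
 *
 *	// Bytes of the original packet to quote in an ICMP error.
 *	static size_t
 *	icmp_quote_len(size_t hlen, size_t ip_len)
 *	{
 *		size_t want = hlen + 8;	// header + 8 payload bytes
 *
 *		return (ip_len < want ? ip_len : want);
 *	}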
*/ m_free(mcopy); mcopy = NULL; } if (mcopy != NULL) { mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } #ifdef IPSTEALTH if (!V_ipstealth) { #endif ip->ip_ttl -= IPTTLDEC; #ifdef IPSTEALTH } #endif /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a default route * or a route modified by a redirect. */ dest.s_addr = 0; if (!srcrt && V_ipsendredirects && ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { struct rtentry *rt; rt = ro.ro_rt; if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && satosin(rt_key(rt))->sin_addr.s_addr != 0) { #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) u_long src = ntohl(ip->ip_src.s_addr); if (RTA(rt) && (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { if (rt->rt_flags & RTF_GATEWAY) dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; else dest.s_addr = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; } } } error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); if (error == EMSGSIZE && ro.ro_rt) mtu = ro.ro_rt->rt_mtu; RO_RTFREE(&ro); if (error) IPSTAT_INC(ips_cantforward); else { IPSTAT_INC(ips_forward); if (type) IPSTAT_INC(ips_redirectsent); else { if (mcopy) m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } } if (mcopy == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); return; } switch (error) { case 0: /* forwarded, but need redirect */ /* type, code set above */ break; case ENETUNREACH: case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; break; case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; #ifdef IPSEC /* * If IPsec is configured for this path, * override any possibly mtu value set by ip_output. */ mtu = ip_ipsec_mtu(mcopy, mtu); #endif /* IPSEC */ /* * If the MTU was set before make sure we are below the * interface MTU. * If the MTU wasn't set before use the interface mtu or * fall back to the next smaller mtu step compared to the * current packet size. 
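 *
 * (When no usable MTU is known, the fallback below steps down to the
 * next smaller "plateau" relative to the packet size.  A sketch of
 * that table walk; the plateau list here is illustrative, the real
 * values live in ip_next_mtu():)
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static const uint16_t mtu_plateaus[] =
 *	    { 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280,
 *	      1006, 508, 296, 68 };
 *
 *	// Largest plateau strictly smaller than the failed size.
 *	static uint16_t
 *	next_smaller_mtu(uint16_t pktsize)
 *	{
 *		for (size_t i = 0;
 *		    i < sizeof(mtu_plateaus) / sizeof(mtu_plateaus[0]); i++)
 *			if (mtu_plateaus[i] < pktsize)
 *				return (mtu_plateaus[i]);
 *		return (68);	// IPv4 minimum
 *	}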
*/ if (mtu != 0) { if (ia != NULL) mtu = min(mtu, ia->ia_ifp->if_mtu); } else { if (ia != NULL) mtu = ia->ia_ifp->if_mtu; else mtu = ip_next_mtu(ntohs(ip->ip_len), 0); } IPSTAT_INC(ips_cantfrag); break; case ENOBUFS: case EACCES: /* ipfw denied packet */ m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } if (ia != NULL) ifa_free(&ia->ia_ifa); icmp_error(mcopy, type, code, dest.s_addr, mtu); } void ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, struct mbuf *m) { if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { struct bintime bt; bintime(&bt); if (inp->inp_socket->so_options & SO_BINTIME) { *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), SCM_BINTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; bintime2timeval(&bt, &tv); *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTTL) { *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, sizeof(u_char), IP_RECVTTL, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. */ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { *mp = sbcreatecontrol((caddr_t)opts_deleted_above, sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if ((ifp = m->m_pkthdr.rcvif) && ifp->if_index && ifp->if_index <= V_if_index) { sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; /* * Change our mind and don't try copy. */ if (sdp->sdl_family != AF_LINK || sdp->sdl_len > sizeof(sdlbuf)) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTOS) { *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, sizeof(u_char), IP_RECVTOS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? 
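 *
 * (The 'mp = &(*mp)->m_next' idiom used throughout this function is
 * the classic tail-pointer append: keep a pointer to the link that
 * will hold the next record, so each append is O(1) and never
 * re-walks the chain.  A minimal sketch with a plain list and
 * illustrative types:)
 *
 *	#include <stdlib.h>
 *
 *	struct node { int v; struct node *next; };
 *
 *	static void
 *	append(struct node **tailp, int v)
 *	{
 *		struct node *n = calloc(1, sizeof(*n));
 *		if (n == NULL)
 *			return;		// skip on failure, like above
 *		n->v = v;
 *		*tailp = n;		// hook into the chain
 *	}
 *
 *	// Usage: struct node *head = NULL, **mp = &head;
 *	// append(mp, 1); if (*mp) mp = &(*mp)->next; append(mp, 2); ...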
*/ *mp = sbcreatecontrol((caddr_t) &flowid, sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; *mp = sbcreatecontrol((caddr_t) &flow_type, sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { *mp = sbcreatecontrol((caddr_t) &rss_bucketid, sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } } #endif } /* * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on * locking. This code remains in ip_input.c as ip_mroute.c is optionally * compiled. */ static VNET_DEFINE(int, ip_rsvp_on); VNET_DEFINE(struct socket *, ip_rsvpd); #define V_ip_rsvp_on VNET(ip_rsvp_on) int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (V_ip_rsvpd != NULL) return EADDRINUSE; V_ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. */ if (!V_ip_rsvp_on) { V_ip_rsvp_on = 1; V_rsvp_on++; } return 0; } int ip_rsvp_done(void) { V_ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (V_ip_rsvp_on) { V_ip_rsvp_on = 0; V_rsvp_on--; } return 0; } int rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (rsvp_input_p) { /* call the real one if loaded */ *mp = m; rsvp_input_p(mp, offp, proto); return (IPPROTO_DONE); } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!V_rsvp_on) { m_freem(m); return (IPPROTO_DONE); } if (V_ip_rsvpd != NULL) { *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } /* Drop the packet */ m_freem(m); return (IPPROTO_DONE); } Index: head/sys/netinet/ip_mroute.c =================================================================== --- head/sys/netinet/ip_mroute.c (revision 305823) +++ head/sys/netinet/ip_mroute.c (revision 305824) @@ -1,2949 +1,2949 @@ /*- * Copyright (c) 1989 Stephen Deering * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 */ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 * Modified by Bill Fenner, PARC, April 1995 * Modified by Ahmed Helmy, SGI, June 1996 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 * Modified by Hitoshi Asaeda, WIDE, August 2000 * Modified by Pavlin Radoslavov, ICSI, October 2002 * * MROUTING Revision: 3.5 * and PIM-SMv2 and PIM-DM support, advanced API support, * bandwidth metering and signaling */ /* * TODO: Prefix functions with ipmf_. * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol * domain attachment (if_afdata) so we can track consumers of that service. * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, * move it to socket options. * TODO: Cleanup LSRR removal further. * TODO: Push RSVP stubs into raw_ip.c. * TODO: Use bitstring.h for vif set. * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. * TODO: Sync ip6_mroute.c with this file. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_mrouting.h" #define _PIM_VT 1 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IPMF #define KTR_IPMF KTR_INET #endif #define VIFI_INVALID ((vifi_t) -1) static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */ #define V_last_tv_sec VNET(last_tv_sec) static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); /* * Locking. We use two locks: one for the virtual interface table and * one for the forwarding table. These locks may be nested in which case * the VIF lock must always be taken first. Note that each lock is used * to cover not only the specific data structure but also related data * structures. 
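 *
 * (A minimal sketch of the nesting rule stated above, with pthread
 * mutexes standing in for the kernel mtx(9) locks:)
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t vif_lock = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_mutex_t mfc_lock = PTHREAD_MUTEX_INITIALIZER;
 *
 *	static void
 *	update_both(void)
 *	{
 *		// VIF lock always first when both are needed ...
 *		pthread_mutex_lock(&vif_lock);
 *		pthread_mutex_lock(&mfc_lock);
 *		// ... touch the vif table and the forwarding cache ...
 *		pthread_mutex_unlock(&mfc_lock);
 *		pthread_mutex_unlock(&vif_lock);	// reverse order out
 *	}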
*/ static struct mtx mrouter_mtx; #define MROUTER_LOCK() mtx_lock(&mrouter_mtx) #define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) #define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) #define MROUTER_LOCK_INIT() \ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) #define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) static int ip_mrouter_cnt; /* # of vnets with active mrouters */ static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */ static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat); VNET_PCPUSTAT_SYSINIT(mrtstat); VNET_PCPUSTAT_SYSUNINIT(mrtstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat, mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, " "netinet/ip_mroute.h)"); static VNET_DEFINE(u_long, mfchash); #define V_mfchash VNET(mfchash) #define MFCHASH(a, g) \ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash) #define MFCHASHSIZE 256 static u_long mfchashsize; /* Hash size */ static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */ #define V_nexpire VNET(nexpire) static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl); #define V_mfchashtbl VNET(mfchashtbl) static struct mtx mfc_mtx; #define MFC_LOCK() mtx_lock(&mfc_mtx) #define MFC_UNLOCK() mtx_unlock(&mfc_mtx) #define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) #define MFC_LOCK_INIT() \ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) static VNET_DEFINE(vifi_t, numvifs); #define V_numvifs VNET(numvifs) static VNET_DEFINE(struct vif, viftable[MAXVIFS]); #define V_viftable VNET(viftable) SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]", "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); static struct mtx vif_mtx; #define VIF_LOCK() mtx_lock(&vif_mtx) #define VIF_UNLOCK() mtx_unlock(&vif_mtx) #define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) #define VIF_LOCK_INIT() \ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) #define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) static eventhandler_tag if_detach_event_tag = NULL; static VNET_DEFINE(struct callout, expire_upcalls_ch); #define V_expire_upcalls_ch VNET(expire_upcalls_ch) #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ /* * Bandwidth meter variables and constants */ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); /* * Pending timeouts are stored in a hash table, the key being the * expiration time. Periodically, the entries are analysed and processed. 
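 *
 * (The MFC table above is bucketed the same way; MFCHASH folds the
 * (S,G) key by xoring each address with shifted copies of itself
 * before masking to the table size, which must be a power of two.
 * A userland rendering of the same arithmetic, where 'mask' plays
 * the role of V_mfchash:)
 *
 *	#include <stdint.h>
 *
 *	// 'o' and 'g' are origin/group IPv4 addresses as 32-bit values;
 *	// 'mask' is hashsize - 1, with hashsize a power of two.
 *	static uint32_t
 *	mfc_hash(uint32_t o, uint32_t g, uint32_t mask)
 *	{
 *		return (((o >> 20) ^ (o >> 10) ^ o ^
 *		    (g >> 20) ^ (g >> 10) ^ g) & mask);
 *	}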
*/ #define BW_METER_BUCKETS 1024 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]); #define V_bw_meter_timers VNET(bw_meter_timers) static VNET_DEFINE(struct callout, bw_meter_ch); #define V_bw_meter_ch VNET(bw_meter_ch) #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ /* * Pending upcalls are stored in a vector which is flushed when * full, or periodically */ static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]); #define V_bw_upcalls VNET(bw_upcalls) static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */ #define V_bw_upcalls_n VNET(bw_upcalls_n) static VNET_DEFINE(struct callout, bw_upcalls_ch); #define V_bw_upcalls_ch VNET(bw_upcalls_ch) #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat); VNET_PCPUSTAT_SYSINIT(pimstat); VNET_PCPUSTAT_SYSUNINIT(pimstat); SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat, pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)"); static u_long pim_squelch_wholepkt = 0; SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, &pim_squelch_wholepkt, 0, "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); extern struct domain inetdomain; static const struct protosw in_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = pim_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; static const struct encaptab *pim_encap_cookie; static int pim_encapcheck(const struct mbuf *, int, int, void *); /* * Note: the PIM Register encapsulation adds the following in front of a * data packet: * * struct pim_encap_hdr { * struct ip ip; * struct pim_encap_pimhdr pim; * } * */ struct pim_encap_pimhdr { struct pim pim; uint32_t flags; }; #define PIM_ENCAP_TTL 64 static struct ip pim_encap_iphdr = { #if BYTE_ORDER == LITTLE_ENDIAN sizeof(struct ip) >> 2, IPVERSION, #else IPVERSION, sizeof(struct ip) >> 2, #endif 0, /* tos */ sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ PIM_ENCAP_TTL, IPPROTO_PIM, 0, /* checksum */ }; static struct pim_encap_pimhdr pim_encap_pimhdr = { { PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 0, /* reserved */ 0, /* checksum */ }, 0 /* flags */ }; static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID; #define V_reg_vif_num VNET(reg_vif_num) static VNET_DEFINE(struct ifnet, multicast_register_if); #define V_multicast_register_if VNET(multicast_register_if) /* * Private variables. 
*/ static u_long X_ip_mcast_src(int); static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); static int X_ip_mrouter_done(void); static int X_ip_mrouter_get(struct socket *, struct sockopt *); static int X_ip_mrouter_set(struct socket *, struct sockopt *); static int X_legal_vif_num(int); static int X_mrt_ioctl(u_long, caddr_t, int); static int add_bw_upcall(struct bw_upcall *); static int add_mfc(struct mfcctl2 *); static int add_vif(struct vifctl *); static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); static void bw_meter_process(void); static void bw_meter_receive_packet(struct bw_meter *, int, struct timeval *); static void bw_upcalls_send(void); static int del_bw_upcall(struct bw_upcall *); static int del_mfc(struct mfcctl2 *); static int del_vif(vifi_t); static int del_vif_locked(vifi_t); static void expire_bw_meter_process(void *); static void expire_bw_upcalls_send(void *); static void expire_mfc(struct mfc *); static void expire_upcalls(void *); static void free_bw_list(struct bw_meter *); static int get_sg_cnt(struct sioc_sg_req *); static int get_vif_cnt(struct sioc_vif_req *); static void if_detached_event(void *, struct ifnet *); static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); static int ip_mrouter_init(struct socket *, int); static __inline struct mfc * mfc_find(struct in_addr *, struct in_addr *); static void phyint_send(struct ip *, struct vif *, struct mbuf *); static struct mbuf * pim_register_prepare(struct ip *, struct mbuf *); static int pim_register_send(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_rp(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_upcall(struct ip *, struct vif *, struct mbuf *, struct mfc *); static void schedule_bw_meter(struct bw_meter *, struct timeval *); static void send_packet(struct vif *, struct mbuf *); static int set_api_config(uint32_t *); static int set_assert(int); static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); static void unschedule_bw_meter(struct bw_meter *); /* * Kernel multicast forwarding API capabilities and setup. * If more API capabilities are added to the kernel, they should be * recorded in `mrt_api_support'. */ #define MRT_API_VERSION 0x0305 static const int mrt_api_version = MRT_API_VERSION; static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF | MRT_MFC_RP | MRT_MFC_BW_UPCALL); static VNET_DEFINE(uint32_t, mrt_api_config); #define V_mrt_api_config VNET(mrt_api_config) static VNET_DEFINE(int, pim_assert_enabled); #define V_pim_assert_enabled VNET(pim_assert_enabled) static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ /* * Find a route for a given origin IP address and multicast group address. * Statistics must be updated by the caller. */ static __inline struct mfc * mfc_find(struct in_addr *o, struct in_addr *g) { struct mfc *rt; MFC_LOCK_ASSERT(); LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { if (in_hosteq(rt->mfc_origin, *o) && in_hosteq(rt->mfc_mcastgrp, *g) && TAILQ_EMPTY(&rt->mfc_stall)) break; } return (rt); } /* * Handle MRT setsockopt commands to modify the multicast forwarding tables. 
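 *
 * (The MRT_ADD_MFC/MRT_DEL_MFC cases below copy in either the old
 * struct mfcctl or the extended struct mfcctl2 depending on the
 * negotiated API, then zero the tail so later code can always read
 * the larger layout.  A minimal sketch of that pattern; the struct
 * names are illustrative and assume the v2 layout is a prefix-
 * compatible superset of v1:)
 *
 *	#include <string.h>
 *
 *	struct ctl_v1 { int a, b; };
 *	struct ctl_v2 { int a, b; int flags; };	// superset of v1
 *
 *	static void
 *	copyin_ctl(struct ctl_v2 *dst, const void *src, int extended)
 *	{
 *		if (extended) {
 *			memcpy(dst, src, sizeof(struct ctl_v2));
 *		} else {
 *			memcpy(dst, src, sizeof(struct ctl_v1));
 *			// Zero the v2-only fields for uniform reads.
 *			memset((char *)dst + sizeof(struct ctl_v1), 0,
 *			    sizeof(struct ctl_v2) - sizeof(struct ctl_v1));
 *		}
 *	}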
*/ static int X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) { int error, optval; vifi_t vifi; struct vifctl vifc; struct mfcctl2 mfc; struct bw_upcall bw_upcall; uint32_t i; if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT) return EPERM; error = 0; switch (sopt->sopt_name) { case MRT_INIT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; error = ip_mrouter_init(so, optval); break; case MRT_DONE: error = ip_mrouter_done(); break; case MRT_ADD_VIF: error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); if (error) break; error = add_vif(&vifc); break; case MRT_DEL_VIF: error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); if (error) break; error = del_vif(vifi); break; case MRT_ADD_MFC: case MRT_DEL_MFC: /* * select data size depending on API version. */ if (sopt->sopt_name == MRT_ADD_MFC && V_mrt_api_config & MRT_API_FLAGS_ALL) { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), sizeof(struct mfcctl2)); } else { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), sizeof(struct mfcctl)); bzero((caddr_t)&mfc + sizeof(struct mfcctl), sizeof(mfc) - sizeof(struct mfcctl)); } if (error) break; if (sopt->sopt_name == MRT_ADD_MFC) error = add_mfc(&mfc); else error = del_mfc(&mfc); break; case MRT_ASSERT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; set_assert(optval); break; case MRT_API_CONFIG: error = sooptcopyin(sopt, &i, sizeof i, sizeof i); if (!error) error = set_api_config(&i); if (!error) error = sooptcopyout(sopt, &i, sizeof i); break; case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, sizeof bw_upcall); if (error) break; if (sopt->sopt_name == MRT_ADD_BW_UPCALL) error = add_bw_upcall(&bw_upcall); else error = del_bw_upcall(&bw_upcall); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle MRT getsockopt commands */ static int X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) { int error; switch (sopt->sopt_name) { case MRT_VERSION: error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); break; case MRT_ASSERT: error = sooptcopyout(sopt, &V_pim_assert_enabled, sizeof V_pim_assert_enabled); break; case MRT_API_SUPPORT: error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); break; case MRT_API_CONFIG: error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle ioctl commands to obtain information from the cache */ static int X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) { int error = 0; /* * Currently the only function calling this ioctl routine is rtioctl_fib(). 
 * Typically, only root can create the raw socket in order to execute
 * this ioctl method, however the request might be coming from a prison.
 */
	error = priv_check(curthread, PRIV_NETINET_MROUTE);
	if (error)
		return (error);

	switch (cmd) {
	case (SIOCGETVIFCNT):
		error = get_vif_cnt((struct sioc_vif_req *)data);
		break;

	case (SIOCGETSGCNT):
		error = get_sg_cnt((struct sioc_sg_req *)data);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * returns the packet, byte, rpf-failure count for the source group provided
 */
static int
get_sg_cnt(struct sioc_sg_req *req)
{
	struct mfc *rt;

	MFC_LOCK();
	rt = mfc_find(&req->src, &req->grp);
	if (rt == NULL) {
		MFC_UNLOCK();
		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
		return EADDRNOTAVAIL;
	}
	req->pktcnt = rt->mfc_pkt_cnt;
	req->bytecnt = rt->mfc_byte_cnt;
	req->wrong_if = rt->mfc_wrong_if;
	MFC_UNLOCK();

	return 0;
}

/*
 * returns the input and output packet and byte counts on the vif provided
 */
static int
get_vif_cnt(struct sioc_vif_req *req)
{
	vifi_t vifi = req->vifi;

	VIF_LOCK();
	if (vifi >= V_numvifs) {
		VIF_UNLOCK();
		return EINVAL;
	}

	req->icount = V_viftable[vifi].v_pkt_in;
	req->ocount = V_viftable[vifi].v_pkt_out;
	req->ibytes = V_viftable[vifi].v_bytes_in;
	req->obytes = V_viftable[vifi].v_bytes_out;
	VIF_UNLOCK();

	return 0;
}

static void
if_detached_event(void *arg __unused, struct ifnet *ifp)
{
	vifi_t vifi;
	u_long i;

	MROUTER_LOCK();

	if (V_ip_mrouter == NULL) {
		MROUTER_UNLOCK();
		return;
	}

	VIF_LOCK();
	MFC_LOCK();

	/*
	 * Tear down multicast forwarder state associated with this ifnet.
	 * 1. Walk the vif list, matching vifs against this ifnet.
	 * 2. Walk the multicast forwarding cache (mfc) looking for
	 *    inner matches with this vif's index.
	 * 3. Expire any matching multicast forwarding cache entries.
	 * 4. Free vif state. This should disable ALLMULTI on the interface.
	 */
	for (vifi = 0; vifi < V_numvifs; vifi++) {
		if (V_viftable[vifi].v_ifp != ifp)
			continue;
		for (i = 0; i < mfchashsize; i++) {
			struct mfc *rt, *nrt;

			LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
				if (rt->mfc_parent == vifi) {
					expire_mfc(rt);
				}
			}
		}
		del_vif_locked(vifi);
	}

	MFC_UNLOCK();
	VIF_UNLOCK();

	MROUTER_UNLOCK();
}

/*
 * Enable multicast forwarding.
 */
static int
ip_mrouter_init(struct socket *so, int version)
{

	CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
	    so->so_type, so->so_proto->pr_protocol);

	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_IGMP)
		return EOPNOTSUPP;

	if (version != 1)
		return ENOPROTOOPT;

	MROUTER_LOCK();

	if (ip_mrouter_unloading) {
		MROUTER_UNLOCK();
		return ENOPROTOOPT;
	}

	if (V_ip_mrouter != NULL) {
		MROUTER_UNLOCK();
		return EADDRINUSE;
	}

	V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
	    HASH_NOWAIT);

	callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
	    curvnet);
	callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD,
	    expire_bw_upcalls_send, curvnet);
	callout_reset(&V_bw_meter_ch, BW_METER_PERIOD,
	    expire_bw_meter_process, curvnet);

	V_ip_mrouter = so;
	ip_mrouter_cnt++;

	MROUTER_UNLOCK();

	CTR1(KTR_IPMF, "%s: done", __func__);

	return 0;
}

/*
 * Disable multicast forwarding.
 */
static int
X_ip_mrouter_done(void)
{
	struct ifnet *ifp;
	u_long i;
	vifi_t vifi;

	MROUTER_LOCK();

	if (V_ip_mrouter == NULL) {
		MROUTER_UNLOCK();
		return EINVAL;
	}

	/*
	 * Detach/disable hooks to the rest of the system.
	 */
	V_ip_mrouter = NULL;
	ip_mrouter_cnt--;
	V_mrt_api_config = 0;

	VIF_LOCK();

	/*
	 * For each phyint in use, disable promiscuous reception of all IP
	 * multicasts.
*/ for (vifi = 0; vifi < V_numvifs; vifi++) { if (!in_nullhost(V_viftable[vifi].v_lcl_addr) && !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { ifp = V_viftable[vifi].v_ifp; if_allmulti(ifp, 0); } } bzero((caddr_t)V_viftable, sizeof(V_viftable)); V_numvifs = 0; V_pim_assert_enabled = 0; VIF_UNLOCK(); callout_stop(&V_expire_upcalls_ch); callout_stop(&V_bw_upcalls_ch); callout_stop(&V_bw_meter_ch); MFC_LOCK(); /* * Free all multicast forwarding cache entries. * Do not use hashdestroy(), as we must perform other cleanup. */ for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { expire_mfc(rt); } } free(V_mfchashtbl, M_MRTABLE); V_mfchashtbl = NULL; bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize); V_bw_upcalls_n = 0; bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); MFC_UNLOCK(); V_reg_vif_num = VIFI_INVALID; MROUTER_UNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); return 0; } /* * Set PIM assert processing global */ static int set_assert(int i) { if ((i != 1) && (i != 0)) return EINVAL; V_pim_assert_enabled = i; return 0; } /* * Configure API capabilities */ int set_api_config(uint32_t *apival) { u_long i; /* * We can set the API capabilities only if it is the first operation * after MRT_INIT. I.e.: * - there are no vifs installed * - pim_assert is not enabled * - the MFC table is empty */ if (V_numvifs > 0) { *apival = 0; return EPERM; } if (V_pim_assert_enabled) { *apival = 0; return EPERM; } MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) { MFC_UNLOCK(); *apival = 0; return EPERM; } } MFC_UNLOCK(); V_mrt_api_config = *apival & mrt_api_support; *apival = V_mrt_api_config; return 0; } /* * Add a vif to the vif table */ static int add_vif(struct vifctl *vifcp) { struct vif *vifp = V_viftable + vifcp->vifc_vifi; struct sockaddr_in sin = {sizeof sin, AF_INET}; struct ifaddr *ifa; struct ifnet *ifp; int error; VIF_LOCK(); if (vifcp->vifc_vifi >= MAXVIFS) { VIF_UNLOCK(); return EINVAL; } /* rate limiting is no longer supported by this code */ if (vifcp->vifc_rate_limit != 0) { log(LOG_ERR, "rate limiting is no longer supported\n"); VIF_UNLOCK(); return EINVAL; } if (!in_nullhost(vifp->v_lcl_addr)) { VIF_UNLOCK(); return EADDRINUSE; } if (in_nullhost(vifcp->vifc_lcl_addr)) { VIF_UNLOCK(); return EADDRNOTAVAIL; } /* Find the interface with an address in AF_INET family */ if (vifcp->vifc_flags & VIFF_REGISTER) { /* * XXX: Because VIFF_REGISTER does not really need a valid * local interface (e.g. it could be 127.0.0.2), we don't * check its address. 
*/ ifp = NULL; } else { sin.sin_addr = vifcp->vifc_lcl_addr; ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa == NULL) { VIF_UNLOCK(); return EADDRNOTAVAIL; } ifp = ifa->ifa_ifp; ifa_free(ifa); } if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) { CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__); VIF_UNLOCK(); return EOPNOTSUPP; } else if (vifcp->vifc_flags & VIFF_REGISTER) { ifp = &V_multicast_register_if; CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp); if (V_reg_vif_num == VIFI_INVALID) { if_initname(&V_multicast_register_if, "register_vif", 0); V_multicast_register_if.if_flags = IFF_LOOPBACK; V_reg_vif_num = vifcp->vifc_vifi; } } else { /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { VIF_UNLOCK(); return EOPNOTSUPP; } /* Enable promiscuous reception of all IP multicasts from the if */ error = if_allmulti(ifp, 1); if (error) { VIF_UNLOCK(); return error; } } vifp->v_flags = vifcp->vifc_flags; vifp->v_threshold = vifcp->vifc_threshold; vifp->v_lcl_addr = vifcp->vifc_lcl_addr; vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; vifp->v_bytes_in = 0; vifp->v_bytes_out = 0; /* Adjust numvifs up if the vifi is higher than numvifs */ if (V_numvifs <= vifcp->vifc_vifi) V_numvifs = vifcp->vifc_vifi + 1; VIF_UNLOCK(); CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__, (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr), (int)vifcp->vifc_threshold); return 0; } /* * Delete a vif from the vif table */ static int del_vif_locked(vifi_t vifi) { struct vif *vifp; VIF_LOCK_ASSERT(); if (vifi >= V_numvifs) { return EINVAL; } vifp = &V_viftable[vifi]; if (in_nullhost(vifp->v_lcl_addr)) { return EADDRNOTAVAIL; } if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) if_allmulti(vifp->v_ifp, 0); if (vifp->v_flags & VIFF_REGISTER) V_reg_vif_num = VIFI_INVALID; bzero((caddr_t)vifp, sizeof (*vifp)); CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi); /* Adjust numvifs down */ for (vifi = V_numvifs; vifi > 0; vifi--) if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr)) break; V_numvifs = vifi; return 0; } static int del_vif(vifi_t vifi) { int cc; VIF_LOCK(); cc = del_vif_locked(vifi); VIF_UNLOCK(); return cc; } /* * update an mfc entry without resetting counters and S,G addresses. */ static void update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { int i; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config & MRT_MFC_FLAGS_ALL; } /* set the RP address */ if (V_mrt_api_config & MRT_MFC_RP) rt->mfc_rp = mfccp->mfcc_rp; else rt->mfc_rp.s_addr = INADDR_ANY; } /* * fully initialize an mfc entry from the parameter. 
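To make the add_vif() preconditions concrete, here is a hedged sketch of installing a phyint vif; mrouter_sock is the descriptor from the MRT_INIT sketch earlier, and the slot and address are placeholders:

#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>

static int
add_phyint_vif(int mrouter_sock, vifi_t slot, const char *local_addr)
{
	struct vifctl vc;

	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = slot;		/* index into V_viftable */
	vc.vifc_flags = 0;		/* neither VIFF_TUNNEL nor VIFF_REGISTER */
	vc.vifc_threshold = 1;		/* minimum TTL to forward out this vif */
	vc.vifc_rate_limit = 0;		/* anything else now fails with EINVAL */
	vc.vifc_lcl_addr.s_addr = inet_addr(local_addr);
	/* EADDRINUSE if the slot is occupied; EADDRNOTAVAIL if the
	 * address matches no AF_INET interface. */
	return (setsockopt(mrouter_sock, IPPROTO_IP, MRT_ADD_VIF,
	    &vc, sizeof(vc)));
}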
*/ static void init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { rt->mfc_origin = mfccp->mfcc_origin; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; update_mfc_params(rt, mfccp); /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); } static void expire_mfc(struct mfc *rt) { struct rtdetq *rte, *nrte; MFC_LOCK_ASSERT(); free_bw_list(rt->mfc_bw_meter); TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); free(rte, M_MRTABLE); } LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); } /* * Add an mfc entry */ static int add_mfc(struct mfcctl2 *mfccp) { struct mfc *rt; struct rtdetq *rte, *nrte; u_long hash = 0; u_short nstl; VIF_LOCK(); MFC_LOCK(); rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); /* If an entry already exists, just update the fields */ if (rt) { CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x", __func__, inet_ntoa(mfccp->mfcc_origin), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); update_mfc_params(rt, mfccp); MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Find the entry for which the upcall was made and update */ nstl = 0; hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) { CTR5(KTR_IPMF, "%s: add mfc orig %s group %lx parent %x qh %p", __func__, inet_ntoa(mfccp->mfcc_origin), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, TAILQ_FIRST(&rt->mfc_stall)); if (nstl++) CTR1(KTR_IPMF, "%s: multiple matches", __func__); init_mfc_params(rt, mfccp); rt->mfc_expire = 0; /* Don't clean this guy up */ V_nexpire[hash]--; /* Free queued packets, but attempt to forward them first. 
*/ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { if (rte->ifp != NULL) ip_mdq(rte->m, rte->ifp, rt, -1); m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall--; free(rte, M_MRTABLE); } } } /* * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { init_mfc_params(rt, mfccp); if (rt->mfc_expire) V_nexpire[hash]--; rt->mfc_expire = 0; break; /* XXX */ } } if (rt == NULL) { /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return (ENOBUFS); } init_mfc_params(rt, mfccp); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; rt->mfc_expire = 0; rt->mfc_bw_meter = NULL; /* insert new entry at head of hash chain */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); } } MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Delete an mfc entry */ static int del_mfc(struct mfcctl2 *mfccp) { struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__, inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr)); MFC_LOCK(); rt = mfc_find(&origin, &mcastgrp); if (rt == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } /* * free the bw_meter entries */ free_bw_list(rt->mfc_bw_meter); rt->mfc_bw_meter = NULL; LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); MFC_UNLOCK(); return (0); } /* * Send a message to the routing daemon on the multicast routing socket. */ static int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) { if (s) { SOCKBUF_LOCK(&s->so_rcv); if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { sorwakeup_locked(s); return 0; } SOCKBUF_UNLOCK(&s->so_rcv); } m_freem(mm); return -1; } /* * IP multicast forwarding function. This function assumes that the packet * pointed to by "ip" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IP multicast group. * * The packet is returned unscathed to the caller, unless it is * erroneous, in which case a non-zero return value tells the caller to * discard it. */ #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo) { struct mfc *rt; int error; vifi_t vifi; CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p", inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* * Packet arrived via a physical interface or * an encapsulated tunnel or a register_vif. */ } else { /* * Packet arrived through a source-route tunnel. * Source-route tunnels are no longer supported. */ return (1); } VIF_LOCK(); MFC_LOCK(); if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) { if (ip->ip_ttl < MAXTTL) ip->ip_ttl++; /* compensate for -1 in *_send routines */ error = ip_mdq(m, ifp, NULL, vifi); MFC_UNLOCK(); VIF_UNLOCK(); return error; } /* * Don't forward a packet with time-to-live of zero or one, * or a packet destined to a local-only group. 
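The daemon's usual reaction to the IGMPMSG_NOCACHE upcall generated below is an MRT_ADD_MFC that lands in add_mfc() above and resolves the stalled entry. A hedged sketch using the classic struct mfcctl (as far as I recall, the mfcctl2 used above extends it, and the kernel accepts either size):

#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>

static int
install_route(int mrouter_sock, struct in_addr src, struct in_addr grp)
{
	struct mfcctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin = src;		/* S */
	mc.mfcc_mcastgrp = grp;		/* G */
	mc.mfcc_parent = 0;		/* expected incoming vif */
	mc.mfcc_ttls[1] = 1;		/* forward on vif 1 when ip_ttl > 1 */
	return (setsockopt(mrouter_sock, IPPROTO_IP, MRT_ADD_MFC,
	    &mc, sizeof(mc)));
}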
*/ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) { MFC_UNLOCK(); VIF_UNLOCK(); return 0; } /* * Determine forwarding vifs from the forwarding cache table */ MRTSTAT_INC(mrts_mfc_lookups); rt = mfc_find(&ip->ip_src, &ip->ip_dst); /* Entry exists, so forward if necessary */ if (rt != NULL) { error = ip_mdq(m, ifp, rt, -1); MFC_UNLOCK(); VIF_UNLOCK(); return error; } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & send message to routing daemon */ struct mbuf *mb0; struct rtdetq *rte; u_long hash; int hlen = ip->ip_hl << 2; MRTSTAT_INC(mrts_mfc_misses); MRTSTAT_INC(mrts_no_route); CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)", inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr)); /* * Allocate mbufs early so that we don't do extra work if we are * just going to fail anyway. Make sure to pullup the header so * that other people can't step on it. */ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT|M_ZERO); if (rte == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } mb0 = m_copypacket(m, M_NOWAIT); if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen)) mb0 = m_pullup(mb0, hlen); if (mb0 == NULL) { free(rte, M_MRTABLE); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* is there an upcall waiting for this flow ? */ hash = MFCHASH(ip->ip_src, ip->ip_dst); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(ip->ip_src, rt->mfc_origin) && in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) break; } if (rt == NULL) { int i; struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct mbuf *mm; /* * Locate the vifi for the incoming interface for this packet. * If none found, drop packet. */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) /* vif not found, drop packet */ goto non_fatal; /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) goto fail; /* Make a copy of the header to send to the user level process */ - mm = m_copy(mb0, 0, hlen); + mm = m_copym(mb0, 0, hlen, M_NOWAIT); if (mm == NULL) goto fail1; /* * Send message to routing daemon to install * a route into the kernel table */ im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_NOCACHE; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = ip->ip_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR0(KTR_IPMF, "ip_mforward: socket queue full"); MRTSTAT_INC(mrts_upq_sockfull); fail1: free(rt, M_MRTABLE); fail: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* insert new entry at head of hash chain */ rt->mfc_origin.s_addr = ip->ip_src.s_addr; rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; rt->mfc_expire = UPCALL_EXPIRE; V_nexpire[hash]++; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = 0; rt->mfc_flags[i] = 0; } rt->mfc_parent = -1; /* clear the RP address */ rt->mfc_rp.s_addr = INADDR_ANY; rt->mfc_bw_meter = NULL; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; /* link into table */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } else { /* determine if queue has overflowed */ if (rt->mfc_nstall > MAX_UPQ) { MRTSTAT_INC(mrts_upq_ovflw); non_fatal: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); 
VIF_UNLOCK(); return (0); } TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } rte->m = mb0; rte->ifp = ifp; MFC_UNLOCK(); VIF_UNLOCK(); return 0; } } /* * Clean up the cache entry if upcall is not serviced */ static void expire_upcalls(void *arg) { u_long i; CURVNET_SET((struct vnet *) arg); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; if (V_nexpire[i] == 0) continue; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { if (TAILQ_EMPTY(&rt->mfc_stall)) continue; if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) continue; /* * free the bw_meter entries */ while (rt->mfc_bw_meter != NULL) { struct bw_meter *x = rt->mfc_bw_meter; rt->mfc_bw_meter = x->bm_mfc_next; free(x, M_BWMETER); } MRTSTAT_INC(mrts_cache_cleanups); CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__, (u_long)ntohl(rt->mfc_origin.s_addr), (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); expire_mfc(rt); } } MFC_UNLOCK(); callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, curvnet); CURVNET_RESTORE(); } /* * Packet forwarding routine once entry in the cache is made */ static int ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) { struct ip *ip = mtod(m, struct ip *); vifi_t vifi; int plen = ntohs(ip->ip_len); VIF_LOCK_ASSERT(); /* * If xmt_vif is not -1, send on only the requested vif. * * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) */ if (xmt_vif < V_numvifs) { if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + xmt_vif, m, rt); else phyint_send(ip, V_viftable + xmt_vif, m); return 1; } /* * Don't forward if it didn't arrive from the parent vif for its origin. */ vifi = rt->mfc_parent; if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) { CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)", __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp); MRTSTAT_INC(mrts_wrong_if); ++rt->mfc_wrong_if; /* * If we are doing PIM assert processing, send a message * to the routing daemon. * * XXX: A PIM-SM router needs the WRONGVIF detection so it * can complete the SPT switch, regardless of the type * of the iif (broadcast media, GRE tunnel, etc). */ if (V_pim_assert_enabled && (vifi < V_numvifs) && V_viftable[vifi].v_ifp) { if (ifp == &V_multicast_register_if) PIMSTAT_INC(pims_rcv_registers_wrongiif); /* Get vifi for the incoming packet */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) return 0; /* The iif is not found: ignore the packet. */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) return 0; /* WRONGVIF disabled: ignore the packet */ if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct igmpmsg *im; int hlen = ip->ip_hl << 2; - struct mbuf *mm = m_copy(m, 0, hlen); + struct mbuf *mm = m_copym(m, 0, hlen, M_NOWAIT); if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen)) mm = m_pullup(mm, hlen); if (mm == NULL) return ENOBUFS; im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = im->im_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } } } return 0; } /* If I sourced this packet, it counts as output, else it was input. 
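For reference, the IGMPMSG_NOCACHE and IGMPMSG_WRONGVIF upcalls queued through socket_send() arrive as datagrams on the daemon's raw IGMP socket. A hedged reading sketch follows; the assumption that im_mbz == 0 marks a kernel upcall (im_mbz overlays the IP protocol byte, which is never zero in genuine IGMP traffic) is the usual daemon-side convention, not something this diff states:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>

static void
read_one_upcall(int mrouter_sock)
{
	struct igmpmsg im;
	ssize_t n;

	n = recv(mrouter_sock, &im, sizeof(im), 0);
	if (n < (ssize_t)sizeof(im) || im.im_mbz != 0)
		return;		/* short read, or genuine IGMP traffic */
	switch (im.im_msgtype) {
	case IGMPMSG_NOCACHE:
		/* resolve (im.im_src, im.im_dst), then MRT_ADD_MFC */
		break;
	case IGMPMSG_WRONGVIF:
		/* possible PIM assert for vif im.im_vif */
		break;
	}
}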
*/ if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; } else { V_viftable[vifi].v_pkt_in++; V_viftable[vifi].v_bytes_in += plen; } rt->mfc_pkt_cnt++; rt->mfc_byte_cnt += plen; /* * For each vif, decide if a copy of the packet should be forwarded. * Forward if: * - the ttl exceeds the vif's threshold * - there are group members downstream on interface */ for (vifi = 0; vifi < V_numvifs; vifi++) if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; if (V_viftable[vifi].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + vifi, m, rt); else phyint_send(ip, V_viftable + vifi, m); } /* * Perform upcall-related bw measuring. */ if (rt->mfc_bw_meter != NULL) { struct bw_meter *x; struct timeval now; microtime(&now); MFC_LOCK_ASSERT(); for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) bw_meter_receive_packet(x, plen, &now); } return 0; } /* * Check if a vif number is legal/ok. This is used by in_mcast.c. */ static int X_legal_vif_num(int vif) { int ret; ret = 0; if (vif < 0) return (ret); VIF_LOCK(); if (vif < V_numvifs) ret = 1; VIF_UNLOCK(); return (ret); } /* * Return the local address used by this vif */ static u_long X_ip_mcast_src(int vifi) { in_addr_t addr; addr = INADDR_ANY; if (vifi < 0) return (addr); VIF_LOCK(); if (vifi < V_numvifs) addr = V_viftable[vifi].v_lcl_addr.s_addr; VIF_UNLOCK(); return (addr); } static void phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) { struct mbuf *mb_copy; int hlen = ip->ip_hl << 2; VIF_LOCK_ASSERT(); /* * Make a new reference to the packet; make sure that * the IP header is actually copied, not just referenced, * so that ip_output() only scribbles on the copy. */ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < hlen)) mb_copy = m_pullup(mb_copy, hlen); if (mb_copy == NULL) return; send_packet(vifp, mb_copy); } static void send_packet(struct vif *vifp, struct mbuf *m) { struct ip_moptions imo; struct in_multi *imm[2]; int error; VIF_LOCK_ASSERT(); imo.imo_multicast_ifp = vifp->v_ifp; imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; imo.imo_multicast_loop = 1; imo.imo_multicast_vif = -1; imo.imo_num_memberships = 0; imo.imo_max_memberships = 2; imo.imo_membership = &imm[0]; /* * Re-entrancy should not be a problem here, because * the packets that we send out and are looped back at us * should get rejected because they appear to come from * the loopback interface, thus preventing looping. */ error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL); CTR3(KTR_IPMF, "%s: vif %td err %d", __func__, (ptrdiff_t)(vifp - V_viftable), error); } /* * Stubs for old RSVP socket shim implementation. 
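send_packet() above hands each copy to ip_output() with a kernel-built struct ip_moptions; imo_multicast_ttl is set from ip_ttl - 1, matching the "compensate for -1" note in X_ip_mforward(). For comparison, a sketch of the user-space counterparts of those fields on an ordinary socket (the address is a placeholder):

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static void
set_mcast_options(int s)
{
	u_char ttl = 16, loop = 1;
	struct in_addr ifaddr;

	ifaddr.s_addr = inet_addr("192.0.2.1");	/* outgoing interface */
	setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr, sizeof(ifaddr));
	setsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
	setsockopt(s, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop));
}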
*/ static int X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) { return (EOPNOTSUPP); } static void X_ip_rsvp_force_done(struct socket *so __unused) { } static int X_rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (!V_rsvp_on) m_freem(m); return (IPPROTO_DONE); } /* * Code for bandwidth monitors */ /* * Define common interface for timeval-related methods */ #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) static uint32_t compute_bw_meter_flags(struct bw_upcall *req) { uint32_t flags = 0; if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) flags |= BW_METER_UNIT_PACKETS; if (req->bu_flags & BW_UPCALL_UNIT_BYTES) flags |= BW_METER_UNIT_BYTES; if (req->bu_flags & BW_UPCALL_GEQ) flags |= BW_METER_GEQ; if (req->bu_flags & BW_UPCALL_LEQ) flags |= BW_METER_LEQ; return flags; } /* * Add a bw_meter entry */ static int add_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; struct timeval now; struct bw_meter *x; uint32_t flags; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; /* Test if the flags are valid */ if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) return EINVAL; if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) return EINVAL; if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) return EINVAL; /* Test if the threshold time interval is valid */ if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) return EINVAL; flags = compute_bw_meter_flags(req); /* * Find if we have already same bw_meter entry */ MFC_LOCK(); mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) { MFC_UNLOCK(); return 0; /* XXX Already installed */ } } /* Allocate the new bw_meter entry */ x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); if (x == NULL) { MFC_UNLOCK(); return ENOBUFS; } /* Set the new bw_meter entry */ x->bm_threshold.b_time = req->bu_threshold.b_time; microtime(&now); x->bm_start_time = now; x->bm_threshold.b_packets = req->bu_threshold.b_packets; x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags = flags; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; /* Add the new bw_meter entry to the front of entries for this MFC */ x->bm_mfc = mfc; x->bm_mfc_next = mfc->mfc_bw_meter; mfc->mfc_bw_meter = x; schedule_bw_meter(x, &now); MFC_UNLOCK(); return 0; } static void free_bw_list(struct bw_meter *list) { while (list != NULL) { struct bw_meter *x = list; list = list->bm_mfc_next; unschedule_bw_meter(x); free(x, M_BWMETER); } } /* * Delete one or multiple bw_meter entries */ static int del_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct bw_meter *x; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; MFC_LOCK(); /* Find the corresponding MFC entry */ mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } else if (req->bu_flags 
& BW_UPCALL_DELETE_ALL) { /* * Delete all bw_meter entries for this mfc */ struct bw_meter *list; list = mfc->mfc_bw_meter; mfc->mfc_bw_meter = NULL; free_bw_list(list); MFC_UNLOCK(); return 0; } else { /* Delete a single bw_meter entry */ struct bw_meter *prev; uint32_t flags = 0; flags = compute_bw_meter_flags(req); /* Find the bw_meter entry to delete */ for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; prev = x, x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) break; } if (x != NULL) { /* Delete entry from the list for this MFC */ if (prev != NULL) prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ else x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ unschedule_bw_meter(x); MFC_UNLOCK(); /* Free the bw_meter entry */ free(x, M_BWMETER); return 0; } else { MFC_UNLOCK(); return EINVAL; } } /* NOTREACHED */ } /* * Perform bandwidth measurement processing that may result in an upcall */ static void bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) { struct timeval delta; MFC_LOCK_ASSERT(); delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); if (x->bm_flags & BW_METER_GEQ) { /* * Processing for ">=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should deliver an upcall */ if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); x->bm_flags |= BW_METER_UPCALL_DELIVERED; } } } else if (x->bm_flags & BW_METER_LEQ) { /* * Processing for "<=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* * We are behind time with the multicast forwarding table * scanning for "<=" type of bw_meter entries, so test now * if we should deliver an upcall. */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); } /* Reschedule the bw_meter entry */ unschedule_bw_meter(x); schedule_bw_meter(x, nowp); } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should restart the measuring interval */ if ((x->bm_flags & BW_METER_UNIT_PACKETS && x->bm_measured.b_packets <= x->bm_threshold.b_packets) || (x->bm_flags & BW_METER_UNIT_BYTES && x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { /* Don't restart the measuring interval */ } else { /* Do restart the measuring interval */ /* * XXX: note that we don't unschedule and schedule, because this * might be too much overhead per packet. Instead, when we process * all entries for a given timer hash bin, we check whether it is * really a timeout. If not, we reschedule at that time. 
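A hedged sketch of the daemon request serviced by add_bw_upcall() above; treat the field layout as an assumption from memory of ip_mroute.h. The validation shown earlier requires at least one unit flag, exactly one of GEQ/LEQ, and an interval no shorter than the minimum threshold:

#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>

static int
watch_flow_rate(int mrouter_sock, struct in_addr src, struct in_addr grp)
{
	struct bw_upcall bu;

	memset(&bu, 0, sizeof(bu));
	bu.bu_src = src;
	bu.bu_dst = grp;
	bu.bu_flags = BW_UPCALL_UNIT_PACKETS | BW_UPCALL_GEQ;
	bu.bu_threshold.b_time.tv_sec = 5;	/* measuring interval */
	bu.bu_threshold.b_packets = 10000;	/* upcall at >= 10000 pkts */
	/* EOPNOTSUPP unless MRT_MFC_BW_UPCALL was set via MRT_API_CONFIG. */
	return (setsockopt(mrouter_sock, IPPROTO_IP, MRT_ADD_BW_UPCALL,
	    &bu, sizeof(bu)));
}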
*/ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } } } /* * Prepare a bandwidth-related upcall */ static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) { struct timeval delta; struct bw_upcall *u; MFC_LOCK_ASSERT(); /* * Compute the measured time interval */ delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); /* * If there are too many pending upcalls, deliver them now */ if (V_bw_upcalls_n >= BW_UPCALLS_MAX) bw_upcalls_send(); /* * Set the bw_upcall entry */ u = &V_bw_upcalls[V_bw_upcalls_n++]; u->bu_src = x->bm_mfc->mfc_origin; u->bu_dst = x->bm_mfc->mfc_mcastgrp; u->bu_threshold.b_time = x->bm_threshold.b_time; u->bu_threshold.b_packets = x->bm_threshold.b_packets; u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; u->bu_measured.b_time = delta; u->bu_measured.b_packets = x->bm_measured.b_packets; u->bu_measured.b_bytes = x->bm_measured.b_bytes; u->bu_flags = 0; if (x->bm_flags & BW_METER_UNIT_PACKETS) u->bu_flags |= BW_UPCALL_UNIT_PACKETS; if (x->bm_flags & BW_METER_UNIT_BYTES) u->bu_flags |= BW_UPCALL_UNIT_BYTES; if (x->bm_flags & BW_METER_GEQ) u->bu_flags |= BW_UPCALL_GEQ; if (x->bm_flags & BW_METER_LEQ) u->bu_flags |= BW_UPCALL_LEQ; } /* * Send the pending bandwidth-related upcalls */ static void bw_upcalls_send(void) { struct mbuf *m; int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]); struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; static struct igmpmsg igmpmsg = { 0, /* unused1 */ 0, /* unused2 */ IGMPMSG_BW_UPCALL,/* im_msgtype */ 0, /* im_mbz */ 0, /* im_vif */ 0, /* unused3 */ { 0 }, /* im_src */ { 0 } }; /* im_dst */ MFC_LOCK_ASSERT(); if (V_bw_upcalls_n == 0) return; /* No pending upcalls */ V_bw_upcalls_n = 0; /* * Allocate a new mbuf, initialize it with the header and * the payload for the pending calls. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); return; } m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]); /* * Send the upcalls * XXX do we need to set the address in k_igmpsrc ? */ MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) { log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); MRTSTAT_INC(mrts_upq_sockfull); } } /* * Compute the timeout hash value for the bw_meter entries */ #define BW_METER_TIMEHASH(bw_meter, hash) \ do { \ struct timeval next_timeval = (bw_meter)->bm_start_time; \ \ BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ (hash) = next_timeval.tv_sec; \ if (next_timeval.tv_usec) \ (hash)++; /* XXX: make sure we don't timeout early */ \ (hash) %= BW_METER_BUCKETS; \ } while (0) /* * Schedule a timer to process periodically bw_meter entry of type "<=" * by linking the entry in the proper hash bucket. 
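BW_METER_TIMEHASH above is a one-second-granularity timer wheel: the absolute expiry time, rounded up to the next second, selects one of BW_METER_BUCKETS list heads. A standalone model of just the bucket computation (the bucket count here is an assumption for the sketch):

#include <sys/time.h>

#define NBUCKETS 1024		/* stands in for BW_METER_BUCKETS */

static int
expiry_bucket(struct timeval start, struct timeval threshold)
{
	struct timeval next;
	long hash;

	timeradd(&start, &threshold, &next);	/* expiry = start + interval */
	hash = next.tv_sec;
	if (next.tv_usec != 0)
		hash++;		/* round up so a meter never fires early */
	return ((int)(hash % NBUCKETS));
}

bw_meter_process() then only needs to walk the buckets between the last processed second and the current one, which is why it caps its loop count at the number of buckets.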
*/ static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) { int time_hash; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; /* * Compute the timeout hash value and insert the entry */ BW_METER_TIMEHASH(x, time_hash); x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; } /* * Unschedule the periodic timer that processes bw_meter entry of type "<=" * by removing the entry from the proper hash bucket. */ static void unschedule_bw_meter(struct bw_meter *x) { int time_hash; struct bw_meter *prev, *tmp; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Compute the timeout hash value and delete the entry */ time_hash = x->bm_time_hash; if (time_hash >= BW_METER_BUCKETS) return; /* Entry was not scheduled */ for (prev = NULL, tmp = V_bw_meter_timers[time_hash]; tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) if (tmp == x) break; if (tmp == NULL) panic("unschedule_bw_meter: bw_meter entry not found"); if (prev != NULL) prev->bm_time_next = x->bm_time_next; else V_bw_meter_timers[time_hash] = x->bm_time_next; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; } /* * Process all "<=" type of bw_meter that should be processed now, * and for each entry prepare an upcall if necessary. Each processed * entry is rescheduled again for the (periodic) processing. * * This is run periodically (once per second normally). On each round, * all the potentially matching entries are in the hash slot that we are * looking at. */ static void bw_meter_process() { uint32_t loops; int i; struct timeval now, process_endtime; microtime(&now); if (V_last_tv_sec == now.tv_sec) return; /* nothing to do */ loops = now.tv_sec - V_last_tv_sec; V_last_tv_sec = now.tv_sec; if (loops > BW_METER_BUCKETS) loops = BW_METER_BUCKETS; MFC_LOCK(); /* * Process all bins of bw_meter entries from the one after the last * processed to the current one. On entry, i points to the last bucket * visited, so we need to increment i at the beginning of the loop. */ for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { struct bw_meter *x, *tmp_list; if (++i >= BW_METER_BUCKETS) i = 0; /* Disconnect the list of bw_meter entries from the bin */ tmp_list = V_bw_meter_timers[i]; V_bw_meter_timers[i] = NULL; /* Process the list of bw_meter entries */ while (tmp_list != NULL) { x = tmp_list; tmp_list = tmp_list->bm_time_next; /* Test if the time interval is over */ process_endtime = x->bm_start_time; BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); if (BW_TIMEVALCMP(&process_endtime, &now, >)) { /* Not yet: reschedule, but don't reset */ int time_hash; BW_METER_TIMEHASH(x, time_hash); if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { /* * XXX: somehow the bin processing is a bit ahead of time. * Put the entry in the next bin. 
*/ if (++time_hash >= BW_METER_BUCKETS) time_hash = 0; } x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; continue; } /* * Test if we should deliver an upcall */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, &now); } /* * Reschedule for next processing */ schedule_bw_meter(x, &now); } } /* Send all upcalls that are pending delivery */ bw_upcalls_send(); MFC_UNLOCK(); } /* * A periodic function for sending all upcalls that are pending delivery */ static void expire_bw_upcalls_send(void *arg) { CURVNET_SET((struct vnet *) arg); MFC_LOCK(); bw_upcalls_send(); MFC_UNLOCK(); callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, curvnet); CURVNET_RESTORE(); } /* * A periodic function for periodic scanning of the multicast forwarding * table for processing all "<=" bw_meter entries. */ static void expire_bw_meter_process(void *arg) { CURVNET_SET((struct vnet *) arg); if (V_mrt_api_config & MRT_MFC_BW_UPCALL) bw_meter_process(); callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, curvnet); CURVNET_RESTORE(); } /* * End of bandwidth monitoring code */ /* * Send the packet up to the user daemon, or eventually do kernel encapsulation * */ static int pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, struct mfc *rt) { struct mbuf *mb_copy, *mm; /* * Do not send IGMP_WHOLEPKT notifications to userland, if the * rendezvous point was unspecified, and we were told not to. */ if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) && in_nullhost(rt->mfc_rp)) return 0; mb_copy = pim_register_prepare(ip, m); if (mb_copy == NULL) return ENOBUFS; /* * Send all the fragments. Note that the mbuf for each fragment * is freed by the sending machinery. */ for (mm = mb_copy; mm; mm = mb_copy) { mb_copy = mm->m_nextpkt; mm->m_nextpkt = 0; mm = m_pullup(mm, sizeof(struct ip)); if (mm != NULL) { ip = mtod(mm, struct ip *); if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { pim_register_send_rp(ip, vifp, mm, rt); } else { pim_register_send_upcall(ip, vifp, mm, rt); } } } return 0; } /* * Return a copy of the data packet that is ready for PIM Register * encapsulation. * XXX: Note that in the returned copy the IP header is a valid one. */ static struct mbuf * pim_register_prepare(struct ip *ip, struct mbuf *m) { struct mbuf *mb_copy = NULL; int mtu; /* Take care of delayed checksums */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* * Copy the old packet & pullup its IP header into the * new mbuf so we can modify it. 
*/ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy == NULL) return NULL; mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); if (mb_copy == NULL) return NULL; /* take care of the TTL */ ip = mtod(mb_copy, struct ip *); --ip->ip_ttl; /* Compute the MTU after the PIM Register encapsulation */ mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); if (ntohs(ip->ip_len) <= mtu) { /* Turn the IP header into a valid one */ ip->ip_sum = 0; ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); } else { /* Fragment the packet */ mb_copy->m_pkthdr.csum_flags |= CSUM_IP; if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) { m_freem(mb_copy); return NULL; } } return mb_copy; } /* * Send an upcall with the data packet to the user-level process. */ static int pim_register_send_upcall(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; int len = ntohs(ip->ip_len); struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; VIF_LOCK_ASSERT(); /* * Add a new mbuf with an upcall header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); mb_first->m_len = sizeof(struct igmpmsg); mb_first->m_next = mb_copy; /* Send message to routing daemon */ im = mtod(mb_first, struct igmpmsg *); im->im_msgtype = IGMPMSG_WHOLEPKT; im->im_mbz = 0; im->im_vif = vifp - V_viftable; im->im_src = ip->ip_src; im->im_dst = ip->ip_dst; k_igmpsrc.sin_addr = ip->ip_src; MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * Encapsulate the data packet in PIM Register message and send it to the RP. */ static int pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; struct ip *ip_outer; struct pim_encap_pimhdr *pimhdr; int len = ntohs(ip->ip_len); vifi_t vifi = rt->mfc_parent; VIF_LOCK_ASSERT(); if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) { m_freem(mb_copy); return EADDRNOTAVAIL; /* The iif vif is invalid */ } /* * Add a new mbuf with the encapsulating header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); mb_first->m_next = mb_copy; mb_first->m_pkthdr.len = len + mb_first->m_len; /* * Fill in the encapsulating IP and PIM header */ ip_outer = mtod(mb_first, struct ip *); *ip_outer = pim_encap_iphdr; ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr)); ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; ip_outer->ip_dst = rt->mfc_rp; /* * Copy the inner header TOS to the outer header, and take care of the * IP_DF bit. 
*/ ip_outer->ip_tos = ip->ip_tos; if (ip->ip_off & htons(IP_DF)) ip_outer->ip_off |= htons(IP_DF); ip_fillid(ip_outer); pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer + sizeof(pim_encap_iphdr)); *pimhdr = pim_encap_pimhdr; /* If the iif crosses a border, set the Border-bit */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config) pimhdr->flags |= htonl(PIM_BORDER_REGISTER); mb_first->m_data += sizeof(pim_encap_iphdr); pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); mb_first->m_data -= sizeof(pim_encap_iphdr); send_packet(vifp, mb_first); /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * pim_encapcheck() is called by the encap4_input() path at runtime to * determine if a packet is for PIM, allowing PIM to be dynamically loaded * into the kernel. */ static int pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { #ifdef DIAGNOSTIC KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); #endif if (proto != IPPROTO_PIM) return 0; /* not for us; reject the datagram. */ return 64; /* claim the datagram. */ } /* * PIM-SMv2 and PIM-DM message processing. * Receives and verifies the PIM control messages, and passes them * up to the listening socket, using rip_input(). * The only message with special processing is the PIM_REGISTER message * (used by PIM-SM): the PIM header is stripped off, and the inner packet * is passed to if_simloop(). */ int pim_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pim *pim; int iphlen = *offp; int minlen; int datalen = ntohs(ip->ip_len) - iphlen; int ip_tos; *mp = NULL; /* Keep statistics */ PIMSTAT_INC(pims_rcv_total_msgs); PIMSTAT_ADD(pims_rcv_total_bytes, datalen); /* * Validate lengths */ if (datalen < PIM_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); CTR3(KTR_IPMF, "%s: short packet (%d) from %s", __func__, datalen, inet_ntoa(ip->ip_src)); m_freem(m); return (IPPROTO_DONE); } /* * If the packet is at least as big as a REGISTER, go ahead * and grab the PIM REGISTER header size, to avoid another * possible m_pullup() later. * * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 */ minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); /* * Get the IP and PIM headers in contiguous memory, and * possibly the PIM REGISTER header. */ if (m->m_len < minlen && (m = m_pullup(m, minlen)) == NULL) { CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__); return (IPPROTO_DONE); } /* m_pullup() may have given us a new mbuf so reset ip. */ ip = mtod(m, struct ip *); ip_tos = ip->ip_tos; /* adjust mbuf to point to the PIM header */ m->m_data += iphlen; m->m_len -= iphlen; pim = mtod(m, struct pim *); /* * Validate checksum. If PIM REGISTER, exclude the data packet. * * XXX: some older PIMv2 implementations don't make this distinction, * so for compatibility reasons perform the checksum over part of the * message, and if error, then over the whole message. 
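The compatibility rule in the comment above reduces to: for REGISTER messages, accept a checksum over the first PIM_MINLEN bytes, otherwise require the whole datagram to verify. A self-contained model using a classic ones-complement sum (endianness subtleties ignored for brevity):

#include <stddef.h>
#include <stdint.h>

#define PIM_MINLEN 8	/* pimhdr + u_int32_t, per the comment above */

static uint16_t
cksum(const void *data, size_t len)
{
	const uint16_t *w = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += *w++;
		len -= 2;
	}
	if (len)		/* trailing odd byte */
		sum += *(const uint8_t *)w;
	while (sum >> 16)	/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

static int
pim_cksum_ok(const void *pim, size_t datalen, int is_register)
{
	if (is_register && cksum(pim, PIM_MINLEN) == 0)
		return (1);	/* header-only checksum, old-style senders */
	return (cksum(pim, datalen) == 0);
}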
*/ if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { /* do nothing, checksum okay */ } else if (in_cksum(m, datalen)) { PIMSTAT_INC(pims_rcv_badsum); CTR1(KTR_IPMF, "%s: invalid checksum", __func__); m_freem(m); return (IPPROTO_DONE); } /* PIM version check */ if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { PIMSTAT_INC(pims_rcv_badversion); CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__, (int)PIM_VT_V(pim->pim_vt), PIM_VERSION); m_freem(m); return (IPPROTO_DONE); } /* restore mbuf back to the outer IP */ m->m_data -= iphlen; m->m_len += iphlen; if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { /* * Since this is a REGISTER, we'll make a copy of the register * headers ip + pim + u_int32 + encap_ip, to be passed up to the * routing daemon. */ struct sockaddr_in dst = { sizeof(dst), AF_INET }; struct mbuf *mcp; struct ip *encap_ip; u_int32_t *reghdr; struct ifnet *vifp; VIF_LOCK(); if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) { VIF_UNLOCK(); CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__, (int)V_reg_vif_num); m_freem(m); return (IPPROTO_DONE); } /* XXX need refcnt? */ vifp = V_viftable[V_reg_vif_num].v_ifp; VIF_UNLOCK(); /* * Validate length */ if (datalen < PIM_REG_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: register packet size too small", __func__); m_freem(m); return (IPPROTO_DONE); } reghdr = (u_int32_t *)(pim + 1); encap_ip = (struct ip *)(reghdr + 1); CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d", __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len)); /* verify the version number of the inner packet */ if (encap_ip->ip_v != IPVERSION) { PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: bad encap ip version", __func__); m_freem(m); return (IPPROTO_DONE); } /* verify the inner packet is destined to a mcast group */ if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { PIMSTAT_INC(pims_rcv_badregisters); CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__, inet_ntoa(encap_ip->ip_dst)); m_freem(m); return (IPPROTO_DONE); } /* If a NULL_REGISTER, pass it to the daemon */ if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) goto pim_input_to_daemon; /* * Copy the TOS from the outer IP header to the inner IP header. */ if (encap_ip->ip_tos != ip_tos) { /* Outer TOS -> inner TOS */ encap_ip->ip_tos = ip_tos; /* Recompute the inner header checksum. Sigh... */ /* adjust mbuf to point to the inner IP header */ m->m_data += (iphlen + PIM_MINLEN); m->m_len -= (iphlen + PIM_MINLEN); encap_ip->ip_sum = 0; encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); /* restore mbuf to point back to the outer IP header */ m->m_data -= (iphlen + PIM_MINLEN); m->m_len += (iphlen + PIM_MINLEN); } /* * Decapsulate the inner IP packet and loopback to forward it * as a normal multicast packet. Also, make a copy of the * outer_iphdr + pimhdr + reghdr + encap_iphdr * to pass to the daemon later, so it can take the appropriate * actions (e.g., send back PIM_REGISTER_STOP). * XXX: here m->m_data points to the outer IP header. */ - mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); + mcp = m_copym(m, 0, iphlen + PIM_REG_MINLEN, M_NOWAIT); if (mcp == NULL) { - CTR1(KTR_IPMF, "%s: m_copy() failed", __func__); + CTR1(KTR_IPMF, "%s: m_copym() failed", __func__); m_freem(m); return (IPPROTO_DONE); } /* Keep statistics */ /* XXX: registers_bytes include only the encap. 
mcast pkt */ PIMSTAT_INC(pims_rcv_registers_msgs); PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len)); /* * forward the inner ip packet; point m_data at the inner ip. */ m_adj(m, iphlen + PIM_MINLEN); CTR4(KTR_IPMF, "%s: forward decap'd REGISTER: src %lx dst %lx vif %d", __func__, (u_long)ntohl(encap_ip->ip_src.s_addr), (u_long)ntohl(encap_ip->ip_dst.s_addr), (int)V_reg_vif_num); /* NB: vifp was collected above; can it change on us? */ if_simloop(vifp, m, dst.sin_family, 0); /* prepare the register head to send to the mrouting daemon */ m = mcp; } pim_input_to_daemon: /* * Pass the PIM message up to the daemon; if it is a Register message, * pass the 'head' only up to the daemon. This includes the * outer IP header, PIM header, PIM-Register header and the * inner IP header. * XXX: the outer IP header pkt size of a Register is not adjusted to * reflect the fact that the inner multicast data is truncated. */ *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } static int sysctl_mfctable(SYSCTL_HANDLER_ARGS) { struct mfc *rt; int error, i; if (req->newptr) return (EPERM); if (V_mfchashtbl == NULL) /* XXX unlocked */ return (0); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); if (error) goto out_locked; } } out_locked: MFC_UNLOCK(); return (error); } static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable, "IPv4 Multicast Forwarding Table " "(struct *mfc[mfchashsize], netinet/ip_mroute.h)"); static void vnet_mroute_init(const void *unused __unused) { MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); callout_init(&V_expire_upcalls_ch, 1); callout_init(&V_bw_upcalls_ch, 1); callout_init(&V_bw_meter_ch, 1); } VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init, NULL); static void vnet_mroute_uninit(const void *unused __unused) { FREE(V_nexpire, M_MRTABLE); V_nexpire = NULL; } VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, vnet_mroute_uninit, NULL); static int ip_mroute_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: MROUTER_LOCK_INIT(); if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, if_detached_event, NULL, EVENTHANDLER_PRI_ANY); if (if_detach_event_tag == NULL) { printf("ip_mroute: unable to register " "ifnet_departure_event handler\n"); MROUTER_LOCK_DESTROY(); return (EINVAL); } MFC_LOCK_INIT(); VIF_LOCK_INIT(); mfchashsize = MFCHASHSIZE; if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && !powerof2(mfchashsize)) { printf("WARNING: %s not a power of 2; using default\n", "net.inet.ip.mfchashsize"); mfchashsize = MFCHASHSIZE; } pim_squelch_wholepkt = 0; TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", &pim_squelch_wholepkt); pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, pim_encapcheck, &in_pim_protosw, NULL); if (pim_encap_cookie == NULL) { printf("ip_mroute: unable to attach pim encap\n"); VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); return (EINVAL); } ip_mcast_src = X_ip_mcast_src; ip_mforward = X_ip_mforward; ip_mrouter_done = X_ip_mrouter_done; ip_mrouter_get = X_ip_mrouter_get; ip_mrouter_set = X_ip_mrouter_set; ip_rsvp_force_done = X_ip_rsvp_force_done; ip_rsvp_vif = X_ip_rsvp_vif; legal_vif_num = X_legal_vif_num; mrt_ioctl = X_mrt_ioctl; rsvp_input_p = 
X_rsvp_input; break; case MOD_UNLOAD: /* * Typically module unload happens after the user-level * process has shut down the kernel services (the check * below ensures someone can't just yank the module out * from under a running process). But if the module is * just loaded and then unloaded w/o starting up a user * process we still need to clean up. */ MROUTER_LOCK(); if (ip_mrouter_cnt != 0) { MROUTER_UNLOCK(); return (EINVAL); } ip_mrouter_unloading = 1; MROUTER_UNLOCK(); EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); if (pim_encap_cookie) { encap_detach(pim_encap_cookie); pim_encap_cookie = NULL; } ip_mcast_src = NULL; ip_mforward = NULL; ip_mrouter_done = NULL; ip_mrouter_get = NULL; ip_mrouter_set = NULL; ip_rsvp_force_done = NULL; ip_rsvp_vif = NULL; legal_vif_num = NULL; mrt_ioctl = NULL; rsvp_input_p = NULL; VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); break; default: return EOPNOTSUPP; } return 0; } static moduledata_t ip_mroutemod = { "ip_mroute", ip_mroute_modevent, 0 }; DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE); Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c (revision 305823) +++ head/sys/netinet/raw_ip.c (revision 305824) @@ -1,1139 +1,1139 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #endif /*IPSEC*/ #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, int); int (*ng_ipfw_input_p)(struct mbuf **, int, struct ip_fw_args *, int); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ extern struct protosw inetsw[]; u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. 
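INP_PCBHASH_RAW above keys fully-specified raw PCBs by (protocol + local + foreign) modulo the hash mask, plus one; bucket 0 is thereby reserved for wildcard-bound sockets, which rip_input() below scans separately. A standalone restatement of the bucket choice made in rip_inshash():

#include <stdint.h>
#include <netinet/in.h>

/* bucket 0: wildcard sockets; buckets 1..mask: fully bound flows */
static uint32_t
raw_bucket(uint8_t proto, uint32_t laddr, uint32_t faddr, uint32_t hashmask)
{
	if (proto == 0 || laddr == INADDR_ANY || faddr == INADDR_ANY)
		return (0);
	return ((proto + laddr + faddr) % hashmask + 1);
}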
*/ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE static void rip_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ripcbinfo); } VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL); #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); #ifdef IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject(n, last)) { policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&so->so_rcv); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; int hash; *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. 
if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } if (last != NULL) { struct mbuf *n; - n = m_copy(m, 0, (int)M_COPYALL); + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * to filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); continue; } } if (last != NULL) { struct mbuf *n; - n = m_copy(m, 0, (int)M_COPYALL); + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); } else { m_freem(m); } } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options the user * may have set up with a control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. 
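The two hunks above are part of this revision's mechanical substitution: as far as I recall, m_copy() was a thin sys/mbuf.h macro, so spelling out the underlying call does not change behavior, and the (int) cast on M_COPYALL becomes unnecessary once the argument is passed straight through. The historical definition being retired was, from memory rather than from this diff:

#define m_copy(m, o, l)	m_copym((m), (o), (l), M_NOWAIT)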
*/ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } INP_RLOCK(inp); ip = mtod(m, struct ip *); error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || (ntohs(ip->ip_len) > m->m_pkthdr.len) || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... 
*/ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. 
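 *
 * [Editor's aside on rip_ctloutput() above -- illustrative, not part of the
 * change.] The IP_HDRINCL toggle handled there is the standard way a
 * userland program asks to supply its own IP header on output (minimal
 * sketch, standard sockets API):
 *
 *   #include <sys/socket.h>
 *   #include <netinet/in.h>
 *
 *   int
 *   hdrincl_on(int s)
 *   {
 *       int one = 1;
 *
 *       /* Subsequent sends must begin with a complete struct ip. */
 *       return (setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one,
 *           sizeof(one)));
 *   }
 *
 * With the option off, rip_output() prepends and fills in the IP header
 * itself; with it on, the INP_HDRINCL branch validates the caller-supplied
 * header instead.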
*/ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_bind: inp == NULL")); if (TAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); 
INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&V_ripcbinfo); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == NULL) return (ENOMEM); INP_INFO_RLOCK(&V_ripcbinfo); for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK(&V_ripcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { /* * Give the user an updated idea of our state. 
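 *
 * (Editor's illustration -- not part of the original change. The
 * oldptr == NULL size estimate and the generation-count snapshot above form
 * the usual two-pass sysctl export pattern; a userland consumer follows the
 * same shape, sizing first and then fetching:
 *
 *   #include <sys/types.h>
 *   #include <sys/sysctl.h>
 *   #include <stdlib.h>
 *
 *   void *
 *   fetch_raw_pcblist(size_t *lenp)
 *   {
 *       void *buf;
 *
 *       // First call: kernel returns an over-estimated length.
 *       if (sysctlbyname("net.inet.raw.pcblist", NULL, lenp,
 *           NULL, 0) != 0)
 *           return (NULL);
 *       if ((buf = malloc(*lenp)) == NULL)
 *           return (NULL);
 *       // Second call: may still fail if the PCB set grew meanwhile.
 *       if (sysctlbyname("net.inet.raw.pcblist", buf, lenp,
 *           NULL, 0) != 0) {
 *           free(buf);
 *           return (NULL);
 *       }
 *       return (buf);
 *   }
 *
 * The xig_gen value written at both ends of the reply lets the reader detect
 * concurrent changes, as the comment continues below.)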
If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK(&V_ripcbinfo); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ Index: head/sys/netinet/tcp_output.c =================================================================== --- head/sys/netinet/tcp_output.c (revision 305823) +++ head/sys/netinet/tcp_output.c (revision 305824) @@ -1,1814 +1,1814 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #define TCPOUTFLAGS #include #include #include #include #include #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef TCP_OFFLOAD #include #endif #ifdef IPSEC #include #endif /*IPSEC*/ #include #include VNET_DEFINE(int, path_mtu_discovery) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(path_mtu_discovery), 1, "Enable Path MTU Discovery"); VNET_DEFINE(int, tcp_do_tso) = 1; #define V_tcp_do_tso VNET(tcp_do_tso) SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_tso), 0, "Enable TCP Segmentation Offload"); VNET_DEFINE(int, tcp_sendspace) = 1024*32; #define V_tcp_sendspace VNET(tcp_sendspace) SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size"); VNET_DEFINE(int, tcp_do_autosndbuf) = 1; #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autosndbuf), 0, "Enable automatic send buffer sizing"); VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024; #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); /* * Make sure that either retransmit or persist timer is set for SYN, FIN and * non-ACK. */ #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ tcp_timer_active((tp), TT_REXMT) || \ tcp_timer_active((tp), TT_PERSIST), \ ("neither rexmt nor persist timer is set")) static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, long len, int tso); static void inline cc_after_idle(struct tcpcb *tp); /* * Wrapper for the TCP established output helper hook. */ static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, long len, int tso) { struct tcp_hhook_data hhook_data; if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) { hhook_data.tp = tp; hhook_data.th = th; hhook_data.to = to; hhook_data.len = len; hhook_data.tso = tso; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data, tp->osd); } } /* * CC wrapper hook functions */ static void inline cc_after_idle(struct tcpcb *tp) { INP_WLOCK_ASSERT(tp->t_inpcb); if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp->ccv); } /* * Tcp output routine: figure out what should be sent and send it. 
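 *
 * [Editor's note -- illustrative addition.] cc_after_idle() above is the
 * transmit-side entry into the pluggable congestion-control framework: it
 * simply forwards to the current algorithm's optional after_idle method.
 * A congestion-control module supplies such hooks through its struct
 * cc_algo; a minimal sketch (assuming the framework's declared hook
 * signature taking a struct cc_var) looks like:
 *
 *   static void
 *   example_after_idle(struct cc_var *ccv)
 *   {
 *       /* E.g., collapse cwnd back toward the restart window. */
 *   }
 *
 *   struct cc_algo example_cc_algo = {
 *       .name = "example",
 *       .after_idle = example_after_idle,
 *   };
 *
 * tcp_output() below invokes cc_after_idle() only when the connection has
 * been idle for at least one retransmission timeout (see the t_rcvtime test
 * shortly after entry).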
*/ int tcp_output(struct tcpcb *tp) { struct socket *so = tp->t_inpcb->inp_socket; long len, recwin, sendwin; int off, flags, error = 0; /* Keep compiler happy */ struct mbuf *m; struct ip *ip = NULL; struct ipovly *ipov = NULL; struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; #ifdef IPSEC unsigned ipsec_optlen = 0; #endif int idle, sendalot; int sack_rxmit, sack_bytes_rxmt; struct sackhole *p; int tso, mtu; struct tcpopt to; #if 0 int maxburst = TCP_MAXBURST; #endif #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif INP_WLOCK_ASSERT(tp->t_inpcb); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) return (tcp_offload_output(tp)); #endif #ifdef TCP_RFC7413 /* * For TFO connections in SYN_RECEIVED, only allow the initial * SYN|ACK and those sent by the retransmit timer. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ return (0); #endif /* * Determine length of data that should be transmitted, * and flags that will be used. * If there is some data or critical controls (SYN, RST) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) cc_after_idle(tp); tp->t_flags &= ~TF_LASTIDLE; if (idle) { if (tp->t_flags & TF_MORETOCOME) { tp->t_flags |= TF_LASTIDLE; idle = 0; } } again: /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid * resending already delivered data. Adjust snd_nxt accordingly. */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_LT(tp->snd_nxt, tp->snd_max)) tcp_sack_adjust(tp); sendalot = 0; tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; sendwin = min(tp->snd_wnd, tp->snd_cwnd); flags = tcp_outflags[tp->t_state]; /* * Send any SACK-generated retransmissions. If we're explicitly trying * to send out new data (when sendalot is 1), bypass this function. * If we retransmit in fast recovery mode, decrement snd_cwnd, since * we're replacing a (future) new transmission with a retransmission * now, and we previously incremented snd_cwnd in tcp_input(). */ /* * Still in sack recovery , reset rxmit flag to zero. */ sack_rxmit = 0; sack_bytes_rxmt = 0; len = 0; p = NULL; if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { long cwin; cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; if (cwin < 0) cwin = 0; /* Do not retransmit SACK segments beyond snd_recover */ if (SEQ_GT(p->end, tp->snd_recover)) { /* * (At least) part of sack hole extends beyond * snd_recover. Check to see if we can rexmit data * for this hole. */ if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { /* * Can't rexmit any more data for this hole. * That data will be rexmitted in the next * sack recovery episode, when snd_recover * moves past p->rxmit. 
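 *
 * [Editor's worked example -- illustrative numbers only.] Suppose
 * snd_wnd = 64k, snd_cwnd = 16k and 4k of hole data has already been
 * retransmitted this recovery episode (sack_bytes_rxmt = 4k). Then
 *
 *   cwin = min(64k, 16k) - 4k = 12k
 *
 * and for a hole with 8k left between p->rxmit and min(p->end, snd_recover)
 * the code sends len = min(12k, 8k) = 8k from offset p->rxmit - snd_una,
 * leaving new-data transmission to a later pass once the scoreboard is
 * exhausted.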
*/ p = NULL; goto after_sack_rexmit; } else /* Can rexmit part of the current hole */ len = ((long)ulmin(cwin, tp->snd_recover - p->rxmit)); } else len = ((long)ulmin(cwin, p->end - p->rxmit)); off = p->rxmit - tp->snd_una; KASSERT(off >= 0,("%s: sack block to the left of una : %d", __func__, off)); if (len > 0) { sack_rxmit = 1; sendalot = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, min(len, tp->t_maxseg)); } } after_sack_rexmit: /* * Get standard flags, and add SYN or FIN if requested by 'hidden' * state flags. */ if (tp->t_flags & TF_NEEDFIN) flags |= TH_FIN; if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; SOCKBUF_LOCK(&so->so_snd); /* * If in persist timeout with window of 0, send 1 byte. * Otherwise, if window is small but nonzero * and timer expired, we will send what we can * and go to transmit state. */ if (tp->t_flags & TF_FORCEDATA) { if (sendwin == 0) { /* * If we still have some data to send, then * clear the FIN bit. Usually this would * happen below when it realizes that we * aren't sending all the data. However, * if we have exactly 1 byte of unsent data, * then it won't clear the FIN bit below, * and if we are in persist state, we wind * up sending the packet without recording * that we sent the FIN bit. * * We can't just blindly clear the FIN bit, * because if we don't have any more data * to send then the probe will be the FIN * itself. */ if (off < sbused(&so->so_snd)) flags &= ~TH_FIN; sendwin = 1; } else { tcp_timer_activate(tp, TT_PERSIST, 0); tp->t_rxtshift = 0; } } /* * If snd_nxt == snd_max and we have transmitted a FIN, the * offset will be > 0 even if so_snd.sb_cc is 0, resulting in * a negative length. This can also occur when TCP opens up * its congestion window while receiving additional duplicate * acks after fast-retransmit because TCP will reset snd_nxt * to snd_max after the fast-retransmit. * * In the normal retransmit-FIN-only case, however, snd_nxt will * be set to snd_una, the offset will be 0, and the length may * wind up 0. * * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ if (sack_rxmit == 0) { if (sack_bytes_rxmt == 0) len = ((long)ulmin(sbavail(&so->so_snd), sendwin) - off); else { long cwin; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible in the scoreboard. */ len = ((long)ulmin(sbavail(&so->so_snd), tp->snd_wnd) - off); /* * Don't remove this (len > 0) check ! * We explicitly check for len > 0 here (although it * isn't really necessary), to work around a gcc * optimization issue - to force gcc to compute * len above. Without this check, the computation * of len is bungled by the optimizer. */ if (len > 0) { cwin = tp->snd_cwnd - (tp->snd_nxt - tp->sack_newdata) - sack_bytes_rxmt; if (cwin < 0) cwin = 0; len = lmin(len, cwin); } } } /* * Lop off SYN bit if it has already been sent. However, if this * is SYN-SENT state and if segment contains data and if we don't * know that foreign host supports TAO, suppress sending segment. */ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if (tp->t_state != TCPS_SYN_RECEIVED) flags &= ~TH_SYN; #ifdef TCP_RFC7413 /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; #endif off--, len++; } /* * Be careful not to send data and/or FIN on SYN segments. 
* This measure is needed to prevent interoperability problems * with not fully conformant TCP implementations. */ if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) { len = 0; flags &= ~TH_FIN; } #ifdef TCP_RFC7413 /* * When retransmitting SYN|ACK on a passively-created TFO socket, * don't include data, as the presence of data may have caused the * original SYN|ACK to have been dropped by a middlebox. */ if ((tp->t_flags & TF_FASTOPEN) && (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) || (flags & TH_RST))) len = 0; #endif if (len <= 0) { /* * If FIN has been sent but not acked, * but we haven't been called to retransmit, * len will be < 0. Otherwise, window shrank * after we sent into it. If window shrank to 0, * cancel pending retransmit, pull snd_nxt back * to (closed) window, and set the persist timer * if it isn't already going. If the window didn't * close completely, just wait for an ACK. * * We also do a general check here to ensure that * we will set the persist timer when we have data * to send, but a 0-byte window. This makes sure * the persist timer is set even if the packet * hits one of the "goto send" lines below. */ len = 0; if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) && (off < (int) sbavail(&so->so_snd))) { tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rxtshift = 0; tp->snd_nxt = tp->snd_una; if (!tcp_timer_active(tp, TT_PERSIST)) tcp_setpersist(tp); } } /* len will be >= 0 after this point. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); /* * Automatic sizing of send socket buffer. Often the send buffer * size is not optimally adjusted to the actual network conditions * at hand (delay bandwidth product). Setting the buffer size too * small limits throughput on links with high bandwidth and high * delay (e.g. trans-continental/oceanic links). Setting the * buffer size too big consumes too much real kernel memory, * especially with many connections on busy servers. * * The criteria to step up the send buffer one notch are: * 1. receive window of remote host is larger than send buffer * (with a fudge factor of 5/4th); * 2. send buffer is filled to 7/8th with data (so we actually * have data to make use of it); * 3. send buffer fill has not hit maximal automatic size; * 4. our send window (slow start and congestion controlled) is * larger than sent but unacknowledged data in send buffer. * * The remote host receive window scaling factor may limit the * growing of the send buffer before it reaches its allowed * maximum. * * It scales directly with slow start or congestion window * and does at most one step per received ACK. This fast * scaling has the drawback of growing the send buffer beyond * what is strictly necessary to make full use of a given * delay*bandwidth product. However, testing has shown this not * to be much of a problem. At worst we are trading wasting * of available bandwidth (the non-use of it) for wasting some * socket buffer memory. * * TODO: Shrink send buffer during idle periods together * with congestion window. Requires another timer. Has to * wait for upcoming tcp timer rewrite. * * XXXGL: should sbused() or sbavail() be used here?
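 *
 * [Editor's worked example -- illustrative numbers only.] With the defaults
 * above (sendbuf_inc = 8k, sendbuf_max = 2m), take sb_hiwat = 32768,
 * snd_wnd = 40960, 30000 bytes queued and 2000 bytes in flight. The four
 * criteria then read:
 *
 *   1. 40960 / 4 * 5 = 51200 >= 32768    (peer window large enough)
 *   2. 30000 >= 32768 / 8 * 7 = 28672    (buffer nearly full)
 *   3. 30000 < 2097152                   (below the autosize ceiling)
 *   4. sendwin >= 30000 - 2000 = 28000   (window is not the limiter)
 *
 * so sbreserve_locked() grows sb_hiwat to min(32768 + 8192, 2097152) =
 * 40960; a failed reservation simply turns SB_AUTOSIZE off for this socket.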
*/ if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) { if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) && sbused(&so->so_snd) < V_tcp_autosndbuf_max && sendwin >= (sbused(&so->so_snd) - (tp->snd_nxt - tp->snd_una))) { if (!sbreserve_locked(&so->so_snd, min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max), so, curthread)) so->so_snd.sb_flags &= ~SB_AUTOSIZE; } } /* * Decide if we can use TCP Segmentation Offloading (if supported by * hardware). * * TSO may only be used if we are in a pure bulk sending state. The * presence of TCP-MD5, SACK retransmits, SACK advertisements and * IP options prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per generated * segment or packet. */ #ifdef IPSEC /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. */ ipsec_optlen = ipsec_hdrsiz_tcp(tp); #endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && #ifdef IPSEC ipsec_optlen == 0 && #endif tp->t_inpcb->inp_options == NULL && tp->t_inpcb->in6p_options == NULL) tso = 1; if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } else { if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } recwin = sbspace(&so->so_rcv); /* * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * * - We have a full segment (or more with TSO) * - This is the last buffer in a write()/send() and we are * either idle or running NODELAY * - we've timed out (e.g. persist timer) * - we have more than 1/2 the maximum send window's worth of * data (the receiver may be limiting the window size) * - we need to retransmit */ if (len) { if (len >= tp->t_maxseg) goto send; /* * NOTE! on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause * us to flush a buffer queued with moretocome. XXX * * note: the len + off check is almost certainly unnecessary. */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && len + off >= sbavail(&so->so_snd) && (tp->t_flags & TF_NOPUSH) == 0) { goto send; } if (tp->t_flags & TF_FORCEDATA) /* typ. timeout case */ goto send; if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ goto send; if (sack_rxmit) goto send; } /* * Sending of standalone window updates. * * Window updates are important when we close our window due to a * full socket buffer and are opening it again after the application * reads data from it. Once the window has opened again and the * remote end starts to send again the ACK clock takes over and * provides the most current window information. * * We must avoid the silly window syndrome whereby every read * from the receive buffer, no matter how small, causes a window * update to be sent. We should also avoid sending a flurry of * window updates when the socket buffer had queued a lot of data * and the application is doing small reads. * * Prevent a flurry of pointless window updates by only sending * an update when we can increase the advertised window by more * than 1/4th of the socket buffer capacity.
When the buffer is * getting full or is very small be more aggressive and send an * update whenever we can increase by two mss sized segments. * In all other situations the ACK's to new incoming data will * carry further window increases. * * Don't send an independent window update if a delayed * ACK is pending (it will get piggy-backed on it) or the * remote side already has done a half-close and won't send * more data. Skip this if the connection is in T/TCP * half-open state. */ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) && !(tp->t_flags & TF_DELACK) && !TCPS_HAVERCVDFIN(tp->t_state)) { /* * "adv" is the amount we could increase the window, * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. */ long adv; int oldwin; adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale); if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); adv -= oldwin; } else oldwin = 0; /* * If the new window size ends up being the same as the old * size when it is scaled, then don't force a window update. */ if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) goto dontupdate; if (adv >= (long)(2 * tp->t_maxseg) && (adv >= (long)(so->so_rcv.sb_hiwat / 4) || recwin <= (long)(so->so_rcv.sb_hiwat / 8) || so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg)) goto send; } dontupdate: /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. */ if (tp->t_flags & TF_ACKNOW) goto send; if ((flags & TH_RST) || ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; /* * If our state indicates that FIN should be sent * and we have not yet done so, then we need to send. */ if (flags & TH_FIN && ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) goto send; /* * In SACK, it is possible for tcp_output to fail to send a segment * after the retransmission timer has been turned off. Make sure * that the retransmission timer is set. */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_GT(tp->snd_max, tp->snd_una) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); goto just_return; } /* * TCP window updates are not reliable, rather a polling protocol * using ``persist'' packets is used to insure receipt of window * updates. The three ``states'' for the output side are: * idle not doing retransmits or persists * persisting to move a small or zero window * (re)transmitting and thereby not persisting * * tcp_timer_active(tp, TT_PERSIST) * is true when we are in persist state. * (tp->t_flags & TF_FORCEDATA) * is set when we are called to send a persist packet. * tcp_timer_active(tp, TT_REXMT) * is set when we are retransmitting * The output side is idle when both timers are zero. * * If send window is too small, there is data to transmit, and no * retransmit or persist is pending, then go to persist state. * If nothing happens soon, send when timer expires: * if window is nonzero, transmit what we can, * otherwise force out a byte. */ if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { tp->t_rxtshift = 0; tcp_setpersist(tp); } /* * No reason to send a segment, just return. 
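 *
 * [Editor's worked example -- illustrative numbers only.] For the standalone
 * window-update test above, take t_maxseg = 1460 and so_rcv.sb_hiwat =
 * 65536. An update is sent only when the advertisable increase satisfies
 * adv >= 2 * 1460 = 2920 and additionally one of
 *
 *   adv >= 65536 / 4 = 16384     (large absolute opening), or
 *   recwin <= 65536 / 8 = 8192   (buffer nearly full, be aggressive), or
 *   sb_hiwat <= 8 * 1460         (tiny buffer; false here)
 *
 * holds; smaller openings are left to be advertised on ACKs that carry new
 * data, which keeps pure window updates rare.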
*/ just_return: SOCKBUF_UNLOCK(&so->so_snd); return (0); send: SOCKBUF_LOCK_ASSERT(&so->so_snd); if (len > 0) { if (len >= tp->t_maxseg) tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT; else tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT; } /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. * NOTE: we assume that the IP/TCP header plus TCP options * always fit in a single mbuf, leaving room for a maximum * link header, i.e. * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES */ optlen = 0; #ifdef INET6 if (isipv6) hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); else #endif hdrlen = sizeof (struct tcpiphdr); /* * Compute options for segment. * We only have to care about SYN and established connection * segments. Options for SYN-ACK segments are handled in TCP * syncache. */ to.to_flags = 0; if ((tp->t_flags & TF_NOOPT) == 0) { /* Maximum segment size. */ if (flags & TH_SYN) { tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); to.to_flags |= TOF_MSS; #ifdef TCP_RFC7413 /* * Only include the TFO option on the first * transmission of the SYN|ACK on a * passively-created TFO socket, as the presence of * the TFO option may have caused the original * SYN|ACK to have been dropped by a middlebox. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; to.to_flags |= TOF_FASTOPEN; } #endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { to.to_wscale = tp->request_r_scale; to.to_flags |= TOF_SCALE; } /* Timestamps. */ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) tp->rfbuf_ts = tcp_ts_getticks(); } /* Selective ACK's. */ if (tp->t_flags & TF_SACK_PERMIT) { if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; to.to_sacks = (u_char *)tp->sackblks; } } #ifdef TCP_SIGNATURE /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); } #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); else #endif if (tp->t_inpcb->inp_options) ipoptlen = tp->t_inpcb->inp_options->m_len - offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; #ifdef IPSEC ipoptlen += ipsec_optlen; #endif /* * Adjust data length if insertion of options will * bump the packet length beyond the t_maxseg length. * Clear the FIN bit because we cut off the tail of * the segment. 
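 *
 * [Editor's worked example -- illustrative numbers only.] For an
 * established IPv4 connection that negotiated timestamps, tcp_addoptions()
 * above emits NOP + NOP + a 10-byte timestamp option, so optlen = 12 and
 *
 *   hdrlen = sizeof(struct tcpiphdr) + optlen = 40 + 12 = 52
 *
 * leaving t_maxseg - 12 bytes of payload per segment. On a SYN the MSS,
 * window-scale and SACK-permitted options join the timestamp block and
 * optlen grows accordingly (see the packing budget before tcp_addoptions()
 * further down).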
*/ if (len + optlen + ipoptlen > tp->t_maxseg) { flags &= ~TH_FIN; if (tso) { u_int if_hw_tsomax; u_int if_hw_tsomaxsegcount; u_int if_hw_tsomaxsegsize; struct mbuf *mb; u_int moff; int max_len; /* extract TSO information */ if_hw_tsomax = tp->t_tsomax; if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; /* * Limit a TSO burst to prevent it from * overflowing or exceeding the maximum length * allowed by the network interface: */ KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); /* * Check if we should limit by maximum payload * length: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ max_len = (if_hw_tsomax - hdrlen - max_linkhdr); if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Check if we should limit by maximum segment * size and count: */ if (if_hw_tsomaxsegcount != 0 && if_hw_tsomaxsegsize != 0) { /* * Subtract one segment for the LINK * and TCP/IP headers mbuf that will * be prepended to this mbuf chain * after the code in this section * limits the number of mbufs in the * chain to if_hw_tsomaxsegcount. */ if_hw_tsomaxsegcount -= 1; max_len = 0; mb = sbsndmbuf(&so->so_snd, off, &moff); while (mb != NULL && max_len < len) { u_int mlen; u_int frags; /* * Get length of mbuf fragment * and how many hardware frags, * rounded up, it would use: */ mlen = (mb->m_len - moff); frags = howmany(mlen, if_hw_tsomaxsegsize); /* Handle special case: Zero Length Mbuf */ if (frags == 0) frags = 1; /* * Check if the fragment limit * will be reached or exceeded: */ if (frags >= if_hw_tsomaxsegcount) { max_len += min(mlen, if_hw_tsomaxsegcount * if_hw_tsomaxsegsize); break; } max_len += mlen; if_hw_tsomaxsegcount -= frags; moff = 0; mb = mb->m_next; } if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Prevent the last segment from being * fractional unless the send sockbuf can be * emptied: */ max_len = (tp->t_maxseg - optlen); if ((off + len) < sbavail(&so->so_snd)) { moff = len % max_len; if (moff != 0) { len -= moff; sendalot = 1; } } /* * In case there are too many small fragments * don't use TSO: */ if (len <= max_len) { len = max_len; sendalot = 1; tso = 0; } /* * Send the FIN in a separate segment * after the bulk sending is done. * We don't trust the TSO implementations * to clear the FIN flag on all but the * last segment. */ if (tp->t_flags & TF_NEEDFIN) sendalot = 1; } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } } else tso = 0; KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET, ("%s: len > IP_MAXPACKET", __func__)); /*#ifdef DIAGNOSTIC*/ #ifdef INET6 if (max_linkhdr + hdrlen > MCLBYTES) #else if (max_linkhdr + hdrlen > MHLEN) #endif panic("tcphdr too big"); /*#endif*/ /* * This KASSERT is here to catch edge cases at a well defined place. * Before, those had triggered (random) panic conditions further down. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); /* * Grab a header mbuf, attaching a copy of data to * be transmitted, and initialize the header from * the template for sends on this connection. 
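 *
 * [Editor's worked example -- illustrative numbers only.] In the TSO
 * limiting block above, suppose if_hw_tsomax = 65536, hdrlen = 52,
 * max_linkhdr = 16 and 80000 bytes are eligible: the payload cap is
 *
 *   max_len = 65536 - 52 - 16 = 65468  =>  len = 65468, sendalot = 1.
 *
 * Then, with t_maxseg = 1500 and optlen = 12, the fractional-segment trim
 * uses max_len = 1500 - 12 = 1488; if more data remains queued, len is
 * reduced by len % 1488 (here 1484) down to 63984, so only the final burst
 * of the buffer may end in a short segment.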
*/ if (len) { struct mbuf *mb; u_int moff; if ((tp->t_flags & TF_FORCEDATA) && len == 1) TCPSTAT_INC(tcps_sndprobe); else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { tp->t_sndrexmitpack++; TCPSTAT_INC(tcps_sndrexmitpack); TCPSTAT_ADD(tcps_sndrexmitbyte, len); } else { TCPSTAT_INC(tcps_sndpack); TCPSTAT_ADD(tcps_sndbyte, len); } #ifdef INET6 if (MHLEN < hdrlen + max_linkhdr) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else #endif m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOBUFS; sack_rxmit = 0; goto out; } m->m_data += max_linkhdr; m->m_len = hdrlen; /* * Start the m_copy functions from the closest mbuf * to the offset in the socket buffer chain. */ mb = sbsndptr(&so->so_snd, off, len, &moff); if (len <= MHLEN - hdrlen - max_linkhdr) { m_copydata(mb, moff, (int)len, mtod(m, caddr_t) + hdrlen); m->m_len += len; } else { - m->m_next = m_copy(mb, moff, (int)len); + m->m_next = m_copym(mb, moff, len, M_NOWAIT); if (m->m_next == NULL) { SOCKBUF_UNLOCK(&so->so_snd); (void) m_free(m); error = ENOBUFS; sack_rxmit = 0; goto out; } } /* * If we're sending everything we've got, set PUSH. * (This will keep happy those implementations which only * give data to the user when a buffer fills or * a PUSH comes in.) */ if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN)) flags |= TH_PUSH; SOCKBUF_UNLOCK(&so->so_snd); } else { SOCKBUF_UNLOCK(&so->so_snd); if (tp->t_flags & TF_ACKNOW) TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN|TH_FIN|TH_RST)) TCPSTAT_INC(tcps_sndctrl); else if (SEQ_GT(tp->snd_up, tp->snd_una)) TCPSTAT_INC(tcps_sndurg); else TCPSTAT_INC(tcps_sndwinup); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; sack_rxmit = 0; goto out; } #ifdef INET6 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) && MHLEN >= hdrlen) { M_ALIGN(m, hdrlen); } else #endif m->m_data += max_linkhdr; m->m_len = hdrlen; } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(tp->t_inpcb, m); #endif #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); tcpip_fillheaders(tp->t_inpcb, ip6, th); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); ipov = (struct ipovly *)ip; th = (struct tcphdr *)(ip + 1); tcpip_fillheaders(tp->t_inpcb, ip, th); } /* * Fill in fields, remembering maximum advertised * window for use in delaying messages about window sizes. * If resending a FIN, be sure not to use a new sequence number. */ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && tp->snd_nxt == tp->snd_max) tp->snd_nxt--; /* * If we are starting a connection, send ECN setup * SYN packet. If we are on a retransmit, we may * resend those bits a number of times as per * RFC 3168. */ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { if (tp->t_rxtshift >= 1) { if (tp->t_rxtshift <= V_tcp_ecn_maxretries) flags |= TH_ECE|TH_CWR; } else flags |= TH_ECE|TH_CWR; } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_ECN_PERMIT)) { /* * If the peer has ECN, mark data packets with * ECN capable transmission (ECT). * Ignore pure ack packets, retransmissions and window probes. */ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1)) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); else #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } /* * Reply with proper ECN notifications. 
*/ if (tp->t_flags & TF_ECN_SND_CWR) { flags |= TH_CWR; tp->t_flags &= ~TF_ECN_SND_CWR; } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will * not reflect the first unsent octet. For ACK only * packets, we do not want the sequence number of the * retransmitted packet, we want the sequence number * of the next unsent octet. So, if there is no data * (and no SYN or FIN), use snd_max instead of snd_nxt * when filling in ti_seq. But if we are in persist * state, snd_max might reflect one byte beyond the * right edge of the window, so use snd_nxt in that * case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) */ if (sack_rxmit == 0) { if (len || (flags & (TH_SYN|TH_FIN)) || tcp_timer_active(tp, TT_PERSIST)) th->th_seq = htonl(tp->snd_nxt); else th->th_seq = htonl(tp->snd_max); } else { th->th_seq = htonl(p->rxmit); p->rxmit += len; tp->sackhint.sack_bytes_rexmit += len; } th->th_ack = htonl(tp->rcv_nxt); if (optlen) { bcopy(opt, th + 1, optlen); th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; } th->th_flags = flags; /* * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. */ if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && recwin < (long)tp->t_maxseg) recwin = 0; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) recwin = (long)(tp->rcv_adv - tp->rcv_nxt); if (recwin > (long)TCP_MAXWIN << tp->rcv_scale) recwin = (long)TCP_MAXWIN << tp->rcv_scale; /* * According to RFC1323 the window field in a SYN (i.e., a <SYN> * or <SYN,ACK>) segment itself is never scaled. The <SYN,ACK> * case is handled in syncache. */ if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(&so->so_rcv), TCP_MAXWIN))); else th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); /* * Adjust the RXWIN0SENT flag - indicate that we have advertised * a 0 window. This may cause the remote transmitter to stall. This * flag tells soreceive() to disable delayed acknowledgements when * draining the buffer. This can occur if the receiver is attempting * to read more data than can be buffered prior to transmitting on * the connection. */ if (th->th_win == 0) { tp->t_sndzerowin++; tp->t_flags |= TF_RXWIN0SENT; } else tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; } else /* * If no urgent pointer to send, then we pull * the urgent pointer to the left edge of the send window * so that it doesn't drift into the send window on sequence * number wraparound. */ tp->snd_up = tp->snd_una; /* drag it along */ #ifdef TCP_SIGNATURE if (to.to_flags & TOF_SIGNATURE) { int sigoff = to.to_signature - opt; tcp_signature_compute(m, 0, len, optlen, (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); } #endif /* * Put TCP length in extended header, and then * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() needs this */ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { /* * ip6_plen need not be filled now, and will be filled * in ip6_output.
*/ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); } #endif /* * Enable TSO and specify the size of the segments. * The TCP pseudo header checksum is always provided. */ if (tso) { KASSERT(len > tp->t_maxseg - optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } #ifdef IPSEC KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u", __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); #else KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u", __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); #endif /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ hhook_run_tcp_est_out(tp, th, &to, len, tso); #ifdef TCPDEBUG /* * Trace. */ if (so->so_options & SO_DEBUG) { u_short save = 0; #ifdef INET6 if (!isipv6) #endif { save = ipov->ih_len; ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */); } tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); #ifdef INET6 if (!isipv6) #endif ipov->ih_len = save; } #endif /* TCPDEBUG */ TCP_PROBE3(debug__output, tp, th, mtod(m, const char *)); /* * Fill in IP length and desired time to live and * send to IP level. There should be a better way * to handle ttl and tos; we could keep them in * the template, but need a way to checksum without them. */ /* * m->m_pkthdr.len should have been set before checksum calculation, * because in6_cksum() need it. */ #ifdef INET6 if (isipv6) { struct route_in6 ro; bzero(&ro, sizeof(ro)); /* * we separately set hoplimit for every segment, since the * user might want to change the value via setsockopt. * Also, desired default hop limit might be changed via * Neighbor Discovery. */ ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL); /* * Set the packet size here for the benefit of DTrace probes. * ip6_output() will set it properly; it's supposed to include * the option header lengths as well. */ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) tp->t_flags2 |= TF2_PLPMTU_PMTUD; else tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th); TCP_PROBE5(send, NULL, tp, ip6, tp, th); #ifdef TCPPCAP /* Save packet, if requested. */ tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif /* TODO: IPv6 IP6TOS_ECT bit on */ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb); if (error == EMSGSIZE && ro.ro_rt != NULL) mtu = ro.ro_rt->rt_mtu; RO_RTFREE(&ro); } #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(m->m_pkthdr.len); #ifdef INET6 if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL); #endif /* INET6 */ /* * If we do path MTU discovery, then we set DF on every packet. * This might not be the best thing to do according to RFC3390 * Section 2. 
However, the tcp hostcache mitigates the problem * so it affects only the first tcp connection with a host. * * NB: Don't set DF on small MTU/MSS to have a safe fallback. */ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { ip->ip_off |= htons(IP_DF); tp->t_flags2 |= TF2_PLPMTU_PMTUD; } else { tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; } if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip, tp, th); TCP_PROBE5(send, NULL, tp, ip, tp, th); #ifdef TCPPCAP /* Save packet, if requested. */ tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, tp->t_inpcb); if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL) mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu; } #endif /* INET */ out: /* * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. */ if ((tp->t_flags & TF_FORCEDATA) == 0 || !tcp_timer_active(tp, TT_PERSIST)) { tcp_seq startseq = tp->snd_nxt; /* * Advance snd_nxt over sequence space of this segment. */ if (flags & (TH_SYN|TH_FIN)) { if (flags & TH_SYN) tp->snd_nxt++; if (flags & TH_FIN) { tp->snd_nxt++; tp->t_flags |= TF_SENTFIN; } } if (sack_rxmit) goto timer; tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { tp->snd_max = tp->snd_nxt; /* * Time this transmission if not a retransmission and * not currently timing anything. */ if (tp->t_rtttime == 0) { tp->t_rtttime = ticks; tp->t_rtseq = startseq; TCPSTAT_INC(tcps_segstimed); } } /* * Set retransmit timer if not currently set, * and not doing a pure ack or a keep-alive probe. * Initial value for retransmit timer is smoothed * round-trip time + 2 * round-trip time variance. * Initialize shift counter which is used for backoff * of retransmit time. */ timer: if (!tcp_timer_active(tp, TT_REXMT) && ((sack_rxmit && tp->snd_nxt != tp->snd_max) || (tp->snd_nxt != tp->snd_una))) { if (tcp_timer_active(tp, TT_PERSIST)) { tcp_timer_activate(tp, TT_PERSIST, 0); tp->t_rxtshift = 0; } tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); } else if (len == 0 && sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { /* * Avoid a situation where we do not set persist timer * after a zero window condition. For example: * 1) A -> B: packet with enough data to fill the window * 2) B -> A: ACK for #1 + new data (0 window * advertisement) * 3) A -> B: ACK for #2, 0 len packet * * In this case, A will not activate the persist timer, * because it chose to send a packet. Unless tcp_output * is called for some other reason (delayed ack timer, * another input packet from B, socket syscall), A will * not send zero window probes. * * So, if you send a 0-length packet, but there is data * in the socket buffer, and neither the rexmt nor * persist timer is already set, then activate the * persist timer. */ tp->t_rxtshift = 0; tcp_setpersist(tp); } } else { /* * Persist case, update snd_max but since we are in * persist mode (no window) we do not update snd_nxt. */ int xlen = len; if (flags & TH_SYN) ++xlen; if (flags & TH_FIN) { ++xlen; tp->t_flags |= TF_SENTFIN; } if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) tp->snd_max = tp->snd_nxt + len; } if (error) { /* * We know that the packet was lost, so back out the * sequence number advance, if any. * * If the error is EPERM the packet got blocked by the * local firewall.
Normally we should terminate the * connection but the blocking may have been spurious * due to a firewall reconfiguration cycle. So we treat * it like a packet loss and let the retransmit timer and * timeouts do their work over time. * XXX: It is a POLA question whether calling tcp_drop right * away would be the really correct behavior instead. */ if (((tp->t_flags & TF_FORCEDATA) == 0 || !tcp_timer_active(tp, TT_PERSIST)) && ((flags & TH_SYN) == 0) && (error != EPERM)) { if (sack_rxmit) { p->rxmit -= len; tp->sackhint.sack_bytes_rexmit -= len; KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, ("sackhint bytes rtx >= 0")); } else tp->snd_nxt -= len; } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ switch (error) { case EPERM: tp->t_softerror = error; return (error); case ENOBUFS: TCP_XMIT_TIMER_ASSERT(tp, len, flags); tp->snd_cwnd = tp->t_maxseg; return (0); case EMSGSIZE: /* * For some reason the interface we used initially * to send segments changed to another or lowered * its MTU. * If TSO was active we either got an interface * without TSO capabilities or TSO was turned off. * If we obtained mtu from ip_output(), then update * it and try again. */ if (tso) tp->t_flags &= ~TF_TSO; if (mtu != 0) { tcp_mss_update(tp, -1, mtu, NULL, NULL); goto again; } return (error); case EHOSTDOWN: case EHOSTUNREACH: case ENETDOWN: case ENETUNREACH: if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; return (0); } /* FALLTHROUGH */ default: return (error); } } TCPSTAT_INC(tcps_sndtotal); /* * Data sent (as far as we can tell). * If this advertises a larger window than any other segment, * then remember the size of the advertised window. * Any pending ACK has now been sent. */ if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); if (tcp_timer_active(tp, TT_DELACK)) tcp_timer_activate(tp, TT_DELACK, 0); #if 0 /* * This completely breaks TCP if newreno is turned on. What happens * is that if delayed-acks are turned on on the receiver, this code * on the transmitter effectively destroys the TCP window, forcing * it to four packets (1.5Kx4 = 6K window). */ if (sendalot && --maxburst) goto again; #endif if (sendalot) goto again; return (0); } void tcp_setpersist(struct tcpcb *tp) { int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; int tt; tp->t_flags &= ~TF_PREVVALID; if (tcp_timer_active(tp, TT_REXMT)) panic("tcp_setpersist: retransmit pending"); /* * Start/restart persistence timer. */ TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], tcp_persmin, tcp_persmax); tcp_timer_activate(tp, TT_PERSIST, tt); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; } /* * Insert TCP options, according to the supplied parameters, at the location * optp in a consistent way. Can handle unaligned destinations. * * The order of the option processing is crucial for optimal packing and * alignment for the scarce option space. * * The optimal order for a SYN/SYN-ACK segment is: * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) + * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40. * * The SACK options should be last. SACK blocks consume 8*n+2 bytes. * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks). * At minimum we need 10 bytes (to generate 1 SACK block). If both * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present, * we only have 10 bytes for SACK options (40 - (12 + 18)).
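 *
 * [Editor's worked example -- illustrative numbers only.] The packing
 * budget above drives the sackblks clamp in the TOF_SACK case below. With
 * timestamps in use, an established segment starts SACK processing at
 * optlen = 12 (NOP + NOP + 10-byte timestamp); the alignment loop adds two
 * more NOPs (optlen = 14), the 2-byte SACK kind/length header makes 16, and
 *
 *   sackblks = min(to->to_nsacks, (TCP_MAXOLEN - 16) / TCPOLEN_SACK)
 *            = min(to->to_nsacks, (40 - 16) / 8) = at most 3
 *
 * so three 8-byte SACK blocks fill the option space to exactly 40 bytes.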
*/ int tcp_addoptions(struct tcpopt *to, u_char *optp) { u_int32_t mask, optlen = 0; for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) { if ((to->to_flags & mask) != mask) continue; if (optlen == TCP_MAXOLEN) break; switch (to->to_flags & mask) { case TOF_MSS: while (optlen % 4) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG) continue; optlen += TCPOLEN_MAXSEG; *optp++ = TCPOPT_MAXSEG; *optp++ = TCPOLEN_MAXSEG; to->to_mss = htons(to->to_mss); bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss)); optp += sizeof(to->to_mss); break; case TOF_SCALE: while (!optlen || optlen % 2 != 1) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW) continue; optlen += TCPOLEN_WINDOW; *optp++ = TCPOPT_WINDOW; *optp++ = TCPOLEN_WINDOW; *optp++ = to->to_wscale; break; case TOF_SACKPERM: while (optlen % 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED) continue; optlen += TCPOLEN_SACK_PERMITTED; *optp++ = TCPOPT_SACK_PERMITTED; *optp++ = TCPOLEN_SACK_PERMITTED; break; case TOF_TS: while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP) continue; optlen += TCPOLEN_TIMESTAMP; *optp++ = TCPOPT_TIMESTAMP; *optp++ = TCPOLEN_TIMESTAMP; to->to_tsval = htonl(to->to_tsval); to->to_tsecr = htonl(to->to_tsecr); bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval)); optp += sizeof(to->to_tsval); bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); optp += sizeof(to->to_tsecr); break; #ifdef TCP_SIGNATURE case TOF_SIGNATURE: { int siglen = TCPOLEN_SIGNATURE - 2; while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) continue; optlen += TCPOLEN_SIGNATURE; *optp++ = TCPOPT_SIGNATURE; *optp++ = TCPOLEN_SIGNATURE; to->to_signature = optp; while (siglen--) *optp++ = 0; break; } #endif case TOF_SACK: { int sackblks = 0; struct sackblk *sack = (struct sackblk *)to->to_sacks; tcp_seq sack_seq; while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK) continue; optlen += TCPOLEN_SACKHDR; *optp++ = TCPOPT_SACK; sackblks = min(to->to_nsacks, (TCP_MAXOLEN - optlen) / TCPOLEN_SACK); *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK; while (sackblks--) { sack_seq = htonl(sack->start); bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); optp += sizeof(sack_seq); sack_seq = htonl(sack->end); bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); optp += sizeof(sack_seq); optlen += TCPOLEN_SACK; sack++; } TCPSTAT_INC(tcps_sack_send_blocks); break; } #ifdef TCP_RFC7413 case TOF_FASTOPEN: { int total_len; /* XXX is there any point to aligning this option? */ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len; if (TCP_MAXOLEN - optlen < total_len) continue; *optp++ = TCPOPT_FAST_OPEN; *optp++ = total_len; if (to->to_tfo_len > 0) { bcopy(to->to_tfo_cookie, optp, to->to_tfo_len); optp += to->to_tfo_len; } optlen += total_len; break; } #endif default: panic("%s: unknown TCP option type", __func__); break; } } /* Terminate and pad TCP options to a 4 byte boundary. */ if (optlen % 4) { optlen += TCPOLEN_EOL; *optp++ = TCPOPT_EOL; } /* * According to RFC 793 (STD0007): * "The content of the header beyond the End-of-Option option * must be header padding (i.e., zero)." * and later: "The padding is composed of zeros." 
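 *
 * Worked example: a 5-byte options block is closed with one EOL
 * (kind 0) and then two further zero bytes of padding to reach the
 * next 4-byte boundary, while a 7-byte block needs only the EOL.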
*/ while (optlen % 4) { optlen += TCPOLEN_PAD; *optp++ = TCPOPT_PAD; } KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__)); return (optlen); } Index: head/sys/netinet/udp_usrreq.c =================================================================== --- head/sys/netinet/udp_usrreq.c (revision 305823) +++ head/sys/netinet/udp_usrreq.c (revision 305824) @@ -1,1959 +1,1960 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2008 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #ifdef IPSEC #include #include #endif #include #include /* * UDP and UDP-Lite protocols implementation. * Per RFC 768, August, 1980. * Per RFC 3828, July, 2004. */ /* * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums * removes the only data integrity mechanism for packets and malformed * packets that would otherwise be discarded due to bad checksums, and may * cause problems (especially for NFS data blocks). 
*/ VNET_DEFINE(int, udp_cksum) = 1; SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_cksum), 0, "compute udp checksum"); int udp_log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &udp_log_in_vain, 0, "Log all incoming UDP packets"); VNET_DEFINE(int, udp_blackhole) = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_blackhole), 0, "Do not send port unreachables for refused connects"); static VNET_DEFINE(int, udp_require_l2_bcast) = 0; #define V_udp_require_l2_bcast VNET(udp_require_l2_bcast) SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_require_l2_bcast), 0, "Only treat packets sent to an L2 broadcast address as broadcast packets"); u_long udp_sendspace = 9216; /* really max datagram size */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); /* 40 1K datagrams */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */ VNET_DEFINE(struct inpcbinfo, udbinfo); VNET_DEFINE(struct inpcbhead, ulitecb); VNET_DEFINE(struct inpcbinfo, ulitecbinfo); static VNET_DEFINE(uma_zone_t, udpcb_zone); #define V_udpcb_zone VNET(udpcb_zone) #ifndef UDBHASHSIZE #define UDBHASHSIZE 128 #endif VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */ VNET_PCPUSTAT_SYSINIT(udpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(udpstat); #endif /* VIMAGE */ #ifdef INET static void udp_detach(struct socket *so); static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); #endif #ifdef IPSEC #ifdef IPSEC_NAT_T #define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP) #ifdef INET static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int); #endif #endif /* IPSEC_NAT_T */ #endif /* IPSEC */ static void udp_zone_change(void *tag) { uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_udpcb_zone, maxsockets); } static int udp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp; inp = mem; INP_LOCK_INIT(inp, "inp", "udpinp"); return (0); } static int udplite_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp; inp = mem; INP_LOCK_INIT(inp, "inp", "udpliteinp"); return (0); } void udp_init(void) { /* * For now default to 2-tuple UDP hashing - until the fragment * reassembly code can also update the flowid. * * Once we can calculate the flowid that way and re-establish * a 4-tuple, flip this to 4-tuple. 
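 * (IPI_HASHFIELDS_2TUPLE hashes on the address pair only, without
 * ports, so the result stays stable even for fragments that carry
 * no UDP header.)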
*/ in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, "udp_inpcb", udp_inpcb_init, NULL, 0, IPI_HASHFIELDS_2TUPLE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_udpcb_zone, maxsockets); uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, EVENTHANDLER_PRI_ANY); } void udplite_init(void) { in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE, UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL, 0, IPI_HASHFIELDS_2TUPLE); } /* * Kernel module interface for updating udpstat. The argument is an index * into udpstat treated as an array of u_long. While this encodes the * general layout of udpstat into the caller, it doesn't encode its location, * so that future changes to add, for example, per-CPU stats support won't * cause binary compatibility problems for kernel modules. */ void kmod_udpstat_inc(int statnum) { counter_u64_add(VNET(udpstat)[statnum], 1); } int udp_newudpcb(struct inpcb *inp) { struct udpcb *up; up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO); if (up == NULL) return (ENOBUFS); inp->inp_ppcb = up; return (0); } void udp_discardcb(struct udpcb *up) { uma_zfree(V_udpcb_zone, up); } #ifdef VIMAGE static void udp_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_udbinfo); uma_zdestroy(V_udpcb_zone); } VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL); static void udplite_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ulitecbinfo); } VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy, NULL); #endif #ifdef INET /* * Subroutine of udp_input(), which appends the provided mbuf chain to the * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that * contains the source address. If the socket ends up being an IPv6 socket, * udp_append() will convert to a sockaddr_in6 before passing the address * into the socket code. * * In the normal case udp_append() will return 0, indicating that you * must unlock the inp. However, if a tunneling protocol is in place, we * increment the inpcb refcnt and unlock the inp; on return from the * tunneling protocol we then decrement the reference count. If * in_pcbrele_rlocked() returns 1, indicating the inp is gone, we return * that to the caller to tell them *not* to unlock the inp. In the case * of multicast this will cause the distribution to stop (though most * tunneling protocols known currently do *not* use multicast). */ static int udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *udp_in) { struct sockaddr *append_sa; struct socket *so; struct mbuf *opts = NULL; #ifdef INET6 struct sockaddr_in6 udp_in6; #endif struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } off += sizeof(struct udphdr); #ifdef IPSEC /* Check AH/ESP integrity. */ if (ipsec4_in_reject(n, inp)) { m_freem(n); return (0); } #ifdef IPSEC_NAT_T up = intoudpcb(inp); KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */ n = udp4_espdecap(inp, n, off); if (n == NULL) /* Consumed.
*/ return (0); } #endif /* IPSEC_NAT_T */ #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif /* MAC */ if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6) (void)ip6_savecontrol_v4(inp, n, &opts, NULL); else #endif /* INET6 */ ip_savecontrol(inp, &opts, ip, n); } #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { bzero(&udp_in6, sizeof(udp_in6)); udp_in6.sin6_len = sizeof(udp_in6); udp_in6.sin6_family = AF_INET6; in6_sin_2_v4mapsin6(udp_in, &udp_in6); append_sa = (struct sockaddr *)&udp_in6; } else #endif /* INET6 */ append_sa = (struct sockaddr *)udp_in; m_adj(n, off); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp_input(struct mbuf **mp, int *offp, int proto) { struct ip *ip; struct udphdr *uh; struct ifnet *ifp; struct inpcb *inp; uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct ip save_ip; struct sockaddr_in udp_in; struct mbuf *m; struct m_tag *fwd_tag; int cscov_partial, iphlen; m = *mp; iphlen = *offp; ifp = m->m_pkthdr.rcvif; *mp = NULL; UDPSTAT_INC(udps_ipackets); /* * Strip IP options, if any; should skip this, make available to * user, and use on returned packets, but we don't yet have a way to * check the checksum with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) { UDPSTAT_INC(udps_hdrops); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0; /* * Destination port of 0 is illegal, based on RFC 768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. Stuff source address * and datagram in user buffer. */ bzero(&udp_in, sizeof(udp_in)); udp_in.sin_len = sizeof(udp_in); udp_in.sin_family = AF_INET; udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; /* * Make mbuf data length reflect UDP length. If not enough data to * reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); ip_len = ntohs(ip->ip_len) - iphlen; if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) { /* Zero means checksum over the complete packet. */ if (len == 0) len = ip_len; cscov_partial = 0; } if (ip_len != len) { if (len > ip_len || len < sizeof(struct udphdr)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (proto == IPPROTO_UDP) m_adj(m, len - ip_len); } /* * Save a copy of the IP header in case we want to restore it for * sending an ICMP error message in response. */ if (!V_udp_blackhole) save_ip = *ip; else memset(&save_ip, 0, sizeof(save_ip)); /* * Checksum extended UDP header and data.
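 * If the NIC already verified the checksum (CSUM_DATA_VALID), fold
 * its partial result together with a pseudo-header sum; otherwise
 * overlay the IP header with a pseudo-header (struct ipovly) and run
 * in_cksum() across it and the datagram. A final value of zero
 * means the checksum is good.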
*/ if (uh->uh_sum) { u_short uh_sum; if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + proto)); uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ? uh->uh_ulen : htons(ip_len); uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh_sum) { UDPSTAT_INC(udps_badsum); m_freem(m); return (IPPROTO_DONE); } } else { if (proto == IPPROTO_UDP) { UDPSTAT_INC(udps_nosum); } else { /* UDPLite requires a checksum */ /* XXX: What is the right UDPLite MIB counter here? */ m_freem(m); return (IPPROTO_DONE); } } pcbinfo = udp_get_inpcbinfo(proto); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) && in_broadcast(ip->ip_dst, ifp))) { struct inpcb *last; struct inpcbhead *pcblist; struct ip_moptions *imo; INP_INFO_RLOCK(pcbinfo); pcblist = udp_get_pcblist(proto); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if (inp->inp_lport != uh->uh_dport) continue; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; INP_RLOCK(inp); /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->inp_moptions; if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct sockaddr_in group; int blocked; if (imo == NULL) { INP_RUNLOCK(inp); continue; } bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(imo, ifp, (struct sockaddr *)&group, (struct sockaddr *)&udp_in); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IPSTAT_INC(ips_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); continue; } } if (last != NULL) { struct mbuf *n; - if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != + NULL) { UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, n, iphlen, &udp_in)) { goto inp_lost; } } INP_RUNLOCK(last); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datagram.)
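 * (RFC 1122, section 3.2.2, forbids ICMP error messages in response
 * to datagrams addressed to a broadcast or multicast destination.)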
*/ UDPSTAT_INC(udps_noportbcast); if (inp) INP_RUNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); goto badunlocked; } UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, m, iphlen, &udp_in) == 0) INP_RUNLOCK(last); inp_lost: INP_INFO_RUNLOCK(pcbinfo); return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(pcbinfo, ip->ip_src, uh->uh_sport, next_hop->sin_addr, next_hop->sin_port ? htons(next_hop->sin_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m); if (inp == NULL) { if (udp_log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & (M_BCAST | M_MCAST)) { UDPSTAT_INC(udps_noportbcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badunlocked; *ip = save_ip; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return (IPPROTO_DONE); } /* * Check the minimum TTL for socket. */ INP_RLOCK_ASSERT(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } if (cscov_partial) { struct udpcb *up; up = intoudpcb(inp); if (up->u_rxcslen == 0 || up->u_rxcslen > len) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip, inp, uh); if (udp_append(inp, ip, m, iphlen, &udp_in) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badunlocked: m_freem(m); return (IPPROTO_DONE); } #endif /* INET */ /* * Notify a udp user of an asynchronous error; just wake up so that they can * collect error status. */ struct inpcb * udp_notify(struct inpcb *inp, int errno) { /* * While udp_ctlinput() always calls udp_notify() with a read lock * when invoking it directly, in_pcbnotifyall() currently uses write * locks due to sharing code with TCP. For now, accept either a read * or a write lock, but a read lock is sufficient. 
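 * Hence the INP_LOCK_ASSERT() below, which either lock flavor
 * satisfies, rather than INP_RLOCK_ASSERT().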
*/ INP_LOCK_ASSERT(inp); if ((errno == EHOSTUNREACH || errno == ENETUNREACH || errno == EHOSTDOWN) && inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); return (inp); } #ifdef INET static void udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip, struct inpcbinfo *pcbinfo) { struct ip *ip = vip; struct udphdr *uh; struct in_addr faddr; struct inpcb *inp; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify); return; } /* * Hostdead is ugly because it goes linearly through all PCBs. * * XXX: We never get this from ICMP, otherwise it makes an excellent * DoS attack on machines with many connections. */ if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip != NULL) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } INP_RUNLOCK(inp); } else { inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func != NULL) { INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx); } else { INP_RUNLOCK(inp); } } } } else in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd], udp_notify); } void udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo)); } void udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo)); } #endif /* INET */ static int udp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_udbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. 
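 * The pattern below: snapshot the generation count, take references
 * on matching pcbs under the read lock, copy them out to userland
 * without the global lock held, then report the final generation so
 * the caller can detect a race and retry.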
*/ INP_INFO_RLOCK(&V_udbinfo); gencnt = V_udbinfo.ipi_gencnt; n = V_udbinfo.ipi_count; INP_INFO_RUNLOCK(&V_udbinfo); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == NULL) return (ENOMEM); INP_INFO_RLOCK(&V_udbinfo); for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK(&V_udbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_udbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_udbinfo); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK(&V_udbinfo); xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_udbinfo.ipi_count; INP_INFO_RUNLOCK(&V_udbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); #ifdef INET static int udp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); #endif /* INET */ int udp_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; struct udpcb *up; int isudplite, error, optval; error = 0; isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 
1 : 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); if (sopt->sopt_level != so->so_proto->pr_protocol) { #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) { INP_WUNLOCK(inp); error = ip6_ctloutput(so, sopt); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { INP_WUNLOCK(inp); error = ip_ctloutput(so, sopt); } #endif return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case UDP_ENCAP: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); #ifdef IPSEC_NAT_T up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); #endif switch (optval) { case 0: /* Clear all UDP encap. */ #ifdef IPSEC_NAT_T up->u_flags &= ~UF_ESPINUDP_ALL; #endif break; #ifdef IPSEC_NAT_T case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: up->u_flags &= ~UF_ESPINUDP_ALL; if (optval == UDP_ENCAP_ESPINUDP) up->u_flags |= UF_ESPINUDP; else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE) up->u_flags |= UF_ESPINUDP_NON_IKE; break; #endif default: error = EINVAL; break; } INP_WUNLOCK(inp); break; case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if ((optval != 0 && optval < 8) || (optval > 65535)) { INP_WUNLOCK(inp); error = EINVAL; break; } if (sopt->sopt_name == UDPLITE_SEND_CSCOV) up->u_txcslen = optval; else up->u_rxcslen = optval; INP_WUNLOCK(inp); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { #ifdef IPSEC_NAT_T case UDP_ENCAP: up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); optval = up->u_flags & UF_ESPINUDP_ALL; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if (sopt->sopt_name == UDPLITE_SEND_CSCOV) optval = up->u_txcslen; else optval = up->u_rxcslen; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #ifdef INET #define UH_WLOCKED 2 #define UH_RLOCKED 1 #define UH_UNLOCKED 0 static int udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct udpiphdr *ui; int len = m->m_pkthdr.len; struct in_addr faddr, laddr; struct cmsghdr *cm; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin, src; int cscov_partial = 0; int error = 0; int ipflags; u_short fport, lport; int unlock_udbinfo, unlock_inp; u_char tos; uint8_t pr; uint16_t cscov = 0; uint32_t flowid = 0; uint8_t flowtype = M_HASHTYPE_NONE; /* * udp_output() may need to temporarily bind or connect the current * inpcb. As such, we don't know up front whether we will need the * pcbinfo lock or not. Do any work to decide what is needed up * front before acquiring any locks. 
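 *
 * In short: we take the inpcb write lock when no destination was
 * supplied (a connected send, which may use and update the cached
 * route) or when the socket is not yet bound; otherwise the cheaper
 * read lock suffices.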
*/ if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { if (control) m_freem(control); m_freem(m); return (EMSGSIZE); } src.sin_family = 0; sin = (struct sockaddr_in *)addr; if (sin == NULL || (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { INP_WLOCK(inp); unlock_inp = UH_WLOCKED; } else { INP_RLOCK(inp); unlock_inp = UH_RLOCKED; } tos = inp->inp_ip_tos; if (control != NULL) { /* * XXX: Currently, we assume all the optional information is * stored in a single mbuf. */ if (control->m_next) { if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); m_freem(control); m_freem(m); return (EINVAL); } for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) { error = EINVAL; break; } if (cm->cmsg_level != IPPROTO_IP) continue; switch (cm->cmsg_type) { case IP_SENDSRCADDR: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) { error = EINVAL; break; } bzero(&src, sizeof(src)); src.sin_family = AF_INET; src.sin_len = sizeof(src); src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; case IP_TOS: if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) { error = EINVAL; break; } tos = *(u_char *)CMSG_DATA(cm); break; case IP_FLOWID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowid = *(uint32_t *) CMSG_DATA(cm); break; case IP_FLOWTYPE: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowtype = *(uint32_t *) CMSG_DATA(cm); break; #ifdef RSS case IP_RSSBUCKETID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } /* This is just a placeholder for now */ break; #endif /* RSS */ default: error = ENOPROTOOPT; break; } if (error) break; } m_freem(control); } if (error) { if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Depending on whether or not the application has bound or connected * the socket, we may have to do varying levels of work. The optimal * case is for a connected UDP socket, as a global lock isn't * required at all. * * In order to decide which we need, we require stability of the * inpcb binding, which we ensure by acquiring a read lock on the * inpcb. This doesn't strictly follow the lock order, so we play * the trylock and retry game; note that we may end up with more * conservative locks than required the second time around, so later * assertions have to accept that. Further analysis of the number of * misses under contention is required. * * XXXRW: Check that hash locking update here is correct. */ pr = inp->inp_socket->so_proto->pr_protocol; pcbinfo = udp_get_inpcbinfo(pr); sin = (struct sockaddr_in *)addr; if (sin != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { INP_HASH_WLOCK(pcbinfo); unlock_udbinfo = UH_WLOCKED; } else if ((sin != NULL && ( (sin->sin_addr.s_addr == INADDR_ANY) || (sin->sin_addr.s_addr == INADDR_BROADCAST) || (inp->inp_laddr.s_addr == INADDR_ANY) || (inp->inp_lport == 0))) || (src.sin_family == AF_INET)) { INP_HASH_RLOCK(pcbinfo); unlock_udbinfo = UH_RLOCKED; } else unlock_udbinfo = UH_UNLOCKED; /* * If the IP_SENDSRCADDR control message was specified, override the * source address for this datagram. Its use is invalidated if the * address thus specified is incomplete or clobbers other inpcbs. 
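 *
 * A userland sender picks the source address per datagram with a
 * control message along these lines (sketch only; see ip(4)):
 *
 *	char cbuf[CMSG_SPACE(sizeof(struct in_addr))];
 *	struct cmsghdr *cm = (struct cmsghdr *)cbuf;
 *	cm->cmsg_level = IPPROTO_IP;
 *	cm->cmsg_type = IP_SENDSRCADDR;
 *	cm->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
 *	memcpy(CMSG_DATA(cm), &src, sizeof(struct in_addr));
 *
 * where src is the chosen struct in_addr and cbuf is then hung off
 * msg_control/msg_controllen for sendmsg(2).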
laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_family == AF_INET) { INP_HASH_LOCK_ASSERT(pcbinfo); if ((lport == 0) || (laddr.s_addr == INADDR_ANY && src.sin_addr.s_addr == INADDR_ANY)) { error = EINVAL; goto release; } error = in_pcbbind_setup(inp, (struct sockaddr *)&src, &laddr.s_addr, &lport, td->td_ucred); if (error) goto release; } /* * If a UDP socket has been connected, then a local address/port will * have been selected and bound. * * If a UDP socket has not been connected to, then an explicit * destination address must be used, in which case a local * address/port may not have been selected and bound. */ if (sin != NULL) { INP_LOCK_ASSERT(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } /* * Jail may rewrite the destination address, so let it do * that before we use it. */ error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error) goto release; /* * If a local address or port hasn't yet been selected, or if * the destination address needs to be rewritten due to using * a special INADDR_ constant, invoke in_pcbconnect_setup() * to do the heavy lifting. Once a port is selected, we * commit the binding back to the socket; we also commit the * binding of the address if in jail. * * If we already have a valid binding and we're not * requesting a destination address rewrite, use a fast path. */ if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport == 0 || sin->sin_addr.s_addr == INADDR_ANY || sin->sin_addr.s_addr == INADDR_BROADCAST) { INP_HASH_LOCK_ASSERT(pcbinfo); error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, &faddr.s_addr, &fport, NULL, td->td_ucred); if (error) goto release; /* * XXXRW: Why not commit the port if the address is * !INADDR_ANY? */ /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Remember addr if jailed, to prevent * rebinding. */ if (prison_flag(td->td_ucred, PR_IP4)) inp->inp_laddr = laddr; inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_lport = 0; error = EAGAIN; goto release; } inp->inp_flags |= INP_ANONPORT; } } else { faddr = sin->sin_addr; fport = sin->sin_port; } } else { INP_LOCK_ASSERT(inp); faddr = inp->inp_faddr; fport = inp->inp_fport; if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf for UDP, IP, and possible * link-layer headers. Immediately slide the data pointer forward * again, since we won't use that space at this layer. */ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } m->m_data += max_linkhdr; m->m_len -= max_linkhdr; m->m_pkthdr.len -= max_linkhdr; /* * Fill in mbuf with extended UDP header and addresses and length put * into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = pr; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); if (pr == IPPROTO_UDPLITE) { struct udpcb *up; uint16_t plen; up = intoudpcb(inp); cscov = up->u_txcslen; plen = (u_short)len + sizeof(struct udphdr); if (cscov >= plen) cscov = 0; ui->ui_len = htons(plen); ui->ui_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ?
0 : 1; } else ui->ui_v = IPVERSION << 4; /* * Set the Don't Fragment bit in the IP header. */ if (inp->inp_flags & INP_DONTFRAG) { struct ip *ip; ip = (struct ip *)&ui->ui_i; ip->ip_off |= htons(IP_DF); } ipflags = 0; if (inp->inp_socket->so_options & SO_DONTROUTE) ipflags |= IP_ROUTETOIF; if (inp->inp_socket->so_options & SO_BROADCAST) ipflags |= IP_ALLOWBROADCAST; if (inp->inp_flags & INP_ONESBCAST) ipflags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif /* * Set up checksum and output datagram. */ ui->ui_sum = 0; if (pr == IPPROTO_UDPLITE) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; if (cscov_partial) { if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0) ui->ui_sum = 0xffff; } else { if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0) ui->ui_sum = 0xffff; } } else if (V_udp_cksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, htons((u_short)len + sizeof(struct udphdr) + pr)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len); ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = tos; /* XXX */ UDPSTAT_INC(udps_opackets); /* * Set up flowid / RSS information for the outbound socket. * * Once the UDP code decides to set a flowid some other way, * this allows the flowid to be overridden by userland. */ if (flowtype != M_HASHTYPE_NONE) { m->m_pkthdr.flowid = flowid; M_HASHTYPE_SET(m, flowtype); #ifdef RSS } else { uint32_t hash_val, hash_type; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later on, connected socket values should be cached * in the inpcb and reused, rather than constantly * re-calculated. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v4(faddr, laddr, fport, lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } #endif } #ifdef RSS /* * Don't override with the inp cached flowid value. * * Depending upon the kind of send being done, the inp * flowid/flowtype values may actually not be appropriate * for this particular socket send. * * We should either leave the flowid at zero (which is what is * currently done) or set it to some software generated * hash value based on the packet contents. */ ipflags |= IP_NODEFAULTFLOWID; #endif /* RSS */ if (unlock_udbinfo == UH_WLOCKED) INP_HASH_WUNLOCK(pcbinfo); else if (unlock_udbinfo == UH_RLOCKED) INP_HASH_RUNLOCK(pcbinfo); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); error = ip_output(m, inp->inp_options, (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags, inp->inp_moptions, inp); if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); return (error); release: if (unlock_udbinfo == UH_WLOCKED) { INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); } else if (unlock_udbinfo == UH_RLOCKED) { INP_HASH_RUNLOCK(pcbinfo); INP_RUNLOCK(inp); } else INP_RUNLOCK(inp); m_freem(m); return (error); } #if defined(IPSEC) && defined(IPSEC_NAT_T) /* * Potentially decap ESP in UDP frame.
Check for an ESP header * and optional marker; if present, strip the UDP header and * push the result through IPSec. * * Returns mbuf to be processed (potentially re-allocated) or * NULL if consumed and/or processed. */ static struct mbuf * udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) { size_t minlen, payload, skip, iphlen; caddr_t data; struct udpcb *up; struct m_tag *tag; struct udphdr *udphdr; struct ip *ip; INP_RLOCK_ASSERT(inp); /* * Pull up data so the longest case is contiguous: * IP/UDP hdr + non-ESP marker + ESP hdr. */ minlen = off + sizeof(uint64_t) + sizeof(struct esp); if (minlen > m->m_pkthdr.len) minlen = m->m_pkthdr.len; if ((m = m_pullup(m, minlen)) == NULL) { IPSECSTAT_INC(ips_in_inval); return (NULL); /* Bypass caller processing. */ } data = mtod(m, caddr_t); /* Points to ip header. */ payload = m->m_len - off; /* Size of payload. */ if (payload == 1 && data[off] == '\xff') return (m); /* NB: keepalive packet, no decap. */ up = intoudpcb(inp); KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0, ("u_flags 0x%x", up->u_flags)); /* * Check that the payload is large enough to hold an * ESP header and compute the amount of data to remove. * * NB: the caller has already done a pullup for us. * XXX can we assume alignment and eliminate bcopys? */ if (up->u_flags & UF_ESPINUDP_NON_IKE) { /* * draft-ietf-ipsec-nat-t-ike-0[01].txt and * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring * possible AH mode non-IKE marker+non-ESP marker * from draft-ietf-ipsec-udp-encaps-00.txt. */ uint64_t marker; if (payload <= sizeof(uint64_t) + sizeof(struct esp)) return (m); /* NB: no decap. */ bcopy(data + off, &marker, sizeof(uint64_t)); if (marker != 0) /* Non-IKE marker. */ return (m); /* NB: no decap. */ skip = sizeof(uint64_t) + sizeof(struct udphdr); } else { uint32_t spi; if (payload <= sizeof(struct esp)) { IPSECSTAT_INC(ips_in_inval); m_freem(m); return (NULL); /* Discard. */ } bcopy(data + off, &spi, sizeof(uint32_t)); if (spi == 0) /* Non-ESP marker. */ return (m); /* NB: no decap. */ skip = sizeof(struct udphdr); } /* * Set up a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember * the UDP ports. This is required if we want to select * the right SPD for multiple hosts behind the same NAT. * * NB: ports are maintained in network byte order everywhere * in the NAT-T code. */ tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS, 2 * sizeof(uint16_t), M_NOWAIT); if (tag == NULL) { IPSECSTAT_INC(ips_in_nomem); m_freem(m); return (NULL); /* Discard. */ } iphlen = off - sizeof(struct udphdr); udphdr = (struct udphdr *)(data + iphlen); ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport; ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport; m_tag_prepend(m, tag); /* * Remove the UDP header (and possibly the non-ESP marker). * IP header length is iphlen. * Before: * <--- off ---> * +----+------+-----+ * | IP | UDP | ESP | * +----+------+-----+ * <-skip-> * After: * +----+-----+ * | IP | ESP | * +----+-----+ * <-skip-> */ ovbcopy(data, data + skip, iphlen); m_adj(m, skip); ip = mtod(m, struct ip *); ip->ip_len = htons(ntohs(ip->ip_len) - skip); ip->ip_p = IPPROTO_ESP; /* * We cannot yet update the cksums so clear any * h/w cksum flags as they are no longer valid. */ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), AF_INET, ip->ip_p); return (NULL); /* NB: consumed, bypass processing.
*/ } #endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ static void udp_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_abort: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp_attach: inp != NULL")); error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = sotoinpcb(so); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } #endif /* INET */ int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx) { struct inpcb *inp; struct udpcb *up; KASSERT(so->so_type == SOCK_DGRAM, ("udp_set_kernel_tunneling: !dgram")); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL")); INP_WLOCK(inp); up = intoudpcb(inp); if ((up->u_tun_func != NULL) || (up->u_icmp_func != NULL)) { INP_WUNLOCK(inp); return (EBUSY); } up->u_tun_func = f; up->u_icmp_func = i; up->u_tun_ctx = ctx; INP_WUNLOCK(inp); return (0); } #ifdef INET static int udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_close: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_WUNLOCK(inp); return (EISCONN); } sin = (struct sockaddr_in *)nam; error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error != 0) { INP_WUNLOCK(inp); return (error); } INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); INP_WUNLOCK(inp); return (error); } static void udp_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_detach: inp == NULL")); 
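	/*
	 * Note the tear-down order below: sever inp_ppcb first, detach
	 * and free the inpcb while the pcbinfo lock is still held, and
	 * free the udpcb itself only after all locks are dropped.
	 */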
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("udp_detach: not disconnected")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); inp->inp_ppcb = NULL; in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_WUNLOCK(inp); return (ENOTCONN); } INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); INP_WUNLOCK(inp); return (0); } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_send: inp == NULL")); return (udp_output(inp, m, addr, control, td)); } #endif /* INET */ int udp_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } #ifdef INET struct pr_usrreqs udp_usrreqs = { .pru_abort = udp_abort, .pru_attach = udp_attach, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_control = in_control, .pru_detach = udp_detach, .pru_disconnect = udp_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = udp_send, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_shutdown = udp_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp_close, }; #endif /* INET */ Index: head/sys/netinet6/icmp6.c =================================================================== --- head/sys/netinet6/icmp6.c (revision 305823) +++ head/sys/netinet6/icmp6.c (revision 305824) @@ -1,2870 +1,2870 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. 
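 *
 * For example, a module would bump the outgoing-error counter with
 *
 *	kmod_icmp6stat_inc(offsetof(struct icmp6stat, icp6s_error) /
 *	    sizeof(u_quad_t));
 *
 * typically via a convenience wrapper macro rather than spelled out
 * like this.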
*/ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? 
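 * (An ICMPv6 error can only go back to the source of the offending
 * packet; an unspecified or multicast source is not a usable
 * destination, so we give up here.)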
*/ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; - if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
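 *
 * To make the length arithmetic below concrete (made-up sizes, not a
 * real trace): for a plain echo request off == noff == 40, so a
 * 1048-byte copy n0 is trimmed by m_adj(n0, 48) to its 1000 payload
 * bytes while n carries the 48 header bytes, and the chain totals
 * n0len + (noff - off) = 1048; if extension headers were present
 * (off > 40), exactly those bytes disappear from the reply.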
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif - n = m_copy(m, 0, M_COPYALL); + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) != (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. 
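 * The WRU reply built here ends up laid out as (byte offsets):
 *
 *	0         40          48                  52
 *	| ip6_hdr  | icmp6_hdr | 4 zero-byte "TTL" | hostname... |
 *
 * with at most maxhlen bytes of pr_hostname appended.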
*/ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. */ if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); /* -1 == no app on SEND socket */ if (error == 0) return (IPPROTO_DONE); nd6_rs_input(m, off, icmp6len); } else nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(n, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; /* -1 == no app on SEND socket */ nd6_rs_input(n, off, icmp6len); } else nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ra_input(m, off, icmp6len); } else nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ra_input(n, off, icmp6len); } else nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ns_input(m, off, icmp6len); } else nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ns_input(n, off, icmp6len); } else nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. 
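 * The same dispatch shape repeats for RS/RA/NS/NA and redirects; as a
 * sketch, with nd6_xx_input standing in for the per-type handler (not
 * a real symbol):
 *
 *	if (send_sendso_input_hook != NULL) {
 *		if (send_sendso_input_hook(pkt, ifp, SND_IN, ip6len) == 0)
 *			done;			(a userland SeND app took it)
 *		else
 *			nd6_xx_input(pkt, off, icmp6len);
 *	} else
 *		nd6_xx_input(pkt, off, icmp6len);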
*/ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_na_input(m, off, icmp6len); } else nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_na_input(n, off, icmp6len); } else nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); icmp6_redirect_input(m, off); } else icmp6_redirect_input(m, off); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; icmp6_redirect_input(n, off); } else icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? 
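 * In practice the walk does terminate: every case either jumps to
 * notify or advances eoff by at least 8 bytes, so the IP6_EXTHDR_CHECK
 * or IP6_EXTHDR_GET length test must eventually fail.  An explicit
 * bound would look like this hypothetical variant (hdrlimit is
 * invented for the sketch, not part of the code):
 *
 *	int hdrlimit = 64;
 *	while (hdrlimit-- > 0) { ... the same switch ... }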
*/ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. 
*/ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. * even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. 
* * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? 
* * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? */ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... 
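 * For scale: IPV6_MMTU is 1280 by definition, while MCLBYTES is
 * typically 2048, so a single cluster already exceeds the minimum
 * IPv6 MTU; only the NODEADDR case capped replylen at MCLBYTES above
 * (truncating later), the remaining qtypes simply fail here.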
*/ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". 
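 *
 * A worked example of the encoding produced here (hypothetical names):
 * "www.foo.example", with two dots, becomes
 *
 *	\003 w w w \003 f o o \007 e x a m p l e \000
 *
 * while "foo.example", with one dot, is guessed to be a truncated
 * hostname and gets a second terminator:
 *
 *	\003 f o o \007 e x a m p l e \000 \000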
*/ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; }
/* * check if two DNS-encoded strings match. takes care of the truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; }
/* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... */ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec.
*/ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); }
static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* nothing to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * preferred addresses should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * There is no room to copy any more addresses. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen.
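 *
 * Concretely (made-up clock values): with time_uptime == 1000 and
 * ia6t_expire == 1600 the reply advertises htonl(600); an already
 * expired address advertises 0; a static address (ia6t_expire == 0)
 * advertises ND6_INFINITE_LIFETIME.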
*/ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer. * Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. 
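 * Illustrative numbers (invented): a 90-byte ICMPv6 packet arriving
 * in a 2 KB cluster fits within MHLEN, so the copy below charges the
 * raw socket's receive buffer roughly one plain mbuf instead of an
 * mbuf plus a whole cluster.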
*/ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || - (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + (n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ hlim = 0; srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. 
* The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding a packet * destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { src6 = ia->ia_addr.sin6_addr; srcp = &src6; if (m->m_pkthdr.rcvif != NULL) { /* XXX: This may not be the outgoing interface */ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else hlim = V_ip6_defhlim; } if (ia != NULL) ifa_free(&ia->ia_ifa); } if (srcp == NULL) { int error; struct in6_addr dst6; uint32_t scopeid; /* * This case matches multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, scopeid, NULL, &src6, &hlim); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; }
void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); }
static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; }
void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0, &nh6) == 0) { if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case.
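 * Sketched with made-up addresses: in the router case the result is a
 * host route like "2001:db8::1 -> gw fe80::2 (RTF_HOST|RTF_GATEWAY)";
 * in the on-link case the gateway is our own interface address and
 * only RTF_HOST is set.  The in6_rtredirect() loop below installs the
 * route once per FIB.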
*/ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. 
*/ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. 
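 * Worked example (made-up length): a 1001-byte original gives
 * extra = 1001 % 8 = 1; with at least 7 bytes of trailing space it is
 * padded to 1008, otherwise it is truncated to 1000, keeping the
 * redirected-header option a multiple of 8 bytes long.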
*/ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? 
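 * ppsratecheck() keeps a (timestamp, counter) pair per limiter; the
 * pattern, sketched with invented names:
 *
 *	static struct timeval lasttime;
 *	static int curpps;
 *
 *	if (ppsratecheck(&lasttime, &curpps, 100))
 *		send();		(still under 100 packets this second)
 *
 * Here the budget is V_icmp6errppslim, presumably exposed as the
 * net.inet6.icmp6.errppslimit sysctl (name assumed, not verified).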
* * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: head/sys/netinet6/ip6_forward.c =================================================================== --- head/sys/netinet6/ip6_forward.c (revision 305823) +++ head/sys/netinet6/ip6_forward.c (revision 305824) @@ -1,626 +1,627 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. 
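 *
 * In outline, the ICMPv6 errors this function can generate are (a
 * sketch of the cases handled below):
 *
 *	hop limit exhausted       -> ICMP6_TIME_EXCEED_TRANSIT
 *	no route to destination   -> ICMP6_DST_UNREACH_NOROUTE
 *	scope violation           -> ICMP6_DST_UNREACH_BEYONDSCOPE
 *	packet exceeds link MTU   -> ICMP6_PACKET_TOO_BIG (with mtu)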
* */ void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst = NULL; struct rtentry *rt = NULL; struct route_in6 rin6; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; #ifdef IPSEC struct secpolicy *sp = NULL; #endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Do not forward packets to a multicast destination (that should be * handled by ip6_mforward()). * Do not forward packets with an unspecified source. It was discussed * in July 2000, on the ipngwg mailing list. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif)); } m_freem(m); return; } #ifdef IPSEC /* * Check if this packet has an active SA and needs to be dropped * instead of forwarded. */ if (ip6_ipsec_fwd(m) != 0) { IP6STAT_INC(ip6s_cantforward); m_freem(m); return; } #endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ip6stealth) { #endif if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0); return; } ip6->ip6_hlim -= IPV6_HLIMDEC; #ifdef IPSTEALTH } #endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case * we need to generate an ICMP6 message to the src. * Thanks to M_EXT, in most cases copy will not occur. * * It is important to save it before IPsec processing as IPsec * processing may modify the mbuf. */ - mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN)); + mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN), + M_NOWAIT); #ifdef IPSEC /* get a security policy for this packet */ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); if (sp == NULL) { IPSEC6STAT_INC(ips_out_inval); IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ IPSEC6STAT_INC(ips_out_polvio); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ KEY_FREESP(&sp); goto skip_ipsec; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* XXX should be panic ? */ printf("ip6_forward: No IPsec request specified.\n"); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } /* do IPsec */ break; case IPSEC_POLICY_ENTRUST: default: /* should be panic ?? */ printf("ip6_forward: Invalid policy found. %d\n", sp->policy); KEY_FREESP(&sp); goto skip_ipsec; } { struct ipsecrequest *isr = NULL; /* * When the kernel forwards a packet, it is not proper to apply * IPsec transport mode to the packet. This check avoids that.
* at present, if there is even a transport mode SA request in the * security policy, the kernel does not apply IPsec to the packet. * this check is not enough because the following case is valid. * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; */ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_ANY) goto doipsectunnel; if (isr->saidx.mode == IPSEC_MODE_TUNNEL) goto doipsectunnel; } /* * if there's no need for tunnel mode IPsec, skip. */ if (!isr) goto skip_ipsec; doipsectunnel: /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ /* * If we need to encapsulate the packet, do it here; * ipsec6_process_packet will send the packet using ip6_output. */ error = ipsec6_process_packet(m, sp->req); /* Release SP if an error occurred */ if (error != 0) KEY_FREESP(&sp); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We * will just continue to process the packet without * IPsec processing. */ error = 0; goto skip_ipsec; } if (error) { /* mbuf is already reclaimed in ipsec6_process_packet. */ switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* FALLTHROUGH */ case ENOENT: /* don't show these error codes to the user */ break; } IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } return; } else { /* * In the FAST IPSec case we have already * re-injected the packet and it has been freed * by the ipsec_done() function. So, just clean * up after ourselves. */ m = NULL; goto freecopy; } } skip_ipsec: #endif again: bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; again2: rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); rt = rin6.ro_rt; if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } goto bad; } /* * Source scope check: if a packet can't be delivered to its * destination for the reason that the destination is beyond the scope * of the source address, discard the packet and return an icmp6 * destination unreachable error with Code 2 (beyond scope of source * address). We use a local copy of ip6_src, since in6_setscope() * will possibly modify its first argument.
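 *
 * (in6_setscope() may embed the zone ID into the address it is handed,
 * KAME-style, e.g. a hypothetical fe80::1 on zone 2 becomes fe80:2::1
 * in kernel-internal form; that is why the checks below work on the
 * src_in6 scratch copy rather than on ip6->ip6_src itself.)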
* [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] */ src_in6 = ip6->ip6_src; if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { /* XXX: this should not happen */ IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); goto bad; } /* * Destination scope check: if a packet is going to break the scope * zone of the packet's destination address, discard it. This case * should usually be prevented by an appropriately configured routing * table, but we need an explicit check because we may mistakenly * forward the packet to a different zone by (e.g.) a default route. */ dst_in6 = ip6->ip6_dst; if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)rt->rt_gateway; /* * If we are to forward the packet using the same interface * as one we got the packet from, perhaps we should send a redirect * to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a route * modified by a redirect. */ if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { /* * If the incoming interface is equal to the outgoing * one, and the link attached to the interface is * point-to-point, then it will be highly probable * that a routing loop occurs. Thus, we immediately * drop the packet and send an ICMPv6 error message. * * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. */ icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); goto bad; } type = ND_REDIRECT; } /* * Fake scoped addresses. Note that even link-local source or * destination can appear, if the originating node just sends the * packet to us (without address resolution for the destination). * Since both icmp6_error and icmp6_redirect_output fill the embedded * link identifiers, we can do this stuff after making a copy for * returning an error. */ if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { /* * See corresponding comments in ip6_output. * XXX: but is it possible that ip6_forward() sends a packet * to a loopback interface? I don't think so, and thus * I bark here. (jinmei@kame.net) * XXX: it is common to route invalid packets to loopback. * also, the codepath will be visited on use of ::1 in * rthdr. (itojun) */ #if 1 if (0) #else if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) #endif { printf("ip6_forward: outgoing interface is loopback.
" "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } /* we can just use rcvif in forwarding. */ origifp = m->m_pkthdr.rcvif; } else origifp = rt->rt_ifp; /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); if (error != 0 || m == NULL) goto freecopy; /* consumed by filter */ ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; else { RTFREE(rt); goto again; /* Redo the routing table lookup. */ } } /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto out; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&rin6.ro_dst; bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); RTFREE(rt); goto again2; } pass: /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); if (mcopy) { u_long mtu; #ifdef IPSEC size_t ipsechdrsiz; #endif /* IPSEC */ mtu = IN6_LINKMTU(rt->rt_ifp); #ifdef IPSEC /* * When we do IPsec tunnel ingress, we need to play * with the link value (decrement IPsec header size * from mtu value). The code is much simpler than v4 * case, as we have the outgoing interface for * encapsulated packet as "rt->rt_ifp". */ ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); if (ipsechdrsiz < mtu) mtu -= ipsechdrsiz; /* * if mtu becomes less than minimum MTU, * tell minimum MTU (and I'll need to fragment it). */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU; #endif /* IPSEC */ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); } goto bad; } error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); if (type) IP6STAT_INC(ip6s_redirectsent); else { if (mcopy) goto freecopy; } } if (mcopy == NULL) goto out; switch (error) { case 0: if (type == ND_REDIRECT) { icmp6_redirect_output(mcopy, rt); goto out; } goto freecopy; case EMSGSIZE: /* xxx MTU is constant in PPP? */ goto freecopy; case ENOBUFS: /* Tell source to slow down like source quench in IP? 
*/ goto freecopy; case ENETUNREACH: /* shouldn't happen, checked above */ case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP6_DST_UNREACH; code = ICMP6_DST_UNREACH_ADDR; break; } icmp6_error(mcopy, type, code, 0); goto out; freecopy: m_freem(mcopy); goto out; bad: m_freem(m); out: if (rt != NULL) RTFREE(rt); } Index: head/sys/netinet6/ip6_mroute.c =================================================================== --- head/sys/netinet6/ip6_mroute.c (revision 305823) +++ head/sys/netinet6/ip6_mroute.c (revision 305824) @@ -1,1969 +1,1970 @@ /*- * Copyright (C) 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ */ /*- * Copyright (c) 1989 Stephen Deering * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 * BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp */ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 * Modified by Bill Fenner, PARC, April 1994 * * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); static int set_pim6(int *); static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in6 *); extern int in6_mcast_loop; extern struct domain inet6domain; static const struct encaptab *pim6_encap_cookie; static const struct protosw in6_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = pim6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; static int pim6_encapcheck(const struct mbuf *, int, int, void *); static VNET_DEFINE(int, ip6_mrouter_ver) = 0; #define V_ip6_mrouter_ver VNET(ip6_mrouter_ver) SYSCTL_DECL(_net_inet6); SYSCTL_DECL(_net_inet6_ip6); static SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); static struct mrt6stat mrt6stat; SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW, &mrt6stat, mrt6stat, "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)"); #define MRT6STAT_INC(name) mrt6stat.name += 1 #define NO_RTE_FOUND 0x1 #define RTE_FOUND 0x2 static struct mtx mrouter6_mtx; #define MROUTER6_LOCK() mtx_lock(&mrouter6_mtx) #define MROUTER6_UNLOCK() mtx_unlock(&mrouter6_mtx) #define MROUTER6_LOCK_ASSERT() do { \ mtx_assert(&mrouter6_mtx, MA_OWNED); \ NET_ASSERT_GIANT(); \ } while (0) #define MROUTER6_LOCK_INIT() \ mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF) #define MROUTER6_LOCK_DESTROY() mtx_destroy(&mrouter6_mtx) static struct mf6c *mf6ctable[MF6CTBLSIZ]; SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD, &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]", "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], " "netinet6/ip6_mroute.h)"); static struct mtx mfc6_mtx; #define MFC6_LOCK() mtx_lock(&mfc6_mtx) #define MFC6_UNLOCK() mtx_unlock(&mfc6_mtx) #define 
MFC6_LOCK_ASSERT() do { \ mtx_assert(&mfc6_mtx, MA_OWNED); \ NET_ASSERT_GIANT(); \ } while (0) #define MFC6_LOCK_INIT() \ mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF) #define MFC6_LOCK_DESTROY() mtx_destroy(&mfc6_mtx) static u_char n6expire[MF6CTBLSIZ]; static struct mif6 mif6table[MAXMIFS]; static int sysctl_mif6table(SYSCTL_HANDLER_ARGS) { struct mif6_sctl *out; int error; out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, M_WAITOK); for (int i = 0; i < MAXMIFS; i++) { out[i].m6_flags = mif6table[i].m6_flags; out[i].m6_rate_limit = mif6table[i].m6_rate_limit; out[i].m6_lcl_addr = mif6table[i].m6_lcl_addr; if (mif6table[i].m6_ifp != NULL) out[i].m6_ifp = mif6table[i].m6_ifp->if_index; else out[i].m6_ifp = 0; out[i].m6_pkt_in = mif6table[i].m6_pkt_in; out[i].m6_pkt_out = mif6table[i].m6_pkt_out; out[i].m6_bytes_in = mif6table[i].m6_bytes_in; out[i].m6_bytes_out = mif6table[i].m6_bytes_out; } error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS); free(out, M_TEMP); return (error); } SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]", "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], " "netinet6/ip6_mroute.h)"); static struct mtx mif6_mtx; #define MIF6_LOCK() mtx_lock(&mif6_mtx) #define MIF6_UNLOCK() mtx_unlock(&mif6_mtx) #define MIF6_LOCK_ASSERT() mtx_assert(&mif6_mtx, MA_OWNED) #define MIF6_LOCK_INIT() \ mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF) #define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx) #ifdef MRT6DEBUG static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */ #define V_mrt6debug VNET(mrt6debug) #define DEBUG_MFC 0x02 #define DEBUG_FORWARD 0x04 #define DEBUG_EXPIRE 0x08 #define DEBUG_XMIT 0x10 #define DEBUG_REG 0x20 #define DEBUG_PIM 0x40 #define DEBUG_ERR 0x80 #define DEBUG_ANY 0x7f #define MRT6_DLOG(m, fmt, ...) \ if (V_mrt6debug & (m)) \ log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \ "%s: " fmt "\n", __func__, ##__VA_ARGS__) #else #define MRT6_DLOG(m, fmt, ...) #endif static void expire_upcalls(void *); #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ /* * XXX TODO: maintain a count to if_allmulti() calls in struct ifnet. */ /* * 'Interfaces' associated with decapsulator (so we can tell * packets that went through it from ones that get reflected * by a broken gateway). Different from IPv4 register_if, * these interfaces are linked into the system ifnet list, * because per-interface IPv6 statistics are maintained in * ifp->if_afdata. But it does not have any routes point * to them. I.e., packets can't be sent this way. They * only exist as a placeholder for multicast source * verification. */ static struct ifnet *multicast_register_if6; #define ENCAP_HOPS 64 /* * Private variables. */ static mifi_t nummifs = 0; static mifi_t reg_mif_num = (mifi_t)-1; static struct pim6stat pim6stat; SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW, &pim6stat, pim6stat, "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)"); #define PIM6STAT_INC(name) pim6stat.name += 1 static VNET_DEFINE(int, pim6); #define V_pim6 VNET(pim6) /* * Hash function for a source, group entry */ #define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \ (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \ (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \ (g).s6_addr32[2] ^ (g).s6_addr32[3]) /* * Find a route for a given origin IPv6 address and Multicast group address. 
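 *
 * MF6CHASH above XOR-folds all eight 32-bit words of the origin and
 * group addresses into a table index; MF6CFIND then walks that hash
 * chain for an exact (origin, group) entry that is complete, i.e. has
 * no pending upcall queue (mf6c_stall == NULL). A lookup sketch:
 *
 *	struct mf6c *rt;
 *	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
 *	if (rt == NULL)
 *		(cache miss; mrt6s_mfc_misses was already bumped)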
*/ #define MF6CFIND(o, g, rt) do { \ struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \ rt = NULL; \ while (_rt) { \ if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \ IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \ (_rt->mf6c_stall == NULL)) { \ rt = _rt; \ break; \ } \ _rt = _rt->mf6c_next; \ } \ if (rt == NULL) { \ MRT6STAT_INC(mrt6s_mfc_misses); \ } \ } while (/*CONSTCOND*/ 0) /* * Macros to compute elapsed time efficiently * Borrowed from Van Jacobson's scheduling code * XXX: replace with timersub() ? */ #define TV_DELTA(a, b, delta) do { \ int xxs; \ \ delta = (a).tv_usec - (b).tv_usec; \ if ((xxs = (a).tv_sec - (b).tv_sec)) { \ switch (xxs) { \ case 2: \ delta += 1000000; \ /* FALLTHROUGH */ \ case 1: \ delta += 1000000; \ break; \ default: \ delta += (1000000 * xxs); \ } \ } \ } while (/*CONSTCOND*/ 0) /* XXX: replace with timercmp(a, b, <) ? */ #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) #ifdef UPCALL_TIMING #define UPCALL_MAX 50 static u_long upcall_data[UPCALL_MAX + 1]; static void collate(); #endif /* UPCALL_TIMING */ static int ip6_mrouter_init(struct socket *, int, int); static int add_m6fc(struct mf6cctl *); static int add_m6if(struct mif6ctl *); static int del_m6fc(struct mf6cctl *); static int del_m6if(mifi_t *); static int del_m6if_locked(mifi_t *); static int get_mif6_cnt(struct sioc_mif_req6 *); static int get_sg_cnt(struct sioc_sg_req6 *); static struct callout expire_upcalls_ch; int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); int X_ip6_mrouter_done(void); int X_ip6_mrouter_set(struct socket *, struct sockopt *); int X_ip6_mrouter_get(struct socket *, struct sockopt *); int X_mrt6_ioctl(u_long, caddr_t); /* * Handle MRT setsockopt commands to modify the multicast routing tables. 
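 *
 * A routing daemon drives these through setsockopt(2) on its raw
 * ICMPv6 socket; a hypothetical userland sketch (error handling
 * omitted):
 *
 *	int v = 1;
 *	struct mif6ctl mifc;
 *	(fill in mifc)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &v, sizeof(v));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mifc, sizeof(mifc));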
*/ int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt) { int error = 0; int optval; struct mif6ctl mifc; struct mf6cctl mfcc; mifi_t mifi; if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT) return (EPERM); switch (sopt->sopt_name) { case MRT6_INIT: #ifdef MRT6_OINIT case MRT6_OINIT: #endif error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; error = ip6_mrouter_init(so, optval, sopt->sopt_name); break; case MRT6_DONE: error = X_ip6_mrouter_done(); break; case MRT6_ADD_MIF: error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc)); if (error) break; error = add_m6if(&mifc); break; case MRT6_ADD_MFC: error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); if (error) break; error = add_m6fc(&mfcc); break; case MRT6_DEL_MFC: error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); if (error) break; error = del_m6fc(&mfcc); break; case MRT6_DEL_MIF: error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi)); if (error) break; error = del_m6if(&mifi); break; case MRT6_PIM: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; error = set_pim6(&optval); break; default: error = EOPNOTSUPP; break; } return (error); } /* * Handle MRT getsockopt commands */ int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt) { int error = 0; if (so != V_ip6_mrouter) return (EACCES); switch (sopt->sopt_name) { case MRT6_PIM: error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6)); break; } return (error); } /* * Handle ioctl commands to obtain information from the cache */ int X_mrt6_ioctl(u_long cmd, caddr_t data) { int ret; ret = EINVAL; switch (cmd) { case SIOCGETSGCNT_IN6: ret = get_sg_cnt((struct sioc_sg_req6 *)data); break; case SIOCGETMIFCNT_IN6: ret = get_mif6_cnt((struct sioc_mif_req6 *)data); break; default: break; } return (ret); } /* * returns the packet, byte, rpf-failure count for the source group provided */ static int get_sg_cnt(struct sioc_sg_req6 *req) { struct mf6c *rt; int ret; ret = 0; MFC6_LOCK(); MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt); if (rt == NULL) { ret = ESRCH; } else { req->pktcnt = rt->mf6c_pkt_cnt; req->bytecnt = rt->mf6c_byte_cnt; req->wrong_if = rt->mf6c_wrong_if; } MFC6_UNLOCK(); return (ret); } /* * returns the input and output packet and byte counts on the mif provided */ static int get_mif6_cnt(struct sioc_mif_req6 *req) { mifi_t mifi; int ret; ret = 0; mifi = req->mifi; MIF6_LOCK(); if (mifi >= nummifs) { ret = EINVAL; } else { req->icount = mif6table[mifi].m6_pkt_in; req->ocount = mif6table[mifi].m6_pkt_out; req->ibytes = mif6table[mifi].m6_bytes_in; req->obytes = mif6table[mifi].m6_bytes_out; } MIF6_UNLOCK(); return (ret); } static int set_pim6(int *i) { if ((*i != 1) && (*i != 0)) return (EINVAL); V_pim6 = *i; return (0); } /* * Enable multicast routing */ static int ip6_mrouter_init(struct socket *so, int v, int cmd) { MRT6_DLOG(DEBUG_ANY, "so_type = %d, pr_protocol = %d", so->so_type, so->so_proto->pr_protocol); if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_ICMPV6) return (EOPNOTSUPP); if (v != 1) return (ENOPROTOOPT); MROUTER6_LOCK(); if (V_ip6_mrouter != NULL) { MROUTER6_UNLOCK(); return (EADDRINUSE); } V_ip6_mrouter = so; V_ip6_mrouter_ver = cmd; bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); bzero((caddr_t)n6expire, sizeof(n6expire)); V_pim6 = 0;/* used for stubbing out/in pim stuff */ callout_init(&expire_upcalls_ch, 0); callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); MROUTER6_UNLOCK(); MRT6_DLOG(DEBUG_ANY, 
"finished"); return (0); } /* * Disable IPv6 multicast forwarding. */ int X_ip6_mrouter_done(void) { mifi_t mifi; u_long i; struct mf6c *rt; struct rtdetq *rte; MROUTER6_LOCK(); if (V_ip6_mrouter == NULL) { MROUTER6_UNLOCK(); return (EINVAL); } /* * For each phyint in use, disable promiscuous reception of all IPv6 * multicasts. */ for (mifi = 0; mifi < nummifs; mifi++) { if (mif6table[mifi].m6_ifp && !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { if_allmulti(mif6table[mifi].m6_ifp, 0); } } bzero((caddr_t)mif6table, sizeof(mif6table)); nummifs = 0; V_pim6 = 0; /* used to stub out/in pim specific code */ callout_stop(&expire_upcalls_ch); /* * Free all multicast forwarding cache entries. */ MFC6_LOCK(); for (i = 0; i < MF6CTBLSIZ; i++) { rt = mf6ctable[i]; while (rt) { struct mf6c *frt; for (rte = rt->mf6c_stall; rte != NULL; ) { struct rtdetq *n = rte->next; m_freem(rte->m); free(rte, M_MRTABLE6); rte = n; } frt = rt; rt = rt->mf6c_next; free(frt, M_MRTABLE6); } } bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); MFC6_UNLOCK(); /* * Reset register interface */ if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) { if_detach(multicast_register_if6); if_free(multicast_register_if6); reg_mif_num = (mifi_t)-1; multicast_register_if6 = NULL; } V_ip6_mrouter = NULL; V_ip6_mrouter_ver = 0; MROUTER6_UNLOCK(); MRT6_DLOG(DEBUG_ANY, "finished"); return (0); } static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; /* * Add a mif to the mif table */ static int add_m6if(struct mif6ctl *mifcp) { struct mif6 *mifp; struct ifnet *ifp; int error; MIF6_LOCK(); if (mifcp->mif6c_mifi >= MAXMIFS) { MIF6_UNLOCK(); return (EINVAL); } mifp = mif6table + mifcp->mif6c_mifi; if (mifp->m6_ifp != NULL) { MIF6_UNLOCK(); return (EADDRINUSE); /* XXX: is it appropriate? */ } if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) { MIF6_UNLOCK(); return (ENXIO); } ifp = ifnet_byindex(mifcp->mif6c_pifi); if (mifcp->mif6c_flags & MIFF_REGISTER) { if (reg_mif_num == (mifi_t)-1) { ifp = if_alloc(IFT_OTHER); if_initname(ifp, "register_mif", 0); ifp->if_flags |= IFF_LOOPBACK; if_attach(ifp); multicast_register_if6 = ifp; reg_mif_num = mifcp->mif6c_mifi; /* * it is impossible to guess the ifindex of the * register interface. So mif6c_pifi is automatically * calculated. */ mifcp->mif6c_pifi = ifp->if_index; } else { ifp = multicast_register_if6; } } else { /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { MIF6_UNLOCK(); return (EOPNOTSUPP); } error = if_allmulti(ifp, 1); if (error) { MIF6_UNLOCK(); return (error); } } mifp->m6_flags = mifcp->mif6c_flags; mifp->m6_ifp = ifp; /* initialize per mif pkt counters */ mifp->m6_pkt_in = 0; mifp->m6_pkt_out = 0; mifp->m6_bytes_in = 0; mifp->m6_bytes_out = 0; /* Adjust nummifs up if the mifi is higher than nummifs */ if (nummifs <= mifcp->mif6c_mifi) nummifs = mifcp->mif6c_mifi + 1; MIF6_UNLOCK(); MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi, if_name(ifp)); return (0); } /* * Delete a mif from the mif table */ static int del_m6if_locked(mifi_t *mifip) { struct mif6 *mifp = mif6table + *mifip; mifi_t mifi; struct ifnet *ifp; MIF6_LOCK_ASSERT(); if (*mifip >= nummifs) return (EINVAL); if (mifp->m6_ifp == NULL) return (EINVAL); if (!(mifp->m6_flags & MIFF_REGISTER)) { /* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. 
*/ ifp = mifp->m6_ifp; if_allmulti(ifp, 0); } else { if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) { if_detach(multicast_register_if6); if_free(multicast_register_if6); reg_mif_num = (mifi_t)-1; multicast_register_if6 = NULL; } } bzero((caddr_t)mifp, sizeof(*mifp)); /* Adjust nummifs down */ for (mifi = nummifs; mifi > 0; mifi--) if (mif6table[mifi - 1].m6_ifp) break; nummifs = mifi; MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", *mifip, nummifs); return (0); } static int del_m6if(mifi_t *mifip) { int cc; MIF6_LOCK(); cc = del_m6if_locked(mifip); MIF6_UNLOCK(); return (cc); } /* * Add an mfc entry */ static int add_m6fc(struct mf6cctl *mfccp) { struct mf6c *rt; u_long hash; struct rtdetq *rte; u_short nstl; char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; MFC6_LOCK(); MF6CFIND(mfccp->mf6cc_origin.sin6_addr, mfccp->mf6cc_mcastgrp.sin6_addr, rt); /* If an entry already exists, just update the fields */ if (rt) { MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x", ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), mfccp->mf6cc_parent); rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; MFC6_UNLOCK(); return (0); } /* * Find the entry for which the upcall was made and update */ hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, mfccp->mf6cc_mcastgrp.sin6_addr); for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) { if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, &mfccp->mf6cc_origin.sin6_addr) && IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, &mfccp->mf6cc_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL)) { if (nstl++) log(LOG_ERR, "add_m6fc: %s o %s g %s p %x dbx %p\n", "multiple kernel entries", ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), mfccp->mf6cc_parent, rt->mf6c_stall); MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p", ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), mfccp->mf6cc_parent, rt->mf6c_stall); rt->mf6c_origin = mfccp->mf6cc_origin; rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; /* initialize pkt counters per src-grp */ rt->mf6c_pkt_cnt = 0; rt->mf6c_byte_cnt = 0; rt->mf6c_wrong_if = 0; rt->mf6c_expire = 0; /* Don't clean this guy up */ n6expire[hash]--; /* free packets Qed at the end of this entry */ for (rte = rt->mf6c_stall; rte != NULL; ) { struct rtdetq *n = rte->next; ip6_mdq(rte->m, rte->ifp, rt); m_freem(rte->m); #ifdef UPCALL_TIMING collate(&(rte->t)); #endif /* UPCALL_TIMING */ free(rte, M_MRTABLE6); rte = n; } rt->mf6c_stall = NULL; } } /* * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash, ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), mfccp->mf6cc_parent); for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, &mfccp->mf6cc_origin.sin6_addr)&& IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, &mfccp->mf6cc_mcastgrp.sin6_addr)) { rt->mf6c_origin = mfccp->mf6cc_origin; rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; /* initialize pkt counters per src-grp */ rt->mf6c_pkt_cnt = 0; rt->mf6c_byte_cnt = 0; rt->mf6c_wrong_if = 0; if (rt->mf6c_expire) n6expire[hash]--; rt->mf6c_expire = 0; } } if 
(rt == NULL) { /* no upcall, so make a new entry */ rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT); if (rt == NULL) { MFC6_UNLOCK(); return (ENOBUFS); } /* insert new entry at head of hash chain */ rt->mf6c_origin = mfccp->mf6cc_origin; rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; /* initialize pkt counters per src-grp */ rt->mf6c_pkt_cnt = 0; rt->mf6c_byte_cnt = 0; rt->mf6c_wrong_if = 0; rt->mf6c_expire = 0; rt->mf6c_stall = NULL; /* link into table */ rt->mf6c_next = mf6ctable[hash]; mf6ctable[hash] = rt; } } MFC6_UNLOCK(); return (0); } #ifdef UPCALL_TIMING /* * collect delay statistics on the upcalls */ static void collate(struct timeval *t) { u_long d; struct timeval tp; u_long delta; GET_TIME(tp); if (TV_LT(*t, tp)) { TV_DELTA(tp, *t, delta); d = delta >> 10; if (d > UPCALL_MAX) d = UPCALL_MAX; ++upcall_data[d]; } } #endif /* UPCALL_TIMING */ /* * Delete an mfc entry */ static int del_m6fc(struct mf6cctl *mfccp) { #ifdef MRT6DEBUG char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; #endif struct sockaddr_in6 origin; struct sockaddr_in6 mcastgrp; struct mf6c *rt; struct mf6c **nptr; u_long hash; origin = mfccp->mf6cc_origin; mcastgrp = mfccp->mf6cc_mcastgrp; hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr); MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s", ip6_sprintf(ip6bufo, &origin.sin6_addr), ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr)); MFC6_LOCK(); nptr = &mf6ctable[hash]; while ((rt = *nptr) != NULL) { if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr, &rt->mf6c_origin.sin6_addr) && IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr, &rt->mf6c_mcastgrp.sin6_addr) && rt->mf6c_stall == NULL) break; nptr = &rt->mf6c_next; } if (rt == NULL) { MFC6_UNLOCK(); return (EADDRNOTAVAIL); } *nptr = rt->mf6c_next; free(rt, M_MRTABLE6); MFC6_UNLOCK(); return (0); } static int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src) { if (s) { if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, (struct mbuf *)0) != 0) { sorwakeup(s); return (0); } } m_freem(mm); return (-1); } /* * IPv6 multicast forwarding function. This function assumes that the packet * pointed to by "ip6" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IPv6 multicast group. * * The packet is returned unscathed to the caller, unless it is * erroneous, in which case a non-zero return value tells the caller to * discard it. * * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff * this function is called in the originating context (i.e., not when * forwarding a packet from other node). ip6_output(), which is currently the * only function that calls this function is called in the originating context, * explicitly ensures this condition. It is caller's responsibility to ensure * that if this function is called from somewhere else in the originating * context in the future. 
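 *
 * In short, the contract sketched above is:
 *
 *	m->m_pkthdr.rcvif == NULL  -> packet originated on this node
 *	m->m_pkthdr.rcvif != NULL  -> packet is being forwarded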
*/ int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) { struct rtdetq *rte; struct mbuf *mb0; struct mf6c *rt; struct mif6 *mifp; struct mbuf *mm; u_long hash; mifi_t mifi; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; #ifdef UPCALL_TIMING struct timeval tp; GET_TIME(tp); #endif /* UPCALL_TIMING */ MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index); /* * Don't forward a packet with Hop limit of zero or one, * or a packet destined to a local-only group. */ if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) || IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) return (0); ip6->ip6_hlim--; /* * Source address check: do not forward packets with unspecified * source. It was discussed in July 2000, on ipngwg mailing list. * This is rather more serious than unicast cases, because some * MLD packets can be sent with the unspecified source address * (although such packets must normally set 1 to the hop limit field). */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif)); } return (0); } MFC6_LOCK(); /* * Determine forwarding mifs from the forwarding cache table */ MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt); MRT6STAT_INC(mrt6s_mfc_lookups); /* Entry exists, so forward if necessary */ if (rt) { MFC6_UNLOCK(); return (ip6_mdq(m, ifp, rt)); } /* * If we don't have a route for packet's origin, * Make a copy of the packet & send message to routing daemon. */ MRT6STAT_INC(mrt6s_no_route); MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst)); /* * Allocate mbufs early so that we don't do extra work if we * are just going to fail anyway. */ rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT); if (rte == NULL) { MFC6_UNLOCK(); return (ENOBUFS); } - mb0 = m_copy(m, 0, M_COPYALL); + mb0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); /* * Pullup packet header if needed before storing it, * as other references may modify it in the meantime. */ if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); if (mb0 == NULL) { free(rte, M_MRTABLE6); MFC6_UNLOCK(); return (ENOBUFS); } /* is there an upcall waiting for this packet? 
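 *
 * (An entry whose mf6c_stall queue is non-NULL is an incomplete cache
 * entry: the MRT6MSG_NOCACHE upcall has been sent and packets queue
 * here until the daemon answers with MRT6_ADD_MFC, which drains the
 * queue in add_m6fc() above.)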
*/ hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst); for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &rt->mf6c_origin.sin6_addr) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL)) break; } if (rt == NULL) { struct mrt6msg *im; #ifdef MRT6_OINIT struct omrt6msg *oim; #endif /* no upcall, so make a new entry */ rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT); if (rt == NULL) { free(rte, M_MRTABLE6); m_freem(mb0); MFC6_UNLOCK(); return (ENOBUFS); } /* * Make a copy of the header to send to the user * level process */ - mm = m_copy(mb0, 0, sizeof(struct ip6_hdr)); + mm = m_copym(mb0, 0, sizeof(struct ip6_hdr), M_NOWAIT); if (mm == NULL) { free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); MFC6_UNLOCK(); return (ENOBUFS); } /* * Send message to routing daemon */ sin6.sin6_addr = ip6->ip6_src; im = NULL; #ifdef MRT6_OINIT oim = NULL; #endif switch (V_ip6_mrouter_ver) { #ifdef MRT6_OINIT case MRT6_OINIT: oim = mtod(mm, struct omrt6msg *); oim->im6_msgtype = MRT6MSG_NOCACHE; oim->im6_mbz = 0; break; #endif case MRT6_INIT: im = mtod(mm, struct mrt6msg *); im->im6_msgtype = MRT6MSG_NOCACHE; im->im6_mbz = 0; break; default: free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); MFC6_UNLOCK(); return (EINVAL); } MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel"); for (mifp = mif6table, mifi = 0; mifi < nummifs && mifp->m6_ifp != ifp; mifp++, mifi++) ; switch (V_ip6_mrouter_ver) { #ifdef MRT6_OINIT case MRT6_OINIT: oim->im6_mif = mifi; break; #endif case MRT6_INIT: im->im6_mif = mifi; break; } if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { log(LOG_WARNING, "ip6_mforward: ip6_mrouter " "socket queue full\n"); MRT6STAT_INC(mrt6s_upq_sockfull); free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); MFC6_UNLOCK(); return (ENOBUFS); } MRT6STAT_INC(mrt6s_upcalls); /* insert new entry at head of hash chain */ bzero(rt, sizeof(*rt)); rt->mf6c_origin.sin6_family = AF_INET6; rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6); rt->mf6c_origin.sin6_addr = ip6->ip6_src; rt->mf6c_mcastgrp.sin6_family = AF_INET6; rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6); rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst; rt->mf6c_expire = UPCALL_EXPIRE; n6expire[hash]++; rt->mf6c_parent = MF6C_INCOMPLETE_PARENT; /* link into table */ rt->mf6c_next = mf6ctable[hash]; mf6ctable[hash] = rt; /* Add this entry to the end of the queue */ rt->mf6c_stall = rte; } else { /* determine if q has overflowed */ struct rtdetq **p; int npkts = 0; for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next) if (++npkts > MAX_UPQ6) { MRT6STAT_INC(mrt6s_upq_ovflw); free(rte, M_MRTABLE6); m_freem(mb0); MFC6_UNLOCK(); return (0); } /* Add this entry to the end of the queue */ *p = rte; } rte->next = NULL; rte->m = mb0; rte->ifp = ifp; #ifdef UPCALL_TIMING rte->t = tp; #endif /* UPCALL_TIMING */ MFC6_UNLOCK(); return (0); } /* * Clean up cache entries if upcalls are not serviced * Call from the Slow Timeout mechanism, every half second. 
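 *
 * (EXPIRE_TIMEOUT is hz / 4 above, so the callout in fact fires four
 * times a second; an unanswered upcall entry lives for UPCALL_EXPIRE
 * == 6 ticks of this timer, roughly 1.5 seconds, before its queued
 * packets are dropped.)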
*/ static void expire_upcalls(void *unused) { #ifdef MRT6DEBUG char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; #endif struct rtdetq *rte; struct mf6c *mfc, **nptr; u_long i; MFC6_LOCK(); for (i = 0; i < MF6CTBLSIZ; i++) { if (n6expire[i] == 0) continue; nptr = &mf6ctable[i]; while ((mfc = *nptr) != NULL) { rte = mfc->mf6c_stall; /* * Skip real cache entries * Make sure it wasn't marked to not expire (shouldn't happen) * If it expires now */ if (rte != NULL && mfc->mf6c_expire != 0 && --mfc->mf6c_expire == 0) { MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)", ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr), ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr)); /* * drop all the packets * free the mbuf with the pkt, if, timing info */ do { struct rtdetq *n = rte->next; m_freem(rte->m); free(rte, M_MRTABLE6); rte = n; } while (rte != NULL); MRT6STAT_INC(mrt6s_cache_cleanups); n6expire[i]--; *nptr = mfc->mf6c_next; free(mfc, M_MRTABLE6); } else { nptr = &mfc->mf6c_next; } } } MFC6_UNLOCK(); callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); } /* * Packet forwarding routine once entry in the cache is made */ static int ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); mifi_t mifi, iif; struct mif6 *mifp; int plen = m->m_pkthdr.len; struct in6_addr src0, dst0; /* copies for local work */ u_int32_t iszone, idzone, oszone, odzone; int error = 0; /* * Macro to send packet on mif. Since RSVP packets don't get counted on * input, they shouldn't get counted on output, so statistics keeping is * separate. */ #define MC6_SEND(ip6, mifp, m) do { \ if ((mifp)->m6_flags & MIFF_REGISTER) \ register_send((ip6), (mifp), (m)); \ else \ phyint_send((ip6), (mifp), (m)); \ } while (/*CONSTCOND*/ 0) /* * Don't forward if it didn't arrive from the parent mif * for its origin. */ mifi = rt->mf6c_parent; if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) { /* came in the wrong interface */ MRT6_DLOG(DEBUG_FORWARD, "wrong if: ifid %d mifi %d mififid %x", ifp->if_index, mifi, mif6table[mifi].m6_ifp->if_index); MRT6STAT_INC(mrt6s_wrong_if); rt->mf6c_wrong_if++; /* * If we are doing PIM processing, and we are forwarding * packets on this interface, send a message to the * routing daemon. */ /* have to make sure this is a valid mif */ if (mifi < nummifs && mif6table[mifi].m6_ifp) if (V_pim6 && (m->m_flags & M_LOOP) == 0) { /* * Check the M_LOOP flag to avoid an * unnecessary PIM assert. * XXX: M_LOOP is an ad-hoc hack... 
*/ static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; struct mbuf *mm; struct mrt6msg *im; #ifdef MRT6_OINIT struct omrt6msg *oim; #endif - mm = m_copy(m, 0, sizeof(struct ip6_hdr)); + mm = m_copym(m, 0, sizeof(struct ip6_hdr), + M_NOWAIT); if (mm && (!M_WRITABLE(mm) || mm->m_len < sizeof(struct ip6_hdr))) mm = m_pullup(mm, sizeof(struct ip6_hdr)); if (mm == NULL) return (ENOBUFS); #ifdef MRT6_OINIT oim = NULL; #endif im = NULL; switch (V_ip6_mrouter_ver) { #ifdef MRT6_OINIT case MRT6_OINIT: oim = mtod(mm, struct omrt6msg *); oim->im6_msgtype = MRT6MSG_WRONGMIF; oim->im6_mbz = 0; break; #endif case MRT6_INIT: im = mtod(mm, struct mrt6msg *); im->im6_msgtype = MRT6MSG_WRONGMIF; im->im6_mbz = 0; break; default: m_freem(mm); return (EINVAL); } for (mifp = mif6table, iif = 0; iif < nummifs && mifp && mifp->m6_ifp != ifp; mifp++, iif++) ; switch (V_ip6_mrouter_ver) { #ifdef MRT6_OINIT case MRT6_OINIT: oim->im6_mif = iif; sin6.sin6_addr = oim->im6_src; break; #endif case MRT6_INIT: im->im6_mif = iif; sin6.sin6_addr = im->im6_src; break; } MRT6STAT_INC(mrt6s_upcalls); if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full"); MRT6STAT_INC(mrt6s_upq_sockfull); return (ENOBUFS); } /* if socket Q full */ } /* if PIM */ return (0); } /* if wrong iif */ /* If I sourced this packet, it counts as output, else it was input. */ if (m->m_pkthdr.rcvif == NULL) { /* XXX: is rcvif really NULL when output?? */ mif6table[mifi].m6_pkt_out++; mif6table[mifi].m6_bytes_out += plen; } else { mif6table[mifi].m6_pkt_in++; mif6table[mifi].m6_bytes_in += plen; } rt->mf6c_pkt_cnt++; rt->mf6c_byte_cnt += plen; /* * For each mif, forward a copy of the packet if there are group * members downstream on the interface. */ src0 = ip6->ip6_src; dst0 = ip6->ip6_dst; if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 || (error = in6_setscope(&dst0, ifp, &idzone)) != 0) { IP6STAT_INC(ip6s_badscope); return (error); } for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) { if (IF_ISSET(mifi, &rt->mf6c_ifset)) { /* * check if the outgoing packet is going to break * a scope boundary. * XXX For packets through PIM register tunnel * interface, we believe a routing daemon. */ if (!(mif6table[rt->mf6c_parent].m6_flags & MIFF_REGISTER) && !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { if (in6_setscope(&src0, mif6table[mifi].m6_ifp, &oszone) || in6_setscope(&dst0, mif6table[mifi].m6_ifp, &odzone) || iszone != oszone || idzone != odzone) { IP6STAT_INC(ip6s_badscope); continue; } } mifp->m6_pkt_out++; mifp->m6_bytes_out += plen; MC6_SEND(ip6, mifp, m); } } return (0); } static void phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) { #ifdef MRT6DEBUG char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; #endif struct mbuf *mb_copy; struct ifnet *ifp = mifp->m6_ifp; int error = 0; u_long linkmtu; /* * Make a new reference to the packet; make sure that * the IPv6 header is actually copied, not just referenced, * so that ip6_output() only scribbles on the copy. */ - mb_copy = m_copy(m, 0, M_COPYALL); + mb_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); if (mb_copy == NULL) { return; } /* set MCAST flag to the outgoing packet */ mb_copy->m_flags |= M_MCAST; /* * If we sourced the packet, call ip6_output since we may divide * the packet into fragments when the packet is too big for the * outgoing interface.
* Otherwise, we can simply send the packet to the interface * sending queue. */ if (m->m_pkthdr.rcvif == NULL) { struct ip6_moptions im6o; im6o.im6o_multicast_ifp = ifp; /* XXX: ip6_output will override ip6->ip6_hlim */ im6o.im6o_multicast_hlim = ip6->ip6_hlim; im6o.im6o_multicast_loop = 1; error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o, NULL, NULL); MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", (uint16_t)(mifp - mif6table), error); return; } /* * If configured to loop back multicasts by default, * loop back a copy now. */ if (in6_mcast_loop) ip6_mloopback(ifp, m); /* * Put the packet into the sending queue of the outgoing interface * if it would fit in the MTU of the interface. */ linkmtu = IN6_LINKMTU(ifp); if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) { struct sockaddr_in6 dst6; bzero(&dst6, sizeof(dst6)); dst6.sin6_len = sizeof(struct sockaddr_in6); dst6.sin6_family = AF_INET6; dst6.sin6_addr = ip6->ip6_dst; IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6); /* * We just call if_output instead of nd6_output here, since * we need no ND for a multicast forwarded packet...right? */ m_clrprotoflags(m); /* Avoid confusing lower layers. */ error = (*ifp->if_output)(ifp, mb_copy, (struct sockaddr *)&dst6, NULL); MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", (uint16_t)(mifp - mif6table), error); } else { /* * pMTU discovery is intentionally disabled by default, since * various router may notify pMTU in multicast, which can be * a DDoS to a router */ if (V_ip6_mcast_pmtu) icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu); else { MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s " "g %s size %d (discarded)", if_name(ifp), ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), mb_copy->m_pkthdr.len); m_freem(mb_copy); /* simply discard the packet */ } } } static int register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m) { #ifdef MRT6DEBUG char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; #endif struct mbuf *mm; int i, len = m->m_pkthdr.len; static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; struct mrt6msg *im6; MRT6_DLOG(DEBUG_ANY, "src %s dst %s", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst)); PIM6STAT_INC(pim6s_snd_registers); /* Make a copy of the packet to send to the user level process. */ mm = m_gethdr(M_NOWAIT, MT_DATA); if (mm == NULL) return (ENOBUFS); mm->m_data += max_linkhdr; mm->m_len = sizeof(struct ip6_hdr); - if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { + if ((mm->m_next = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { m_freem(mm); return (ENOBUFS); } i = MHLEN - M_LEADINGSPACE(mm); if (i > len) i = len; mm = m_pullup(mm, i); if (mm == NULL) return (ENOBUFS); /* TODO: check it! */ mm->m_pkthdr.len = len + sizeof(struct ip6_hdr); /* * Send message to routing daemon */ sin6.sin6_addr = ip6->ip6_src; im6 = mtod(mm, struct mrt6msg *); im6->im6_msgtype = MRT6MSG_WHOLEPKT; im6->im6_mbz = 0; im6->im6_mif = mif - mif6table; /* iif info is not given for reg. encap.n */ MRT6STAT_INC(mrt6s_upcalls); if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) { MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full"); MRT6STAT_INC(mrt6s_upq_sockfull); return (ENOBUFS); } return (0); } /* * pim6_encapcheck() is called by the encap6_input() path at runtime to * determine if a packet is for PIM; allowing PIM to be dynamically loaded * into the kernel. 
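 *
 * encap6_input() treats the return value as a match strength: zero
 * rejects the datagram and a positive value claims it, larger values
 * winning over other registered handlers (a sketch of the contract,
 * as used below):
 *
 *	proto != IPPROTO_PIM  -> return 0   (not ours)
 *	otherwise             -> return 64  (claim the datagram)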
*/ static int pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { #ifdef DIAGNOSTIC KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); #endif if (proto != IPPROTO_PIM) return 0; /* not for us; reject the datagram. */ return 64; /* claim the datagram. */ } /* * PIM sparse mode hook * Receives the pim control messages, and passes them up to the listening * socket, using rip6_input. * The only message processed is the REGISTER pim message; the pim header * is stripped off, and the inner packet is passed to register_mforward. */ int pim6_input(struct mbuf **mp, int *offp, int proto) { struct pim *pim; /* pointer to a pim struct */ struct ip6_hdr *ip6; int pimlen; struct mbuf *m = *mp; int minlen; int off = *offp; PIM6STAT_INC(pim6s_rcv_total); ip6 = mtod(m, struct ip6_hdr *); pimlen = m->m_pkthdr.len - *offp; /* * Validate lengths */ if (pimlen < PIM_MINLEN) { PIM6STAT_INC(pim6s_rcv_tooshort); MRT6_DLOG(DEBUG_PIM, "PIM packet too short"); m_freem(m); return (IPPROTO_DONE); } /* * if the packet is at least as big as a REGISTER, go ahead * and grab the PIM REGISTER header size, to avoid another * possible m_pullup() later. * * PIM_MINLEN == pimhdr + u_int32 == 8 * PIM6_REG_MINLEN == pimhdr + reghdr + eip6hdr == 4 + 4 + 40 */ minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN; /* * Make sure that the IP6 and PIM headers are in contiguous memory, and * possibly the PIM REGISTER header */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE); /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf to point to the PIM header */ pim = (struct pim *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen); if (pim == NULL) { PIM6STAT_INC(pim6s_rcv_tooshort); return (IPPROTO_DONE); } #endif #define PIM6_CHECKSUM #ifdef PIM6_CHECKSUM { int cksumlen; /* * Validate checksum. * If PIM REGISTER, exclude the data packet */ if (pim->pim_type == PIM_REGISTER) cksumlen = PIM_MINLEN; else cksumlen = pimlen; if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) { PIM6STAT_INC(pim6s_rcv_badsum); MRT6_DLOG(DEBUG_PIM, "invalid checksum"); m_freem(m); return (IPPROTO_DONE); } } #endif /* PIM6_CHECKSUM */ /* PIM version check */ if (pim->pim_ver != PIM_VERSION) { PIM6STAT_INC(pim6s_rcv_badversion); MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "incorrect version %d, expecting %d", pim->pim_ver, PIM_VERSION); m_freem(m); return (IPPROTO_DONE); } if (pim->pim_type == PIM_REGISTER) { /* * since this is a REGISTER, we'll make a copy of the register * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the * routing daemon.
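* A rough picture of the REGISTER layout handled below: outer ip6 | pim hdr (4) | u_int32_t reg hdr (4) | inner ip6 (40) | data, matching PIM6_REG_MINLEN == 4 + 4 + 40 above.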
*/ static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 }; struct mbuf *mcp; struct ip6_hdr *eip6; u_int32_t *reghdr; int rc; #ifdef MRT6DEBUG char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; #endif PIM6STAT_INC(pim6s_rcv_registers); if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) { MRT6_DLOG(DEBUG_PIM, "register mif not set: %d", reg_mif_num); m_freem(m); return (IPPROTO_DONE); } reghdr = (u_int32_t *)(pim + 1); if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) goto pim6_input_to_daemon; /* * Validate length */ if (pimlen < PIM6_REG_MINLEN) { PIM6STAT_INC(pim6s_rcv_tooshort); PIM6STAT_INC(pim6s_rcv_badregisters); MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet " "size too small %d from %s", pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src)); m_freem(m); return (IPPROTO_DONE); } eip6 = (struct ip6_hdr *) (reghdr + 1); MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d", ip6_sprintf(ip6bufs, &eip6->ip6_src), ip6_sprintf(ip6bufd, &eip6->ip6_dst), ntohs(eip6->ip6_plen)); /* verify the version number of the inner packet */ if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { PIM6STAT_INC(pim6s_rcv_badregisters); MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) " "of the inner packet", (eip6->ip6_vfc & IPV6_VERSION)); m_freem(m); return (IPPROTO_NONE); } /* verify the inner packet is destined to a mcast group */ if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) { PIM6STAT_INC(pim6s_rcv_badregisters); MRT6_DLOG(DEBUG_PIM, "inner packet of register " "is not multicast %s", ip6_sprintf(ip6bufd, &eip6->ip6_dst)); m_freem(m); return (IPPROTO_DONE); } /* * make a copy of the whole header to pass to the daemon later. */ - mcp = m_copy(m, 0, off + PIM6_REG_MINLEN); + mcp = m_copym(m, 0, off + PIM6_REG_MINLEN, M_NOWAIT); if (mcp == NULL) { MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: " "could not copy register head"); m_freem(m); return (IPPROTO_DONE); } /* * forward the inner ip6 packet; point m_data at the inner ip6. */ m_adj(m, off + PIM_MINLEN); MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: " "src %s, dst %s, mif %d", ip6_sprintf(ip6bufs, &eip6->ip6_src), ip6_sprintf(ip6bufd, &eip6->ip6_dst), reg_mif_num); rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m, dst.sin6_family, 0); /* prepare the register head to send to the mrouting daemon */ m = mcp; } /* * Pass the PIM message up to the daemon; if it is a register message * pass the 'head' only up to the daemon. This includes the * encapsulator ip6 header, pim header, register header and the * encapsulated ip6 header. 
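* For example, a null register (PIM_NULL_REGISTER set in the register header) skips the decapsulation above, so only this head reaches the daemon.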
*/ pim6_input_to_daemon: rip6_input(&m, offp, proto); return (IPPROTO_DONE); } static int ip6_mroute_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: MROUTER6_LOCK_INIT(); MFC6_LOCK_INIT(); MIF6_LOCK_INIT(); pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM, pim6_encapcheck, (const struct protosw *)&in6_pim_protosw, NULL); if (pim6_encap_cookie == NULL) { printf("ip6_mroute: unable to attach pim6 encap\n"); MIF6_LOCK_DESTROY(); MFC6_LOCK_DESTROY(); MROUTER6_LOCK_DESTROY(); return (EINVAL); } ip6_mforward = X_ip6_mforward; ip6_mrouter_done = X_ip6_mrouter_done; ip6_mrouter_get = X_ip6_mrouter_get; ip6_mrouter_set = X_ip6_mrouter_set; mrt6_ioctl = X_mrt6_ioctl; break; case MOD_UNLOAD: if (V_ip6_mrouter != NULL) return EINVAL; if (pim6_encap_cookie) { encap_detach(pim6_encap_cookie); pim6_encap_cookie = NULL; } X_ip6_mrouter_done(); ip6_mforward = NULL; ip6_mrouter_done = NULL; ip6_mrouter_get = NULL; ip6_mrouter_set = NULL; mrt6_ioctl = NULL; MIF6_LOCK_DESTROY(); MFC6_LOCK_DESTROY(); MROUTER6_LOCK_DESTROY(); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t ip6_mroutemod = { "ip6_mroute", ip6_mroute_modevent, 0 }; DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY); Index: head/sys/netinet6/ip6_output.c =================================================================== --- head/sys/netinet6/ip6_output.c (revision 305823) +++ head/sys/netinet6/ip6_output.c (revision 305824) @@ -1,3088 +1,3088 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. 
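* For example, MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS) saves the old next header value in the first byte of the hbh mbuf, stores IPPROTO_HOPOPTS through the old next header pointer, splices the hbh mbuf in after mprev, and advances mprev.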
*/ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); - if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) { + if ((m_frgpart = m_copym(m0, off, mtu, M_NOWAIT)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? 
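* (A minimal caller sketch, assuming a fully formed skeletal header in m and default behavior for every optional argument: error = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);)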
*/ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. 
Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } else ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. 
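* (RT_VALIDATE is assumed here to discard the cached rt once the FIB's generation cookie has changed; a discarded or stale entry is then repopulated via in6_selectroute_fib() below.)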
*/ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. 
The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing interface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets.
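* A hook may consume the packet outright (m set to NULL without error), rewrite ip6_dst, or move the packet to another FIB; each of those outcomes is handled case by case below.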
*/ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup. */ if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. 
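* (CSUM_DELAY_DATA_IPV6 is assumed to cover the delayed TCP/UDP-over-IPv6 checksum flags, so the in6_delayed_cksum() call below finalizes those checksums in software.)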
*/ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); goto done; } /* * try to fragment the packet. case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbage.
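* That is: m0 still heads the fragment list built by ip6_fragment(), but its data has been copied into the fragments, so free it first and then transmit (or, on error, free) the m_nextpkt chain.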
*/ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: /* * Release the route if using our private route, or if * (with flowtable) we don't have our own reference. */ if (ro == &ip6route || (ro != NULL && ro->ro_flags & RT_NORTREF)) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. 
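* Roughly: any extension headers beyond the basic ip6 header are m_copym()'d from m0, and the ip6_frag header then goes either into the trailing space of the last unfragmentable mbuf or into a freshly allocated one.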
*/ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. * MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. */ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. 
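* For example, a route MTU of 1500 with a hostcache entry of 1400 yields 1400; a recorded value below IPV6_MMTU (1280) is clamped back to 1280 with the always-frag flag set, per the RFC 2460 note below.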
*/ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; int error = 0; if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); /* TCP is known to react to pmtu changes so skip hc */ if (proto != IPPROTO_TCP) mtu = tcp_hc_getmtu(&inc); if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with fragment header attached. * (fragment header is needed regardless of the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. */ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel.
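* (Hence the priv_check(td, PRIV_NETINET_SETHDROPTS) gate on the three IPV6_RECV*OPTS cases directly below.)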
*/ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. 
*/ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); 
break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we do not consider the case of source * routing, or optional information to specify * the outgoing interface. */ error = ip6_getpmtu_ctl(so->so_fibnum, &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542.
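* For example, icmp6off above is offsetof(struct icmp6_hdr, icmp6_cksum), i.e. 2, so an ICMPv6 socket may only "set" that exact offset; any other value fails with EINVAL in the SOPT_SET case below.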
*/ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("copypktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them by ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542.
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
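 *
 * A hedged illustration of standard RFC 3542 usage (not part of
 * this change): a per-packet hop limit reaches this code as
 * ancillary data built roughly like
 *
 *	int hlim = 64;
 *	cm->cmsg_level = IPPROTO_IPV6;
 *	cm->cmsg_type = IPV6_HOPLIMIT;
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &hlim, sizeof(int));
 *
 * whereas a sticky default hop limit is set with the
 * IPV6_UNICAST_HOPS socket option instead.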
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position where the destination options header * should be inserted: before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advanced API is ambiguous on this point. * Our approach is to determine the position based * on the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types.
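 *
 * For reference, the extension header order assembled at output
 * time is (RFC 2460, section 4.1):
 *
 *	IPv6 | Hop-by-Hop | Dest1 (before rthdr) | Routing |
 *	    Dest2 (after rthdr) | upper layer
 *
 * which is why dest1 is only meaningful when a routing header is
 * also present.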
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; - copym = m_copy(m, 0, M_COPYALL); + copym = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
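 *
 * A sketch of the result when the header does not already sit in
 * an mbuf of its own (mh is newly allocated and takes over the
 * packet header):
 *
 *	before:	m  [ ip6_hdr | payload ... ]
 *	after:	mh [ ip6_hdr ] -> m [ payload ... ]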
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: head/sys/netinet6/raw_ip6.c =================================================================== --- head/sys/netinet6/raw_ip6.c (revision 305823) +++ head/sys/netinet6/raw_ip6.c (revision 305824) @@ -1,905 +1,905 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. */ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. 
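 *
 * A hedged userland counterpart (assuming the process holds
 * PRIV_NETINET_RAW): datagrams of protocol "p" are delivered to
 * sockets created as
 *
 *	int s = socket(AF_INET6, SOCK_RAW, p);
 *
 * subject to the per-pcb local/foreign address checks below; a
 * protocol of 0 matches any protocol.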
*/ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p && in6p->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) continue; } INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); continue; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (in6p->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * to filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(in6p->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); INP_RUNLOCK(in6p); continue; } } if (last != NULL) { - struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); + struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); #ifdef IPSEC /* * Check AH/ESP integrity. */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); /* Do not inject data into pcb. */ } else #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); #ifdef IPSEC /* * Check AH/ESP integrity. */ if ((last != NULL) && ipsec6_in_reject(m, last)) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb.
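 * (ip6s_delivered was already incremented in the input path, which
 * is presumably why it is decremented again here and in the
 * no-socket case below when nothing is actually delivered.)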
*/ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else { char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, prvnxtp - mtod(m, char *)); } IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options the * user may have set up with a control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; int hlim = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoinpcb(so); INP_WLOCK(in6p); dst = &dstsock->sin6_addr; if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = in6p->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. */ if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics.
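 *
 * E.g. (userland sketch, assuming "s" is a SOCK_RAW socket with
 * protocol IPPROTO_ICMPV6 and dst6 a filled-in sockaddr_in6):
 * sending
 *
 *	struct icmp6_hdr h = { .icmp6_type = ICMP6_ECHO_REQUEST };
 *	sendto(s, &h, sizeof(h), 0, (struct sockaddr *)&dst6,
 *	    sizeof(dst6));
 *
 * is counted below under icp6s_outhist[ICMP6_ECHO_REQUEST].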
*/ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, scope_ambiguous, &in6a, &hlim); if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to user land for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(in6p); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? 
ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. */ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) return (EADDRNOTAVAIL); if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family 
!= AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as they should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ error = in6_selectsrc_socket(addr, inp->in6p_outputopts, inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return (EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, }; Index: head/sys/netinet6/udp6_usrreq.c =================================================================== --- head/sys/netinet6/udp6_usrreq.c (revision 305823) +++ head/sys/netinet6/udp6_usrreq.c (revision 305824) @@ -1,1301 +1,1302 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } #ifdef IPSEC /* Check AH/ESP integrity. */ if (ipsec6_in_reject(n, inp)) { m_freem(n); return (0); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *ifp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return (IPPROTO_DONE); #endif UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? 
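 * For reference, the RFC 3828 semantics implemented above: for
 * UDP-Lite, uh_ulen carries the checksum coverage, so
 *
 *	uh_ulen == 0          -> coverage is the whole datagram (plen)
 *	uh_ulen == 8          -> header-only coverage
 *	uh_ulen < 8 or > plen -> invalid, dropped
 *
 * and the checksum itself is mandatory, so uh_sum == 0 is rejected
 * here as well.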
*/ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip6_moptions *imo; INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. */ /* * KAME note: traditionally we dropped udpiphdr from mbuf * here. We need udphdr for IPsec processing so we do that * later. */ pcblist = udp_get_pcblist(nxt); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src) || inp->inp_fport != uh->uh_sport) continue; } /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is (supposed to be) held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->in6p_moptions; if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct sockaddr_in6 mcaddr; int blocked; INP_RLOCK(inp); bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(imo, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); /* XXX */ continue; } INP_RUNLOCK(inp); } if (last != NULL) { struct mbuf *n; - if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != + NULL) { INP_RLOCK(last); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, n, off, &fromsa)) goto inp_lost; INP_RUNLOCK(last); } } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. 
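 *
 * A hedged userland sketch of the case this loop serves: several
 * processes each run
 *
 *	int on = 1;
 *	setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
 *	bind(s, (struct sockaddr *)&group_port, sizeof(group_port));
 *
 * against the same multicast group and port, and each then gets
 * its own copy of the datagram via the m_copym() path above.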
*/ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datagram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); goto badheadlocked; } INP_RLOCK(last); INP_INFO_RUNLOCK(pcbinfo); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, m, off, &fromsa) == 0) INP_RUNLOCK(last); inp_lost: return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, &fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it.
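 * (As in rip6_ctlinput() above, "d" is expected to point at a
 * struct ip6ctlparam when the notification originated in the
 * ICMPv6 input path.)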
*/ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non-NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); if (!PRC_IS_REDIRECT(cmd)) { /* Check to see if it's tunneled */ struct inpcb *inp; inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, uh.uh_dport, &ip6->ip6_src, uh.uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func) { /* Yes it is. */ INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src, d, up->u_tun_ctx); return; } else { /* Can't find it. */ INP_RUNLOCK(inp); } } } (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled.
Unfortunately, some * applications do not behave as they should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, a situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { error = in6_selectsrc_socket(sin6, optp, inp, td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting to the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram.
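 *
 * (UDP-Lite note, per RFC 3828 as implemented below: uh_ulen is
 * reused as the checksum coverage length; a configured coverage
 * of zero, or one at least as large as the datagram, selects
 * full coverage, leaving cscov_partial at 0.)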
*/ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, a checksum coverage length of zero means * the entire UDP-Lite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later on, connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculated. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, &inp->inp_route6, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); /* * XXXRW: Need to clarify locking of v4/v6 flags. 
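* For a v4-mapped destination such as ::ffff:192.0.2.1, the branch
* below converts the sockaddr with in6_sin6_2_sin(), switches the
* pcb from INP_IPV6 to INP_IPV4 and completes the connect through
* the IPv4 pcb routines.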
*/ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error = 0; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = NULL; if (addr == NULL) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? 
*/ INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); } } #endif #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); bad: INP_WUNLOCK(inp); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; Index: head/sys/netipsec/keysock.c =================================================================== --- head/sys/netipsec/keysock.c (revision 305823) +++ head/sys/netipsec/keysock.c (revision 305824) @@ -1,570 +1,570 @@ /* $FreeBSD$ */ /* $KAME: keysock.c,v 1.25 2001/08/13 20:07:41 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ipsec.h" /* This code has derived from sys/net/rtsock.c on FreeBSD2.2.5 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct key_cb { int key_count; int any_count; }; static VNET_DEFINE(struct key_cb, key_cb); #define V_key_cb VNET(key_cb) static struct sockaddr key_src = { 2, PF_KEY, }; static int key_sendup0(struct rawcb *, struct mbuf *, int); VNET_PCPUSTAT_DEFINE(struct pfkeystat, pfkeystat); VNET_PCPUSTAT_SYSINIT(pfkeystat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(pfkeystat); #endif /* VIMAGE */ /* * key_output() */ int key_output(struct mbuf *m, struct socket *so, ...) 
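/*
 * PF_KEY message lengths are carried in units of 8 bytes, so the check
 * below compares the mbuf chain length against
 * PFKEY_UNUNIT64(msg->sadb_msg_len), i.e. sadb_msg_len << 3.  For
 * example, a minimal request consisting of a bare struct sadb_msg
 * (16 bytes) is sent with sadb_msg_len = PFKEY_UNIT64(16) = 2.
 */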
{ struct sadb_msg *msg; int len, error = 0; if (m == NULL) panic("%s: NULL pointer was passed.\n", __func__); PFKEYSTAT_INC(out_total); PFKEYSTAT_ADD(out_bytes, m->m_pkthdr.len); len = m->m_pkthdr.len; if (len < sizeof(struct sadb_msg)) { PFKEYSTAT_INC(out_tooshort); error = EINVAL; goto end; } if (m->m_len < sizeof(struct sadb_msg)) { if ((m = m_pullup(m, sizeof(struct sadb_msg))) == NULL) { PFKEYSTAT_INC(out_nomem); error = ENOBUFS; goto end; } } M_ASSERTPKTHDR(m); KEYDEBUG(KEYDEBUG_KEY_DUMP, kdebug_mbuf(m)); msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(out_msgtype[msg->sadb_msg_type]); if (len != PFKEY_UNUNIT64(msg->sadb_msg_len)) { PFKEYSTAT_INC(out_invlen); error = EINVAL; goto end; } error = key_parse(m, so); m = NULL; end: if (m) m_freem(m); return error; } /* * send message to the socket. */ static int key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc) { int error; if (promisc) { struct sadb_msg *pmsg; M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT); if (m == NULL) { PFKEYSTAT_INC(in_nomem); return (ENOBUFS); } pmsg = mtod(m, struct sadb_msg *); bzero(pmsg, sizeof(*pmsg)); pmsg->sadb_msg_version = PF_KEY_V2; pmsg->sadb_msg_type = SADB_X_PROMISC; pmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); /* pid and seq? */ PFKEYSTAT_INC(in_msgtype[pmsg->sadb_msg_type]); } if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src, m, NULL)) { PFKEYSTAT_INC(in_nomem); m_freem(m); error = ENOBUFS; } else error = 0; sorwakeup(rp->rcb_socket); return error; } /* XXX this interface should be obsoleted. */ int key_sendup(struct socket *so, struct sadb_msg *msg, u_int len, int target) { struct mbuf *m, *n, *mprev; int tlen; /* sanity check */ if (so == NULL || msg == NULL) panic("%s: NULL pointer was passed.\n", __func__); KEYDEBUG(KEYDEBUG_KEY_DUMP, printf("%s: \n", __func__); kdebug_sadb(msg)); /* * we increment statistics here, just in case we have ENOBUFS * in this function. */ PFKEYSTAT_INC(in_total); PFKEYSTAT_ADD(in_bytes, len); PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]); /* * Get mbuf chain whenever possible (not clusters), * to save socket buffer. We'll be generating many SADB_ACQUIRE * messages to listening key sockets. If we simply allocate clusters, * sbappendaddr() will raise ENOBUFS due to too little sbspace(). * sbspace() computes # of actual data bytes AND mbuf region. * * TODO: SADB_ACQUIRE filters should be implemented. */ tlen = len; m = mprev = NULL; while (tlen > 0) { if (tlen == len) { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MHLEN; } else { MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MLEN; } if (tlen >= MCLBYTES) { /*XXX better threshold? 
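* Only the bytes still pending count against the threshold: with
* 2k clusters, e.g., a 3000-byte message gets one cluster for its
* first 2048 bytes and plain MLEN-sized mbufs for the remaining
* 952.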
*/ if (!(MCLGET(n, M_NOWAIT))) { m_free(n); m_freem(m); PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MCLBYTES; } if (tlen < n->m_len) n->m_len = tlen; n->m_next = NULL; if (m == NULL) m = mprev = n; else { mprev->m_next = n; mprev = n; } tlen -= n->m_len; n = NULL; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m_copyback(m, 0, len, (caddr_t)msg); /* avoid duplicated statistics */ PFKEYSTAT_ADD(in_total, -1); PFKEYSTAT_ADD(in_bytes, -len); PFKEYSTAT_ADD(in_msgtype[msg->sadb_msg_type], -1); return key_sendup_mbuf(so, m, target); } /* so can be NULL if target != KEY_SENDUP_ONE */ int key_sendup_mbuf(struct socket *so, struct mbuf *m, int target) { struct mbuf *n; struct keycb *kp; int sendup; struct rawcb *rp; int error = 0; if (m == NULL) panic("key_sendup_mbuf: NULL pointer was passed.\n"); if (so == NULL && target == KEY_SENDUP_ONE) panic("%s: NULL pointer was passed.\n", __func__); PFKEYSTAT_INC(in_total); PFKEYSTAT_ADD(in_bytes, m->m_pkthdr.len); if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (m == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } } if (m->m_len >= sizeof(struct sadb_msg)) { struct sadb_msg *msg; msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]); } mtx_lock(&rawcb_mtx); LIST_FOREACH(rp, &V_rawcb_list, list) { if (rp->rcb_proto.sp_family != PF_KEY) continue; if (rp->rcb_proto.sp_protocol && rp->rcb_proto.sp_protocol != PF_KEY_V2) { continue; } kp = (struct keycb *)rp; /* * If you are in promiscuous mode, and when you get broadcasted * reply, you'll get two PF_KEY messages. * (based on pf_key@inner.net message on 14 Oct 1998) */ if (((struct keycb *)rp)->kp_promisc) { - if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { (void)key_sendup0(rp, n, 1); n = NULL; } } /* the exact target will be processed later */ if (so && sotorawcb(so) == rp) continue; sendup = 0; switch (target) { case KEY_SENDUP_ONE: /* the statement has no effect */ if (so && sotorawcb(so) == rp) sendup++; break; case KEY_SENDUP_ALL: sendup++; break; case KEY_SENDUP_REGISTERED: if (kp->kp_registered) sendup++; break; } PFKEYSTAT_INC(in_msgtarget[target]); if (!sendup) continue; - if ((n = m_copy(m, 0, (int)M_COPYALL)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { m_freem(m); PFKEYSTAT_INC(in_nomem); mtx_unlock(&rawcb_mtx); return ENOBUFS; } if ((error = key_sendup0(rp, n, 0)) != 0) { m_freem(m); mtx_unlock(&rawcb_mtx); return error; } n = NULL; } if (so) { error = key_sendup0(sotorawcb(so), m, 0); m = NULL; } else { error = 0; m_freem(m); } mtx_unlock(&rawcb_mtx); return error; } /* * key_abort() * derived from net/rtsock.c:rts_abort() */ static void key_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } /* * key_attach() * derived from net/rtsock.c:rts_attach() */ static int key_attach(struct socket *so, int proto, struct thread *td) { struct keycb *kp; int error; KASSERT(so->so_pcb == NULL, ("key_attach: so_pcb != NULL")); if (td != NULL) { error = priv_check(td, PRIV_NET_RAW); if (error) return error; } /* XXX */ kp = malloc(sizeof *kp, M_PCB, M_WAITOK | M_ZERO); if (kp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)kp; error = raw_attach(so, proto); kp = (struct keycb *)sotorawcb(so); if (error) { free(kp, M_PCB); so->so_pcb = (caddr_t) 0; return error; } kp->kp_promisc = kp->kp_registered = 0; if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ V_key_cb.key_count++; V_key_cb.any_count++; soisconnected(so); so->so_options 
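/*
 * Note on the two hunks above in key_sendup_mbuf(): the removed
 * m_copy(m, 0, (int)M_COPYALL) was a 4.3BSD compatibility macro that
 * expanded to exactly the replacement m_copym(m, 0, M_COPYALL,
 * M_NOWAIT) (see the mbuf.h hunk below), so behaviour is unchanged;
 * the no-sleep flag is merely spelled out at the call sites now.
 */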
|= SO_USELOOPBACK; return 0; } /* * key_bind() * derived from net/rtsock.c:rts_bind() */ static int key_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return EINVAL; } /* * key_close() * derived from net/rtsock.c:rts_close(). */ static void key_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* * key_connect() * derived from net/rtsock.c:rts_connect() */ static int key_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return EINVAL; } /* * key_detach() * derived from net/rtsock.c:rts_detach() */ static void key_detach(struct socket *so) { struct keycb *kp = (struct keycb *)sotorawcb(so); KASSERT(kp != NULL, ("key_detach: kp == NULL")); if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ V_key_cb.key_count--; V_key_cb.any_count--; key_freereg(so); raw_usrreqs.pru_detach(so); } /* * key_disconnect() * derived from net/rtsock.c:rts_disconnect() */ static int key_disconnect(struct socket *so) { return(raw_usrreqs.pru_disconnect(so)); } /* * key_peeraddr() * derived from net/rtsock.c:rts_peeraddr() */ static int key_peeraddr(struct socket *so, struct sockaddr **nam) { return(raw_usrreqs.pru_peeraddr(so, nam)); } /* * key_send() * derived from net/rtsock.c:rts_send() */ static int key_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return(raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* * key_shutdown() * derived from net/rtsock.c:rts_shutdown() */ static int key_shutdown(struct socket *so) { return(raw_usrreqs.pru_shutdown(so)); } /* * key_sockaddr() * derived from net/rtsock.c:rts_sockaddr() */ static int key_sockaddr(struct socket *so, struct sockaddr **nam) { return(raw_usrreqs.pru_sockaddr(so, nam)); } struct pr_usrreqs key_usrreqs = { .pru_abort = key_abort, .pru_attach = key_attach, .pru_bind = key_bind, .pru_connect = key_connect, .pru_detach = key_detach, .pru_disconnect = key_disconnect, .pru_peeraddr = key_peeraddr, .pru_send = key_send, .pru_shutdown = key_shutdown, .pru_sockaddr = key_sockaddr, .pru_close = key_close, }; /* sysctl */ SYSCTL_NODE(_net, PF_KEY, key, CTLFLAG_RW, 0, "Key Family"); /* * Definitions of protocols supported in the KEY domain. */ extern struct domain keydomain; struct protosw keysw[] = { { .pr_type = SOCK_RAW, .pr_domain = &keydomain, .pr_protocol = PF_KEY_V2, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = key_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &key_usrreqs } }; static void key_init0(void) { bzero((caddr_t)&V_key_cb, sizeof(V_key_cb)); key_init(); } struct domain keydomain = { .dom_family = PF_KEY, .dom_name = "key", .dom_init = key_init0, #ifdef VIMAGE .dom_destroy = key_destroy, #endif .dom_protosw = keysw, .dom_protoswNPROTOSW = &keysw[nitems(keysw)] }; VNET_DOMAIN_SET(key); Index: head/sys/sys/mbuf.h =================================================================== --- head/sys/sys/mbuf.h (revision 305823) +++ head/sys/sys/mbuf.h (revision 305824) @@ -1,1316 +1,1313 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ /* XXX: These includes suck. Sorry! */ #include <sys/queue.h> #ifdef _KERNEL #include <sys/systm.h> #include <vm/uma.h> #ifdef WITNESS #include <sys/lock.h> #endif #endif #ifdef _KERNEL #include <sys/sdt.h> #define MBUF_PROBE1(probe, arg0) \ SDT_PROBE1(sdt, , , probe, arg0) #define MBUF_PROBE2(probe, arg0, arg1) \ SDT_PROBE2(sdt, , , probe, arg0, arg1) #define MBUF_PROBE3(probe, arg0, arg1, arg2) \ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2) #define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3) #define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4) SDT_PROBE_DECLARE(sdt, , , m__init); SDT_PROBE_DECLARE(sdt, , , m__gethdr); SDT_PROBE_DECLARE(sdt, , , m__get); SDT_PROBE_DECLARE(sdt, , , m__getcl); SDT_PROBE_DECLARE(sdt, , , m__clget); SDT_PROBE_DECLARE(sdt, , , m__cljget); SDT_PROBE_DECLARE(sdt, , , m__cljset); SDT_PROBE_DECLARE(sdt, , , m__free); SDT_PROBE_DECLARE(sdt, , , m__freem); #endif /* _KERNEL */ /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in * sys/param.h), which has no additional overhead and is used instead of the * internal data area; this is done when at least MINCLSIZE of data must be * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * * NB: These calculations do not take actual compiler-induced alignment and * padding inside the complete struct mbuf into account. Appropriate * attention is required when changing members of struct mbuf. * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with a packet header. * MINCLSIZE is the smallest amount of data that should be put into a cluster. * * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are sensible. */ struct mbuf; #define MHSIZE offsetof(struct mbuf, m_dat) #define MPKTHSIZE offsetof(struct mbuf, m_pktdat) #define MLEN ((int)(MSIZE - MHSIZE)) #define MHLEN ((int)(MSIZE - MPKTHSIZE)) #define MINCLSIZE (MHLEN + 1) #ifdef _KERNEL /*- * Macro for type conversion: convert mbuf pointer to data pointer of correct * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type.
* mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) #define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet * allocations. */ struct mb_args { int flags; /* Flags for mbuf being allocated */ short type; /* Type of mbuf being allocated */ }; #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ /* Layer crossing persistent information. */ uint32_t flowid; /* packet's 4-tuple system */ uint64_t csum_flags; /* checksum and offload features */ uint16_t fibnum; /* this packet should use this fib */ uint8_t cosqos; /* class/quality of service */ uint8_t rsstype; /* hash type */ uint8_t l2hlen; /* layer 2 header length */ uint8_t l3hlen; /* layer 3 header length */ uint8_t l4hlen; /* layer 4 header length */ uint8_t l5hlen; /* layer 5 header length */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_per; /* Layer specific non-persistent local storage for reassembly, etc. */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_loc; }; #define ether_vtag PH_per.sixteen[0] #define PH_vt PH_per #define vt_nrecs sixteen[0] #define tso_segsz PH_per.sixteen[1] #define lro_nsegs tso_segsz #define csum_phsum PH_per.sixteen[2] #define csum_data PH_per.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. * Size ILP32: 28 * LP64: 48 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct m_ext { union { volatile u_int ext_count; /* value of ref count info */ volatile u_int *ext_cnt; /* pointer to ref count info */ }; caddr_t ext_buf; /* start of buffer */ uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ void (*ext_free) /* free routine if not the usual */ (struct mbuf *, void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ }; /* * The core of the mbuf object along with some shortcut defines for practical * purposes. */ struct mbuf { /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. 
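* The data region is normally reached through the accessors defined
* above rather than via these fields directly, e.g.:
*
*	struct ip *ip = mtod(m, struct ip *);
*	struct udphdr *uh = mtodo(m, ip->ip_hl << 2);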
*/ union { /* next buffer in chain */ struct mbuf *m_next; SLIST_ENTRY(mbuf) m_slist; STAILQ_ENTRY(mbuf) m_stailq; }; union { /* next chain in queue/record */ struct mbuf *m_nextpkt; SLIST_ENTRY(mbuf) m_slistpkt; STAILQ_ENTRY(mbuf) m_stailqpkt; }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ m_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif /* * A set of optional headers (packet header, external storage header) * and internal data storage. Historically, these arrays were sized * to MHLEN (space left after a packet header) and MLEN (space left * after only a regular mbuf header); they are now variable size in * order to support future work on variable-size mbufs. */ union { struct { struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { struct m_ext m_ext; /* M_EXT set */ char m_pktdat[0]; }; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped * to M_PROTO[1-12] and cleared at layer handoff boundaries. * NB: Limited to the lower 24 bits. */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ #define M_UNUSED_8 0x00000100 /* --available-- */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ #define M_PROTO1 0x00001000 /* protocol-specific */ #define M_PROTO2 0x00002000 /* protocol-specific */ #define M_PROTO3 0x00004000 /* protocol-specific */ #define M_PROTO4 0x00008000 /* protocol-specific */ #define M_PROTO5 0x00010000 /* protocol-specific */ #define M_PROTO6 0x00020000 /* protocol-specific */ #define M_PROTO7 0x00040000 /* protocol-specific */ #define M_PROTO8 0x00080000 /* protocol-specific */ #define M_PROTO9 0x00100000 /* protocol-specific */ #define M_PROTO10 0x00200000 /* protocol-specific */ #define M_PROTO11 0x00400000 /* protocol-specific */ #define M_PROTO12 0x00800000 /* protocol-specific */ #define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */ /* * Flags to purge when crossing layers. */ #define M_PROTOFLAGS \ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) /* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \ M_PROTOFLAGS) /* * Mbuf flag description for use with printf(9) %b identifier. */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ "\7M_PROMISC\10M_VLANTAG" #define M_FLAG_PROTOBITS \ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ "\27M_PROTO11\30M_PROTO12" #define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 * addresses and TCP port numbers) to classify packets into flows.
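* (A flow is e.g. one TCP connection, identified by hashing its
* {src addr, dst addr, src port, dst port} 4-tuple.)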
These flows * can then be used to maintain ordering while delivering packets to the OS * via parallel input queues, as well as to provide a stateless affinity * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set * m_flag fields to indicate how the hash should be interpreted by the * network stack. * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non- * RSS cards or configurations that provide an opaque flow identifier, allowing * for ordering and distribution without explicit affinity. Additionally, * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash * properties. */ #define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */ #define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t)) /* Microsoft RSS standard hash types */ #define M_HASHTYPE_NONE 0 #define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */ #define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */ #define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tuple + * ext hdrs */ /* Non-standard RSS hash types */ #define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV4_EX M_HASHTYPE_HASH(8) /* IPv4 UDP 4-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + * ext hdrs */ #define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ #define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) /* ordering+hash, not affinity*/ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) #define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) #define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) #define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP) /* * COS/QOS class and quality of service tags. * It uses DSCP code points as a base. */ #define QOS_DSCP_CS0 0x00 #define QOS_DSCP_DEF QOS_DSCP_CS0 #define QOS_DSCP_CS1 0x20 #define QOS_DSCP_AF11 0x28 #define QOS_DSCP_AF12 0x30 #define QOS_DSCP_AF13 0x38 #define QOS_DSCP_CS2 0x40 #define QOS_DSCP_AF21 0x48 #define QOS_DSCP_AF22 0x50 #define QOS_DSCP_AF23 0x58 #define QOS_DSCP_CS3 0x60 #define QOS_DSCP_AF31 0x68 #define QOS_DSCP_AF32 0x70 #define QOS_DSCP_AF33 0x78 #define QOS_DSCP_CS4 0x80 #define QOS_DSCP_AF41 0x88 #define QOS_DSCP_AF42 0x90 #define QOS_DSCP_AF43 0x98 #define QOS_DSCP_CS5 0xa0 #define QOS_DSCP_EF 0xb8 #define QOS_DSCP_CS6 0xc0 #define QOS_DSCP_CS7 0xe0 /* * External mbuf storage buffer types.
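* Each fixed-size class below is backed by its own UMA zone, and
* m_gettype()/m_getzone() further down map a byte count to these
* constants, so e.g. m_cljget(m, M_NOWAIT, MCLBYTES) attaches an
* EXT_CLUSTER buffer.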
*/ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16384 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ #define EXT_VENDOR3 226 /* for vendor-internal use */ #define EXT_VENDOR4 227 /* for vendor-internal use */ #define EXT_EXP1 244 /* for experimental use */ #define EXT_EXP2 245 /* for experimental use */ #define EXT_EXP3 246 /* for experimental use */ #define EXT_EXP4 247 /* for experimental use */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* * Flags for external mbuf buffer types. * NB: limited to the lower 24 bits. */ #define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */ #define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ #define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ #define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */ #define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */ #define EXT_FLAG_EXP1 0x100000 /* for experimental use */ #define EXT_FLAG_EXP2 0x200000 /* for experimental use */ #define EXT_FLAG_EXP3 0x400000 /* for experimental use */ #define EXT_FLAG_EXP4 0x800000 /* for experimental use */ /* * EXT flag description for use with printf(9) %b identifier. */ #define EXT_FLAG_BITS \ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" /* * External reference/free functions. */ void sf_ext_free(void *, void *); void sf_ext_free_nocache(void *, void *); /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into * separate inbound and outbound flags. * * Outbound flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. * For outbound packets this field and its flags can be directly tested * against ifnet if_hwassist. */ #define CSUM_IP 0x00000001 /* IP header checksum offload */ #define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ #define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ #define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ #define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ #define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ #define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ #define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ #define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ #define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ #define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ /* Inbound checksum support where the checksum was verified by hardware.
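* As an illustrative sketch, a driver that trusts its NIC's verdict
* would set something like:
*
*	m->m_pkthdr.csum_flags |= CSUM_L3_CALC | CSUM_L3_VALID |
*	    CSUM_L4_CALC | CSUM_L4_VALID;
*	m->m_pkthdr.csum_data = 0xffff;
*
* and upper layers test the same bits via the compatibility names
* (CSUM_IP_CHECKED, CSUM_DATA_VALID, ...) defined further below.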
*/ #define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ #define CSUM_L3_VALID 0x02000000 /* checksum is correct */ #define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ #define CSUM_L4_VALID 0x08000000 /* checksum is correct */ #define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ #define CSUM_L5_VALID 0x20000000 /* checksum is correct */ #define CSUM_COALESED 0x40000000 /* contains merged segments */ /* * CSUM flag description for use with printf(9) %b identifier. */ #define CSUM_BITS \ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ "\6CSUM_IP_ISCSI" \ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ "\16CSUM_IP6_ISCSI" \ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED" /* CSUM flags compatibility mappings. */ #define CSUM_IP_CHECKED CSUM_L3_CALC #define CSUM_IP_VALID CSUM_L3_VALID #define CSUM_DATA_VALID CSUM_L4_VALID #define CSUM_PSEUDO_HDR CSUM_L4_CALC #define CSUM_SCTP_VALID CSUM_L4_VALID #define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) #define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID #define CSUM_TCP CSUM_IP_TCP #define CSUM_UDP CSUM_IP_UDP #define CSUM_SCTP CSUM_IP_SCTP #define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP /* * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_VENDOR1 4 /* for vendor-internal use */ #define MT_VENDOR2 5 /* for vendor-internal use */ #define MT_VENDOR3 6 /* for vendor-internal use */ #define MT_VENDOR4 7 /* for vendor-internal use */ #define MT_SONAME 8 /* socket name */ #define MT_EXP1 9 /* for experimental use */ #define MT_EXP2 10 /* for experimental use */ #define MT_EXP3 11 /* for experimental use */ #define MT_EXP4 12 /* for experimental use */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). 
#define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" #define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" #ifdef _KERNEL #ifdef WITNESS #define MBUF_CHECKSLEEP(how) do { \ if (how == M_WAITOK) \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "Sleeping in \"%s\"", __func__); \ } while (0) #else #define MBUF_CHECKSLEEP(how) #endif /* * Network buffer allocation API * * The rest of it is defined in kern/kern_mbuf.c */ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); void m_catpkt(struct mbuf *, struct mbuf *); int m_clget(struct mbuf *m, int how); void *m_cljget(struct mbuf *m, int how, int size); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_copyup(struct mbuf *, int, int); struct mbuf *m_defrag(struct mbuf *, int); void m_demote_pkthdr(struct mbuf *); void m_demote(struct mbuf *, int, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); struct mbuf *m_dup(const struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); void m_extadd(struct mbuf *, caddr_t, u_int, void (*)(struct mbuf *, void *, void *), void *, void *, int, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); void m_freem(struct mbuf *); struct mbuf *m_get2(int, int, short, int); struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, struct mbuf *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *, int); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; #endif case MJUM9BYTES: type = EXT_JUMBO9; break; case MJUM16BYTES: type = EXT_JUMBO16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (type); } /* * Associate an external reference counted buffer with an mbuf.
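* As a hedged sketch, a driver exporting a buffer whose reference
* count it maintains itself might call
*
*	m_extaddref(m, buf, size, &sc->refcnt, my_free, sc, NULL);
*
* where sc and my_free() are hypothetical; my_free() then runs only
* when the last mbuf referencing the buffer is freed.
*/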
*/ static __inline void m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt, void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); atomic_add_int(ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; m->m_ext.ext_flags = 0; } static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { case MCLBYTES: zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; #endif case MJUM9BYTES: zone = zone_jumbo9; break; case MJUM16BYTES: zone = zone_jumbo16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (zone); } /* * Initialize an mbuf with linear storage. * * Inline because the consumer text overhead will be roughly the same to * initialize or call a function with this many parameters and M_PKTHDR * should go away with constant propagation for !MGETHDR. */ static __inline int m_init(struct mbuf *m, int how, short type, int flags) { int error; m->m_next = NULL; m->m_nextpkt = NULL; m->m_data = m->m_dat; m->m_len = 0; m->m_flags = flags; m->m_type = type; if (flags & M_PKTHDR) error = m_pkthdr_init(m, how); else error = 0; MBUF_PROBE5(m__init, m, how, type, flags, error); return (error); } static __inline struct mbuf * m_get(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__get, how, type, m); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = M_PKTHDR; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__gethdr, how, type, m); return (m); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mbuf *m; struct mb_args args; args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_pack, &args, how); MBUF_PROBE4(m__getcl, how, type, flags, m); return (m); } /* * XXX: m_cljset() is a dangerous API. One must attach only a new, * unreferenced cluster to an mbuf(9). It is not possible to assert * that, so care can be taken only by users of the API. */ static __inline void m_cljset(struct mbuf *m, void *cl, int type) { int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; break; case EXT_JUMBO16: size = MJUM16BYTES; break; default: panic("%s: unknown cluster type %d", __func__, type); break; } m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_flags |= M_EXT; MBUF_PROBE3(m__cljset, m, cl, type); } static __inline void m_chtype(struct mbuf *m, short new_type) { m->m_type = new_type; } static __inline void m_clrprotoflags(struct mbuf *m) { while (m) { m->m_flags &= ~M_PROTOFLAGS; m = m->m_next; } } static __inline struct mbuf * m_last(struct mbuf *m) { while (m->m_next) m = m->m_next; return (m); } static inline u_int m_extrefcnt(struct mbuf *m) { KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__)); return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? 
m->m_ext.ext_count : *m->m_ext.ext_cnt); } /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2), \ (flags), (type)) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ (m_extrefcnt(m) == 1))) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR, \ ("%s: no mbuf packet header!", __func__)) /* * Ensure that the supplied mbuf is a valid, non-free mbuf. * * XXX: Broken at the moment. Need some UMA magic to make it work again. */ #define M_ASSERTVALID(m) \ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ ("%s: attempted use of a free mbuf!", __func__)) /* * Return the address of the start of the buffer associated with an mbuf, * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) /* * Return the size of the buffer associated with an mbuf, handling external * storage, packet-header mbufs, and regular data mbufs. */ #define M_SIZE(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ ((m)->m_flags & M_PKTHDR) ? MHLEN : \ MLEN) /* * Set the m_data pointer of a newly allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. * * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as * separate macros, each asserting that it was called at the proper moment. * This required callers to themselves test the storage type and call the * right one. Rather than require callers to be aware of those layout * decisions, we centralize here. */ static __inline void m_align(struct mbuf *m, int len) { #ifdef INVARIANTS const char *msg = "%s: not a virgin mbuf"; #endif int adjust; KASSERT(m->m_data == M_START(m), (msg, __func__)); adjust = M_SIZE(m) - len; m->m_data += adjust &~ (sizeof(long)-1); } #define M_ALIGN(m, len) m_align(m, len) #define MH_ALIGN(m, len) m_align(m, len) #define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in * an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. 
We now allow mbuf-embedded data to be * read-only as well. */ #define M_TRAILINGSPACE(m) \ (M_WRITABLE(m) ? \ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the * original mbuf chain is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ MBUF_CHECKSLEEP(how); \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. This is a relatively expensive operation and * should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 -/* Compatibility with 4.3. */ -#define m_copy(m, o, l) m_copym((m), (o), (l), M_NOWAIT) - extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically * allocated semi-opaque data structures that have a fixed header * (struct m_tag) that specifies the size of the memory block and a * <cookie,type> pair that identifies it. The cookie is a 32-bit unique * unsigned value used to identify a module or ABI. By convention this value * is chosen as the date+time that the module is created, expressed as the * number of seconds since the epoch (e.g., using date -u +'%s'). The type * value is an ABI/module-specific value that identifies a particular * annotation and is private to the module. For compatibility with systems * like OpenBSD that define packet tags w/o an ABI/module cookie, the value * MTAG_ABI_COMPAT is used to implement m_tag_get and m_tag_find * compatibility shim functions and several tag types are defined below. * Users that do not require compatibility should use a private cookie value * so that packet tag-related definitions can be maintained privately. * * Note that the packet tag returned by m_tag_alloc has the default memory * alignment implemented by malloc. To reference private data one can use a * construct like: * * struct m_tag *mtag = m_tag_alloc(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members of * struct foo. Otherwise it is necessary to embed struct m_tag within the * private data structure to ensure proper alignment; e.g., * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_alloc(...); * struct m_tag *mtag = &p->tag; */ /* * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise * tags are expected to ``vanish'' when they pass through a network * interface. For most interfaces this happens normally as the tags are * reclaimed when the mbuf is free'd. However in some special cases * reclaiming must be done manually. An example is packets that pass through * the loopback interface. Also, one must be careful to do this when * ``turning around'' packets (e.g., icmp_reflect).
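* (PACKET_TAG_MACLABEL below is one such persistent tag: a MAC
* label must survive e.g. the loopback turnaround, so its id is
* defined as (19 | MTAG_PERSISTENT).)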
* * To mark a tag persistent bit-or this flag in when defining the tag id. * The tag will then be treated as described above. */ #define MTAG_PERSISTENT 0x800 #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tags for use with PACKET_ABI_COMPAT. */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ #define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ /* Specific cookies and tags. */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* * Initialize the list of tags associated with an mbuf. */ static __inline void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* * Set up the contents of a tag. Note that this does not fill in the free * method; the caller is expected to do that. * * XXX probably should be called m_tag_init, but that was already taken. */ static __inline void m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) { t->m_tag_id = type; t->m_tag_len = len; t->m_tag_cookie = cookie; } /* * Reclaim resources associated with a tag. */ static __inline void m_tag_free(struct m_tag *t) { (*t->m_tag_free)(t); } /* * Return the first tag associated with an mbuf. */ static __inline struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.tags)); } /* * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); } /* * Prepend a tag to the list of tags associated with an mbuf. */ static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Unlink a tag from the list of tags associated with an mbuf. */ static __inline void m_tag_unlink(struct mbuf *m, struct m_tag *t) { SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } /* These are for OpenBSD compatibility. 
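* For example, a forwarding path can allocate its next-hop tag as
*
*	mtag = m_tag_get(PACKET_TAG_IPFORWARD,
*	    sizeof(struct sockaddr_in), M_NOWAIT);
*
* which is equivalent to m_tag_alloc() with the MTAG_ABI_COMPAT
* cookie, so m_tag_find() can later look it up by type alone.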
*/ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait)); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL : m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; MBUF_PROBE1(m__free, m); if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline int rt_m_getfib(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); return (m->m_pkthdr.fibnum); } #define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) /* flags passed as first argument for "m_ether_tcpip_hash()" */ #define MBUF_HASHFLAG_L2 (1 << 2) #define MBUF_HASHFLAG_L3 (1 << 3) #define MBUF_HASHFLAG_L4 (1 << 4) /* mbuf hashing helper routines */ uint32_t m_ether_tcpip_hash_init(void); uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); #define M_PROFILE(m) m_profile(m) #else #define M_PROFILE(m) #endif struct mbufq { STAILQ_HEAD(, mbuf) mq_head; int mq_len; int mq_maxlen; }; static inline void mbufq_init(struct mbufq *mq, int maxlen) { STAILQ_INIT(&mq->mq_head); mq->mq_maxlen = maxlen; mq->mq_len = 0; } static inline struct mbuf * mbufq_flush(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); STAILQ_INIT(&mq->mq_head); mq->mq_len = 0; return (m); } static inline void mbufq_drain(struct mbufq *mq) { struct mbuf *m, *n; n = mbufq_flush(mq); while ((m = n) != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m_freem(m); } } static inline struct mbuf * mbufq_first(const struct mbufq *mq) { return (STAILQ_FIRST(&mq->mq_head)); } static inline struct mbuf * mbufq_last(const struct mbufq *mq) { return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); } static inline int mbufq_full(const struct mbufq *mq) { return (mq->mq_len >= mq->mq_maxlen); } static inline int mbufq_len(const struct mbufq *mq) { return (mq->mq_len); } static inline int mbufq_enqueue(struct mbufq *mq, struct mbuf *m) { if (mbufq_full(mq)) return (ENOBUFS); STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; return (0); } static inline struct mbuf * mbufq_dequeue(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); if (m) { STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); m->m_nextpkt = NULL; mq->mq_len--; } return (m); } static inline void mbufq_prepend(struct mbufq *mq, struct mbuf *m) { STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */
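/*
 * A hedged usage sketch of the mbufq API above; mbufq is a simple
 * capped FIFO of packet chains.  process() is a hypothetical
 * consumer, not part of this header:
 *
 *	struct mbufq q;
 *	struct mbuf *m;
 *
 *	mbufq_init(&q, 50);		(cap the queue at 50 packets)
 *	m = m_gethdr(M_NOWAIT, MT_DATA);
 *	if (m == NULL || mbufq_enqueue(&q, m) != 0)
 *		m_freem(m);		(a full queue returns ENOBUFS)
 *	while ((m = mbufq_dequeue(&q)) != NULL)
 *		process(m);
 *	mbufq_drain(&q);		(frees anything still queued)
 */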