Index: head/contrib/ipfilter/tools/ipftest.c
===================================================================
--- head/contrib/ipfilter/tools/ipftest.c	(revision 280970)
+++ head/contrib/ipfilter/tools/ipftest.c	(revision 280971)
@@ -1,866 +1,874 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #include "ipf.h"
 #include "ipt.h"
 #include <sys/ioctl.h>
 #include <sys/file.h>
 
 #if !defined(lint)
 static const char sccsid[] = "@(#)ipt.c	1.19 6/3/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$Id$";
 #endif
 
 extern	char	*optarg;
 extern	struct ipread	pcap, iptext, iphex;
 extern	struct ifnet	*get_unit __P((char *, int));
 extern	void	init_ifp __P((void));
 extern	ipnat_t	*natparse __P((char *, int));
 extern	hostmap_t **ipf_hm_maptable;
 extern	hostmap_t *ipf_hm_maplist;
 
 ipfmutex_t	ipl_mutex, ipf_auth_mx, ipf_rw, ipf_stinsert;
 ipfmutex_t	ipf_nat_new, ipf_natio, ipf_timeoutlock;
 ipfrwlock_t	ipf_mutex, ipf_global, ipf_ipidfrag, ip_poolrw, ipf_frcache;
 ipfrwlock_t	ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_authlk;
 ipfrwlock_t	ipf_tokens;
 int	opts = OPT_DONTOPEN;
 int	use_inet6 = 0;
 int	docksum = 0;
 int	pfil_delayed_copy = 0;
 int	main __P((int, char *[]));
 int	loadrules __P((char *, int));
 int	kmemcpy __P((char *, long, int));
 int     kstrncpy __P((char *, long, int n));
 int	blockreason;
 void	dumpnat __P((void *));
 void	dumpgroups __P((ipf_main_softc_t *));
 void	dumprules __P((frentry_t *));
 void	drain_log __P((char *));
 void	fixv4sums __P((mb_t *, ip_t *));
 
 #if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \
 	(_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \
 	defined(__osf__) || defined(linux)
 int ipftestioctl __P((int, ioctlcmd_t, ...));
 int ipnattestioctl __P((int, ioctlcmd_t, ...));
 int ipstatetestioctl __P((int, ioctlcmd_t, ...));
 int ipauthtestioctl __P((int, ioctlcmd_t, ...));
 int ipscantestioctl __P((int, ioctlcmd_t, ...));
 int ipsynctestioctl __P((int, ioctlcmd_t, ...));
 int ipooltestioctl __P((int, ioctlcmd_t, ...));
 #else
 int ipftestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipnattestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipstatetestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipauthtestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipsynctestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipscantestioctl __P((dev_t, ioctlcmd_t, void *));
 int ipooltestioctl __P((dev_t, ioctlcmd_t, void *));
 #endif
 
 /*
  * Table of the test ioctl handlers defined in this file.  main() hands it
  * to ipf_parsefile() when loading a rule file given with -r.
  * NOTE(review): the entries are presumably ordered to match the IPL_LOG*
  * device numbers - confirm against the header that defines them.
  */
 static	ioctlfunc_t	iocfunctions[IPL_LOGSIZE] = { ipftestioctl,
 						      ipnattestioctl,
 						      ipstatetestioctl,
 						      ipauthtestioctl,
 						      ipsynctestioctl,
 						      ipscantestioctl,
 						      ipooltestioctl,
 						      NULL };
 static	ipf_main_softc_t	*softc = NULL;
 
 
 /*
  * Entry point of the packet filter test harness.
  *
  * Creates and initialises the ipf soft context, enables the filter with
  * SIOCFRENB, parses the command line (loading rule/NAT/pool files as they
  * are named), then reads packets from the selected input reader and feeds
  * each one to ipf_check(), printing the verdict and - unless -b is given -
  * the packet.  Afterwards it optionally drains the log (-l) and dumps the
  * tables (-D) before tearing everything down.
  *
  * Exits with a failure status if initialisation fails, if no -N/-P/-r file
  * was loaded, or if the input cannot be opened.
  */
 int
 main(argc,argv)
 	int argc;
 	char *argv[];
 {
 	char	*datain, *iface, *ifname, *logout;
 	int	fd, i, dir, c, loaded, dump, hlen;
 	struct	in_addr	sip;
 	struct	ifnet	*ifp;
 	struct	ipread	*r;
 	mb_t	mb, *m, *n;
 	ip_t	*ip;
 
 	m = &mb;
 	dir = 0;
 	dump = 0;
 	hlen = 0;
 	loaded = 0;
 	r = &iptext;
 	iface = NULL;
 	logout = NULL;
 	datain = NULL;
 	sip.s_addr = 0;
 	ifname = "anon0";
 
 	initparse();
 
 	ipf_load_all();
 
 	softc = ipf_create_all(NULL);
 	if (softc == NULL)
 		exit(1);
 
 	if (ipf_init_all(softc) == -1)
 		exit(1);
 
 	/* Turn the filter on before any rules are loaded. */
 	i = 1;
 	if (ipftestioctl(IPL_LOGIPF, SIOCFRENB, &i) != 0)
 		exit(1);
 
 	/*
 	 * NOTE(review): 'X' is accepted by the option string but has no case
 	 * below, and unknown options fall through the switch silently.
 	 */
 	while ((c = getopt(argc, argv, "6bCdDF:i:I:l:N:P:or:RS:T:vxX")) != -1)
 		switch (c)
 		{
 		case '6' :
 #ifdef	USE_INET6
 			use_inet6 = 1;
 #else
 			fprintf(stderr, "IPv6 not supported\n");
 			exit(1);
 #endif
 			break;
 		case 'b' :
 			opts |= OPT_BRIEF;
 			break;
 		case 'd' :
 			opts |= OPT_DEBUG;
 			break;
 		case 'C' :
 			docksum = 1;
 			break;
 		case 'D' :
 			dump = 1;
 			break;
 		case 'F' :
 			/* An unrecognised format name leaves the reader unchanged. */
 			if (strcasecmp(optarg, "pcap") == 0)
 				r = &pcap;
 			else if (strcasecmp(optarg, "hex") == 0)
 				r = &iphex;
 			else if (strcasecmp(optarg, "text") == 0)
 				r = &iptext;
 			break;
 		case 'i' :
 			datain = optarg;
 			break;
 		case 'I' :
 			ifname = optarg;
 			break;
 		case 'l' :
 			logout = optarg;
 			break;
 		case 'N' :
 			if (ipnat_parsefile(-1, ipnat_addrule, ipnattestioctl,
 					    optarg) == -1)
 				return -1;
 			loaded = 1;
 			opts |= OPT_NAT;
 			break;
 		case 'o' :
 			opts |= OPT_SAVEOUT;
 			break;
 		case 'P' :
 			if (ippool_parsefile(-1, optarg, ipooltestioctl) == -1)
 				return -1;
 			loaded = 1;
 			break;
 		case 'r' :
 			if (ipf_parsefile(-1, ipf_addrule, iocfunctions,
 					  optarg) == -1)
 				return -1;
 			loaded = 1;
 			break;
 		case 'S' :
 			sip.s_addr = inet_addr(optarg);
 			break;
 		case 'R' :
 			opts |= OPT_NORESOLVE;
 			break;
 		case 'T' :
 			ipf_dotuning(-1, optarg, ipftestioctl);
 			break;
 		case 'v' :
 			opts |= OPT_VERBOSE;
 			break;
 		case 'x' :
 			opts |= OPT_HEX;
 			break;
 		}
 
 	if (loaded == 0) {
 		(void)fprintf(stderr,"no rules loaded\n");
 		exit(-1);
 	}
 
 	if (opts & OPT_SAVEOUT)
 		init_ifp();
 
 	/* Without -i the reader is opened on "-". */
 	if (datain)
 		fd = (*r->r_open)(datain);
 	else
 		fd = (*r->r_open)("-");
 
 	if (fd < 0) {
 		perror("error opening input");
 		exit(-1);
 	}
 
 	/*
 	 * NOTE(review): this uses m_data while the reset at the bottom of the
 	 * loop uses mb_data; presumably m_data is an alias for mb_data in the
 	 * userland headers - confirm.
 	 */
 	m->m_data = (char *)m->mb_buf;
 	while ((i = (*r->r_readip)(m, &iface, &dir)) > 0) {
 
 		/* Fall back to the -I name when the reader supplied none. */
 		if ((iface == NULL) || (*iface == '\0'))
 			iface = ifname;
 
 		ip = MTOD(m, ip_t *);
 		ifp = get_unit(iface, IP_V(ip));
 
 		if (IP_V(ip) == 4) {
 			if ((r->r_flags & R_DO_CKSUM) || docksum)
 				fixv4sums(m, ip);
 			hlen = IP_HL(ip) << 2;
 			/* With -S, direction is 0 only for packets from that source. */
 			if (sip.s_addr)
 				dir = !(sip.s_addr == ip->ip_src.s_addr);
 		}
 #ifdef	USE_INET6
 		else
 			hlen = sizeof(ip6_t);
 #endif
 		/* ipfr_slowtimer(); */
 		blockreason = 0;
 		m = &mb;
 		m->mb_ifp = ifp;
 		m->mb_len = i;
 		i = ipf_check(softc, ip, hlen, ifp, dir, &m);
 		/* The verdict text is only printed when no NAT file was loaded. */
 		if ((opts & OPT_NAT) == 0)
 			switch (i)
 			{
 			case -4 :
 				(void)printf("preauth");
 				break;
 			case -3 :
 				(void)printf("account");
 				break;
 			case -2 :
 				(void)printf("auth");
 				break;
 			case -1 :
 				(void)printf("block");
 				break;
 			case 0 :
 				(void)printf("pass");
 				break;
 			case 1 :
 				if (m == NULL)
 					(void)printf("bad-packet");
 				else
 					(void)printf("nomatch");
 				break;
 			case 3 :
 				(void)printf("block return-rst");
 				break;
 			case 4 :
 				(void)printf("block return-icmp");
 				break;
 			case 5 :
 				(void)printf("block return-icmp-as-dest");
 				break;
 			default :
 				/*
 				 * NOTE(review): this branch handles values that
 				 * matched no case, so the message presumably
 				 * was meant to read "unrecognised return"; it
 				 * is also the only verdict printed with a
 				 * trailing newline.
 				 */
 				(void)printf("recognised return %#x\n", i);
 				break;
 			}
 
 		if (!(opts & OPT_BRIEF)) {
 			putchar(' ');
 			if (m != NULL)
 				printpacket(dir, m);
 			else
 				printpacket(dir, &mb);
 			printf("--------------");
 		} else if ((opts & (OPT_BRIEF|OPT_NAT)) ==
 			   (OPT_NAT|OPT_BRIEF)) {
 			if (m != NULL)
 				printpacket(dir, m);
 			else
 				PRINTF("%d\n", blockreason);
 		}
 
 		ipf_state_flush(softc, 1, 0);
 
 		/*
 		 * The body of this if statement is whichever if_output() call
 		 * the preprocessor selects below.
 		 */
 		if (dir && (ifp != NULL) && IP_V(ip) && (m != NULL))
 #if  defined(__sgi) && (IRIX < 60500)
 			(*ifp->if_output)(ifp, (void *)m, NULL);
 #else
 # if TRU64 >= 1885
 			(*ifp->if_output)(ifp, (void *)m, NULL, 0, 0);
 # else
 			(*ifp->if_output)(ifp, (void *)m, NULL, 0);
 # endif
 #endif
 
 		/* Free any buffers in the chain other than the on-stack mb. */
 		while ((m != NULL) && (m != &mb)) {
 			n = m->mb_next;
 			freembt(m);
 			m = n;
 		}
 
 		if ((opts & (OPT_BRIEF|OPT_NAT)) != (OPT_NAT|OPT_BRIEF))
 			putchar('\n');
 		dir = 0;
 		/* An interface name different from ifname is freed here. */
 		if (iface != ifname) {
 			free(iface);
 			iface = ifname;
 		}
 		m = &mb;
 		m->mb_data = (char *)m->mb_buf;
 	}
 
 	/* The loop ends on i <= 0; anything but 0 is reported as a failure. */
 	if (i != 0)
 		fprintf(stderr, "readip failed: %d\n", i);
 	(*r->r_close)();
 
 	if (logout != NULL) {
 		drain_log(logout);
 	}
 
 	if (dump == 1)  {
 		dumpnat(softc->ipf_nat_soft);
 		ipf_state_dump(softc, softc->ipf_state_soft);
 		/*
 		 * NOTE(review): the lookup dump is passed ipf_state_soft, the
 		 * same context as the state dump above - confirm this is the
 		 * intended argument.
 		 */
 		ipf_lookup_dump(softc, softc->ipf_state_soft);
 		dumpgroups(softc);
 	}
 
 	ipf_fini_all(softc);
 
 	ipf_destroy_all(softc);
 
 	ipf_unload_all();
 
 	ipf_mutex_clean();
 	ipf_rwlock_clean();
 
 	/* When FINDLEAKS is set in the environment, flush stdout and abort(). */
 	if (getenv("FINDLEAKS")) {
 		fflush(stdout);
 		abort();
 	}
 	return 0;
 }
 
 
 #if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \
 	(_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \
 	defined(__osf__) || defined(linux)
 /*
  * Test ioctl entry for the IPF device.  Takes the one pointer argument that
  * follows cmd and passes it to ipfioctl() for IPL_LOGIPF.  The call is
  * traced to stderr only when OPT_DEBUG is set.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipftestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGIPF, cmd, arg, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) != 0) {
 		fprintf(stderr, "ipfioctl(IPF,%#x,%p) = %d (%d)\n",
 			(u_int)cmd, arg, error, softc->ipf_interror);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the NAT device.  Takes the one pointer argument that
  * follows cmd and passes it to ipfioctl() for IPL_LOGNAT.  The call is
  * traced to stderr only when OPT_DEBUG is set.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipnattestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGNAT, cmd, arg, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) != 0) {
 		fprintf(stderr, "ipfioctl(NAT,%#x,%p) = %d\n",
 			(u_int)cmd, arg, error);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the state device.  Takes the one pointer argument
  * that follows cmd and passes it to ipfioctl() for IPL_LOGSTATE.  The call
  * is traced to stderr when OPT_DEBUG is set or when it fails.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipstatetestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGSTATE, cmd, arg, FWRITE|FREAD);
 	if ((error != 0) || (opts & OPT_DEBUG)) {
 		fprintf(stderr, "ipfioctl(STATE,%#x,%p) = %d\n",
 			(u_int)cmd, arg, error);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the auth device.  Takes the one pointer argument
  * that follows cmd and passes it to ipfioctl() for IPL_LOGAUTH.  The call
  * is traced to stderr when OPT_DEBUG is set or when it fails.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipauthtestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGAUTH, cmd, arg, FWRITE|FREAD);
 	if ((error != 0) || (opts & OPT_DEBUG)) {
 		fprintf(stderr, "ipfioctl(AUTH,%#x,%p) = %d\n",
 			(u_int)cmd, arg, error);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the scan device.  Takes the one pointer argument
  * that follows cmd and passes it to ipfioctl() for IPL_LOGSCAN.  The call
  * is traced to stderr when OPT_DEBUG is set or when it fails.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipscantestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGSCAN, cmd, arg, FWRITE|FREAD);
 	if ((error != 0) || (opts & OPT_DEBUG)) {
 		fprintf(stderr, "ipfioctl(SCAN,%#x,%p) = %d\n",
 			(u_int)cmd, arg, error);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the sync device.  Takes the one pointer argument
  * that follows cmd and passes it to ipfioctl() for IPL_LOGSYNC.  The call
  * is traced to stderr when OPT_DEBUG is set or when it fails.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipsynctestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGSYNC, cmd, arg, FWRITE|FREAD);
 	if ((error != 0) || (opts & OPT_DEBUG)) {
 		fprintf(stderr, "ipfioctl(SYNC,%#x,%p) = %d\n",
 			(u_int)cmd, arg, error);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 
 
 /*
  * Test ioctl entry for the lookup (pool) device.  Takes the one pointer
  * argument that follows cmd and passes it to ipfioctl() for IPL_LOGLOOKUP.
  * The call, together with softc->ipf_interror, is traced to stderr when
  * OPT_DEBUG is set or when it fails.
  *
  * Returns 0 on success; otherwise stores ipfioctl()'s non-zero result in
  * errno and returns -1.
  */
 int ipooltestioctl(int dev, ioctlcmd_t cmd, ...)
 {
 	va_list args;
 	caddr_t arg;
 	int error;
 
 	(void)dev;	/* dev is not used */
 	va_start(args, cmd);
 	arg = va_arg(args, caddr_t);
 	va_end(args);
 
 	error = ipfioctl(softc, IPL_LOGLOOKUP, cmd, arg, FWRITE|FREAD);
 	if ((error != 0) || (opts & OPT_DEBUG)) {
 		fprintf(stderr, "ipfioctl(POOL,%#x,%p) = %d (%d)\n",
 			(u_int)cmd, arg, error, softc->ipf_interror);
 	}
 	if (error == 0)
 		return 0;
 
 	errno = error;
 	return -1;
 }
 #else
 int ipftestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGIPF, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(IPF,%#x,%p) = %d (%d)\n",
 			cmd, data, i, softc->ipf_interror);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipnattestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGNAT, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(NAT,%#x,%p) = %d\n", cmd, data, i);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipstatetestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGSTATE, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(STATE,%#x,%p) = %d\n", cmd, data, i);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipauthtestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGAUTH, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(AUTH,%#x,%p) = %d\n", cmd, data, i);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipsynctestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGSYNC, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(SYNC,%#x,%p) = %d\n", cmd, data, i);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipscantestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGSCAN, cmd, data, FWRITE|FREAD);
 	if ((opts & OPT_DEBUG) || (i != 0))
 		fprintf(stderr, "ipfioctl(SCAN,%#x,%p) = %d\n", cmd, data, i);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 
 
 int ipooltestioctl(dev, cmd, data)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	int i;
 
 	dev = dev;	/* gcc -Wextra */
 	i = ipfioctl(softc, IPL_LOGLOOKUP, cmd, data, FWRITE|FREAD);
 	if (opts & OPT_DEBUG)
 		fprintf(stderr, "ipfioctl(POOL,%#x,%p) = %d (%d)\n",
 			cmd, data, i, softc->ipf_interror);
 	if (i != 0) {
 		errno = i;
 		return -1;
 	}
 	return 0;
 }
 #endif
 
 
 /*
  * Copy size bytes into addr from the memory whose address is carried in
  * offset as an integer.  Always returns 0.
  */
 int kmemcpy(addr, offset, size)
 	char *addr;
 	long offset;
 	int size;
 {
 	char *from = (char *)offset;
 
 	bcopy(from, addr, size);
 
 	return 0;
 }
 
 
 /*
  * Copy a NUL-terminated string into buf from the memory whose address is
  * carried in pos as an integer, writing at most n bytes.
  *
  * Copying stops after the terminating NUL has been copied or once n bytes
  * have been written, whichever comes first; in the latter case buf is left
  * without a terminating NUL.  Always returns 0.
  */
 int kstrncpy(buf, pos, n)
 	char *buf;
 	long pos;
 	int n;
 {
 	char *ptr;
 
 	ptr = (char *)pos;
 
 	/*
 	 * The count must shrink with every byte copied, otherwise the limit
 	 * is never enforced and a long source string overruns buf.
 	 */
 	while ((n-- > 0) && (*buf++ = *ptr++))
 		;
 	return 0;
 }
 
 
 /*
  * Print the contents of the NAT soft context passed in arg: the rule list,
  * the active sessions (flagging those with nat_aps set) and the hostmap
  * list.
  */
 void dumpnat(arg)
 	void *arg;
 {
 	ipf_nat_softc_t *softn = arg;
 	hostmap_t *map;
 	ipnat_t	*rule;
 	nat_t *session;
 
 	printf("List of active MAP/Redirect filters:\n");
 	rule = softn->ipf_nat_list;
 	while (rule != NULL) {
 		printnat(rule, opts & (OPT_DEBUG|OPT_VERBOSE));
 		rule = rule->in_next;
 	}
 
 	printf("\nList of active sessions:\n");
 	session = softn->ipf_nat_instances;
 	while (session) {
 		printactivenat(session, opts, 0);
 		if (session->nat_aps)
 			printf("\tproxy active\n");
 		session = session->nat_next;
 	}
 
 	printf("\nHostmap table:\n");
 	map = softn->ipf_hm_maplist;
 	while (map != NULL) {
 		printhostmap(map, map->hm_hv);
 		map = map->hm_next;
 	}
 }
 
 
 /*
  * Print every rule group and then the filter and accounting rule lists
  * held in softc, for both rule sets (0 and 1).
  *
  * The filter lists are read as ipf_rules[direction][set], with direction 0
  * labelled "in" and 1 labelled "out".  The accounting lists are read the
  * same way so that each heading describes the list printed beneath it.
  * NOTE(review): this assumes ipf_acct shares ipf_rules' [direction][set]
  * layout - confirm against the ipf_main_softc_t definition.
  */
 void dumpgroups(softc)
 	ipf_main_softc_t *softc;
 {
 	frgroup_t *fg;
 	int i, set;
 
 	for (set = 0; set < 2; set++) {
 		printf("List of groups configured (set %d)\n", set);
 		for (i = 0; i < IPL_LOGSIZE; i++) {
 			for (fg = softc->ipf_groups[i][set]; fg != NULL;
 			     fg = fg->fg_next) {
 				printf("Dev.%d. Group %s Ref %d Flags %#x\n",
 					i, fg->fg_name, fg->fg_ref,
 					fg->fg_flags);
 				dumprules(fg->fg_start);
 			}
 		}
 	}
 
 	printf("Rules configured (set 0, in)\n");
 	dumprules(softc->ipf_rules[0][0]);
 	printf("Rules configured (set 0, out)\n");
 	dumprules(softc->ipf_rules[1][0]);
 	printf("Rules configured (set 1, in)\n");
 	dumprules(softc->ipf_rules[0][1]);
 	printf("Rules configured (set 1, out)\n");
 	dumprules(softc->ipf_rules[1][1]);
 
 	printf("Accounting rules configured (set 0, in)\n");
 	dumprules(softc->ipf_acct[0][0]);
 	printf("Accounting rules configured (set 0, out)\n");
 	dumprules(softc->ipf_acct[1][0]);
 	printf("Accounting rules configured (set 1, in)\n");
 	dumprules(softc->ipf_acct[0][1]);
 	printf("Accounting rules configured (set 1, out)\n");
 	dumprules(softc->ipf_acct[1][1]);
 }
 
 /*
  * Print each rule on the list starting at rulehead: the rule's hit count
  * followed by the rule itself as rendered by printfr().
  *
  * The hit count is cast to the exact type named by the conversion
  * specifier so that the printf() argument always matches its format.
  */
 void dumprules(rulehead)
 	frentry_t *rulehead;
 {
 	frentry_t *fr;
 
 	for (fr = rulehead; fr != NULL; fr = fr->fr_next) {
 #ifdef	USE_QUAD_T
 		printf("%llu ", (unsigned long long)fr->fr_hits);
 #else
 		/* NOTE(review): assumes fr_hits is an unsigned counter here. */
 		printf("%lu ", (unsigned long)fr->fr_hits);
 #endif
 		printfr(fr, ipftestioctl);
 	}
 }
 
 
 /*
  * Write everything currently readable from the log devices 0..IPL_LOGMAX
  * to filename, which is created or truncated first.
  *
  * Each device is read with ipf_log_read() in chunks of at most
  * DEFAULT_IPFLOGSIZE bytes until a read fails or returns no data.  A
  * failure to open or write the file is reported with perror() and ends the
  * drain; short writes are retried until the whole chunk has been written.
  */
 void drain_log(filename)
 	char *filename;
 {
 	char buffer[DEFAULT_IPFLOGSIZE];
 	struct iovec iov;
 	struct uio uio;
 	size_t resid, nbytes, done;
 	ssize_t wrote;
 	int fd, i;
 
 	fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
 	if (fd == -1) {
 		perror("drain_log:open");
 		return;
 	}
 
 	for (i = 0; i <= IPL_LOGMAX; i++) {
 		while (1) {
 			bzero((char *)&iov, sizeof(iov));
 			iov.iov_base = buffer;
 			iov.iov_len = sizeof(buffer);
 
 			bzero((char *)&uio, sizeof(uio));
 			uio.uio_iov = &iov;
 			uio.uio_iovcnt = 1;
 			uio.uio_resid = iov.iov_len;
 			resid = uio.uio_resid;
 
 			if (ipf_log_read(softc, i, &uio) != 0)
 				break;
 			/*
 			 * If nothing was read then move on to the next
 			 * device.
 			 */
 			if (uio.uio_resid == resid)
 				break;
 
 			nbytes = resid - uio.uio_resid;
 			for (done = 0; done < nbytes; done += wrote) {
 				wrote = write(fd, buffer + done, nbytes - done);
 				if (wrote <= 0) {
 					/* Stop rather than lose data silently. */
 					perror("drain_log:write");
 					close(fd);
 					return;
 				}
 			}
 		}
 	}
 
 	close(fd);
 }
 
 
 /*
  * Recompute the checksums of the packet held in m whose IP header is ip.
  *
  * For IPv4 the header checksum is recalculated with ipf_cksum().  For TCP,
  * UDP and ICMP payloads (and, when USE_INET6 is defined, behind an IPv6
  * header) the transport checksum field is cleared and then filled in with
  * the value returned by fr_cksum().  Other protocols are left untouched.
  */
 void fixv4sums(m, ip)
 	mb_t *m;
 	ip_t *ip;
 {
 	u_char *csump, *hdr, p;
 	fr_info_t tmp;
 	int len;
 
 	p = 0;
 	len = 0;
 	hdr = NULL;
 	bzero((char *)&tmp, sizeof(tmp));
 
 	/* csump starts at the IP header and is advanced to the checksum field. */
 	csump = (u_char *)ip;
 	if (IP_V(ip) == 4) {
 		ip->ip_sum = 0;
 		ip->ip_sum = ipf_cksum((u_short *)ip, IP_HL(ip) << 2);
 		tmp.fin_hlen = IP_HL(ip) << 2;
 		csump += IP_HL(ip) << 2;
 		p = ip->ip_p;
 		len = ntohs(ip->ip_len);
 #ifdef USE_INET6
 	} else if (IP_V(ip) == 6) {
 		tmp.fin_hlen = sizeof(ip6_t);
 		csump += sizeof(ip6_t);
 		p = ((ip6_t *)ip)->ip6_nxt;
 		/* ip6_plen excludes the fixed header, so add it back. */
 		len = ntohs(((ip6_t *)ip)->ip6_plen);
 		len += sizeof(ip6_t);
 #endif
 	}
 	tmp.fin_plen = len;
 	tmp.fin_dlen = len - tmp.fin_hlen;
 
 	switch (p)
 	{
 	case IPPROTO_TCP :
 		hdr = csump;
 		csump += offsetof(tcphdr_t, th_sum);
 		break;
 	case IPPROTO_UDP :
 		hdr = csump;
 		csump += offsetof(udphdr_t, uh_sum);
 		break;
 	case IPPROTO_ICMP :
 		hdr = csump;
 		csump += offsetof(icmphdr_t, icmp_cksum);
 		break;
 	default :
 		csump = NULL;
 		hdr = NULL;
 		break;
 	}
 	if (hdr != NULL) {
 		tmp.fin_m = m;
 		tmp.fin_mp = &m;
 		tmp.fin_dp = hdr;
 		tmp.fin_ip = ip;
 		/*
 		 * Clear the whole 16-bit checksum field, not just its first
 		 * byte, so no stale checksum bits can enter the new sum.
 		 */
 		*(u_short *)csump = 0;
 		*(u_short *)csump = fr_cksum(&tmp, ip, p, hdr);
 	}
 }
+
+/*
+ * Set the ID field of the given IP header from a counter private to this
+ * function.  The counter is a static uint16_t, so it starts at zero, goes up
+ * by one per call and wraps after 65535.  The value is stored as-is; no
+ * byte-order conversion is done here.
+ */
+void
+ip_fillid(struct ip *ip)
+{
+	static uint16_t ip_id;
+
+	ip->ip_id = ip_id++;
+}
Index: head/share/man/man4/inet.4
===================================================================
--- head/share/man/man4/inet.4	(revision 280970)
+++ head/share/man/man4/inet.4	(revision 280971)
@@ -1,289 +1,301 @@
 .\" Copyright (c) 1983, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     From: @(#)inet.4	8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd January 26, 2012
+.Dd April 2, 2015
 .Dt INET 4
 .Os
 .Sh NAME
 .Nm inet
 .Nd Internet protocol family
 .Sh SYNOPSIS
 .In sys/types.h
 .In netinet/in.h
 .Sh DESCRIPTION
 The Internet protocol family is a collection of protocols
 layered atop the
 .Em Internet Protocol
 .Pq Tn IP
 transport layer, and utilizing the Internet address format.
 The Internet family provides protocol support for the
 .Dv SOCK_STREAM , SOCK_DGRAM ,
 and
 .Dv SOCK_RAW
 socket types; the
 .Dv SOCK_RAW
 interface provides access to the
 .Tn IP
 protocol.
 .Sh ADDRESSING
 Internet addresses are four byte quantities, stored in
 network standard format (on little endian machines, such as the
 .Tn alpha ,
 .Tn amd64
 and
 .Tn i386
 these are word and byte reversed).
 The include file
 .In netinet/in.h
 defines this address
 as a discriminated union.
 .Pp
 Sockets bound to the Internet protocol family utilize
 the following addressing structure,
 .Bd -literal -offset indent
 struct sockaddr_in {
 	uint8_t		sin_len;
 	sa_family_t	sin_family;
 	in_port_t	sin_port;
 	struct in_addr	sin_addr;
 	char		sin_zero[8];
 };
 .Ed
 .Pp
 Sockets may be created with the local address
 .Dv INADDR_ANY
 to affect
 .Dq wildcard
 matching on incoming messages.
 The address in a
 .Xr connect 2
 or
 .Xr sendto 2
 call may be given as
 .Dv INADDR_ANY
 to mean
 .Dq this host .
 The distinguished address
 .Dv INADDR_BROADCAST
 is allowed as a shorthand for the broadcast address on the primary
 network if the first network configured supports broadcast.
 .Sh PROTOCOLS
 The Internet protocol family is comprised of
 the
 .Tn IP
 network protocol, Internet Control
 Message Protocol
 .Pq Tn ICMP ,
 Internet Group Management Protocol
 .Pq Tn IGMP ,
 Transmission Control
 Protocol
 .Pq Tn TCP ,
 and User Datagram Protocol
 .Pq Tn UDP .
 .Tn TCP
 is used to support the
 .Dv SOCK_STREAM
 abstraction while
 .Tn UDP
 is used to support the
 .Dv SOCK_DGRAM
 abstraction.
 A raw interface to
 .Tn IP
 is available
 by creating an Internet socket of type
 .Dv SOCK_RAW .
 The
 .Tn ICMP
 message protocol is accessible from a raw socket.
 .Pp
 The
 .Nm
 address on an interface consists of the address itself, the
 netmask, and either the broadcast address in case of a broadcast
 interface or the peer's address in case of a point-to-point interface.
 The following
 .Xr ioctl 2
 commands are provided for a datagram socket in the Internet domain:
 .Pp
 .Bl -tag -width ".Dv SIOCGIFBRDADDR" -offset indent -compact   
 .It Dv SIOCAIFADDR
 Add address to an interface.
 The command requires
 .Ft struct in_aliasreq
 as argument.
 .It Dv SIOCDIFADDR
 Delete address from an interface.
 The command requires
 .Ft struct ifreq
 as argument.
 .It Dv SIOCGIFADDR
 .It Dv SIOCGIFBRDADDR
 .It Dv SIOCGIFDSTADDR
 .It Dv SIOCGIFNETMASK
 Return address information from interface. The returned value
 is in
 .Ft struct ifreq .
 This way of retrieving address information is obsolete; the
 preferred way is to use the
 .Xr getifaddrs 3
 API.
 .El
 .Ss MIB Variables
 A number of variables are implemented in the net.inet branch of the
 .Xr sysctl 3
 MIB.
 In addition to the variables supported by the transport protocols
 (for which the respective manual pages may be consulted),
 the following general variables are defined:
 .Bl -tag -width IPCTL_FASTFORWARDING
 .It Dv IPCTL_FORWARDING
 .Pq ip.forwarding
 Boolean: enable/disable forwarding of IP packets.
 Defaults to off.
 .It Dv IPCTL_FASTFORWARDING
 .Pq ip.fastforwarding
 Boolean: enable/disable the use of
 .Tn fast IP forwarding
 code.
 Defaults to off.
 When
 .Tn fast IP forwarding
 is enabled, IP packets are forwarded directly to the appropriate network
 interface with direct processing to completion, which greatly improves
 the throughput.
 All packets for local IP addresses, non-unicast, or with IP options are
 handled by the normal IP input processing path.
 All features of the normal (slow) IP forwarding path are supported
 including firewall (through
 .Xr pfil 9
 hooks) checking, except
 .Xr ipsec 4
 tunnel brokering.
 The
 .Tn IP fastforwarding
 path does not generate ICMP redirect or source quench messages.
 .It Dv IPCTL_SENDREDIRECTS
 .Pq ip.redirect
 Boolean: enable/disable sending of ICMP redirects in response to
 .Tn IP
 packets for which a better, and for the sender directly reachable, route
 and next hop is known.
 Defaults to on.
 .It Dv IPCTL_DEFTTL
 .Pq ip.ttl
 Integer: default time-to-live
 .Pq Dq TTL
 to use for outgoing
 .Tn IP
 packets.
 .It Dv IPCTL_ACCEPTSOURCEROUTE
 .Pq ip.accept_sourceroute
 Boolean: enable/disable accepting of source-routed IP packets (default false).
 .It Dv IPCTL_SOURCEROUTE
 .Pq ip.sourceroute
 Boolean: enable/disable forwarding of source-routed IP packets (default false).
 .It Va ip.process_options
 Integer: control IP options processing.
 By setting this variable to 0, all IP options in the incoming packets
 will be ignored, and the packets will be passed unmodified.
 By setting to 1, IP options in the incoming packets will be processed
 accordingly.
 By setting to 2, an
 .Tn ICMP
 .Dq "prohibited by filter"
 message will be sent back in response to incoming packets with IP options.
 Default is 1.
 This
 .Xr sysctl 8
 variable affects packets destined for a local host as well as packets
 forwarded to some other host.
+.It Va ip.rfc6864
+Boolean: control IP IDs generation behaviour.
+A true value enables RFC 6864 support, which specifies that the IP ID field of
+.Em atomic
+datagrams can be set to any value.
+The
+.Fx implementation sets it to zero.
+Enabled by default.
 .It Va ip.random_id
 Boolean: control IP IDs generation behaviour.
 Setting this
 .Xr sysctl 8
-to non-zero causes the ID field in IP packets to be randomized instead of
-incremented by 1 with each packet generated.
+to 1 causes the ID field in
+.Em non-atomic
+IP datagrams (or all IP datagrams, if
+.Va ip.rfc6864
+is disabled) to be randomized instead of incremented by 1 with each packet
+generated.
 This closes a minor information leak which allows remote observers to
 determine the rate of packet generation on the machine by watching the
 counter.
 At the same time, on high-speed links, it can decrease the ID reuse
 cycle greatly.
 Default is 0 (sequential IP IDs).
 IPv6 flow IDs and fragment IDs are always random.
 .It Va ip.maxfragpackets
 Integer: maximum number of fragmented packets the host will accept and hold
 in the reassembling queue simultaneously.
 0 means that the host will not accept any fragmented packets.
 \-1 means that the host will accept as many fragmented packets as it receives.
 .It Va ip.maxfragsperpacket
 Integer: maximum number of fragments the host will accept and hold
 in the reassembling queue for a packet.
 0 means that the host will not accept any fragmented packets.
 .El
 .Sh SEE ALSO
 .Xr ioctl 2 ,
 .Xr socket 2 ,
 .Xr getifaddrs 3 ,
 .Xr sysctl 3 ,
 .Xr icmp 4 ,
 .Xr intro 4 ,
 .Xr ip 4 ,
 .Xr ipfirewall 4 ,
 .Xr route 4 ,
 .Xr tcp 4 ,
 .Xr udp 4 ,
 .Xr pfil 9
 .Rs
 .%T "An Introductory 4.3 BSD Interprocess Communication Tutorial"
 .%B PS1
 .%N 7
 .Re
 .Rs
 .%T "An Advanced 4.3 BSD Interprocess Communication Tutorial"
 .%B PS1
 .%N 8
 .Re
 .Sh HISTORY
 The
 .Nm
 protocol interface appeared in
 .Bx 4.2 .
 The
 .Dq protocol cloning
 code appeared in
 .Fx 2.1 .
 .Sh CAVEATS
 The Internet protocol support is subject to change as
 the Internet protocols develop.
 Users should not depend
 on details of the current implementation, but rather
 the services exported.
Index: head/sys/contrib/ipfilter/netinet/fil.c
===================================================================
--- head/sys/contrib/ipfilter/netinet/fil.c	(revision 280970)
+++ head/sys/contrib/ipfilter/netinet/fil.c	(revision 280971)
@@ -1,10250 +1,10251 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  *
  * Copyright 2008 Sun Microsystems.
  *
  * $Id$
  *
  */
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define        KERNEL	1
 # define        _KERNEL	1
 #endif
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
     (__FreeBSD_version >= 220000)
 # if (__FreeBSD_version >= 400000)
 #  if !defined(IPFILTER_LKM)
 #   include "opt_inet6.h"
 #  endif
 #  if (__FreeBSD_version == 400019)
 #   define CSUM_DELAY_DATA
 #  endif
 # endif
 # include <sys/filio.h>
 #else
 # include <sys/ioctl.h>
 #endif
 #if (defined(__SVR4) || defined(__svr4__)) && defined(sun)
 # include <sys/filio.h>
 #endif
 #if !defined(_AIX51)
 # include <sys/fcntl.h>
 #endif
 #if defined(_KERNEL)
 # include <sys/systm.h>
 # include <sys/file.h>
 #else
 # include <stdio.h>
 # include <string.h>
 # include <stdlib.h>
 # include <stddef.h>
 # include <sys/file.h>
 # define _KERNEL
 # ifdef __OpenBSD__
 struct file;
 # endif
 # include <sys/uio.h>
 # undef _KERNEL
 #endif
 #if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \
     !defined(linux)
 # include <sys/mbuf.h>
 #else
 # if !defined(linux)
 #  include <sys/byteorder.h>
 # endif
 # if (SOLARIS2 < 5) && defined(sun)
 #  include <sys/dditypes.h>
 # endif
 #endif
 #ifdef __hpux
 # define _NET_ROUTE_INCLUDED
 #endif
 #if !defined(linux)
 # include <sys/protosw.h>
 #endif
 #include <sys/socket.h>
 #include <net/if.h>
 #ifdef sun
 # include <net/af.h>
 #endif
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */
 # include <sys/hashing.h>
 # include <netinet/in_var.h>
 #endif
 #include <netinet/tcp.h>
 #if (!defined(__sgi) && !defined(AIX)) || defined(_KERNEL)
 # include <netinet/udp.h>
 # include <netinet/ip_icmp.h>
 #endif
 #ifdef __hpux
 # undef _NET_ROUTE_INCLUDED
 #endif
 #ifdef __osf__
 # undef _RADIX_H_
 #endif
 #include "netinet/ip_compat.h"
 #ifdef	USE_INET6
 # include <netinet/icmp6.h>
 # if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux)
 #  include <netinet6/in6_var.h>
 # endif
 #endif
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_auth.h"
 #ifdef IPFILTER_SCAN
 # include "netinet/ip_scan.h"
 #endif
 #include "netinet/ip_sync.h"
 #include "netinet/ip_lookup.h"
 #include "netinet/ip_pool.h"
 #include "netinet/ip_htable.h"
 #ifdef IPFILTER_COMPILED
 # include "netinet/ip_rules.h"
 #endif
 #if defined(IPFILTER_BPF) && defined(_KERNEL)
 # include <net/bpf.h>
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
 # include <sys/malloc.h>
 #endif
 #include "netinet/ipl.h"
 
 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
 # include <sys/callout.h>
 extern struct callout ipf_slowtimer_ch;
 #endif
 #if defined(__OpenBSD__)
 # include <sys/timeout.h>
 extern struct timeout ipf_slowtimer_ch;
 #endif
 /* END OF INCLUDES */
 
 /*
  * Source identification strings; compiled out when "lint" is defined.
  * The commented-out rcsid records an older $Id$ value for reference.
  */
 #if !defined(lint)
 static const char sccsid[] = "@(#)fil.c	1.36 6/5/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$FreeBSD$";
 /* static const char rcsid[] = "@(#)$Id: fil.c,v 2.243.2.125 2007/10/10 09:27:20 darrenr Exp $"; */
 #endif
 
 #ifndef	_KERNEL
 # include "ipf.h"
 # include "ipt.h"
 extern	int	opts;
 extern	int	blockreason;
 #endif /* _KERNEL */
 
 /*
  * Statistics helpers.  Both expand to an expression on "softc", so a
  * variable of that name must be in scope wherever they are used.
  * LBUMP(x) increments softc->x; LBUMPD(x, y) increments softc->x.y and
  * then invokes DT(y) (DT is defined elsewhere - presumably a trace probe,
  * TODO confirm).
  */
 #define	LBUMP(x)	softc->x++
 #define	LBUMPD(x, y)	do { softc->x.y++; DT(y); } while (0)
 
 /*
  * Forward declarations for the file-local (static) helpers.  The parameter
  * lists are wrapped in __P(()), a macro defined outside this file.
  * ipf_dolog is only declared when IPFILTER_LOG is defined, and
  * ppsratecheck only for the build/platform combinations tested at the end
  * of this list.
  */
 static	INLINE int	ipf_check_ipf __P((fr_info_t *, frentry_t *, int));
 static	u_32_t		ipf_checkcipso __P((fr_info_t *, u_char *, int));
 static	u_32_t		ipf_checkripso __P((u_char *));
 static	u_32_t		ipf_decaps __P((fr_info_t *, u_32_t, int));
 #ifdef IPFILTER_LOG
 static	frentry_t	*ipf_dolog __P((fr_info_t *, u_32_t *));
 #endif
 static	int		ipf_flushlist __P((ipf_main_softc_t *, int *,
 					   frentry_t **));
 static	int		ipf_flush_groups __P((ipf_main_softc_t *, frgroup_t **,
 					      int));
 static	ipfunc_t	ipf_findfunc __P((ipfunc_t));
 static	void		*ipf_findlookup __P((ipf_main_softc_t *, int,
 					     frentry_t *,
 					     i6addr_t *, i6addr_t *));
 static	frentry_t	*ipf_firewall __P((fr_info_t *, u_32_t *));
 static	int		ipf_fr_matcharray __P((fr_info_t *, int *));
 static	int		ipf_frruleiter __P((ipf_main_softc_t *, void *, int,
 					    void *));
 static	void		ipf_funcfini __P((ipf_main_softc_t *, frentry_t *));
 static	int		ipf_funcinit __P((ipf_main_softc_t *, frentry_t *));
 static	int		ipf_geniter __P((ipf_main_softc_t *, ipftoken_t *,
 					 ipfgeniter_t *));
 static	void		ipf_getstat __P((ipf_main_softc_t *,
 					 struct friostat *, int));
 static	int		ipf_group_flush __P((ipf_main_softc_t *, frgroup_t *));
 static	void		ipf_group_free __P((frgroup_t *));
 static	int		ipf_grpmapfini __P((struct ipf_main_softc_s *,
 					    frentry_t *));
 static	int		ipf_grpmapinit __P((struct ipf_main_softc_s *,
 					    frentry_t *));
 static	frentry_t	*ipf_nextrule __P((ipf_main_softc_t *, int, int,
 					   frentry_t *, int));
 static	int		ipf_portcheck __P((frpcmp_t *, u_32_t));
 static	INLINE int	ipf_pr_ah __P((fr_info_t *));
 static	INLINE void	ipf_pr_esp __P((fr_info_t *));
 static	INLINE void	ipf_pr_gre __P((fr_info_t *));
 static	INLINE void	ipf_pr_udp __P((fr_info_t *));
 static	INLINE void	ipf_pr_tcp __P((fr_info_t *));
 static	INLINE void	ipf_pr_icmp __P((fr_info_t *));
 static	INLINE void	ipf_pr_ipv4hdr __P((fr_info_t *));
 static	INLINE void	ipf_pr_short __P((fr_info_t *, int));
 static	INLINE int	ipf_pr_tcpcommon __P((fr_info_t *));
 static	INLINE int	ipf_pr_udpcommon __P((fr_info_t *));
 static	void		ipf_rule_delete __P((ipf_main_softc_t *, frentry_t *f,
 					     int, int));
 static	void		ipf_rule_expire_insert __P((ipf_main_softc_t *,
 						    frentry_t *, int));
 static	int		ipf_synclist __P((ipf_main_softc_t *, frentry_t *,
 					  void *));
 static	void		ipf_token_flush __P((ipf_main_softc_t *));
 static	void		ipf_token_unlink __P((ipf_main_softc_t *,
 					      ipftoken_t *));
 static	ipftuneable_t	*ipf_tune_findbyname __P((ipftuneable_t *,
 						  const char *));
 static	ipftuneable_t	*ipf_tune_findbycookie __P((ipftuneable_t **, void *,
 						    void **));
 static	int		ipf_updateipid __P((fr_info_t *));
 static	int		ipf_settimeout __P((struct ipf_main_softc_s *,
 					    struct ipftuneable *,
 					    ipftuneval_t *));
 /* ppsratecheck is declared here as a file-local function on these builds. */
 #if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \
      !defined(__FreeBSD__)) || \
     FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \
     OPENBSD_LT_REV(200006)
 static	int		ppsratecheck(struct timeval *, int *, int);
 #endif
 
 
 /*
  * bit values for identifying presence of individual IP options
  * All of these tables should be ordered by increasing key value on the left
  * hand side to allow for binary searching of the array and include a trailer
  * with a 0 for the bitmask for linear searches to easily find the end with.
  */
 /* IP option code (left) paired with the single mask bit that stands for it. */
 static const	struct	optlist	ipopts[20] = {
 	{ IPOPT_NOP,	0x000001 },
 	{ IPOPT_RR,	0x000002 },
 	{ IPOPT_ZSU,	0x000004 },
 	{ IPOPT_MTUP,	0x000008 },
 	{ IPOPT_MTUR,	0x000010 },
 	{ IPOPT_ENCODE,	0x000020 },
 	{ IPOPT_TS,	0x000040 },
 	{ IPOPT_TR,	0x000080 },
 	{ IPOPT_SECURITY, 0x000100 },
 	{ IPOPT_LSRR,	0x000200 },
 	{ IPOPT_E_SEC,	0x000400 },
 	{ IPOPT_CIPSO,	0x000800 },
 	{ IPOPT_SATID,	0x001000 },
 	{ IPOPT_SSRR,	0x002000 },
 	{ IPOPT_ADDEXT,	0x004000 },
 	{ IPOPT_VISA,	0x008000 },
 	{ IPOPT_IMITD,	0x010000 },
 	{ IPOPT_EIP,	0x020000 },
 	{ IPOPT_FINN,	0x040000 },
 	/* Trailer: a zero bitmask marks the end for linear searches. */
 	{ 0,		0x000000 }
 };
 
 #ifdef USE_INET6
 /*
  * IPv6 next-header value (ol_val) paired with the mask bit (ol_bit) that
  * records its presence.  Users in this file scan the table linearly until
  * they reach an entry whose ol_bit is 0.
  */
 static const struct optlist ip6exthdr[] = {
 	{ IPPROTO_HOPOPTS,		0x000001 },
 	{ IPPROTO_IPV6,			0x000002 },
 	{ IPPROTO_ROUTING,		0x000004 },
 	{ IPPROTO_FRAGMENT,		0x000008 },
 	{ IPPROTO_ESP,			0x000010 },
 	{ IPPROTO_AH,			0x000020 },
 	{ IPPROTO_NONE,			0x000040 },
 	{ IPPROTO_DSTOPTS,		0x000080 },
 	{ IPPROTO_MOBILITY,		0x000100 },
 	/* End-of-table marker (ol_bit == 0). */
 	{ 0,				0 }
 };
 #endif
 
 /*
  * bit values for identifying presence of individual IP security options
  */
 /*
  * Security classification value paired with its mask bit.  Note that this
  * table has exactly 8 entries and, unlike ipopts, no zero-bit trailer.
  */
 static const	struct	optlist	secopt[8] = {
 	{ IPSO_CLASS_RES4,	0x01 },
 	{ IPSO_CLASS_TOPS,	0x02 },
 	{ IPSO_CLASS_SECR,	0x04 },
 	{ IPSO_CLASS_RES3,	0x08 },
 	{ IPSO_CLASS_CONF,	0x10 },
 	{ IPSO_CLASS_UNCL,	0x20 },
 	{ IPSO_CLASS_RES2,	0x40 },
 	{ IPSO_CLASS_RES1,	0x80 }
 };
 
 /* Version string for this build, initialised from the IPL_VERSION macro. */
 char	ipfilter_version[] = IPL_VERSION;
 
 /*
  * Bitmask of IPF_FEAT_* flags describing how this file was built.
  * IPF_FEAT_LOOKUP and IPF_FEAT_SYNC are always set; every other flag is
  * OR'ed in only when the matching preprocessor symbol is defined.
  */
 int	ipf_features = 0
 #ifdef	IPFILTER_LKM
 		| IPF_FEAT_LKM
 #endif
 #ifdef	IPFILTER_LOG
 		| IPF_FEAT_LOG
 #endif
 		| IPF_FEAT_LOOKUP
 #ifdef	IPFILTER_BPF
 		| IPF_FEAT_BPF
 #endif
 #ifdef	IPFILTER_COMPILED
 		| IPF_FEAT_COMPILED
 #endif
 #ifdef	IPFILTER_CKSUM
 		| IPF_FEAT_CKSUM
 #endif
 		| IPF_FEAT_SYNC
 #ifdef	IPFILTER_SCAN
 		| IPF_FEAT_SCAN
 #endif
 #ifdef	USE_INET6
 		| IPF_FEAT_IPV6
 #endif
 	;
 
 
 /*
  * Table of functions available for use with call rules.
  */
 /*
  * Each entry: name string, the function itself, then two hooks.  Going by
  * the names ipf_grpmapinit/ipf_grpmapfini these are presumably the setup
  * and teardown callbacks - TODO confirm against ipfunc_resolve_t.
  */
 static ipfunc_resolve_t ipf_availfuncs[] = {
 	{ "srcgrpmap", ipf_srcgrpmap, ipf_grpmapinit, ipf_grpmapfini },
 	{ "dstgrpmap", ipf_dstgrpmap, ipf_grpmapinit, ipf_grpmapfini },
 	/* Terminator: empty name and NULL function pointers. */
 	{ "",	      NULL,	      NULL,	      NULL }
 };
 
 /*
  * Table of run-time tuneables that live in ipf_main_softc_t.
  *
  * Each initializer supplies, in order: the offsetof() of the field inside
  * the softc (stored as a pointer-sized cookie), the tuneable's name, two
  * numeric values, the field size via stsizeof(), a flags word (IPFT_RDONLY
  * is used for "active"), a NULL pointer and an optional handler function.
  * The two numeric values are presumably the minimum and maximum accepted
  * settings - TODO confirm against the ipftuneable_t declaration.
  *
  * All of the *_timeout / tcp_* entries share the ipf_settimeout handler.
  * The table ends with an entry whose name is NULL.
  *
  * NOTE(review): if the numeric pair really is (min, max), an upper bound
  * of 1 for "min_ttl" and "icmp_minfragmtu" looks too small for a TTL or an
  * MTU value - verify against upstream before relying on these limits.
  */
 static ipftuneable_t ipf_main_tuneables[] = {
 	{ { (void *)offsetof(struct ipf_main_softc_s, ipf_flags) },
 		"ipf_flags",		0,	0xffffffff,
 		stsizeof(ipf_main_softc_t, ipf_flags),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(struct ipf_main_softc_s, ipf_active) },
 		"active",		0,	0,
 		stsizeof(ipf_main_softc_t, ipf_active),
 		IPFT_RDONLY,		NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_control_forwarding) },
 		"control_forwarding",	0, 1,
 		stsizeof(ipf_main_softc_t, ipf_control_forwarding),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_update_ipid) },
 		"update_ipid",		0,	1,
 		stsizeof(ipf_main_softc_t, ipf_update_ipid),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_chksrc) },
 		"chksrc",		0,	1,
 		stsizeof(ipf_main_softc_t, ipf_chksrc),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_minttl) },
 		"min_ttl",		0,	1,
 		stsizeof(ipf_main_softc_t, ipf_minttl),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_icmpminfragmtu) },
 		"icmp_minfragmtu",	0,	1,
 		stsizeof(ipf_main_softc_t, ipf_icmpminfragmtu),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_pass) },
 		"default_pass",		0,	0xffffffff,
 		stsizeof(ipf_main_softc_t, ipf_pass),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcpidletimeout) },
 		"tcp_idle_timeout",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcpidletimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcpclosewait) },
 		"tcp_close_wait",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcpclosewait),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcplastack) },
 		"tcp_last_ack",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcplastack),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcptimeout) },
 		"tcp_timeout",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcptimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcpsynsent) },
 		"tcp_syn_sent",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcpsynsent),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcpsynrecv) },
 		"tcp_syn_received",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcpsynrecv),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcpclosed) },
 		"tcp_closed",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcpclosed),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcphalfclosed) },
 		"tcp_half_closed",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcphalfclosed),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_tcptimewait) },
 		"tcp_time_wait",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_tcptimewait),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_udptimeout) },
 		"udp_timeout",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_udptimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_udpacktimeout) },
 		"udp_ack_timeout",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_udpacktimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_icmptimeout) },
 		"icmp_timeout",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_icmptimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_icmpacktimeout) },
 		"icmp_ack_timeout",	1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_icmpacktimeout),
 		0,			NULL,	ipf_settimeout },
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_iptimeout) },
 		"ip_timeout",		1,	0x7fffffff,
 		stsizeof(ipf_main_softc_t, ipf_iptimeout),
 		0,			NULL,	ipf_settimeout },
 	/* Only present for multi-instance kernel builds. */
 #if defined(INSTANCES) && defined(_KERNEL)
 	{ { (void *)offsetof(ipf_main_softc_t, ipf_get_loopback) },
 		"intercept_loopback",	0,	1,
 		stsizeof(ipf_main_softc_t, ipf_get_loopback),
 		0,			NULL,	ipf_set_loopback },
 #endif
 	/* End-of-table marker: NULL name, everything else zero. */
 	{ { 0 },
 		NULL,			0,	0,
 		0,
 		0,			NULL,	NULL }
 };
 
 
 /*
  * The next section of code is a collection of small routines that set
  * fields in the fr_info_t structure passed based on properties of the
  * current packet.  There are different routines for the same protocol
  * for each of IPv4 and IPv6.  Adding a new protocol, for which there
  * will be "special" inspection for setup, is now more easily done by adding
  * a new routine and expanding the ipf_pr_ipinit*() function rather than by
  * adding more code to a growing switch statement.
  */
 #ifdef USE_INET6
 /*
  * Forward declarations of the IPv6 per-protocol and per-extension-header
  * analysers; only compiled when USE_INET6 is defined.
  */
 static	INLINE int	ipf_pr_ah6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_esp6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_gre6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_udp6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_tcp6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_icmp6 __P((fr_info_t *));
 static	INLINE void	ipf_pr_ipv6hdr __P((fr_info_t *));
 static	INLINE void	ipf_pr_short6 __P((fr_info_t *, int));
 static	INLINE int	ipf_pr_hopopts6 __P((fr_info_t *));
 static	INLINE int	ipf_pr_mobility6 __P((fr_info_t *));
 static	INLINE int	ipf_pr_routing6 __P((fr_info_t *));
 static	INLINE int	ipf_pr_dstopts6 __P((fr_info_t *));
 static	INLINE int	ipf_pr_fragment6 __P((fr_info_t *));
 static	INLINE struct ip6_ext *ipf_pr_ipv6exthdr __P((fr_info_t *, int, int));
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_short6                                               */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I)  - pointer to packet information                     */
 /*              xmin(I) - minimum header size                               */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function enforces the 'is a packet too short to be legit' rule      */
 /* for IPv6 and marks the packet with FI_SHORT if so.  See function comment */
 /* for ipf_pr_short() for more details.                                     */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_short6(fin, xmin)
 	fr_info_t *fin;
 	int xmin;
 {
 	/*
 	 * Flag the packet as short when the remaining data length is below
 	 * the minimum the caller requires for this header.
 	 */
 	if (xmin > fin->fin_dlen) {
 		fin->fin_flx |= FI_SHORT;
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_ipv6hdr                                              */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Copy values from the IPv6 header into the fr_info_t struct and call the  */
 /* per-protocol analyzer if it exists.  In validating the packet, a protocol*/
 /* analyzer may pullup or free the packet itself so we need to be vigilant  */
 /* of that possibility arising.                                             */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_ipv6hdr(fin)
 	fr_info_t *fin;
 {
 	ip6_t *ip6 = (ip6_t *)fin->fin_ip;
 	int p, go = 1, i;
 	fr_ip_t *fi = &fin->fin_fi;
 
 	fin->fin_off = 0;
 
 	/* None of these have an IPv6 equivalent taken from the base header. */
 	fi->fi_tos = 0;
 	fi->fi_optmsk = 0;
 	fi->fi_secmsk = 0;
 	fi->fi_auth = 0;
 
 	/*
 	 * Copy next-header, hop limit and both addresses out of the IPv6
 	 * header, summing the next-header value and every 32-bit address
 	 * word into fin_crc as we go.
 	 */
 	p = ip6->ip6_nxt;
 	fin->fin_crc = p;
 	fi->fi_ttl = ip6->ip6_hlim;
 	fi->fi_src.in6 = ip6->ip6_src;
 	fin->fin_crc += fi->fi_src.i6[0];
 	fin->fin_crc += fi->fi_src.i6[1];
 	fin->fin_crc += fi->fi_src.i6[2];
 	fin->fin_crc += fi->fi_src.i6[3];
 	fi->fi_dst.in6 = ip6->ip6_dst;
 	fin->fin_crc += fi->fi_dst.i6[0];
 	fin->fin_crc += fi->fi_dst.i6[1];
 	fin->fin_crc += fi->fi_dst.i6[2];
 	fin->fin_crc += fi->fi_dst.i6[3];
 	fin->fin_id = 0;
 	if (IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
 		fin->fin_flx |= FI_MULTICAST|FI_MBCAST;
 
 	/*
 	 * Walk the header chain.  Extension-header handlers return the next
 	 * header value in "p" and leave "go" set; anything that ends the
 	 * chain (a layer 4 protocol, IPPROTO_NONE or an unknown value)
 	 * clears "go".  The walk also stops once the packet is marked short.
 	 */
 	while (go && !(fin->fin_flx & FI_SHORT)) {
 		switch (p)
 		{
 		case IPPROTO_UDP :
 			ipf_pr_udp6(fin);
 			go = 0;
 			break;
 
 		case IPPROTO_TCP :
 			ipf_pr_tcp6(fin);
 			go = 0;
 			break;
 
 		case IPPROTO_ICMPV6 :
 			ipf_pr_icmp6(fin);
 			go = 0;
 			break;
 
 		case IPPROTO_GRE :
 			ipf_pr_gre6(fin);
 			go = 0;
 			break;
 
 		case IPPROTO_HOPOPTS :
 			p = ipf_pr_hopopts6(fin);
 			break;
 
 		case IPPROTO_MOBILITY :
 			p = ipf_pr_mobility6(fin);
 			break;
 
 		case IPPROTO_DSTOPTS :
 			p = ipf_pr_dstopts6(fin);
 			break;
 
 		case IPPROTO_ROUTING :
 			p = ipf_pr_routing6(fin);
 			break;
 
 		case IPPROTO_AH :
 			p = ipf_pr_ah6(fin);
 			break;
 
 		case IPPROTO_ESP :
 			ipf_pr_esp6(fin);
 			go = 0;
 			break;
 
 		case IPPROTO_IPV6 :
 			/*
 			 * NOTE(review): the table bit is OR'ed into fin_flx
 			 * here, whereas ipf_pr_ipv6exthdr() records ip6exthdr
 			 * bits in fin_optmsk - confirm which is intended.
 			 */
 			for (i = 0; ip6exthdr[i].ol_bit != 0; i++)
 				if (ip6exthdr[i].ol_val == p) {
 					fin->fin_flx |= ip6exthdr[i].ol_bit;
 					break;
 				}
 			go = 0;
 			break;
 
 		case IPPROTO_NONE :
 			go = 0;
 			break;
 
 		case IPPROTO_FRAGMENT :
 			p = ipf_pr_fragment6(fin);
 			/*
 			 * Given that the only fragments we want to let through
 			 * (where fin_off != 0) are those where the non-first
 			 * fragments only have data, we can safely stop looking
 			 * at headers if this is a non-leading fragment.
 			 */
 			if (fin->fin_off != 0)
 				go = 0;
 			break;
 
 		default :
 			go = 0;
 			break;
 		}
 
 		/*
 		 * It is important to note that at this point, for the
 		 * extension headers (go != 0), the entire header may not have
 		 * been pulled up when the code gets to this point.  This is
 		 * only done for "go != 0" because the other header handlers
 		 * will all pullup their complete header.  The other indicator
 		 * of an incomplete packet is that this was just an extension
 		 * header.
 		 */
 		if ((go != 0) && (p != IPPROTO_NONE) &&
 		    (ipf_pr_pullup(fin, 0) == -1)) {
 			p = IPPROTO_NONE;
 			break;
 		}
 	}
 
 	/*
 	 * Some of the above functions, like ipf_pr_esp6(), can call ipf_pullup
 	 * and destroy whatever packet was here.  The caller of this function
 	 * expects us to return if there is a problem with ipf_pullup.
 	 */
 	if (fin->fin_m == NULL) {
 		ipf_main_softc_t *softc = fin->fin_main_soft;
 
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_bad);
 		return;
 	}
 
 	/* Record the last next-header value reached as the packet protocol. */
 	fi->fi_p = p;
 
 	/*
 	 * IPv6 fragment case 1 - see comment for ipf_pr_fragment6().
 	 * "go != 0" implies the above loop hasn't arrived at a layer 4 header.
 	 */
 	if ((go != 0) && (fin->fin_flx & FI_FRAG) && (fin->fin_off == 0)) {
 		ipf_main_softc_t *softc = fin->fin_main_soft;
 
 		fin->fin_flx |= FI_BAD;
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_badfrag);
 		LBUMP(ipf_stats[fin->fin_out].fr_v6_bad);
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_ipv6exthdr                                           */
 /* Returns:     struct ip6_ext * - pointer to the start of the next header  */
 /*                                 or NULL if there is a problem.           */
 /* Parameters:  fin(I)      - pointer to packet information                 */
 /*              multiple(I) - flag indicating whether multiple occurrences  */
 /*                            of this extension header are allowed.         */
 /*              proto(I)    - protocol number for this extension header     */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function embodies a number of common checks that all IPv6 extension */
 /* headers must be subjected to.  For example, making sure the packet is    */
 /* big enough for it to be in, checking if it is repeated and setting a     */
 /* flag to indicate its presence.                                           */
 /* ------------------------------------------------------------------------ */
 static INLINE struct ip6_ext *
 ipf_pr_ipv6exthdr(fin, multiple, proto)
 	fr_info_t *fin;
 	int multiple, proto;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	struct ip6_ext *hdr;
 	u_short shift;
 	int i;
 
 	fin->fin_flx |= FI_V6EXTHDR;
 
 	/* An extension header is at least 8 bytes long. */
 	if ((fin->fin_dlen - 8) < 0) {
 		fin->fin_flx |= FI_SHORT;
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_short);
 		return NULL;
 	}
 
 	if (ipf_pr_pullup(fin, 8) == -1) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_pullup);
 		return NULL;
 	}
 
 	hdr = fin->fin_dp;
 
 	/*
 	 * The fragment header always occupies 8 bytes; every other header
 	 * adds ip6e_len further 8-byte units on top of the first 8 bytes.
 	 */
 	shift = 8;
 	if (proto != IPPROTO_FRAGMENT)
 		shift = 8 + (hdr->ip6e_len << 3);
 
 	/* Reject a header that claims to extend beyond the data we have. */
 	if (shift > fin->fin_dlen) {
 		fin->fin_flx |= FI_BAD;
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_ext_hlen);
 		return NULL;
 	}
 
 	/* Step over this header. */
 	fin->fin_dp = (char *)fin->fin_dp + shift;
 	fin->fin_dlen -= shift;
 
 	/*
 	 * Once a fragment header has been seen, later extension headers are
 	 * not recorded: they have no bearing on the result until the packet
 	 * has been reassembled.
 	 */
 	if (fin->fin_flx & FI_FRAG)
 		return hdr;
 
 	for (i = 0; ip6exthdr[i].ol_bit != 0; i++) {
 		if (ip6exthdr[i].ol_val != proto)
 			continue;
 
 		/*
 		 * Found the table entry.  A repeat of a header that may only
 		 * appear once marks the packet bad; otherwise note that the
 		 * header is present.
 		 */
 		if ((multiple == 0) &&
 		    ((fin->fin_optmsk & ip6exthdr[i].ol_bit) != 0)) {
 			fin->fin_flx |= FI_BAD;
 		} else {
 			fin->fin_optmsk |= ip6exthdr[i].ol_bit;
 		}
 		break;
 	}
 
 	return hdr;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_hopopts6                                             */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function checks pending hop by hop options extension header         */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_hopopts6(fin)
 	fr_info_t *fin;
 {
 	struct ip6_ext *exthdr;
 
 	/* Run the common checks; the header may appear only once. */
 	exthdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS);
 
 	/* No usable header: report IPPROTO_NONE, else the next header. */
 	if (exthdr != NULL)
 		return exthdr->ip6e_nxt;
 	return IPPROTO_NONE;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_mobility6                                            */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function checks the IPv6 mobility extension header                  */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_mobility6(fin)
 	fr_info_t *fin;
 {
 	struct ip6_ext *exthdr;
 
 	/* Run the common checks; the header may appear only once. */
 	exthdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_MOBILITY);
 
 	/* No usable header: report IPPROTO_NONE, else the next header. */
 	if (exthdr != NULL)
 		return exthdr->ip6e_nxt;
 	return IPPROTO_NONE;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_routing6                                             */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function checks pending routing extension header                    */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_routing6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	struct ip6_routing *rthdr;
 
 	rthdr = (struct ip6_routing *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_ROUTING);
 	if (rthdr == NULL)
 		return IPPROTO_NONE;
 
 	/*
 	 * Only type 0 routing headers get a sanity check: the header is bad
 	 * when half of the header length is smaller than the number of
 	 * segments left, or when segments remain and the length is odd.
 	 */
 	if (rthdr->ip6r_type == 0) {
 		if (((rthdr->ip6r_len >> 1) < rthdr->ip6r_segleft) ||
 		    (rthdr->ip6r_segleft && (rthdr->ip6r_len & 1))) {
 			fin->fin_flx |= FI_BAD;
 			LBUMPD(ipf_stats[fin->fin_out], fr_v6_rh_bad);
 			return IPPROTO_NONE;
 		}
 	}
 
 	return rthdr->ip6r_nxt;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_fragment6                                            */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Examine the IPv6 fragment header and extract fragment offset information.*/
 /*                                                                          */
 /* Fragments in IPv6 are extraordinarily difficult to deal with - much more */
 /* so than in IPv4.  There are 5 cases of fragments with IPv6 that all      */
 /* packets with a fragment header can fit into.  They are as follows:       */
 /*                                                                          */
 /* 1.  [IPv6][0-n EH][FH][0-n EH] (no L4HDR present)                        */
 /* 2.  [IPV6][0-n EH][FH][0-n EH][L4HDR part] (short)                       */
 /* 3.  [IPV6][0-n EH][FH][L4HDR part][0-n data] (short)                     */
 /* 4.  [IPV6][0-n EH][FH][0-n EH][L4HDR][0-n data]                          */
 /* 5.  [IPV6][0-n EH][FH][data]                                             */
 /*                                                                          */
 /* IPV6 = IPv6 header, FH = Fragment Header,                                */
 /* 0-n EH = 0 or more extension headers, 0-n data = 0 or more bytes of data */
 /*                                                                          */
 /* Packets that match 1, 2, 3 will be dropped as the only reasonable        */
 /* scenario in which they happen is in extreme circumstances that are most  */
 /* likely to be an indication of an attack rather than normal traffic.      */
 /* A type 3 packet may be sent by an attacker after a type 4 packet.  There */
 /* are two rules that can be used to guard against type 3 packets: L4       */
 /* headers must always be in a packet that has the offset field set to 0    */
 /* and no packet is allowed to overlay that where offset = 0.               */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_fragment6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	struct ip6_frag *fh;
 
 	/*
 	 * FI_FRAG is set before the common extension header checks run, so
 	 * ipf_pr_ipv6exthdr() returns without recording this header in
 	 * fin_optmsk.
 	 */
 	fin->fin_flx |= FI_FRAG;
 
 	fh = (struct ip6_frag *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_FRAGMENT);
 	if (fh == NULL) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_frag_bad);
 		return IPPROTO_NONE;
 	}
 
 	/*
 	 * Every fragment except the last must have a length that is a
 	 * multiple of 8.
 	 */
 	if (((fh->ip6f_offlg & IP6F_MORE_FRAG) != 0) &&
 	    ((fin->fin_plen & 7) != 0)) {
 		fin->fin_flx |= FI_BAD;
 	}
 
 	/* Remember the fragment header, its ident and its offset. */
 	fin->fin_fraghdr = fh;
 	fin->fin_id = fh->ip6f_ident;
 	fin->fin_off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
 	if (fin->fin_off != 0) {
 		fin->fin_flx |= FI_FRAGBODY;
 	}
 
 	/*
 	 * Jumbograms are not handled, so nothing may reach past 64k.
 	 * NOTE(review): whether fin_off is already a byte count at this
 	 * point depends on how IP6F_OFF_MASK is defined (not visible here);
 	 * confirm that the additional "<< 3" is intended.
 	 */
 	if ((fin->fin_off << 3) + fin->fin_dlen > 65535) {
 		fin->fin_flx |= FI_BAD;
 	}
 
 	/*
 	 * The transport header (or whatever follows) may sit behind further
 	 * extension headers such as destination options, so simply hand back
 	 * the next-header value from the fragment header.  Note that this
 	 * effectively disables the fragment cache for > 1 protocol at a
 	 * time.
 	 */
 	return fh->ip6f_nxt;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_dstopts6                                             */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This function checks pending destination options extension header        */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_dstopts6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	struct ip6_ext *exthdr;
 
 	/* Run the common checks; the header may appear only once. */
 	exthdr = ipf_pr_ipv6exthdr(fin, 0, IPPROTO_DSTOPTS);
 	if (exthdr != NULL)
 		return exthdr->ip6e_nxt;
 
 	/* The common checks failed: count it and stop the header walk. */
 	LBUMPD(ipf_stats[fin->fin_out], fr_v6_dst_bad);
 	return IPPROTO_NONE;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_icmp6                                                */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* This routine is mainly concerned with determining the minimum valid size */
 /* for an ICMPv6 packet.                                                    */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_icmp6(fin)
 	fr_info_t *fin;
 {
 	/* Default minimum: just the ICMPv6 header itself. */
 	int minicmpsz = sizeof(struct icmp6_hdr);
 	struct icmp6_hdr *icmp6;
 
 	/* Make the ICMPv6 data accessible; count and give up on failure. */
 	if (ipf_pr_pullup(fin, ICMP6ERR_MINPKTLEN - sizeof(ip6_t)) == -1) {
 		ipf_main_softc_t *softc = fin->fin_main_soft;
 
 		LBUMPD(ipf_stats[fin->fin_out], fr_v6_icmp6_pullup);
 		return;
 	}
 
 	if (fin->fin_dlen > 1) {
 		ip6_t *ip6;
 
 		icmp6 = fin->fin_dp;
 
 		/* Keep the first two bytes of the ICMPv6 header, unconverted. */
 		fin->fin_data[0] = *(u_short *)icmp6;
 
 		if ((icmp6->icmp6_type & ICMP6_INFOMSG_MASK) != 0)
 			fin->fin_flx |= FI_ICMPQUERY;
 
 		switch (icmp6->icmp6_type)
 		{
 		case ICMP6_ECHO_REPLY :
 		case ICMP6_ECHO_REQUEST :
 			/* The id is only recorded if at least 6 bytes exist. */
 			if (fin->fin_dlen >= 6)
 				fin->fin_data[1] = icmp6->icmp6_id;
 			minicmpsz = ICMP6ERR_MINPKTLEN - sizeof(ip6_t);
 			break;
 
 		case ICMP6_DST_UNREACH :
 		case ICMP6_PACKET_TOO_BIG :
 		case ICMP6_TIME_EXCEEDED :
 		case ICMP6_PARAM_PROB :
 			fin->fin_flx |= FI_ICMPERR;
 			minicmpsz = ICMP6ERR_IPICMPHLEN - sizeof(ip6_t);
 			/*
 			 * Too small to carry an embedded header: skip the
 			 * address check and let ipf_pr_short6() below flag it.
 			 */
 			if (fin->fin_plen < ICMP6ERR_IPICMPHLEN)
 				break;
 
 			/*
 			 * Coalesce when M_LEN(fin->fin_m) is smaller than the
 			 * packet length (presumably the packet spans several
 			 * buffers - TODO confirm M_LEN semantics).
 			 */
 			if (M_LEN(fin->fin_m) < fin->fin_plen) {
 				if (ipf_coalesce(fin) != 1)
 					return;
 			}
 
 			if (ipf_pr_pullup(fin, ICMP6ERR_MINPKTLEN) == -1)
 				return;
 
 			/*
 			 * If the destination of this packet doesn't match the
 			 * source of the original packet then this packet is
 			 * not correct.
 			 */
 			/*
 			 * fin_dp is re-read here, presumably because the
 			 * coalesce/pullup above may have moved the data.
 			 */
 			icmp6 = fin->fin_dp;
 			ip6 = (ip6_t *)((char *)icmp6 + ICMPERR_ICMPHLEN);
 			if (IP6_NEQ(&fin->fin_fi.fi_dst,
 				    (i6addr_t *)&ip6->ip6_src))
 				fin->fin_flx |= FI_BAD;
 			break;
 		default :
 			break;
 		}
 	}
 
 	ipf_pr_short6(fin, minicmpsz);
 	/*
 	 * Only checksum packets that are neither short nor bad.  fin_p is
 	 * forced to IPPROTO_ICMPV6 for the duration of the call and then
 	 * restored to its previous value.
 	 */
 	if ((fin->fin_flx & (FI_SHORT|FI_BAD)) == 0) {
 		u_char p = fin->fin_p;
 
 		fin->fin_p = IPPROTO_ICMPV6;
 		ipf_checkv6sum(fin);
 		fin->fin_p = p;
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_udp6                                                 */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Analyse the packet for IPv6/UDP properties.                              */
 /* Is not expected to be called for fragmented packets.                     */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_udp6(fin)
 	fr_info_t *fin;
 {
 	u_char saved_p;
 
 	/* Only checksum packets the common UDP parser accepted. */
 	if (ipf_pr_udpcommon(fin) != 0)
 		return;
 
 	/* fin_p is forced to UDP for the checksum call, then restored. */
 	saved_p = fin->fin_p;
 	fin->fin_p = IPPROTO_UDP;
 	ipf_checkv6sum(fin);
 	fin->fin_p = saved_p;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_tcp6                                                 */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Analyse the packet for IPv6/TCP properties.                              */
 /* Is not expected to be called for fragmented packets.                     */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_tcp6(fin)
 	fr_info_t *fin;
 {
 	u_char saved_p;
 
 	/* Only checksum packets the common TCP parser accepted. */
 	if (ipf_pr_tcpcommon(fin) != 0)
 		return;
 
 	/* fin_p is forced to TCP for the checksum call, then restored. */
 	saved_p = fin->fin_p;
 	fin->fin_p = IPPROTO_TCP;
 	ipf_checkv6sum(fin);
 	fin->fin_p = saved_p;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_esp6                                                 */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Analyse the packet for ESP properties.                                   */
 /* The minimum length is taken to be the SPI (32bits) plus a tail (32bits)  */
 /* even though the newer ESP packets must also have a sequence number that  */
 /* is 32bits as well, it is not possible(?) to determine the version from a */
 /* simple packet header.                                                    */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_esp6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc;
 
 	/* Nothing is pulled up unless the fragment offset is zero. */
 	if (fin->fin_off != 0)
 		return;
 
 	/* 8 bytes: the minimum ESP length described in the header above. */
 	if (ipf_pr_pullup(fin, 8) != -1)
 		return;
 
 	softc = fin->fin_main_soft;
 	LBUMPD(ipf_stats[fin->fin_out], fr_v6_esp_pullup);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_ah6                                                  */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv6 Only                                                                */
 /* Analyse the packet for AH properties.                                    */
 /* The minimum length is taken to be the combination of all fields in the   */
 /* header being present and no authentication data (null algorithm used.)   */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_ah6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc;
 	authhdr_t *ah;
 
 	fin->fin_flx |= FI_AH;
 
 	/*
 	 * NOTE(review): the header is parsed with IPPROTO_HOPOPTS rather
 	 * than IPPROTO_AH - confirm against ipf_pr_ipv6exthdr() that this
 	 * is intentional.
 	 */
 	ah = (authhdr_t *)ipf_pr_ipv6exthdr(fin, 0, IPPROTO_HOPOPTS);
 	if (ah != NULL) {
 		ipf_pr_short6(fin, sizeof(*ah));
 
 		/*
 		 * No further pullup is done here: ipf_pr_ipv6exthdr() is
 		 * relied upon to have made ah_next (the first field)
 		 * readable.
 		 */
 		return ah->ah_next;
 	}
 
 	softc = fin->fin_main_soft;
 	LBUMPD(ipf_stats[fin->fin_out], fr_v6_ah_bad);
 	return IPPROTO_NONE;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_gre6                                                 */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Analyse the packet for GRE properties.                                   */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_gre6(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc;
 	grehdr_t *hdr;
 
 	if (ipf_pr_pullup(fin, sizeof(grehdr_t)) != -1) {
 		hdr = fin->fin_dp;
 		/* The call field is only recorded for revision 1 headers. */
 		if (GRE_REV(hdr->gr_flags) == 1)
 			fin->fin_data[0] = hdr->gr_call;
 		return;
 	}
 
 	softc = fin->fin_main_soft;
 	LBUMPD(ipf_stats[fin->fin_out], fr_v6_gre_pullup);
 }
 #endif	/* USE_INET6 */
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_pullup                                               */
 /* Returns:     int     - 0 == pullup succeeded, -1 == failure              */
 /* Parameters:  fin(I)  - pointer to packet information                     */
 /*              plen(I) - length (excluding L3 header) to pullup            */
 /*                                                                          */
 /* Short inline function to cut down on code duplication to perform a call  */
 /* to ipf_pullup to ensure there is the required amount of data,            */
 /* consecutively in the packet buffer.                                      */
 /*                                                                          */
 /* This function pulls up 'extra' data at the location of fin_dp.  fin_dp   */
 /* points to the first byte after the complete layer 3 header, which will   */
 /* include all of the known extension headers for IPv6 or options for IPv4. */
 /*                                                                          */
 /* Since ipf_pullup() expects the total length of bytes to be pulled up, it */
 /* is necessary to add those we can already assume to be pulled up (fin_dp  */
 /* - fin_ip) to what is passed through.                                     */
 /* ------------------------------------------------------------------------ */
 int
 ipf_pr_pullup(fin, plen)
 	fr_info_t *fin;
 	int plen;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 
 	/* Without a packet buffer there is nothing to pull up. */
 	if (fin->fin_m == NULL)
 		return 0;
 
 	/*
 	 * Turn the caller's length (counted from fin_dp) into a length
 	 * counted from the start of the IP header: add the bytes between
 	 * the end of the base header and fin_dp, then the header itself.
 	 */
 	if (fin->fin_dp != NULL)
 		plen += (char *)fin->fin_dp -
 			((char *)fin->fin_ip + fin->fin_hlen);
 	plen += fin->fin_hlen;
 
 	/* Already enough bytes in the first buffer: nothing to do. */
 	if (M_LEN(fin->fin_m) >= plen + fin->fin_ipoff)
 		return 0;
 
 #if defined(_KERNEL)
 	if (ipf_pullup(fin->fin_m, fin, plen) == NULL) {
 		DT(ipf_pullup_fail);
 		LBUMP(ipf_stats[fin->fin_out].fr_pull[1]);
 		return -1;
 	}
 	LBUMP(ipf_stats[fin->fin_out].fr_pull[0]);
 	return 0;
 #else
 	LBUMP(ipf_stats[fin->fin_out].fr_pull[1]);
 	/*
 	 * Outside the kernel no pullup is attempted; behave as if
 	 * ipf_pullup had failed and drop all references to the packet.
 	 */
 	fin->fin_reason = FRB_PULLUP;
 	*fin->fin_mp = NULL;
 	fin->fin_m = NULL;
 	fin->fin_ip = NULL;
 	return -1;
 #endif
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_short                                                */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I)  - pointer to packet information                     */
 /*              xmin(I) - minimum header size                               */
 /*                                                                          */
 /* Check if a packet is "short" as defined by xmin.  The rule we are        */
 /* applying here is that the packet must not be fragmented within the layer */
 /* 4 header.  That is, it must not be a fragment that has its offset set to */
 /* start within the layer 4 header (xmin) or if it is at offset 0, the      */
 /* entire layer 4 header must be present (xmin).                            */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_short(fin, xmin)
 	fr_info_t *fin;
 	int xmin;
 {
 	int too_short;
 
 	/*
 	 * At offset zero the data length is compared against xmin; for
 	 * any other offset, the offset itself is compared against xmin.
 	 */
 	if (fin->fin_off != 0)
 		too_short = (fin->fin_off < xmin);
 	else
 		too_short = (fin->fin_dlen < xmin);
 
 	if (too_short)
 		fin->fin_flx |= FI_SHORT;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_icmp                                                 */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv4 Only                                                                */
 /* Do a sanity check on the packet for ICMP (v4).  In nearly all cases,     */
 /* except extremely bad packets, both type and code will be present.        */
 /* The expected minimum size of an ICMP packet is very much dependent on    */
 /* the type of it.                                                          */
 /*                                                                          */
 /* XXX - other ICMP sanity checks?                                          */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_icmp(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	int minicmpsz = sizeof(struct icmp);
 	icmphdr_t *icmp;
 	ip_t *oip;
 
 	ipf_pr_short(fin, ICMPERR_ICMPHLEN);
 
 	/* Only the first fragment carries the ICMP header. */
 	if (fin->fin_off != 0) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_icmp_frag);
 		return;
 	}
 
 	if (ipf_pr_pullup(fin, ICMPERR_ICMPHLEN) == -1) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_icmp_pullup);
 		return;
 	}
 
 	icmp = fin->fin_dp;
 
 	/* First 16 bits of the ICMP header, followed by the id field. */
 	fin->fin_data[0] = *(u_short *)icmp;
 	fin->fin_data[1] = icmp->icmp_id;
 
 	switch (icmp->icmp_type)
 	{
 	case ICMP_ECHOREPLY :
 	case ICMP_ECHO :
 	/* Router discovery messages - RFC 1256 */
 	case ICMP_ROUTERADVERT :
 	case ICMP_ROUTERSOLICIT :
 		fin->fin_flx |= FI_ICMPQUERY;
 		minicmpsz = ICMP_MINLEN;
 		break;
 	/*
 	 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
 	 * 3 * timestamp(3 * 4)
 	 */
 	case ICMP_TSTAMP :
 	case ICMP_TSTAMPREPLY :
 		fin->fin_flx |= FI_ICMPQUERY;
 		minicmpsz = 20;
 		break;
 	/*
 	 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
 	 * mask(4)
 	 */
 	case ICMP_IREQ :
 	case ICMP_IREQREPLY :
 	case ICMP_MASKREQ :
 	case ICMP_MASKREPLY :
 		fin->fin_flx |= FI_ICMPQUERY;
 		minicmpsz = 12;
 		break;
 	/*
 	 * type(1) + code(1) + cksum(2) + id(2) seq(2) + ip(20+)
 	 */
 	case ICMP_UNREACH :
 #ifdef icmp_nextmtu
 		if (icmp->icmp_code == ICMP_UNREACH_NEEDFRAG) {
 			if (icmp->icmp_nextmtu < softc->ipf_icmpminfragmtu)
 				fin->fin_flx |= FI_BAD;
 		}
 #endif
 		/* FALLTHROUGH */
 	case ICMP_SOURCEQUENCH :
 	case ICMP_REDIRECT :
 	case ICMP_TIMXCEED :
 	case ICMP_PARAMPROB :
 		fin->fin_flx |= FI_ICMPERR;
 		if (ipf_coalesce(fin) != 1) {
 			LBUMPD(ipf_stats[fin->fin_out], fr_icmp_coalesce);
 			return;
 		}
 
 		/*
 		 * The checks below read the IP header quoted inside the
 		 * error message.  Refuse to look at it unless the ICMP
 		 * payload is long enough to hold the 8 byte ICMP header
 		 * plus a basic IP header; otherwise the reads would run
 		 * past the end of the packet data.
 		 */
 		if (fin->fin_dlen < (int)(ICMPERR_ICMPHLEN + sizeof(*oip))) {
 			fin->fin_flx |= FI_SHORT;
 			break;
 		}
 
 		/*
 		 * ICMP error packets should not be generated for IP
 		 * packets that are a fragment that isn't the first
 		 * fragment.
 		 */
 		oip = (ip_t *)((char *)fin->fin_dp + ICMPERR_ICMPHLEN);
 		if ((ntohs(oip->ip_off) & IP_OFFMASK) != 0)
 			fin->fin_flx |= FI_BAD;
 
 		/*
 		 * If the destination of this packet doesn't match the
 		 * source of the original packet then this packet is
 		 * not correct.
 		 */
 		if (oip->ip_src.s_addr != fin->fin_daddr)
 			fin->fin_flx |= FI_BAD;
 		break;
 	default :
 		break;
 	}
 
 	/* Apply the per-type minimum size chosen above. */
 	ipf_pr_short(fin, minicmpsz);
 
 	ipf_checkv4sum(fin);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_tcpcommon                                            */
 /* Returns:     int    - 0 = header ok, 1 = bad packet, -1 = buffer error   */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* TCP header sanity checking.  Look for bad combinations of TCP flags,     */
 /* and make some checks with how they interact with other fields.           */
 /* If compiled with IPFILTER_CKSUM, check to see if the TCP checksum is     */
 /* valid and mark the packet as bad if not.                                 */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_tcpcommon(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	int flags, tlen;
 	tcphdr_t *tcp;
 
 	fin->fin_flx |= FI_TCPUDP;
 	/*
 	 * Fragments with a non-zero offset are counted and reported as
 	 * "ok" (0) without any of the header checks below being applied.
 	 */
 	if (fin->fin_off != 0) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_frag);
 		return 0;
 	}
 
 	if (ipf_pr_pullup(fin, sizeof(*tcp)) == -1) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_pullup);
 		return -1;
 	}
 
 	tcp = fin->fin_dp;
 	/* Ports are only recorded when at least 4 bytes of data exist. */
 	if (fin->fin_dlen > 3) {
 		fin->fin_sport = ntohs(tcp->th_sport);
 		fin->fin_dport = ntohs(tcp->th_dport);
 	}
 
 	/* FI_SHORT is not set here; it is only tested. */
 	if ((fin->fin_flx & FI_SHORT) != 0) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_short);
 		return 1;
 	}
 
 	/*
 	 * Use of the TCP data offset *must* result in a value that is at
 	 * least the same size as the TCP header.
 	 */
 	tlen = TCP_OFF(tcp) << 2;
 	if (tlen < sizeof(tcphdr_t)) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_small);
 		fin->fin_flx |= FI_BAD;
 		return 1;
 	}
 
 	flags = tcp->th_flags;
 	fin->fin_tcpf = tcp->th_flags;
 
 	/*
 	 * If the urgent flag is set, then the urgent pointer must
 	 * also be set and vice versa.  Good TCP packets do not have
 	 * just one of these set.
 	 */
 	if ((flags & TH_URG) != 0 && (tcp->th_urp == 0)) {
 		fin->fin_flx |= FI_BAD;
 #if 0
 	} else if ((flags & TH_URG) == 0 && (tcp->th_urp != 0)) {
 		/*
 		 * Ignore this case (#if 0) as it shows up in "real"
 		 * traffic with bogus values in the urgent pointer field.
 		 */
 		fin->fin_flx |= FI_BAD;
 #endif
 	} else if (((flags & (TH_SYN|TH_FIN)) != 0) &&
 		   ((flags & (TH_RST|TH_ACK)) == TH_RST)) {
 		/* TH_FIN|TH_RST|TH_ACK seems to appear "naturally" */
 		fin->fin_flx |= FI_BAD;
 #if 1
 	} else if (((flags & TH_SYN) != 0) &&
 		   ((flags & (TH_URG|TH_PUSH)) != 0)) {
 		/*
 		 * SYN with URG and PUSH set is not for normal TCP but it is
 		 * possible(?) with T/TCP...but who uses T/TCP?
 		 */
 		fin->fin_flx |= FI_BAD;
 #endif
 	} else if (!(flags & TH_ACK)) {
 		/*
 		 * If the ack bit isn't set, then either the SYN or
 		 * RST bit must be set.  If the SYN bit is set, then
 		 * we expect the ACK field to be 0.  If the ACK is
 		 * not set and if URG, PSH or FIN are set, consider
 		 * that to indicate a bad TCP packet.
 		 */
 		if ((flags == TH_SYN) && (tcp->th_ack != 0)) {
 			/*
 			 * Cisco PIX sets the ACK field to a random value.
 			 * In light of this, do not set FI_BAD until a patch
 			 * is available from Cisco to ensure that
 			 * interoperability between existing systems is
 			 * achieved.
 			 */
 			/*fin->fin_flx |= FI_BAD*/;
 		} else if (!(flags & (TH_RST|TH_SYN))) {
 			fin->fin_flx |= FI_BAD;
 		} else if ((flags & (TH_URG|TH_PUSH|TH_FIN)) != 0) {
 			fin->fin_flx |= FI_BAD;
 		}
 	}
 	/*
 	 * FI_BAD may also have been set before this function was called;
 	 * any packet carrying it is counted as having bad flags here.
 	 */
 	if (fin->fin_flx & FI_BAD) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_bad_flags);
 		return 1;
 	}
 
 	/*
 	 * At this point, it's not exactly clear what is to be gained by
 	 * marking up which TCP options are and are not present.  The one we
 	 * are most interested in is the TCP window scale.  This is only in
 	 * a SYN packet [RFC1323] so we don't need this here...?
 	 * Now if we were to analyse the header for passive fingerprinting,
 	 * then that might add some weight to adding this...
 	 */
 	if (tlen == sizeof(tcphdr_t)) {
 		return 0;
 	}
 
 	/* The header carries options: make the whole header contiguous. */
 	if (ipf_pr_pullup(fin, tlen) == -1) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_tcp_pullup);
 		return -1;
 	}
 
 	/*
 	 * The TCP option walk below is compiled out.  It references
 	 * variables that this function does not declare, so it cannot be
 	 * enabled as it stands.
 	 */
 #if 0
 	tcp = fin->fin_dp;
 	ip = fin->fin_ip;
 	s = (u_char *)(tcp + 1);
 	off = IP_HL(ip) << 2;
 # ifdef _KERNEL
 	if (fin->fin_mp != NULL) {
 		mb_t *m = *fin->fin_mp;
 
 		if (off + tlen > M_LEN(m))
 			return;
 	}
 # endif
 	for (tlen -= (int)sizeof(*tcp); tlen > 0; ) {
 		opt = *s;
 		if (opt == '\0')
 			break;
 		else if (opt == TCPOPT_NOP)
 			ol = 1;
 		else {
 			if (tlen < 2)
 				break;
 			ol = (int)*(s + 1);
 			if (ol < 2 || ol > tlen)
 				break;
 		}
 
 		for (i = 9, mv = 4; mv >= 0; ) {
 			op = ipopts + i;
 			if (opt == (u_char)op->ol_val) {
 				optmsk |= op->ol_bit;
 				break;
 			}
 		}
 		tlen -= ol;
 		s += ol;
 	}
 #endif /* 0 */
 
 	return 0;
 }
 
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_udpcommon                                            */
 /* Returns:     int    - 0 = header ok, 1 = bad packet                      */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Extract the UDP source and destination ports, if present.  If compiled   */
 /* with IPFILTER_CKSUM, check to see if the UDP checksum is valid.          */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_udpcommon(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc;
 	udphdr_t *hdr;
 
 	fin->fin_flx |= FI_TCPUDP;
 
 	/*
 	 * Ports are only extracted when the fragment offset is zero and
 	 * at least 4 bytes of data are present; anything else is passed
 	 * through as "ok".
 	 */
 	if ((fin->fin_off != 0) || (fin->fin_dlen <= 3))
 		return 0;
 
 	if (ipf_pr_pullup(fin, sizeof(*hdr)) == -1) {
 		softc = fin->fin_main_soft;
 
 		fin->fin_flx |= FI_SHORT;
 		LBUMPD(ipf_stats[fin->fin_out], fr_udp_pullup);
 		return 1;
 	}
 
 	hdr = fin->fin_dp;
 	fin->fin_sport = ntohs(hdr->uh_sport);
 	fin->fin_dport = ntohs(hdr->uh_dport);
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_tcp                                                  */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv4 Only                                                                */
 /* Analyse the packet for IPv4/TCP properties.                              */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_tcp(fin)
 	fr_info_t *fin;
 {
 
 	/* Flag the packet first if it cannot hold a basic TCP header. */
 	ipf_pr_short(fin, sizeof(tcphdr_t));
 
 	/* The checksum is only examined when the header checks pass. */
 	if (ipf_pr_tcpcommon(fin) != 0)
 		return;
 
 	ipf_checkv4sum(fin);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_udp                                                  */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv4 Only                                                                */
 /* Analyse the packet for IPv4/UDP properties.                              */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_udp(fin)
 	fr_info_t *fin;
 {
 
 	/* Flag the packet first if it cannot hold a UDP header. */
 	ipf_pr_short(fin, sizeof(udphdr_t));
 
 	/* The checksum is only examined when the header checks pass. */
 	if (ipf_pr_udpcommon(fin) != 0)
 		return;
 
 	ipf_checkv4sum(fin);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_esp                                                  */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Analyse the packet for ESP properties.                                   */
 /* The minimum length is taken to be the SPI (32bits) plus a tail (32bits)  */
 /* even though the newer ESP packets must also have a sequence number that  */
 /* is 32bits as well, it is not possible(?) to determine the version from a */
 /* simple packet header.                                                    */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_esp(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc;
 
 	/* Nothing is checked unless the fragment offset is zero. */
 	if (fin->fin_off != 0)
 		return;
 
 	/* 8 bytes: the minimum ESP length described in the header above. */
 	ipf_pr_short(fin, 8);
 	if (ipf_pr_pullup(fin, 8) != -1)
 		return;
 
 	softc = fin->fin_main_soft;
 	LBUMPD(ipf_stats[fin->fin_out], fr_v4_esp_pullup);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_ah                                                   */
 /* Returns:     int    - value of the next header or IPPROTO_NONE if error  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Analyse the packet for AH properties.                                    */
 /* The minimum length is taken to be the combination of all fields in the   */
 /* header being present and no authentication data (null algorithm used.)   */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_pr_ah(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	authhdr_t *ah;
 	int len;
 
 	fin->fin_flx |= FI_AH;
 	ipf_pr_short(fin, sizeof(*ah));
 
 	/* A short packet or a non-first fragment cannot be parsed. */
 	if (((fin->fin_flx & FI_SHORT) != 0) || (fin->fin_off != 0)) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_ah_bad);
 		return IPPROTO_NONE;
 	}
 
 	if (ipf_pr_pullup(fin, sizeof(*ah)) == -1) {
 		DT(fr_v4_ah_pullup_1);
 		LBUMP(ipf_stats[fin->fin_out].fr_v4_ah_pullup);
 		return IPPROTO_NONE;
 	}
 
 	ah = (authhdr_t *)fin->fin_dp;
 
 	/* Full AH length in bytes: (ah_plen + 2) 32-bit words. */
 	len = (ah->ah_plen + 2) << 2;
 	ipf_pr_short(fin, len);
 
 	/*
 	 * If the header claims to be longer than the data that is
 	 * actually present, stop here.  Carrying on would subtract len
 	 * from a smaller fin_dlen and leave a wrapped-around length for
 	 * the protocol handlers that run next.
 	 */
 	if ((fin->fin_flx & FI_SHORT) != 0) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_ah_bad);
 		return IPPROTO_NONE;
 	}
 
 	if (ipf_pr_pullup(fin, len) == -1) {
 		DT(fr_v4_ah_pullup_2);
 		LBUMP(ipf_stats[fin->fin_out].fr_v4_ah_pullup);
 		return IPPROTO_NONE;
 	}
 
 	/*
 	 * Re-read the header pointer: the pullup above may have moved the
 	 * packet data, in which case the earlier value of ah is stale.
 	 */
 	ah = (authhdr_t *)fin->fin_dp;
 
 	/*
 	 * Adjust fin_dp and fin_dlen for skipping over the authentication
 	 * header.
 	 */
 	fin->fin_dp = (char *)fin->fin_dp + len;
 	fin->fin_dlen -= len;
 	return ah->ah_next;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_gre                                                  */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Analyse the packet for GRE properties.                                   */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_gre(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	grehdr_t *hdr;
 
 	ipf_pr_short(fin, sizeof(grehdr_t));
 
 	/* The GRE header is only looked at when the offset is zero. */
 	if (fin->fin_off != 0) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_gre_frag);
 		return;
 	}
 
 	if (ipf_pr_pullup(fin, sizeof(grehdr_t)) != -1) {
 		hdr = fin->fin_dp;
 		/* The call field is only recorded for revision 1 headers. */
 		if (GRE_REV(hdr->gr_flags) == 1)
 			fin->fin_data[0] = hdr->gr_call;
 		return;
 	}
 
 	LBUMPD(ipf_stats[fin->fin_out], fr_v4_gre_pullup);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pr_ipv4hdr                                              */
 /* Returns:     void                                                        */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* IPv4 Only                                                                */
 /* Analyze the IPv4 header and set fields in the fr_info_t structure.       */
 /* Check all options present and flag their presence if any exist.          */
 /* ------------------------------------------------------------------------ */
 static INLINE void
 ipf_pr_ipv4hdr(fin)
 	fr_info_t *fin;
 {
 	u_short optmsk = 0, secmsk = 0, auth = 0;
 	int hlen, ol, mv, p, i;
 	const struct optlist *op;
 	u_char *s, opt;
 	u_short off;
 	fr_ip_t *fi;
 	ip_t *ip;
 
 	fi = &fin->fin_fi;
 	hlen = fin->fin_hlen;
 
 	ip = fin->fin_ip;
 	p = ip->ip_p;
 	fi->fi_p = p;
 	/* fin_crc accumulates the protocol and both addresses (below). */
 	fin->fin_crc = p;
 	fi->fi_tos = ip->ip_tos;
 	fin->fin_id = ip->ip_id;
 	off = ntohs(ip->ip_off);
 
 	/* Get both TTL and protocol */
 	/* (fi_p was already stored above; this repeats that assignment.) */
 	fi->fi_p = ip->ip_p;
 	fi->fi_ttl = ip->ip_ttl;
 
 	/* Zero out bits not used in IPv6 address */
 	fi->fi_src.i6[1] = 0;
 	fi->fi_src.i6[2] = 0;
 	fi->fi_src.i6[3] = 0;
 	fi->fi_dst.i6[1] = 0;
 	fi->fi_dst.i6[2] = 0;
 	fi->fi_dst.i6[3] = 0;
 
 	fi->fi_saddr = ip->ip_src.s_addr;
 	fin->fin_crc += fi->fi_saddr;
 	fi->fi_daddr = ip->ip_dst.s_addr;
 	fin->fin_crc += fi->fi_daddr;
 	if (IN_CLASSD(ntohl(fi->fi_daddr)))
 		fin->fin_flx |= FI_MULTICAST|FI_MBCAST;
 
 	/*
 	 * set packet attribute flags based on the offset and
 	 * calculate the byte offset that it represents.
 	 */
 	off &= IP_MF|IP_OFFMASK;
 	if (off != 0) {
 		int morefrag = off & IP_MF;
 
 		fi->fi_flx |= FI_FRAG;
 		off &= IP_OFFMASK;
 		if (off != 0) {
 			fin->fin_flx |= FI_FRAGBODY;
 			/* The header field counts in units of 8 bytes. */
 			off <<= 3;
 			if ((off + fin->fin_dlen > 65535) ||
 			    (fin->fin_dlen == 0) ||
 			    ((morefrag != 0) && ((fin->fin_dlen & 7) != 0))) {
 				/*
 				 * The length of the packet, starting at its
 				 * offset cannot exceed 65535 (0xffff) as the
 				 * length of an IP packet is only 16 bits.
 				 *
 				 * Any fragment that isn't the last fragment
 				 * must have a length greater than 0 and it
 				 * must be an even multiple of 8.
 				 */
 				fi->fi_flx |= FI_BAD;
 			}
 		}
 	}
 	/* fin_off ends up as the byte offset, or 0 for a first fragment. */
 	fin->fin_off = off;
 
 	/*
 	 * Call per-protocol setup and checking
 	 */
 	if (p == IPPROTO_AH) {
 		/*
 		 * Treat AH differently because we expect there to be another
 		 * layer 4 header after it.
 		 */
 		p = ipf_pr_ah(fin);
 	}
 
 	/* Protocols without a case here get no per-protocol checks. */
 	switch (p)
 	{
 	case IPPROTO_UDP :
 		ipf_pr_udp(fin);
 		break;
 	case IPPROTO_TCP :
 		ipf_pr_tcp(fin);
 		break;
 	case IPPROTO_ICMP :
 		ipf_pr_icmp(fin);
 		break;
 	case IPPROTO_ESP :
 		ipf_pr_esp(fin);
 		break;
 	case IPPROTO_GRE :
 		ipf_pr_gre(fin);
 		break;
 	}
 
 	/*
 	 * Reload the header pointer: the handlers above go through
 	 * ipf_pr_pullup(), which clears fin_ip when it reports failure in
 	 * the non-kernel build.
 	 */
 	ip = fin->fin_ip;
 	if (ip == NULL)
 		return;
 
 	/*
 	 * If it is a standard IP header (no options), set the flag fields
 	 * which relate to options to 0.
 	 */
 	if (hlen == sizeof(*ip)) {
 		fi->fi_optmsk = 0;
 		fi->fi_secmsk = 0;
 		fi->fi_auth = 0;
 		return;
 	}
 
 	/*
 	 * So the IP header has some IP options attached.  Walk the entire
 	 * list of options present with this packet and set flags to indicate
 	 * which ones are here and which ones are not.  For the somewhat out
 	 * of date and obscure security classification options, set a flag to
 	 * represent which classification is present.
 	 */
 	fi->fi_flx |= FI_OPTIONS;
 
 	/* s walks the option bytes; hlen counts the option bytes left. */
 	for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) {
 		opt = *s;
 		if (opt == '\0')
 			break;
 		else if (opt == IPOPT_NOP)
 			ol = 1;
 		else {
 			/* Stop on a length byte that is missing or bogus. */
 			if (hlen < 2)
 				break;
 			ol = (int)*(s + 1);
 			if (ol < 2 || ol > hlen)
 				break;
 		}
 		/*
 		 * Look the option up in ipopts[], starting at index 9 and
 		 * moving by a step (mv) that shrinks from 4 down to 0.
 		 * NOTE(review): this presumably relies on ipopts[] being
 		 * sorted by ol_val - confirm against the table definition.
 		 * The loop does not stop at a match; it always runs all
 		 * five steps.
 		 */
 		for (i = 9, mv = 4; mv >= 0; ) {
 			op = ipopts + i;
 
 			/* Only options longer than 4 bytes are recorded. */
 			if ((opt == (u_char)op->ol_val) && (ol > 4)) {
 				u_32_t doi;
 
 				switch (opt)
 				{
 				case IPOPT_SECURITY :
 					/* A second instance marks it bad. */
 					if (optmsk & op->ol_bit) {
 						fin->fin_flx |= FI_BAD;
 					} else {
 						doi = ipf_checkripso(s);
 						secmsk = doi >> 16;
 						auth = doi & 0xffff;
 					}
 					break;
 
 				case IPOPT_CIPSO :
 
 					/* A second instance marks it bad. */
 					if (optmsk & op->ol_bit) {
 						fin->fin_flx |= FI_BAD;
 					} else {
 						doi = ipf_checkcipso(fin,
 								     s, ol);
 						secmsk = doi >> 16;
 						auth = doi & 0xffff;
 					}
 					break;
 				}
 				optmsk |= op->ol_bit;
 			}
 
 			if (opt < op->ol_val)
 				i -= mv;
 			else
 				i += mv;
 			mv--;
 		}
 		hlen -= ol;
 		s += ol;
 	}
 
 	/*
 	 * When auth is non-zero but has bit 0x0100 clear, its low byte is
 	 * discarded before the results are stored.
 	 */
 	if (auth && !(auth & 0x0100))
 		auth &= 0xff00;
 	fi->fi_optmsk = optmsk;
 	fi->fi_secmsk = secmsk;
 	fi->fi_auth = auth;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_checkripso                                              */
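 /* Returns:     u_32_t - bit of the matched classification in the upper 16  */
 /*                       bits, option bytes 3-4 in the lower 16 bits; 0 if  */
 /*                       the classification is not matched                  */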
 /* Parameters:  s(I)   - pointer to start of RIPSO option                   */
 /*                                                                          */
 /* ------------------------------------------------------------------------ */
 static u_32_t
 ipf_checkripso(s)
 	u_char *s;
 {
 	const struct optlist *sp;
 	u_short secmsk = 0, auth = 0;
 	u_char sec;
 	int j, m;
 
 	sec = *(s + 2);	/* classification */
 	for (j = 3, m = 2; m >= 0; ) {
 		sp = secopt + j;
 		if (sec == sp->ol_val) {
 			secmsk |= sp->ol_bit;
 			auth = *(s + 3);
 			auth *= 256;
 			auth += *(s + 4);
 			break;
 		}
 		if (sec < sp->ol_val)
 			j -= m;
 		else
 			j += m;
 		m--;
 	}
 
 	return (secmsk << 16) | auth;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_checkcipso                                              */
 /* Returns:     u_32_t  - 0 = failure, else the doi from the header         */
 /* Parameters:  fin(IO) - pointer to packet information                     */
 /*              s(I)    - pointer to start of CIPSO option                  */
 /*              ol(I)   - length of CIPSO option field                      */
 /*                                                                          */
 /* This function returns the domain of interpretation (DOI) field from the  */
 /* CIPSO header whilst also storing the highest sensitivity value found in  */
 /* the fr_info_t structure.                                                 */
 /*                                                                          */
 /* No attempt is made to extract the category bitmaps as these are defined  */
 /* by the user (rather than the protocol) and can be rather numerous on the */
 /* end nodes.                                                               */
 /* ------------------------------------------------------------------------ */
 static u_32_t
 ipf_checkcipso(fin, s, ol)
 	fr_info_t *fin;
 	u_char *s;
 	int ol;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	u_char *cur, tag, tlen, level;
 	fr_ip_t *fi;
 	u_32_t doi;
 	int left;
 
 	/* The option must be between 6 and 40 bytes long. */
 	if ((ol > 40) || (ol < 6)) {
 		LBUMPD(ipf_stats[fin->fin_out], fr_v4_cipso_bad);
 		fin->fin_flx |= FI_BAD;
 		return 0;
 	}
 
 	fi = &fin->fin_fi;
 	fi->fi_sensitivity = 0;
 
 	/*
 	 * The DOI field MUST be there: it is the 4 bytes that follow the
 	 * option type and length.
 	 */
 	bcopy(s + 2, &doi, sizeof(doi));
 
 	/* Walk the tags that follow the 6 byte option header. */
 	cur = (u_char *)s + 6;
 	for (left = ol - 6; left >= 2; left -= tlen, cur += tlen) {
 		tag = cur[0];
 		tlen = cur[1];
 		if (tlen > left || tlen < 4 || tlen > 34) {
 			LBUMPD(ipf_stats[fin->fin_out], fr_v4_cipso_tlen);
 			fin->fin_flx |= FI_BAD;
 			return 0;
 		}
 
 		level = 0;
 
 		/*
 		 * Tag numbers 0, 1, 2, 5 are laid out in the CIPSO Internet
 		 * draft (16 July 1992) that has expired.
 		 *
 		 * Tags 1, 4 and 5 are handled alike: the byte after the
 		 * length must be zero and the next byte is the sensitivity.
 		 * The category data that follows (a bitmap for tag 1,
 		 * enumerated categories for tag 4, ranges for tag 5) is not
 		 * examined.
 		 */
 		if ((tag == 1) || (tag == 4) || (tag == 5)) {
 			if (cur[2] != 0) {
 				fin->fin_flx |= FI_BAD;
 				continue;
 			}
 			level = cur[3];
 		} else if (tag <= 127) {
 			/* Tag 0 and every other tag up to 127 is rejected. */
 			fin->fin_flx |= FI_BAD;
 			continue;
 		}
 		/* Tags above 127 are custom defined; level stays at 0. */
 
 		if (level > fi->fi_sensitivity)
 			fi->fi_sensitivity = level;
 	}
 
 	return doi;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_makefrip                                                */
 /* Returns:     int     - 0 == packet ok, -1 == packet freed                */
 /* Parameters:  hlen(I) - length of IP packet header                        */
 /*              ip(I)   - pointer to the IP header                          */
 /*              fin(IO) - pointer to packet information                     */
 /*                                                                          */
 /* Compact the IP header into a structure which contains just the info.     */
 /* which is useful for comparing IP headers with and store this information */
 /* in the fr_info_t structure pointed to by fin.  At present, it is assumed */
 /* this function will be called with either an IPv4 or IPv6 packet.         */
 /* ------------------------------------------------------------------------ */
 int
 ipf_makefrip(hlen, ip, fin)
 	int hlen;
 	ip_t *ip;
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	int v;
 
 	/*
 	 * Reset the per-packet matching state and record where the header
 	 * starts and where the data following it begins.
 	 */
 	fin->fin_depth = 0;
 	fin->fin_hlen = (u_short)hlen;
 	fin->fin_ip = ip;
 	fin->fin_rule = 0xffffffff;
 	fin->fin_group[0] = -1;
 	fin->fin_group[1] = '\0';
 	fin->fin_dp = (char *)ip + hlen;
 
 	/*
 	 * Work out the packet and data lengths, then let the protocol
 	 * specific parser fill in the rest.  Any other version is left
 	 * untouched.
 	 */
 	v = fin->fin_v;
 	switch (v)
 	{
 	case 4 :
 		fin->fin_plen = ntohs(ip->ip_len);
 		fin->fin_dlen = fin->fin_plen - hlen;
 		ipf_pr_ipv4hdr(fin);
 		break;
 #ifdef	USE_INET6
 	case 6 :
 		/* The IPv6 payload length does not include the header. */
 		fin->fin_plen = ntohs(((ip6_t *)ip)->ip6_plen);
 		fin->fin_dlen = fin->fin_plen;
 		fin->fin_plen += hlen;
 
 		ipf_pr_ipv6hdr(fin);
 		break;
 #endif
 	default :
 		break;
 	}
 
 	/*
 	 * A NULL fin_ip at this point is reported as "packet freed";
 	 * presumably the header parsers clear it when they lose the buffer.
 	 */
 	if (fin->fin_ip != NULL)
 		return 0;
 
 	LBUMP(ipf_stats[fin->fin_out].fr_ip_freed);
 	return -1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_portcheck                                               */
 /* Returns:     int - 1 == port matched, 0 == port match failed             */
 /* Parameters:  frp(I) - pointer to port check `expression'                 */
 /*              pop(I) - port number to evaluate                            */
 /*                                                                          */
 /* Perform a comparison of a port number against some other(s), using a     */
 /* structure with compare information stored in it.                         */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_portcheck(frp, pop)
 	frpcmp_t *frp;
 	u_32_t pop;
 {
 	u_32_t po;
 
 	po = frp->frp_port;
 
 	/*
 	 * Each case returns the outcome of the requested comparison
 	 * directly.  The range operators use frp_port as the lower and
 	 * frp_top as the upper bound.
 	 */
 	switch (frp->frp_cmp)
 	{
 	case FR_EQUAL :		/* EQUAL */
 		return (pop == po);
 	case FR_NEQUAL :	/* NOTEQUAL */
 		return (pop != po);
 	case FR_LESST :		/* LESSTHAN */
 		return (pop < po);
 	case FR_GREATERT :	/* GREATERTHAN */
 		return (pop > po);
 	case FR_LESSTE :	/* LT or EQ */
 		return (pop <= po);
 	case FR_GREATERTE :	/* GT or EQ */
 		return (pop >= po);
 	case FR_OUTRANGE :	/* Out of range (bounds excluded) */
 		return (pop < po || pop > frp->frp_top);
 	case FR_INRANGE :	/* In range (bounds excluded) */
 		return (pop > po && pop < frp->frp_top);
 	case FR_INCRANGE :	/* Inclusive range */
 		return (pop >= po && pop <= frp->frp_top);
 	default :
 		break;
 	}
 
 	/*
 	 * An unrecognised comparison type is treated as a match.
 	 */
 	return 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tcpudpchk                                               */
 /* Returns:     int - 1 == protocol matched, 0 == check failed              */
 /* Parameters:  fi(I)  - pointer to packet information                      */
 /*              ft(I)  - pointer to structure with comparison data          */
 /*                                                                          */
 /* Compares the current packet (assuming it is TCP/UDP) information with a  */
 /* structure containing information that we want to match against.          */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tcpudpchk(fi, ft)
 	fr_ip_t *fi;
 	frtuc_t *ft;
 {
 	/*
 	 * Both ports should *always* be in the first fragment.
 	 * So far, I cannot find any cases where they can not be.
 	 *
 	 * The destination port is compared first, then the source port;
 	 * each is only looked at if the rule asks for it.
 	 */
 	if (ft->ftu_dcmp && !ipf_portcheck(&ft->ftu_dst, fi->fi_ports[1]))
 		return 0;
 
 	if (ft->ftu_scmp && !ipf_portcheck(&ft->ftu_src, fi->fi_ports[0]))
 		return 0;
 
 	/*
 	 * Only TCP has anything further to compare.
 	 */
 	if (fi->fi_p != IPPROTO_TCP)
 		return 1;
 
 	/*
 	 * If we don't have all the TCP/UDP header, then how can we
 	 * expect to do any sort of match on it ?  If we were looking for
 	 * TCP flags, then NO match.  If not, then match (which should
 	 * satisfy the "short" class too).
 	 */
 	if (fi->fi_flx & FI_SHORT)
 		return !(ft->ftu_tcpf | ft->ftu_tcpfm);
 
 	/*
 	 * Match the flags ?  If not, abort this match.
 	 */
 	if (ft->ftu_tcpfm &&
 	    ft->ftu_tcpf != (fi->fi_tcpf & ft->ftu_tcpfm)) {
 		FR_DEBUG(("f. %#x & %#x != %#x\n", fi->fi_tcpf,
 			 ft->ftu_tcpfm, ft->ftu_tcpf));
 		return 0;
 	}
 
 	return 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_check_ipf                                               */
 /* Returns:     int - 0 == match, else no match                             */
 /* Parameters:  fin(I)     - pointer to packet information                  */
 /*              fr(I)      - pointer to filter rule                         */
 /*              portcmp(I) - flag indicating whether to attempt matching on */
 /*                           TCP/UDP port data.                             */
 /*                                                                          */
 /* Check to see if a packet matches an IPFilter rule.  Checks of addresses, */
 /* port numbers, etc, for "standard" IPFilter rules are all orchestrated in */
 /* this function.                                                           */
 /* ------------------------------------------------------------------------ */
 static INLINE int
 ipf_check_ipf(fin, fr, portcmp)
 	fr_info_t *fin;
 	frentry_t *fr;
 	int portcmp;
 {
 	u_32_t	*ld, *lm, *lip;
 	fripf_t *fri;
 	fr_ip_t *fi;
 	int i;
 
 	/*
 	 * The packet summary (lip), the rule's mask (lm) and the rule's
 	 * expected data (ld) are walked in lock-step as arrays of 32 bit
 	 * words.  NOTE(review): this relies on fr_ip_t, fri_mip and fri_ip
 	 * sharing the same word layout - confirm against their definitions
 	 * before reordering any of the steps below.
 	 */
 	fi = &fin->fin_fi;
 	fri = fr->fr_ipf;
 	lip = (u_32_t *)fi;
 	lm = (u_32_t *)&fri->fri_mip;
 	ld = (u_32_t *)&fri->fri_ip;
 
 	/*
 	 * The first 32 bits compared cover:
 	 * IP version, TOS, TTL, protocol
 	 */
 	i = ((*lip & *lm) != *ld);
 	FR_DEBUG(("0. %#08x & %#08x != %#08x\n",
 		   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 	if (i)
 		return 1;
 
 	/*
 	 * Next 32 bits is a constructed bitmask indicating which IP options
 	 * are present (if any) in this packet.
 	 */
 	lip++, lm++, ld++;
 	i = ((*lip & *lm) != *ld);
 	FR_DEBUG(("1. %#08x & %#08x != %#08x\n",
 		   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 	if (i != 0)
 		return 1;
 
 	lip++, lm++, ld++;
 	/*
 	 * Unrolled loops (4 each, for 32 bits) for address checks.
 	 * Every path below advances the pointers by the same total so
 	 * that they line up again for the destination address.
 	 */
 	/*
 	 * Check the source address.
 	 */
 	if (fr->fr_satype == FRI_LOOKUP) {
 		/*
 		 * The address is matched by the rule's lookup function
 		 * rather than by mask and compare; -1 means no match.
 		 */
 		i = (*fr->fr_srcfunc)(fin->fin_main_soft, fr->fr_srcptr,
 				      fi->fi_v, lip, fin->fin_plen);
 		if (i == -1)
 			return 1;
 		lip += 3;
 		lm += 3;
 		ld += 3;
 	} else {
 		i = ((*lip & *lm) != *ld);
 		FR_DEBUG(("2a. %#08x & %#08x != %#08x\n",
 			   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 		if (fi->fi_v == 6) {
 			/* IPv6: compare the remaining three address words. */
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("2b. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("2c. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("2d. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 		} else {
 			/* Not IPv6: skip the unused address words. */
 			lip += 3;
 			lm += 3;
 			ld += 3;
 		}
 	}
 	/*
 	 * Invert the result if the rule negates the source address match.
 	 * The shift presumably reduces the FR_NOTSRCIP bit to 0/1 - confirm
 	 * against the definition of that flag.
 	 */
 	i ^= (fr->fr_flags & FR_NOTSRCIP) >> 6;
 	if (i != 0)
 		return 1;
 
 	/*
 	 * Check the destination address.
 	 */
 	lip++, lm++, ld++;
 	if (fr->fr_datype == FRI_LOOKUP) {
 		/* As for the source: delegate to the lookup function. */
 		i = (*fr->fr_dstfunc)(fin->fin_main_soft, fr->fr_dstptr,
 				      fi->fi_v, lip, fin->fin_plen);
 		if (i == -1)
 			return 1;
 		lip += 3;
 		lm += 3;
 		ld += 3;
 	} else {
 		i = ((*lip & *lm) != *ld);
 		FR_DEBUG(("3a. %#08x & %#08x != %#08x\n",
 			   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 		if (fi->fi_v == 6) {
 			/* IPv6: compare the remaining three address words. */
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("3b. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("3c. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 			lip++, lm++, ld++;
 			i |= ((*lip & *lm) != *ld);
 			FR_DEBUG(("3d. %#08x & %#08x != %#08x\n",
 				   ntohl(*lip), ntohl(*lm), ntohl(*ld)));
 		} else {
 			/* Not IPv6: skip the unused address words. */
 			lip += 3;
 			lm += 3;
 			ld += 3;
 		}
 	}
 	/*
 	 * Invert the result if the rule negates the destination address
 	 * match (same caveat about the shift as for the source address).
 	 */
 	i ^= (fr->fr_flags & FR_NOTDSTIP) >> 7;
 	if (i != 0)
 		return 1;
 	/*
 	 * IP addresses matched.  The next 32bits contains:
 	 * mask of old IP header security & authentication bits.
 	 * Any non-zero difference here counts as a mismatch.
 	 */
 	lip++, lm++, ld++;
 	i = (*ld - (*lip & *lm));
 	FR_DEBUG(("4. %#08x & %#08x != %#08x\n", *lip, *lm, *ld));
 
 	/*
 	 * Next we have 32 bits of packet flags.
 	 */
 	lip++, lm++, ld++;
 	i |= (*ld - (*lip & *lm));
 	FR_DEBUG(("5. %#08x & %#08x != %#08x\n", *lip, *lm, *ld));
 
 	if (i == 0) {
 		/*
 		 * If a fragment, then only the first has what we're
 		 * looking for here...
 		 */
 		if (portcmp) {
 			if (!ipf_tcpudpchk(&fin->fin_fi, &fr->fr_tuc))
 				i = 1;
 		} else {
 			/*
 			 * Port data is not being compared for this packet,
 			 * so a rule that wants ports or TCP flags cannot
 			 * match.
 			 */
 			if (fr->fr_dcmp || fr->fr_scmp ||
 			    fr->fr_tcpf || fr->fr_tcpfm)
 				i = 1;
 			/*
 			 * ICMP type/code matching needs an ICMP or ICMPv6
 			 * packet that is not a later fragment and has at
 			 * least two bytes of data.
 			 */
 			if (fr->fr_icmpm || fr->fr_icmp) {
 				if (((fi->fi_p != IPPROTO_ICMP) &&
 				     (fi->fi_p != IPPROTO_ICMPV6)) ||
 				    fin->fin_off || (fin->fin_dlen < 2))
 					i = 1;
 				else if ((fin->fin_data[0] & fr->fr_icmpm) !=
 					 fr->fr_icmp) {
 					FR_DEBUG(("i. %#x & %#x != %#x\n",
 						 fin->fin_data[0],
 						 fr->fr_icmpm, fr->fr_icmp));
 					i = 1;
 				}
 			}
 		}
 	}
 	return i;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_scanlist                                                */
 /* Returns:     int - result flags of scanning filter list                  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              pass(I) - default result to return for filtering            */
 /*                                                                          */
 /* Check the input/output list of rules for a match to the current packet.  */
 /* If a match is found, the value of fr_flags from the rule becomes the     */
 /* return value and fin->fin_fr points to the matched rule.                 */
 /* The matched rule's group name is copied, NUL terminated, to fin_group.   */
 /*                                                                          */
 /* This function may be called recursively up to 16 times (limit inbuilt.)  */
 /* When unwinding, it should finish up with fin_depth as 0.                 */
 /*                                                                          */
 /* Could be per interface, but this gets real nasty when you don't have,    */
 /* or can't easily change, the kernel source code.                          */
 /* ------------------------------------------------------------------------ */
 int
 ipf_scanlist(fin, pass)
 	fr_info_t *fin;
 	u_32_t pass;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	int rulen, portcmp, off, skip;
 	struct frentry *fr, *fnext;
 	u_32_t passt, passo;
 
 	/*
 	 * Do not allow nesting deeper than 16 levels.
 	 */
 	if (fin->fin_depth >= 16)
 		return pass;
 
 	fr = fin->fin_fr;
 
 	/*
 	 * If there are no rules in this list, return now.
 	 */
 	if (fr == NULL)
 		return pass;
 
 	skip = 0;
 	portcmp = 0;
 	fin->fin_depth++;
 	fin->fin_fr = NULL;
 	off = fin->fin_off;
 
 	/*
 	 * Ports can only be compared for TCP/UDP packets that are not a
 	 * later fragment and carry more than 3 bytes of data.
 	 */
 	if ((fin->fin_flx & FI_TCPUDP) && (fin->fin_dlen > 3) && !off)
 		portcmp = 1;
 
 	for (rulen = 0; fr; fr = fnext, rulen++) {
 		/*
 		 * Remember the successor now; fr may be replaced by the
 		 * result of a rule function further down.
 		 */
 		fnext = fr->fr_next;
 		if (skip != 0) {
 			FR_VERBOSE(("SKIP %d (%#x)\n", skip, fr->fr_flags));
 			skip--;
 			continue;
 		}
 
 		/*
 		 * In all checks below, a null (zero) value in the
 		 * filter structure is taken to mean a wildcard.
 		 *
 		 * check that we are working for the right interface
 		 */
 #ifdef	_KERNEL
 		if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
 			continue;
 #else
 		if (opts & (OPT_VERBOSE|OPT_DEBUG))
 			printf("\n");
 		FR_VERBOSE(("%c", FR_ISSKIP(pass) ? 's' :
 				  FR_ISPASS(pass) ? 'p' :
 				  FR_ISACCOUNT(pass) ? 'A' :
 				  FR_ISAUTH(pass) ? 'a' :
 				  (pass & FR_NOMATCH) ? 'n' :'b'));
 		if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
 			continue;
 		FR_VERBOSE((":i"));
 #endif
 
 		/*
 		 * Match the packet against the rule using the method the
 		 * rule type calls for; a failed match moves to the next rule.
 		 */
 		switch (fr->fr_type)
 		{
 		case FR_T_IPF :
 		case FR_T_IPF_BUILTIN :
 			if (ipf_check_ipf(fin, fr, portcmp))
 				continue;
 			break;
 #if defined(IPFILTER_BPF)
 		case FR_T_BPFOPC :
 		case FR_T_BPFOPC_BUILTIN :
 		    {
 			u_char *mc;
 			int wlen;
 
 			if (*fin->fin_mp == NULL)
 				continue;
 			if (fin->fin_family != fr->fr_family)
 				continue;
 			mc = (u_char *)fin->fin_m;
 			wlen = fin->fin_dlen + fin->fin_hlen;
 			if (!bpf_filter(fr->fr_data, mc, wlen, 0))
 				continue;
 			break;
 		    }
 #endif
 		case FR_T_CALLFUNC_BUILTIN :
 		    {
 			frentry_t *f;
 
 			f = (*fr->fr_func)(fin, &pass);
 			if (f != NULL)
 				fr = f;
 			else
 				continue;
 			break;
 		    }
 
 		case FR_T_IPFEXPR :
 		case FR_T_IPFEXPR_BUILTIN :
 			if (fin->fin_family != fr->fr_family)
 				continue;
 			if (ipf_fr_matcharray(fin, fr->fr_data) == 0)
 				continue;
 			break;
 
 		default :
 			break;
 		}
 
 		/*
 		 * For inbound packets, a rule carrying a NAT tag only
 		 * matches packets that have a matching tag attached.
 		 */
 		if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
 			if (fin->fin_nattag == NULL)
 				continue;
 			if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag) == 0)
 				continue;
 		}
 		FR_VERBOSE(("=%d/%d.%d *", fr->fr_grhead, fr->fr_group, rulen));
 
 		passt = fr->fr_flags;
 
 		/*
 		 * If the rule is a "call now" rule, then call the function
 		 * in the rule, if it exists and use the results from that.
 		 * If the function pointer is bad, just make like we ignore
 		 * it, except for increasing the hit counter.
 		 */
 		if ((passt & FR_CALLNOW) != 0) {
 			frentry_t *frs;
 
 			ATOMIC_INC64(fr->fr_hits);
 			if ((fr->fr_func == NULL) ||
 			    (fr->fr_func == (ipfunc_t)-1))
 				continue;
 
 			frs = fin->fin_fr;
 			fin->fin_fr = fr;
 			fr = (*fr->fr_func)(fin, &passt);
 			if (fr == NULL) {
 				fin->fin_fr = frs;
 				continue;
 			}
 			passt = fr->fr_flags;
 		}
 		fin->fin_fr = fr;
 
 #ifdef  IPFILTER_LOG
 		/*
 		 * Just log this packet...
 		 */
 		if ((passt & FR_LOGMASK) == FR_LOG) {
 			if (ipf_log_pkt(fin, passt) == -1) {
 				if (passt & FR_LOGORBLOCK) {
 					DT(frb_logfail);
 					passt &= ~FR_CMDMASK;
 					passt |= FR_BLOCK|FR_QUICK;
 					fin->fin_reason = FRB_LOGFAIL;
 				}
 			}
 		}
 #endif /* IPFILTER_LOG */
 
 		/*
 		 * The rule matched: update its byte and hit counters.
 		 */
 		MUTEX_ENTER(&fr->fr_lock);
 		fr->fr_bytes += (U_QUAD_T)fin->fin_plen;
 		fr->fr_hits++;
 		MUTEX_EXIT(&fr->fr_lock);
 		fin->fin_rule = rulen;
 
 		/*
 		 * Keep the previous result so that it can be restored if
 		 * adding state for a "quick" rule fails below.
 		 */
 		passo = pass;
 		if (FR_ISSKIP(passt)) {
 			skip = fr->fr_arg;
 			continue;
 		} else if (((passt & FR_LOGMASK) != FR_LOG) &&
 			   ((passt & FR_LOGMASK) != FR_DECAPSULATE)) {
 			pass = passt;
 		}
 
 		if (passt & (FR_RETICMP|FR_FAKEICMP))
 			fin->fin_icode = fr->fr_icode;
 
 		/*
 		 * Record the group name of the matching rule.  The copy is
 		 * bounded by the size of fin_group and always terminated;
 		 * strncpy() also zero fills the remainder, so nothing is
 		 * left behind from a longer name stored earlier.
 		 */
 		if (fr->fr_group != -1) {
 			(void) strncpy(fin->fin_group,
 				       FR_NAME(fr, fr_group),
 				       FR_GROUPLEN);
 			fin->fin_group[FR_GROUPLEN - 1] = '\0';
 		} else {
 			fin->fin_group[0] = '\0';
 		}
 
 		FR_DEBUG(("pass %#x/%#x/%x\n", passo, pass, passt));
 
 		/*
 		 * If this rule is the head of a group, descend into the
 		 * group's rules (or decapsulate and scan, if asked to.)
 		 */
 		if (fr->fr_grphead != NULL) {
 			fin->fin_fr = fr->fr_grphead->fg_start;
 			FR_VERBOSE(("group %s\n", FR_NAME(fr, fr_grhead)));
 
 			if (FR_ISDECAPS(passt))
 				passt = ipf_decaps(fin, pass, fr->fr_icode);
 			else
 				passt = ipf_scanlist(fin, pass);
 
 			/*
 			 * Nothing in the group matched: fall back to this
 			 * rule as the result.
 			 */
 			if (fin->fin_fr == NULL) {
 				fin->fin_rule = rulen;
 				if (fr->fr_group != -1) {
 					(void) strncpy(fin->fin_group,
 						       fr->fr_names +
 						       fr->fr_group,
 						       FR_GROUPLEN);
 					fin->fin_group[FR_GROUPLEN - 1] = '\0';
 				}
 				fin->fin_fr = fr;
 				passt = pass;
 			}
 			pass = passt;
 		}
 
 		if (pass & FR_QUICK) {
 			/*
 			 * Finally, if we've asked to track state for this
 			 * packet, set it up.  Add state for "quick" rules
 			 * here so that if the action fails we can consider
 			 * the rule to "not match" and keep on processing
 			 * filter rules.
 			 */
 			if ((pass & FR_KEEPSTATE) && !FR_ISAUTH(pass) &&
 			    !(fin->fin_flx & FI_STATE)) {
 				int out = fin->fin_out;
 
 				fin->fin_fr = fr;
 				if (ipf_state_add(softc, fin, NULL, 0) == 0) {
 					LBUMPD(ipf_stats[out], fr_ads);
 				} else {
 					LBUMPD(ipf_stats[out], fr_bads);
 					pass = passo;
 					continue;
 				}
 			}
 			break;
 		}
 	}
 	fin->fin_depth--;
 	return pass;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_acctpkt                                                 */
 /* Returns:     frentry_t* - always returns NULL                            */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              passp(IO) - pointer to current/new filter decision (unused) */
 /*                                                                          */
 /* Checks a packet against accounting rules, if there are any for the given */
 /* IP protocol version.                                                     */
 /*                                                                          */
 /* N.B.: this function returns NULL to match the prototype used by other    */
 /* functions called from the IPFilter "mainline" in ipf_check().            */
 /* ------------------------------------------------------------------------ */
 frentry_t *
 ipf_acctpkt(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	char group[FR_GROUPLEN];
 	frentry_t *fr, *frsave;
 	u_32_t pass, rulen;
 
 	/* passp is only present to satisfy the common prototype. */
 	(void) passp;
 
 	/*
 	 * Nothing to do if there is no active accounting list for the
 	 * direction this packet is travelling in.
 	 */
 	fr = softc->ipf_acct[fin->fin_out][softc->ipf_active];
 	if (fr == NULL)
 		return NULL;
 
 	/*
 	 * Scanning the accounting list overwrites the matched rule, rule
 	 * number and group name in fin, so save them first...
 	 */
 	frsave = fin->fin_fr;
 	rulen = fin->fin_rule;
 	bcopy(fin->fin_group, group, FR_GROUPLEN);
 
 	fin->fin_fr = fr;
 	pass = ipf_scanlist(fin, FR_NOMATCH);
 	if (FR_ISACCOUNT(pass)) {
 		LBUMPD(ipf_stats[0], fr_acct);
 	}
 
 	/*
 	 * ...and put them back once accounting is done.
 	 */
 	fin->fin_fr = frsave;
 	bcopy(group, fin->fin_group, FR_GROUPLEN);
 	fin->fin_rule = rulen;
 
 	return NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_firewall                                                */
 /* Returns:     frentry_t* - returns pointer to matched rule, if no matches */
 /*                           were found, returns NULL.                      */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              passp(IO) - pointer to current/new filter decision (unused) */
 /*                                                                          */
 /* Applies an appropriate set of firewall rules to the packet, to see if    */
 /* there are any matches.  The first check is to see if a match can be seen */
 /* in the cache.  If not, then search an appropriate list of rules.  Once a */
 /* matching rule is found, take any appropriate actions as defined by the   */
 /* rule - except logging.                                                   */
 /* ------------------------------------------------------------------------ */
 static frentry_t *
 ipf_firewall(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	frentry_t *fr;
 	u_32_t pass;
 	int out;
 
 	out = fin->fin_out;
 	pass = *passp;
 
 	/*
 	 * This rule cache will only affect packets that are not being
 	 * statefully filtered.
 	 */
 	fin->fin_fr = softc->ipf_rules[out][softc->ipf_active];
 	if (fin->fin_fr != NULL)
 		pass = ipf_scanlist(fin, softc->ipf_pass);
 
 	if ((pass & FR_NOMATCH)) {
 		LBUMPD(ipf_stats[out], fr_nom);
 	}
 	fr = fin->fin_fr;
 
 	/*
 	 * Apply packets per second rate-limiting to a rule as required.
 	 */
 	if ((fr != NULL) && (fr->fr_pps != 0) &&
 	    !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) {
 		DT2(frb_ppsrate, fr_info_t *, fin, frentry_t *, fr);
 		pass &= ~(FR_CMDMASK|FR_RETICMP|FR_RETRST);
 		pass |= FR_BLOCK;
 		LBUMPD(ipf_stats[out], fr_ppshit);
 		fin->fin_reason = FRB_PPSRATE;
 	}
 
 	/*
 	 * If we fail to add a packet to the authorization queue, then we
 	 * drop the packet later.  However, if it was added then pretend
 	 * we've dropped it already.
 	 */
 	if (FR_ISAUTH(pass)) {
 		if (ipf_auth_new(fin->fin_m, fin) != 0) {
 			DT1(frb_authnew, fr_info_t *, fin);
 			fin->fin_m = *fin->fin_mp = NULL;
 			fin->fin_reason = FRB_AUTHNEW;
 			fin->fin_error = 0;
 		} else {
 			IPFERROR(1);
 			fin->fin_error = ENOSPC;
 		}
 	}
 
 	if ((fr != NULL) && (fr->fr_func != NULL) &&
 	    (fr->fr_func != (ipfunc_t)-1) && !(pass & FR_CALLNOW))
 		(void) (*fr->fr_func)(fin, &pass);
 
 	/*
 	 * If a rule is a pre-auth rule, check again in the list of rules
 	 * loaded for authenticated use.  It does not particulary matter
 	 * if this search fails because a "preauth" result, from a rule,
 	 * is treated as "not a pass", hence the packet is blocked.
 	 */
 	if (FR_ISPREAUTH(pass)) {
 		pass = ipf_auth_pre_scanlist(softc, fin, pass);
 	}
 
 	/*
 	 * If the rule has "keep frag" and the packet is actually a fragment,
 	 * then create a fragment state entry.
 	 */
 	if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) {
 		if (fin->fin_flx & FI_FRAG) {
 			if (ipf_frag_new(softc, fin, pass) == -1) {
 				LBUMP(ipf_stats[out].fr_bnfr);
 			} else {
 				LBUMP(ipf_stats[out].fr_nfr);
 			}
 		} else {
 			LBUMP(ipf_stats[out].fr_cfr);
 		}
 	}
 
 	fr = fin->fin_fr;
 	*passp = pass;
 
 	return fr;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_check                                                   */
 /* Returns:     int -  0 == packet allowed through,                         */
 /*              User space:                                                 */
 /*                    -1 == packet blocked                                  */
 /*                     1 == packet not matched                              */
 /*                    -2 == requires authentication                         */
 /*              Kernel:                                                     */
 /*                   > 0 == filter error # for packet                       */
 /* Parameters: ip(I)   - pointer to start of IPv4/6 packet                  */
 /*             hlen(I) - length of header                                   */
 /*             ifp(I)  - pointer to interface this packet is on             */
 /*             out(I)  - 0 == packet going in, 1 == packet going out        */
 /*             mp(IO)  - pointer to caller's buffer pointer that holds this */
 /*                       IP packet.                                         */
 /* Solaris & HP-UX ONLY :                                                   */
 /*             qpi(I)  - pointer to STREAMS queue information for this      */
 /*                       interface & direction.                             */
 /*                                                                          */
 /* ipf_check() is the master function for all IPFilter packet processing.   */
 /* It orchestrates: Network Address Translation (NAT), checking for packet  */
 /* authorisation (or pre-authorisation), presence of related state info.,   */
 /* generating log entries, IP packet accounting, routing of packets as      */
 /* directed by firewall rules and of course whether or not to allow the     */
 /* packet to be further processed by the kernel.                            */
 /*                                                                          */
 /* For packets blocked, the contents of "mp" will be NULL'd and the buffer  */
 /* freed.  Packets passed may be returned with the pointer pointed to by    */
 /* "mp" changed to a new buffer.                                            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_check(ctx, ip, hlen, ifp, out
 #if defined(_KERNEL) && defined(MENTAT)
 	, qif, mp)
 	void *qif;
 #else
 	, mp)
 #endif
 	mb_t **mp;
 	ip_t *ip;
 	int hlen;
 	void *ifp;
 	int out;
 	void *ctx;
 {
 	/*
 	 * The above really sucks, but short of writing a diff
 	 */
 	ipf_main_softc_t *softc = ctx;
 	fr_info_t frinfo;
 	fr_info_t *fin = &frinfo;
 	u_32_t pass = softc->ipf_pass;
 	frentry_t *fr = NULL;
 	int v = IP_V(ip);
 	mb_t *mc = NULL;
 	mb_t *m;
 	/*
 	 * The first part of ipf_check() deals with making sure that what goes
 	 * into the filtering engine makes some sense.  Information about the
 	 * the packet is distilled, collected into a fr_info_t structure and
 	 * the an attempt to ensure the buffer the packet is in is big enough
 	 * to hold all the required packet headers.
 	 */
 #ifdef	_KERNEL
 # ifdef MENTAT
 	qpktinfo_t *qpi = qif;
 
 #  ifdef __sparc
 	if ((u_int)ip & 0x3)
 		return 2;
 #  endif
 # else
 	SPL_INT(s);
 # endif
 
 	if (softc->ipf_running <= 0) {
 		return 0;
 	}
 
 	bzero((char *)fin, sizeof(*fin));
 
 # ifdef MENTAT
 	if (qpi->qpi_flags & QF_BROADCAST)
 		fin->fin_flx |= FI_MBCAST|FI_BROADCAST;
 	if (qpi->qpi_flags & QF_MULTICAST)
 		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
 	m = qpi->qpi_m;
 	fin->fin_qfm = m;
 	fin->fin_qpi = qpi;
 # else /* MENTAT */
 
 	m = *mp;
 
 #  if defined(M_MCAST)
 	if ((m->m_flags & M_MCAST) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
 #  endif
 #  if defined(M_MLOOP)
 	if ((m->m_flags & M_MLOOP) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
 #  endif
 #  if defined(M_BCAST)
 	if ((m->m_flags & M_BCAST) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_BROADCAST;
 #  endif
 #  ifdef M_CANFASTFWD
 	/*
 	 * XXX For now, IP Filter and fast-forwarding of cached flows
 	 * XXX are mutually exclusive.  Eventually, IP Filter should
 	 * XXX get a "can-fast-forward" filter rule.
 	 */
 	m->m_flags &= ~M_CANFASTFWD;
 #  endif /* M_CANFASTFWD */
 #  if defined(CSUM_DELAY_DATA) && (!defined(__FreeBSD_version) || \
 				   (__FreeBSD_version < 501108))
 	/*
 	 * disable delayed checksums.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #  endif /* CSUM_DELAY_DATA */
 # endif /* MENTAT */
 #else
 	bzero((char *)fin, sizeof(*fin));
 	m = *mp;
 # if defined(M_MCAST)
 	if ((m->m_flags & M_MCAST) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
 # endif
 # if defined(M_MLOOP)
 	if ((m->m_flags & M_MLOOP) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_MULTICAST;
 # endif
 # if defined(M_BCAST)
 	if ((m->m_flags & M_BCAST) != 0)
 		fin->fin_flx |= FI_MBCAST|FI_BROADCAST;
 # endif
 #endif /* _KERNEL */
 
 	fin->fin_v = v;
 	fin->fin_m = m;
 	fin->fin_ip = ip;
 	fin->fin_mp = mp;
 	fin->fin_out = out;
 	fin->fin_ifp = ifp;
 	fin->fin_error = ENETUNREACH;
 	fin->fin_hlen = (u_short)hlen;
 	fin->fin_dp = (char *)ip + hlen;
 	fin->fin_main_soft = softc;
 
 	fin->fin_ipoff = (char *)ip - MTOD(m, char *);
 
 	SPL_NET(s);
 
 #ifdef	USE_INET6
 	if (v == 6) {
 		LBUMP(ipf_stats[out].fr_ipv6);
 		/*
 		 * Jumbo grams are quite likely too big for internal buffer
 		 * structures to handle comfortably, for now, so just drop
 		 * them.
 		 */
 		if (((ip6_t *)ip)->ip6_plen == 0) {
 			DT1(frb_jumbo, ip6_t *, (ip6_t *)ip);
 			pass = FR_BLOCK|FR_NOMATCH;
 			fin->fin_reason = FRB_JUMBO;
 			goto finished;
 		}
 		fin->fin_family = AF_INET6;
 	} else
 #endif
 	{
 		fin->fin_family = AF_INET;
 	}
 
 	if (ipf_makefrip(hlen, ip, fin) == -1) {
 		DT1(frb_makefrip, fr_info_t *, fin);
 		pass = FR_BLOCK|FR_NOMATCH;
 		fin->fin_reason = FRB_MAKEFRIP;
 		goto finished;
 	}
 
 	/*
 	 * For at least IPv6 packets, if a m_pullup() fails then this pointer
 	 * becomes NULL and so we have no packet to free.
 	 */
 	if (*fin->fin_mp == NULL)
 		goto finished;
 
 	if (!out) {
 		if (v == 4) {
 			if (softc->ipf_chksrc && !ipf_verifysrc(fin)) {
 				LBUMPD(ipf_stats[0], fr_v4_badsrc);
 				fin->fin_flx |= FI_BADSRC;
 			}
 			if (fin->fin_ip->ip_ttl < softc->ipf_minttl) {
 				LBUMPD(ipf_stats[0], fr_v4_badttl);
 				fin->fin_flx |= FI_LOWTTL;
 			}
 		}
 #ifdef USE_INET6
 		else  if (v == 6) {
 			if (((ip6_t *)ip)->ip6_hlim < softc->ipf_minttl) {
 				LBUMPD(ipf_stats[0], fr_v6_badttl);
 				fin->fin_flx |= FI_LOWTTL;
 			}
 		}
 #endif
 	}
 
 	if (fin->fin_flx & FI_SHORT) {
 		LBUMPD(ipf_stats[out], fr_short);
 	}
 
 	READ_ENTER(&softc->ipf_mutex);
 
 	if (!out) {
 		switch (fin->fin_v)
 		{
 		case 4 :
 			if (ipf_nat_checkin(fin, &pass) == -1) {
 				goto filterdone;
 			}
 			break;
 #ifdef USE_INET6
 		case 6 :
 			if (ipf_nat6_checkin(fin, &pass) == -1) {
 				goto filterdone;
 			}
 			break;
 #endif
 		default :
 			break;
 		}
 	}
 	/*
 	 * Check auth now.
 	 * If a packet is found in the auth table, then skip checking
 	 * the access lists for permission but we do need to consider
 	 * the result as if it were from the ACL's.  In addition, being
 	 * found in the auth table means it has been seen before, so do
 	 * not pass it through accounting (again), lest it be counted twice.
 	 */
 	fr = ipf_auth_check(fin, &pass);
 	if (!out && (fr == NULL))
 		(void) ipf_acctpkt(fin, NULL);
 
 	if (fr == NULL) {
 		if ((fin->fin_flx & FI_FRAG) != 0)
 			fr = ipf_frag_known(fin, &pass);
 
 		if (fr == NULL)
 			fr = ipf_state_check(fin, &pass);
 	}
 
 	if ((pass & FR_NOMATCH) || (fr == NULL))
 		fr = ipf_firewall(fin, &pass);
 
 	/*
 	 * If we've asked to track state for this packet, set it up.
 	 * Here rather than ipf_firewall because ipf_checkauth may decide
 	 * to return a packet for "keep state"
 	 */
 	if ((pass & FR_KEEPSTATE) && (fin->fin_m != NULL) &&
 	    !(fin->fin_flx & FI_STATE)) {
 		if (ipf_state_add(softc, fin, NULL, 0) == 0) {
 			LBUMP(ipf_stats[out].fr_ads);
 		} else {
 			LBUMP(ipf_stats[out].fr_bads);
 			if (FR_ISPASS(pass)) {
 				DT(frb_stateadd);
 				pass &= ~FR_CMDMASK;
 				pass |= FR_BLOCK;
 				fin->fin_reason = FRB_STATEADD;
 			}
 		}
 	}
 
 	fin->fin_fr = fr;
 	if ((fr != NULL) && !(fin->fin_flx & FI_STATE)) {
 		fin->fin_dif = &fr->fr_dif;
 		fin->fin_tif = &fr->fr_tifs[fin->fin_rev];
 	}
 
 	/*
 	 * Only count/translate packets which will be passed on, out the
 	 * interface.
 	 */
 	if (out && FR_ISPASS(pass)) {
 		(void) ipf_acctpkt(fin, NULL);
 
 		switch (fin->fin_v)
 		{
 		case 4 :
 			if (ipf_nat_checkout(fin, &pass) == -1) {
 				;
 			} else if ((softc->ipf_update_ipid != 0) && (v == 4)) {
 				if (ipf_updateipid(fin) == -1) {
 					DT(frb_updateipid);
 					LBUMP(ipf_stats[1].fr_ipud);
 					pass &= ~FR_CMDMASK;
 					pass |= FR_BLOCK;
 					fin->fin_reason = FRB_UPDATEIPID;
 				} else {
 					LBUMP(ipf_stats[0].fr_ipud);
 				}
 			}
 			break;
 #ifdef USE_INET6
 		case 6 :
 			(void) ipf_nat6_checkout(fin, &pass);
 			break;
 #endif
 		default :
 			break;
 		}
 	}
 
 filterdone:
 #ifdef	IPFILTER_LOG
 	if ((softc->ipf_flags & FF_LOGGING) || (pass & FR_LOGMASK)) {
 		(void) ipf_dolog(fin, &pass);
 	}
 #endif
 
 	/*
 	 * The FI_STATE flag is cleared here so that calling ipf_state_check
 	 * will work when called from inside of fr_fastroute.  Although
 	 * there is a similar flag, FI_NATED, for NAT, it does have the same
 	 * impact on code execution.
 	 */
 	fin->fin_flx &= ~FI_STATE;
 
 #if defined(FASTROUTE_RECURSION)
 	/*
 	 * Up the reference on fr_lock and exit ipf_mutex. The generation of
 	 * a packet below can sometimes cause a recursive call into IPFilter.
 	 * On those platforms where that does happen, we need to hang onto
 	 * the filter rule just in case someone decides to remove or flush it
 	 * in the meantime.
 	 */
 	if (fr != NULL) {
 		MUTEX_ENTER(&fr->fr_lock);
 		fr->fr_ref++;
 		MUTEX_EXIT(&fr->fr_lock);
 	}
 
 	RWLOCK_EXIT(&softc->ipf_mutex);
 #endif
 
 	if ((pass & FR_RETMASK) != 0) {
 		/*
 		 * Should we return an ICMP packet to indicate error
 		 * status passing through the packet filter ?
 		 * WARNING: ICMP error packets AND TCP RST packets should
 		 * ONLY be sent in response to incoming packets.  Sending
 		 * them in response to outbound packets can result in a
 		 * panic on some operating systems.
 		 */
 		if (!out) {
 			if (pass & FR_RETICMP) {
 				int dst;
 
 				if ((pass & FR_RETMASK) == FR_FAKEICMP)
 					dst = 1;
 				else
 					dst = 0;
 				(void) ipf_send_icmp_err(ICMP_UNREACH, fin,
 							 dst);
 				LBUMP(ipf_stats[0].fr_ret);
 			} else if (((pass & FR_RETMASK) == FR_RETRST) &&
 				   !(fin->fin_flx & FI_SHORT)) {
 				if (((fin->fin_flx & FI_OOW) != 0) ||
 				    (ipf_send_reset(fin) == 0)) {
 					LBUMP(ipf_stats[1].fr_ret);
 				}
 			}
 
 			/*
 			 * When using return-* with auth rules, the auth code
 			 * takes over disposing of this packet.
 			 */
 			if (FR_ISAUTH(pass) && (fin->fin_m != NULL)) {
 				DT1(frb_authcapture, fr_info_t *, fin);
 				fin->fin_m = *fin->fin_mp = NULL;
 				fin->fin_reason = FRB_AUTHCAPTURE;
 				m = NULL;
 			}
 		} else {
 			if (pass & FR_RETRST) {
 				fin->fin_error = ECONNRESET;
 			}
 		}
 	}
 
 	/*
 	 * After the above so that ICMP unreachables and TCP RSTs get
 	 * created properly.
 	 */
 	if (FR_ISBLOCK(pass) && (fin->fin_flx & FI_NEWNAT))
 		ipf_nat_uncreate(fin);
 
 	/*
 	 * If we didn't drop off the bottom of the list of rules (and thus
 	 * the 'current' rule fr is not NULL), then we may have some extra
 	 * instructions about what to do with a packet.
 	 * Once we're finished return to our caller, freeing the packet if
 	 * we are dropping it.
 	 */
 	if (fr != NULL) {
 		frdest_t *fdp;
 
 		/*
 		 * Generate a duplicated packet first because ipf_fastroute
 		 * can lead to fin_m being free'd... not good.
 		 */
 		fdp = fin->fin_dif;
 		if ((fdp != NULL) && (fdp->fd_ptr != NULL) &&
 		    (fdp->fd_ptr != (void *)-1)) {
 			mc = M_COPY(fin->fin_m);
 			if (mc != NULL)
 				ipf_fastroute(mc, &mc, fin, fdp);
 		}
 
 		fdp = fin->fin_tif;
 		if (!out && (pass & FR_FASTROUTE)) {
 			/*
 			 * For fastroute rule, no destination interface defined
 			 * so pass NULL as the frdest_t parameter
 			 */
 			(void) ipf_fastroute(fin->fin_m, mp, fin, NULL);
 			m = *mp = NULL;
 		} else if ((fdp != NULL) && (fdp->fd_ptr != NULL) &&
 			   (fdp->fd_ptr != (struct ifnet *)-1)) {
 			/* this is for to rules: */
 			ipf_fastroute(fin->fin_m, mp, fin, fdp);
 			m = *mp = NULL;
 		}
 
 #if defined(FASTROUTE_RECURSION)
 		(void) ipf_derefrule(softc, &fr);
 #endif
 	}
 #if !defined(FASTROUTE_RECURSION)
 	RWLOCK_EXIT(&softc->ipf_mutex);
 #endif
 
 finished:
 	if (!FR_ISPASS(pass)) {
 		LBUMP(ipf_stats[out].fr_block);
 		if (*mp != NULL) {
 #ifdef _KERNEL
 			FREE_MB_T(*mp);
 #endif
 			m = *mp = NULL;
 		}
 	} else {
 		LBUMP(ipf_stats[out].fr_pass);
 #if defined(_KERNEL) && defined(__sgi)
 		if ((fin->fin_hbuf != NULL) &&
 		    (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) {
 			COPYBACK(fin->fin_m, 0, fin->fin_plen, fin->fin_hbuf);
 		}
 #endif
 	}
 
 	SPL_X(s);
 
 #ifdef _KERNEL
 	if (FR_ISPASS(pass))
 		return 0;
 	LBUMP(ipf_stats[out].fr_blocked[fin->fin_reason]);
 	return fin->fin_error;
 #else /* _KERNEL */
 	if (*mp != NULL)
 		(*mp)->mb_ifp = fin->fin_ifp;
 	blockreason = fin->fin_reason;
 	FR_VERBOSE(("fin_flx %#x pass %#x ", fin->fin_flx, pass));
 	/*if ((pass & FR_CMDMASK) == (softc->ipf_pass & FR_CMDMASK))*/
 		if ((pass & FR_NOMATCH) != 0)
 			return 1;
 
 	if ((pass & FR_RETMASK) != 0)
 		switch (pass & FR_RETMASK)
 		{
 		case FR_RETRST :
 			return 3;
 		case FR_RETICMP :
 			return 4;
 		case FR_FAKEICMP :
 			return 5;
 		}
 
 	switch (pass & FR_CMDMASK)
 	{
 	case FR_PASS :
 		return 0;
 	case FR_BLOCK :
 		return -1;
 	case FR_AUTH :
 		return -2;
 	case FR_ACCOUNT :
 		return -3;
 	case FR_PREAUTH :
 		return -4;
 	}
 	return 2;
 #endif /* _KERNEL */
 }
 
 
 #ifdef	IPFILTER_LOG
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_dolog                                                   */
 /* Returns:     frentry_t* - the rule in fin_fr (never modified here)       */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              passp(IO) - filter decision, rewritten if a log is tried    */
 /*                                                                          */
 /* Decide from the global log flags and the rule's log bits whether this    */
 /* packet is logged as a no-match, a pass or a block, bump the matching     */
 /* counter and hand the packet to ipf_log_pkt().  If that fails for a       */
 /* passing packet with FR_LOGORBLOCK set, the decision becomes a block.     */
 /* ------------------------------------------------------------------------ */
 frentry_t *
 ipf_dolog(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	u_32_t verdict = *passp;
 	int dir = fin->fin_out;
 
 	if ((softc->ipf_flags & FF_LOGNOMATCH) && (verdict & FR_NOMATCH)) {
 		/* Nothing matched and no-match logging is switched on. */
 		verdict |= FF_LOGNOMATCH;
 		LBUMPD(ipf_stats[dir], fr_npkl);
 
 	} else if (((verdict & FR_LOGMASK) == FR_LOGP) ||
 		   (FR_ISPASS(verdict) && (softc->ipf_flags & FF_LOGPASS))) {
 		/* Record the global flag only if the rule did not ask. */
 		if ((verdict & FR_LOGMASK) != FR_LOGP)
 			verdict |= FF_LOGPASS;
 		LBUMPD(ipf_stats[dir], fr_ppkl);
 
 	} else if (((verdict & FR_LOGMASK) == FR_LOGB) ||
 		   (FR_ISBLOCK(verdict) &&
 		    (softc->ipf_flags & FF_LOGBLOCK))) {
 		/* Record the global flag only if the rule did not ask. */
 		if ((verdict & FR_LOGMASK) != FR_LOGB)
 			verdict |= FF_LOGBLOCK;
 		LBUMPD(ipf_stats[dir], fr_bpkl);
 
 	} else {
 		/* Not a packet to log: *passp is left exactly as it was. */
 		return fin->fin_fr;
 	}
 
 	if (ipf_log_pkt(fin, verdict) == -1) {
 		/*
 		 * If the "or-block" option has been used then
 		 * block the packet if we failed to log it.
 		 */
 		if ((verdict & FR_LOGORBLOCK) && FR_ISPASS(verdict)) {
 			DT1(frb_logfail2, u_int, verdict);
 			verdict &= ~FR_CMDMASK;
 			verdict |= FR_BLOCK;
 			fin->fin_reason = FRB_LOGFAIL2;
 		}
 	}
 	*passp = verdict;
 
 	return fin->fin_fr;
 }
 #endif /* IPFILTER_LOG */
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_cksum                                                   */
 /* Returns:     u_short - IP header checksum                                */
 /* Parameters:  addr(I) - pointer to start of buffer to checksum            */
 /*              len(I)  - length of buffer in bytes                         */
 /*                                                                          */
 /* Calculate the 16 bit one's complement (Internet, RFC 1071) checksum of   */
 /* the buffer passed.  An odd trailing byte is treated as the first byte    */
 /* in memory of a zero padded 16 bit word, whatever the host byte order.    */
 /*                                                                          */
 /* N.B.: addr should be 16bit aligned.                                      */
 /* ------------------------------------------------------------------------ */
 u_short
 ipf_cksum(addr, len)
 	u_short *addr;
 	int len;
 {
 	u_32_t sum = 0;
 	u_short last;
 
 	for (; len > 1; len -= 2)
 		sum += *addr++;
 
 	/*
 	 * Mop up an odd byte, if necessary.  The byte is padded on the
 	 * right with a zero byte, so it has to be placed in the first byte
 	 * (in memory order) of a 16 bit word before being added.  Adding
 	 * its plain numeric value is only right on little-endian hosts.
 	 */
 	if (len == 1) {
 		last = 0;
 		*(u_char *)&last = *(u_char *)addr;
 		sum += last;
 	}
 
 	/*
 	 * add back carry outs from top 16 bits to low 16 bits
 	 */
 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
 	sum += (sum >> 16);			/* add carry */
 	return (u_short)(~sum);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    fr_cksum                                                    */
 /* Returns:     u_short - layer 4 checksum                                  */
 /* Parameters:  fin(I)     - pointer to packet information                  */
 /*              ip(I)      - pointer to IP header                           */
 /*              l4proto(I) - protocol to calculate checksum for             */
 /*              l4hdr(I)   - pointer to layer 4 header                      */
 /*                                                                          */
 /* Calculates the layer 4 checksum for the packet described by fin.  The    */
 /* sum is seeded with a pseudo header built from the addresses in "ip",     */
 /* the protocol and the layer 4 length (the seed is dropped for ICMP) and   */
 /* then handed to ipf_pcksum() together with the header offset computed.    */
 /* The checksum field in the layer 4 header is zeroed for the duration of   */
 /* the calculation and put back afterwards.                                 */
 /*                                                                          */
 /* NB: This function assumes we've pullup'd enough for all of the IP header */
 /* and the TCP header.  We also assume that data blocks aren't allocated in */
 /* odd sizes.                                                               */
 /*                                                                          */
 /* Expects ip_len and ip_off to be in network byte order when called.       */
 /* ------------------------------------------------------------------------ */
 u_short
 fr_cksum(fin, ip, l4proto, l4hdr)
 	fr_info_t *fin;
 	ip_t *ip;
 	int l4proto;
 	void *l4hdr;
 {
 	u_short *word, *cksump, saved, l4len;
 	u_int seed, result;
 	int nwords;
 	int start;
 #ifdef	USE_INET6
 	ip6_t *ip6;
 #endif
 
 	cksump = NULL;
 	saved = 0;
 
 	seed = htons((u_short)l4proto);
 
 	/*
 	 * Add up IP Header portion: locate the source address (the
 	 * destination address follows it directly) and note how many 16 bit
 	 * words the pair of addresses occupies.
 	 */
 #ifdef	USE_INET6
 	if (IP_V(ip) == 4) {
 #endif
 		start = IP_HL(ip) << 2;
 		word = (u_short *)&ip->ip_src;
 		nwords = 4;		/* ip_src, ip_dst */
 #ifdef	USE_INET6
 	} else if (IP_V(ip) == 6) {
 		ip6 = (ip6_t *)ip;
 		start = ((char *)fin->fin_dp - (char *)fin->fin_ip);
 		word = (u_short *)&ip6->ip6_src;
 		/* This needs to be routing header aware. */
 		nwords = 16;		/* ip6_src, ip6_dst */
 	} else {
 		return 0xffff;
 	}
 #endif
 	while (nwords-- > 0)
 		seed += *word++;
 
 	/* Length is everything in the packet after the offset found above. */
 	l4len = fin->fin_plen - start;
 	seed += htons(l4len);
 
 	/* Find the checksum field that belongs to this protocol, if any. */
 	if (l4proto == IPPROTO_UDP) {
 		cksump = &((udphdr_t *)l4hdr)->uh_sum;
 	} else if (l4proto == IPPROTO_TCP) {
 		cksump = &((tcphdr_t *)l4hdr)->th_sum;
 	} else if (l4proto == IPPROTO_ICMP) {
 		cksump = &((icmphdr_t *)l4hdr)->icmp_cksum;
 		seed = 0;	/* Pseudo-checksum is not included */
 #ifdef USE_INET6
 	} else if (l4proto == IPPROTO_ICMPV6) {
 		cksump = &((struct icmp6_hdr *)l4hdr)->icmp6_cksum;
 #endif
 	}
 
 	/* The field itself must read as zero while the sum is taken. */
 	if (cksump != NULL) {
 		saved = *cksump;
 		*cksump = 0;
 	}
 
 	result = ipf_pcksum(fin, start, seed);
 
 	if (cksump != NULL)
 		*cksump = saved;
 	return result;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_findgroup                                               */
 /* Returns:     frgroup_t * - NULL = group not found, else pointer to group */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              group(I) - group name to search for                         */
 /*              unit(I)  - device to which this group belongs               */
 /*              set(I)   - which set of rules (active/inactive) this is     */
 /*              fgpp(O)  - pointer to place to store pointer to the pointer */
 /*                         to where to add the next (last) group or where   */
 /*                         to delete group from.                            */
 /*                                                                          */
 /* Walk the group list chosen by unit and set looking for a group whose     */
 /* name matches "group" in its first FR_GROUPLEN characters.                */
 /* ------------------------------------------------------------------------ */
 frgroup_t *
 ipf_findgroup(softc, group, unit, set, fgpp)
 	ipf_main_softc_t *softc;
 	char *group;
 	minor_t unit;
 	int set;
 	frgroup_t ***fgpp;
 {
 	frgroup_t **linkp, *cur;
 
 	/*
 	 * Which list of groups to search in is dependent on which list of
 	 * rules are being operated on.
 	 */
 	linkp = &softc->ipf_groups[unit][set];
 
 	/*
 	 * linkp always addresses the pointer that leads to "cur", so on a
 	 * hit it is the link to unhook and on a miss it is the tail link.
 	 */
 	for (cur = *linkp; cur != NULL; cur = *linkp) {
 		if (strncmp(group, cur->fg_name, FR_GROUPLEN) == 0)
 			break;
 		linkp = &cur->fg_next;
 	}
 
 	if (fgpp != NULL)
 		*fgpp = linkp;
 	return cur;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_group_add                                               */
 /* Returns:     frgroup_t * - NULL == did not create group,                 */
 /*                            != NULL == pointer to the group               */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              group(I) - name of the group to add                         */
 /*              head(I)  - rule pointer that is using this as the head      */
 /*              flags(I) - rule flags which describe the type of rule it is */
 /*              unit(I)  - device to which this group will belong to        */
 /*              set(I)   - which set of rules (active/inactive) this is     */
 /* Write Locks: ipf_mutex                                                   */
 /*                                                                          */
 /* Add a new group head, or if it already exists, increase the reference    */
 /* count to it.  NULL is returned, and nothing is changed, when the name is */
 /* NULL, is empty for IPL_LOGIPF, is too long to be stored in fg_name, the  */
 /* in/out flags clash with those of the existing group or the allocation    */
 /* of a new group fails.                                                    */
 /* ------------------------------------------------------------------------ */
 frgroup_t *
 ipf_group_add(softc, group, head, flags, unit, set)
 	ipf_main_softc_t *softc;
 	char *group;
 	void *head;
 	u_32_t flags;
 	minor_t unit;
 	int set;
 {
 	frgroup_t *fg, **fgp;
 	u_32_t gflags;
 
 	if (group == NULL)
 		return NULL;
 
 	if (unit == IPL_LOGIPF && *group == '\0')
 		return NULL;
 
 	fgp = NULL;
 	gflags = flags & FR_INOUT;
 
 	fg = ipf_findgroup(softc, group, unit, set, &fgp);
 	if (fg != NULL) {
 		/*
 		 * Check that the directions agree before touching the group
 		 * so that a refused request does not leave fg_head pointing
 		 * at the rule of the caller that was turned away.
 		 */
 		if ((fg->fg_flags != 0) && (gflags != fg->fg_flags))
 			return NULL;
 		if (fg->fg_flags == 0)
 			fg->fg_flags = gflags;
 		if (fg->fg_head == NULL && head != NULL)
 			fg->fg_head = head;
 		fg->fg_ref++;
 		return fg;
 	}
 
 	/*
 	 * The name is copied, terminator included, into storage that is
 	 * part of the group structure, so refuse any name that would not
 	 * fit rather than write beyond the end of fg_name.
 	 */
 	if (strlen(group) >= sizeof(fg->fg_name))
 		return NULL;
 
 	KMALLOC(fg, frgroup_t *);
 	if (fg != NULL) {
 		fg->fg_head = head;
 		fg->fg_start = NULL;
 		/* fgp is the link ipf_findgroup() stopped at: insert there. */
 		fg->fg_next = *fgp;
 		bcopy(group, fg->fg_name, strlen(group) + 1);
 		fg->fg_flags = gflags;
 		fg->fg_ref = 1;
 		fg->fg_set = &softc->ipf_groups[unit][set];
 		*fgp = fg;
 	}
 	return fg;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_group_del                                               */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              group(I) - pointer to the group being released              */
 /*              fr(I)    - filter rule from which group is referenced       */
 /* Write Locks: ipf_mutex                                                   */
 /*                                                                          */
 /* Drop one reference to a group.  If fr is the rule recorded as the head   */
 /* of the group then that link is cleared first.  The group is free'd once  */
 /* its reference count is zero and it no longer holds any rules.            */
 /* ------------------------------------------------------------------------ */
 void
 ipf_group_del(softc, group, fr)
 	ipf_main_softc_t *softc;
 	frgroup_t *group;
 	frentry_t *fr;
 {
 
 	/* The head rule is going away: forget it. */
 	if (fr == group->fg_head)
 		group->fg_head = NULL;
 
 	group->fg_ref -= 1;
 	if ((group->fg_ref != 0) || (group->fg_start != NULL))
 		return;
 
 	ipf_group_free(group);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_group_free                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  group(I) - pointer to filter rule group                     */
 /*                                                                          */
 /* Unlink the group from the list it was added to (fg_set) and free it.     */
 /* The group is free'd even if it is not found on that list.                */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_group_free(group)
 	frgroup_t *group;
 {
 	frgroup_t **linkp = group->fg_set;
 
 	/* Look for the link that points at this group and bypass it. */
 	while (*linkp != NULL) {
 		if (*linkp == group) {
 			*linkp = group->fg_next;
 			break;
 		}
 		linkp = &(*linkp)->fg_next;
 	}
 
 	KFREE(group);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_group_flush                                             */
 /* Returns:     int      - number of rules flushed from group               */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              group(I) - pointer to filter rule group                     */
 /*                                                                          */
 /* Remove all of the rules that currently are listed under the given group. */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_group_flush(softc, group)
 	ipf_main_softc_t *softc;
 	frgroup_t *group;
 {
 	int nflushed;
 
 	/* ipf_flushlist() adds to the counter, so it must start at zero. */
 	nflushed = 0;
 	(void) ipf_flushlist(softc, &nflushed, &group->fg_start);
 
 	return nflushed;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_getrulen                                                */
 /* Returns:     frentry_t * - NULL == not found, else pointer to rule n     */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              unit(I)  - device for which to count the rule's number      */
 /*              group(I) - group name                                       */
 /*              n(I)     - rule number to find                              */
 /*                                                                          */
 /* Find rule # n (counting from 0) in the named group of the active set and */
 /* return a pointer to it.  Return NULL if the group doesn't exist or it    */
 /* holds n or fewer rules.                                                  */
 /* ------------------------------------------------------------------------ */
 frentry_t *
 ipf_getrulen(softc, unit, group, n)
 	ipf_main_softc_t *softc;
 	int unit;
 	char *group;
 	u_32_t n;
 {
 	frgroup_t *grp;
 	frentry_t *rule;
 
 	grp = ipf_findgroup(softc, group, unit, softc->ipf_active, NULL);
 	if (grp == NULL)
 		return NULL;
 
 	/* Step forward n rules, or until the list runs out. */
 	rule = grp->fg_start;
 	while ((rule != NULL) && (n != 0)) {
 		rule = rule->fr_next;
 		n--;
 	}
 
 	/* A non-zero remainder means the list was too short. */
 	return (n == 0) ? rule : NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_flushlist                                               */
 /* Returns:     int - >= 0 - number of flushed rules                        */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              nfreedp(O) - pointer to int where flush count is stored     */
 /*              listp(I)   - pointer to list to flush pointer               */
 /* Write Locks: ipf_mutex                                                   */
 /*                                                                          */
 /* Unlink every flushable rule on the list and dereference it, first        */
 /* flushing any groups the rule heads (this recurses).  The count           */
 /* returned covers this list plus the groups flushed on its behalf and      */
 /* the same number is added to *nfreedp.                                    */
 /*                                                                          */
 /* NOTE: Built-in rules and rules not loaded from user space (FR_COPIED     */
 /* clear) are left on the list.                                             */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_flushlist(softc, nfreedp, listp)
 	ipf_main_softc_t *softc;
 	int *nfreedp;
 	frentry_t **listp;
 {
 	frentry_t *rule, *next;
 	int count;
 
 	count = 0;
 	for (rule = *listp; rule != NULL; rule = *listp) {
 		/* Step over the rules that may not be flushed. */
 		if (((rule->fr_type & FR_T_BUILTIN) != 0) ||
 		    ((rule->fr_flags & FR_COPIED) == 0)) {
 			listp = &rule->fr_next;
 			continue;
 		}
 
 		/* Take the rule off the list. */
 		next = rule->fr_next;
 		*listp = next;
 		if (next != NULL)
 			next->fr_pnext = rule->fr_pnext;
 		rule->fr_pnext = NULL;
 
 		/* Empty the groups this rule is the head of. */
 		if (rule->fr_grphead != NULL) {
 			count += ipf_group_flush(softc, rule->fr_grphead);
 			rule->fr_names[rule->fr_grhead] = '\0';
 		}
 
 		if (rule->fr_icmpgrp != NULL) {
 			count += ipf_group_flush(softc, rule->fr_icmpgrp);
 			rule->fr_names[rule->fr_icmphead] = '\0';
 		}
 
 		if (rule->fr_srctrack.ht_max_nodes != 0)
 			ipf_rb_ht_flush(&rule->fr_srctrack);
 
 		rule->fr_next = NULL;
 
 		/* Only rules for which ipf_derefrule() returns 0 are counted. */
 		ASSERT(rule->fr_ref > 0);
 		if (ipf_derefrule(softc, &rule) == 0)
 			count++;
 	}
 
 	*nfreedp += count;
 	return count;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_flush                                                   */
 /* Returns:     int - >= 0 - number of flushed rules                        */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              unit(I)  - device for which to flush rules                  */
 /*              flags(I) - which set of rules to flush                      */
 /*                                                                          */
 /* Flush the in and/or out rule and accounting lists of the set selected    */
 /* by flags (active, or inactive with FR_INACTIVE), then the groups that    */
 /* belong to unit.  A flush of IPL_LOGIPF goes on to flush IPL_LOGCOUNT.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_flush(softc, unit, flags)
 	ipf_main_softc_t *softc;
 	minor_t unit;
 	int flags;
 {
 	int total = 0, set, dir, extra;
 
 	WRITE_ENTER(&softc->ipf_mutex);
 
 	set = softc->ipf_active;
 	if ((flags & FR_INACTIVE) == FR_INACTIVE)
 		set = 1 - set;
 
 	/* The FR_OUTQUE lists (index 1) go first, then FR_INQUE (index 0). */
 	for (dir = 1; dir >= 0; dir--) {
 		if ((flags & ((dir == 1) ? FR_OUTQUE : FR_INQUE)) == 0)
 			continue;
 		ipf_flushlist(softc, &total, &softc->ipf_rules[dir][set]);
 		ipf_flushlist(softc, &total, &softc->ipf_acct[dir][set]);
 	}
 
 	total += ipf_flush_groups(softc, &softc->ipf_groups[unit][set],
 				  flags & (FR_INQUE|FR_OUTQUE));
 
 	RWLOCK_EXIT(&softc->ipf_mutex);
 
 	if (unit != IPL_LOGIPF)
 		return total;
 
 	/* The lock was released above, so the nested call can take it. */
 	extra = ipf_flush(softc, IPL_LOGCOUNT, flags);
 	if (extra >= 0)
 		total += extra;
 	return total;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_flush_groups                                            */
 /* Returns:     int - >= 0 - number of flushed rules                        */
 /* Parameters:  softc(I)  - soft context pointer to work with               */
 /*              grhead(I) - pointer to the start of the group list to flush */
 /*              flags(I)  - which set of rules to flush                     */
 /*                                                                          */
 /* Walk through all of the groups under the given group head and remove all */
 /* of those that match the flags passed in. The for loop here is bit more   */
 /* complicated than usual because the removal of a rule with ipf_derefrule  */
 /* may end up removing not only the structure pointed to by "fg" but also   */
 /* what is fg_next and fg_next after that. So if a filter rule is actually  */
 /* removed from the group then it is necessary to start again.              */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_flush_groups(softc, grhead, flags)
 	ipf_main_softc_t *softc;
 	frgroup_t **grhead;
 	int flags;
 {
 	frentry_t *fr, **frp;
 	frgroup_t *fg, **fgp;
 	int flushed = 0;
 	int removed = 0;
 
 	/*
 	 * fgp is only moved on when a pass over a group removed nothing;
 	 * otherwise the next iteration reloads fg from *fgp.
 	 */
 	for (fgp = grhead; (fg = *fgp) != NULL; ) {
 		/* Skip ahead to the next group whose flags match. */
 		while ((fg != NULL) && ((fg->fg_flags & flags) == 0))
 			fg = fg->fg_next;
 		if (fg == NULL)
 			break;
 		removed = 0;
 		frp = &fg->fg_start;
 		/* Stop at the first rule removed: fg is not used after that. */
 		while ((removed == 0) && ((fr = *frp) != NULL)) {
 			if ((fr->fr_flags & flags) == 0) {
 				frp = &fr->fr_next;
 			} else {
 				/* Unlink the rule, then give up the reference. */
 				if (fr->fr_next != NULL)
 					fr->fr_next->fr_pnext = fr->fr_pnext;
 				*frp = fr->fr_next;
 				fr->fr_pnext = NULL;
 				fr->fr_next = NULL;
 				(void) ipf_derefrule(softc, &fr);
 				/* Counted whatever ipf_derefrule() returned. */
 				flushed++;
 				removed++;
 			}
 		}
 		/* Nothing left to remove in this group: move past it. */
 		if (removed == 0)
 			fgp = &fg->fg_next;
 	}
 	return flushed;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    memstr                                                      */
 /* Returns:     char *  - NULL if failed, != NULL pointer to matching bytes */
 /* Parameters:  src(I)  - pointer to byte sequence to match                 */
 /*              dst(I)  - pointer to byte sequence to search                */
 /*              slen(I) - match length                                      */
 /*              dlen(I) - length available to search in                     */
 /*                                                                          */
 /* Look for the first place in the dlen bytes at dst where the slen bytes   */
 /* at src occur.  A zero slen matches at dst itself.                        */
 /* ------------------------------------------------------------------------ */
 char *
 memstr(src, dst, slen, dlen)
 	const char *src;
 	char *dst;
 	size_t slen, dlen;
 {
 	char *cursor;
 	size_t remaining;
 
 	/* Slide along dst for as long as a whole match could still fit. */
 	for (cursor = dst, remaining = dlen; remaining >= slen;
 	     cursor++, remaining--) {
 		if (bcmp(src, cursor, slen) == 0)
 			return cursor;
 	}
 	return NULL;
 }
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_fixskip                                                 */
 /* Returns:     Nil                                                         */
 /* Parameters:  listp(IO)    - pointer to start of list with skip rule      */
 /*              rp(I)        - rule added/removed with skip in it.          */
 /*              addremove(I) - adjustment (-1/+1) to make to skip count,    */
 /*                             depending on whether a rule was just added   */
 /*                             or removed.                                  */
 /*                                                                          */
 /* For every skip rule ahead of rp in the list whose skip count reaches rp  */
 /* or beyond, adjust that count by addremove.  Nothing is changed if rp is  */
 /* not on the list.                                                         */
 /* ------------------------------------------------------------------------ */
 void
 ipf_fixskip(listp, rp, addremove)
 	frentry_t **listp, *rp;
 	int addremove;
 {
 	frentry_t *cur;
 	int pos, idx;
 
 	/* Work out how many rules sit in front of rp. */
 	pos = 0;
 	cur = *listp;
 	while ((cur != NULL) && (cur != rp)) {
 		pos++;
 		cur = cur->fr_next;
 	}
 
 	/* Ran off the end of the list without meeting rp. */
 	if (cur == NULL)
 		return;
 
 	idx = 0;
 	for (cur = *listp; (cur != NULL) && (cur != rp); cur = cur->fr_next) {
 		if (FR_ISSKIP(cur->fr_flags) && (idx + cur->fr_arg >= pos))
 			cur->fr_arg += addremove;
 		idx++;
 	}
 }
 
 
 #ifdef	_KERNEL
 /* ------------------------------------------------------------------------ */
 /* Function:    count4bits                                                  */
 /* Returns:     int - >= 0 - number of consecutive bits in input            */
 /* Parameters:  ip(I) - 32bit IP address                                    */
 /*                                                                          */
 /* IPv4 ONLY                                                                */
 /* Count the consecutive 1's at the top of the mask (given in network byte  */
 /* order).  If a mask built from that many 1's differs from the one passed  */
 /* in, return -1, else return the number of bits.                           */
 /* ------------------------------------------------------------------------ */
 int
 count4bits(ip)
 	u_32_t	ip;
 {
 	u_32_t	host, probe, mask;
 	int	ones, bit;
 
 	host = ntohl(ip);
 
 	/* Count 1's from the top bit down to the first 0. */
 	ones = 0;
 	for (probe = host, bit = 0; bit < 32; bit++, probe <<= 1) {
 		if ((probe & 0x80000000) == 0)
 			break;
 		ones++;
 	}
 
 	/* Build the mask that has exactly that many leading 1's. */
 	mask = 0;
 	for (bit = 0; bit < 32; bit++) {
 		mask <<= 1;
 		if (bit < ones)
 			mask |= 1;
 	}
 
 	return (mask == host) ? ones : -1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    count6bits                                                  */
 /* Returns:     int - >= 0 - number of consecutive bits in input            */
 /* Parameters:  msk(I) - pointer to start of IPv6 bitmask                   */
 /*                                                                          */
 /* IPv6 ONLY                                                                */
 /* Count the 1 bits in the four 32 bit words of the mask.  For a mask made  */
 /* up of consecutive leading 1's this is the prefix length.                 */
 /* ------------------------------------------------------------------------ */
 # ifdef USE_INET6
 int
 count6bits(msk)
 	u_32_t *msk;
 {
 	int bits = 0, word;
 	u_32_t rem;
 
 	/* The four words of the mask are visited from last to first. */
 	for (word = 3; word >= 0; word--) {
 		/* A word of all 1's needs no bit by bit counting. */
 		if (msk[word] == 0xffffffff) {
 			bits += 32;
 			continue;
 		}
 
 		/* Count each set bit as it is shifted through the top bit. */
 		rem = msk[word];
 		while (rem != 0) {
 			if ((rem & 0x80000000) != 0)
 				bits++;
 			rem <<= 1;
 		}
 	}
 	return bits;
 }
 # endif
 #endif /* _KERNEL */
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_synclist                                                */
 /* Returns:     int    - 0 = no failures, else indication of first failure  */
 /* Parameters:  fr(I)  - start of filter list to sync interface names for   */
 /*              ifp(I) - interface pointer for limiting sync lookups        */
 /* Write Locks: ipf_mutex                                                   */
 /*                                                                          */
 /* Walk through a list of filter rules and resolve any interface names into */
 /* pointers.  Where dynamic addresses are used, also update the IP address  */
 /* used in the rule.  The interface pointer is used to limit the lookups to */
 /* a specific set of matching names if it is non-NULL.                      */
 /* Errors can occur when resolving the destination name of to/dup-to fields */
 /* when the name points to a pool and that pool does not exist. If this     */
 /* does happen then it is necessary to check if there are any lookup refs   */
 /* that need to be dropped before returning with an error.                  */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_synclist(softc, fr, ifp)
 	ipf_main_softc_t *softc;
 	frentry_t *fr;
 	void *ifp;
 {
 	frentry_t *frt, *start = fr;
 	frdest_t *fdp;
 	char *name;
 	int error;
 	void *ifa;
 	int v, i;
 
 	error = 0;
 
 	for (; fr; fr = fr->fr_next) {
 		if (fr->fr_family == AF_INET)
 			v = 4;
 		else if (fr->fr_family == AF_INET6)
 			v = 6;
 		else
 			v = 0;
 
 		/*
 		 * Lookup all the interface names that are part of the rule.
 		 */
 		for (i = 0; i < 4; i++) {
 			if ((ifp != NULL) && (fr->fr_ifas[i] != ifp))
 				continue;
 			if (fr->fr_ifnames[i] == -1)
 				continue;
 			name = FR_NAME(fr, fr_ifnames[i]);
 			fr->fr_ifas[i] = ipf_resolvenic(softc, name, v);
 		}
 
 		if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) {
 			if (fr->fr_satype != FRI_NORMAL &&
 			    fr->fr_satype != FRI_LOOKUP) {
 				ifa = ipf_resolvenic(softc, fr->fr_names +
 						     fr->fr_sifpidx, v);
 				ipf_ifpaddr(softc, v, fr->fr_satype, ifa,
 					    &fr->fr_src6, &fr->fr_smsk6);
 			}
 			if (fr->fr_datype != FRI_NORMAL &&
 			    fr->fr_datype != FRI_LOOKUP) {
 				ifa = ipf_resolvenic(softc, fr->fr_names +
 						     fr->fr_sifpidx, v);
 				ipf_ifpaddr(softc, v, fr->fr_datype, ifa,
 					    &fr->fr_dst6, &fr->fr_dmsk6);
 			}
 		}
 
 		fdp = &fr->fr_tifs[0];
 		if ((ifp == NULL) || (fdp->fd_ptr == ifp)) {
 			error = ipf_resolvedest(softc, fr->fr_names, fdp, v);
 			if (error != 0)
 				goto unwind;
 		}
 
 		fdp = &fr->fr_tifs[1];
 		if ((ifp == NULL) || (fdp->fd_ptr == ifp)) {
 			error = ipf_resolvedest(softc, fr->fr_names, fdp, v);
 			if (error != 0)
 				goto unwind;
 		}
 
 		fdp = &fr->fr_dif;
 		if ((ifp == NULL) || (fdp->fd_ptr == ifp)) {
 			error = ipf_resolvedest(softc, fr->fr_names, fdp, v);
 			if (error != 0)
 				goto unwind;
 		}
 
 		if (((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) &&
 		    (fr->fr_satype == FRI_LOOKUP) && (fr->fr_srcptr == NULL)) {
 			fr->fr_srcptr = ipf_lookup_res_num(softc,
 							   fr->fr_srctype,
 							   IPL_LOGIPF,
 							   fr->fr_srcnum,
 							   &fr->fr_srcfunc);
 		}
 		if (((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) &&
 		    (fr->fr_datype == FRI_LOOKUP) && (fr->fr_dstptr == NULL)) {
 			fr->fr_dstptr = ipf_lookup_res_num(softc,
 							   fr->fr_dsttype,
 							   IPL_LOGIPF,
 							   fr->fr_dstnum,
 							   &fr->fr_dstfunc);
 		}
 	}
 	return 0;
 
 unwind:
 	for (frt = start; frt != fr; fr = fr->fr_next) {
 		if (((frt->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) &&
 		    (frt->fr_satype == FRI_LOOKUP) && (frt->fr_srcptr != NULL))
 				ipf_lookup_deref(softc, frt->fr_srctype,
 						 frt->fr_srcptr);
 		if (((frt->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) &&
 		    (frt->fr_datype == FRI_LOOKUP) && (frt->fr_dstptr != NULL))
 				ipf_lookup_deref(softc, frt->fr_dsttype,
 						 frt->fr_dstptr);
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_sync                                                    */
 /* Returns:     int      - always 0                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              ifp(I)   - interface pointer passed on to each sync routine */
 /*                                                                          */
 /* ipf_sync() is called when we suspect that the interface list or          */
 /* information about interfaces (like IP#) has changed.  Unless built for   */
 /* SOLARIS, the NAT, state and lookup sync routines are called first; then, */
 /* with ipf_mutex write-locked, ipf_synclist() is run over the active       */
 /* accounting and filter lists and over every group's rule list.  Errors    */
 /* returned by ipf_synclist() are ignored.                                  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_sync(softc, ifp)
 	ipf_main_softc_t *softc;
 	void *ifp;
 {
 	frgroup_t *grp;
 	int unit, idx;
 
 # if !SOLARIS
 	ipf_nat_sync(softc, ifp);
 	ipf_state_sync(softc, ifp);
 	ipf_lookup_sync(softc, ifp);
 # endif
 
 	WRITE_ENTER(&softc->ipf_mutex);
 
 	/* Accounting lists first, then the filter lists, index 0 before 1. */
 	for (idx = 0; idx < 2; idx++) {
 		(void) ipf_synclist(softc,
 				    softc->ipf_acct[idx][softc->ipf_active],
 				    ifp);
 	}
 	for (idx = 0; idx < 2; idx++) {
 		(void) ipf_synclist(softc,
 				    softc->ipf_rules[idx][softc->ipf_active],
 				    ifp);
 	}
 
 	/* Every group of every unit, for both group tables. */
 	for (unit = 0; unit < IPL_LOGSIZE; unit++) {
 		for (idx = 0; idx < 2; idx++) {
 			grp = softc->ipf_groups[unit][idx];
 			while (grp != NULL) {
 				(void) ipf_synclist(softc, grp->fg_start, ifp);
 				grp = grp->fg_next;
 			}
 		}
 	}
 
 	RWLOCK_EXIT(&softc->ipf_mutex);
 
 	return 0;
 }
 
 
 /*
  * In the functions below, bcopy() is called because the pointer being
  * copied _from_ in this instance is a pointer to a char buf (which could
  * end up being unaligned) and on the kernel's local stack.
  */
 /* ------------------------------------------------------------------------ */
 /* Function:    copyinptr                                                   */
 /* Returns:     int - 0 = success, else failure                             */
 /* Parameters:  src(I)  - pointer to the source address                     */
 /*              dst(I)  - destination address                               */
 /*              size(I) - number of bytes to copy                           */
 /*                                                                          */
 /* Copy a block of data in from user space, given a pointer to the pointer  */
 /* to start copying from (src) and a pointer to where to store it (dst).    */
 /* NB: src - pointer to user space pointer, dst - kernel space pointer      */
 /* ------------------------------------------------------------------------ */
 int
 copyinptr(softc, src, dst, size)
 	ipf_main_softc_t *softc;
 	void *src, *dst;
 	size_t size;
 {
 	caddr_t ca;
 	int error;
 
 # if SOLARIS
 	error = COPYIN(src, &ca, sizeof(ca));
 	if (error != 0)
 		return error;
 # else
 	bcopy(src, (caddr_t)&ca, sizeof(ca));
 # endif
 	error = COPYIN(ca, dst, size);
 	if (error != 0) {
 		IPFERROR(3);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    copyoutptr                                                  */
 /* Returns:     int - 0 = success, else failure                             */
 /* Parameters:  src(I)  - pointer to the source address                     */
 /*              dst(I)  - destination address                               */
 /*              size(I) - number of bytes to copy                           */
 /*                                                                          */
 /* Copy a block of data out to user space, given a pointer to the pointer   */
 /* to start copying from (src) and a pointer to where to store it (dst).    */
 /* NB: src - kernel space pointer, dst - pointer to user space pointer.     */
 /* ------------------------------------------------------------------------ */
 int
 copyoutptr(softc, src, dst, size)
 	ipf_main_softc_t *softc;
 	void *src, *dst;
 	size_t size;
 {
 	caddr_t ca;
 	int error;
 
 	bcopy(dst, (caddr_t)&ca, sizeof(ca));
 	error = COPYOUT(src, ca, size);
 	if (error != 0) {
 		IPFERROR(4);
 		error = EFAULT;
 	}
 	return error;
 }
 #ifdef	_KERNEL
 #endif
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_lock                                                    */
 /* Returns:     int       - 0 = success, EFAULT if either copy fails        */
 /* Parameters:  data(I/O) - holds the new lock value on entry and receives  */
 /*                          the previous lock value on success              */
 /*              lockp(O)  - pointer to the lock integer being updated       */
 /*                                                                          */
 /* Read the requested lock value from data, copy the current value of       */
 /* *lockp back out to data and only then store the new value in *lockp.     */
 /* If either copy fails, *lockp is left untouched.                          */
 /* ------------------------------------------------------------------------ */
 int
 ipf_lock(data, lockp)
 	caddr_t data;
 	int *lockp;
 {
 	int newval;
 
 	/* Fetch the value the caller wants the lock set to. */
 	if (BCOPYIN(data, &newval, sizeof(newval)) != 0)
 		return EFAULT;
 
 	/* Hand the current value back through the same buffer. */
 	if (BCOPYOUT(lockp, data, sizeof(*lockp)) != 0)
 		return EFAULT;
 
 	*lockp = newval;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_getstat                                                 */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              fiop(I)  - pointer to ipfilter stats structure              */
 /*              rev(I)   - version claim by program doing ioctl             */
 /*                                                                          */
 /* Stores a copy of current pointers, counters, etc, in the friostat        */
 /* structure.                                                               */
 /* If IPFILTER_COMPAT is compiled, we pretend to be whatever version the    */
 /* program is looking for. This ensure that validation of the version it    */
 /* expects will always succeed. Thus kernels with IPFILTER_COMPAT will      */
 /* allow older binaries to work but kernels without it will not.            */
 /* ------------------------------------------------------------------------ */
 /*ARGSUSED*/
 static void
 ipf_getstat(softc, fiop, rev)
 	ipf_main_softc_t *softc;
 	friostat_t *fiop;
 	int rev;
 {
 	int i;
 
 	bcopy((char *)softc->ipf_stats, (char *)fiop->f_st,
 	      sizeof(ipf_statistics_t) * 2);
 	fiop->f_locks[IPL_LOGSTATE] = -1;
 	fiop->f_locks[IPL_LOGNAT] = -1;
 	fiop->f_locks[IPL_LOGIPF] = -1;
 	fiop->f_locks[IPL_LOGAUTH] = -1;
 
 	fiop->f_ipf[0][0] = softc->ipf_rules[0][0];
 	fiop->f_acct[0][0] = softc->ipf_acct[0][0];
 	fiop->f_ipf[0][1] = softc->ipf_rules[0][1];
 	fiop->f_acct[0][1] = softc->ipf_acct[0][1];
 	fiop->f_ipf[1][0] = softc->ipf_rules[1][0];
 	fiop->f_acct[1][0] = softc->ipf_acct[1][0];
 	fiop->f_ipf[1][1] = softc->ipf_rules[1][1];
 	fiop->f_acct[1][1] = softc->ipf_acct[1][1];
 
 	fiop->f_ticks = softc->ipf_ticks;
 	fiop->f_active = softc->ipf_active;
 	fiop->f_froute[0] = softc->ipf_frouteok[0];
 	fiop->f_froute[1] = softc->ipf_frouteok[1];
 	fiop->f_rb_no_mem = softc->ipf_rb_no_mem;
 	fiop->f_rb_node_max = softc->ipf_rb_node_max;
 
 	fiop->f_running = softc->ipf_running;
 	for (i = 0; i < IPL_LOGSIZE; i++) {
 		fiop->f_groups[i][0] = softc->ipf_groups[i][0];
 		fiop->f_groups[i][1] = softc->ipf_groups[i][1];
 	}
 #ifdef  IPFILTER_LOG
 	fiop->f_log_ok = ipf_log_logok(softc, IPL_LOGIPF);
 	fiop->f_log_fail = ipf_log_failures(softc, IPL_LOGIPF);
 	fiop->f_logging = 1;
 #else
 	fiop->f_log_ok = 0;
 	fiop->f_log_fail = 0;
 	fiop->f_logging = 0;
 #endif
 	fiop->f_defpass = softc->ipf_pass;
 	fiop->f_features = ipf_features;
 
 #ifdef IPFILTER_COMPAT
 	sprintf(fiop->f_version, "IP Filter: v%d.%d.%d",
 		(rev / 1000000) % 100,
 		(rev / 10000) % 100,
 		(rev / 100) % 100);
 #else
 	rev = rev;
 	(void) strncpy(fiop->f_version, ipfilter_version,
 		       sizeof(fiop->f_version));
 #endif
 }
 
 
 #ifdef	USE_INET6
 /*
  * Table indexed by ICMP (IPv4) message type.  Each entry holds the ICMPv6
  * constant listed for that type, or -1 (presumably meaning there is no
  * ICMPv6 counterpart - confirm against the users of this table).
  * Initialisers are given for types 0-18; should ICMP_MAXTYPE be larger than
  * 18, the remaining elements are zero-initialised by the compiler.
  */
 int icmptoicmp6types[ICMP_MAXTYPE+1] = {
 	ICMP6_ECHO_REPLY,	/* 0: ICMP_ECHOREPLY */
 	-1,			/* 1: UNUSED */
 	-1,			/* 2: UNUSED */
 	ICMP6_DST_UNREACH,	/* 3: ICMP_UNREACH */
 	-1,			/* 4: ICMP_SOURCEQUENCH */
 	ND_REDIRECT,		/* 5: ICMP_REDIRECT */
 	-1,			/* 6: UNUSED */
 	-1,			/* 7: UNUSED */
 	ICMP6_ECHO_REQUEST,	/* 8: ICMP_ECHO */
 	-1,			/* 9: UNUSED */
 	-1,			/* 10: UNUSED */
 	ICMP6_TIME_EXCEEDED,	/* 11: ICMP_TIMXCEED */
 	ICMP6_PARAM_PROB,	/* 12: ICMP_PARAMPROB */
 	-1,			/* 13: ICMP_TSTAMP */
 	-1,			/* 14: ICMP_TSTAMPREPLY */
 	-1,			/* 15: ICMP_IREQ */
 	-1,			/* 16: ICMP_IREQREPLY */
 	-1,			/* 17: ICMP_MASKREQ */
 	-1,			/* 18: ICMP_MASKREPLY */
 };
 
 
 /*
  * Table indexed by ICMP (IPv4) "destination unreachable" code.  Each entry
  * holds the ICMPv6 unreachable code listed for it, or -1 (presumably: no
  * ICMPv6 counterpart).  Initialisers are given for codes 0-13.
  */
 int	icmptoicmp6unreach[ICMP_MAX_UNREACH] = {
 	ICMP6_DST_UNREACH_ADDR,		/* 0: ICMP_UNREACH_NET */
 	ICMP6_DST_UNREACH_ADDR,		/* 1: ICMP_UNREACH_HOST */
 	-1,				/* 2: ICMP_UNREACH_PROTOCOL */
 	ICMP6_DST_UNREACH_NOPORT,	/* 3: ICMP_UNREACH_PORT */
 	-1,				/* 4: ICMP_UNREACH_NEEDFRAG */
 	ICMP6_DST_UNREACH_NOTNEIGHBOR,	/* 5: ICMP_UNREACH_SRCFAIL */
 	ICMP6_DST_UNREACH_ADDR,		/* 6: ICMP_UNREACH_NET_UNKNOWN */
 	ICMP6_DST_UNREACH_ADDR,		/* 7: ICMP_UNREACH_HOST_UNKNOWN */
 	-1,				/* 8: ICMP_UNREACH_ISOLATED */
 	ICMP6_DST_UNREACH_ADMIN,	/* 9: ICMP_UNREACH_NET_PROHIB */
 	ICMP6_DST_UNREACH_ADMIN,	/* 10: ICMP_UNREACH_HOST_PROHIB */
 	-1,				/* 11: ICMP_UNREACH_TOSNET */
 	-1,				/* 12: ICMP_UNREACH_TOSHOST */
 	ICMP6_DST_UNREACH_ADMIN,	/* 13: ICMP_UNREACH_ADMIN_PROHIBIT */
 };
 /*
  * Per ICMPv6 type reply table consulted by ipf_matchicmpqueryreply() for
  * reverse-direction packets.  No initialiser is given here, so it starts
  * out all zero; it is presumably filled in elsewhere - TODO confirm.
  */
 int	icmpreplytype6[ICMP6_MAXTYPE + 1];
 #endif
 
 /*
  * IPv4 equivalent of the reply table above: indexed by ICMP type and read
  * by ipf_matchicmpqueryreply().  Zero-initialised here; presumably filled
  * in elsewhere - TODO confirm.
  */
 int	icmpreplytype4[ICMP_MAXTYPE + 1];
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_matchicmpqueryreply                                     */
 /* Returns:     int - 1 if "icmp" is a valid reply to "ic" else 0.          */
 /* Parameters:  v(I)    - IP protocol version (4 or 6)                      */
 /*              ic(I)   - ICMP information                                  */
 /*              icmp(I) - ICMP packet header                                */
 /*              rev(I)  - direction (0 = forward/1 = reverse) of packet     */
 /*                                                                          */
 /* Decide whether the ICMP header in icmp belongs to the exchange recorded  */
 /* in ic.  In the forward direction the packet type must equal the recorded */
 /* type; in the reverse direction it must equal the reply type the per      */
 /* version reply table holds for the recorded type.  Echo replies must, in  */
 /* addition, carry the recorded id.  Any other IP version never matches.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_matchicmpqueryreply(v, ic, icmp, rev)
 	int v;
 	icmpinfo_t *ic;
 	icmphdr_t *icmp;
 	int rev;
 {
 	int *replytab;
 	int ictype, echoreply, expected;
 
 	ictype = ic->ici_type;
 
 	/* Select the reply table and echo-reply type for this version. */
 	if (v == 4) {
 		replytab = icmpreplytype4;
 		echoreply = ICMP_ECHOREPLY;
 	}
 #ifdef	USE_INET6
 	else if (v == 6) {
 		replytab = icmpreplytype6;
 		echoreply = ICMP6_ECHO_REPLY;
 	}
 #endif
 	else {
 		return 0;
 	}
 
 	/*
 	 * If we matched its type on the way in, then when going out
 	 * it will still be the same type.  The reply table is only
 	 * consulted for reverse-direction packets.
 	 */
 	expected = rev ? replytab[ictype] : ictype;
 	if (icmp->icmp_type != expected)
 		return 0;
 
 	/* Only echo replies are additionally tied to the recorded id. */
 	if (icmp->icmp_type != echoreply)
 		return 1;
 
 	return (icmp->icmp_id == ic->ici_id) ? 1 : 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_rule_compare                                            */
 /* Parameters:  fr1(I) - first rule structure to compare                    */
 /*              fr2(I) - second rule structure to compare                   */
 /* Returns:     int    - 0 == rules are the same, else mismatch             */
 /*                                                                          */
 /* Run a fixed sequence of checks over the two rules and report the number  */
 /* of the first one that fails: 1 checksum, 2 rule size, 3 data size,       */
 /* 4 rule bytes from fr_func to the end of the rule, 5/6 only one of the    */
 /* rules has data attached, 7 attached data differs.  0 means every check   */
 /* passed.                                                                  */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_rule_compare(frentry_t *fr1, frentry_t *fr2)
 {
 	int mismatch = 0;
 
 	if (fr1->fr_cksum != fr2->fr_cksum) {
 		mismatch = 1;
 	} else if (fr1->fr_size != fr2->fr_size) {
 		mismatch = 2;
 	} else if (fr1->fr_dsize != fr2->fr_dsize) {
 		mismatch = 3;
 	} else if (bcmp((char *)&fr1->fr_func, (char *)&fr2->fr_func,
 			fr1->fr_size - offsetof(struct frentry, fr_func)) != 0) {
 		mismatch = 4;
 	} else if (fr1->fr_data != NULL) {
 		/* Sizes are known to be equal once we get this far. */
 		if (fr2->fr_data == NULL)
 			mismatch = 5;
 		else if (bcmp(fr1->fr_caddr, fr2->fr_caddr,
 			      fr1->fr_dsize) != 0)
 			mismatch = 7;
 	} else if (fr2->fr_data != NULL) {
 		mismatch = 6;
 	}
 
 	return mismatch;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    frrequest                                                   */
 /* Returns:     int - 0 == success, > 0 == errno value                      */
 /* Parameters:  unit(I)     - device for which this is for                  */
 /*              req(I)      - ioctl command (SIOC*)                         */
 /*              data(I)     - pointr to ioctl data                          */
 /*              set(I)      - 1 or 0 (filter set)                           */
 /*              makecopy(I) - flag indicating whether data points to a rule */
 /*                            in kernel space & hence doesn't need copying. */
 /*                                                                          */
 /* This function handles all the requests which operate on the list of      */
 /* filter rules.  This includes adding, deleting, insertion.  It is also    */
 /* responsible for creating groups when a "head" rule is loaded.  Interface */
 /* names are resolved here and other sanity checks are made on the content  */
 /* of the rule structure being loaded.  If a rule has user defined timeouts */
 /* then make sure they are created and initialised before exiting.          */
 /* ------------------------------------------------------------------------ */
 int
 frrequest(softc, unit, req, data, set, makecopy)
 	ipf_main_softc_t *softc;
 	int unit;
 	ioctlcmd_t req;
 	int set, makecopy;
 	caddr_t data;
 {
 	int error = 0, in, family, addrem, need_free = 0;
 	frentry_t frd, *fp, *f, **fprev, **ftail;
 	void *ptr, *uptr, *cptr;
 	u_int *p, *pp;
 	frgroup_t *fg;
 	char *group;
 
 	ptr = NULL;
 	cptr = NULL;
 	fg = NULL;
 	fp = &frd;
 	if (makecopy != 0) {
 		bzero(fp, sizeof(frd));
 		error = ipf_inobj(softc, data, NULL, fp, IPFOBJ_FRENTRY);
 		if (error) {
 			return error;
 		}
 		if ((fp->fr_type & FR_T_BUILTIN) != 0) {
 			IPFERROR(6);
 			return EINVAL;
 		}
 		KMALLOCS(f, frentry_t *, fp->fr_size);
 		if (f == NULL) {
 			IPFERROR(131);
 			return ENOMEM;
 		}
 		bzero(f, fp->fr_size);
 		error = ipf_inobjsz(softc, data, f, IPFOBJ_FRENTRY,
 				    fp->fr_size);
 		if (error) {
 			KFREES(f, fp->fr_size);
 			return error;
 		}
 
 		fp = f;
 		f = NULL;
 		fp->fr_next = NULL;
 		fp->fr_dnext = NULL;
 		fp->fr_pnext = NULL;
 		fp->fr_pdnext = NULL;
 		fp->fr_grp = NULL;
 		fp->fr_grphead = NULL;
 		fp->fr_icmpgrp = NULL;
 		fp->fr_isc = (void *)-1;
 		fp->fr_ptr = NULL;
 		fp->fr_ref = 0;
 		fp->fr_flags |= FR_COPIED;
 	} else {
 		fp = (frentry_t *)data;
 		if ((fp->fr_type & FR_T_BUILTIN) == 0) {
 			IPFERROR(7);
 			return EINVAL;
 		}
 		fp->fr_flags &= ~FR_COPIED;
 	}
 
 	if (((fp->fr_dsize == 0) && (fp->fr_data != NULL)) ||
 	    ((fp->fr_dsize != 0) && (fp->fr_data == NULL))) {
 		IPFERROR(8);
 		error = EINVAL;
 		goto donenolock;
 	}
 
 	family = fp->fr_family;
 	uptr = fp->fr_data;
 
 	if (req == (ioctlcmd_t)SIOCINAFR || req == (ioctlcmd_t)SIOCINIFR ||
 	    req == (ioctlcmd_t)SIOCADAFR || req == (ioctlcmd_t)SIOCADIFR)
 		addrem = 0;
 	else if (req == (ioctlcmd_t)SIOCRMAFR || req == (ioctlcmd_t)SIOCRMIFR)
 		addrem = 1;
 	else if (req == (ioctlcmd_t)SIOCZRLST)
 		addrem = 2;
 	else {
 		IPFERROR(9);
 		error = EINVAL;
 		goto donenolock;
 	}
 
 	/*
 	 * Only filter rules for IPv4 or IPv6 are accepted.
 	 */
 	if (family == AF_INET) {
 		/*EMPTY*/;
 #ifdef	USE_INET6
 	} else if (family == AF_INET6) {
 		/*EMPTY*/;
 #endif
 	} else if (family != 0) {
 		IPFERROR(10);
 		error = EINVAL;
 		goto donenolock;
 	}
 
 	/*
 	 * If the rule is being loaded from user space, i.e. we had to copy it
 	 * into kernel space, then do not trust the function pointer in the
 	 * rule.
 	 */
 	if ((makecopy == 1) && (fp->fr_func != NULL)) {
 		if (ipf_findfunc(fp->fr_func) == NULL) {
 			IPFERROR(11);
 			error = ESRCH;
 			goto donenolock;
 		}
 
 		if (addrem == 0) {
 			error = ipf_funcinit(softc, fp);
 			if (error != 0)
 				goto donenolock;
 		}
 	}
 	if ((fp->fr_flags & FR_CALLNOW) &&
 	    ((fp->fr_func == NULL) || (fp->fr_func == (ipfunc_t)-1))) {
 		IPFERROR(142);
 		error = ESRCH;
 		goto donenolock;
 	}
 	if (((fp->fr_flags & FR_CMDMASK) == FR_CALL) &&
 	    ((fp->fr_func == NULL) || (fp->fr_func == (ipfunc_t)-1))) {
 		IPFERROR(143);
 		error = ESRCH;
 		goto donenolock;
 	}
 
 	ptr = NULL;
 	cptr = NULL;
 
 	if (FR_ISACCOUNT(fp->fr_flags))
 		unit = IPL_LOGCOUNT;
 
 	/*
 	 * Check that each group name in the rule has a start index that
 	 * is valid.
 	 */
 	if (fp->fr_icmphead != -1) {
 		if ((fp->fr_icmphead < 0) ||
 		    (fp->fr_icmphead >= fp->fr_namelen)) {
 			IPFERROR(136);
 			error = EINVAL;
 			goto donenolock;
 		}
 		if (!strcmp(FR_NAME(fp, fr_icmphead), "0"))
 			fp->fr_names[fp->fr_icmphead] = '\0';
 	}
 
 	if (fp->fr_grhead != -1) {
 		if ((fp->fr_grhead < 0) ||
 		    (fp->fr_grhead >= fp->fr_namelen)) {
 			IPFERROR(137);
 			error = EINVAL;
 			goto donenolock;
 		}
 		if (!strcmp(FR_NAME(fp, fr_grhead), "0"))
 			fp->fr_names[fp->fr_grhead] = '\0';
 	}
 
 	if (fp->fr_group != -1) {
 		if ((fp->fr_group < 0) ||
 		    (fp->fr_group >= fp->fr_namelen)) {
 			IPFERROR(138);
 			error = EINVAL;
 			goto donenolock;
 		}
 		if ((req != (int)SIOCZRLST) && (fp->fr_group != -1)) {
 			/*
 			 * Allow loading rules that are in groups to cause
 			 * them to be created if they don't already exit.
 			 */
 			group = FR_NAME(fp, fr_group);
 			if (addrem == 0) {
 				fg = ipf_group_add(softc, group, NULL,
 						   fp->fr_flags, unit, set);
 				fp->fr_grp = fg;
 			} else {
 				fg = ipf_findgroup(softc, group, unit,
 						   set, NULL);
 				if (fg == NULL) {
 					IPFERROR(12);
 					error = ESRCH;
 					goto donenolock;
 				}
 			}
 
 			if (fg->fg_flags == 0) {
 				fg->fg_flags = fp->fr_flags & FR_INOUT;
 			} else if (fg->fg_flags != (fp->fr_flags & FR_INOUT)) {
 				IPFERROR(13);
 				error = ESRCH;
 				goto donenolock;
 			}
 		}
 	} else {
 		/*
 		 * If a rule is going to be part of a group then it does
 		 * not matter whether it is an in or out rule, but if it
 		 * isn't in a group, then it does...
 		 */
 		if ((fp->fr_flags & (FR_INQUE|FR_OUTQUE)) == 0) {
 			IPFERROR(14);
 			error = EINVAL;
 			goto donenolock;
 		}
 	}
 	in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
 
 	/*
 	 * Work out which rule list this change is being applied to.
 	 */
 	ftail = NULL;
 	fprev = NULL;
 	if (unit == IPL_LOGAUTH) {
                 if ((fp->fr_tifs[0].fd_ptr != NULL) ||
 		    (fp->fr_tifs[1].fd_ptr != NULL) ||
 		    (fp->fr_dif.fd_ptr != NULL) ||
 		    (fp->fr_flags & FR_FASTROUTE)) {
 			softc->ipf_interror = 145;
 			error = EINVAL;
 			goto donenolock;
 		}
 		fprev = ipf_auth_rulehead(softc);
 	} else {
 		if (FR_ISACCOUNT(fp->fr_flags))
 			fprev = &softc->ipf_acct[in][set];
 		else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0)
 			fprev = &softc->ipf_rules[in][set];
 	}
 	if (fprev == NULL) {
 		IPFERROR(15);
 		error = ESRCH;
 		goto donenolock;
 	}
 
 	if (fg != NULL)
 		fprev = &fg->fg_start;
 
 	/*
 	 * Copy in extra data for the rule.
 	 */
 	if (fp->fr_dsize != 0) {
 		if (makecopy != 0) {
 			KMALLOCS(ptr, void *, fp->fr_dsize);
 			if (ptr == NULL) {
 				IPFERROR(16);
 				error = ENOMEM;
 				goto donenolock;
 			}
 
 			/*
 			 * The bcopy case is for when the data is appended
 			 * to the rule by ipf_in_compat().
 			 */
 			if (uptr >= (void *)fp &&
 			    uptr < (void *)((char *)fp + fp->fr_size)) {
 				bcopy(uptr, ptr, fp->fr_dsize);
 				error = 0;
 			} else {
 				error = COPYIN(uptr, ptr, fp->fr_dsize);
 				if (error != 0) {
 					IPFERROR(17);
 					error = EFAULT;
 					goto donenolock;
 				}
 			}
 		} else {
 			ptr = uptr;
 		}
 		fp->fr_data = ptr;
 	} else {
 		fp->fr_data = NULL;
 	}
 
 	/*
 	 * Perform per-rule type sanity checks of their members.
 	 * All code after this needs to be aware that allocated memory
 	 * may need to be free'd before exiting.
 	 */
 	switch (fp->fr_type & ~FR_T_BUILTIN)
 	{
 #if defined(IPFILTER_BPF)
 	case FR_T_BPFOPC :
 		if (fp->fr_dsize == 0) {
 			IPFERROR(19);
 			error = EINVAL;
 			break;
 		}
 		if (!bpf_validate(ptr, fp->fr_dsize/sizeof(struct bpf_insn))) {
 			IPFERROR(20);
 			error = EINVAL;
 			break;
 		}
 		break;
 #endif
 	case FR_T_IPF :
 		/*
 		 * Preparation for error case at the bottom of this function.
 		 */
 		if (fp->fr_datype == FRI_LOOKUP)
 			fp->fr_dstptr = NULL;
 		if (fp->fr_satype == FRI_LOOKUP)
 			fp->fr_srcptr = NULL;
 
 		if (fp->fr_dsize != sizeof(fripf_t)) {
 			IPFERROR(21);
 			error = EINVAL;
 			break;
 		}
 
 		/*
 		 * Allowing a rule with both "keep state" and "with oow" is
 		 * pointless because adding a state entry to the table will
 		 * fail with the out of window (oow) flag set.
 		 */
 		if ((fp->fr_flags & FR_KEEPSTATE) && (fp->fr_flx & FI_OOW)) {
 			IPFERROR(22);
 			error = EINVAL;
 			break;
 		}
 
 		switch (fp->fr_satype)
 		{
 		case FRI_BROADCAST :
 		case FRI_DYNAMIC :
 		case FRI_NETWORK :
 		case FRI_NETMASKED :
 		case FRI_PEERADDR :
 			if (fp->fr_sifpidx < 0) {
 				IPFERROR(23);
 				error = EINVAL;
 			}
 			break;
 		case FRI_LOOKUP :
 			fp->fr_srcptr = ipf_findlookup(softc, unit, fp,
 						       &fp->fr_src6,
 						       &fp->fr_smsk6);
 			if (fp->fr_srcfunc == NULL) {
 				IPFERROR(132);
 				error = ESRCH;
 				break;
 			}
 			break;
 		case FRI_NORMAL :
 			break;
 		default :
 			IPFERROR(133);
 			error = EINVAL;
 			break;
 		}
 		if (error != 0)
 			break;
 
 		switch (fp->fr_datype)
 		{
 		case FRI_BROADCAST :
 		case FRI_DYNAMIC :
 		case FRI_NETWORK :
 		case FRI_NETMASKED :
 		case FRI_PEERADDR :
 			if (fp->fr_difpidx < 0) {
 				IPFERROR(24);
 				error = EINVAL;
 			}
 			break;
 		case FRI_LOOKUP :
 			fp->fr_dstptr = ipf_findlookup(softc, unit, fp,
 						       &fp->fr_dst6,
 						       &fp->fr_dmsk6);
 			if (fp->fr_dstfunc == NULL) {
 				IPFERROR(134);
 				error = ESRCH;
 			}
 			break;
 		case FRI_NORMAL :
 			break;
 		default :
 			IPFERROR(135);
 			error = EINVAL;
 		}
 		break;
 
 	case FR_T_NONE :
 	case FR_T_CALLFUNC :
 	case FR_T_COMPIPF :
 		break;
 
 	case FR_T_IPFEXPR :
 		if (ipf_matcharray_verify(fp->fr_data, fp->fr_dsize) == -1) {
 			IPFERROR(25);
 			error = EINVAL;
 		}
 		break;
 
 	default :
 		IPFERROR(26);
 		error = EINVAL;
 		break;
 	}
 	if (error != 0)
 		goto donenolock;
 
 	if (fp->fr_tif.fd_name != -1) {
 		if ((fp->fr_tif.fd_name < 0) ||
 		    (fp->fr_tif.fd_name >= fp->fr_namelen)) {
 			IPFERROR(139);
 			error = EINVAL;
 			goto donenolock;
 		}
 	}
 
 	if (fp->fr_dif.fd_name != -1) {
 		if ((fp->fr_dif.fd_name < 0) ||
 		    (fp->fr_dif.fd_name >= fp->fr_namelen)) {
 			IPFERROR(140);
 			error = EINVAL;
 			goto donenolock;
 		}
 	}
 
 	if (fp->fr_rif.fd_name != -1) {
 		if ((fp->fr_rif.fd_name < 0) ||
 		    (fp->fr_rif.fd_name >= fp->fr_namelen)) {
 			IPFERROR(141);
 			error = EINVAL;
 			goto donenolock;
 		}
 	}
 
 	/*
 	 * Lookup all the interface names that are part of the rule.
 	 */
 	error = ipf_synclist(softc, fp, NULL);
 	if (error != 0)
 		goto donenolock;
 	fp->fr_statecnt = 0;
 	if (fp->fr_srctrack.ht_max_nodes != 0)
 		ipf_rb_ht_init(&fp->fr_srctrack);
 
 	/*
 	 * Look for an existing matching filter rule, but don't include the
 	 * next or interface pointer in the comparison (fr_next, fr_ifa).
 	 * This elminates rules which are indentical being loaded.  Checksum
 	 * the constant part of the filter rule to make comparisons quicker
 	 * (this meaning no pointers are included).
 	 */
 	for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_func, pp = &fp->fr_cksum;
 	     p < pp; p++)
 		fp->fr_cksum += *p;
 	pp = (u_int *)(fp->fr_caddr + fp->fr_dsize);
 	for (p = (u_int *)fp->fr_data; p < pp; p++)
 		fp->fr_cksum += *p;
 
 	WRITE_ENTER(&softc->ipf_mutex);
 
 	/*
 	 * Now that the filter rule lists are locked, we can walk the
 	 * chain of them without fear.
 	 */
 	ftail = fprev;
 	for (f = *ftail; (f = *ftail) != NULL; ftail = &f->fr_next) {
 		if (fp->fr_collect <= f->fr_collect) {
 			ftail = fprev;
 			f = NULL;
 			break;
 		}
 		fprev = ftail;
 	}
 
 	for (; (f = *ftail) != NULL; ftail = &f->fr_next) {
 		if (ipf_rule_compare(fp, f) == 0)
 			break;
 	}
 
 	/*
 	 * If zero'ing statistics, copy current to caller and zero.
 	 */
 	if (addrem == 2) {
 		if (f == NULL) {
 			IPFERROR(27);
 			error = ESRCH;
 		} else {
 			/*
 			 * Copy and reduce lock because of impending copyout.
 			 * Well we should, but if we do then the atomicity of
 			 * this call and the correctness of fr_hits and
 			 * fr_bytes cannot be guaranteed.  As it is, this code
 			 * only resets them to 0 if they are successfully
 			 * copied out into user space.
 			 */
 			bcopy((char *)f, (char *)fp, f->fr_size);
 			/* MUTEX_DOWNGRADE(&softc->ipf_mutex); */
 
 			/*
 			 * When we copy this rule back out, set the data
 			 * pointer to be what it was in user space.
 			 */
 			fp->fr_data = uptr;
 			error = ipf_outobj(softc, data, fp, IPFOBJ_FRENTRY);
 
 			if (error == 0) {
 				if ((f->fr_dsize != 0) && (uptr != NULL))
 					error = COPYOUT(f->fr_data, uptr,
 							f->fr_dsize);
 					if (error != 0) {
 						IPFERROR(28);
 						error = EFAULT;
 					}
 				if (error == 0) {
 					f->fr_hits = 0;
 					f->fr_bytes = 0;
 				}
 			}
 		}
 
 		if (makecopy != 0) {
 			if (ptr != NULL) {
 				KFREES(ptr, fp->fr_dsize);
 			}
 			KFREES(fp, fp->fr_size);
 		}
 		RWLOCK_EXIT(&softc->ipf_mutex);
 		return error;
 	}
 
   	if (!f) {
 		/*
 		 * At the end of this, ftail must point to the place where the
 		 * new rule is to be saved/inserted/added.
 		 * For SIOCAD*FR, this should be the last rule in the group of
 		 * rules that have equal fr_collect fields.
 		 * For SIOCIN*FR, ...
 		 */
 		if (req == (ioctlcmd_t)SIOCADAFR ||
 		    req == (ioctlcmd_t)SIOCADIFR) {
 
 			for (ftail = fprev; (f = *ftail) != NULL; ) {
 				if (f->fr_collect > fp->fr_collect)
 					break;
 				ftail = &f->fr_next;
 				fprev = ftail;
 			}
 			ftail = fprev;
 			f = NULL;
 			ptr = NULL;
 		} else if (req == (ioctlcmd_t)SIOCINAFR ||
 			   req == (ioctlcmd_t)SIOCINIFR) {
 			while ((f = *fprev) != NULL) {
 				if (f->fr_collect >= fp->fr_collect)
 					break;
 				fprev = &f->fr_next;
 			}
   			ftail = fprev;
   			if (fp->fr_hits != 0) {
 				while (fp->fr_hits && (f = *ftail)) {
 					if (f->fr_collect != fp->fr_collect)
 						break;
 					fprev = ftail;
   					ftail = &f->fr_next;
 					fp->fr_hits--;
 				}
   			}
   			f = NULL;
   			ptr = NULL;
 		}
 	}
 
 	/*
 	 * Request to remove a rule.
 	 */
 	if (addrem == 1) {
 		if (!f) {
 			IPFERROR(29);
 			error = ESRCH;
 		} else {
 			/*
 			 * Do not allow activity from user space to interfere
 			 * with rules not loaded that way.
 			 */
 			if ((makecopy == 1) && !(f->fr_flags & FR_COPIED)) {
 				IPFERROR(30);
 				error = EPERM;
 				goto done;
 			}
 
 			/*
 			 * Return EBUSY if the rule is being reference by
 			 * something else (eg state information.)
 			 */
 			if (f->fr_ref > 1) {
 				IPFERROR(31);
 				error = EBUSY;
 				goto done;
 			}
 #ifdef	IPFILTER_SCAN
 			if (f->fr_isctag != -1 &&
 			    (f->fr_isc != (struct ipscan *)-1))
 				ipf_scan_detachfr(f);
 #endif
 
 			if (unit == IPL_LOGAUTH) {
 				error = ipf_auth_precmd(softc, req, f, ftail);
 				goto done;
 			}
 
 			ipf_rule_delete(softc, f, unit, set);
 
 			need_free = makecopy;
 		}
 	} else {
 		/*
 		 * Not removing, so we must be adding/inserting a rule.
 		 */
 		if (f != NULL) {
 			IPFERROR(32);
 			error = EEXIST;
 			goto done;
 		}
 		if (unit == IPL_LOGAUTH) {
 			error = ipf_auth_precmd(softc, req, fp, ftail);
 			goto done;
 		}
 
 		MUTEX_NUKE(&fp->fr_lock);
 		MUTEX_INIT(&fp->fr_lock, "filter rule lock");
 		if (fp->fr_die != 0)
 			ipf_rule_expire_insert(softc, fp, set);
 
 		fp->fr_hits = 0;
 		if (makecopy != 0)
 			fp->fr_ref = 1;
 		fp->fr_pnext = ftail;
 		fp->fr_next = *ftail;
 		if (fp->fr_next != NULL)
 			fp->fr_next->fr_pnext = &fp->fr_next;
 		*ftail = fp;
 		if (addrem == 0)
 			ipf_fixskip(ftail, fp, 1);
 
 		fp->fr_icmpgrp = NULL;
 		if (fp->fr_icmphead != -1) {
 			group = FR_NAME(fp, fr_icmphead);
 			fg = ipf_group_add(softc, group, fp, 0, unit, set);
 			fp->fr_icmpgrp = fg;
 		}
 
 		fp->fr_grphead = NULL;
 		if (fp->fr_grhead != -1) {
 			group = FR_NAME(fp, fr_grhead);
 			fg = ipf_group_add(softc, group, fp, fp->fr_flags,
 					   unit, set);
 			fp->fr_grphead = fg;
 		}
 	}
 done:
 	RWLOCK_EXIT(&softc->ipf_mutex);
 donenolock:
 	if (need_free || (error != 0)) {
 		if ((fp->fr_type & ~FR_T_BUILTIN) == FR_T_IPF) {
 			if ((fp->fr_satype == FRI_LOOKUP) &&
 			    (fp->fr_srcptr != NULL))
 				ipf_lookup_deref(softc, fp->fr_srctype,
 						 fp->fr_srcptr);
 			if ((fp->fr_datype == FRI_LOOKUP) &&
 			    (fp->fr_dstptr != NULL))
 				ipf_lookup_deref(softc, fp->fr_dsttype,
 						 fp->fr_dstptr);
 		}
 		if (fp->fr_grp != NULL) {
 			WRITE_ENTER(&softc->ipf_mutex);
 			ipf_group_del(softc, fp->fr_grp, fp);
 			RWLOCK_EXIT(&softc->ipf_mutex);
 		}
 		if ((ptr != NULL) && (makecopy != 0)) {
 			KFREES(ptr, fp->fr_dsize);
 		}
 		KFREES(fp, fp->fr_size);
 	}
 	return (error);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_rule_delete                                              */
 /* Returns:    Nil                                                          */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*             f(I)     - pointer to the rule being deleted                 */
 /*             unit(I)  - device for which this is for                      */
 /*             set(I)   - 1 or 0 (filter set)                               */
 /*                                                                          */
 /* This function attempts to do what it can to delete a filter rule: remove */
 /* it from any linked lists and remove any groups it is responsible for.    */
 /* But in the end, removing a rule can only drop the reference count - we   */
 /* must use that as the guide for whether or not it can be freed.           */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_rule_delete(softc, f, unit, set)
 	ipf_main_softc_t *softc;
 	frentry_t *f;
 	int unit, set;
 {
 	frentry_t *after;
 
 	/*
 	 * A non-NULL fr_pdnext means the rule sits on the expire list:
 	 * splice it out and clear both of its expire-list links.
 	 */
 	if (f->fr_pdnext != NULL) {
 		after = f->fr_dnext;
 		*f->fr_pdnext = after;
 		if (after != NULL)
 			after->fr_pdnext = f->fr_pdnext;
 		f->fr_dnext = NULL;
 		f->fr_pdnext = NULL;
 	}
 
 	/*
 	 * ipf_fixskip() has to see the rule while it is still linked in,
 	 * so it is called before the rule list is touched.
 	 */
 	ipf_fixskip(f->fr_pnext, f, -1);
 
 	after = f->fr_next;
 	if (f->fr_pnext != NULL)
 		*f->fr_pnext = after;
 	if (after != NULL)
 		after->fr_pnext = f->fr_pnext;
 	f->fr_next = NULL;
 	f->fr_pnext = NULL;
 
 	/* Give up the list's reference; the rule goes away only at zero. */
 	(void) ipf_derefrule(softc, &f);
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_rule_expire_insert                                       */
 /* Returns:    Nil                                                          */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*             f(I)     - pointer to rule to be added to expire list        */
 /*             set(I)   - 1 or 0 (filter set)                               */
 /*                                                                          */
 /* If the new rule has a given expiration time, insert it into the list of  */
 /* expiring rules with the ones to be removed first added to the front of   */
 /* the list. The insertion is O(n) but it is kept sorted for quick scans at */
 /* expiration interval checks.                                              */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_rule_expire_insert(softc, f, set)
 	ipf_main_softc_t *softc;
 	frentry_t *f;
 	int set;
 {
 	frentry_t **link, *fr;
 
 	/*
 	 * On entry fr_die holds a lifetime; turn it into an absolute
 	 * expiry relative to the current ipf tick counter.
 	 */
 	f->fr_die = softc->ipf_ticks + IPF_TTLVAL(f->fr_die);
 
 	/*
 	 * Walk the chain of "next" links, starting with the list head, and
 	 * stop at the link that points to the first rule expiring strictly
 	 * later than the new one (or at the terminating NULL link).  Rules
 	 * with an equal expiry stay ahead of the new rule.
 	 */
 	for (link = &softc->ipf_rule_explist[set]; (fr = *link) != NULL;
 	     link = &fr->fr_dnext) {
 		if (f->fr_die < fr->fr_die)
 			break;
 	}
 
 	/*
 	 * Splice the rule in at that link.  The forward link of the
 	 * predecessor (or the list head) must be redirected to the new rule
 	 * as well as the back pointer of the successor, otherwise the rule
 	 * is not reachable when walking the list.  fr_dnext is always
 	 * written so that no stale value is left in a new tail entry.
 	 */
 	f->fr_dnext = fr;
 	f->fr_pdnext = link;
 	if (fr != NULL)
 		fr->fr_pdnext = &f->fr_dnext;
 	*link = f;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_findlookup                                               */
 /* Returns:    NULL = failure, else success                                 */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*             unit(I)  - ipf device we want to find match for              */
 /*             fr(I)    - rule for which lookup is for                      */
 /*             addrp(I) - pointer to lookup information in address struct   */
 /*             maskp(O) - pointer to lookup information for storage         */
 /*                                                                          */
 /* When using pools and hash tables to store addresses for matching in      */
 /* rules, it is necessary to resolve both the object referred to by the     */
 /* name or address (and return that pointer) and also provide the means by  */
 /* which to determine if an address belongs to that object to make the      */
 /* packet matching quicker.                                                 */
 /* ------------------------------------------------------------------------ */
 static void *
 ipf_findlookup(softc, unit, fr, addrp, maskp)
 	ipf_main_softc_t *softc;
 	int unit;
 	frentry_t *fr;
 	i6addr_t *addrp, *maskp;
 {
 
 	/* Subtype 0: the object is identified by number. */
 	if (addrp->iplookupsubtype == 0)
 		return ipf_lookup_res_num(softc, unit, addrp->iplookuptype,
 					  addrp->iplookupnum,
 					  &maskp->iplookupfunc);
 
 	/* Anything other than subtype 1 (by name) is not resolvable. */
 	if (addrp->iplookupsubtype != 1)
 		return NULL;
 
 	/* The name is an offset into fr_names; reject offsets outside it. */
 	if (addrp->iplookupname < 0 ||
 	    addrp->iplookupname >= fr->fr_namelen)
 		return NULL;
 
 	return ipf_lookup_res_name(softc, unit, addrp->iplookuptype,
 				   fr->fr_names + addrp->iplookupname,
 				   &maskp->iplookupfunc);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_funcinit                                                */
 /* Returns:     int - 0 == success, else ESRCH: cannot resolve rule details */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              fr(I)    - pointer to filter rule                           */
 /*                                                                          */
 /* If a rule is a call rule, then check if the function it points to needs  */
 /* an init function to be called now the rule has been loaded.              */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_funcinit(softc, fr)
 	ipf_main_softc_t *softc;
 	frentry_t *fr;
 {
 	ipfunc_resolve_t *ft;
 
 	/* The error code is recorded up front, before the table is searched. */
 	IPFERROR(34);
 
 	for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++) {
 		if (ft->ipfu_addr != fr->fr_func)
 			continue;
 		/* Known function: it succeeds unless its init hook fails. */
 		if (ft->ipfu_init == NULL)
 			return 0;
 		return (*ft->ipfu_init)(softc, fr);
 	}
 
 	return ESRCH;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_funcfini                                                */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              fr(I)    - pointer to filter rule                           */
 /*                                                                          */
 /* For a given filter rule, call the matching "fini" function if the rule   */
 /* is using a known function that would have resulted in the "init" being   */
 /* called for earlier.                                                      */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_funcfini(softc, fr)
 	ipf_main_softc_t *softc;
 	frentry_t *fr;
 {
 	ipfunc_resolve_t *ft;
 
 	/* Advance to the rule's function or to the NULL table terminator. */
 	ft = ipf_availfuncs;
 	while (ft->ipfu_addr != NULL && ft->ipfu_addr != fr->fr_func)
 		ft++;
 
 	if (ft->ipfu_addr != NULL && ft->ipfu_fini != NULL)
 		(void) (*ft->ipfu_fini)(softc, fr);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_findfunc                                                */
 /* Returns:     ipfunc_t - pointer to function if found, else NULL          */
 /* Parameters:  funcptr(I) - function pointer to lookup                     */
 /*                                                                          */
 /* Look for a function in the table of known functions.                     */
 /* ------------------------------------------------------------------------ */
 static ipfunc_t
 ipf_findfunc(funcptr)
 	ipfunc_t funcptr;
 {
 	ipfunc_resolve_t *ft;
 
 	/* The table ends with an entry whose ipfu_addr is NULL. */
 	for (ft = ipf_availfuncs; ; ft++) {
 		if (ft->ipfu_addr == NULL)
 			return NULL;
 		if (ft->ipfu_addr == funcptr)
 			return funcptr;
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_resolvefunc                                             */
 /* Returns:     int - 0 == success, else error                              */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              data(IO) - ioctl data pointer to ipfunc_resolve_t struct    */
 /*                                                                          */
 /* Copy in a ipfunc_resolve_t structure and then fill in the missing field. */
 /* This will either be the function name (if the pointer is set) or the     */
 /* function pointer if the name is set.  When found, fill in the other one  */
 /* so that the entire, complete, structure can be copied back to user space.*/
 /* ------------------------------------------------------------------------ */
 int
 ipf_resolvefunc(softc, data)
 	ipf_main_softc_t *softc;
 	void *data;
 {
 	ipfunc_resolve_t res, *ft;
 	int byname, byaddr;
 
 	if (BCOPYIN(data, &res, sizeof(res)) != 0) {
 		IPFERROR(123);
 		return EFAULT;
 	}
 
 	/*
 	 * Exactly one of the two keys must be supplied: a name with no
 	 * address, or an address with no name.  Anything else is ESRCH.
 	 */
 	byname = (res.ipfu_addr == NULL) && (res.ipfu_name[0] != '\0');
 	byaddr = (res.ipfu_addr != NULL) && (res.ipfu_name[0] == '\0');
 
 	if (byname) {
 		for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++) {
 			if (strncmp(res.ipfu_name, ft->ipfu_name,
 				    sizeof(res.ipfu_name)) != 0)
 				continue;
 			res.ipfu_addr = ft->ipfu_addr;
 			res.ipfu_init = ft->ipfu_init;
 			if (COPYOUT(&res, data, sizeof(res)) != 0) {
 				IPFERROR(35);
 				return EFAULT;
 			}
 			return 0;
 		}
 	} else if (byaddr) {
 		for (ft = ipf_availfuncs; ft->ipfu_addr != NULL; ft++) {
 			if (ft->ipfu_addr != res.ipfu_addr)
 				continue;
 			(void) strncpy(res.ipfu_name, ft->ipfu_name,
 				       sizeof(res.ipfu_name));
 			res.ipfu_init = ft->ipfu_init;
 			if (COPYOUT(&res, data, sizeof(res)) != 0) {
 				IPFERROR(36);
 				return EFAULT;
 			}
 			return 0;
 		}
 	}
 
 	/* No usable key was given, or no table entry matched it. */
 	IPFERROR(37);
 	return ESRCH;
 }
 
 
 #if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \
      !defined(__FreeBSD__)) || \
     FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \
     OPENBSD_LT_REV(200006)
 /*
  * From: NetBSD
  * ppsratecheck(): packets (or events) per second limitation.
  *
  * Returns 1 when the event is within the allowed rate and 0 when it is
  * over the limit.  A negative maxpps means "no limit".
  */
 int
 ppsratecheck(lasttime, curpps, maxpps)
 	struct timeval *lasttime;
 	int *curpps;
 	int maxpps;	/* maximum pps allowed */
 {
 	struct timeval now, elapsed;
 	int first, allowed;
 
 	GETKTIME(&now);
 
 	/* elapsed = now - *lasttime, borrowing a second if usec underflows */
 	elapsed.tv_sec = now.tv_sec - lasttime->tv_sec;
 	elapsed.tv_usec = now.tv_usec - lasttime->tv_usec;
 	if (elapsed.tv_usec < 0) {
 		elapsed.tv_usec += 1000000;
 		elapsed.tv_sec--;
 	}
 
 	/*
 	 * A zeroed lasttime marks the very first call, which is always let
 	 * through so that the message is seen at least once.  Once a full
 	 * second has gone by since lasttime was last updated, a new
 	 * measuring window is started and the counter is reset.
 	 */
 	first = (lasttime->tv_sec == 0 && lasttime->tv_usec == 0);
 	if (first || elapsed.tv_sec >= 1) {
 		*lasttime = now;
 		*curpps = 0;
 		allowed = 1;
 	} else {
 		allowed = (maxpps < 0 || *curpps < maxpps) ? 1 : 0;
 	}
 
 	/*
 	 * The counter is bumped on every call, including the ones that are
 	 * under the limit, as some callers may use it for statistics.
 	 */
 	*curpps += 1;
 
 	return (allowed);
 }
 #endif
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_derefrule                                               */
 /* Returns:     int - 0  == last reference dropped and rule memory freed,   */
 /*                    1  == last reference dropped, rule not FR_COPIED so   */
 /*                          its memory is not freed here,                   */
 /*                    -1 == references remain, nothing torn down            */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              frp(IO)  - address of rule pointer, always set to NULL      */
 /*                                                                          */
 /* Decrement the reference counter to a rule by one.  If it reaches zero,   */
 /* free it and any associated storage space being used by it.               */
 /* ------------------------------------------------------------------------ */
 int
 ipf_derefrule(softc, frp)
 	ipf_main_softc_t *softc;
 	frentry_t **frp;
 {
 	frentry_t *fr;
 	frdest_t *fdp;
 
 	/* Take the pointer away from the caller whatever happens next. */
 	fr = *frp;
 	*frp = NULL;
 
 	MUTEX_ENTER(&fr->fr_lock);
 	fr->fr_ref--;
 	if (fr->fr_ref == 0) {
 		/* Last reference: the lock is released and destroyed first. */
 		MUTEX_EXIT(&fr->fr_lock);
 		MUTEX_DESTROY(&fr->fr_lock);
 
 		ipf_funcfini(softc, fr);
 
 		/* Drop references held on any destination lists. */
 		fdp = &fr->fr_tif;
 		if (fdp->fd_type == FRD_DSTLIST)
 			ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);
 
 		fdp = &fr->fr_rif;
 		if (fdp->fd_type == FRD_DSTLIST)
 			ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);
 
 		fdp = &fr->fr_dif;
 		if (fdp->fd_type == FRD_DSTLIST)
 			ipf_lookup_deref(softc, IPLT_DSTLIST, fdp->fd_ptr);
 
 		/* Source/destination lookup objects, FR_T_IPF rules only. */
 		if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF &&
 		    fr->fr_satype == FRI_LOOKUP)
 			ipf_lookup_deref(softc, fr->fr_srctype, fr->fr_srcptr);
 		if ((fr->fr_type & ~FR_T_BUILTIN) == FR_T_IPF &&
 		    fr->fr_datype == FRI_LOOKUP)
 			ipf_lookup_deref(softc, fr->fr_dsttype, fr->fr_dstptr);
 
 		/* Detach from each group the rule still points at. */
 		if (fr->fr_grp != NULL)
 			ipf_group_del(softc, fr->fr_grp, fr);
 
 		if (fr->fr_grphead != NULL)
 			ipf_group_del(softc, fr->fr_grphead, fr);
 
 		if (fr->fr_icmpgrp != NULL)
 			ipf_group_del(softc, fr->fr_icmpgrp, fr);
 
 		/* Only rules flagged FR_COPIED have their memory freed here. */
 		if ((fr->fr_flags & FR_COPIED) != 0) {
 			if (fr->fr_dsize) {
 				KFREES(fr->fr_data, fr->fr_dsize);
 			}
 			KFREES(fr, fr->fr_size);
 			return 0;
 		}
 		return 1;
 	} else {
 		MUTEX_EXIT(&fr->fr_lock);
 	}
 	return -1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_grpmapinit                                              */
 /* Returns:     int - 0 == success, else ESRCH because table entry not found*/
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              fr(I)    - pointer to rule to find hash table for           */
 /*                                                                          */
 /* Looks for group hash table fr_arg and stores a pointer to it in fr_ptr.  */
 /* fr_ptr is later used by ipf_srcgrpmap and ipf_dstgrpmap.                 */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_grpmapinit(softc, fr)
 	ipf_main_softc_t *softc;
 	frentry_t *fr;
 {
 	iphtable_t *table;
 	char grpname[FR_GROUPLEN];
 
 	/* The table is looked up by the decimal text form of fr_arg. */
 #if defined(SNPRINTF) && defined(_KERNEL)
 	SNPRINTF(grpname, sizeof(grpname), "%d", fr->fr_arg);
 #else
 	(void) sprintf(grpname, "%d", fr->fr_arg);
 #endif
 
 	table = ipf_lookup_find_htable(softc, IPL_LOGIPF, grpname);
 	if (table == NULL) {
 		IPFERROR(38);
 		return ESRCH;
 	}
 
 	/* The table and the rule must agree on their FR_INOUT bits. */
 	if (((table->iph_flags ^ fr->fr_flags) & FR_INOUT) != 0) {
 		IPFERROR(39);
 		return ESRCH;
 	}
 
 	/* The rule now holds a reference on the table. */
 	fr->fr_ptr = table;
 	table->iph_ref++;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_grpmapfini                                              */
 /* Returns:     int - always 0                                              */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              fr(I)    - pointer to rule to release hash table for        */
 /*                                                                          */
 /* For rules that have had ipf_grpmapinit called, ipf_lookup_deref needs to */
 /* be called to undo what ipf_grpmapinit caused to be done.                 */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_grpmapfini(softc, fr)
 	ipf_main_softc_t *softc;
 	frentry_t *fr;
 {
 	iphtable_t *table;
 
 	table = fr->fr_ptr;
 	if (table == NULL)
 		return 0;
 
 	ipf_lookup_deref(softc, IPLT_HASH, table);
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_srcgrpmap                                               */
 /* Returns:     frentry_t * - pointer to "new last matching" rule or NULL   */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              passp(IO) - pointer to current/new filter decision          */
 /*                                                                          */
 /* Look for a rule group head in a hash table, using the source address as  */
 /* the key, and descend into that group and continue matching rules against */
 /* the packet.                                                              */
 /* ------------------------------------------------------------------------ */
 frentry_t *
 ipf_srcgrpmap(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	frgroup_t *group;
 
 	group = ipf_iphmfindgroup(fin->fin_main_soft, fin->fin_fr->fr_ptr,
 				  &fin->fin_src);
 	if (group != NULL) {
 		/* Continue the rule scan from the first rule of the group. */
 		fin->fin_fr = group->fg_start;
 		(void) ipf_scanlist(fin, *passp);
 		return fin->fin_fr;
 	}
 
 	return NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_dstgrpmap                                               */
 /* Returns:     frentry_t * - pointer to "new last matching" rule or NULL   */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              passp(IO) - pointer to current/new filter decision          */
 /*                                                                          */
 /* Look for a rule group head in a hash table, using the destination        */
 /* address as the key, and descend into that group and continue matching    */
 /* rules against  the packet.                                               */
 /* ------------------------------------------------------------------------ */
 frentry_t *
 ipf_dstgrpmap(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	frgroup_t *group;
 
 	group = ipf_iphmfindgroup(fin->fin_main_soft, fin->fin_fr->fr_ptr,
 				  &fin->fin_dst);
 	if (group != NULL) {
 		/* Continue the rule scan from the first rule of the group. */
 		fin->fin_fr = group->fg_start;
 		(void) ipf_scanlist(fin, *passp);
 		return fin->fin_fr;
 	}
 
 	return NULL;
 }
 
 /*
  * Queue functions
  * ===============
  * These functions manage objects on queues for efficient timeouts.  There
  * are a number of system defined queues as well as user defined timeouts.
  * It is expected that a lock is held in the domain in which the queue
  * belongs (i.e. either state or NAT) when calling any of these functions
  * that prevents ipf_freetimeoutqueue() from being called at the same time
  * as any other.
  */
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_addtimeoutqueue                                         */
 /* Returns:     ipftq_t * - NULL if malloc fails, else pointer to           */
 /*                          timeout queue with given interval.              */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              parent(I)  - pointer to pointer to parent node of this list */
 /*                           of interface queues.                           */
 /*              seconds(I) - timeout value in seconds for this queue.       */
 /*                                                                          */
 /* This routine first looks for a timeout queue that matches the interval   */
 /* being requested.  If it finds one, increments the reference counter and  */
 /* returns a pointer to it.  If none are found, it allocates a new one and  */
 /* inserts it at the top of the list.                                       */
 /*                                                                          */
 /* Locking.                                                                 */
 /* It is assumed that the caller of this function has an appropriate lock   */
 /* held (exclusively) in the domain that encompasses 'parent'.              */
 /* ------------------------------------------------------------------------ */
 ipftq_t *
 ipf_addtimeoutqueue(softc, parent, seconds)
 	ipf_main_softc_t *softc;
 	ipftq_t **parent;
 	u_int seconds;
 {
 	ipftq_t *ifq;
 	u_int period;
 
 	period = seconds * IPF_HZ_DIVIDE;
 
 	MUTEX_ENTER(&softc->ipf_timeoutlock);
 
 	/* Search for an existing queue with the same interval. */
 	ifq = *parent;
 	while (ifq != NULL && ifq->ifq_ttl != period)
 		ifq = ifq->ifq_next;
 
 	if (ifq != NULL) {
 		/*
 		 * Found one: clear a pending delete flag so the structure
 		 * is reused rather than freed, and take a reference on it.
 		 */
 		MUTEX_ENTER(&ifq->ifq_lock);
 		ifq->ifq_flags &= ~IFQF_DELETE;
 		ifq->ifq_ref++;
 		MUTEX_EXIT(&ifq->ifq_lock);
 	} else {
 		/* Nothing suitable: make a new user queue at the list head. */
 		KMALLOC(ifq, ipftq_t *);
 		if (ifq != NULL) {
 			MUTEX_NUKE(&ifq->ifq_lock);
 			IPFTQ_INIT(ifq, period, "ipftq mutex");
 			ifq->ifq_next = *parent;
 			ifq->ifq_pnext = parent;
 			ifq->ifq_flags = IFQF_USER;
 			ifq->ifq_ref++;
 			*parent = ifq;
 			softc->ipf_userifqs++;
 		}
 	}
 
 	MUTEX_EXIT(&softc->ipf_timeoutlock);
 	return ifq;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_deletetimeoutqueue                                      */
 /* Returns:     int    - new reference count value of the timeout queue     */
 /* Parameters:  ifq(I) - timeout queue which is losing a reference.         */
 /* Locks:       ifq->ifq_lock                                               */
 /*                                                                          */
 /* This routine must be called when we're discarding a pointer to a timeout */
 /* queue object, taking care of the reference counter.                      */
 /*                                                                          */
 /* Now that this just sets a DELETE flag, it requires the expire code to    */
 /* check the list of user defined timeout queues and call the free function */
 /* below to stop memory leaking.  It is done this way because the locking   */
 /* may not be sufficient to safely do a free when this function is called.  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_deletetimeoutqueue(ifq)
 	ipftq_t *ifq;
 {
 	int user;
 
 	ifq->ifq_ref--;
 
 	/* Only user defined queues are ever marked for deletion. */
 	user = (ifq->ifq_flags & IFQF_USER) != 0;
 	if (user && ifq->ifq_ref == 0)
 		ifq->ifq_flags |= IFQF_DELETE;
 
 	return ifq->ifq_ref;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_freetimeoutqueue                                        */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              ifq(I)   - timeout queue being removed and freed            */
 /* Returns:     Nil                                                         */
 /*                                                                          */
 /* Locking:                                                                 */
 /* It is assumed that the caller of this function has an appropriate lock   */
 /* held (exclusively) in the domain that encompasses the callers "domain".  */
 /* The ifq_lock for this structure should not be held.                      */
 /*                                                                          */
 /* Remove a user defined timeout queue from the list of queues it is in and */
 /* tidy up after this is done.                                              */
 /* ------------------------------------------------------------------------ */
 void
 ipf_freetimeoutqueue(softc, ifq)
 	ipf_main_softc_t *softc;
 	ipftq_t *ifq;
 {
 	int freeable;
 
 	/*
 	 * Only an unreferenced user queue that has been marked for
 	 * deletion may be freed; anything else is reported and left alone.
 	 */
 	freeable = ((ifq->ifq_flags & IFQF_DELETE) != 0) &&
 		   (ifq->ifq_ref == 0) &&
 		   ((ifq->ifq_flags & IFQF_USER) != 0);
 	if (!freeable) {
 		printf("ipf_freetimeoutqueue(%lx) flags 0x%x ttl %d ref %d\n",
 		       (u_long)ifq, ifq->ifq_flags, ifq->ifq_ttl,
 		       ifq->ifq_ref);
 		return;
 	}
 
 	/* Unlink the queue from the list it is on. */
 	*ifq->ifq_pnext = ifq->ifq_next;
 	if (ifq->ifq_next != NULL)
 		ifq->ifq_next->ifq_pnext = ifq->ifq_pnext;
 	ifq->ifq_pnext = NULL;
 	ifq->ifq_next = NULL;
 
 	MUTEX_DESTROY(&ifq->ifq_lock);
 	ATOMIC_DEC(softc->ipf_userifqs);
 	KFREE(ifq);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_deletequeueentry                                        */
 /* Returns:     Nil                                                         */
 /* Parameters:  tqe(I) - timeout queue entry to delete                      */
 /*                                                                          */
 /* Remove a tail queue entry from its queue and make it an orphan.          */
 /* ipf_deletetimeoutqueue is called to make sure the reference count on the */
 /* queue is correct.  We can't, however, call ipf_freetimeoutqueue because  */
 /* the correct lock(s) may not be held that would make it safe to do so.    */
 /* ------------------------------------------------------------------------ */
 void
 ipf_deletequeueentry(tqe)
 	ipftqent_t *tqe;
 {
 	ipftqent_t *after;
 	ipftq_t *ifq;
 
 	ifq = tqe->tqe_ifq;
 
 	MUTEX_ENTER(&ifq->ifq_lock);
 
 	/* A NULL tqe_pnext means the entry is not linked on the queue. */
 	if (tqe->tqe_pnext != NULL) {
 		after = tqe->tqe_next;
 		*tqe->tqe_pnext = after;
 		if (after == NULL) {
 			/* the entry was the tail, so the tail moves back */
 			ifq->ifq_tail = tqe->tqe_pnext;
 		} else {
 			after->tqe_pnext = tqe->tqe_pnext;
 		}
 
 		tqe->tqe_pnext = NULL;
 		tqe->tqe_ifq = NULL;
 	}
 
 	/* Give back the entry's reference on the queue. */
 	(void) ipf_deletetimeoutqueue(ifq);
 	ASSERT(ifq->ifq_ref > 0);
 
 	MUTEX_EXIT(&ifq->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_queuefront                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  tqe(I) - pointer to timeout queue entry                     */
 /*                                                                          */
 /* Move a queue entry to the front of the queue, if it isn't already there. */
 /* ------------------------------------------------------------------------ */
 void
 ipf_queuefront(tqe)
 	ipftqent_t *tqe;
 {
 	ipftqent_t *after;
 	ipftq_t *ifq;
 
 	/* An entry that is on no queue has nowhere to move. */
 	ifq = tqe->tqe_ifq;
 	if (ifq == NULL)
 		return;
 
 	MUTEX_ENTER(&ifq->ifq_lock);
 	if (ifq->ifq_head != tqe) {
 		/* Unlink from the current position... */
 		after = tqe->tqe_next;
 		*tqe->tqe_pnext = after;
 		if (after == NULL)
 			ifq->ifq_tail = tqe->tqe_pnext;
 		else
 			after->tqe_pnext = tqe->tqe_pnext;
 
 		/* ...and relink ahead of the current head. */
 		tqe->tqe_next = ifq->ifq_head;
 		ifq->ifq_head->tqe_pnext = &tqe->tqe_next;
 		ifq->ifq_head = tqe;
 		tqe->tqe_pnext = &ifq->ifq_head;
 	}
 	MUTEX_EXIT(&ifq->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_queueback                                               */
 /* Returns:     Nil                                                         */
 /* Parameters:  ticks(I) - ipf tick time to use with this call              */
 /*              tqe(I)   - pointer to timeout queue entry                   */
 /*                                                                          */
 /* Move a queue entry to the back of the queue, if it isn't already there.  */
 /* We use ticks to calculate the expiration and mark for when we last       */
 /* touched the structure.                                                   */
 /* ------------------------------------------------------------------------ */
 void
 ipf_queueback(ticks, tqe)
 	u_long ticks;
 	ipftqent_t *tqe;
 {
 	ipftqent_t *after;
 	ipftq_t *ifq;
 
 	ifq = tqe->tqe_ifq;
 	if (ifq == NULL)
 		return;
 
 	/* Refresh the timestamps before the queue lock is taken. */
 	tqe->tqe_touched = ticks;
 	tqe->tqe_die = ticks + ifq->ifq_ttl;
 
 	MUTEX_ENTER(&ifq->ifq_lock);
 	after = tqe->tqe_next;
 	if (after != NULL) {		/* NULL: already the last entry */
 		/* Unlink from the middle of the list. */
 		*tqe->tqe_pnext = after;
 		after->tqe_pnext = tqe->tqe_pnext;
 
 		/* Hang the entry off the tail link and move the tail on. */
 		tqe->tqe_pnext = ifq->ifq_tail;
 		tqe->tqe_next = NULL;
 		*ifq->ifq_tail = tqe;
 		ifq->ifq_tail = &tqe->tqe_next;
 	}
 	MUTEX_EXIT(&ifq->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_queueappend                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  ticks(I)  - ipf tick time to use with this call             */
 /*              tqe(I)    - pointer to timeout queue entry                  */
 /*              ifq(I)    - pointer to timeout queue                        */
 /*              parent(I) - owning object pointer                           */
 /*                                                                          */
 /* Add a new item to this queue and put it on the very end.                 */
 /* We use ticks to calculate the expiration and mark for when we last       */
 /* touched the structure.                                                   */
 /* ------------------------------------------------------------------------ */
 void
 ipf_queueappend(ticks, tqe, ifq, parent)
 	u_long ticks;
 	ipftqent_t *tqe;
 	ipftq_t *ifq;
 	void *parent;
 {
 
 	MUTEX_ENTER(&ifq->ifq_lock);
 
 	/* Fill in ownership and timing for the entry. */
 	tqe->tqe_ifq = ifq;
 	tqe->tqe_parent = parent;
 	tqe->tqe_touched = ticks;
 	tqe->tqe_die = ticks + ifq->ifq_ttl;
 
 	/* Link it in through the tail pointer, then advance the tail. */
 	tqe->tqe_pnext = ifq->ifq_tail;
 	*ifq->ifq_tail = tqe;
 	ifq->ifq_tail = &tqe->tqe_next;
 	tqe->tqe_next = NULL;
 
 	/* Every queued entry holds a reference on its queue. */
 	ifq->ifq_ref++;
 
 	MUTEX_EXIT(&ifq->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_movequeue                                               */
 /* Returns:     Nil                                                         */
 /* Parameters:  ticks(I) - ipf tick time to use with this call              */
 /*              tqe(I)   - pointer to timeout queue entry                   */
 /*              oifq(I)  - old timeout queue entry was on                   */
 /*              nifq(I)  - new timeout queue to put entry on                */
 /*                                                                          */
 /* Move a queue entry from one timeout queue to another timeout queue.      */
 /* If it notices that the current entry is already last and does not need   */
 /* to move queue, then return.                                              */
 /* ------------------------------------------------------------------------ */
 void
 ipf_movequeue(ticks, tqe, oifq, nifq)
 	u_long ticks;
 	ipftqent_t *tqe;
 	ipftq_t *oifq, *nifq;
 {
 
 	/*
 	 * If the queue hasn't changed and we last touched this entry at the
 	 * same ipf time, then we're not going to achieve anything by either
 	 * changing the ttl or moving it on the queue.
 	 */
 	if (oifq == nifq && tqe->tqe_touched == ticks)
 		return;
 
 	/*
 	 * For any of this to be outside the lock, there is a risk that two
 	 * packets entering simultaneously, with one changing to a different
 	 * queue and one not, could end up with things in a bizarre state.
 	 */
 	MUTEX_ENTER(&oifq->ifq_lock);
 
 	/* The expiry is computed from the ttl of the destination queue. */
 	tqe->tqe_touched = ticks;
 	tqe->tqe_die = ticks + nifq->ifq_ttl;
 	/*
 	 * Is the operation here going to be a no-op ?
 	 */
 	if (oifq == nifq) {
 		if ((tqe->tqe_next == NULL) ||
 		    (tqe->tqe_next->tqe_die == tqe->tqe_die)) {
 			MUTEX_EXIT(&oifq->ifq_lock);
 			return;
 		}
 	}
 
 	/*
 	 * Remove from the old queue
 	 */
 	*tqe->tqe_pnext = tqe->tqe_next;
 	if (tqe->tqe_next)
 		tqe->tqe_next->tqe_pnext = tqe->tqe_pnext;
 	else
 		oifq->ifq_tail = tqe->tqe_pnext;
 	tqe->tqe_next = NULL;
 
 	/*
 	 * If we're moving from one queue to another, release the
 	 * lock on the old queue and get a lock on the new queue.
 	 * For user defined queues, if we're moving off it, call
 	 * delete in case it can now be freed.
 	 */
 	if (oifq != nifq) {
 		tqe->tqe_ifq = NULL;
 
 		(void) ipf_deletetimeoutqueue(oifq);
 
 		MUTEX_EXIT(&oifq->ifq_lock);
 
 		MUTEX_ENTER(&nifq->ifq_lock);
 
 		tqe->tqe_ifq = nifq;
 		nifq->ifq_ref++;
 	}
 
 	/*
 	 * Add to the bottom of the new queue
 	 */
 	tqe->tqe_pnext = nifq->ifq_tail;
 	*nifq->ifq_tail = tqe;
 	nifq->ifq_tail = &tqe->tqe_next;
 	/*
 	 * When oifq == nifq this releases the lock taken at the top, as
 	 * both names then refer to the same queue.
 	 */
 	MUTEX_EXIT(&nifq->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_updateipid                                              */
 /* Returns:     int - 0 == success, -1 == error (packet should be dropped)  */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* When we are doing NAT, change the IP id of every packet to represent a   */
 /* single sequence of packets coming from the host, hiding any host         */
 /* specific sequencing that might otherwise be revealed.  If the packet is  */
 /* a fragment, then store the 'new' IPid in the fragment cache and look up  */
 /* the fragment cache for non-leading fragments.  If a non-leading fragment */
 /* has no match in the cache, return an error.                              */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_updateipid(fin)
 	fr_info_t *fin;
 {
 	u_short id, ido, sums;
 	u_32_t sumd, sum;
 	ip_t *ip;
 
+	ip = fin->fin_ip;
+	ido = ntohs(ip->ip_id);
 	/*
 	 * A non-zero fragment offset means the id has to come from the
 	 * fragment cache; a lookup result of 0xffffffff is treated as a
 	 * failure.  Otherwise a fresh id is obtained and, for fragmented
 	 * packets, recorded via ipf_frag_ipidnew().
 	 */
 	if (fin->fin_off != 0) {
 		sum = ipf_frag_ipidknown(fin);
 		if (sum == 0xffffffff)
 			return -1;
 		sum &= 0xffff;
 		id = (u_short)sum;
+		ip->ip_id = htons(id);
 	} else {
-		id = ipf_nextipid(fin);
-		if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0)
+		ip_fillid(ip);
+		id = ntohs(ip->ip_id);
+		if ((fin->fin_flx & FI_FRAG) != 0)
 			(void) ipf_frag_ipidnew(fin, (u_32_t)id);
 	}
 
-	ip = fin->fin_ip;
-	ido = ntohs(ip->ip_id);
 	/* Same id as before: the header checksum needs no adjustment. */
 	if (id == ido)
 		return 0;
-	ip->ip_id = htons(id);
 	/*
 	 * Add the id difference to the complemented header checksum, fold
 	 * the carries back in twice and store the complemented result.
 	 */
 	CALC_SUMD(ido, id, sumd);	/* DESTRUCTIVE MACRO! id,ido change */
 	sum = (~ntohs(ip->ip_sum)) & 0xffff;
 	sum += sumd;
 	sum = (sum >> 16) + (sum & 0xffff);
 	sum = (sum >> 16) + (sum & 0xffff);
 	sums = ~(u_short)sum;
 	ip->ip_sum = htons(sums);
 	return 0;
 }
 
 
 #ifdef	NEED_FRGETIFNAME
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_getifname                                               */
 /* Returns:     char *    - pointer to interface name                       */
 /* Parameters:  ifp(I)    - pointer to network interface                    */
 /*              buffer(O) - pointer to where to store interface name        */
 /*                                                                          */
 /* Constructs an interface name in the buffer passed.  The buffer passed is */
 /* expected to be at least LIFNAMSIZ in bytes big.  If buffer is passed in  */
 /* as a NULL pointer then return a pointer to a static array.               */
 /* The result is always NUL terminated within LIFNAMSIZ bytes; a name and   */
 /* unit number that do not fit are truncated.                               */
 /* ------------------------------------------------------------------------ */
 char *
 ipf_getifname(ifp, buffer)
 	struct ifnet *ifp;
 	char *buffer;
 {
 	static char namebuf[LIFNAMSIZ];
 # if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
      defined(__sgi) || defined(linux) || defined(_AIX51) || \
      (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
 	int unit, space;
 	char temp[20];
 	char *s;
 # endif
 
 	if (buffer == NULL)
 		buffer = namebuf;
 	(void) strncpy(buffer, ifp->if_name, LIFNAMSIZ);
 	buffer[LIFNAMSIZ - 1] = '\0';
 # if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \
      defined(__sgi) || defined(_AIX51) || \
      (defined(sun) && !defined(__SVR4) && !defined(__svr4__))
 	/* Find the end of the driver name so the unit can be appended. */
 	for (s = buffer; *s; s++)
 		;
 	unit = ifp->if_unit;
 	space = LIFNAMSIZ - (s - buffer);
 	if ((space > 0) && (unit >= 0)) {
 #  if defined(SNPRINTF) && defined(_KERNEL)
 		SNPRINTF(temp, sizeof(temp), "%d", unit);
 #  else
 		(void) sprintf(temp, "%d", unit);
 #  endif
 		(void) strncpy(s, temp, space);
 		/*
 		 * strncpy() writes exactly "space" bytes and adds no NUL
 		 * when the digits fill them all, which would overwrite the
 		 * terminator stored above.  Put it back.
 		 */
 		buffer[LIFNAMSIZ - 1] = '\0';
 	}
 # endif
 	return buffer;
 }
 #endif
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ioctlswitch                                             */
 /* Returns:     int     - -1 continue processing, else ioctl return value   */
 /* Parameters:  unit(I) - device unit opened                                */
 /*              data(I) - pointer to ioctl data                             */
 /*              cmd(I)  - ioctl command                                     */
 /*              mode(I) - mode value                                        */
 /*              uid(I)  - uid making the ioctl call                         */
 /*              ctx(I)  - pointer to context data                           */
 /*                                                                          */
 /* Based on the value of unit, call the appropriate ioctl handler or return */
 /* EIO if ipfilter is not running.   Also checks if write perms are req'd   */
 /* for the device in order to execute the ioctl. A special case is made for */
 /* SIOCIPFINTERROR so that the same code isn't required in every handler.   */
 /* The context data pointer is passed through as this is used as the key    */
 /* for locating a matching token for continued access for walking lists,    */
 /* etc.                                                                     */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ioctlswitch(softc, unit, data, cmd, mode, uid, ctx)
 	ipf_main_softc_t *softc;
 	int unit, mode, uid;
 	ioctlcmd_t cmd;
 	void *data, *ctx;
 {
 	int error = 0;
 
 	switch (cmd)
 	{
 	case SIOCIPFINTERROR :
 		error = BCOPYOUT(&softc->ipf_interror, data,
 				 sizeof(softc->ipf_interror));
 		if (error != 0) {
 			IPFERROR(40);
 			error = EFAULT;
 		}
 		return error;
 	default :
 		break;
 	}
 
 	switch (unit)
 	{
 	case IPL_LOGIPF :
 		error = ipf_ipf_ioctl(softc, data, cmd, mode, uid, ctx);
 		break;
 	case IPL_LOGNAT :
 		if (softc->ipf_running > 0) {
 			error = ipf_nat_ioctl(softc, data, cmd, mode,
 					      uid, ctx);
 		} else {
 			IPFERROR(42);
 			error = EIO;
 		}
 		break;
 	case IPL_LOGSTATE :
 		if (softc->ipf_running > 0) {
 			error = ipf_state_ioctl(softc, data, cmd, mode,
 						uid, ctx);
 		} else {
 			IPFERROR(43);
 			error = EIO;
 		}
 		break;
 	case IPL_LOGAUTH :
 		if (softc->ipf_running > 0) {
 			error = ipf_auth_ioctl(softc, data, cmd, mode,
 					       uid, ctx);
 		} else {
 			IPFERROR(44);
 			error = EIO;
 		}
 		break;
 	case IPL_LOGSYNC :
 		if (softc->ipf_running > 0) {
 			error = ipf_sync_ioctl(softc, data, cmd, mode,
 					       uid, ctx);
 		} else {
 			error = EIO;
 			IPFERROR(45);
 		}
 		break;
 	case IPL_LOGSCAN :
 #ifdef IPFILTER_SCAN
 		if (softc->ipf_running > 0)
 			error = ipf_scan_ioctl(softc, data, cmd, mode,
 					       uid, ctx);
 		else
 #endif
 		{
 			error = EIO;
 			IPFERROR(46);
 		}
 		break;
 	case IPL_LOGLOOKUP :
 		if (softc->ipf_running > 0) {
 			error = ipf_lookup_ioctl(softc, data, cmd, mode,
 						 uid, ctx);
 		} else {
 			error = EIO;
 			IPFERROR(47);
 		}
 		break;
 	default :
 		IPFERROR(48);
 		error = EIO;
 		break;
 	}
 
 	return error;
 }
 
 
 /*
  * This array defines the expected size of objects coming into the kernel
  * for the various recognised object types. The first column is flags (see
  * below), 2nd column is current size, 3rd column is the version number of
  * when the current size became current.
  * Flags:
  * 1 = minimum size, not absolute size
  */
 static	int	ipf_objbytes[IPFOBJ_COUNT][3] = {
 	/* Columns: { flags, size in bytes, version the size dates from } */
 	{ 1,	sizeof(struct frentry),		5010000 },	/* 0 */
 	{ 1,	sizeof(struct friostat),	5010000 },
 	{ 0,	sizeof(struct fr_info),		5010000 },
 	{ 0,	sizeof(struct ipf_authstat),	4010100 },
 	{ 0,	sizeof(struct ipfrstat),	5010000 },
 	{ 1,	sizeof(struct ipnat),		5010000 },	/* 5 */
 	{ 0,	sizeof(struct natstat),		5010000 },
 	{ 0,	sizeof(struct ipstate_save),	5010000 },
 	{ 1,	sizeof(struct nat_save),	5010000 },
 	{ 0,	sizeof(struct natlookup),	5010000 },
 	{ 1,	sizeof(struct ipstate),		5010000 },	/* 10 */
 	{ 0,	sizeof(struct ips_stat),	5010000 },
 	{ 0,	sizeof(struct frauth),		5010000 },
 	{ 0,	sizeof(struct ipftune),		4010100 },
 	{ 0,	sizeof(struct nat),		5010000 },
 	{ 0,	sizeof(struct ipfruleiter),	4011400 },	/* 15 */
 	{ 0,	sizeof(struct ipfgeniter),	4011400 },
 	{ 0,	sizeof(struct ipftable),	4011400 },
 	{ 0,	sizeof(struct ipflookupiter),	4011400 },
 	/*
 	 * No version is given for the next row, so its third column is
 	 * implicitly zero.
 	 */
 	{ 0,	sizeof(struct ipftq) * IPF_TCP_NSTATES },
 	{ 1,	0,				0	}, /* IPFEXPR */
 	{ 0,	0,				0	}, /* PROXYCTL */
 	{ 0,	sizeof (struct fripf),		5010000	}
 };
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inobj                                                   */
 /* Returns:     int     - 0 = success, else failure                         */
 /* Parameters:  softc(I) - soft context pointer to work with                */
 /*              data(I)  - pointer to ioctl data                            */
 /*              objp(O)  - where to store ipfobj structure                  */
 /*              ptr(O)   - where to store the data copied in                */
 /*              type(I)  - type of structure being moved                    */
 /*                                                                          */
 /* Copy in the contents of what the ipfobj_t points to.  In future, we      */
 /* add things to check for version numbers, sizes, etc, to make it backward */
 /* compatible at the ABI for user land.                                     */
 /* If objp is not NULL then we assume that the caller wants to see what is  */
 /* in the ipfobj_t structure being copied in. As an example, this can tell  */
 /* the caller what version of ipfilter the ioctl program was written to.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_inobj(softc, data, objp, ptr, type)
 	ipf_main_softc_t *softc;
 	void *data;
 	ipfobj_t *objp;
 	void *ptr;
 	int type;
 {
 	ipfobj_t scratch;
 	int error;
 	int size;
 
 	if ((type < 0) || (type >= IPFOBJ_COUNT)) {
 		IPFERROR(49);
 		return EINVAL;
 	}
 
 	/* Callers not interested in the descriptor get a local copy. */
 	if (objp == NULL)
 		objp = &scratch;
 
 	error = BCOPYIN(data, objp, sizeof(*objp));
 	if (error != 0) {
 		IPFERROR(124);
 		return EFAULT;
 	}
 
 	if (objp->ipfo_type != type) {
 		IPFERROR(50);
 		return EINVAL;
 	}
 
 	/*
 	 * A descriptor older than the version recorded in the table is only
 	 * handled when the compatibility code has been compiled in.
 	 */
 	if (objp->ipfo_rev < ipf_objbytes[type][2]) {
 #ifdef  IPFILTER_COMPAT
 		return ipf_in_compat(softc, objp, ptr, 0);
 #else
 		IPFERROR(54);
 		return EINVAL;
 #endif
 	}
 
 	/*
 	 * Whether the table size is a minimum or an exact requirement, the
 	 * number of bytes copied in is the table size.
 	 */
 	size = ipf_objbytes[type][1];
 	if ((ipf_objbytes[type][0] & 1) != 0) {
 		if (objp->ipfo_size < size) {
 			IPFERROR(51);
 			return EINVAL;
 		}
 	} else if (objp->ipfo_size != size) {
 		IPFERROR(52);
 		return EINVAL;
 	}
 
 	error = COPYIN(objp->ipfo_ptr, ptr, size);
 	if (error != 0) {
 		IPFERROR(55);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inobjsz                                                 */
 /* Returns:     int     - 0 = success, else failure                         */
 /* Parameters:  softc(I) - soft context pointer to work with                */
 /*              data(I)  - pointer to ioctl data                            */
 /*              ptr(I)   - pointer to store real data in                    */
 /*              type(I)  - type of structure being moved                    */
 /*              sz(I)    - size of data to copy                             */
 /*                                                                          */
 /* As per ipf_inobj, except the size of the object to copy in is passed in  */
 /* but it must not be smaller than the size defined for the type and the    */
 /* type must allow for varied sized objects.  The extra requirement here is */
 /* that sz must match the size of the object being passed in - this is not  */
 /* possible nor required in ipf_inobj().                                    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_inobjsz(softc, data, ptr, type, sz)
 	ipf_main_softc_t *softc;
 	void *data;
 	void *ptr;
 	int type, sz;
 {
 	ipfobj_t obj;
 	int error;
 
 	if ((type < 0) || (type >= IPFOBJ_COUNT)) {
 		IPFERROR(56);
 		return EINVAL;
 	}
 
 	error = BCOPYIN(data, &obj, sizeof(obj));
 	if (error != 0) {
 		IPFERROR(125);
 		return EFAULT;
 	}
 
 	if (obj.ipfo_type != type) {
 		IPFERROR(58);
 		return EINVAL;
 	}
 
 	/* Descriptors older than the table version go to the compat code. */
 	if (obj.ipfo_rev < ipf_objbytes[type][2]) {
 #ifdef	IPFILTER_COMPAT
 		return ipf_in_compat(softc, &obj, ptr, sz);
 #else
 		IPFERROR(60);
 		return EINVAL;
 #endif
 	}
 
 	/*
 	 * Only types flagged as "minimum size" may be copied with a caller
 	 * chosen length, and that length may not be below the minimum.
 	 */
 	if (((ipf_objbytes[type][0] & 1) == 0) ||
 	    (sz < ipf_objbytes[type][1])) {
 		IPFERROR(57);
 		return EINVAL;
 	}
 
 	error = COPYIN(obj.ipfo_ptr, ptr, sz);
 	if (error != 0) {
 		IPFERROR(61);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_outobjsz                                                */
 /* Returns:     int     - 0 = success, else failure                         */
 /* Parameters:  data(I) - pointer to ioctl data                             */
 /*              ptr(I)  - pointer to store real data in                     */
 /*              type(I) - type of structure being moved                     */
 /*              sz(I)   - size of data to copy                              */
 /*                                                                          */
 /* As per ipf_outobj, except the size of the object to copy out is passed in*/
 /* but it must not be smaller than the size defined for the type and the    */
 /* type must allow for varied sized objects.  The extra requirement here is */
 /* that sz must match the size of the object being passed in - this is not  */
 /* possible nor required in ipf_outobj().                                   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_outobjsz(softc, data, ptr, type, sz)
 	ipf_main_softc_t *softc;
 	void *data;
 	void *ptr;
 	int type, sz;
 {
 	ipfobj_t obj;
 	int error;
 
 	if ((type < 0) || (type >= IPFOBJ_COUNT)) {
 		IPFERROR(62);
 		return EINVAL;
 	}
 
 	error = BCOPYIN(data, &obj, sizeof(obj));
 	if (error != 0) {
 		IPFERROR(127);
 		return EFAULT;
 	}
 
 	if (obj.ipfo_type != type) {
 		IPFERROR(63);
 		return EINVAL;
 	}
 
 	/* Descriptors older than the table version go to the compat code. */
 	if (obj.ipfo_rev < ipf_objbytes[type][2]) {
 #ifdef	IPFILTER_COMPAT
 		return ipf_out_compat(softc, &obj, ptr);
 #else
 		IPFERROR(65);
 		return EINVAL;
 #endif
 	}
 
 	/*
 	 * Only types flagged as "minimum size" may be copied with a caller
 	 * chosen length, and that length may not be below the minimum.
 	 */
 	if (((ipf_objbytes[type][0] & 1) == 0) ||
 	    (sz < ipf_objbytes[type][1])) {
 		IPFERROR(146);
 		return EINVAL;
 	}
 
 	error = COPYOUT(ptr, obj.ipfo_ptr, sz);
 	if (error != 0) {
 		IPFERROR(66);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_outobj                                                  */
 /* Returns:     int     - 0 = success, else failure                         */
 /* Parameters:  data(I) - pointer to ioctl data                             */
 /*              ptr(I)  - pointer to store real data in                     */
 /*              type(I) - type of structure being moved                     */
 /*                                                                          */
 /* Copy out the contents of what ptr is to where ipfobj points to.  In      */
 /* future, we add things to check for version numbers, sizes, etc, to make  */
 /* it backward  compatible at the ABI for user land.                        */
 /* ------------------------------------------------------------------------ */
 int
 ipf_outobj(softc, data, ptr, type)
 	ipf_main_softc_t *softc;
 	void *data;
 	void *ptr;
 	int type;
 {
 	ipfobj_t obj;
 	int error;
 
 	if ((type < 0) || (type >= IPFOBJ_COUNT)) {
 		IPFERROR(67);
 		return EINVAL;
 	}
 
 	error = BCOPYIN(data, &obj, sizeof(obj));
 	if (error != 0) {
 		IPFERROR(126);
 		return EFAULT;
 	}
 
 	if (obj.ipfo_type != type) {
 		IPFERROR(68);
 		return EINVAL;
 	}
 
 	/* Descriptors older than the table version go to the compat code. */
 	if (obj.ipfo_rev < ipf_objbytes[type][2]) {
 #ifdef	IPFILTER_COMPAT
 		return ipf_out_compat(softc, &obj, ptr);
 #else
 		IPFERROR(72);
 		return EINVAL;
 #endif
 	}
 
 	/*
 	 * The size in the descriptor has to be at least the table size for
 	 * "minimum size" types and exactly the table size for all others.
 	 */
 	if ((ipf_objbytes[type][0] & 1) == 0) {
 		if (obj.ipfo_size != ipf_objbytes[type][1]) {
 			IPFERROR(70);
 			return EINVAL;
 		}
 	} else if (obj.ipfo_size < ipf_objbytes[type][1]) {
 		IPFERROR(69);
 		return EINVAL;
 	}
 
 	/*
 	 * NOTE(review): the length copied out is the size taken from the
 	 * descriptor, which for "minimum size" types may be larger than the
 	 * table size - confirm that callers pass a ptr covering that much.
 	 */
 	error = COPYOUT(ptr, obj.ipfo_ptr, obj.ipfo_size);
 	if (error != 0) {
 		IPFERROR(73);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_outobjk                                                 */
 /* Returns:     int     - 0 = success, else failure                         */
 /* Parameters:  obj(I)  - pointer to data description structure             */
 /*              ptr(I)  - pointer to kernel data to copy out                */
 /*                                                                          */
 /* In the above functions, the ipfobj_t structure is copied into the kernel,*/
 /* telling ipfilter how to copy out data. In this instance, the ipfobj_t is */
 /* already populated with information and now we just need to use it.       */
 /* There is no need for this function to have a "type" parameter as there   */
 /* is no point in validating information that comes from the kernel with    */
 /* itself.                                                                  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_outobjk(softc, obj, ptr)
 	ipf_main_softc_t *softc;
 	ipfobj_t *obj;
 	void *ptr;
 {
 	int type;
 	int error;
 
 	type = obj->ipfo_type;
 	if ((type < 0) || (type >= IPFOBJ_COUNT)) {
 		IPFERROR(147);
 		return EINVAL;
 	}
 
 	/* Descriptors older than the table version go to the compat code. */
 	if (obj->ipfo_rev < ipf_objbytes[type][2]) {
 #ifdef  IPFILTER_COMPAT
 		return ipf_out_compat(softc, obj, ptr);
 #else
 		IPFERROR(151);
 		return EINVAL;
 #endif
 	}
 
 	/*
 	 * The size in the descriptor has to be at least the table size for
 	 * "minimum size" types and exactly the table size for all others.
 	 */
 	if ((ipf_objbytes[type][0] & 1) == 0) {
 		if (obj->ipfo_size != ipf_objbytes[type][1]) {
 			IPFERROR(149);
 			return EINVAL;
 		}
 	} else if (obj->ipfo_size < ipf_objbytes[type][1]) {
 		IPFERROR(148);
 		return EINVAL;
 	}
 
 	error = COPYOUT(ptr, obj->ipfo_ptr, obj->ipfo_size);
 	if (error != 0) {
 		IPFERROR(150);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_checkl4sum                                              */
 /* Returns:     int     - 0 = good, -1 = bad, 1 = cannot check              */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* If possible, calculate the layer 4 checksum for the packet.  If this is  */
 /* not possible, return without indicating a failure or success but in a    */
 /* way that is distinguishable. This function should only be called by the  */
 /* ipf_checkv6sum() for each platform.                                      */
 /* ------------------------------------------------------------------------ */
 INLINE int
 ipf_checkl4sum(fin)
 	fr_info_t *fin;
 {
 	u_short sum = 0, hdrsum = 0;
 	u_short *csump = NULL;
 	udphdr_t *udp;
 
 	/*
 	 * Fragments, short packets and packets already flagged as bad are
 	 * reported as "cannot check".
 	 */
 	if ((fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) != 0)
 		return 1;
 
 	/*
 	 * Find where the protocol keeps its checksum.  csump is left NULL
 	 * only for a UDP header carrying a zero checksum field, in which
 	 * case nothing is computed and the two zero values compare equal.
 	 */
 	switch (fin->fin_p)
 	{
 	case IPPROTO_TCP :
 		csump = &((tcphdr_t *)fin->fin_dp)->th_sum;
 		break;
 
 	case IPPROTO_UDP :
 		udp = fin->fin_dp;
 		if (udp->uh_sum != 0)
 			csump = &udp->uh_sum;
 		break;
 
 #ifdef USE_INET6
 	case IPPROTO_ICMPV6 :
 		csump = &((struct icmp6_hdr *)fin->fin_dp)->icmp6_cksum;
 		break;
 #endif
 
 	case IPPROTO_ICMP :
 		csump = &((struct icmp *)fin->fin_dp)->icmp_cksum;
 		break;
 
 	default :
 		return 1;
 		/*NOTREACHED*/
 	}
 
 	/* Read the value carried in the header before computing our own. */
 	if (csump != NULL) {
 		hdrsum = *csump;
 		sum = fr_cksum(fin, fin->fin_ip, fin->fin_p, fin->fin_dp);
 	}
 #if !defined(_KERNEL)
 	if (sum == hdrsum) {
 		FR_DEBUG(("checkl4sum: %hx == %hx\n", sum, hdrsum));
 	} else {
 		FR_DEBUG(("checkl4sum: %hx != %hx\n", sum, hdrsum));
 	}
 #endif
 	DT2(l4sums, u_short, hdrsum, u_short, sum);
 
 	/* Record the verdict in the packet information as well. */
 	if (hdrsum != sum) {
 		fin->fin_cksum = FI_CK_BAD;
 		return -1;
 	}
 	fin->fin_cksum = FI_CK_SUMOK;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ifpfillv4addr                                           */
 /* Returns:     int     - 0 = address update, -1 = address not updated      */
 /* Parameters:  atype(I)   - type of network address update to perform      */
 /*              sin(I)     - pointer to source of address information       */
 /*              mask(I)    - pointer to source of netmask information       */
 /*              inp(I)     - pointer to destination address store           */
 /*              inpmask(I) - pointer to destination netmask store           */
 /*                                                                          */
 /* Given a type of network address update (atype) to perform, copy          */
 /* information from sin/mask into inp/inpmask.  If inpmask is NULL then no  */
 /* netmask update is performed unless FRI_NETMASKED is passed as atype, in  */
 /* which case the operation fails.  For all values of atype other than      */
 /* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s  */
 /* value.                                                                   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ifpfillv4addr(atype, sin, mask, inp, inpmask)
 	int atype;
 	struct sockaddr_in *sin, *mask;
 	struct in_addr *inp, *inpmask;
 {
 	/*
 	 * FRI_NETMASKED is the only type that hands the real netmask back,
 	 * so it cannot work without somewhere to store it.
 	 */
 	if (atype == FRI_NETMASKED) {
 		if (inpmask == NULL)
 			return -1;
 		inpmask->s_addr = mask->sin_addr.s_addr;
 		inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr;
 		return 0;
 	}
 
 	/* Every other type reports an all 1s mask when one is wanted. */
 	if (inpmask != NULL)
 		inpmask->s_addr = 0xffffffff;
 
 	/* FRI_NETWORK stores the masked address, the rest the address as is. */
 	if (atype == FRI_NETWORK)
 		inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr;
 	else
 		inp->s_addr = sin->sin_addr.s_addr;
 	return 0;
 }
 
 
 #ifdef	USE_INET6
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ifpfillv6addr                                           */
 /* Returns:     int     - 0 = address update, -1 = address not updated      */
 /* Parameters:  atype(I)   - type of network address update to perform      */
 /*              sin(I)     - pointer to source of address information       */
 /*              mask(I)    - pointer to source of netmask information       */
 /*              inp(I)     - pointer to destination address store           */
 /*              inpmask(I) - pointer to destination netmask store           */
 /*                                                                          */
 /* Given a type of network address update (atype) to perform, copy          */
 /* information from sin/mask into inp/inpmask.  If inpmask is NULL then no  */
 /* netmask update is performed unless FRI_NETMASKED is passed as atype, in  */
 /* which case the operation fails.  For all values of atype other than      */
 /* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s  */
 /* value.                                                                   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ifpfillv6addr(atype, sin, mask, inp, inpmask)
 	int atype;
 	struct sockaddr_in6 *sin, *mask;
 	i6addr_t *inp, *inpmask;
 {
 	i6addr_t *addr, *bits;
 	int i;
 
 	/* View both socket addresses as four 32bit words. */
 	addr = (i6addr_t *)&sin->sin6_addr;
 	bits = (i6addr_t *)&mask->sin6_addr;
 
 	/*
 	 * FRI_NETMASKED is the only type that hands the real netmask back,
 	 * so it cannot work without somewhere to store it.
 	 */
 	if (atype == FRI_NETMASKED) {
 		if (inpmask == NULL)
 			return -1;
 		for (i = 0; i < 4; i++)
 			inpmask->i6[i] = bits->i6[i];
 		for (i = 0; i < 4; i++)
 			inp->i6[i] = addr->i6[i] & bits->i6[i];
 		return 0;
 	}
 
 	/* Every other type reports an all 1s mask when one is wanted. */
 	if (inpmask != NULL) {
 		for (i = 0; i < 4; i++)
 			inpmask->i6[i] = 0xffffffff;
 	}
 
 	/* FRI_NETWORK stores the masked address, the rest the address as is. */
 	if (atype == FRI_NETWORK) {
 		for (i = 0; i < 4; i++)
 			inp->i6[i] = addr->i6[i] & bits->i6[i];
 	} else {
 		for (i = 0; i < 4; i++)
 			inp->i6[i] = addr->i6[i];
 	}
 	return 0;
 }
 #endif
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_matchtag                                                */
 /* Returns:     0 == mismatch, 1 == match.                                  */
 /* Parameters:  tag1(I) - pointer to first tag to compare                   */
 /*              tag2(I) - pointer to second tag to compare                  */
 /*                                                                          */
 /* Returns true (non-zero) or false(0) if the two tag structures can be     */
 /* considered to be a match or not match, respectively.  The tag is 16      */
 /* bytes long (16 characters) but that is overlayed with 4 32bit ints so    */
 /* compare the ints instead, for speed. tag1 is the master of the           */
 /* comparison.  This function should only be called with both tag1 and tag2 */
 /* as non-NULL pointers.                                                    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_matchtag(tag1, tag2)
 	ipftag_t *tag1, *tag2;
 {
 	int i;
 
 	/* The very same structure trivially matches itself. */
 	if (tag1 == tag2)
 		return 1;
 
 	/* Two tags whose first word is zero are considered a match. */
 	if ((tag1->ipt_num[0] == 0) && (tag2->ipt_num[0] == 0))
 		return 1;
 
 	/* Otherwise all four 32bit words have to be identical. */
 	for (i = 0; i < 4; i++) {
 		if (tag1->ipt_num[i] != tag2->ipt_num[i])
 			return 0;
 	}
 	return 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_coalesce                                                */
 /* Returns:     1 == success, -1 == failure, 0 == no change                 */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Attempt to get all of the packet data into a single, contiguous buffer.  */
 /* If this call returns a failure then the buffers have also been freed.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_coalesce(fin)
 	fr_info_t *fin;
 {
 #if defined(_KERNEL)
 	ipf_main_softc_t *softc;
 #endif
 
 	/* FI_COALESCE already set: report success without doing anything. */
 	if ((fin->fin_flx & FI_COALESCE) != 0)
 		return 1;
 
 	/*
 	 * With no mbuf to work on there is nothing to do; this is reported
 	 * as neither a success nor a failure.
 	 */
 	if (fin->fin_m == NULL || fin->fin_mp == NULL)
 		return 0;
 
 #if defined(_KERNEL)
 	if (ipf_pullup(fin->fin_m, fin, fin->fin_plen) != NULL)
 		return 1;
 
 	/*
 	 * The pullup failed: count it, record why the packet is being
 	 * dropped and clear the mbuf pointers held for the packet.
 	 */
 	softc = fin->fin_main_soft;
 
 	DT1(frb_coalesce, fr_info_t *, fin);
 	LBUMP(ipf_stats[fin->fin_out].fr_badcoalesces);
 # ifdef MENTAT
 	FREE_MB_T(*fin->fin_mp);
 # endif
 	fin->fin_reason = FRB_COALESCE;
 	*fin->fin_mp = NULL;
 	fin->fin_m = NULL;
 	return -1;
 #else
 	fin = fin;	/* LINT */
 	return 1;
 #endif
 }
 
 
 /*
  * The following table lists all of the tunable variables that can be
  * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXT.  The format of each row
  * in the table below is as follows:
  *
  * pointer to value, name of value, minimum, maximum, size of the value's
  *     container, value attribute flags
  *
  * For convenience, IPFT_RDONLY means the value is read-only, IPFT_WRDISABLED
  * means the value can only be written to when IPFilter is loaded but disabled.
  * The obvious implication is if neither of these are set then the value can be
  * changed at any time without harm.
  */
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_findbycookie                                       */
 /* Returns:     NULL = search failed, else pointer to tune struct           */
 /* Parameters:  cookie(I) - cookie value to search for amongst tuneables    */
 /*              next(O)   - pointer to place to store the cookie for the    */
 /*                          "next" tuneable, if it is desired.              */
 /*                                                                          */
 /* This function is used to walk through all of the existing tunables with  */
 /* successive calls.  It searches the known tunables for the one which has  */
 /* a matching value for "cookie" - ie its address.  When returning a match, */
 /* the next one to be found may be returned inside next.                    */
 /* ------------------------------------------------------------------------ */
 static ipftuneable_t *
 ipf_tune_findbycookie(ptop, cookie, next)
 	ipftuneable_t **ptop;
 	void *cookie, **next;
 {
 	ipftuneable_t *ent, **link;
 
 	/*
 	 * First pass: step through the entries at *ptop as an array, up to
 	 * the entry with no name, comparing each entry's address with the
 	 * cookie.
 	 */
 	ent = *ptop;
 	while (ent->ipft_name != NULL) {
 		if (ent != cookie) {
 			ent++;
 			continue;
 		}
 		if (next != NULL) {
 			/*
 			 * If the following array entry has a name it is where
 			 * to go next, else hand back ptop as the key with
 			 * which to start searching the dynamic list.  This
 			 * weakly links the two "lists" together.
 			 */
 			if (ent[1].ipft_name != NULL)
 				*next = ent + 1;
 			else
 				*next = ptop;
 		}
 		return ent;
 	}
 
 	/*
 	 * Second pass: follow the next pointers, where the cookie is the
 	 * address of the pointer leading to the entry.
 	 */
 	link = ptop;
 	while ((ent = *link) != NULL) {
 		if (link == cookie) {
 			if (next != NULL)
 				*next = &ent->ipft_next;
 			return ent;
 		}
 		link = &ent->ipft_next;
 	}
 
 	if (next != NULL)
 		*next = NULL;
 	return NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_findbyname                                         */
 /* Returns:     NULL = search failed, else pointer to tune struct           */
 /* Parameters:  name(I) - name of the tuneable entry to find.               */
 /*                                                                          */
 /* Search the static array of tuneables and the list of dynamic tuneables   */
 /* for an entry with a matching name.  If we can find one, return a pointer */
 /* to the matching structure.                                               */
 /* ------------------------------------------------------------------------ */
 static ipftuneable_t *
 ipf_tune_findbyname(top, name)
 	ipftuneable_t *top;
 	const char *name;
 {
 	ipftuneable_t *ent = top;
 
 	/* Follow the next pointers until a name compares equal. */
 	while (ent != NULL) {
 		if (strcmp(ent->ipft_name, name) == 0)
 			break;
 		ent = ent->ipft_next;
 	}
 
 	/* ent is NULL here when the end was reached without a match. */
 	return ent;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_add_array                                          */
 /* Returns:     int - 0 == success, else failure                            */
 /* Parameters:  newtune - pointer to new tune array to add to tuneables     */
 /*                                                                          */
 /* Appends tune structures from the array passed in (newtune) to the end of */
 /* the current list of "dynamic" tuneable parameters.                       */
 /* If any entry to be added is already present (by name) then the operation */
 /* is aborted - entries that have been added are removed before returning.  */
 /* An entry with no name (NULL) is used as the indication that the end of   */
 /* the array has been reached.                                              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_add_array(softc, newtune)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *newtune;
 {
 	ipftuneable_t *nt, *dt;
 	int error = 0;
 
 	for (nt = newtune; nt->ipft_name != NULL; nt++) {
 		error = ipf_tune_add(softc, nt);
 		if (error != 0) {
 			/*
 			 * Undo the entries added so far and stop.  Without
 			 * the break the loop would go on adding the remaining
 			 * entries and a later success would overwrite the
 			 * error being reported to the caller.
 			 */
 			for (dt = newtune; dt != nt; dt++) {
 				(void) ipf_tune_del(softc, dt);
 			}
 			break;
 		}
 	}
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_array_link                                         */
 /* Returns:     0 == success, -1 == failure                                 */
 /* Parameters:  softc(I) - soft context pointer to work with                */
 /*              array(I) - pointer to an array of tuneables                 */
 /*                                                                          */
 /* Turn the array into a singly linked chain (each named entry points at    */
 /* the one after it, the last named entry ends the chain) and then hang     */
 /* that chain off the end of softc->ipf_tuners.  The end of the existing    */
 /* list is either a NULL next pointer or an entry without a name.  An       */
 /* array whose first entry has no name is rejected with -1.                 */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_array_link(softc, array)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *array;
 {
 	ipftuneable_t *cur, **link;
 
 	if (array->ipft_name == NULL)
 		return -1;
 
 	/* Chain the named entries together; the last one ends the chain. */
 	cur = array;
 	while (cur[1].ipft_name != NULL) {
 		cur->ipft_next = cur + 1;
 		cur++;
 	}
 	cur->ipft_next = NULL;
 
 	/*
 	 * No pointer to the end of the list is kept, so look for it every
 	 * time: stop at the first NULL link or at a nameless entry.
 	 */
 	link = &softc->ipf_tuners;
 	while (*link != NULL) {
 		if ((*link)->ipft_name == NULL)
 			break;
 		link = &(*link)->ipft_next;
 	}
 	*link = array;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_array_unlink                                       */
 /* Returns:     0 == success, -1 == failure                                 */
 /* Parameters:  softc(I) - soft context pointer to work with                */
 /*              array(I) - pointer to an array of tuneables                 */
 /*                                                                          */
 /* Find array in the list of tuneables hanging off softc->ipf_tuners and    */
 /* cut it out: whatever pointed at the first entry of the array is made     */
 /* to point at whatever follows its last named entry.  -1 is returned if    */
 /* the array is not part of the list.                                       */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_array_unlink(softc, array)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *array;
 {
 	ipftuneable_t *last, **link;
 
 	/* Locate the link that currently points at the head of the array. */
 	link = &softc->ipf_tuners;
 	while ((*link != NULL) && (*link != array))
 		link = &(*link)->ipft_next;
 
 	last = *link;
 	if (last == NULL)
 		return -1;
 
 	/* Step along the array itself to its final named entry. */
 	while (last[1].ipft_name != NULL)
 		last++;
 
 	*link = last->ipft_next;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_tune_array_copy                                          */
 /* Returns:    NULL = failure, else pointer to new array                    */
 /* Parameters: base(I)     - pointer to structure base                      */
 /*             size(I)     - size, in bytes, of the array at template       */
 /*             template(I) - original array to copy                         */
 /*                                                                          */
 /* Allocate memory for a new set of tuneable values and copy everything     */
 /* from template into the new region of memory.  The new region is full of  */
 /* uninitialised pointers (ipft_next) so set them up.  Now, ipftp_offset... */
 /*                                                                          */
 /* NOTE: the following assumes that sizeof(long) == sizeof(void *)          */
 /* In the array template, ipftp_offset is the offset (in bytes) of the      */
 /* location of the tuneable value inside the structure pointed to by base.  */
 /* As ipftp_offset is a union over the pointers to the tuneable values, if  */
 /* we add base to the copy's ipftp_offset, copy ends up with a pointer in   */
 /* ipftp_void that points to the stored value.                              */
 /* ------------------------------------------------------------------------ */
 ipftuneable_t *
 ipf_tune_array_copy(base, size, template)
 	void *base;
 	size_t size;
 	ipftuneable_t *template;
 {
 	ipftuneable_t *copy, *entry;
 
 	KMALLOCS(copy, ipftuneable_t *, size);
 	if (copy == NULL)
 		return NULL;
 
 	bcopy(template, copy, size);
 
 	/*
 	 * For every named entry: rebase the offset onto base and point
 	 * ipft_next at the array slot that follows.
 	 */
 	for (entry = copy; entry->ipft_name; entry++) {
 		entry->ipft_una.ipftp_offset += (u_long)base;
 		entry->ipft_next = entry + 1;
 	}
 
 	return copy;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_add                                                */
 /* Returns:     int - 0 == success, else failure                            */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              newtune(I) - the single tune entry to add to the list       */
 /*                                                                          */
 /* Append one tune structure (newtune) to the end of the current list of    */
 /* tuneables, unless an entry of the same name is already there, in which   */
 /* case EEXIST is returned.  Once added, the owner of the object is not     */
 /* expected to ever change "ipft_next".                                     */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_add(softc, newtune)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *newtune;
 {
 	ipftuneable_t **tail;
 
 	if (ipf_tune_findbyname(softc->ipf_tuners,
 				newtune->ipft_name) != NULL) {
 		IPFERROR(74);
 		return EEXIST;
 	}
 
 	/* Find the NULL link that terminates the list. */
 	tail = &softc->ipf_tuners;
 	while (*tail != NULL)
 		tail = &(*tail)->ipft_next;
 
 	newtune->ipft_next = NULL;
 	*tail = newtune;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_del                                                */
 /* Returns:     int - 0 == success, else failure                            */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              oldtune(I) - tune entry to remove from the list of tuneables */
 /*                                                                          */
 /* Look for the tune structure, by pointer, in the list of tuneables.  If   */
 /* it is there, splice it out and clear its ipft_next; otherwise ESRCH is   */
 /* returned.                                                                */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_del(softc, oldtune)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *oldtune;
 {
 	ipftuneable_t **link;
 
 	for (link = &softc->ipf_tuners; *link != NULL;
 	     link = &(*link)->ipft_next) {
 		if (*link == oldtune) {
 			*link = oldtune->ipft_next;
 			oldtune->ipft_next = NULL;
 			return 0;
 		}
 	}
 
 	/* Ran off the end of the list without meeting oldtune. */
 	IPFERROR(75);
 	return ESRCH;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_tune_del_array                                          */
 /* Returns:     int - 0 == success, else failure                            */
 /* Parameters:  oldtune - pointer to tuneables array                        */
 /*                                                                          */
 /* Remove each tuneable entry in the array from the list of "dynamic"       */
 /* tunables.  If one entry should fail to be found, an error will be        */
 /* returned and no further ones removed.                                    */
 /* An entry with a NULL name is used as the indicator of the last entry in  */
 /* the array.                                                               */
 /* ------------------------------------------------------------------------ */
 int
 ipf_tune_del_array(softc, oldtune)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *oldtune;
 {
 	ipftuneable_t *ot;
 	int error = 0;
 
 	for (ot = oldtune; ot->ipft_name != NULL; ot++) {
 		error = ipf_tune_del(softc, ot);
 		if (error != 0)
 			break;
 	}
 
 	return error;
 
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ipftune                                                 */
 /* Returns:     int - 0 == success, else failure                            */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              cmd(I)   - ioctl command number                             */
 /*              data(I)  - pointer to ioctl data structure                  */
 /*                                                                          */
 /* Implement handling of SIOCIPFGETNEXT, SIOCIPFGET and SIOCIPFSET.  These  */
 /* three ioctls provide the means to access and control global variables    */
 /* within IPFilter, allowing (for example) timeouts and table sizes to be   */
 /* changed without rebooting, reloading or recompiling.  The initialisation */
 /* and 'destruction' routines of the various components of ipfilter are all */
 /* each responsible for handling their own values being too big.            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ipftune(softc, cmd, data)
 	ipf_main_softc_t *softc;
 	ioctlcmd_t cmd;
 	void *data;
 {
 	ipftuneable_t *ta;
 	ipftune_t tu;
 	void *cookie;
 	int error;
 
 	error = ipf_inobj(softc, data, NULL, &tu, IPFOBJ_TUNEABLE);
 	if (error != 0)
 		return error;
 
 	/* The name was supplied by the caller; force it to be terminated. */
 	tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0';
 	cookie = tu.ipft_cookie;
 	ta = NULL;
 
 	switch (cmd)
 	{
 	case SIOCIPFGETNEXT :
 		/*
 		 * If cookie is non-NULL, assume it to be a pointer to the last
 		 * entry we looked at, so find it (if possible) and return a
 		 * pointer to the next one after it.  The last entry in the
 		 * table is a NULL entry, so when we get to it, set cookie
 		 * to NULL and return that, indicating end of list, erstwhile
 		 * if we come in with cookie set to NULL, we are starting anew
 		 * at the front of the list.
 		 */
 		if (cookie != NULL) {
 			ta = ipf_tune_findbycookie(&softc->ipf_tuners,
 						   cookie, &tu.ipft_cookie);
 		} else {
 			/*
 			 * NOTE(review): if softc->ipf_tuners were NULL the
 			 * cookie below is derived from a NULL pointer -
 			 * confirm the list can never be empty here.
 			 */
 			ta = softc->ipf_tuners;
 			tu.ipft_cookie = ta + 1;
 		}
 		if (ta != NULL) {
 			/*
 			 * Entry found, but does the data pointed to by that
 			 * row fit in what we can return?
 			 */
 			if (ta->ipft_sz > sizeof(tu.ipft_un)) {
 				IPFERROR(76);
 				return EINVAL;
 			}
 
 			/* Only the size is known, so pick the read by size. */
 			tu.ipft_vlong = 0;
 			if (ta->ipft_sz == sizeof(u_long))
 				tu.ipft_vlong = *ta->ipft_plong;
 			else if (ta->ipft_sz == sizeof(u_int))
 				tu.ipft_vint = *ta->ipft_pint;
 			else if (ta->ipft_sz == sizeof(u_short))
 				tu.ipft_vshort = *ta->ipft_pshort;
 			else if (ta->ipft_sz == sizeof(u_char))
 				tu.ipft_vchar = *ta->ipft_pchar;
 
 			tu.ipft_sz = ta->ipft_sz;
 			tu.ipft_min = ta->ipft_min;
 			tu.ipft_max = ta->ipft_max;
 			tu.ipft_flags = ta->ipft_flags;
 			bcopy(ta->ipft_name, tu.ipft_name,
 			      MIN(sizeof(tu.ipft_name),
 				  strlen(ta->ipft_name) + 1));
 		}
 		error = ipf_outobj(softc, data, &tu, IPFOBJ_TUNEABLE);
 		break;
 
 	case SIOCIPFGET :
 	case SIOCIPFSET :
 		/*
 		 * Search by name or by cookie value for a particular entry
 		 * in the tuning parameter table.  The error is set up front
 		 * and cleared only once an entry has been found.
 		 */
 		IPFERROR(77);
 		error = ESRCH;
 		if (cookie != NULL) {
 			ta = ipf_tune_findbycookie(&softc->ipf_tuners,
 						   cookie, NULL);
 			if (ta != NULL)
 				error = 0;
 		} else if (tu.ipft_name[0] != '\0') {
 			ta = ipf_tune_findbyname(softc->ipf_tuners,
 						 tu.ipft_name);
 			if (ta != NULL)
 				error = 0;
 		}
 		if (error != 0)
 			break;
 
 		if (cmd == (ioctlcmd_t)SIOCIPFGET) {
 			/*
 			 * Fetch the tuning parameters for a particular value
 			 */
 			tu.ipft_vlong = 0;
 			if (ta->ipft_sz == sizeof(u_long))
 				tu.ipft_vlong = *ta->ipft_plong;
 			else if (ta->ipft_sz == sizeof(u_int))
 				tu.ipft_vint = *ta->ipft_pint;
 			else if (ta->ipft_sz == sizeof(u_short))
 				tu.ipft_vshort = *ta->ipft_pshort;
 			else if (ta->ipft_sz == sizeof(u_char))
 				tu.ipft_vchar = *ta->ipft_pchar;
 			tu.ipft_cookie = ta;
 			tu.ipft_sz = ta->ipft_sz;
 			tu.ipft_min = ta->ipft_min;
 			tu.ipft_max = ta->ipft_max;
 			tu.ipft_flags = ta->ipft_flags;
 			error = ipf_outobj(softc, data, &tu, IPFOBJ_TUNEABLE);
 
 		} else if (cmd == (ioctlcmd_t)SIOCIPFSET) {
 			/*
 			 * Set an internal parameter.  The hard part here is
 			 * getting the new value safely and correctly out of
 			 * the kernel (given we only know its size, not type.)
 			 */
 			u_long in;
 
 			if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) &&
 			    (softc->ipf_running > 0)) {
 				IPFERROR(78);
 				error = EBUSY;
 				break;
 			}
 
 			in = tu.ipft_vlong;
 			if (in < ta->ipft_min || in > ta->ipft_max) {
 				IPFERROR(79);
 				error = EINVAL;
 				break;
 			}
 
 			/*
 			 * A tuneable with its own handler is set through it;
 			 * otherwise the old value is saved into tu (to be
 			 * handed back) and the new one stored by size.
 			 */
 			if (ta->ipft_func != NULL) {
 				SPL_INT(s);
 
 				SPL_NET(s);
 				error = (*ta->ipft_func)(softc, ta,
 							 &tu.ipft_un);
 				SPL_X(s);
 
 			} else if (ta->ipft_sz == sizeof(u_long)) {
 				tu.ipft_vlong = *ta->ipft_plong;
 				*ta->ipft_plong = in;
 
 			} else if (ta->ipft_sz == sizeof(u_int)) {
 				tu.ipft_vint = *ta->ipft_pint;
 				*ta->ipft_pint = (u_int)(in & 0xffffffff);
 
 			} else if (ta->ipft_sz == sizeof(u_short)) {
 				tu.ipft_vshort = *ta->ipft_pshort;
 				*ta->ipft_pshort = (u_short)(in & 0xffff);
 
 			} else if (ta->ipft_sz == sizeof(u_char)) {
 				tu.ipft_vchar = *ta->ipft_pchar;
 				*ta->ipft_pchar = (u_char)(in & 0xff);
 			}
 
 			/*
 			 * Only copy the result out if the set worked; doing
 			 * it unconditionally would replace a failure from
 			 * the handler with the status of the copy-out.
 			 */
 			if (error == 0)
 				error = ipf_outobj(softc, data, &tu,
 						   IPFOBJ_TUNEABLE);
 		}
 		break;
 
 	default :
 		IPFERROR(80);
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_zerostats                                               */
 /* Returns:     int - 0 = success, else failure                             */
 /* Parameters:  data(O) - pointer to pointer for copying data back to       */
 /*                                                                          */
 /* Copies the current statistics out to userspace and then zero's the       */
 /* current ones in the kernel. The lock is only held across the bzero() as  */
 /* the copyout may result in paging (ie network activity.)                  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_zerostats(softc, data)
 	ipf_main_softc_t *softc;
 	caddr_t	data;
 {
 	friostat_t fio;
 	ipfobj_t obj;
 	int error;
 
 	error = ipf_inobj(softc, data, &obj, &fio, IPFOBJ_IPFSTAT);
 	if (error != 0)
 		return error;
 	ipf_getstat(softc, &fio, obj.ipfo_rev);
 	error = ipf_outobj(softc, data, &fio, IPFOBJ_IPFSTAT);
 	if (error != 0)
 		return error;
 
 	WRITE_ENTER(&softc->ipf_mutex);
 	bzero(&softc->ipf_stats, sizeof(softc->ipf_stats));
 	RWLOCK_EXIT(&softc->ipf_mutex);
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_resolvedest                                             */
 /* Returns:     int - 0 == success, ESRCH == dstlist lookup failed          */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              base(I)  - where strings are stored                         */
 /*              fdp(IO)  - pointer to destination information to resolve    */
 /*              v(I)     - IP protocol version to match                     */
 /*                                                                          */
 /* Looks up an interface name in the frdest structure pointed to by fdp and */
 /* if a matching name can be found for the particular IP protocol version   */
 /* then store the interface pointer in the frdest struct.  If no match is   */
 /* found, then set the interface pointer to be -1 as NULL is considered to  */
 /* indicate there is no information at all in the structure.                */
 /* A destination of type FRD_DSTLIST is looked up as a destination list     */
 /* name instead and failing to find it is reported as ESRCH.                */
 /* ------------------------------------------------------------------------ */
 int
 ipf_resolvedest(softc, base, fdp, v)
 	ipf_main_softc_t *softc;
 	char *base;
 	frdest_t *fdp;
 	int v;
 {
 	void *ifp = NULL;
 	int errval = 0;
 
 	if (fdp->fd_name != -1) {
 		if (fdp->fd_type != FRD_DSTLIST) {
 			/* Plain interface name: -1 stands for "not found". */
 			ifp = GETIFP(base + fdp->fd_name, v);
 			if (ifp == NULL)
 				ifp = (void *)-1;
 		} else {
 			ifp = ipf_lookup_res_name(softc, IPL_LOGIPF,
 						  IPLT_DSTLIST,
 						  base + fdp->fd_name,
 						  NULL);
 			if (ifp == NULL) {
 				IPFERROR(144);
 				errval = ESRCH;
 			}
 		}
 	}
 	fdp->fd_ptr = ifp;
 
 	/* Without a usable pointer there is nothing more to work out. */
 	if ((ifp == NULL) || (ifp == (void *)-1))
 		return errval;
 
 	fdp->fd_local = ipf_deliverlocal(softc, v, ifp, &fdp->fd_ip6);
 
 	return errval;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_resolvenic                                              */
 /* Returns:     void* - NULL = wildcard name, -1 = failed to find NIC, else */
 /*                      pointer to interface structure for NIC              */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              name(I) - complete interface name                           */
 /*              v(I)    - IP protocol version                               */
 /*                                                                          */
 /* Look for a network interface structure that firstly has a matching name  */
 /* to that passed in and that is also being used for that IP protocol       */
 /* version (necessary on some platforms where there are separate listings   */
 /* for both IPv4 and IPv6 on the same physical NIC.)                        */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_resolvenic(softc, name, v)
 	ipf_main_softc_t *softc;
 	char *name;
 	int v;
 {
 	void *nic;
 
 	(void) softc;	/* gcc -Wextra */
 
 	/* An empty name, "-" and "*" do not name any one interface. */
 	switch (name[0])
 	{
 	case '\0' :
 		return NULL;
 	case '-' :
 	case '*' :
 		if (name[1] == '\0')
 			return NULL;
 		break;
 	default :
 		break;
 	}
 
 	nic = GETIFP(name, v);
 	return (nic != NULL) ? nic : (void *)-1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_expire                                            */
 /* Returns:     None.                                                       */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* This function is run every ipf tick to see if there are any tokens that  */
 /* have been held for too long and need to be freed up.                     */
 /* ------------------------------------------------------------------------ */
 void
 ipf_token_expire(softc)
 	ipf_main_softc_t *softc;
 {
 	ipftoken_t *it;
 
 	WRITE_ENTER(&softc->ipf_tokens);
 	for (;;) {
 		/* Always look at the head; stop once it is not yet due. */
 		it = softc->ipf_token_head;
 		if ((it == NULL) || (it->ipt_die > softc->ipf_ticks))
 			break;
 
 		ipf_token_deref(softc, it);
 	}
 	RWLOCK_EXIT(&softc->ipf_tokens);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_flush                                             */
 /* Returns:     None.                                                       */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* Loop through all of the existing tokens and call deref to see if they    */
 /* can be freed. Normally a function like this might just loop on           */
 /* ipf_token_head but there is a chance that a token might have a ref count */
 /* of greater than one and in that case the reference would drop twice      */
 /* by code that is only entitled to drop it once.                           */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_token_flush(softc)
 	ipf_main_softc_t *softc;
 {
 	ipftoken_t *it, *next;
 
 	WRITE_ENTER(&softc->ipf_tokens);
 	it = softc->ipf_token_head;
 	while (it != NULL) {
 		/* Pick up the successor before the deref can free "it". */
 		next = it->ipt_next;
 		(void) ipf_token_deref(softc, it);
 		it = next;
 	}
 	RWLOCK_EXIT(&softc->ipf_tokens);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_del                                               */
 /* Returns:     int     - 0 = success, else error                           */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              type(I) - the token type to match                           */
 /*              uid(I)  - uid owning the token                              */
 /*              ptr(I)  - context pointer for the token                     */
 /*                                                                          */
 /* This function looks for a token in the current list that matches up      */
 /* the fields (type, uid, ptr).  If none is found, ESRCH is returned, else  */
 /* call ipf_token_deref() to remove it from the list. In the event that     */
 /* the token has a reference held elsewhere, setting ipt_complete to 2      */
 /* enables debugging to distinguish between the two paths that ultimately   */
 /* lead to a token to be deleted.                                           */
 /* ------------------------------------------------------------------------ */
 int
 ipf_token_del(softc, type, uid, ptr)
 	ipf_main_softc_t *softc;
 	int type, uid;
 	void *ptr;
 {
 	ipftoken_t *it;
 	int error;
 
 	/* Assume failure; the result is cleared once a match is found. */
 	IPFERROR(82);
 	error = ESRCH;
 
 	WRITE_ENTER(&softc->ipf_tokens);
 	it = softc->ipf_token_head;
 	while (it != NULL) {
 		if ((ptr != it->ipt_ctx) || (type != it->ipt_type) ||
 		    (uid != it->ipt_uid)) {
 			it = it->ipt_next;
 			continue;
 		}
 
 		it->ipt_complete = 2;
 		ipf_token_deref(softc, it);
 		error = 0;
 		break;
 	}
 	RWLOCK_EXIT(&softc->ipf_tokens);
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_mark_complete                                     */
 /* Returns:     None.                                                       */
 /* Parameters:  token(I) - pointer to token structure                       */
 /*                                                                          */
 /* Mark a token as being ineligible for being found with ipf_token_find.    */
 /* ------------------------------------------------------------------------ */
 void
 ipf_token_mark_complete(token)
 	ipftoken_t *token;
 {
 	/* A value that has already been set is left as it is. */
 	if (token->ipt_complete != 0)
 		return;
 
 	token->ipt_complete = 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_find                                              */
 /* Returns:     ipftoken_t * - NULL if no memory/complete, else the token   */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              type(I) - the token type to match                           */
 /*              uid(I)  - uid owning the token                              */
 /*              ptr(I)  - context pointer for the token                     */
 /*                                                                          */
 /* This function looks for a live token in the list of current tokens that  */
 /* matches the tuple (type, uid, ptr).  If one cannot be found then one is  */
 /* allocated.  Either way the token is (re)linked at the tail of the list   */
 /* of active tokens, gains a reference and has its expiry time renewed.     */
 /* A matching token already marked complete results in NULL instead.        */
 /* ------------------------------------------------------------------------ */
 ipftoken_t *
 ipf_token_find(softc, type, uid, ptr)
 	ipf_main_softc_t *softc;
 	int type, uid;
 	void *ptr;
 {
 	ipftoken_t *tok, *spare;
 
 	/* Allocate before taking the lock, in case nothing matches. */
 	KMALLOC(spare, ipftoken_t *);
 	if (spare != NULL)
 		bzero((char *)spare, sizeof(*spare));
 
 	WRITE_ENTER(&softc->ipf_tokens);
 	tok = softc->ipf_token_head;
 	while (tok != NULL) {
 		if ((ptr == tok->ipt_ctx) && (type == tok->ipt_type) &&
 		    (uid == tok->ipt_uid) && (tok->ipt_complete < 2))
 			break;
 		tok = tok->ipt_next;
 	}
 
 	if (tok != NULL) {
 		/* An existing token matched, the spare is not required. */
 		if (spare != NULL) {
 			KFREE(spare);
 			spare = NULL;
 		}
 
 		if (tok->ipt_complete > 0) {
 			RWLOCK_EXIT(&softc->ipf_tokens);
 			return NULL;
 		}
 		ipf_token_unlink(softc, tok);
 	} else {
 		if (spare == NULL) {
 			RWLOCK_EXIT(&softc->ipf_tokens);
 			return NULL;
 		}
 
 		tok = spare;
 		spare = NULL;
 		tok->ipt_ctx = ptr;
 		tok->ipt_uid = uid;
 		tok->ipt_type = type;
 		tok->ipt_ref = 1;
 	}
 
 	/* Put the token on the end of the list and refresh its lifetime. */
 	tok->ipt_pnext = softc->ipf_token_tail;
 	*softc->ipf_token_tail = tok;
 	softc->ipf_token_tail = &tok->ipt_next;
 	tok->ipt_next = NULL;
 	tok->ipt_ref++;
 
 	tok->ipt_die = softc->ipf_ticks + 20;
 
 	RWLOCK_EXIT(&softc->ipf_tokens);
 
 	return tok;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_unlink                                            */
 /* Returns:     None.                                                       */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              token(I) - pointer to token structure                       */
 /* Write Locks: ipf_tokens                                                  */
 /*                                                                          */
 /* This function unlinks a token structure from the linked list of tokens   */
 /* that "own" it.  The head pointer never needs to be explicitly adjusted   */
 /* but the tail does due to the linked list implementation.                 */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_token_unlink(softc, token)
 	ipf_main_softc_t *softc;
 	ipftoken_t *token;
 {
 	ipftoken_t *after = token->ipt_next;
 	ipftoken_t **before = token->ipt_pnext;
 
 	/* When the last token goes, the tail falls back to its predecessor. */
 	if (softc->ipf_token_tail == &token->ipt_next)
 		softc->ipf_token_tail = before;
 
 	*before = after;
 	if (after != NULL)
 		after->ipt_pnext = before;
 
 	token->ipt_next = NULL;
 	token->ipt_pnext = NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_token_deref                                             */
 /* Returns:     int      - 0 == token freed, else reference count           */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              token(I) - pointer to token structure                       */
 /* Write Locks: ipf_tokens                                                  */
 /*                                                                          */
 /* Drop the reference count on the token structure and if it drops to zero, */
 /* call the dereference function for the token type because it is then      */
 /* possible to free the token data structure.                               */
 /* ------------------------------------------------------------------------ */
 int
 ipf_token_deref(softc, token)
 	ipf_main_softc_t *softc;
 	ipftoken_t *token;
 {
 	void *data, **datap;
 
 	ASSERT(token->ipt_ref > 0);
 	token->ipt_ref--;
 	if (token->ipt_ref > 0)
 		return token->ipt_ref;
 
 	data = token->ipt_data;
 	datap = &data;
 
 	if ((data != NULL) && (data != (void *)-1)) {
 		switch (token->ipt_type)
 		{
 		case IPFGENITER_IPF :
 			(void) ipf_derefrule(softc, (frentry_t **)datap);
 			break;
 		case IPFGENITER_IPNAT :
 			WRITE_ENTER(&softc->ipf_nat);
 			ipf_nat_rule_deref(softc, (ipnat_t **)datap);
 			RWLOCK_EXIT(&softc->ipf_nat);
 			break;
 		case IPFGENITER_NAT :
 			ipf_nat_deref(softc, (nat_t **)datap);
 			break;
 		case IPFGENITER_STATE :
 			ipf_state_deref(softc, (ipstate_t **)datap);
 			break;
 		case IPFGENITER_FRAG :
 			ipf_frag_pkt_deref(softc, (ipfr_t **)datap);
 			break;
 		case IPFGENITER_NATFRAG :
 			ipf_frag_nat_deref(softc, (ipfr_t **)datap);
 			break;
 		case IPFGENITER_HOSTMAP :
 			WRITE_ENTER(&softc->ipf_nat);
 			ipf_nat_hostmapdel(softc, (hostmap_t **)datap);
 			RWLOCK_EXIT(&softc->ipf_nat);
 			break;
 		default :
 			ipf_lookup_iterderef(softc, token->ipt_type, data);
 			break;
 		}
 	}
 
 	ipf_token_unlink(softc, token);
 	KFREE(token);
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nextrule                                                */
 /* Returns:     frentry_t * - NULL == no more rules, else pointer to next   */
 /* Parameters:  softc(I)    - pointer to soft context main structure        */
 /*              active(I)   - rule set index used to select the group list  */
 /*              unit(I)     - ipf unit number used to select the group list */
 /*              fr(I)       - pointer to filter rule                        */
 /*              out(I)      - 1 == out rules, 0 == input rules              */
 /*                                                                          */
 /* Starting with "fr", find the next rule to visit. This includes visiting  */
 /* the list of rule groups if either fr is NULL (empty list) or it is the   */
 /* last rule in the list. When walking rule lists, it is either input or    */
 /* output rules that are returned, never both.                              */
 /* ------------------------------------------------------------------------ */
 static frentry_t *
 ipf_nextrule(softc, active, unit, fr, out)
 	ipf_main_softc_t *softc;
 	int active, unit;
 	frentry_t *fr;
 	int out;
 {
 	frentry_t *rule;
 	frgroup_t *fg;
 
 	/*
 	 * With no rule to start from, or one that names no group, begin at
 	 * the first group; otherwise begin at the group after the rule's.
 	 */
 	if ((fr == NULL) || (fr->fr_group == -1)) {
 		fg = softc->ipf_groups[unit][active];
 	} else {
 		fg = ipf_findgroup(softc, fr->fr_names + fr->fr_group,
 				   unit, active, NULL);
 		if (fg != NULL)
 			fg = fg->fg_next;
 	}
 
 	/* The first rule queued in the wanted direction is the answer. */
 	for (; fg != NULL; fg = fg->fg_next) {
 		for (rule = fg->fg_start; rule != NULL;
 		     rule = rule->fr_next) {
 			if (rule->fr_flags & (out ? FR_OUTQUE : FR_INQUE))
 				return rule;
 		}
 	}
 
 	return NULL;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_getnextrule                                             */
 /* Returns:     int - 0 = success, else error                               */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              t(IO)  - pointer to the token for this rule iteration       */
 /*              ptr(I) - pointer to ipfobj_t to copyin from user space      */
 /*                                                                          */
 /* This function's first job is to bring in the ipfruleiter_t structure via */
 /* the ipfobj_t structure to determine what should be the next rule to      */
 /* return. Once the ipfruleiter_t has been brought in, it then tries to     */
 /* find the 'next rule'.  This may include searching rule group lists or    */
 /* just be as simple as looking at the 'next' field in the rule structure.  */
 /* When we have found the rule to return, increase its reference count and  */
 /* if we used an existing rule to get here, decrease its reference count.   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_getnextrule(softc, t, ptr)
 	ipf_main_softc_t *softc;
 	ipftoken_t *t;
 	void *ptr;
 {
 	frentry_t *fr, *next, zero;
 	ipfruleiter_t it;
 	int error, out;
 	frgroup_t *fg;
 	ipfobj_t obj;
 	int predict;
 	char *dst;
 	int unit;
 
 	if (t == NULL || ptr == NULL) {
 		IPFERROR(84);
 		return EFAULT;
 	}
 
 	error = ipf_inobj(softc, ptr, &obj, &it, IPFOBJ_IPFITER);
 	if (error != 0)
 		return error;
 
 	/*
 	 * Sanity check the iterator request before any of it is used: the
 	 * direction bits, the rule set selector, the number of rules asked
 	 * for and the address to copy the rule out to.
 	 */
 	if ((it.iri_inout < 0) || (it.iri_inout > 3)) {
 		IPFERROR(85);
 		return EINVAL;
 	}
 	if ((it.iri_active != 0) && (it.iri_active != 1)) {
 		IPFERROR(86);
 		return EINVAL;
 	}
 	if (it.iri_nrules == 0) {
 		IPFERROR(87);
 		return ENOSPC;
 	}
 	if (it.iri_rule == NULL) {
 		IPFERROR(88);
 		return EFAULT;
 	}
 
 	fg = NULL;
 	/*
 	 * The token's data pointer is set further down to the rule being
 	 * returned, so on entry it is the rule handed out by the previous
 	 * call, or NULL when nothing has been handed out yet.
 	 */
 	fr = t->ipt_data;
 	/* F_OUT selects the output rules, F_ACIN the accounting rules. */
 	if ((it.iri_inout & F_OUT) != 0)
 		out = 1;
 	else
 		out = 0;
 	if ((it.iri_inout & F_ACIN) != 0)
 		unit = IPL_LOGCOUNT;
 	else
 		unit = IPL_LOGIPF;
 
 	READ_ENTER(&softc->ipf_mutex);
 	if (fr == NULL) {
 		/*
 		 * No previous rule: start from the head of the selected
 		 * rule list, or from the start of the named group.
 		 * NOTE(review): iri_group is read as a string here with no
 		 * visible termination check - confirm ipf_inobj or the
 		 * structure layout guarantees it.
 		 */
 		if (*it.iri_group == '\0') {
 			if (unit == IPL_LOGCOUNT) {
 				next = softc->ipf_acct[out][it.iri_active];
 			} else {
 				next = softc->ipf_rules[out][it.iri_active];
 			}
 			if (next == NULL)
 				next = ipf_nextrule(softc, it.iri_active,
 						    unit, NULL, out);
 		} else {
 			fg = ipf_findgroup(softc, it.iri_group, unit,
 					   it.iri_active, NULL);
 			if (fg != NULL)
 				next = fg->fg_start;
 			else
 				next = NULL;
 		}
 	} else {
 		/* Carry on from the rule returned last time. */
 		next = fr->fr_next;
 		if (next == NULL)
 			next = ipf_nextrule(softc, it.iri_active, unit,
 					    fr, out);
 	}
 
 	/*
 	 * predict records whether another rule would follow the one about
 	 * to be returned.
 	 * NOTE(review): when next is NULL, ipf_nextrule() is called with a
 	 * NULL rule and so searches from the first group - confirm that a
 	 * non-zero predict is what is wanted in that case.
 	 */
 	if (next != NULL && next->fr_next != NULL)
 		predict = 1;
 	else if (ipf_nextrule(softc, it.iri_active, unit, next, out) != NULL)
 		predict = 1;
 	else
 		predict = 0;
 
 	/* Drop the reference that was held on the previous rule. */
 	if (fr != NULL)
 		(void) ipf_derefrule(softc, &fr);
 
 	obj.ipfo_type = IPFOBJ_FRENTRY;
 	dst = (char *)it.iri_rule;
 
 	if (next != NULL) {
 		/* Hold a reference on the rule for as long as the token has it. */
 		obj.ipfo_size = next->fr_size;
 		MUTEX_ENTER(&next->fr_lock);
 		next->fr_ref++;
 		MUTEX_EXIT(&next->fr_lock);
 		t->ipt_data = next;
 	} else {
 		/* Nothing left: an all-zero rule is copied out instead. */
 		obj.ipfo_size = sizeof(frentry_t);
 		bzero(&zero, sizeof(zero));
 		next = &zero;
 		t->ipt_data = NULL;
 	}
 	it.iri_rule = predict ? next : NULL;
 	if (predict == 0)
 		ipf_token_mark_complete(t);
 
 	RWLOCK_EXIT(&softc->ipf_mutex);
 
 	/* The copy-out is done after ipf_mutex has been released. */
 	obj.ipfo_ptr = dst;
 	error = ipf_outobjk(softc, &obj, next);
 	if (error == 0 && t->ipt_data != NULL) {
 		/* Any rule data is copied out directly after the rule. */
 		dst += obj.ipfo_size;
 		if (next->fr_data != NULL) {
 			ipfobj_t dobj;
 
 			if (next->fr_type == FR_T_IPFEXPR)
 				dobj.ipfo_type = IPFOBJ_IPFEXPR;
 			else
 				dobj.ipfo_type = IPFOBJ_FRIPF;
 			dobj.ipfo_size = next->fr_dsize;
 			dobj.ipfo_rev = obj.ipfo_rev;
 			dobj.ipfo_ptr = dst;
 			error = ipf_outobjk(softc, &dobj, next->fr_data);
 		}
 	}
 
 	/*
 	 * NOTE(review): fr has already been passed to ipf_derefrule() above.
 	 * Whether it can still be non-NULL here depends on ipf_derefrule()
 	 * clearing the pointer it is given, which is not visible from this
 	 * function - confirm this cannot drop the reference a second time.
 	 */
 	if ((fr != NULL) && (next == &zero))
 		(void) ipf_derefrule(softc, &fr);
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_frruleiter                                              */
 /* Returns:     int - 0 = success, else error                               */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              data(I) - pointer to the ioctl data (iterator request)      */
 /*              uid(I)  - uid owning the token                              */
 /*              ctx(I)  - context pointer for the token                     */
 /*                                                                          */
 /* This function serves as a stepping stone between ipf_ipf_ioctl and       */
 /* ipf_getnextrule.  Its role is to find the right token in the kernel for  */
 /* the process doing the ioctl and use that to ask for the next rule.       */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_frruleiter(softc, data, uid, ctx)
 	ipf_main_softc_t *softc;
 	void *data, *ctx;
 	int uid;
 {
 	ipfruleiter_t iter;
 	ipftoken_t *tok;
 	ipfobj_t hdr;
 	int rc;
 
 	tok = ipf_token_find(softc, IPFGENITER_IPF, uid, ctx);
 	if (tok == NULL) {
 		/*
 		 * No token is available: hand the request straight back to
 		 * the caller with the rule pointer cleared.
 		 */
 		rc = ipf_inobj(softc, data, &hdr, &iter, IPFOBJ_IPFITER);
 		if (rc != 0)
 			return rc;
 		iter.iri_rule = NULL;
 		return ipf_outobj(softc, data, &iter, IPFOBJ_IPFITER);
 	}
 
 	/*
 	 * Fetch the next rule for this token, then give up the reference
 	 * on the token while holding the token list write lock.
 	 */
 	rc = ipf_getnextrule(softc, tok, data);
 	WRITE_ENTER(&softc->ipf_tokens);
 	ipf_token_deref(softc, tok);
 	RWLOCK_EXIT(&softc->ipf_tokens);
 
 	return rc;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_geniter                                                 */
 /* Returns:     int - 0 = success, else error                               */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              token(I) - pointer to ipftoken_t structure                  */
 /*              itp(I)   - pointer to iterator data                         */
 /*                                                                          */
 /* Decide which iterator function to call using information passed through  */
 /* the ipfgeniter_t structure at itp.                                       */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_geniter(softc, token, itp)
 	ipf_main_softc_t *softc;
 	ipftoken_t *token;
 	ipfgeniter_t *itp;
 {
 	/* Fragment entries are the only iterator type dispatched here. */
 	if (itp->igi_type == IPFGENITER_FRAG)
 		return ipf_frag_pkt_next(softc, token, itp);
 
 	/* Any other iterator type is rejected. */
 	IPFERROR(92);
 	return EINVAL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_genericiter                                             */
 /* Returns:     int - 0 = success, else error                               */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              data(I) - pointer to the ioctl data (ipfgeniter_t request)  */
 /*              uid(I)  - uid owning the token                              */
 /*              ctx(I)  - context pointer for the token                     */
 /*                                                                          */
 /* Handle the SIOCGENITER ioctl for the ipfilter device. The primary role   */
 /* is to find the token matching the request and hand it to ipf_geniter.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_genericiter(softc, data, uid, ctx)
 	ipf_main_softc_t *softc;
 	void *data, *ctx;
 	int uid;
 {
 	ipfgeniter_t req;
 	ipftoken_t *tok;
 	int rc;
 
 	/* Bring in the generic iterator request from the caller. */
 	rc = ipf_inobj(softc, data, NULL, &req, IPFOBJ_GENITER);
 	if (rc != 0)
 		return rc;
 
 	tok = ipf_token_find(softc, req.igi_type, uid, ctx);
 	if (tok == NULL) {
 		/*
 		 * NOTE(review): an internal error number is recorded here
 		 * but the call still reports success - confirm this is the
 		 * intended contract before changing it.
 		 */
 		IPFERROR(93);
 		return 0;
 	}
 
 	/*
 	 * Tag the token with the iterator type, run the iterator and then
 	 * release the token under the token list write lock.
 	 */
 	tok->ipt_subtype = req.igi_type;
 	rc = ipf_geniter(softc, tok, &req);
 	WRITE_ENTER(&softc->ipf_tokens);
 	ipf_token_deref(softc, tok);
 	RWLOCK_EXIT(&softc->ipf_tokens);
 
 	return rc;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ipf_ioctl                                               */
 /* Returns:     int - 0 = success, else error                               */
 /* Parameters:  softc(I)- pointer to soft context main structure            */
 /*              data(I) - pointer to the ioctl argument data                */
 /*              cmd(I)  - the ioctl command number                          */
 /*              mode(I) - mode flags for the ioctl                          */
 /*              uid(I)  - uid owning the token                              */
 /*              ctx(I)  - context pointer for the token                     */
 /*                                                                          */
 /* This function handles all of the ioctl commands that are actually issued */
 /* to the /dev/ipl device.                                                  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ipf_ioctl(softc, data, cmd, mode, uid, ctx)
 	ipf_main_softc_t *softc;
 	caddr_t data;
 	ioctlcmd_t cmd;
 	int mode, uid;
 	void *ctx;
 {
 	friostat_t fio;
 	int error, tmp;
 	ipfobj_t obj;
 	SPL_INT(s);
 
 	/*
 	 * Every command that modifies state first requires FWRITE in mode
 	 * and fails with EPERM otherwise.  A failed BCOPYIN/BCOPYOUT is
 	 * always reported to the caller as EFAULT rather than as whatever
 	 * value the copy primitive happened to return.
 	 */
 	switch (cmd)
 	{
 	case SIOCFRENB :
 		/* Enable (non-zero) or disable (zero) the filter. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(94);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &tmp, sizeof(tmp));
 			if (error != 0) {
 				IPFERROR(95);
 				error = EFAULT;
 				break;
 			}
 
 			WRITE_ENTER(&softc->ipf_global);
 			if (tmp) {
 				if (softc->ipf_running > 0)
 					error = 0;
 				else
 					error = ipfattach(softc);
 				if (error == 0)
 					softc->ipf_running = 1;
 				else
 					(void) ipfdetach(softc);
 			} else {
 				if (softc->ipf_running == 1)
 					error = ipfdetach(softc);
 				else
 					error = 0;
 				if (error == 0)
 					softc->ipf_running = -1;
 			}
 			RWLOCK_EXIT(&softc->ipf_global);
 		}
 		break;
 
 	case SIOCIPFSET :
 		if (!(mode & FWRITE)) {
 			IPFERROR(96);
 			error = EPERM;
 			break;
 		}
 		/* FALLTHRU */
 	case SIOCIPFGETNEXT :
 	case SIOCIPFGET :
 		error = ipf_ipftune(softc, cmd, (void *)data);
 		break;
 
 	case SIOCSETFF :
 		if (!(mode & FWRITE)) {
 			IPFERROR(97);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &softc->ipf_flags,
 					sizeof(softc->ipf_flags));
 			if (error != 0) {
 				IPFERROR(98);
 				error = EFAULT;
 			}
 		}
 		break;
 
 	case SIOCGETFF :
 		error = BCOPYOUT(&softc->ipf_flags, data,
 				 sizeof(softc->ipf_flags));
 		if (error != 0) {
 			IPFERROR(99);
 			error = EFAULT;
 		}
 		break;
 
 	case SIOCFUNCL :
 		error = ipf_resolvefunc(softc, (void *)data);
 		break;
 
 	case SIOCINAFR :
 	case SIOCRMAFR :
 	case SIOCADAFR :
 	case SIOCZRLST :
 		/* Rule requests against the currently active set. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(100);
 			error = EPERM;
 		} else {
 			error = frrequest(softc, IPL_LOGIPF, cmd, (caddr_t)data,
 					  softc->ipf_active, 1);
 		}
 		break;
 
 	case SIOCINIFR :
 	case SIOCRMIFR :
 	case SIOCADIFR :
 		/* Rule requests against the other (inactive) set. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(101);
 			error = EPERM;
 		} else {
 			error = frrequest(softc, IPL_LOGIPF, cmd, (caddr_t)data,
 					  1 - softc->ipf_active, 1);
 		}
 		break;
 
 	case SIOCSWAPA :
 		/* Report the current active set index, then flip it. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(102);
 			error = EPERM;
 		} else {
 			WRITE_ENTER(&softc->ipf_mutex);
 			error = BCOPYOUT(&softc->ipf_active, data,
 					 sizeof(softc->ipf_active));
 			if (error != 0) {
 				IPFERROR(103);
 				error = EFAULT;
 			} else {
 				softc->ipf_active = 1 - softc->ipf_active;
 			}
 			RWLOCK_EXIT(&softc->ipf_mutex);
 		}
 		break;
 
 	case SIOCGETFS :
 		error = ipf_inobj(softc, (void *)data, &obj, &fio,
 				  IPFOBJ_IPFSTAT);
 		if (error != 0)
 			break;
 		ipf_getstat(softc, &fio, obj.ipfo_rev);
 		error = ipf_outobj(softc, (void *)data, &fio, IPFOBJ_IPFSTAT);
 		break;
 
 	case SIOCFRZST :
 		if (!(mode & FWRITE)) {
 			IPFERROR(104);
 			error = EPERM;
 		} else
 			error = ipf_zerostats(softc, (caddr_t)data);
 		break;
 
 	case SIOCIPFFL :
 		/* Flush rules; the value from ipf_flush() is copied back. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(105);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &tmp, sizeof(tmp));
 			if (!error) {
 				tmp = ipf_flush(softc, IPL_LOGIPF, tmp);
 				error = BCOPYOUT(&tmp, data, sizeof(tmp));
 				if (error != 0) {
 					IPFERROR(106);
 					error = EFAULT;
 				}
 			} else {
 				IPFERROR(107);
 				error = EFAULT;
 			}
 		}
 		break;
 
 #ifdef USE_INET6
 	case SIOCIPFL6 :
 		if (!(mode & FWRITE)) {
 			IPFERROR(108);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &tmp, sizeof(tmp));
 			if (!error) {
 				tmp = ipf_flush(softc, IPL_LOGIPF, tmp);
 				error = BCOPYOUT(&tmp, data, sizeof(tmp));
 				if (error != 0) {
 					IPFERROR(109);
 					error = EFAULT;
 				}
 			} else {
 				IPFERROR(110);
 				error = EFAULT;
 			}
 		}
 		break;
 #endif
 
 	case SIOCSTLCK :
 		/* Apply the same lock value to state, NAT, frag and auth. */
 		if (!(mode & FWRITE)) {
 			IPFERROR(122);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &tmp, sizeof(tmp));
 			if (error == 0) {
 				ipf_state_setlock(softc->ipf_state_soft, tmp);
 				ipf_nat_setlock(softc->ipf_nat_soft, tmp);
 				ipf_frag_setlock(softc->ipf_frag_soft, tmp);
 				ipf_auth_setlock(softc->ipf_auth_soft, tmp);
 			} else {
 				IPFERROR(111);
 				error = EFAULT;
 			}
 		}
 		break;
 
 #ifdef	IPFILTER_LOG
 	case SIOCIPFFB :
 		if (!(mode & FWRITE)) {
 			IPFERROR(112);
 			error = EPERM;
 		} else {
 			tmp = ipf_log_clear(softc, IPL_LOGIPF);
 			error = BCOPYOUT(&tmp, data, sizeof(tmp));
 			if (error) {
 				IPFERROR(113);
 				error = EFAULT;
 			}
 		}
 		break;
 #endif /* IPFILTER_LOG */
 
 	case SIOCFRSYN :
 		if (!(mode & FWRITE)) {
 			IPFERROR(114);
 			error = EPERM;
 		} else {
 			WRITE_ENTER(&softc->ipf_global);
 #if (defined(MENTAT) && defined(_KERNEL)) && !defined(INSTANCES)
 			error = ipfsync();
 #else
 			ipf_sync(softc, NULL);
 			error = 0;
 #endif
 			RWLOCK_EXIT(&softc->ipf_global);
 
 		}
 		break;
 
 	case SIOCGFRST :
 		error = ipf_outobj(softc, (void *)data,
 				   ipf_frag_stats(softc->ipf_frag_soft),
 				   IPFOBJ_FRAGSTAT);
 		break;
 
 #ifdef	IPFILTER_LOG
 	case FIONREAD :
 		tmp = ipf_log_bytesused(softc, IPL_LOGIPF);
 		error = BCOPYOUT(&tmp, data, sizeof(tmp));
 		/*
 		 * Normalise a copy failure to EFAULT like every other case
 		 * in this function (no IPFERROR number is allocated for it).
 		 */
 		if (error != 0)
 			error = EFAULT;
 		break;
 #endif
 
 	case SIOCIPFITER :
 		SPL_SCHED(s);
 		error = ipf_frruleiter(softc, data, uid, ctx);
 		SPL_X(s);
 		break;
 
 	case SIOCGENITER :
 		SPL_SCHED(s);
 		error = ipf_genericiter(softc, data, uid, ctx);
 		SPL_X(s);
 		break;
 
 	case SIOCIPFDELTOK :
 		error = BCOPYIN(data, &tmp, sizeof(tmp));
 		if (error == 0) {
 			SPL_SCHED(s);
 			error = ipf_token_del(softc, tmp, uid, ctx);
 			SPL_X(s);
 		} else {
 			/* As above: report a failed copy in as EFAULT. */
 			error = EFAULT;
 		}
 		break;
 
 	default :
 		IPFERROR(115);
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_decaps                                                  */
 /* Returns:     int        - -1 == decapsulation failed, else bit mask of   */
 /*                           flags indicating packet filtering decision.    */
 /* Parameters:  fin(I)     - pointer to packet information                  */
 /*              pass(I)    - current filtering result flags                 */
 /*              l5proto(I) - layer 5 protocol to decode UDP data as.        */
 /*                                                                          */
 /* This function is called for packets that are wrapped in other packets,   */
 /* for example, an IP packet that is the entire data segment for another IP */
 /* packet.  If the basic constraints for this are satisfied, change the     */
 /* buffer to point to the start of the inner packet and start processing    */
 /* rules belonging to the head group this rule specifies.                   */
 /* ------------------------------------------------------------------------ */
 u_32_t
 ipf_decaps(fin, pass, l5proto)
 	fr_info_t *fin;
 	u_32_t pass;
 	int l5proto;
 {
 	fr_info_t fin2, *fino = NULL;
 	int elen, hlen, nh;
 	grehdr_t gre;
 	ip_t *ip;
 	mb_t *m;
 
 	if ((fin->fin_flx & FI_COALESCE) == 0)
 		if (ipf_coalesce(fin) == -1)
 			goto cantdecaps;
 
 	m = fin->fin_m;
 	hlen = fin->fin_hlen;
 
 	/*
 	 * Work out how many bytes of outer header to skip (hlen) and what
 	 * the encapsulated protocol is (nh) from the outer protocol.
 	 */
 	switch (fin->fin_p)
 	{
 	case IPPROTO_UDP :
 		/*
 		 * In this case, the specific protocol being decapsulated
 		 * inside UDP frames comes from the rule.
 		 */
 		nh = fin->fin_fr->fr_icode;
 		break;
 
 	case IPPROTO_GRE :	/* 47 */
 		bcopy(fin->fin_dp, (char *)&gre, sizeof(gre));
 		hlen += sizeof(grehdr_t);
 		if (gre.gr_R|gre.gr_s)
 			goto cantdecaps;
 		/* Each of these flags adds 4 bytes to the amount skipped. */
 		if (gre.gr_C)
 			hlen += 4;
 		if (gre.gr_K)
 			hlen += 4;
 		if (gre.gr_S)
 			hlen += 4;
 
 		nh = IPPROTO_IP;
 
 		/*
 		 * If the routing options flag is set, validate that it is
 		 * there and bounce over it.
 		 */
 #if 0
 		/* This is really heavy weight and lots of room for error, */
 		/* so for now, put it off and get the simple stuff right.  */
 		if (gre.gr_R) {
 			u_char off, len, *s;
 			u_short af;
 			int end;
 
 			end = 0;
 			s = fin->fin_dp;
 			s += hlen;
 			aplen = fin->fin_plen - hlen;
 			while (aplen > 3) {
 				af = (s[0] << 8) | s[1];
 				off = s[2];
 				len = s[3];
 				aplen -= 4;
 				s += 4;
 				if (af == 0 && len == 0) {
 					end = 1;
 					break;
 				}
 				if (aplen < len)
 					break;
 				s += len;
 				aplen -= len;
 			}
 			if (end != 1)
 				goto cantdecaps;
 			hlen = s - (u_char *)fin->fin_dp;
 		}
 #endif
 		break;
 
 #ifdef IPPROTO_IPIP
 	case IPPROTO_IPIP :	/* 4 */
 #endif
 		nh = IPPROTO_IP;
 		break;
 
 	default :	/* Includes ESP, AH is special for IPv4 */
 		goto cantdecaps;
 	}
 
 	/* Only IP and IPv6 are accepted as the inner protocol. */
 	switch (nh)
 	{
 	case IPPROTO_IP :
 	case IPPROTO_IPV6 :
 		break;
 	default :
 		goto cantdecaps;
 	}
 
 	/*
 	 * From here on "fin" refers to a private copy describing the inner
 	 * packet and "fino" to the structure supplied by the caller.  The
 	 * buffer is advanced past the outer headers; every exit path below
 	 * must undo that adjustment.
 	 */
 	bcopy((char *)fin, (char *)&fin2, sizeof(fin2));
 	fino = fin;
 	fin = &fin2;
 	elen = hlen;
 #if defined(MENTAT) && defined(_KERNEL)
 	m->b_rptr += elen;
 #else
 	m->m_data += elen;
 	m->m_len -= elen;
 #endif
 	fin->fin_plen -= elen;
 
 	ip = (ip_t *)((char *)fin->fin_ip + elen);
 
 	/*
 	 * Make sure we have at least enough data for the network layer
 	 * header.
 	 */
 	if (IP_V(ip) == 4)
 		hlen = IP_HL(ip) << 2;
 #ifdef USE_INET6
 	else if (IP_V(ip) == 6)
 		hlen = sizeof(ip6_t);
 #endif
 	else
 		goto cantdecaps2;
 
 	if (fin->fin_plen < hlen)
 		goto cantdecaps2;
 
 	fin->fin_dp = (char *)ip + hlen;
 
 	if (IP_V(ip) == 4) {
 		/*
 		 * Perform IPv4 header checksum validation.
 		 */
 		if (ipf_cksum((u_short *)ip, hlen))
 			goto cantdecaps2;
 	}
 
 	if (ipf_makefrip(hlen, ip, fin) == -1) {
 cantdecaps2:
 		/* Undo the buffer adjustment made for the inner packet. */
 		if (m != NULL) {
 #if defined(MENTAT) && defined(_KERNEL)
 			m->b_rptr -= elen;
 #else
 			m->m_data -= elen;
 			m->m_len += elen;
 #endif
 		}
 cantdecaps:
 		DT1(frb_decapfrip, fr_info_t *, fin);
 		pass &= ~FR_CMDMASK;
 		pass |= FR_BLOCK|FR_QUICK;
 		fin->fin_reason = FRB_DECAPFRIP;
 		/*
 		 * When arriving via cantdecaps2, "fin" is the local copy
 		 * that is discarded on return, so also record the reason in
 		 * the caller's structure or it would be lost.
 		 */
 		if (fino != NULL)
 			fino->fin_reason = FRB_DECAPFRIP;
 		return -1;
 	}
 
 	pass = ipf_scanlist(fin, pass);
 
 	/*
 	 * Copy the packet filter "result" fields out of the fr_info_t struct
 	 * that is local to the decapsulation processing and back into the
 	 * one we were called with.
 	 */
 	fino->fin_flx = fin->fin_flx;
 	fino->fin_rev = fin->fin_rev;
 	fino->fin_icode = fin->fin_icode;
 	fino->fin_rule = fin->fin_rule;
 	(void) strncpy(fino->fin_group, fin->fin_group, FR_GROUPLEN);
 	fino->fin_fr = fin->fin_fr;
 	fino->fin_error = fin->fin_error;
 	fino->fin_mp = fin->fin_mp;
 	fino->fin_m = fin->fin_m;
 	/* The buffer may have changed during scanning, so re-read it. */
 	m = fin->fin_m;
 	if (m != NULL) {
 #if defined(MENTAT) && defined(_KERNEL)
 		m->b_rptr -= elen;
 #else
 		m->m_data -= elen;
 		m->m_len += elen;
 #endif
 	}
 	return pass;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_matcharray_load                                         */
 /* Returns:     int         - 0 = success, else error                       */
 /* Parameters:  softc(I)    - pointer to soft context main structure        */
 /*              data(I)     - pointer to ioctl data                         */
 /*              objp(I)     - ipfobj_t structure to load data into          */
 /*              arrayptr(I) - pointer to location to store array pointer    */
 /*                                                                          */
 /* This function loads in a matching array through the ipfobj_t struct that */
 /* describes it.  Sanity checking and array size limitations are enforced   */
 /* in this function to prevent userspace from trying to load in something   */
 /* that is insanely big.  Once the size of the array is known, the memory   */
 /* required is malloc'd and returned through changing *arrayptr.  The       */
 /* contents of the array are verified before returning.  Only in the event  */
 /* of a successful call is the caller required to free up the malloc area.  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_matcharray_load(softc, data, objp, arrayptr)
 	ipf_main_softc_t *softc;
 	caddr_t data;
 	ipfobj_t *objp;
 	int **arrayptr;
 {
 	int nbytes, *buf, rc;
 
 	/* Nothing is handed back to the caller unless everything passes. */
 	*arrayptr = NULL;
 
 	rc = BCOPYIN(data, objp, sizeof(*objp));
 	if (rc != 0) {
 		IPFERROR(116);
 		return EFAULT;
 	}
 
 	if (objp->ipfo_type != IPFOBJ_IPFEXPR) {
 		IPFERROR(117);
 		return EINVAL;
 	}
 
 	/* The size must be non-zero, a multiple of 4 and at most 1024. */
 	if ((objp->ipfo_size == 0) || (objp->ipfo_size > 1024) ||
 	    ((objp->ipfo_size & 3) != 0)) {
 		IPFERROR(118);
 		return EINVAL;
 	}
 
 	/*
 	 * NOTE(review): ipfo_size is scaled by sizeof(int) here although
 	 * the multiple-of-4 test above suggests it may already be a byte
 	 * count - confirm against the userland that builds the object.
 	 */
 	nbytes = objp->ipfo_size * sizeof(*buf);
 	KMALLOCS(buf, int *, nbytes);
 	if (buf == NULL) {
 		IPFERROR(119);
 		return ENOMEM;
 	}
 
 	/* Fetch the array itself and check its internal structure. */
 	rc = COPYIN(objp->ipfo_ptr, buf, nbytes);
 	if (rc != 0) {
 		IPFERROR(120);
 		rc = EFAULT;
 	} else if (ipf_matcharray_verify(buf, nbytes) != 0) {
 		IPFERROR(121);
 		rc = EINVAL;
 	}
 
 	/* On any failure the buffer is released before returning. */
 	if (rc != 0) {
 		KFREES(buf, nbytes);
 		return rc;
 	}
 
 	*arrayptr = buf;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_matcharray_verify                                       */
 /* Returns:     int          - 0 = array is valid, -1 = array is invalid    */
 /* Parameters:  array(I)     - pointer to matching array                    */
 /*              arraysize(I) - size of the array in bytes                   */
 /*                                                                          */
 /* Verify the contents of a matching array by stepping through each element */
 /* in it.  The actual commands in the array are not verified for            */
 /* correctness, only that all of the sizes are correctly within limits.     */
 /* ------------------------------------------------------------------------ */
 int
 ipf_matcharray_verify(array, arraysize)
 	int *array, arraysize;
 {
 	int i, nelem, maxidx;
 	ipfexp_t *e;
 
 	/*
 	 * Currently, it makes no sense to have an array less than 6
 	 * elements long - the initial size at the front, a single operation
 	 * (minimum 4 in length) and a trailer, for a total of 6.
 	 * The byte size is checked first so that array[0] is only read once
 	 * the buffer is known to be big enough to hold it.
 	 */
 	if ((arraysize < 24) || (arraysize > 4096) || (array[0] < 6)) {
 		return -1;
 	}
 
 	nelem = arraysize / sizeof(*array);
 
 	/*
 	 * Verify the size of data pointed to by array with how long
 	 * the array claims to be itself.  The comparison is made in units
 	 * of elements so that a huge array[0] cannot wrap around when it is
 	 * multiplied up to a byte count; arraysize must also be a whole
 	 * number of elements.
 	 */
 	if ((array[0] != nelem) ||
 	    (nelem * (int)sizeof(*array) != arraysize)) {
 		return -1;
 	}
 
 	maxidx = nelem - 1;
 	/*
 	 * The last opcode in this array should be an IPF_EXP_END.
 	 */
 	if (array[maxidx] != IPF_EXP_END) {
 		return -1;
 	}
 
 	for (i = 1; i < maxidx; ) {
 		e = (ipfexp_t *)(array + i);
 
 		/*
 		 * The length of the bits to check must be at least 1
 		 * (or else there is nothing to compare with!) and it
 		 * cannot exceed the length of the data present.  The
 		 * remaining space (maxidx - i) is always positive inside
 		 * this loop, so comparing against it cannot overflow the
 		 * way "ipfe_size + i" could with a hostile ipfe_size.
 		 *
 		 * NOTE(review): e->ipfe_size is read before it is known
 		 * that the whole expression header lies inside the array;
 		 * confirm the header length and bound it here as well.
 		 */
 		if ((e->ipfe_size < 1) ||
 		    (e->ipfe_size > maxidx - i)) {
 			return -1;
 		}
 		i += e->ipfe_size;
 	}
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_fr_matcharray                                           */
 /* Returns:     int      - 0 = match failed, else positive match            */
 /* Parameters:  fin(I)   - pointer to packet information                    */
 /*              array(I) - pointer to matching array                        */
 /*                                                                          */
 /* This function is used to apply a matching array against a packet and     */
 /* return an indication of whether or not the packet successfully matches   */
 /* all of the commands in it.                                               */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_fr_matcharray(fin, array)
 	fr_info_t *fin;
 	int *array;
 {
 	int i, n, *x, rv, p;
 	ipfexp_t *e;
 
 	rv = 0;
 	n = array[0];
 	x = array + 1;
 
 	/*
 	 * Walk the expressions one at a time.  Every expression must match
 	 * for the array to match: the loop stops at the first expression
 	 * that fails, and otherwise the result of the last one evaluated is
 	 * what gets returned.
 	 *
 	 * NOTE(review): the stride used here (3 + x[3]) differs from the
 	 * one used by ipf_matcharray_verify (ipfe_size) - confirm against
 	 * the layout of ipfexp_t.
 	 */
 	for (; n > 0; x += 3 + x[3]) {
 		e = (ipfexp_t *)x;
 		if (e->ipfe_cmd == IPF_EXP_END)
 			break;
 		n -= e->ipfe_size;
 
 		/*
 		 * Start each expression with "no match".  This must happen
 		 * here and not in the loop increment: resetting there wiped
 		 * out the result of the final expression before the loop
 		 * terminated, so the function could never return a match.
 		 */
 		rv = 0;
 
 		/*
 		 * The upper 16 bits currently store the protocol value.
 		 * This is currently used with TCP and UDP port compares and
 		 * allows "tcp.port = 80" without requiring an explicit
 		 * "ip.pr = tcp" first.
 		 */
 		p = e->ipfe_cmd >> 16;
 		if ((p != 0) && (p != fin->fin_p))
 			break;
 
 		/*
 		 * Within one expression the arguments are alternatives: the
 		 * inner loops stop as soon as one of them matches.
 		 */
 		switch (e->ipfe_cmd)
 		{
 		case IPF_EXP_IP_PR :
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= (fin->fin_p == e->ipfe_arg0[i]);
 			}
 			break;
 
 		case IPF_EXP_IP_SRCADDR :
 			if (fin->fin_v != 4)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= ((fin->fin_saddr &
 					e->ipfe_arg0[i * 2 + 1]) ==
 				       e->ipfe_arg0[i * 2]);
 			}
 			break;
 
 		case IPF_EXP_IP_DSTADDR :
 			if (fin->fin_v != 4)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= ((fin->fin_daddr &
 					e->ipfe_arg0[i * 2 + 1]) ==
 				       e->ipfe_arg0[i * 2]);
 			}
 			break;
 
 		case IPF_EXP_IP_ADDR :
 			if (fin->fin_v != 4)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= ((fin->fin_saddr &
 					e->ipfe_arg0[i * 2 + 1]) ==
 				       e->ipfe_arg0[i * 2]) ||
 				      ((fin->fin_daddr &
 					e->ipfe_arg0[i * 2 + 1]) ==
 				       e->ipfe_arg0[i * 2]);
 			}
 			break;
 
 #ifdef USE_INET6
 		case IPF_EXP_IP6_SRCADDR :
 			if (fin->fin_v != 6)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= IP6_MASKEQ(&fin->fin_src6,
 						 &e->ipfe_arg0[i * 8 + 4],
 						 &e->ipfe_arg0[i * 8]);
 			}
 			break;
 
 		case IPF_EXP_IP6_DSTADDR :
 			if (fin->fin_v != 6)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= IP6_MASKEQ(&fin->fin_dst6,
 						 &e->ipfe_arg0[i * 8 + 4],
 						 &e->ipfe_arg0[i * 8]);
 			}
 			break;
 
 		case IPF_EXP_IP6_ADDR :
 			if (fin->fin_v != 6)
 				break;
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= IP6_MASKEQ(&fin->fin_src6,
 						 &e->ipfe_arg0[i * 8 + 4],
 						 &e->ipfe_arg0[i * 8]) ||
 				      IP6_MASKEQ(&fin->fin_dst6,
 						 &e->ipfe_arg0[i * 8 + 4],
 						 &e->ipfe_arg0[i * 8]);
 			}
 			break;
 #endif
 
 		case IPF_EXP_UDP_PORT :
 		case IPF_EXP_TCP_PORT :
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= (fin->fin_sport == e->ipfe_arg0[i]) ||
 				      (fin->fin_dport == e->ipfe_arg0[i]);
 			}
 			break;
 
 		case IPF_EXP_UDP_SPORT :
 		case IPF_EXP_TCP_SPORT :
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= (fin->fin_sport == e->ipfe_arg0[i]);
 			}
 			break;
 
 		case IPF_EXP_UDP_DPORT :
 		case IPF_EXP_TCP_DPORT :
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= (fin->fin_dport == e->ipfe_arg0[i]);
 			}
 			break;
 
 		case IPF_EXP_TCP_FLAGS :
 			for (i = 0; !rv && i < e->ipfe_narg; i++) {
 				rv |= ((fin->fin_tcpf &
 					e->ipfe_arg0[i * 2 + 1]) ==
 				       e->ipfe_arg0[i * 2]);
 			}
 			break;
 		}
 		/* Apply the expression's negation flag to the result. */
 		rv ^= e->ipfe_not;
 
 		if (rv == 0)
 			break;
 	}
 
 	return rv;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_queueflush                                              */
 /* Returns:     int - number of entries flushed (0 = none)                  */
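 /* Parameters:  softc(I)    - pointer to soft context main structure        */
 /*              deletefn(I) - function to call to delete entry              */
 /*              ipfqs(I)    - top of the list of ipf internal queues        */
 /*              userqs(I)   - top of the list of user defined timeouts      */
 /*              activep(I)  - pointer to the count of active entries        */
 /*              size(I)     - size of the table, used to scale *activep     */
 /*              low(I)      - low watermark, as a percentage of size        */
 /*                                                                          */
 /* This function gets called when the state/NAT hash tables fill up and we  */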
 /* need to try a bit harder to free up some space.  The algorithm used here */
 /* split into two parts but both halves have the same goal: to reduce the   */
 /* number of connections considered to be "active" to the low watermark.    */
 /* There are two steps in doing this:                                       */
 /* 1) Remove any TCP connections that are already considered to be "closed" */
 /*    but have not yet been removed from the state table.  The two states   */
 /*    TCPS_TIME_WAIT and TCPS_CLOSED are considered to be the perfect       */
 /*    candidates for this style of removal.  If freeing up entries in       */
 /*    CLOSED or both CLOSED and TIME_WAIT brings us to the low watermark,   */
 /*    we do not go on to step 2.                                            */
 /*                                                                          */
 /* 2) Look for the oldest entries on each timeout queue and free them if    */
 /*    they are within the given window we are considering.  Where the       */
 /*    window starts and the steps taken to increase its size depend upon    */
 /*    how long ipf has been running (ipf_ticks.)  Anything modified in the  */
 /*    last 30 seconds is not touched.                                       */
 /*                                              touched                     */
 /*         die     ipf_ticks  30*1.5    1800*1.5   |  43200*1.5             */
 /*           |          |        |           |     |     |                  */
 /* future <--+----------+--------+-----------+-----+-----+-----------> past */
 /*                     now        \_int=30s_/ \_int=1hr_/ \_int=12hr        */
 /*                                                                          */
 /* Points to note:                                                          */
 /* - tqe_die is the time, in the future, when entries die.                  */
 /* - tqe_die - ipf_ticks is how long left the connection has to live in ipf */
 /*   ticks.                                                                 */
 /* - tqe_touched is when the entry was last used by NAT/state               */
 /* - the closer tqe_touched is to ipf_ticks, the further tqe_die will be    */
 /*   ipf_ticks any given timeout queue and vice versa.                      */
 /* - both tqe_die and tqe_touched increase over time                        */
 /* - timeout queues are sorted with the highest value of tqe_die at the     */
 /*   bottom and therefore the smallest values of each are at the top        */
 /* - the pointer passed in as ipfqs should point to an array of timeout     */
 /*   queues representing each of the TCP states                             */
 /*                                                                          */
 /* We start by setting up a maximum range to scan for things to move of     */
 /* iend (newest) to istart (oldest) in chunks of "interval".  If nothing is */
 /* found in that range, "interval" is adjusted (so long as it isn't 30) and */
 /* we start again with a new value for "iend" and "istart".  This is        */
 /* continued until we either finish the scan of 30 second intervals or the  */
 /* low water mark is reached.                                               */
 /* ------------------------------------------------------------------------ */
 int
 ipf_queueflush(softc, deletefn, ipfqs, userqs, activep, size, low)
 	ipf_main_softc_t *softc;
 	ipftq_delete_fn_t deletefn;
 	ipftq_t *ipfqs, *userqs;
 	u_int *activep;
 	int size, low;
 {
 	u_long interval, istart, iend;
 	ipftq_t *ifq, *ifqnext;
 	ipftqent_t *tqe, *tqn;
 	int removed = 0;
 
 	/*
 	 * Step 1: remove everything on the CLOSED queue and, if usage is
 	 * still above the low watermark, everything on TIME_WAIT too.  The
 	 * next pointer is saved before each delete because the entry being
 	 * visited may no longer be valid afterwards.
 	 */
 	for (tqn = ipfqs[IPF_TCPS_CLOSED].ifq_head; ((tqe = tqn) != NULL); ) {
 		tqn = tqe->tqe_next;
 		if ((*deletefn)(softc, tqe->tqe_parent) == 0)
 			removed++;
 	}
 	if ((*activep * 100 / size) > low) {
 		for (tqn = ipfqs[IPF_TCPS_TIME_WAIT].ifq_head;
 		     ((tqe = tqn) != NULL); ) {
 			tqn = tqe->tqe_next;
 			if ((*deletefn)(softc, tqe->tqe_parent) == 0)
 				removed++;
 		}
 	}
 
 	if ((*activep * 100 / size) <= low) {
 		return removed;
 	}
 
 	/*
 	 * Step 2: pick the starting window and interval from how long ipf
 	 * has been running.
 	 *
 	 * NOTE: Use of "* 15 / 10" is required here because if "* 1.5" is
 	 *       used then the operations are upgraded to floating point
 	 *       and kernels don't like floating point...
 	 */
 	if (softc->ipf_ticks > IPF_TTLVAL(43200 * 15 / 10)) {
 		istart = IPF_TTLVAL(86400 * 4);
 		interval = IPF_TTLVAL(43200);
 	} else if (softc->ipf_ticks > IPF_TTLVAL(1800 * 15 / 10)) {
 		istart = IPF_TTLVAL(43200);
 		interval = IPF_TTLVAL(1800);
 	} else if (softc->ipf_ticks > IPF_TTLVAL(30 * 15 / 10)) {
 		istart = IPF_TTLVAL(1800);
 		interval = IPF_TTLVAL(30);
 	} else {
 		/*
 		 * Not running long enough for step 2, but step 1 may
 		 * already have removed entries: report those rather than 0.
 		 */
 		return removed;
 	}
 	/* Clamp the window start so that it does not predate start-up. */
 	if (istart > softc->ipf_ticks) {
 		if (softc->ipf_ticks - interval < interval)
 			istart = interval;
 		else
 			istart = (softc->ipf_ticks / interval) * interval;
 	}
 
 	iend = softc->ipf_ticks - interval;
 
 	while ((*activep * 100 / size) > low) {
 		u_long try;
 
 		/* Entries last touched at or before "try" are removed. */
 		try = softc->ipf_ticks - istart;
 
 		for (ifq = ipfqs; ifq != NULL; ifq = ifq->ifq_next) {
 			for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 				if (try < tqe->tqe_touched)
 					break;
 				tqn = tqe->tqe_next;
 				if ((*deletefn)(softc, tqe->tqe_parent) == 0)
 					removed++;
 			}
 		}
 
 		/*
 		 * For the user queues the next queue pointer is fetched
 		 * before any entries on the current queue are deleted.
 		 */
 		for (ifq = userqs; ifq != NULL; ifq = ifqnext) {
 			ifqnext = ifq->ifq_next;
 
 			for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 				if (try < tqe->tqe_touched)
 					break;
 				tqn = tqe->tqe_next;
 				if ((*deletefn)(softc, tqe->tqe_parent) == 0)
 					removed++;
 			}
 		}
 
 		/*
 		 * Once the window reaches its newest edge, drop down to the
 		 * next smaller interval, or stop if already at the smallest.
 		 */
 		if (try >= iend) {
 			if (interval == IPF_TTLVAL(43200)) {
 				interval = IPF_TTLVAL(1800);
 			} else if (interval == IPF_TTLVAL(1800)) {
 				interval = IPF_TTLVAL(30);
 			} else {
 				break;
 			}
 			if (interval >= softc->ipf_ticks)
 				break;
 
 			iend = softc->ipf_ticks - interval;
 		}
 		istart -= interval;
 	}
 
 	return removed;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_deliverlocal                                            */
 /* Returns:     int - 1 = local address, 0 = non-local address              */
 /* Parameters:  softc(I)     - pointer to soft context main structure       */
 /*              ipversion(I) - IP protocol version (4 or 6)                 */
 /*              ifp(I)       - network interface pointer                    */
 /*              ipaddr(I)    - IPv4/6 destination address                   */
 /*                                                                          */
 /* This function is used to determine if the address "ipaddr" belongs to    */
 /* the network interface represented by ifp.                                */
 /* ------------------------------------------------------------------------ */
 int
 ipf_deliverlocal(softc, ipversion, ifp, ipaddr)
 	ipf_main_softc_t *softc;
 	int ipversion;
 	void *ifp;
 	i6addr_t *ipaddr;
 {
 	i6addr_t ifaddr;
 
 	/*
 	 * Ask ipf_ifpaddr() for the interface's address in the requested
 	 * IP version and compare it with the caller's address.  A non-zero
 	 * return from the lookup, a mismatch or an unhandled version all
 	 * fall through to the "not local" result.
 	 */
 	switch (ipversion)
 	{
 	case 4 :
 		if (ipf_ifpaddr(softc, 4, FRI_NORMAL, ifp, &ifaddr, NULL) != 0)
 			break;
 		if (ifaddr.in4.s_addr == ipaddr->in4.s_addr)
 			return 1;
 		break;
 
 #ifdef USE_INET6
 	case 6 :
 		if (ipf_ifpaddr(softc, 6, FRI_NORMAL, ifp, &ifaddr, NULL) != 0)
 			break;
 		if (IP6_EQ(&ifaddr, ipaddr))
 			return 1;
 		break;
 #endif
 
 	default :
 		break;
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_settimeout                                              */
 /* Returns:     int - 0 = success, -1 = failure                             */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I)     - pointer to tuneable array entry                  */
 /*              p(I)     - pointer to values passed in to apply             */
 /*                                                                          */
 /* This function is called to set the timeout values for each distinct      */
 /* queue timeout that is available.  When called, it calls into both the    */
 /* state and NAT code, telling them to update their timeout queues.         */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_settimeout(softc, t, p)
 	struct ipf_main_softc_s *softc;
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 {
 
 	/*
 	 * Pure pass-through: the state code is tried first and the NAT code
 	 * is only consulted if that did not fail.  Setting ipf_interror is
 	 * left to whichever of the two rejects the change.
 	 */
 	if ((ipf_state_settimeout(softc, t, p) == -1) ||
 	    (ipf_nat_settimeout(softc, t, p) == -1))
 		return -1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_apply_timeout                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  head(I)    - pointer to the timeout queue to update         */
 /*              seconds(I) - new timeout for the queue, in seconds          */
 /*                                                                          */
 /* This function applies a timeout of "seconds" to the timeout queue that   */
 /* is pointed to by "head".  All entries on this list have an expiration    */
 /* set to be the current tick value of ipf plus the ttl.  Given that this   */
 /* function should only be called when the delta is non-zero, the task is   */
 /* to walk the entire list and apply the change.  The sort order will not   */
 /* change.  The only catch is that this is O(n) across the list, so if the  */
 /* queue has lots of entries (10s of thousands or 100s of thousands), it    */
 /* could take a relatively long time to work through them all.              */
 /* ------------------------------------------------------------------------ */
 void
 ipf_apply_timeout(head, seconds)
 	ipftq_t *head;
 	u_int seconds;
 {
 	u_int oldtimeout, newtimeout;
 	ipftqent_t *tqe;
 	int delta;
 
 	MUTEX_ENTER(&head->ifq_lock);
 	oldtimeout = head->ifq_ttl;
 	newtimeout = IPF_TTLVAL(seconds);
 
 	/*
 	 * Each entry's expiry is "tick when queued + ttl", so replacing the
 	 * old ttl with the new one moves every expiry by (new - old): a
 	 * longer timeout pushes tqe_die later, a shorter one pulls it in.
 	 * The difference is deliberately taken as a signed value so that a
 	 * reduction is applied as a subtraction.
 	 */
 	delta = (int)(newtimeout - oldtimeout);
 
 	head->ifq_ttl = newtimeout;
 
 	/* Same adjustment for every entry, so the queue order is unchanged. */
 	for (tqe = head->ifq_head; tqe != NULL; tqe = tqe->tqe_next) {
 		tqe->tqe_die += delta;
 	}
 	MUTEX_EXIT(&head->ifq_lock);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_settimeout_tcp                                           */
 /* Returns:    int - 0 = successfully applied, -1 = failed                  */
 /* Parameters: t(I)   - pointer to tuneable to change                       */
 /*             p(I)   - pointer to new timeout information                  */
 /*             tab(I) - pointer to table of TCP queues                      */
 /*                                                                          */
 /* This function applies the new timeout (p) to the TCP tunable (t) and     */
 /* updates all of the entries on the relevant timeout queue by calling      */
 /* ipf_apply_timeout().                                                     */
 /* ------------------------------------------------------------------------ */
 int
 ipf_settimeout_tcp(t, p, tab)
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 	ipftq_t *tab;
 {
 	if (!strcmp(t->ipft_name, "tcp_idle_timeout") ||
 	    !strcmp(t->ipft_name, "tcp_established")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_ESTABLISHED], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_close_wait")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_CLOSE_WAIT], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_last_ack")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_LAST_ACK], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_timeout")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_LISTEN], p->ipftu_int);
 		ipf_apply_timeout(&tab[IPF_TCPS_HALF_ESTAB], p->ipftu_int);
 		ipf_apply_timeout(&tab[IPF_TCPS_CLOSING], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_listen")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_LISTEN], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_half_established")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_HALF_ESTAB], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_closing")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_CLOSING], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_syn_received")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_SYN_RECEIVED], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_syn_sent")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_SYN_SENT], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_closed")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_CLOSED], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_half_closed")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_CLOSED], p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "tcp_time_wait")) {
 		ipf_apply_timeout(&tab[IPF_TCPS_TIME_WAIT], p->ipftu_int);
 	} else {
 		/*
 		 * ipf_interror isn't set here because it should be set
 		 * by whatever called this function.
 		 */
 		return -1;
 	}
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_soft_create                                         */
 /* Returns:    NULL = failure, else success                                 */
 /* Parameters: arg(I) - pointer to soft context structure if already allocd */
 /*                                                                          */
 /* Create the foundation soft context structure. In circumstances where it  */
 /* is not required to dynamically allocate the context, a pointer can be    */
 /* passed in (rather than NULL) to a structure to be initialised.           */
 /* The main thing of interest is that a number of locks are initialised     */
 /* here instead of where it might be expected - in the relevant create      */
 /* function elsewhere.  This is done because the current locking design has */
 /* some areas where these locks are used outside of their module.           */
 /* Possibly the most important exercise that is done here is setting of all */
 /* the timeout values, allowing them to be changed before init().           */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_main_soft_create(arg)
 	void *arg;
 {
 	ipf_main_softc_t *softc;
 
 	/* Use the caller's storage when supplied, otherwise allocate it. */
 	if (arg == NULL) {
 		KMALLOC(softc, ipf_main_softc_t *);
 		if (softc == NULL)
 			return NULL;
 	} else {
 		softc = arg;
 	}
 
 	bzero((char *)softc, sizeof(*softc));
 
 	/*
 	 * This serves as a flag as to whether or not the softc should be
 	 * free'd when _destroy is called.
 	 */
 	softc->ipf_dynamic_softc = (arg == NULL) ? 1 : 0;
 
 	/*
 	 * Initialise the locks before doing anything that can fail: the
 	 * error path below goes through ipf_main_soft_destroy(), which
 	 * destroys every one of these locks unconditionally, so they must
 	 * all be valid by the time it can be reached.
 	 */
 	MUTEX_INIT(&softc->ipf_rw, "ipf rw mutex");
 	MUTEX_INIT(&softc->ipf_timeoutlock, "ipf timeout lock");
 	RWLOCK_INIT(&softc->ipf_global, "ipf filter load/unload mutex");
 	RWLOCK_INIT(&softc->ipf_mutex, "ipf filter rwlock");
 	RWLOCK_INIT(&softc->ipf_tokens, "ipf token rwlock");
 	RWLOCK_INIT(&softc->ipf_state, "ipf state rwlock");
 	RWLOCK_INIT(&softc->ipf_nat, "ipf IP NAT rwlock");
 	RWLOCK_INIT(&softc->ipf_poolrw, "ipf pool rwlock");
 	RWLOCK_INIT(&softc->ipf_frag, "ipf frag rwlock");
 
 	softc->ipf_tuners = ipf_tune_array_copy(softc,
 						sizeof(ipf_main_tuneables),
 						ipf_main_tuneables);
 	if (softc->ipf_tuners == NULL) {
 		ipf_main_soft_destroy(softc);
 		return NULL;
 	}
 
 	/* Empty token list: the tail pointer refers back to the head. */
 	softc->ipf_token_head = NULL;
 	softc->ipf_token_tail = &softc->ipf_token_head;
 
 	/* Default timeouts. */
 	softc->ipf_tcpidletimeout = FIVE_DAYS;
 	softc->ipf_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL);
 	softc->ipf_tcplastack = IPF_TTLVAL(30);
 	softc->ipf_tcptimewait = IPF_TTLVAL(2 * TCP_MSL);
 	softc->ipf_tcptimeout = IPF_TTLVAL(2 * TCP_MSL);
 	softc->ipf_tcpsynsent = IPF_TTLVAL(2 * TCP_MSL);
 	softc->ipf_tcpsynrecv = IPF_TTLVAL(2 * TCP_MSL);
 	softc->ipf_tcpclosed = IPF_TTLVAL(30);
 	softc->ipf_tcphalfclosed = IPF_TTLVAL(2 * 3600);
 	softc->ipf_udptimeout = IPF_TTLVAL(120);
 	softc->ipf_udpacktimeout = IPF_TTLVAL(12);
 	softc->ipf_icmptimeout = IPF_TTLVAL(60);
 	softc->ipf_icmpacktimeout = IPF_TTLVAL(6);
 	softc->ipf_iptimeout = IPF_TTLVAL(60);
 
 	/* Default verdict for packets that match no rule. */
 #if defined(IPFILTER_DEFAULT_BLOCK)
 	softc->ipf_pass = FR_BLOCK|FR_NOMATCH;
 #else
 	softc->ipf_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
 #endif
 	softc->ipf_minttl = 4;
 	softc->ipf_icmpminfragmtu = 68;
 	softc->ipf_flags = IPF_LOGGING;
 
 	return softc;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_soft_init                                           */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* A null-op function that exists as a placeholder so that the flow in      */
 /* other functions is obvious.                                              */
 /* ------------------------------------------------------------------------ */
 /*ARGSUSED*/
 int
 ipf_main_soft_init(softc)
 	ipf_main_softc_t *softc;
 {
 	/* Placeholder: there is nothing to initialise, so always succeed. */
 	const int result = 0;
 
 	return result;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_soft_destroy                                        */
 /* Returns:    void                                                         */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* Undo everything that we did in ipf_main_soft_create.                     */
 /*                                                                          */
 /* The most important check that needs to be made here is whether or not    */
 /* the structure was allocated by ipf_main_soft_create() by checking what   */
 /* value is stored in ipf_dynamic_softc.                                    */
 /* ------------------------------------------------------------------------ */
 /*ARGSUSED*/
 void
 ipf_main_soft_destroy(softc)
 	ipf_main_softc_t *softc;
 {
 
 	/* Tear down the two mutexes, then each of the rwlocks. */
 	MUTEX_DESTROY(&softc->ipf_rw);
 	MUTEX_DESTROY(&softc->ipf_timeoutlock);
 	RW_DESTROY(&softc->ipf_global);
 	RW_DESTROY(&softc->ipf_mutex);
 	RW_DESTROY(&softc->ipf_tokens);
 	RW_DESTROY(&softc->ipf_state);
 	RW_DESTROY(&softc->ipf_nat);
 	RW_DESTROY(&softc->ipf_poolrw);
 	RW_DESTROY(&softc->ipf_frag);
 
 	/* Release the private copy of the tuneables, if one was made. */
 	if (softc->ipf_tuners != NULL) {
 		KFREES(softc->ipf_tuners, sizeof(ipf_main_tuneables));
 	}
 
 	/* Only free the context itself when this module allocated it. */
 	if (softc->ipf_dynamic_softc != 1)
 		return;
 
 	KFREE(softc);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_soft_fini                                           */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* Clean out the rules which have been added since _init was last called,   */
 /* the only dynamic part of the mainline.                                   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_main_soft_fini(softc)
 	ipf_main_softc_t *softc;
 {
 	static const int units[2] = { IPL_LOGIPF, IPL_LOGCOUNT };
 	int i;
 
 	/*
 	 * For each unit, flush with FR_INACTIVE set and then without it.
 	 * The return values of ipf_flush() are deliberately ignored.
 	 */
 	for (i = 0; i < 2; i++) {
 		(void) ipf_flush(softc, units[i],
 				 FR_INQUE|FR_OUTQUE|FR_INACTIVE);
 		(void) ipf_flush(softc, units[i], FR_INQUE|FR_OUTQUE);
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_load                                                */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: none                                                         */
 /*                                                                          */
 /* Handle global initialisation that needs to be done for the base part of  */
 /* IPFilter. At present this just amounts to initialising some ICMP lookup  */
 /* arrays that get used by the state/NAT code.                              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_main_load()
 {
 	int type;
 
 	/*
 	 * ICMPv4: mark every type as having no reply (-1), then record the
 	 * reply type for each of the query types.
 	 */
 	type = 0;
 	while (type <= ICMP_MAXTYPE) {
 		icmpreplytype4[type] = -1;
 		type++;
 	}
 	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
 	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
 	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
 	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
 
 #ifdef  USE_INET6
 	/* ICMPv6: same scheme as above, using the IPv6 table. */
 	type = 0;
 	while (type <= ICMP6_MAXTYPE) {
 		icmpreplytype6[type] = -1;
 		type++;
 	}
 	icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
 	icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
 	icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
 	icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
 	icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
 #endif
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_main_unload                                              */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: none                                                         */
 /*                                                                          */
 /* A null-op function that exists as a placeholder so that the flow in      */
 /* other functions is obvious.                                              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_main_unload()
 {
 	/* Placeholder: there is nothing to undo here, so always succeed. */
 	const int result = 0;
 
 	return result;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_load_all                                                 */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: none                                                         */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the load     */
 /* function for each in an order that won't lead to a crash :)              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_load_all()
 {
 	/*
 	 * The || chain is evaluated left to right and stops at the first
 	 * loader that reports -1, so nothing after a failure is called.
 	 */
 	if ((ipf_main_load() == -1) ||
 	    (ipf_state_main_load() == -1) ||
 	    (ipf_nat_main_load() == -1) ||
 	    (ipf_frag_main_load() == -1) ||
 	    (ipf_auth_main_load() == -1) ||
 	    (ipf_proxy_main_load() == -1))
 		return -1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_unload_all                                               */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: none                                                         */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the unload   */
 /* function for each in an order that won't lead to a crash :)              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_unload_all()
 {
 	/*
 	 * Reverse of the load order.  The || chain is evaluated left to
 	 * right and stops at the first unload that reports -1.
 	 */
 	if ((ipf_proxy_main_unload() == -1) ||
 	    (ipf_auth_main_unload() == -1) ||
 	    (ipf_frag_main_unload() == -1) ||
 	    (ipf_nat_main_unload() == -1) ||
 	    (ipf_state_main_unload() == -1) ||
 	    (ipf_main_unload() == -1))
 		return -1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_create_all                                               */
 /* Returns:    NULL = failure, else success                                 */
 /* Parameters: arg(I) - pointer to soft context main structure              */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the create   */
 /* function for each in an order that won't lead to a crash :)              */
 /* ------------------------------------------------------------------------ */
 ipf_main_softc_t *
 ipf_create_all(arg)
 	void *arg;
 {
 	ipf_main_softc_t *softc;
 
 	softc = ipf_main_soft_create(arg);
 	if (softc == NULL)
 		return NULL;
 
 	/*
 	 * Create each subsystem context in turn.  The first one that fails
 	 * jumps to the common exit, which hands the partially built softc
 	 * to ipf_destroy_all().
 	 */
 #ifdef IPFILTER_LOG
 	if ((softc->ipf_log_soft = ipf_log_soft_create(softc)) == NULL)
 		goto failed;
 #endif
 
 	if ((softc->ipf_lookup_soft = ipf_lookup_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_sync_soft = ipf_sync_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_state_soft = ipf_state_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_nat_soft = ipf_nat_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_frag_soft = ipf_frag_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_auth_soft = ipf_auth_soft_create(softc)) == NULL)
 		goto failed;
 
 	if ((softc->ipf_proxy_soft = ipf_proxy_soft_create(softc)) == NULL)
 		goto failed;
 
 	return softc;
 
 failed:
 	ipf_destroy_all(softc);
 	return NULL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_destroy_all                                              */
 /* Returns:    void                                                         */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the destroy  */
 /* function for each in an order that won't lead to a crash :)              */
 /*                                                                          */
 /* Every one of these functions is expected to succeed, so there is no      */
 /* checking of return values.                                               */
 /* ------------------------------------------------------------------------ */
 void
 ipf_destroy_all(softc)
 	ipf_main_softc_t *softc;
 {
 
 	/*
 	 * Each subsystem context is destroyed only if it exists, and its
 	 * pointer is left NULL afterwards either way.
 	 */
 	if (softc->ipf_state_soft != NULL)
 		ipf_state_soft_destroy(softc, softc->ipf_state_soft);
 	softc->ipf_state_soft = NULL;
 
 	if (softc->ipf_nat_soft != NULL)
 		ipf_nat_soft_destroy(softc, softc->ipf_nat_soft);
 	softc->ipf_nat_soft = NULL;
 
 	if (softc->ipf_frag_soft != NULL)
 		ipf_frag_soft_destroy(softc, softc->ipf_frag_soft);
 	softc->ipf_frag_soft = NULL;
 
 	if (softc->ipf_auth_soft != NULL)
 		ipf_auth_soft_destroy(softc, softc->ipf_auth_soft);
 	softc->ipf_auth_soft = NULL;
 
 	if (softc->ipf_proxy_soft != NULL)
 		ipf_proxy_soft_destroy(softc, softc->ipf_proxy_soft);
 	softc->ipf_proxy_soft = NULL;
 
 	if (softc->ipf_sync_soft != NULL)
 		ipf_sync_soft_destroy(softc, softc->ipf_sync_soft);
 	softc->ipf_sync_soft = NULL;
 
 	if (softc->ipf_lookup_soft != NULL)
 		ipf_lookup_soft_destroy(softc, softc->ipf_lookup_soft);
 	softc->ipf_lookup_soft = NULL;
 
 #ifdef IPFILTER_LOG
 	if (softc->ipf_log_soft != NULL)
 		ipf_log_soft_destroy(softc, softc->ipf_log_soft);
 	softc->ipf_log_soft = NULL;
 #endif
 
 	/* The main context goes last. */
 	ipf_main_soft_destroy(softc);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_init_all                                                 */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the init     */
 /* function for each in an order that won't lead to a crash :)              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_init_all(softc)
 	ipf_main_softc_t *softc;
 {
 	int rv;
 
 	/*
 	 * Run the init functions in order.  Once one of them has returned
 	 * -1, rv stays at -1 and none of the remaining ones is called.
 	 */
 	rv = ipf_main_soft_init(softc);
 
 #ifdef IPFILTER_LOG
 	if (rv != -1)
 		rv = ipf_log_soft_init(softc, softc->ipf_log_soft);
 #endif
 
 	if (rv != -1)
 		rv = ipf_lookup_soft_init(softc, softc->ipf_lookup_soft);
 
 	if (rv != -1)
 		rv = ipf_sync_soft_init(softc, softc->ipf_sync_soft);
 
 	if (rv != -1)
 		rv = ipf_state_soft_init(softc, softc->ipf_state_soft);
 
 	if (rv != -1)
 		rv = ipf_nat_soft_init(softc, softc->ipf_nat_soft);
 
 	if (rv != -1)
 		rv = ipf_frag_soft_init(softc, softc->ipf_frag_soft);
 
 	if (rv != -1)
 		rv = ipf_auth_soft_init(softc, softc->ipf_auth_soft);
 
 	if (rv != -1)
 		rv = ipf_proxy_soft_init(softc, softc->ipf_proxy_soft);
 
 	/* Any result other than -1 counts as success. */
 	return (rv == -1) ? -1 : 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_fini_all                                                 */
 /* Returns:    0 = success, -1 = failure                                    */
 /* Parameters: softc(I) - pointer to soft context main structure            */
 /*                                                                          */
 /* Work through all of the subsystems inside IPFilter and call the fini     */
 /* function for each in an order that won't lead to a crash :)              */
 /* ------------------------------------------------------------------------ */
 int
 ipf_fini_all(softc)
 	ipf_main_softc_t *softc;
 {
 	int rv;
 
 	ipf_token_flush(softc);
 
 	/*
 	 * Run the fini functions in order.  Once one of them has returned
 	 * -1, rv stays at -1 and none of the remaining ones is called.
 	 */
 	rv = ipf_proxy_soft_fini(softc, softc->ipf_proxy_soft);
 
 	if (rv != -1)
 		rv = ipf_auth_soft_fini(softc, softc->ipf_auth_soft);
 
 	if (rv != -1)
 		rv = ipf_frag_soft_fini(softc, softc->ipf_frag_soft);
 
 	if (rv != -1)
 		rv = ipf_nat_soft_fini(softc, softc->ipf_nat_soft);
 
 	if (rv != -1)
 		rv = ipf_state_soft_fini(softc, softc->ipf_state_soft);
 
 	if (rv != -1)
 		rv = ipf_sync_soft_fini(softc, softc->ipf_sync_soft);
 
 	if (rv != -1)
 		rv = ipf_lookup_soft_fini(softc, softc->ipf_lookup_soft);
 
 #ifdef IPFILTER_LOG
 	if (rv != -1)
 		rv = ipf_log_soft_fini(softc, softc->ipf_log_soft);
 #endif
 
 	if (rv != -1)
 		rv = ipf_main_soft_fini(softc);
 
 	/* Any result other than -1 counts as success. */
 	return (rv == -1) ? -1 : 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_rule_expire                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* At present this function exists just to support temporary addition of    */
 /* firewall rules. Both inactive and active lists are scanned for items to  */
 /* purge, as by rights, the expiration is computed as soon as the rule is   */
 /* loaded in.                                                               */
 /* ------------------------------------------------------------------------ */
 void
 ipf_rule_expire(softc)
 	ipf_main_softc_t *softc;
 {
 	frentry_t *fr;
 	int set;
 
 	/* Nothing queued on either expiry list: skip taking the lock. */
 	if ((softc->ipf_rule_explist[0] == NULL) &&
 	    (softc->ipf_rule_explist[1] == NULL))
 		return;
 
 	WRITE_ENTER(&softc->ipf_mutex);
 
 	for (set = 0; set < 2; set++) {
 		for (;;) {
 			fr = softc->ipf_rule_explist[set];
 			/*
 			 * Because the list is kept sorted on insertion, the
 			 * first one that dies in the future means no more
 			 * work to do for this list.
 			 */
 			if ((fr == NULL) || (fr->fr_die > softc->ipf_ticks))
 				break;
 			ipf_rule_delete(softc, fr, IPL_LOGIPF, set);
 		}
 	}
 
 	RWLOCK_EXIT(&softc->ipf_mutex);
 }
 
 
 /*
  * Forward declarations for the two static host-tracking helpers defined
  * further down, followed by the RBI_ZERO/RBI_CODE invocations for the
  * "ipf_rb" tree of host_node_t entries, which are linked through hn_entry
  * and ordered by ipf_ht_node_cmp.
  * NOTE(review): presumably RBI_ZERO declares the tree's sentinel node and
  * RBI_CODE expands to the tree implementation - confirm against the RBI_*
  * macro definitions.
  */
 static int ipf_ht_node_cmp __P((struct host_node_s *, struct host_node_s *));
 static void ipf_ht_node_make_key __P((host_track_t *, host_node_t *, int,
 				      i6addr_t *));
 
 host_node_t RBI_ZERO(ipf_rb);
 RBI_CODE(ipf_rb, host_node_t, hn_entry, ipf_ht_node_cmp)
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ht_node_cmp                                             */
 /* Returns:     int   - 0 == nodes are the same, ..                         */
 /* Parameters:  k1(I) - pointer to first key to compare                     */
 /*              k2(I) - pointer to second key to compare                    */
 /*                                                                          */
 /* The "key" for the node is a combination of two fields: the address       */
 /* family and the address itself.                                           */
 /*                                                                          */
 /* Because we're not actually interpreting the address data, it isn't       */
 /* necessary to convert them to/from network/host byte order. The mask is   */
 /* just used to remove bits that aren't significant - it doesn't matter     */
 /* where they are, as long as they're always in the same place.             */
 /*                                                                          */
 /* As with IP6_EQ, comparing IPv6 addresses starts at the bottom because    */
 /* this is where individual ones will differ the most - but not true for    */
 /* /48's, etc.                                                              */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_ht_node_cmp(k1, k2)
 	struct host_node_s *k1, *k2;
 {
 	int w;
 
 	/*
 	 * Every field is ordered with explicit comparisons rather than by
 	 * returning a difference.  The address words are unsigned 32-bit
 	 * values, so "k2 - k1" squeezed into an int changes sign whenever
 	 * the two differ by more than 2^31, which yields an inconsistent
 	 * (non-transitive) ordering for the tree.  The sign convention is
 	 * unchanged: the result is positive when k2 sorts above k1.
 	 */
 	if (k2->hn_addr.adf_family != k1->hn_addr.adf_family) {
 		if (k2->hn_addr.adf_family > k1->hn_addr.adf_family)
 			return 1;
 		return -1;
 	}
 
 	if (k1->hn_addr.adf_family == AF_INET) {
 		if (k2->hn_addr.adf_addr.in4.s_addr >
 		    k1->hn_addr.adf_addr.in4.s_addr)
 			return 1;
 		if (k2->hn_addr.adf_addr.in4.s_addr <
 		    k1->hn_addr.adf_addr.in4.s_addr)
 			return -1;
 		return 0;
 	}
 
 	/* Any other family: compare all four words, last word first. */
 	for (w = 3; w >= 0; w--) {
 		if (k2->hn_addr.adf_addr.i6[w] > k1->hn_addr.adf_addr.i6[w])
 			return 1;
 		if (k2->hn_addr.adf_addr.i6[w] < k1->hn_addr.adf_addr.i6[w])
 			return -1;
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ht_node_make_key                                        */
 /* Returns:     Nil                                                         */
 /* Parameters:  htp(I)    - pointer to address tracking structure           */
 /*              key(O)    - where to store masked address for lookup        */
 /*              family(I) - protocol family of address                      */
 /*              addr(I)   - pointer to network address                      */
 /*                                                                          */
 /* Using the "netmask" (number of bits) stored in the parent host tracking  */
 /* struct, copy the address passed in into the key structure whilst masking */
 /* out the bits that we don't want.                                         */
 /*                                                                          */
 /* Because the parser will set ht_netmask to 128 if there is no protocol    */
 /* specified (the parser doesn't know if it should be a v4 or v6 rule), we  */
 /* have to be wary of that and not allow 32-128 to happen.                  */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_ht_node_make_key(htp, key, family, addr)
 	host_track_t *htp;
 	host_node_t *key;
 	int family;
 	i6addr_t *addr;
 {
 	key->hn_addr.adf_family = family;
 	if (family == AF_INET) {
 		u_32_t mask;
 		int bits;
 
 		key->hn_addr.adf_len = sizeof(key->hn_addr.adf_addr.in4);
 		bits = htp->ht_netmask;
 		if (bits >= 32) {
 			mask = 0xffffffff;
 		} else {
 			mask = htonl(0xffffffff << (32 - bits));
 		}
 		key->hn_addr.adf_addr.in4.s_addr = addr->in4.s_addr & mask;
 #ifdef USE_INET6
 	} else {
 		int bits = htp->ht_netmask;
 
 		key->hn_addr.adf_len = sizeof(key->hn_addr.adf_addr.in6);
 		if (bits > 96) {
 			key->hn_addr.adf_addr.i6[3] = addr->i6[3] &
 					     htonl(0xffffffff << (128 - bits));
 			key->hn_addr.adf_addr.i6[2] = addr->i6[2];
 			key->hn_addr.adf_addr.i6[1] = addr->i6[2];
 			key->hn_addr.adf_addr.i6[0] = addr->i6[2];
 		} else if (bits > 64) {
 			key->hn_addr.adf_addr.i6[3] = 0;
 			key->hn_addr.adf_addr.i6[2] = addr->i6[2] &
 					     htonl(0xffffffff << (96 - bits));
 			key->hn_addr.adf_addr.i6[1] = addr->i6[1];
 			key->hn_addr.adf_addr.i6[0] = addr->i6[0];
 		} else if (bits > 32) {
 			key->hn_addr.adf_addr.i6[3] = 0;
 			key->hn_addr.adf_addr.i6[2] = 0;
 			key->hn_addr.adf_addr.i6[1] = addr->i6[1] &
 					     htonl(0xffffffff << (64 - bits));
 			key->hn_addr.adf_addr.i6[0] = addr->i6[0];
 		} else {
 			key->hn_addr.adf_addr.i6[3] = 0;
 			key->hn_addr.adf_addr.i6[2] = 0;
 			key->hn_addr.adf_addr.i6[1] = 0;
 			key->hn_addr.adf_addr.i6[0] = addr->i6[0] &
 					     htonl(0xffffffff << (32 - bits));
 		}
 #endif
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ht_node_add                                             */
 /* Returns:     int       - 0 == success,  -1 == failure                    */
 /* Parameters:  softc(I)  - pointer to soft context main structure          */
 /*              htp(I)    - pointer to address tracking structure           */
 /*              family(I) - protocol family of address                      */
 /*              addr(I)   - pointer to network address                      */
 /*                                                                          */
 /* NOTE: THIS FUNCTION MUST BE CALLED WITH AN EXCLUSIVE LOCK THAT PREVENTS  */
 /*       ipf_ht_node_del FROM RUNNING CONCURRENTLY ON THE SAME htp.         */
 /*                                                                          */
 /* The address is first reduced to a lookup key and the red-black tree is   */
 /* searched for it. An address not yet present gets a freshly allocated     */
 /* node, provided the tree is below ht_max_nodes; an address already        */
 /* present is accepted only while its activity count is under               */
 /* ht_max_per_node (0 == no limit). Success bumps the activity count.       */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ht_node_add(softc, htp, family, addr)
 	ipf_main_softc_t *softc;
 	host_track_t *htp;
 	int family;
 	i6addr_t *addr;
 {
 	host_node_t lookup;
 	host_node_t *node;
 
 	ipf_ht_node_make_key(htp, &lookup, family, addr);
 	node = RBI_SEARCH(ipf_rb, &htp->ht_root, &lookup);
 
 	if (node != NULL) {
 		/* Known address: refuse once the per-node limit is reached. */
 		if ((htp->ht_max_per_node != 0) &&
 		    (node->hn_active >= htp->ht_max_per_node)) {
 			DT(ipf_rb_node_max);
 			LBUMP(ipf_rb_node_max);
 			return -1;
 		}
 		node->hn_active++;
 		return 0;
 	}
 
 	/* New address: only allowed while the tree has room for it. */
 	if (htp->ht_cur_nodes >= htp->ht_max_nodes)
 		return -1;
 
 	KMALLOC(node, host_node_t *);
 	if (node == NULL) {
 		DT(ipf_rb_no_mem);
 		LBUMP(ipf_rb_no_mem);
 		return -1;
 	}
 
 	/*
 	 * There is no macro to initialise the RB linkage of a node, so the
 	 * whole structure is zeroed before the key is copied in (the copy
 	 * carries the address family along with the address).
 	 */
 	bzero((char *)node, sizeof(*node));
 	node->hn_addr = lookup.hn_addr;
 	RBI_INSERT(ipf_rb, &htp->ht_root, node);
 	htp->ht_cur_nodes++;
 
 	node->hn_active++;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_ht_node_del                                             */
 /* Returns:     int       - 0 == success,  -1 == failure                    */
 /* Parameters:  htp(I)    - pointer to address tracking structure           */
 /*              family(I) - protocol family of address                      */
 /*              addr(I)   - pointer to network address                      */
 /*                                                                          */
 /* NOTE: THIS FUNCTION MUST BE CALLED WITH AN EXCLUSIVE LOCK THAT PREVENTS  */
 /*       ipf_ht_node_add FROM RUNNING CONCURRENTLY ON THE SAME htp.         */
 /*                                                                          */
 /* Look the address up among the nodes of this tree. When it is found, the  */
 /* active count for that node drops by one and, once the count reaches 0,   */
 /* the node is unlinked from the tree and freed.                            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_ht_node_del(htp, family, addr)
 	host_track_t *htp;
 	int family;
 	i6addr_t *addr;
 {
 	host_node_t lookup;
 	host_node_t *node;
 
 	ipf_ht_node_make_key(htp, &lookup, family, addr);
 
 	node = RBI_SEARCH(ipf_rb, &htp->ht_root, &lookup);
 	if (node == NULL)
 		return -1;
 
 	/* Last reference gone: remove the node and give its memory back. */
 	if (--node->hn_active == 0) {
 		(void) RBI_DELETE(ipf_rb, &htp->ht_root, node);
 		htp->ht_cur_nodes--;
 		KFREE(node);
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_rb_ht_init                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  head(I) - pointer to host tracking structure                */
 /*                                                                          */
 /* Initialise the host tracking structure to be ready for use above.        */
 /* Only ht_root is set up here; the other host_track_t fields are not       */
 /* touched.                                                                 */
 /* ------------------------------------------------------------------------ */
 void
 ipf_rb_ht_init(head)
 	host_track_t *head;
 {
 	RBI_INIT(ipf_rb, &head->ht_root);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_rb_ht_freenode                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  node(I) - pointer to the host node to free                  */
 /*              arg(I)  - additional argument from walk caller              */
 /*                                                                          */
 /* Free an actual host_node_t structure. The arg parameter is unused.       */
 /* ------------------------------------------------------------------------ */
 void
 ipf_rb_ht_freenode(node, arg)
 	host_node_t *node;
 	void *arg;
 {
 	KFREE(node);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_rb_ht_flush                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  head(I) - pointer to host tracking structure                */
 /*                                                                          */
 /* Remove all of the nodes in the tree tracking hosts by calling a walker   */
 /* and free'ing each one.                                                   */
 /* ------------------------------------------------------------------------ */
 void
 ipf_rb_ht_flush(head)
 	host_track_t *head;
 {
 	RBI_WALK(ipf_rb, &head->ht_root, ipf_rb_ht_freenode, NULL);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_slowtimer                                               */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to main ipf soft context structure       */
 /*                                                                          */
 /* Periodic housekeeping: run the expire routine of each subsystem and      */
 /* advance ipf_ticks by one. Timeouts are set in expectation of this        */
 /* being called twice per second.                                           */
 /* ------------------------------------------------------------------------ */
 void
 ipf_slowtimer(softc)
 	ipf_main_softc_t *softc;
 {
 
 	ipf_token_expire(softc);
 	ipf_frag_expire(softc);
 	ipf_state_expire(softc);
 	ipf_nat_expire(softc);
 	ipf_auth_expire(softc);
 	ipf_lookup_expire(softc);
 	ipf_rule_expire(softc);
 	ipf_sync_expire(softc);
 	/* The tick counter moves on only after every expire pass has run. */
 	softc->ipf_ticks++;
 #   if defined(__OpenBSD__)
 	/* Presumably re-arms this timer half a second (hz/2) from now. */
 	timeout_add(&ipf_slowtimer_ch, hz/2);
 #   endif
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inet_mask_add                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  bits(I) - number of bits set in the netmask                 */
 /*              mtab(I) - pointer to mask hash table structure              */
 /*                                                                          */
 /* Take a reference on the netmask of the given length. The first reference */
 /* also inserts the 32bit mask into the array of active masks, which is     */
 /* kept in descending numeric order; later references to the same length    */
 /* only bump the use count, which is how duplicates are avoided.            */
 /* ------------------------------------------------------------------------ */
 void
 ipf_inet_mask_add(bits, mtab)
 	int bits;
 	ipf_v4_masktab_t *mtab;
 {
 	u_32_t newmask;
 	int slot, k;
 
 	/* Only the first user of a given mask length touches the array. */
 	if (++mtab->imt4_masks[bits] > 1)
 		return;
 
 	/* Host byte order; a zero length cannot be produced by shifting. */
 	newmask = (bits == 0) ? 0 : (0xffffffff << (32 - bits));
 
 	/* Skip every active mask that is not numerically below the new one. */
 	slot = 0;
 	while ((slot < 33) && (ntohl(mtab->imt4_active[slot]) >= newmask))
 		slot++;
 
 	if (slot < 33) {
 		/* Open a gap at slot by moving the tail up one place. */
 		for (k = 32; k > slot; k--)
 			mtab->imt4_active[k] = mtab->imt4_active[k - 1];
 		mtab->imt4_active[slot] = htonl(newmask);
 	}
 	mtab->imt4_max++;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inet_mask_del                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  bits(I) - number of bits set in the netmask                 */
 /*              mtab(I) - pointer to mask hash table structure              */
 /*                                                                          */
 /* Remove the 32bit bitmask represented by "bits" from the collection of    */
 /* netmasks stored inside of mtab.                                          */
 /* The mask is only removed once its use count drops to zero. A length of   */
 /* 0 is handled explicitly, as ipf_inet_mask_add does, because shifting a   */
 /* 32bit value by 32 bits is undefined.                                     */
 /* ------------------------------------------------------------------------ */
 void
 ipf_inet_mask_del(bits, mtab)
 	int bits;
 	ipf_v4_masktab_t *mtab;
 {
 	u_32_t mask;
 	int i, j;
 
 	mtab->imt4_masks[bits]--;
 	if (mtab->imt4_masks[bits] > 0)
 		return;
 
 	/*
 	 * Without the special case a length of 0 would shift by 32, which
 	 * may leave the value unchanged and so match (and remove) the
 	 * all-ones mask instead of the all-zero one.
 	 */
 	if (bits == 0)
 		mask = 0;
 	else
 		mask = htonl(0xffffffff << (32 - bits));
 
 	for (i = 0; i < 33; i++) {
 		if (mtab->imt4_active[i] == mask) {
 			/* Close the gap by moving the tail down one place. */
 			for (j = i + 1; j < 33; j++)
 				mtab->imt4_active[j - 1] = mtab->imt4_active[j];
 			break;
 		}
 	}
 	mtab->imt4_max--;
 	ASSERT(mtab->imt4_max >= 0);
 }
 
 
 #ifdef USE_INET6
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inet6_mask_add                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  bits(I) - number of bits set in mask                        */
 /*              mask(I) - pointer to mask to add                            */
 /*              mtab(I) - pointer to mask hash table structure              */
 /*                                                                          */
 /* Take a reference on the IPv6 netmask with the given number of bits. The  */
 /* first reference also inserts the mask into the array of active masks,    */
 /* ahead of the first entry that IP6_LT reports as smaller. A bit count of  */
 /* 0 substitutes an all-zero mask for the one passed in.                    */
 /* ------------------------------------------------------------------------ */
 void
 ipf_inet6_mask_add(bits, mask, mtab)
 	int bits;
 	i6addr_t *mask;
 	ipf_v6_masktab_t *mtab;
 {
 	i6addr_t zero;
 	int slot, k;
 
 	/* Only the first user of a given mask length touches the array. */
 	if (++mtab->imt6_masks[bits] > 1)
 		return;
 
 	if (bits == 0) {
 		zero.i6[0] = 0;
 		zero.i6[1] = 0;
 		zero.i6[2] = 0;
 		zero.i6[3] = 0;
 		mask = &zero;
 	}
 
 	/* Find the first active entry that orders below the new mask. */
 	slot = 0;
 	while ((slot < 129) && !IP6_LT(&mtab->imt6_active[slot], mask))
 		slot++;
 
 	if (slot < 129) {
 		/* Open a gap at slot by moving the tail up one place. */
 		for (k = 128; k > slot; k--)
 			mtab->imt6_active[k] = mtab->imt6_active[k - 1];
 		mtab->imt6_active[slot] = *mask;
 	}
 	mtab->imt6_max++;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_inet6_mask_del                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  bits(I) - number of bits set in mask                        */
 /*              mask(I) - pointer to mask to remove                         */
 /*              mtab(I) - pointer to mask hash table structure              */
 /*                                                                          */
 /* Drop a reference on the IPv6 netmask with the given number of bits and,  */
 /* when no references remain, remove the 128bit mask from the array of      */
 /* active masks by moving the entries that follow it down one slot.         */
 /* ------------------------------------------------------------------------ */
 void
 ipf_inet6_mask_del(bits, mask, mtab)
 	int bits;
 	i6addr_t *mask;
 	ipf_v6_masktab_t *mtab;
 {
 	i6addr_t zero;
 	int idx, k;
 
 	if (--mtab->imt6_masks[bits] > 0)
 		return;
 
 	/* zero is needed below even when it is not the mask searched for. */
 	zero.i6[0] = 0;
 	zero.i6[1] = 0;
 	zero.i6[2] = 0;
 	zero.i6[3] = 0;
 	if (bits == 0)
 		mask = &zero;
 
 	/* Locate the entry to remove; idx ends up at 129 if there is none. */
 	for (idx = 0; idx < 129; idx++) {
 		if (IP6_EQ(&mtab->imt6_active[idx], mask))
 			break;
 	}
 
 	/*
 	 * Shuffle the following entries down over it, stopping early once
 	 * an all-zero entry has been copied.
 	 */
 	for (k = idx + 1; k < 129; k++) {
 		mtab->imt6_active[k - 1] = mtab->imt6_active[k];
 		if (IP6_EQ(&mtab->imt6_active[k - 1], &zero))
 			break;
 	}
 
 	mtab->imt6_max--;
 	ASSERT(mtab->imt6_max >= 0);
 }
 #endif
Index: head/sys/contrib/ipfilter/netinet/ip_fil.h
===================================================================
--- head/sys/contrib/ipfilter/netinet/ip_fil.h	(revision 280970)
+++ head/sys/contrib/ipfilter/netinet/ip_fil.h	(revision 280971)
@@ -1,2000 +1,2000 @@
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  *
  * @(#)ip_fil.h	1.35 6/5/96
  * $FreeBSD$
  * Id: ip_fil.h,v 2.170.2.51 2007/10/10 09:48:03 darrenr Exp $
  */
 
 #ifndef	__IP_FIL_H__
 #define	__IP_FIL_H__
 
 #include "netinet/ip_compat.h"
 #include "netinet/ipf_rb.h"
 #if NETBSD_GE_REV(104040000)
 # include <sys/callout.h>
 #endif
 #if defined(BSD) && defined(_KERNEL)
 # if NETBSD_LT_REV(399000000) || defined(__osf__) || FREEBSD_LT_REV(500043)
 #  include <sys/select.h>
 # else
 #  include <sys/selinfo.h>
 # endif
 #endif
 
 #if !defined(linux) || !defined(_KERNEL)
 # include <netinet/in.h>
 #endif
 
 #ifndef	SOLARIS
 # define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
 #endif
 
 #ifndef	__P
 # ifdef	__STDC__
 #  define	__P(x)	x
 # else
 #  define	__P(x)	()
 # endif
 #endif
 
 /*
  * ioctl command codes. Every command uses group letter 'r' with command
  * numbers 60 through 98. Both branches define the same set of commands;
  * the #else branch passes the group letter unquoted, presumably for
  * preprocessors whose _IO* macros quote the argument themselves.
  */
 #if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
 # define	SIOCADAFR	_IOW('r', 60, struct ipfobj)
 # define	SIOCRMAFR	_IOW('r', 61, struct ipfobj)
 # define	SIOCSETFF	_IOW('r', 62, u_int)
 # define	SIOCGETFF	_IOR('r', 63, u_int)
 # define	SIOCGETFS	_IOWR('r', 64, struct ipfobj)
 # define	SIOCIPFFL	_IOWR('r', 65, int)
 # define	SIOCIPFFB	_IOR('r', 66, int)
 # define	SIOCADIFR	_IOW('r', 67, struct ipfobj)
 # define	SIOCRMIFR	_IOW('r', 68, struct ipfobj)
 # define	SIOCSWAPA	_IOR('r', 69, u_int)
 # define	SIOCINAFR	_IOW('r', 70, struct ipfobj)
 # define	SIOCINIFR	_IOW('r', 71, struct ipfobj)
 # define	SIOCFRENB	_IOW('r', 72, u_int)
 # define	SIOCFRSYN	_IOW('r', 73, u_int)
 # define	SIOCFRZST	_IOWR('r', 74, struct ipfobj)
 # define	SIOCZRLST	_IOWR('r', 75, struct ipfobj)
 # define	SIOCAUTHW	_IOWR('r', 76, struct ipfobj)
 # define	SIOCAUTHR	_IOWR('r', 77, struct ipfobj)
 # define	SIOCSTAT1	_IOWR('r', 78, struct ipfobj)
 # define	SIOCSTLCK	_IOWR('r', 79, u_int)
 # define	SIOCSTPUT	_IOWR('r', 80, struct ipfobj)
 # define	SIOCSTGET	_IOWR('r', 81, struct ipfobj)
 # define	SIOCSTGSZ	_IOWR('r', 82, struct ipfobj)
 # define	SIOCSTAT2	_IOWR('r', 83, struct ipfobj)
 # define	SIOCSETLG	_IOWR('r', 84, int)
 # define	SIOCGETLG	_IOWR('r', 85, int)
 # define	SIOCFUNCL	_IOWR('r', 86, struct ipfunc_resolve)
 # define	SIOCIPFGETNEXT	_IOWR('r', 87, struct ipfobj)
 # define	SIOCIPFGET	_IOWR('r', 88, struct ipfobj)
 # define	SIOCIPFSET	_IOWR('r', 89, struct ipfobj)
 # define	SIOCIPFL6	_IOWR('r', 90, int)
 # define	SIOCIPFITER	_IOWR('r', 91, struct ipfobj)
 # define	SIOCGENITER	_IOWR('r', 92, struct ipfobj)
 # define	SIOCGTABL	_IOWR('r', 93, struct ipfobj)
 # define	SIOCIPFDELTOK	_IOWR('r', 94, int)
 # define	SIOCLOOKUPITER	_IOWR('r', 95, struct ipfobj)
 # define	SIOCGTQTAB	_IOWR('r', 96, struct ipfobj)
 # define	SIOCMATCHFLUSH	_IOWR('r', 97, struct ipfobj)
 # define	SIOCIPFINTERROR	_IOR('r', 98, int)
 #else
 # define	SIOCADAFR	_IOW(r, 60, struct ipfobj)
 # define	SIOCRMAFR	_IOW(r, 61, struct ipfobj)
 # define	SIOCSETFF	_IOW(r, 62, u_int)
 # define	SIOCGETFF	_IOR(r, 63, u_int)
 # define	SIOCGETFS	_IOWR(r, 64, struct ipfobj)
 # define	SIOCIPFFL	_IOWR(r, 65, int)
 # define	SIOCIPFFB	_IOR(r, 66, int)
 # define	SIOCADIFR	_IOW(r, 67, struct ipfobj)
 # define	SIOCRMIFR	_IOW(r, 68, struct ipfobj)
 # define	SIOCSWAPA	_IOR(r, 69, u_int)
 # define	SIOCINAFR	_IOW(r, 70, struct ipfobj)
 # define	SIOCINIFR	_IOW(r, 71, struct ipfobj)
 # define	SIOCFRENB	_IOW(r, 72, u_int)
 # define	SIOCFRSYN	_IOW(r, 73, u_int)
 # define	SIOCFRZST	_IOWR(r, 74, struct ipfobj)
 # define	SIOCZRLST	_IOWR(r, 75, struct ipfobj)
 # define	SIOCAUTHW	_IOWR(r, 76, struct ipfobj)
 # define	SIOCAUTHR	_IOWR(r, 77, struct ipfobj)
 # define	SIOCSTAT1	_IOWR(r, 78, struct ipfobj)
 # define	SIOCSTLCK	_IOWR(r, 79, u_int)
 # define	SIOCSTPUT	_IOWR(r, 80, struct ipfobj)
 # define	SIOCSTGET	_IOWR(r, 81, struct ipfobj)
 # define	SIOCSTGSZ	_IOWR(r, 82, struct ipfobj)
 # define	SIOCSTAT2	_IOWR(r, 83, struct ipfobj)
 # define	SIOCSETLG	_IOWR(r, 84, int)
 # define	SIOCGETLG	_IOWR(r, 85, int)
 # define	SIOCFUNCL	_IOWR(r, 86, struct ipfunc_resolve)
 # define	SIOCIPFGETNEXT	_IOWR(r, 87, struct ipfobj)
 # define	SIOCIPFGET	_IOWR(r, 88, struct ipfobj)
 # define	SIOCIPFSET	_IOWR(r, 89, struct ipfobj)
 # define	SIOCIPFL6	_IOWR(r, 90, int)
 # define	SIOCIPFITER	_IOWR(r, 91, struct ipfobj)
 # define	SIOCGENITER	_IOWR(r, 92, struct ipfobj)
 # define	SIOCGTABL	_IOWR(r, 93, struct ipfobj)
 # define	SIOCIPFDELTOK	_IOWR(r, 94, int)
 # define	SIOCLOOKUPITER	_IOWR(r, 95, struct ipfobj)
 # define	SIOCGTQTAB	_IOWR(r, 96, struct ipfobj)
 # define	SIOCMATCHFLUSH	_IOWR(r, 97, struct ipfobj)
 # define	SIOCIPFINTERROR	_IOR(r, 98, int)
 #endif
 /* Alternative names for some of the commands defined above. */
 #define	SIOCADDFR	SIOCADAFR
 #define	SIOCDELFR	SIOCRMAFR
 #define	SIOCINSFR	SIOCINAFR
 #define	SIOCATHST	SIOCSTAT1
 #define	SIOCGFRST	SIOCSTAT2
 
 
 /*
  * Forward declarations so that pointers to these structures can be used
  * in this header without their full definitions.
  */
 struct ipscan;
 struct ifnet;
 struct ipf_main_softc_s;
 
 /*
  * Pointer to a lookup function; stored in the lptr[] member of i6addr_t.
  * The first argument is the main soft context; the meaning of the other
  * arguments is not visible here.
  */
 typedef	int	(* lookupfunc_t) __P((struct ipf_main_softc_s *, void *,
 				      int, void *, u_int));
 
 /*
  * i6addr is used as a container for both IPv4 and IPv6 addresses, as well
  * as other types of objects, depending on its qualifier.
  * The two definitions differ only in the in6 member, which exists only
  * when USE_INET6 is defined; i6[4] gives word access in both cases.
  */
 #ifdef	USE_INET6
 typedef	union	i6addr	{
 	u_32_t	i6[4];
 	struct	in_addr	in4;
 	struct	in6_addr in6;
 	void	*vptr[2];
 	lookupfunc_t	lptr[2];
 	struct {
 		u_short	type;
 		u_short	subtype;
 		int	name;
 	} i6un;
 } i6addr_t;
 #else
 typedef	union	i6addr	{
 	u_32_t	i6[4];
 	struct	in_addr	in4;
 	void	*vptr[2];
 	lookupfunc_t	lptr[2];
 	struct {
 		u_short	type;
 		u_short	subtype;
 		int	name;
 	} i6un;
 } i6addr_t;
 #endif
 
 /* Shorthand names for the members used when an i6addr_t holds a lookup. */
 #define in4_addr	in4.s_addr
 #define	iplookupnum	i6[1]
 #define	iplookupname	i6un.name
 #define	iplookuptype	i6un.type
 #define	iplookupsubtype	i6un.subtype
 /*
  * NOTE: These DO overlap the above on 64bit systems and this IS recognised.
  */
 #define	iplookupptr	vptr[0]
 #define	iplookupfunc	lptr[1]
 
 /*
  * Word accessors for a 128bit address held as four 32bit words.
  * I6n(x) yields word n exactly as stored; HI6n(x) yields word n converted
  * with ntohl(), i.e. in host byte order when x is in network byte order.
  */
 #define	I60(x)	(((u_32_t *)(x))[0])
 #define	I61(x)	(((u_32_t *)(x))[1])
 #define	I62(x)	(((u_32_t *)(x))[2])
 #define	I63(x)	(((u_32_t *)(x))[3])
 #define	HI60(x)	ntohl(((u_32_t *)(x))[0])
 #define	HI61(x)	ntohl(((u_32_t *)(x))[1])
 #define	HI62(x)	ntohl(((u_32_t *)(x))[2])
 #define	HI63(x)	ntohl(((u_32_t *)(x))[3])
 
 /* Equality and zero/all-ones tests; byte order does not matter for these. */
 #define	IP6_EQ(a,b)	((I63(a) == I63(b)) && (I62(a) == I62(b)) && \
 			 (I61(a) == I61(b)) && (I60(a) == I60(b)))
 #define	IP6_NEQ(a,b)	((I63(a) != I63(b)) || (I62(a) != I62(b)) || \
 			 (I61(a) != I61(b)) || (I60(a) != I60(b)))
 #define IP6_ISZERO(a)   ((I60(a) | I61(a) | I62(a) | I63(a)) == 0)
 #define IP6_NOTZERO(a)  ((I60(a) | I61(a) | I62(a) | I63(a)) != 0)
 #define	IP6_ISONES(a)	((I63(a) == 0xffffffff) && (I62(a) == 0xffffffff) && \
 			 (I61(a) == 0xffffffff) && (I60(a) == 0xffffffff))
 /*
  * Ordering of two 128bit values, most significant word (word 0) first.
  * HI6n() already converts to host byte order, so it must not be wrapped
  * in a second ntohl(): doing so undoes the conversion on little endian
  * hosts and the comparison is then made on byte-swapped values.
  */
 #define	IP6_GT(a,b)	(HI60(a) > HI60(b) || \
 			 (HI60(a) == HI60(b) && \
 			  (HI61(a) > HI61(b) || \
 			   (HI61(a) == HI61(b) && \
 			    (HI62(a) > HI62(b) || \
 			     (HI62(a) == HI62(b) && \
 			      HI63(a) > HI63(b)))))))
 #define	IP6_LT(a,b)	(HI60(a) < HI60(b) || \
 			 (HI60(a) == HI60(b) && \
 			  (HI61(a) < HI61(b) || \
 			   (HI61(a) == HI61(b) && \
 			    (HI62(a) < HI62(b) || \
 			     (HI62(a) == HI62(b) && \
 			      HI63(a) < HI63(b)))))))
 /* Add x to the 32bit network byte order value n; result in network order. */
 #define	NLADD(n,x)	htonl(ntohl(n) + (x))
 /*
  * Increment the 128bit value at a by one, in place: word 3 is incremented
  * first and a wrap to zero carries into word 2, then word 1, then word 0.
  */
 #define	IP6_INC(a)	\
 		do { u_32_t *_i6 = (u_32_t *)(a); \
 		  _i6[3] = NLADD(_i6[3], 1); \
 		  if (_i6[3] == 0) { \
 			_i6[2] = NLADD(_i6[2], 1); \
 			if (_i6[2] == 0) { \
 				_i6[1] = NLADD(_i6[1], 1); \
 				if (_i6[1] == 0) { \
 					_i6[0] = NLADD(_i6[0], 1); \
 				} \
 			} \
 		  } \
 		} while (0)
 /*
  * Add x to the value at a, storing into d.
  * NOTE(review): x is added to word 0 and the carry moves towards word 3,
  * the opposite direction to IP6_INC. Words 1-3 of d are incremented in
  * place and never copied from a. Confirm this is what callers expect.
  */
 #define	IP6_ADD(a,x,d)	\
 		do { i6addr_t *_s = (i6addr_t *)(a); \
 		  i6addr_t *_d = (i6addr_t *)(d); \
 		  _d->i6[0] = NLADD(_s->i6[0], x); \
 		  if (ntohl(_d->i6[0]) < ntohl(_s->i6[0])) { \
 			_d->i6[1] = NLADD(_d->i6[1], 1); \
 			if (ntohl(_d->i6[1]) < ntohl(_s->i6[1])) { \
 				_d->i6[2] = NLADD(_d->i6[2], 1); \
 				if (ntohl(_d->i6[2]) < ntohl(_s->i6[2])) { \
 					_d->i6[3] = NLADD(_d->i6[3], 1); \
 				} \
 			} \
 		  } \
 		} while (0)
 /* d = a & b, word by word. */
 #define	IP6_AND(a,b,d)	do { i6addr_t *_s1 = (i6addr_t *)(a); \
 			  i6addr_t *_s2 = (i6addr_t *)(b); \
 			  i6addr_t *_d = (i6addr_t *)(d); \
 			  _d->i6[0] = _s1->i6[0] & _s2->i6[0]; \
 			  _d->i6[1] = _s1->i6[1] & _s2->i6[1]; \
 			  _d->i6[2] = _s1->i6[2] & _s2->i6[2]; \
 			  _d->i6[3] = _s1->i6[3] & _s2->i6[3]; \
 			} while (0)
 /* a &= m, word by word. */
 #define	IP6_ANDASSIGN(a,m) \
 			do { i6addr_t *_d = (i6addr_t *)(a); \
 			  i6addr_t *_m = (i6addr_t *)(m); \
 			  _d->i6[0] &= _m->i6[0]; \
 			  _d->i6[1] &= _m->i6[1]; \
 			  _d->i6[2] &= _m->i6[2]; \
 			  _d->i6[3] &= _m->i6[3]; \
 			} while (0)
 /* True when (a & m) equals b in every word. */
 #define	IP6_MASKEQ(a,m,b) \
 			(((I60(a) & I60(m)) == I60(b)) && \
 			 ((I61(a) & I61(m)) == I61(b)) && \
 			 ((I62(a) & I62(m)) == I62(b)) && \
 			 ((I63(a) & I63(m)) == I63(b)))
 /* True when (a & m) differs from b in any word. */
 #define	IP6_MASKNEQ(a,m,b) \
 			(((I60(a) & I60(m)) != I60(b)) || \
 			 ((I61(a) & I61(m)) != I61(b)) || \
 			 ((I62(a) & I62(m)) != I62(b)) || \
 			 ((I63(a) & I63(m)) != I63(b)))
 /* a |= (b & ~c), word by word. */
 #define	IP6_MERGE(a,b,c) \
 			do { i6addr_t *_d, *_s1, *_s2; \
 			  _d = (i6addr_t *)(a); \
 			  _s1 = (i6addr_t *)(b); \
 			  _s2 = (i6addr_t *)(c); \
 			  _d->i6[0] |= _s1->i6[0] & ~_s2->i6[0]; \
 			  _d->i6[1] |= _s1->i6[1] & ~_s2->i6[1]; \
 			  _d->i6[2] |= _s1->i6[2] & ~_s2->i6[2]; \
 			  _d->i6[3] |= _s1->i6[3] & ~_s2->i6[3]; \
 			} while (0)
 /* a = (b & ~c), word by word. */
 #define	IP6_MASK(a,b,c) \
 			do { i6addr_t *_d, *_s1, *_s2; \
 			  _d = (i6addr_t *)(a); \
 			  _s1 = (i6addr_t *)(b); \
 			  _s2 = (i6addr_t *)(c); \
 			  _d->i6[0] = _s1->i6[0] & ~_s2->i6[0]; \
 			  _d->i6[1] = _s1->i6[1] & ~_s2->i6[1]; \
 			  _d->i6[2] = _s1->i6[2] & ~_s2->i6[2]; \
 			  _d->i6[3] = _s1->i6[3] & ~_s2->i6[3]; \
 			} while (0)
 /* Set all 128 bits of a. */
 #define	IP6_SETONES(a)	\
 			do { i6addr_t *_d = (i6addr_t *)(a); \
 			  _d->i6[0] = 0xffffffff; \
 			  _d->i6[1] = 0xffffffff; \
 			  _d->i6[2] = 0xffffffff; \
 			  _d->i6[3] = 0xffffffff; \
 			} while (0)
 
 /*
  * IP security option storage: two 16bit values or one 32bit value sharing
  * the same space (accessed through fi_secmsk, fi_auth and fi_doi).
  */
 typedef	union ipso_u	{
 	u_short	ipso_ripso[2];
 	u_32_t	ipso_doi;
 } ipso_t;
 
 /*
  * Summary of the IP level information of a packet. The same layout is
  * used inside fripf_t for a rule's match values and their masks.
  */
 typedef	struct	fr_ip	{
 	u_32_t	fi_v:4;		/* IP version */
 	u_32_t	fi_xx:4;	/* spare */
 	u_32_t	fi_tos:8;	/* IP packet TOS */
 	u_32_t	fi_ttl:8;	/* IP packet TTL */
 	u_32_t	fi_p:8;		/* IP packet protocol */
 	u_32_t	fi_optmsk;	/* bitmask composed from IP options */
 	i6addr_t fi_src;	/* source address from packet */
 	i6addr_t fi_dst;	/* destination address from packet */
 	ipso_t	fi_ipso;	/* IP security options */
 	u_32_t	fi_flx;		/* packet flags */
 	u_32_t	fi_tcpmsk;	/* TCP options set/reset */
 	u_32_t	fi_ports[2];	/* TCP ports */
 	u_char	fi_tcpf;	/* TCP flags */
 	u_char	fi_sensitivity;
 	u_char	fi_xxx[2];	/* pad */
 } fr_ip_t;
 
 /*
  * For use in fi_flx
  * FI_CMP, FI_ICMPCMP and FI_WITH are combinations of the single-bit flags.
  */
 #define	FI_TCPUDP	0x0001	/* TCP/UDP implied comparison*/
 #define	FI_OPTIONS	0x0002
 #define	FI_FRAG		0x0004
 #define	FI_SHORT	0x0008
 #define	FI_NATED	0x0010
 #define	FI_MULTICAST	0x0020
 #define	FI_BROADCAST	0x0040
 #define	FI_MBCAST	0x0080
 #define	FI_STATE	0x0100
 #define	FI_BADNAT	0x0200
 #define	FI_BAD		0x0400
 #define	FI_OOW		0x0800	/* Out of state window, else match */
 #define	FI_ICMPERR	0x1000
 #define	FI_FRAGBODY	0x2000
 #define	FI_BADSRC	0x4000
 #define	FI_LOWTTL	0x8000
 #define	FI_CMP		0x5cfe3	/* Not FI_FRAG,FI_NATED,FI_FRAGTAIL */
 #define	FI_ICMPCMP	0x0003	/* Flags we can check for ICMP error packets */
 #define	FI_WITH		0x5effe	/* Not FI_TCPUDP */
 #define	FI_V6EXTHDR	0x10000
 #define	FI_COALESCE	0x20000
 #define	FI_NEWNAT	0x40000
 #define	FI_ICMPQUERY	0x80000
 #define	FI_ENCAP	0x100000	/* encap/decap with NAT */
 #define	FI_AH		0x200000	/* AH header present */
 #define	FI_DOCKSUM	0x10000000	/* Proxy wants L4 recalculation */
 #define	FI_NOCKSUM	0x20000000	/* don't do a L4 checksum validation */
 #define	FI_NOWILD	0x40000000	/* Do not do wildcard searches */
 #define	FI_IGNORE	0x80000000
 
 /* Shorthand for members nested inside fr_ip_t. */
 #define	fi_secmsk	fi_ipso.ipso_ripso[0]
 #define	fi_auth		fi_ipso.ipso_ripso[1]
 #define	fi_doi		fi_ipso.ipso_doi
 #define	fi_saddr	fi_src.in4.s_addr
 #define	fi_daddr	fi_dst.in4.s_addr
 #define	fi_srcnum	fi_src.iplookupnum
 #define	fi_dstnum	fi_dst.iplookupnum
 #define	fi_srcname	fi_src.iplookupname
 #define	fi_dstname	fi_dst.iplookupname
 #define	fi_srctype	fi_src.iplookuptype
 #define	fi_dsttype	fi_dst.iplookuptype
 #define	fi_srcsubtype	fi_src.iplookupsubtype
 #define	fi_dstsubtype	fi_dst.iplookupsubtype
 #define	fi_srcptr	fi_src.iplookupptr
 #define	fi_dstptr	fi_dst.iplookupptr
 #define	fi_srcfunc	fi_src.iplookupfunc
 #define	fi_dstfunc	fi_dst.iplookupfunc
 
 
 /*
  * These are both used by the state and NAT code to indicate that one port or
  * the other should be treated as a wildcard.
  * NOTE: When updating, check bit masks in ip_state.h and update there too.
  * SI_WILDP and SI_WILDA combine the two port and the two address wildcard
  * bits respectively.
  */
 #define	SI_W_SPORT	0x00000100
 #define	SI_W_DPORT	0x00000200
 #define	SI_WILDP	(SI_W_SPORT|SI_W_DPORT)
 #define	SI_W_SADDR	0x00000400
 #define	SI_W_DADDR	0x00000800
 #define	SI_WILDA	(SI_W_SADDR|SI_W_DADDR)
 #define	SI_NEWFR	0x00001000
 #define	SI_CLONE	0x00002000
 #define	SI_CLONED	0x00004000
 #define	SI_NEWCLONE	0x00008000
 
 /*
  * Layer 4 data kept in fr_info_t (fin_dat): a pair of 16bit port values
  * and the TCP flags.
  */
 typedef	struct {
 	u_short	fda_ports[2];
 	u_char	fda_tcpf;		/* TCP header flags (SYN, ACK, etc) */
 } frdat_t;
 
 /*
  * Reasons recorded in fin_reason for why a packet was automatically
  * blocked. The values are given explicitly and are contiguous from 0.
  */
 typedef enum fr_breasons_e {
 	FRB_BLOCKED = 0,
 	FRB_LOGFAIL = 1,
 	FRB_PPSRATE = 2,
 	FRB_JUMBO = 3,
 	FRB_MAKEFRIP = 4,
 	FRB_STATEADD = 5,
 	FRB_UPDATEIPID = 6,
 	FRB_LOGFAIL2 = 7,
 	FRB_DECAPFRIP = 8,
 	FRB_AUTHNEW = 9,
 	FRB_AUTHCAPTURE = 10,
 	FRB_COALESCE = 11,
 	FRB_PULLUP = 12,
 	FRB_AUTHFEEDBACK = 13,
 	FRB_BADFRAG = 14,
 	FRB_NATV4 = 15,
 	FRB_NATV6 = 16,
 } fr_breason_t;
 
 /* Highest fr_breason_t value (FRB_NATV6); update when adding a reason. */
 #define	FRB_MAX_VALUE	16
 
 /*
  * Checksum status stored in fin_cksum: -1 = bad, 0 = not done, 1 = good.
  * What distinguishes FI_CK_L4PART from FI_CK_L4FULL is not visible here.
  */
 typedef enum ipf_cksum_e {
 	FI_CK_BAD = -1,
 	FI_CK_NEEDED = 0,
 	FI_CK_SUMOK = 1,
 	FI_CK_L4PART = 2,
 	FI_CK_L4FULL = 4
 } ipf_cksum_t;
 
 /*
  * Per-packet information: where the packet was seen, the summary of its
  * headers, the rule last matched and pointers into the packet buffer.
  * The order of the members matters because FI_CSIZE, FI_LCSIZE and
  * FI_COPYSIZE are defined as offsets of members of this structure.
  */
 typedef	struct	fr_info	{
 	void	*fin_main_soft;
 	void	*fin_ifp;		/* interface packet is `on' */
 	struct	frentry *fin_fr;	/* last matching rule */
 	int	fin_out;		/* in or out ? 1 == out, 0 == in */
 	fr_ip_t	fin_fi;			/* IP Packet summary */
 	frdat_t	fin_dat;		/* TCP/UDP ports, ICMP code/type */
 	int	fin_dlen;		/* length of data portion of packet */
 	int	fin_plen;
 	u_32_t	fin_rule;		/* rule # last matched */
 	u_short	fin_hlen;		/* length of IP header in bytes */
 	char	fin_group[FR_GROUPLEN];	/* group number, -1 for none */
 	void	*fin_dp;		/* start of data past IP header */
 	/*
 	 * Fields after fin_dp aren't used for compression of log records.
 	 * fin_fi contains the IP version (fin_family)
 	 * fin_rule isn't included because adding a new rule can change it but
 	 * not change fin_fr. fin_rule is the rule number reported.
 	 * It isn't necessary to include fin_crc because that is checked
 	 * for explicitly, before calling bcmp.
 	 */
 	u_32_t	fin_crc;		/* Simple calculation for logging */
 	int	fin_family;		/* AF_INET, etc. */
 	int	fin_icode;		/* ICMP error to return */
 	int	fin_mtu;		/* MTU input for ICMP need-frag */
 	int	fin_rev;		/* state only: 1 = reverse */
 	int	fin_ipoff;		/* # bytes from buffer start to hdr */
 	u_32_t	fin_id;			/* IP packet id field */
 	u_short	fin_l4hlen;		/* length of L4 header, if known */
 	u_short	fin_off;
 	int	fin_depth;		/* Group nesting depth */
 	int	fin_error;		/* Error code to return */
 	ipf_cksum_t	fin_cksum;	/* -1 = bad, 1 = good, 0 = not done */
 	fr_breason_t	fin_reason;	/* why auto blocked */
 	u_int	fin_pktnum;
 	void	*fin_nattag;
 	struct frdest	*fin_dif;
 	struct frdest	*fin_tif;
 	/* Pointer to the IP header, typed by IP version. */
 	union {
 		ip_t	*fip_ip;
 #ifdef USE_INET6
 		ip6_t	*fip_ip6;
 #endif
 	} fin_ipu;
 	mb_t	**fin_mp;		/* pointer to pointer to mbuf */
 	mb_t	*fin_m;			/* pointer to mbuf */
 #ifdef	MENTAT
 	mb_t	*fin_qfm;		/* pointer to mblk where pkt starts */
 	void	*fin_qpi;
 	char	fin_ifname[LIFNAMSIZ];
 #endif
 #ifdef	__sgi
 	void	*fin_hbuf;
 #endif
 	void	*fin_fraghdr;		/* pointer to start of ipv6 frag hdr */
 } fr_info_t;
 
 /* Shorthand for members nested inside fr_info_t. */
 #define	fin_ip		fin_ipu.fip_ip
 #define	fin_ip6		fin_ipu.fip_ip6
 #define	fin_v		fin_fi.fi_v
 #define	fin_p		fin_fi.fi_p
 #define	fin_flx		fin_fi.fi_flx
 #define	fin_optmsk	fin_fi.fi_optmsk
 #define	fin_secmsk	fin_fi.fi_secmsk
 #define	fin_doi		fin_fi.fi_doi
 #define	fin_auth	fin_fi.fi_auth
 #define	fin_src		fin_fi.fi_src.in4
 #define	fin_saddr	fin_fi.fi_saddr
 #define	fin_dst		fin_fi.fi_dst.in4
 #define	fin_daddr	fin_fi.fi_daddr
 #define	fin_data	fin_fi.fi_ports
 #define	fin_sport	fin_fi.fi_ports[0]
 #define	fin_dport	fin_fi.fi_ports[1]
 #define	fin_tcpf	fin_fi.fi_tcpf
 #define	fin_src6	fin_fi.fi_src
 #define	fin_dst6	fin_fi.fi_dst
 #define	fin_srcip6	fin_fi.fi_src.in6
 #define	fin_dstip6	fin_fi.fi_dst.in6
 
 /* Packet direction values, matching fin_out (0 == in, 1 == out). */
 #define	IPF_IN		0
 #define	IPF_OUT		1
 
 /* Function called with packet information; returns a rule pointer. */
 typedef	struct frentry	*(*ipfunc_t) __P((fr_info_t *, u_32_t *));
 /* Function called with the main soft context and a rule; returns an int. */
 typedef	int		(*ipfuncinit_t) __P((struct ipf_main_softc_s *, struct frentry *));
 
 /*
  * Associates a function name with its address and with the functions
  * held in ipfu_init and ipfu_fini; used as the argument type of the
  * SIOCFUNCL ioctl.
  */
 typedef	struct	ipfunc_resolve	{
 	char		ipfu_name[32];
 	ipfunc_t	ipfu_addr;
 	ipfuncinit_t	ipfu_init;
 	ipfuncinit_t	ipfu_fini;
 } ipfunc_resolve_t;
 
 /*
  * Size for compares on fr_info structures
  * Each size is the offset of a member, so it covers every member of
  * fr_info_t that is declared ahead of the one named.
  */
 #define	FI_CSIZE	offsetof(fr_info_t, fin_icode)
 #define	FI_LCSIZE	offsetof(fr_info_t, fin_dp)
 
 /*
  * Size for copying cache fr_info structure
  */
 #define	FI_COPYSIZE	offsetof(fr_info_t, fin_dp)
 
 /*
  * Structure for holding IPFilter's tag information
  * The IPFTAG_LEN byte tag can be accessed as characters (ipt_tag) or as
  * four 32bit words (ipt_num).
  */
 #define	IPFTAG_LEN	16
 typedef	struct	{
 	union	{
 		u_32_t	iptu_num[4];
 		char	iptu_tag[IPFTAG_LEN];
 	} ipt_un;
 	int	ipt_not;
 } ipftag_t;
 
 #define	ipt_tag	ipt_un.iptu_tag
 #define	ipt_num	ipt_un.iptu_num
 
 /*
  * Structure to define address for pool lookups.
  * It is also the key type of the host tracking nodes (host_node_t).
  */
 typedef	struct	{
 	u_char		adf_len;	/* length field */
 	sa_family_t	adf_family;	/* address family */
 	u_char		adf_xxx[2];	/* presumably padding - confirm */
 	i6addr_t	adf_addr;	/* the address itself */
 } addrfamily_t;
 
 
 /*
  * Host tracking: a red-black tree (name prefix ipf_rb) of host_node_t,
  * one node per tracked address, with the limits that apply to the tree.
  */
 RBI_LINK(ipf_rb, host_node_s);
 
 typedef	struct	host_node_s {
 	RBI_FIELD(ipf_rb)	hn_entry;	/* tree linkage */
 	addrfamily_t		hn_addr;	/* masked address (the key) */
 	int			hn_active;	/* current use count */
 } host_node_t;
 
 typedef RBI_HEAD(ipf_rb, host_node_s) ipf_rb_head_t;
 
 typedef	struct	host_track_s {
 	ipf_rb_head_t	ht_root;	/* root of the tree of nodes */
 	int		ht_max_nodes;	/* limit on nodes in the tree */
 	int		ht_max_per_node; /* limit on hn_active, 0 == none */
 	int		ht_netmask;	/* # of address bits used in key */
 	int		ht_cur_nodes;	/* nodes currently in the tree */
 } host_track_t;
 
 
 /* Kind of destination described by a frdest_t (stored in fd_type). */
 typedef enum fr_dtypes_e {
 	FRD_NORMAL = 0,
 	FRD_DSTLIST
 } fr_dtypes_t;
 /*
  * This structure is used to hold information about the next hop for where
  * to forward a packet.
  */
 typedef	struct	frdest	{
 	void		*fd_ptr;
 	addrfamily_t	fd_addr;
 	fr_dtypes_t	fd_type;
 	int		fd_name;
 	int		fd_local;
 } frdest_t;
 
 /* Shorthand for the address held in fd_addr (whole union / IPv4 part). */
 #define	fd_ip6	fd_addr.adf_addr
 #define	fd_ip	fd_ip6.in4
 
 
 /* Comparison operators available for a port comparison (frp_cmp). */
 typedef enum fr_ctypes_e {
 	FR_NONE = 0,
 	FR_EQUAL,
 	FR_NEQUAL,
 	FR_LESST,
 	FR_GREATERT,
 	FR_LESSTE,
 	FR_GREATERTE,
 	FR_OUTRANGE,
 	FR_INRANGE,
 	FR_INCRANGE
 } fr_ctypes_t;
 
 /*
  * This structure holds information about a port comparison.
  */
 typedef	struct	frpcmp	{
 	fr_ctypes_t	frp_cmp;	/* data for port comparisons */
 	u_32_t		frp_port;	/* port (presumably low for <>, ><) */
 	u_32_t		frp_top;	/* top port for <> and >< */
 } frpcmp_t;
 
 
 /*
  * Structure containing all the relevant TCP things that can be checked in
  * a filter rule.
  */
 typedef	struct	frtuc	{
 	u_char		ftu_tcpfm;	/* tcp flags mask */
 	u_char		ftu_tcpf;	/* tcp flags */
 	frpcmp_t	ftu_src;	/* source port comparison */
 	frpcmp_t	ftu_dst;	/* destination port comparison */
 } frtuc_t;
 
 /* Shorthand for the members of the two port comparisons. */
 #define	ftu_scmp	ftu_src.frp_cmp
 #define	ftu_dcmp	ftu_dst.frp_cmp
 #define	ftu_sport	ftu_src.frp_port
 #define	ftu_dport	ftu_dst.frp_port
 #define	ftu_stop	ftu_src.frp_top
 #define	ftu_dtop	ftu_dst.frp_top
 
 /* 0x3f == the low six bits set. */
 #define	FR_TCPFMAX	0x3f
 
 typedef enum fr_atypes_e {
 	FRI_NONE = -1,	/* For LHS of NAT */
 	FRI_NORMAL = 0,	/* Normal address */
 	FRI_DYNAMIC,	/* dynamic address */
 	FRI_LOOKUP,	/* address is a pool # */
 	FRI_RANGE,	/* address/mask is a range */
 	FRI_NETWORK,	/* network address from if */
 	FRI_BROADCAST,	/* broadcast address from if */
 	FRI_PEERADDR,	/* Peer address for P-to-P */
 	FRI_NETMASKED,	/* network address with netmask from if */
 	FRI_SPLIT,	/* For NAT compatibility */
 	FRI_INTERFACE	/* address is based on interface name */
 } fr_atypes_t;
 
 /*
  * This structure makes up what is considered to be the IPFilter specific
  * matching components of a filter rule, as opposed to the data structures
  * used to define the result which are in frentry_t and not here.
  */
 typedef	struct	fripf	{
 	fr_ip_t		fri_ip;
 	fr_ip_t		fri_mip;	/* mask structure */
 
 	u_short		fri_icmpm;	/* data for ICMP packets (mask) */
 	u_short		fri_icmp;
 
 	frtuc_t		fri_tuc;
 	fr_atypes_t	fri_satype;	/* address type */
 	fr_atypes_t	fri_datype;	/* address type */
 	int		fri_sifpidx;	/* index into fr_ifps[] to use when doing dynamic addressing */
 	int		fri_difpidx;	/* index into fr_ifps[] to use when doing dynamic addressing */
 } fripf_t;
 
 #define	fri_dlookup	fri_mip.fi_dst
 #define	fri_slookup	fri_mip.fi_src
 #define	fri_dstnum	fri_mip.fi_dstnum
 #define	fri_srcnum	fri_mip.fi_srcnum
 #define	fri_dstname	fri_mip.fi_dstname
 #define	fri_srcname	fri_mip.fi_srcname
 #define	fri_dstptr	fri_mip.fi_dstptr
 #define	fri_srcptr	fri_mip.fi_srcptr
 
 
 typedef enum fr_rtypes_e {
 	FR_T_NONE = 0,
 	FR_T_IPF,		/* IPF structures */
 	FR_T_BPFOPC,		/* BPF opcode */
 	FR_T_CALLFUNC,		/* callout to function in fr_func only */
 	FR_T_COMPIPF,			/* compiled C code */
 	FR_T_IPFEXPR,			/* IPF expression */
 	FR_T_BUILTIN = 0x40000000,	/* rule is in kernel space */
 	FR_T_IPF_BUILTIN,
 	FR_T_BPFOPC_BUILTIN,
 	FR_T_CALLFUNC_BUILTIN,
 	FR_T_COMPIPF_BUILTIN,
 	FR_T_IPFEXPR_BUILTIN
 } fr_rtypes_t;
 
 typedef	struct	frentry	* (* frentfunc_t) __P((fr_info_t *));
 
 typedef	struct	frentry {
 	ipfmutex_t	fr_lock;
 	struct	frentry	*fr_next;
 	struct	frentry	**fr_pnext;
 	struct	frgroup	*fr_grp;
 	struct	frgroup	*fr_grphead;
 	struct	frgroup	*fr_icmpgrp;
 	struct	ipscan	*fr_isc;
 	struct	frentry	*fr_dnext;	/* 2 fr_die linked list pointers */
 	struct	frentry	**fr_pdnext;
 	void	*fr_ifas[4];
 	void	*fr_ptr;	/* for use with fr_arg */
 	int	fr_comment;	/* text comment for rule */
 	int	fr_size;	/* size of this structure */
 	int	fr_ref;		/* reference count */
 	int	fr_statecnt;	/* state count - for limit rules */
 	u_32_t	fr_die;		/* only used on loading the rule */
 	u_int	fr_cksum;	/* checksum on filter rules for performance */
 	/*
 	 * The line number from a file is here because we need to be able to
 	 * match the rule generated with ``grep rule ipf.conf | ipf -rf -''
 	 * with the rule loaded using ``ipf -f ipf.conf'' - thus it can't be
 	 * on the other side of fr_func.
 	 */
 	int	fr_flineno;	/* line number from conf file */
 	/*
 	 * These are only incremented when a packet  matches this rule and
 	 * it is the last match
 	 */
 	U_QUAD_T	fr_hits;
 	U_QUAD_T	fr_bytes;
 
 	/*
 	 * For PPS rate limiting
 	 * fr_lpu is used to always have the same size for this field,
 	 * allocating 64bits for seconds and 32bits for milliseconds.
 	 */
 	union {
 		struct timeval	frp_lastpkt;
 		char	frp_bytes[12];
 	} fr_lpu;
 	int		fr_curpps;
 
 	union	{
 		void		*fru_data;
 		char		*fru_caddr;
 		fripf_t		*fru_ipf;
 		frentfunc_t	fru_func;
 	} fr_dun;
 
 	/*
 	 * Fields after this may not change whilst in the kernel.
 	 */
 	ipfunc_t fr_func; 	/* call this function */
 	int	fr_dsize;
 	int	fr_pps;
 	fr_rtypes_t	fr_type;
 	u_32_t	fr_flags;	/* per-rule flags && options (see below) */
 	u_32_t	fr_logtag;	/* user defined log tag # */
 	u_32_t	fr_collect;	/* collection number */
 	u_int	fr_arg;		/* misc. numeric arg for rule */
 	u_int	fr_loglevel;	/* syslog log facility + priority */
 	u_char	fr_family;
 	u_char	fr_icode;	/* return ICMP code */
 	int	fr_group;	/* group to which this rule belongs */
 	int	fr_grhead;	/* group # which this rule starts */
 	int	fr_ifnames[4];
 	int	fr_isctag;
 	int	fr_rpc;		/* XID Filtering */ 
 	ipftag_t fr_nattag;
 	frdest_t fr_tifs[2];	/* "to"/"reply-to" interface */
 	frdest_t fr_dif;	/* duplicate packet interface */
 	/*
 	 * These are all options related to stateful filtering
 	 */
 	host_track_t	fr_srctrack;
 	int	fr_nostatelog;
 	int	fr_statemax;	/* max reference count */
 	int	fr_icmphead;	/* ICMP group  for state options */
 	u_int	fr_age[2];	/* non-TCP state timeouts */
 	/*
 	 * How big is the name buffer at the end?
 	 */
 	int	fr_namelen;
 	char	fr_names[1];
 } frentry_t;
 
 #define	fr_lastpkt	fr_lpu.frp_lastpkt
 #define	fr_caddr	fr_dun.fru_caddr
 #define	fr_data		fr_dun.fru_data
 #define	fr_dfunc	fr_dun.fru_func
 #define	fr_ipf		fr_dun.fru_ipf
 #define	fr_ip		fr_ipf->fri_ip
 #define	fr_mip		fr_ipf->fri_mip
 #define	fr_icmpm	fr_ipf->fri_icmpm
 #define	fr_icmp		fr_ipf->fri_icmp
 #define	fr_tuc		fr_ipf->fri_tuc
 #define	fr_satype	fr_ipf->fri_satype
 #define	fr_datype	fr_ipf->fri_datype
 #define	fr_sifpidx	fr_ipf->fri_sifpidx
 #define	fr_difpidx	fr_ipf->fri_difpidx
 #define	fr_proto	fr_ip.fi_p
 #define	fr_mproto	fr_mip.fi_p
 #define	fr_ttl		fr_ip.fi_ttl
 #define	fr_mttl		fr_mip.fi_ttl
 #define	fr_tos		fr_ip.fi_tos
 #define	fr_mtos		fr_mip.fi_tos
 #define	fr_tcpfm	fr_tuc.ftu_tcpfm
 #define	fr_tcpf		fr_tuc.ftu_tcpf
 #define	fr_scmp		fr_tuc.ftu_scmp
 #define	fr_dcmp		fr_tuc.ftu_dcmp
 #define	fr_dport	fr_tuc.ftu_dport
 #define	fr_sport	fr_tuc.ftu_sport
 #define	fr_stop		fr_tuc.ftu_stop
 #define	fr_dtop		fr_tuc.ftu_dtop
 #define	fr_dst		fr_ip.fi_dst.in4
 #define	fr_dst6		fr_ip.fi_dst
 #define	fr_daddr	fr_ip.fi_dst.in4.s_addr
 #define	fr_src		fr_ip.fi_src.in4
 #define	fr_src6		fr_ip.fi_src
 #define	fr_saddr	fr_ip.fi_src.in4.s_addr
 #define	fr_dmsk		fr_mip.fi_dst.in4
 #define	fr_dmsk6	fr_mip.fi_dst
 #define	fr_dmask	fr_mip.fi_dst.in4.s_addr
 #define	fr_smsk		fr_mip.fi_src.in4
 #define	fr_smsk6	fr_mip.fi_src
 #define	fr_smask	fr_mip.fi_src.in4.s_addr
 #define	fr_dstnum	fr_ip.fi_dstnum
 #define	fr_srcnum	fr_ip.fi_srcnum
 #define	fr_dlookup	fr_ip.fi_dst
 #define	fr_slookup	fr_ip.fi_src
 #define	fr_dstname	fr_ip.fi_dstname
 #define	fr_srcname	fr_ip.fi_srcname
 #define	fr_dsttype	fr_ip.fi_dsttype
 #define	fr_srctype	fr_ip.fi_srctype
 #define	fr_dstsubtype	fr_ip.fi_dstsubtype
 #define	fr_srcsubtype	fr_ip.fi_srcsubtype
 #define	fr_dstptr	fr_mip.fi_dstptr
 #define	fr_srcptr	fr_mip.fi_srcptr
 #define	fr_dstfunc	fr_mip.fi_dstfunc
 #define	fr_srcfunc	fr_mip.fi_srcfunc
 #define	fr_optbits	fr_ip.fi_optmsk
 #define	fr_optmask	fr_mip.fi_optmsk
 #define	fr_secbits	fr_ip.fi_secmsk
 #define	fr_secmask	fr_mip.fi_secmsk
 #define	fr_authbits	fr_ip.fi_auth
 #define	fr_authmask	fr_mip.fi_auth
 #define	fr_doi		fr_ip.fi_doi
 #define	fr_doimask	fr_mip.fi_doi
 #define	fr_flx		fr_ip.fi_flx
 #define	fr_mflx		fr_mip.fi_flx
 #define	fr_ifa		fr_ifas[0]
 #define	fr_oifa		fr_ifas[2]
 #define	fr_tif		fr_tifs[0]
 #define	fr_rif		fr_tifs[1]
 
 #define	FR_NOLOGTAG	0
 
 #define	FR_CMPSIZ	(sizeof(struct frentry) - \
 			 offsetof(struct frentry, fr_func))
 /*
  * FR_NAME(_f, _n) yields a pointer into the fr_names[] buffer of the rule
  * _f, displaced by the integer member of _f named _n.  The whole expansion
  * is parenthesized so that operators applied to the result (unary '*',
  * indexing, casts) act on the final pointer rather than on fr_names alone.
  */
 #define	FR_NAME(_f, _n)	((_f)->fr_names + (_f)->_n)
 
 
 /*
  * fr_flags
  */
 #define	FR_BLOCK	0x00001	/* do not allow packet to pass */
 #define	FR_PASS		0x00002	/* allow packet to pass */
 #define	FR_AUTH		0x00003	/* use authentication */
 #define	FR_PREAUTH	0x00004	/* require preauthentication */
 #define	FR_ACCOUNT	0x00005	/* Accounting rule */
 #define	FR_SKIP		0x00006	/* skip rule */
 #define	FR_DECAPSULATE	0x00008	/* decapsulate rule */
 #define	FR_CALL		0x00009	/* call rule */
 #define	FR_CMDMASK	0x0000f
 #define	FR_LOG		0x00010	/* Log */
 #define	FR_LOGB		0x00011	/* Log-fail */
 #define	FR_LOGP		0x00012	/* Log-pass */
 #define	FR_LOGMASK	(FR_LOG|FR_CMDMASK)
 #define	FR_CALLNOW	0x00020	/* call another function (fr_func) if matches */
 #define	FR_NOTSRCIP	0x00040
 #define	FR_NOTDSTIP	0x00080
 #define	FR_QUICK	0x00100	/* match & stop processing list */
 #define	FR_KEEPFRAG	0x00200	/* keep fragment information */
 #define	FR_KEEPSTATE	0x00400	/* keep `connection' state information */
 #define	FR_FASTROUTE	0x00800	/* bypass normal routing */
 #define	FR_RETRST	0x01000	/* Return TCP RST packet - reset connection */
 #define	FR_RETICMP	0x02000	/* Return ICMP unreachable packet */
 #define	FR_FAKEICMP	0x03000	/* Return ICMP unreachable with fake source */
 #define	FR_OUTQUE	0x04000	/* outgoing packets */
 #define	FR_INQUE	0x08000	/* ingoing packets */
 #define	FR_LOGBODY	0x10000	/* Log the body */
 #define	FR_LOGFIRST	0x20000	/* Log the first byte if state held */
 #define	FR_LOGORBLOCK	0x40000	/* block the packet if it can't be logged */
 #define	FR_STLOOSE	0x80000	/* loose state checking */
 #define	FR_FRSTRICT	0x100000	/* strict frag. cache */
 #define	FR_STSTRICT	0x200000	/* strict keep state */
 #define	FR_NEWISN	0x400000	/* new ISN for outgoing TCP */
 #define	FR_NOICMPERR	0x800000	/* do not match ICMP errors in state */
 #define	FR_STATESYNC	0x1000000	/* synchronize state to slave */
 #define	FR_COPIED	0x2000000	/* copied from user space */
 #define	FR_INACTIVE	0x4000000	/* only used when flush'ing rules */
 #define	FR_NOMATCH	0x8000000	/* no match occurred */
 		/*	0x10000000 	FF_LOGPASS */
 		/*	0x20000000 	FF_LOGBLOCK */
 		/*	0x40000000 	FF_LOGNOMATCH */
 		/*	0x80000000 	FF_BLOCKNONIP */
 
 #define	FR_RETMASK	(FR_RETICMP|FR_RETRST|FR_FAKEICMP)
 #define	FR_ISBLOCK(x)	(((x) & FR_CMDMASK) == FR_BLOCK)
 #define	FR_ISPASS(x)	(((x) & FR_CMDMASK) == FR_PASS)
 #define	FR_ISAUTH(x)	(((x) & FR_CMDMASK) == FR_AUTH)
 #define	FR_ISPREAUTH(x)	(((x) & FR_CMDMASK) == FR_PREAUTH)
 #define	FR_ISACCOUNT(x)	(((x) & FR_CMDMASK) == FR_ACCOUNT)
 #define	FR_ISSKIP(x)	(((x) & FR_CMDMASK) == FR_SKIP)
 #define	FR_ISDECAPS(x)	(((x) & FR_CMDMASK) == FR_DECAPSULATE)
 #define	FR_ISNOMATCH(x)	((x) & FR_NOMATCH)
 #define	FR_INOUT	(FR_INQUE|FR_OUTQUE)
 
 /*
  * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags
  */
 #define	FF_LOGPASS	0x10000000
 #define	FF_LOGBLOCK	0x20000000
 #define	FF_LOGNOMATCH	0x40000000
 #define	FF_LOGGING	(FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH)
 #define	FF_BLOCKNONIP	0x80000000	/* Solaris2 Only */
 
 
 /*
  * Structure that passes information on what/how to flush to the kernel.
  */
 typedef	struct	ipfflush	{
 	int		ipflu_how;
 	int		ipflu_arg;
 } ipfflush_t;
 
 
 /*
  * Description of one named control value: its bounds, its current and
  * default settings, the increment between values and its tag name.
  */
 typedef	struct	ipfgetctl	{
 	u_int		ipfg_min;	/* min value */
 	u_int		ipfg_current;	/* current value */
 	u_int		ipfg_max;	/* max value */
 	u_int		ipfg_default;	/* default value */
 	u_int		ipfg_steps;	/* value increments */
 	char		ipfg_name[40];	/* tag name for this control */
 } ipfgetctl_t;
 
 typedef	struct	ipfsetctl	{
 	int	ipfs_which;	/* 0 = min 1 = current 2 = max 3 = default */
 	u_int	ipfs_value;	/* presumably the value for the slot picked by ipfs_which (comment used to read "min value"); confirm */
 	char	ipfs_name[40];	/* tag name for this control */
 } ipfsetctl_t;
 
 
 /*
  * Some of the statistics below are in their own counters, but most are kept
  * in this single structure so that they can all easily be collected and
  * copied back as required.
  */
 typedef	struct	ipf_statistics {
 	u_long	fr_icmp_coalesce;
 	u_long	fr_tcp_frag;
 	u_long	fr_tcp_pullup;
 	u_long	fr_tcp_short;
 	u_long	fr_tcp_small;
 	u_long	fr_tcp_bad_flags;
 	u_long	fr_udp_pullup;
 	u_long	fr_ip_freed;
 	u_long	fr_v6_ah_bad;
 	u_long	fr_v6_bad;
 	u_long	fr_v6_badfrag;
 	u_long	fr_v6_dst_bad;
 	u_long	fr_v6_esp_pullup;
 	u_long	fr_v6_ext_short;
 	u_long	fr_v6_ext_pullup;
 	u_long	fr_v6_ext_hlen;
 	u_long	fr_v6_frag_bad;
 	u_long	fr_v6_frag_pullup;
 	u_long	fr_v6_frag_size;
 	u_long	fr_v6_gre_pullup;
 	u_long	fr_v6_icmp6_pullup;
 	u_long	fr_v6_rh_bad;
 	u_long	fr_v6_badttl;	/* TTL in packet doesn't reach minimum */
 	u_long	fr_v4_ah_bad;
 	u_long	fr_v4_ah_pullup;
 	u_long	fr_v4_esp_pullup;
 	u_long	fr_v4_cipso_bad;
 	u_long	fr_v4_cipso_tlen;
 	u_long	fr_v4_gre_frag;
 	u_long	fr_v4_gre_pullup;
 	u_long	fr_v4_icmp_frag;
 	u_long	fr_v4_icmp_pullup;
 	u_long	fr_v4_badttl;	/* TTL in packet doesn't reach minimum */
 	u_long	fr_v4_badsrc;	/* source received doesn't match route */
 	u_long	fr_l4_badcksum;	/* layer 4 header checksum failure */
 	u_long	fr_badcoalesces;
 	u_long	fr_pass;	/* packets allowed */
 	u_long	fr_block;	/* packets denied */
 	u_long	fr_nom;		/* packets which don't match any rule */
 	u_long	fr_short;	/* packets which are short */
 	u_long	fr_ppkl;	/* packets allowed and logged */
 	u_long	fr_bpkl;	/* packets denied and logged */
 	u_long	fr_npkl;	/* packets unmatched and logged */
 	u_long	fr_ret;		/* packets for which a return is sent */
 	u_long	fr_acct;	/* packets for which counting was performed */
 	u_long	fr_bnfr;	/* bad attempts to allocate fragment state */
 	u_long	fr_nfr;		/* new fragment state kept */
 	u_long	fr_cfr;		/* add new fragment state but complete pkt */
 	u_long	fr_bads;	/* bad attempts to allocate packet state */
 	u_long	fr_ads;		/* new packet state kept */
 	u_long	fr_chit;	/* cached hit */
 	u_long	fr_cmiss;	/* cached miss */
 	u_long	fr_tcpbad;	/* TCP checksum check failures */
 	u_long	fr_pull[2];	/* good and bad pullup attempts */
 	u_long	fr_bad;		/* bad IP packets to the filter */
 	u_long	fr_ipv6;	/* IPv6 packets in/out */
 	u_long	fr_ppshit;	/* dropped because of pps ceiling */
 	u_long	fr_ipud;	/* IP id update failures */
 	u_long	fr_blocked[FRB_MAX_VALUE + 1];
 } ipf_statistics_t;
 
 /*
  * Log structure.  Each packet header logged is prepended by one of these.
  * Following this in the log records read from the device will be an ipflog
  * structure which is then followed by any packet data.
  */
 typedef	struct	iplog	{
 	u_32_t		ipl_magic;
 	u_int		ipl_count;
 	u_32_t		ipl_seqnum;
 	struct	timeval	ipl_time;
 	size_t		ipl_dsize;
 	struct	iplog	*ipl_next;
 } iplog_t;
 
 #define	ipl_sec		ipl_time.tv_sec
 #define	ipl_usec	ipl_time.tv_usec
 
 #define IPL_MAGIC	0x49504c4d	/* 'IPLM' */
 #define IPL_MAGIC_NAT	0x49504c4e	/* 'IPLN' */
 #define IPL_MAGIC_STATE	0x49504c53	/* 'IPLS' */
 #define	IPLOG_SIZE	sizeof(iplog_t)
 
 typedef	struct	ipflog	{
 #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \
         (defined(OpenBSD) && (OpenBSD >= 199603))
 #else
 	u_int	fl_unit;
 #endif
 	u_32_t		fl_rule;
 	u_32_t		fl_flags;
 	u_32_t		fl_lflags;
 	u_32_t		fl_logtag;
 	ipftag_t	fl_nattag;
 	u_short		fl_plen;	/* extra data after hlen */
 	u_short		fl_loglevel;	/* syslog log level */
 	char		fl_group[FR_GROUPLEN];
 	u_char		fl_hlen;	/* length of IP headers saved */
 	u_char		fl_dir;
 	u_char		fl_breason;	/* from fin_reason */
 	u_char		fl_family;	/* address family of packet logged */
 	char		fl_ifname[LIFNAMSIZ];
 } ipflog_t;
 
 #ifndef	IPF_LOGGING
 # define	IPF_LOGGING	0
 #endif
 #ifndef	IPF_DEFAULT_PASS
 # define	IPF_DEFAULT_PASS	FR_PASS
 #endif
 
 #define	DEFAULT_IPFLOGSIZE	32768
 #ifndef	IPFILTER_LOGSIZE
 # define	IPFILTER_LOGSIZE	DEFAULT_IPFLOGSIZE
 #else
 # if IPFILTER_LOGSIZE < 8192
 #  error IPFILTER_LOGSIZE too small.  Must be >= 8192
 # endif
 #endif
 
 #define	IPF_OPTCOPY	0x07ff00	/* bit mask of copied options */
 
 /*
  * Device filenames for reading log information.  Use ipf on Solaris2 because
  * ipl is already a name used by something else.
  */
 #ifndef	IPL_NAME
 # if	SOLARIS
 #  define	IPL_NAME	"/dev/ipf"
 # else
 #  define	IPL_NAME	"/dev/ipl"
 # endif
 #endif
 /*
  * Pathnames for various IP Filter control devices.  Used by LKM
  * and userland, so defined here.
  */
 #define	IPNAT_NAME	"/dev/ipnat"
 #define	IPSTATE_NAME	"/dev/ipstate"
 #define	IPAUTH_NAME	"/dev/ipauth"
 #define	IPSYNC_NAME	"/dev/ipsync"
 #define	IPSCAN_NAME	"/dev/ipscan"
 #define	IPLOOKUP_NAME	"/dev/iplookup"
 
 #define	IPL_LOGIPF	0	/* Minor device #'s for accessing logs */
 #define	IPL_LOGNAT	1
 #define	IPL_LOGSTATE	2
 #define	IPL_LOGAUTH	3
 #define	IPL_LOGSYNC	4
 #define	IPL_LOGSCAN	5
 #define	IPL_LOGLOOKUP	6
 #define	IPL_LOGCOUNT	7
 #define	IPL_LOGMAX	7
 /*
  * Number of slots needed to index every minor device (0..IPL_LOGMAX); used
  * to dimension per-device arrays.  Parenthesized so the sum stays intact
  * when the macro appears inside a larger expression such as a product.
  */
 #define	IPL_LOGSIZE	(IPL_LOGMAX + 1)
 #define	IPL_LOGALL	-1
 #define	IPL_LOGNONE	-2
 
 /*
  * For SIOCGETFS
  */
 typedef	struct	friostat	{
 	ipf_statistics_t f_st[2];
 	frentry_t	*f_ipf[2][2];
 	frentry_t	*f_acct[2][2];
 	frentry_t	*f_auth;
 	struct frgroup	*f_groups[IPL_LOGSIZE][2];
 	u_long		f_froute[2];
 	u_long		f_log_ok;
 	u_long		f_log_fail;
 	u_long		f_rb_no_mem;
 	u_long		f_rb_node_max;
 	u_32_t		f_ticks;
 	int		f_locks[IPL_LOGSIZE];
 	int		f_defpass;	/* default pass - from fr_pass */
 	int		f_active;	/* 1 or 0 - active rule set */
 	int		f_running;	/* 1 if running, else 0 */
 	int		f_logging;	/* 1 if enabled, else 0 */
 	int		f_features;
 	char		f_version[32];	/* version string */
 } friostat_t;
 
 #define	f_fin		f_ipf[0]
 #define	f_fout		f_ipf[1]
 #define	f_acctin	f_acct[0]
 #define	f_acctout	f_acct[1]
 
 #define	IPF_FEAT_LKM		0x001
 #define	IPF_FEAT_LOG		0x002
 #define	IPF_FEAT_LOOKUP		0x004
 #define	IPF_FEAT_BPF		0x008
 #define	IPF_FEAT_COMPILED	0x010
 #define	IPF_FEAT_CKSUM		0x020
 #define	IPF_FEAT_SYNC		0x040
 #define	IPF_FEAT_SCAN		0x080
 #define	IPF_FEAT_IPV6		0x100
 
 typedef struct	optlist {
 	u_short ol_val;
 	int	ol_bit;
 } optlist_t;
 
 
 /*
  * Group list structure.
  */
 typedef	struct frgroup {
 	struct frgroup	*fg_next;
 	struct frentry	*fg_head;
 	struct frentry	*fg_start;
 	struct frgroup	**fg_set;
 	u_32_t		fg_flags;
 	int		fg_ref;
 	char		fg_name[FR_GROUPLEN];
 } frgroup_t;
 
 /* Name of group g, or the "" literal when its name buffer starts with NUL. */
 #define	FG_NAME(g)	((g)->fg_name[0] != '\0' ? (g)->fg_name : "")
 
 
 /*
  * Used by state and NAT tables
  */
 typedef struct icmpinfo {
 	u_short		ici_id;
 	u_short		ici_seq;
 	u_char		ici_type;
 } icmpinfo_t;
 
 typedef struct udpinfo {
 	u_short		us_sport;
 	u_short		us_dport;
 } udpinfo_t;
 
 
 typedef	struct	tcpdata	{
 	u_32_t		td_end;
 	u_32_t		td_maxend;
 	u_32_t		td_maxwin;
 	u_32_t		td_winscale;
 	u_32_t		td_maxseg;
 	int		td_winflags;
 } tcpdata_t;
 
 #define	TCP_WSCALE_MAX		14
 
 #define	TCP_WSCALE_SEEN		0x00000001
 #define	TCP_WSCALE_FIRST	0x00000002
 #define	TCP_SACK_PERMIT		0x00000004
 
 
 typedef	struct tcpinfo {
 	u_32_t		ts_sport;
 	u_32_t		ts_dport;
 	tcpdata_t	ts_data[2];
 } tcpinfo_t;
 
 
 /*
  * Structures to define a GRE header as seen in a packet.
  */
 struct	grebits	{
 #if defined(sparc)
 	u_32_t		grb_ver:3;
 	u_32_t		grb_flags:3;
 	u_32_t		grb_A:1;
 	u_32_t		grb_recur:1;
 	u_32_t		grb_s:1;
 	u_32_t		grb_S:1;
 	u_32_t		grb_K:1;
 	u_32_t		grb_R:1;
 	u_32_t		grb_C:1;
 #else
 	u_32_t		grb_C:1;
 	u_32_t		grb_R:1;
 	u_32_t		grb_K:1;
 	u_32_t		grb_S:1;
 	u_32_t		grb_s:1;
 	u_32_t		grb_recur:1;
 	u_32_t		grb_A:1;
 	u_32_t		grb_flags:3;
 	u_32_t		grb_ver:3;
 #endif
 	u_short		grb_ptype;
 };
 
 typedef	struct	grehdr	{
 	union	{
 		struct	grebits	gru_bits;
 		u_short	gru_flags;
 	} gr_un;
 	u_short		gr_len;
 	u_short		gr_call;
 } grehdr_t;
 
 #define	gr_flags	gr_un.gru_flags
 #define	gr_bits		gr_un.gru_bits
 #define	gr_ptype	gr_bits.grb_ptype
 #define	gr_C		gr_bits.grb_C
 #define	gr_R		gr_bits.grb_R
 #define	gr_K		gr_bits.grb_K
 #define	gr_S		gr_bits.grb_S
 #define	gr_s		gr_bits.grb_s
 #define	gr_recur	gr_bits.grb_recur
 #define	gr_A		gr_bits.grb_A
 #define	gr_ver		gr_bits.grb_ver
 
 /*
  * GRE information tracked by "keep state"
  */
 typedef	struct	greinfo	{
 	u_short		gs_call[2];
 	u_short		gs_flags;
 	u_short		gs_ptype;
 } greinfo_t;
 
 #define	GRE_REV(x)	((ntohs(x) >> 13) & 7)
 
 
 /*
  * Format of an Authentication header
  */
 typedef	struct	authhdr	{
 	u_char		ah_next;
 	u_char		ah_plen;
 	u_short		ah_reserved;
 	u_32_t		ah_spi;
 	u_32_t		ah_seq;
 	/* Following the sequence number field is 0 or more bytes of */
 	/* authentication data, as specified by ah_plen - RFC 2402.  */
 } authhdr_t;
 
 
 /*
  * Timeout tail queue list member
  */
 typedef	struct	ipftqent	{
 	struct ipftqent **tqe_pnext;
 	struct ipftqent *tqe_next;
 	struct	ipftq	*tqe_ifq;
 	void		*tqe_parent;	/* pointer back to NAT/state struct */
 	u_32_t		tqe_die;	/* when this entry is to die */
 	u_32_t		tqe_touched;
 	int		tqe_flags;	/* presumably TQE_* bits; confirm */
 	int		tqe_state[2];	/* current state of this entry */
 } ipftqent_t;
 
 #define	TQE_RULEBASED	0x00000001
 #define	TQE_DELETE	0x00000002
 
 
 /*
  * Timeout tail queue head for IPFilter
  */
 typedef struct  ipftq   {
 	ipfmutex_t	ifq_lock;
 	u_int		ifq_ttl;
 	ipftqent_t	*ifq_head;
 	ipftqent_t	**ifq_tail;
 	struct ipftq	*ifq_next;
 	struct ipftq	**ifq_pnext;
 	int		ifq_ref;
 	u_int		ifq_flags;
 } ipftq_t;
 
 #define	IFQF_USER	0x01		/* User defined aging */
 #define	IFQF_DELETE	0x02		/* Marked for deletion */
 #define	IFQF_PROXY	0x04		/* Timeout queue in use by a proxy */
 
 /*
  * IPFTQ_INIT(x, y, z) prepares the timeout queue head pointed to by x:
  * ifq_ttl is set to y, the list is emptied (ifq_head = NULL with ifq_tail
  * pointing back at ifq_head), ifq_ref starts at 1 and ifq_lock is
  * initialised through MUTEX_INIT with z as its second argument.
  * The remaining members (ifq_next, ifq_pnext, ifq_flags) are not touched.
  */
 #define	IPFTQ_INIT(x,y,z)	do {			\
 					(x)->ifq_ttl = (y);	\
 					(x)->ifq_head = NULL;	\
 					(x)->ifq_ref = 1;	\
 					(x)->ifq_tail = &(x)->ifq_head; \
 					MUTEX_INIT(&(x)->ifq_lock, (z)); \
 				} while (0)
 
 #define	IPF_HZ_MULT	1
 #define	IPF_HZ_DIVIDE	2		/* How many times a second ipfilter */
 					/* checks its timeout queues.       */
 #define	IPF_TTLVAL(x)	(((x) / IPF_HZ_MULT) * IPF_HZ_DIVIDE)
 
 typedef	int	(*ipftq_delete_fn_t)(struct ipf_main_softc_s *, void *);
 
 
 /*
  * Object structure description.  For passing through in ioctls.
  */
 typedef	struct	ipfobj	{
 	u_32_t		ipfo_rev;	/* IPFilter version number */
 	u_32_t		ipfo_size;	/* size of object at ipfo_ptr */
 	void		*ipfo_ptr;	/* pointer to object */
 	int		ipfo_type;	/* type of object being pointed to */
 	int		ipfo_offset;	/* bytes from ipfo_ptr where to start */
 	int		ipfo_retval;	/* return value */
 	u_char		ipfo_xxxpad[28];	/* reserved for future use */
 } ipfobj_t;
 
 #define	IPFOBJ_FRENTRY		0	/* struct frentry */
 #define	IPFOBJ_IPFSTAT		1	/* struct friostat */
 #define	IPFOBJ_IPFINFO		2	/* struct fr_info */
 #define	IPFOBJ_AUTHSTAT		3	/* struct fr_authstat */
 #define	IPFOBJ_FRAGSTAT		4	/* struct ipfrstat */
 #define	IPFOBJ_IPNAT		5	/* struct ipnat */
 #define	IPFOBJ_NATSTAT		6	/* struct natstat */
 #define	IPFOBJ_STATESAVE	7	/* struct ipstate_save */
 #define	IPFOBJ_NATSAVE		8	/* struct nat_save */
 #define	IPFOBJ_NATLOOKUP	9	/* struct natlookup */
 #define	IPFOBJ_IPSTATE		10	/* struct ipstate */
 #define	IPFOBJ_STATESTAT	11	/* struct ips_stat */
 #define	IPFOBJ_FRAUTH		12	/* struct frauth */
 #define	IPFOBJ_TUNEABLE		13	/* struct ipftune */
 #define	IPFOBJ_NAT		14	/* struct nat */
 #define	IPFOBJ_IPFITER		15	/* struct ipfruleiter */
 #define	IPFOBJ_GENITER		16	/* struct ipfgeniter */
 #define	IPFOBJ_GTABLE		17	/* struct ipftable */
 #define	IPFOBJ_LOOKUPITER	18	/* struct ipflookupiter */
 #define	IPFOBJ_STATETQTAB	19	/* struct ipftq * NSTATES */
 #define	IPFOBJ_IPFEXPR		20
 #define	IPFOBJ_PROXYCTL		21	/* struct ap_ctl */
 #define	IPFOBJ_FRIPF		22	/* struct fripf */
 #define	IPFOBJ_COUNT		23	/* How many #defines are above this? */
 
 
 typedef	union	ipftunevalptr	{
 	void		*ipftp_void;
 	u_long		*ipftp_long;
 	u_int		*ipftp_int;
 	u_short		*ipftp_short;
 	u_char		*ipftp_char;
 	u_long		ipftp_offset;
 } ipftunevalptr_t;
 
 typedef	union	ipftuneval	{
 	u_long		ipftu_long;
 	u_int		ipftu_int;
 	u_short		ipftu_short;
 	u_char		ipftu_char;
 } ipftuneval_t;
 
 struct ipftuneable;
 typedef	int (* ipftunefunc_t) __P((struct ipf_main_softc_s *, struct ipftuneable *, ipftuneval_t *));
 
 typedef	struct	ipftuneable	{
 	ipftunevalptr_t	ipft_una;
 	const char	*ipft_name;
 	u_long		ipft_min;
 	u_long		ipft_max;
 	int		ipft_sz;
 	int		ipft_flags;
 	struct ipftuneable *ipft_next;
 	ipftunefunc_t	ipft_func;
 } ipftuneable_t;
 
 #define	ipft_addr	ipft_una.ipftp_void
 #define	ipft_plong	ipft_una.ipftp_long
 #define	ipft_pint	ipft_una.ipftp_int
 #define	ipft_pshort	ipft_una.ipftp_short
 #define	ipft_pchar	ipft_una.ipftp_char
 
 #define	IPFT_RDONLY	1	/* read-only */
 #define	IPFT_WRDISABLED	2	/* write when disabled only */
 
 typedef	struct	ipftune	{
 	void    	*ipft_cookie;
 	ipftuneval_t	ipft_un;
 	u_long  	ipft_min;
 	u_long  	ipft_max;
 	int		ipft_sz;
 	int		ipft_flags;
 	char		ipft_name[80];
 } ipftune_t;
 
 #define	ipft_vlong	ipft_un.ipftu_long
 #define	ipft_vint	ipft_un.ipftu_int
 #define	ipft_vshort	ipft_un.ipftu_short
 #define	ipft_vchar	ipft_un.ipftu_char
 
 /*
  * Hash table header
  *
  * IPFHASH(x, y) declares a typedef named y for an unnamed struct holding a
  * read/write lock (ipfh_lock) and a pointer to struct x (ipfh_head).  The
  * expansion has no trailing semicolon; the user of the macro supplies it.
  */
 #define	IPFHASH(x,y)	typedef struct { 			\
 				ipfrwlock_t	ipfh_lock;	\
 				struct	x	*ipfh_head;	\
 				} y
 
 /*
 ** HPUX Port
 */
 #ifdef __hpux
 /* HP-UX locking sequence deadlock detection module lock MAJOR ID */
 # define	IPF_SMAJ	0	/* temp assignment XXX, not critical */
 #endif
 
 #if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \
     (__FreeBSD_version >= 220000)
 # define	CDEV_MAJOR	79
 #endif
 
 /*
  * Post NetBSD 1.2 has the PFIL interface for packet filters.  This turns
  * on those hooks.  We don't need any special mods in non-IP Filter code
  * with this!
  */
 #if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \
     (defined(NetBSD1_2) && NetBSD1_2 > 1) || \
     (defined(__FreeBSD__) && (__FreeBSD_version >= 500043))
 # if (defined(NetBSD) && NetBSD >= 199905)
 #  define PFIL_HOOKS
 # endif
 # ifdef PFIL_HOOKS
 #  define NETBSD_PF
 # endif
 #endif
 
 #ifdef _KERNEL
 # define	FR_VERBOSE(verb_pr)
 # define	FR_DEBUG(verb_pr)
 #else
 extern	void	ipfkdebug __P((char *, ...));
 extern	void	ipfkverbose __P((char *, ...));
 # define	FR_VERBOSE(verb_pr)	ipfkverbose verb_pr
 # define	FR_DEBUG(verb_pr)	ipfkdebug verb_pr
 #endif
 
 /*
  * Rule iterator descriptor (listed as the object for IPFOBJ_IPFITER).
  * iri_inout takes one of the F_IN/F_OUT/F_ACIN/F_ACOUT values defined
  * below; iri_rule points at a frentry_t.
  * NOTE(review): the exact meaning of iri_active and iri_nrules is not
  * visible here - presumably the rule set selector and the number of rules
  * to fetch; confirm against the ioctl handler.
  */
 typedef	struct	ipfruleiter {
 	int		iri_inout;
 	char		iri_group[FR_GROUPLEN];
 	int		iri_active;
 	int		iri_nrules;
 	int		iri_v;		/* No longer used (compatibility) */
 	frentry_t	*iri_rule;
 } ipfruleiter_t;
 
 /*
  * Values for iri_inout
  */
 #define	F_IN	0
 #define	F_OUT	1
 #define	F_ACIN	2
 #define	F_ACOUT	3
 
 
 typedef	struct	ipfgeniter {
 	int	igi_type;
 	int	igi_nitems;
 	void	*igi_data;
 } ipfgeniter_t;
 
 #define	IPFGENITER_IPF		0
 #define	IPFGENITER_NAT		1
 #define	IPFGENITER_IPNAT	2
 #define	IPFGENITER_FRAG		3
 #define	IPFGENITER_AUTH		4
 #define	IPFGENITER_STATE	5
 #define	IPFGENITER_NATFRAG	6
 #define	IPFGENITER_HOSTMAP	7
 #define	IPFGENITER_LOOKUP	8
 
 typedef	struct	ipftable {
 	int	ita_type;
 	void	*ita_table;
 } ipftable_t;
 
 #define	IPFTABLE_BUCKETS	1
 #define	IPFTABLE_BUCKETS_NATIN	2
 #define	IPFTABLE_BUCKETS_NATOUT	3
 
 
 typedef struct ipf_v4_masktab_s {
 	u_32_t	imt4_active[33];
 	int	imt4_masks[33];
 	int	imt4_max;
 } ipf_v4_masktab_t;
 
 typedef struct ipf_v6_masktab_s {
 	i6addr_t	imt6_active[129];
 	int		imt6_masks[129];
 	int		imt6_max;
 } ipf_v6_masktab_t;
 
 
 /*
  *
  */
 typedef struct ipftoken {
 	struct ipftoken	*ipt_next;
 	struct ipftoken	**ipt_pnext;
 	void		*ipt_ctx;
 	void		*ipt_data;
 	u_long		ipt_die;
 	int		ipt_type;
 	int		ipt_uid;
 	int		ipt_subtype;
 	int		ipt_ref;
 	int		ipt_complete;
 } ipftoken_t;
 
 
 /*
  *
  */
 typedef struct ipfexp {
 	int		ipfe_cmd;
 	int		ipfe_not;
 	int		ipfe_narg;
 	int		ipfe_size;
 	int		ipfe_arg0[1];
 } ipfexp_t;
 
 /*
  * Currently supported commands (ipfe_cmd).
  * The 32 bits are split up as follows (one letter per hex digit):
  * aabbcccc
  * aa = 0 = packet matching, 1 = meta data matching
  * bb = IP protocol number
  * cccc = command
  */
 #define	IPF_EXP_IP_PR		0x00000001
 #define	IPF_EXP_IP_ADDR		0x00000002
 #define	IPF_EXP_IP_SRCADDR	0x00000003
 #define	IPF_EXP_IP_DSTADDR	0x00000004
 #define	IPF_EXP_IP6_ADDR	0x00000005
 #define	IPF_EXP_IP6_SRCADDR	0x00000006
 #define	IPF_EXP_IP6_DSTADDR	0x00000007
 #define	IPF_EXP_TCP_FLAGS	0x00060001
 #define	IPF_EXP_TCP_PORT	0x00060002
 #define	IPF_EXP_TCP_SPORT	0x00060003
 #define	IPF_EXP_TCP_DPORT	0x00060004
 #define	IPF_EXP_UDP_PORT	0x00110002
 #define	IPF_EXP_UDP_SPORT	0x00110003
 #define	IPF_EXP_UDP_DPORT	0x00110004
 #define	IPF_EXP_IDLE_GT		0x01000001
 #define	IPF_EXP_TCP_STATE	0x01060002
 #define	IPF_EXP_END		0xffffffff
 
 #define	ONE_DAY			IPF_TTLVAL(1 * 86400)   /* 1 day */
 #define	FIVE_DAYS		(5 * ONE_DAY)
 
 typedef struct ipf_main_softc_s {
 	struct ipf_main_softc_s *ipf_next;
 	ipfmutex_t	ipf_rw;
 	ipfmutex_t      ipf_timeoutlock;
 	ipfrwlock_t     ipf_mutex;
 	ipfrwlock_t	ipf_frag;
 	ipfrwlock_t	ipf_global;
 	ipfrwlock_t	ipf_tokens;
 	ipfrwlock_t	ipf_state;
 	ipfrwlock_t	ipf_nat;
 	ipfrwlock_t	ipf_natfrag;
 	ipfrwlock_t	ipf_poolrw;
 	int		ipf_dynamic_softc;
 	int		ipf_refcnt;
 	int		ipf_running;
 	int		ipf_flags;
 	int		ipf_active;
 	int		ipf_control_forwarding;
 	int		ipf_update_ipid;
 	int		ipf_chksrc;	/* causes a system crash if enabled */
 	int		ipf_pass;
 	int		ipf_minttl;
 	int		ipf_icmpminfragmtu;
 	int		ipf_interror;	/* Should be in a struct that is per  */
 					/* thread or process. Does not belong */
 					/* here but there's a lot more work   */
 					/* in doing that properly. For now,   */
 					/* it is squatting. */
 	u_int		ipf_tcpidletimeout;
 	u_int		ipf_tcpclosewait;
 	u_int		ipf_tcplastack;
 	u_int		ipf_tcptimewait;
 	u_int		ipf_tcptimeout;
 	u_int		ipf_tcpsynsent;
 	u_int		ipf_tcpsynrecv;
 	u_int		ipf_tcpclosed;
 	u_int		ipf_tcphalfclosed;
 	u_int		ipf_udptimeout;
 	u_int		ipf_udpacktimeout;
 	u_int		ipf_icmptimeout;
 	u_int		ipf_icmpacktimeout;
 	u_int		ipf_iptimeout;
 	u_long		ipf_ticks;
 	u_long		ipf_userifqs;
 	u_long		ipf_rb_no_mem;
 	u_long		ipf_rb_node_max;
 	u_long		ipf_frouteok[2];
 	ipftuneable_t	*ipf_tuners;
 	void		*ipf_frag_soft;
 	void		*ipf_nat_soft;
 	void		*ipf_state_soft;
 	void		*ipf_auth_soft;
 	void		*ipf_proxy_soft;
 	void		*ipf_sync_soft;
 	void		*ipf_lookup_soft;
 	void		*ipf_log_soft;
 	struct frgroup	*ipf_groups[IPL_LOGSIZE][2];
 	frentry_t	*ipf_rules[2][2];
 	frentry_t	*ipf_acct[2][2];
 	frentry_t	*ipf_rule_explist[2];
 	ipftoken_t	*ipf_token_head;
 	ipftoken_t	**ipf_token_tail;
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) && \
     defined(_KERNEL)
 	struct callout ipf_slow_ch;
 #endif
 #if defined(linux) && defined(_KERNEL)
 	struct timer_list	ipf_timer;
 #endif
 #if NETBSD_GE_REV(104040000)
 	struct callout	ipf_slow_ch;
 #endif
 #if SOLARIS
 # if SOLARIS2 >= 7
 	timeout_id_t	ipf_slow_ch;
 # else
 	int		ipf_slow_ch;
 # endif
 #endif
 #if defined(_KERNEL)
 # if SOLARIS
 	struct pollhead	ipf_poll_head[IPL_LOGSIZE];
 	void		*ipf_dip;
 #  if defined(INSTANCES)
 	int		ipf_get_loopback;
 	u_long		ipf_idnum;
 	net_handle_t	ipf_nd_v4;
 	net_handle_t	ipf_nd_v6;
 	hook_t		*ipf_hk_v4_in;
 	hook_t		*ipf_hk_v4_out;
 	hook_t		*ipf_hk_v4_nic;
 	hook_t		*ipf_hk_v6_in;
 	hook_t		*ipf_hk_v6_out;
 	hook_t		*ipf_hk_v6_nic;
 	hook_t		*ipf_hk_loop_v4_in;
 	hook_t		*ipf_hk_loop_v4_out;
 	hook_t		*ipf_hk_loop_v6_in;
 	hook_t		*ipf_hk_loop_v6_out;
 #  endif
 # else
 #  if defined(linux) && defined(_KERNEL)
 	struct poll_table_struct	ipf_selwait[IPL_LOGSIZE];
 	wait_queue_head_t		iplh_linux[IPL_LOGSIZE];
 #  else
 	struct selinfo	ipf_selwait[IPL_LOGSIZE];
 #  endif
 # endif
 #endif
 	void		*ipf_slow;
 	ipf_statistics_t ipf_stats[2];
 	u_char		ipf_iss_secret[32];
 	u_short		ipf_ip_id;
 } ipf_main_softc_t;
 
 /*
  * IPFERROR(_e) records the error number _e in softc->ipf_interror and then
  * hands it to DT1(user_error, int, _e).  The macro names `softc' directly,
  * so a variable of that name with an ipf_interror member must be in scope
  * wherever it is expanded.  Note that _e is evaluated twice.
  */
 #define	IPFERROR(_e)	do { softc->ipf_interror = (_e); \
 			     DT1(user_error, int, _e); \
 			} while (0)
 
 #ifndef	_KERNEL
 extern	int	ipf_check __P((void *, struct ip *, int, void *, int, mb_t **));
 extern	int	(*ipf_checkp) __P((ip_t *, int, void *, int, mb_t **));
 extern	struct	ifnet *get_unit __P((char *, int));
 extern	char	*get_ifname __P((struct ifnet *));
 extern	int	ipfioctl __P((ipf_main_softc_t *, int, ioctlcmd_t,
 			      caddr_t, int));
 extern	void	m_freem __P((mb_t *));
 extern	size_t	msgdsize __P((mb_t *));
 extern	int	bcopywrap __P((void *, void *, size_t));
+extern	void	ip_fillid(struct ip *);
 #else /* #ifndef _KERNEL */
 # if defined(__NetBSD__) && defined(PFIL_HOOKS)
 extern	void	ipfilterattach __P((int));
 # endif
 extern	int	ipl_enable __P((void));
 extern	int	ipl_disable __P((void));
 # ifdef MENTAT
 extern	int	ipf_check __P((void *, struct ip *, int, void *, int, void *,
 			       mblk_t **));
 #  if SOLARIS
 extern	void	ipf_prependmbt(fr_info_t *, mblk_t *);
 #   if SOLARIS2 >= 7
 extern	int	ipfioctl __P((dev_t, int, intptr_t, int, cred_t *, int *));
 #   else
 extern	int	ipfioctl __P((dev_t, int, int *, int, cred_t *, int *));
 #   endif
 #  endif
 #  ifdef __hpux
 extern	int	ipfioctl __P((dev_t, int, caddr_t, int));
 extern	int	ipf_select __P((dev_t, int));
 #  endif
 extern	int	ipf_qout __P((queue_t *, mblk_t *));
 # else /* MENTAT */
 extern	int	ipf_check __P((void *, struct ip *, int, void *, int, mb_t **));
 extern	int	(*fr_checkp) __P((ip_t *, int, void *, int, mb_t **));
 extern	size_t	mbufchainlen __P((mb_t *));
 #  ifdef	__sgi
 #   include <sys/cred.h>
 extern	int	ipfioctl __P((dev_t, int, caddr_t, int, cred_t *, int *));
 extern	int	ipfilter_sgi_attach __P((void));
 extern	void	ipfilter_sgi_detach __P((void));
 extern	void	ipfilter_sgi_intfsync __P((void));
 #  else
 #   ifdef	IPFILTER_LKM
 extern	int	ipf_identify __P((char *));
 #   endif
 #   if BSDOS_GE_REV(199510) || FREEBSD_GE_REV(220000) || \
       (defined(NetBSD) && (NetBSD >= 199511)) || defined(__OpenBSD__)
 #    if defined(__NetBSD__) || BSDOS_GE_REV(199701) || \
        defined(__OpenBSD__) || FREEBSD_GE_REV(300000)
 #     if (__FreeBSD_version >= 500024)
 #      if (__FreeBSD_version >= 502116)
 extern	int	ipfioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *));
 #      else
 extern	int	ipfioctl __P((dev_t, u_long, caddr_t, int, struct thread *));
 #      endif /* __FreeBSD_version >= 502116 */
 #     else
 #      if  NETBSD_GE_REV(499001000)
 extern	int	ipfioctl __P((dev_t, u_long, void *, int, struct lwp *));
 #       else
 #       if  NETBSD_GE_REV(399001400)
 extern	int	ipfioctl __P((dev_t, u_long, caddr_t, int, struct lwp *));
 #       else
 extern	int	ipfioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
 #       endif
 #      endif
 #     endif /* __FreeBSD_version >= 500024 */
 #    else
 extern	int	ipfioctl __P((dev_t, int, caddr_t, int, struct proc *));
 #    endif
 #   else
 #    ifdef linux
 extern	int	ipfioctl __P((struct inode *, struct file *, u_int, u_long));
 #    else
 extern	int	ipfioctl __P((dev_t, int, caddr_t, int));
 #    endif
 #   endif /* (_BSDI_VERSION >= 199510) */
 #  endif /* __ sgi */
 # endif /* MENTAT */
 
 # if defined(__FreeBSD_version)
 extern	int	ipf_pfil_hook __P((void));
 extern	int	ipf_pfil_unhook __P((void));
 extern	void	ipf_event_reg __P((void));
 extern	void	ipf_event_dereg __P((void));
 # endif
 
 # if defined(INSTANCES)
 extern	ipf_main_softc_t	*ipf_find_softc __P((u_long));
 extern	int	ipf_set_loopback __P((ipf_main_softc_t *, ipftuneable_t *,
 				      ipftuneval_t *));
 # endif
 
 #endif /* #ifndef _KERNEL */
 
 extern	char	*memstr __P((const char *, char *, size_t, size_t));
 extern	int	count4bits __P((u_32_t));
 #ifdef USE_INET6
 extern	int	count6bits __P((u_32_t *));
 #endif
 extern	int	frrequest __P((ipf_main_softc_t *, int, ioctlcmd_t, caddr_t,
 			       int, int));
 extern	char	*getifname __P((struct ifnet *));
 extern	int	ipfattach __P((ipf_main_softc_t *));
 extern	int	ipfdetach __P((ipf_main_softc_t *));
 extern	u_short	ipf_cksum __P((u_short *, int));
 extern	int	copyinptr __P((ipf_main_softc_t *, void *, void *, size_t));
 extern	int	copyoutptr __P((ipf_main_softc_t *, void *, void *, size_t));
 extern	int	ipf_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *));
 extern	int	ipf_inject __P((fr_info_t *, mb_t *));
 extern	int	ipf_inobj __P((ipf_main_softc_t *, void *, ipfobj_t *,
 			       void *, int));
 extern	int	ipf_inobjsz __P((ipf_main_softc_t *, void *, void *,
 				 int , int));
 extern	int	ipf_ioctlswitch __P((ipf_main_softc_t *, int, void *,
 				     ioctlcmd_t, int, int, void *));
 extern	int	ipf_ipf_ioctl __P((ipf_main_softc_t *, caddr_t, ioctlcmd_t,
 				   int, int, void *));
 extern	int	ipf_ipftune __P((ipf_main_softc_t *, ioctlcmd_t, void *));
 extern	int	ipf_matcharray_load __P((ipf_main_softc_t *, caddr_t,
 					 ipfobj_t *, int **));
 extern	int	ipf_matcharray_verify __P((int *, int));
 extern	int	ipf_outobj __P((ipf_main_softc_t *, void *, void *, int));
 extern	int	ipf_outobjk __P((ipf_main_softc_t *, ipfobj_t *, void *));
 extern	int	ipf_outobjsz __P((ipf_main_softc_t *, void *, void *,
 				  int, int));
 extern	void	*ipf_pullup __P((mb_t *, fr_info_t *, int));
 extern	int	ipf_resolvedest __P((ipf_main_softc_t *, char *,
 				     struct frdest *, int));
 extern	int	ipf_resolvefunc __P((ipf_main_softc_t *, void *));
 extern	void	*ipf_resolvenic __P((ipf_main_softc_t *, char *, int));
 extern	int	ipf_send_icmp_err __P((int, fr_info_t *, int));
 extern	int	ipf_send_reset __P((fr_info_t *));
 #if  (defined(__FreeBSD_version) && (__FreeBSD_version < 501000)) || \
      !defined(_KERNEL) || defined(linux)
 #endif
 extern	void	ipf_apply_timeout __P((ipftq_t *, u_int));
 extern	ipftq_t	*ipf_addtimeoutqueue __P((ipf_main_softc_t *, ipftq_t **,
 					  u_int));
 extern	void	ipf_deletequeueentry __P((ipftqent_t *));
 extern	int	ipf_deletetimeoutqueue __P((ipftq_t *));
 extern	void	ipf_freetimeoutqueue __P((ipf_main_softc_t *, ipftq_t *));
 extern	void	ipf_movequeue __P((u_long, ipftqent_t *, ipftq_t *,
 				   ipftq_t *));
 extern	void	ipf_queueappend __P((u_long, ipftqent_t *, ipftq_t *, void *));
 extern	void	ipf_queueback __P((u_long, ipftqent_t *));
 extern	int	ipf_queueflush __P((ipf_main_softc_t *, ipftq_delete_fn_t,
 				    ipftq_t *, ipftq_t *, u_int *, int, int));
 extern	void	ipf_queuefront __P((ipftqent_t *));
 extern	int	ipf_settimeout_tcp __P((ipftuneable_t *, ipftuneval_t *,
 					ipftq_t *));
 extern	int	ipf_checkv4sum __P((fr_info_t *));
 extern	int	ipf_checkl4sum __P((fr_info_t *));
 extern	int	ipf_ifpfillv4addr __P((int, struct sockaddr_in *,
 				      struct sockaddr_in *, struct in_addr *,
 				      struct in_addr *));
 extern	int	ipf_coalesce __P((fr_info_t *));
 #ifdef	USE_INET6
 extern	int	ipf_checkv6sum __P((fr_info_t *));
 extern	int	ipf_ifpfillv6addr __P((int, struct sockaddr_in6 *,
 				      struct sockaddr_in6 *, i6addr_t *,
 				      i6addr_t *));
 #endif
 
 extern	int	ipf_tune_add __P((ipf_main_softc_t *, ipftuneable_t *));
 extern	int	ipf_tune_add_array __P((ipf_main_softc_t *, ipftuneable_t *));
 extern	int	ipf_tune_del __P((ipf_main_softc_t *, ipftuneable_t *));
 extern	int	ipf_tune_del_array __P((ipf_main_softc_t *, ipftuneable_t *));
 extern	int	ipf_tune_array_link __P((ipf_main_softc_t *, ipftuneable_t *));
 extern	int	ipf_tune_array_unlink __P((ipf_main_softc_t *,
 					   ipftuneable_t *));
 extern	ipftuneable_t *ipf_tune_array_copy __P((void *, size_t,
 						ipftuneable_t *));
 
 extern int	ipf_pr_pullup __P((fr_info_t *, int));
 
 extern	int	ipf_flush __P((ipf_main_softc_t *, minor_t, int));
 extern	frgroup_t *ipf_group_add __P((ipf_main_softc_t *, char *, void *,
 				      u_32_t, minor_t, int));
 extern	void	ipf_group_del __P((ipf_main_softc_t *, frgroup_t *,
 				   frentry_t *));
 extern	int	ipf_derefrule __P((ipf_main_softc_t *, frentry_t **));
 extern	frgroup_t *ipf_findgroup __P((ipf_main_softc_t *, char *, minor_t,
 				      int, frgroup_t ***));
 
 extern	int	ipf_log_init __P((void));
 extern	int	ipf_log_bytesused __P((ipf_main_softc_t *, int));
 extern	int	ipf_log_canread __P((ipf_main_softc_t *, int));
 extern	int	ipf_log_clear __P((ipf_main_softc_t *, minor_t));
 extern	u_long  ipf_log_failures __P((ipf_main_softc_t *, int));
 extern	int	ipf_log_read __P((ipf_main_softc_t *, minor_t, uio_t *));
 extern	int	ipf_log_items __P((ipf_main_softc_t *, int, fr_info_t *,
 				   void **, size_t *, int *, int));
 extern	u_long  ipf_log_logok __P((ipf_main_softc_t *, int));
 extern	void	ipf_log_unload __P((ipf_main_softc_t *));
 extern	int 	ipf_log_pkt __P((fr_info_t *, u_int));
 
 extern	frentry_t	*ipf_acctpkt __P((fr_info_t *, u_32_t *));
 extern	u_short		fr_cksum __P((fr_info_t *, ip_t *, int, void *));
 extern	void		ipf_deinitialise __P((ipf_main_softc_t *));
 extern	int		ipf_deliverlocal __P((ipf_main_softc_t *, int, void *,
 					      i6addr_t *));
 extern	frentry_t 	*ipf_dstgrpmap __P((fr_info_t *, u_32_t *));
 extern	void		ipf_fixskip __P((frentry_t **, frentry_t *, int));
 extern	void		ipf_forgetifp __P((ipf_main_softc_t *, void *));
 extern	frentry_t 	*ipf_getrulen __P((ipf_main_softc_t *, int, char *,
 					   u_32_t));
 extern	int		ipf_ifpaddr __P((ipf_main_softc_t *, int, int, void *,
 					i6addr_t *, i6addr_t *));
 extern	void		ipf_inet_mask_add __P((int, ipf_v4_masktab_t *));
 extern	void		ipf_inet_mask_del __P((int, ipf_v4_masktab_t *));
 #ifdef	USE_INET6
 extern	void		ipf_inet6_mask_add __P((int, i6addr_t *,
 						ipf_v6_masktab_t *));
 extern	void		ipf_inet6_mask_del __P((int, i6addr_t *,
 						ipf_v6_masktab_t *));
 #endif
 extern	int		ipf_initialise __P((void));
 extern	int		ipf_lock __P((caddr_t, int *));
 extern  int		ipf_makefrip __P((int, ip_t *, fr_info_t *));
 extern	int		ipf_matchtag __P((ipftag_t *, ipftag_t *));
 extern	int		ipf_matchicmpqueryreply __P((int, icmpinfo_t *,
 						     struct icmp *, int));
 extern	u_32_t		ipf_newisn __P((fr_info_t *));
-extern	u_short		ipf_nextipid __P((fr_info_t *));
 extern	u_int		ipf_pcksum __P((fr_info_t *, int, u_int));
 extern	void		ipf_rule_expire __P((ipf_main_softc_t *));
 extern	int		ipf_scanlist __P((fr_info_t *, u_32_t));
 extern	frentry_t 	*ipf_srcgrpmap __P((fr_info_t *, u_32_t *));
 extern	int		ipf_tcpudpchk __P((fr_ip_t *, frtuc_t *));
 extern	int		ipf_verifysrc __P((fr_info_t *fin));
 extern	int		ipf_zerostats __P((ipf_main_softc_t *, char *));
 extern	int		ipf_getnextrule __P((ipf_main_softc_t *, ipftoken_t *,
 					     void *));
 extern	int		ipf_sync __P((ipf_main_softc_t *, void *));
 extern	int		ipf_token_deref __P((ipf_main_softc_t *, ipftoken_t *));
 extern	void		ipf_token_expire __P((ipf_main_softc_t *));
 extern	ipftoken_t	*ipf_token_find __P((ipf_main_softc_t *, int, int,
 					    void *));
 extern	int		ipf_token_del __P((ipf_main_softc_t *, int, int,
 					  void *));
 extern	void		ipf_token_mark_complete __P((ipftoken_t *));
 extern	int		ipf_genericiter __P((ipf_main_softc_t *, void *,
 					     int, void *));
 #ifdef	IPFILTER_LOOKUP
 extern	void		*ipf_resolvelookup __P((int, u_int, u_int,
 						lookupfunc_t *));
 #endif
 extern	u_32_t		ipf_random __P((void));
 
 extern	int		ipf_main_load __P((void));
 extern	void		*ipf_main_soft_create __P((void *));
 extern	void		ipf_main_soft_destroy __P((ipf_main_softc_t *));
 extern	int		ipf_main_soft_init __P((ipf_main_softc_t *));
 extern	int		ipf_main_soft_fini __P((ipf_main_softc_t *));
 extern	int		ipf_main_unload __P((void));
 extern	int		ipf_load_all __P((void));
 extern	int		ipf_unload_all __P((void));
 extern	void		ipf_destroy_all __P((ipf_main_softc_t *));
 extern	ipf_main_softc_t *ipf_create_all __P((void *));
 extern	int		ipf_init_all __P((ipf_main_softc_t *));
 extern	int		ipf_fini_all __P((ipf_main_softc_t *));
 extern	void		ipf_log_soft_destroy __P((ipf_main_softc_t *, void *));
 extern	void		*ipf_log_soft_create __P((ipf_main_softc_t *));
 extern	int		ipf_log_soft_init __P((ipf_main_softc_t *, void *));
 extern	int		ipf_log_soft_fini __P((ipf_main_softc_t *, void *));
 extern	int		ipf_log_main_load __P((void));
 extern	int		ipf_log_main_unload __P((void));
 
 
 extern	char	ipfilter_version[];
 #ifdef	USE_INET6
 extern	int	icmptoicmp6types[ICMP_MAXTYPE+1];
 extern	int	icmptoicmp6unreach[ICMP_MAX_UNREACH];
 extern	int	icmpreplytype6[ICMP6_MAXTYPE + 1];
 #endif
 #ifdef	IPFILTER_COMPAT
 extern	int	ipf_in_compat __P((ipf_main_softc_t *, ipfobj_t *, void *,int));
 extern	int	ipf_out_compat __P((ipf_main_softc_t *, ipfobj_t *, void *));
 #endif
 extern	int	icmpreplytype4[ICMP_MAXTYPE + 1];
 
 extern	int	ipf_ht_node_add __P((ipf_main_softc_t *, host_track_t *,
 				     int, i6addr_t *));
 extern	int	ipf_ht_node_del __P((host_track_t *, int, i6addr_t *));
 extern	void	ipf_rb_ht_flush __P((host_track_t *));
 extern	void	ipf_rb_ht_freenode __P((host_node_t *, void *));
 extern	void	ipf_rb_ht_init __P((host_track_t *));
 
 #endif	/* __IP_FIL_H__ */
Index: head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
===================================================================
--- head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 280970)
+++ head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 280971)
@@ -1,1476 +1,1448 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #if !defined(lint)
 static const char sccsid[] = "@(#)ip_fil.c	2.41 6/5/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$Id$";
 #endif
 
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define	KERNEL	1
 # define	_KERNEL	1
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_inet6.h"
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_random_ip_id.h"
 #endif
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/file.h>
 # include <sys/fcntl.h>
 # include <sys/filio.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 # include <sys/dirent.h>
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000)
 #include <sys/jail.h>
 #endif
 # include <sys/mbuf.h>
 # include <sys/sockopt.h>
 #if !defined(__hpux)
 # include <sys/mbuf.h>
 #endif
 #include <sys/socket.h>
 # include <sys/selinfo.h>
 # include <netinet/tcp_var.h>
 
 #include <net/if.h>
 # include <net/if_var.h>
 #  include <net/netisr.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000)
 #include <net/vnet.h>
 #else
 #define CURVNET_SET(arg)
 #define CURVNET_RESTORE()
 #endif
 #if defined(__osf__)
 # include <netinet/tcp_timer.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/tcpip.h>
 #include <netinet/ip_icmp.h>
 #include "netinet/ip_compat.h"
 #ifdef USE_INET6
 # include <netinet/icmp6.h>
 #endif
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_auth.h"
 #include "netinet/ip_sync.h"
 #include "netinet/ip_lookup.h"
 #include "netinet/ip_dstlist.h"
 #ifdef	IPFILTER_SCAN
 #include "netinet/ip_scan.h"
 #endif
 #include "netinet/ip_pool.h"
 # include <sys/malloc.h>
 #include <sys/kernel.h>
 #ifdef CSUM_DATA_VALID
 #include <machine/in_cksum.h>
 #endif
 extern	int	ip_optcopy __P((struct ip *, struct ip *));
 
 
 # ifdef IPFILTER_M_IPFILTER
 MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures");
 # endif
 
 
-static	u_short	ipid = 0;
 static	int	(*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **));
 static	int	ipf_send_ip __P((fr_info_t *, mb_t *));
 static void	ipf_timer_func __P((void *arg));
 int		ipf_locks_done = 0;
 
 ipf_main_softc_t ipfmain;
 
 # include <sys/conf.h>
 # if defined(NETBSD_PF)
 #  include <net/pfil.h>
 # endif /* NETBSD_PF */
 /*
  * We provide the ipf_checkp name just to minimize changes later.
  */
 int (*ipf_checkp) __P((void *, ip_t *ip, int hlen, void *ifp, int out, mb_t **mp));
 
 
 static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag;
 
 static void ipf_ifevent(void *arg);
 
 /*
  * Forward the opaque argument to ipf_sync() with a NULL second argument.
  */
 static void
 ipf_ifevent(arg)
 	void *arg;
 {
 	ipf_sync(arg, NULL);
 }
 
 
 
 /*
  * Adapter around ipf_check() for IPv4 packets: passes the packet at the
  * head of *mp to ipf_check() on the global ipfmain instance and returns
  * its result.  "arg" is not used.
  *
  * IPFilter expects everything in network byte order, so on FreeBSD
  * versions older than 1000019 ip_len and ip_off are converted with
  * htons() before the call and converted back with ntohs() afterwards,
  * but only when the result is 0 and *mp is still non-NULL.
  */
 static int
 ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	struct ip *iph;
 	int outbound, result;
 
 	iph = mtod(*mp, struct ip *);
 	outbound = (dir == PFIL_OUT);
 
 #if (__FreeBSD_version < 1000019)
 	iph->ip_len = htons(iph->ip_len);
 	iph->ip_off = htons(iph->ip_off);
 #endif
 
 	result = ipf_check(&ipfmain, iph, iph->ip_hl << 2, ifp, outbound, mp);
 
 #if (__FreeBSD_version < 1000019)
 	if (result == 0 && *mp != NULL) {
 		/*
 		 * Re-read the header through *mp, which ipf_check() was
 		 * given and could have updated.
 		 */
 		iph = mtod(*mp, struct ip *);
 		iph->ip_len = ntohs(iph->ip_len);
 		iph->ip_off = ntohs(iph->ip_off);
 	}
 #endif
 	return result;
 }
 
 # ifdef USE_INET6
 #  include <netinet/ip6.h>
 
 /*
  * Adapter around ipf_check() for IPv6 packets.  The header length passed
  * down is always sizeof(struct ip6_hdr); "arg" is not used.
  */
 static int
 ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	struct ip *hdr = mtod(*mp, struct ip *);
 	int outbound = (dir == PFIL_OUT);
 
 	return ipf_check(&ipfmain, hdr, sizeof(struct ip6_hdr), ifp,
 			 outbound, mp);
 }
 # endif
 #if	defined(IPFILTER_LKM)
 /*
  * Return 1 when the given name is exactly "ipl", 0 otherwise.
  */
 int ipf_identify(s)
 	char *s;
 {
 	return (strcmp(s, "ipl") == 0) ? 1 : 0;
 }
 #endif /* IPFILTER_LKM */
 
 
 /*
  * Periodic callout handler.  "arg" is the ipf_main_softc_t instance the
  * callout was armed with.
  *
  * While holding ipf_global for reading it runs ipf_slowtimer() if the
  * instance is running (ipf_running > 0), and re-arms itself when
  * ipf_running is 1 or -1.  The re-arm interval is
  * (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT ticks.
  */
 static void
 ipf_timer_func(arg)
 	void *arg;
 {
 	ipf_main_softc_t *softc = arg;
 	SPL_INT(s);
 
 	SPL_NET(s);
 	READ_ENTER(&softc->ipf_global);
 
         if (softc->ipf_running > 0)
 		ipf_slowtimer(softc);
 
 	/* Only reschedule in states 1 and -1; other values let the timer lapse. */
 	if (softc->ipf_running == -1 || softc->ipf_running == 1) {
 #if 0
 		softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2);
 #endif
 		/*
 		 * NOTE(review): the callout is re-initialised on every tick
 		 * before being reset; confirm this is intended.
 		 */
 		callout_init(&softc->ipf_slow_ch, CALLOUT_MPSAFE);
 		callout_reset(&softc->ipf_slow_ch,
 			(hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 			ipf_timer_func, softc);
 	}
 	RWLOCK_EXIT(&softc->ipf_global);
 	SPL_X(s);
 }
 
 
 int
 ipfattach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	SPL_NET(s);
 	if (softc->ipf_running > 0) {
 		SPL_X(s);
 		return EBUSY;
 	}
 
 	if (ipf_init_all(softc) < 0) {
 		SPL_X(s);
 		return EIO;
 	}
 
 
 	if (ipf_checkp != ipf_check) {
 		ipf_savep = ipf_checkp;
 		ipf_checkp = ipf_check;
 	}
 
 	bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait));
 	softc->ipf_running = 1;
 
 	if (softc->ipf_control_forwarding & 1)
 		V_ipforwarding = 1;
 
-	ipid = 0;
-
 	SPL_X(s);
 #if 0
 	softc->ipf_slow_ch = timeout(ipf_timer_func, softc,
 				     (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT);
 #endif
 	callout_init(&softc->ipf_slow_ch, CALLOUT_MPSAFE);
 	callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 		ipf_timer_func, softc);
 	return 0;
 }
 
 
 /*
  * Disable the filter by removing the hooks from the IP input/output
  * stream.
  *
  * Steps, in order: switch IP forwarding off when bit 1 of
  * ipf_control_forwarding is set, drain the slow timer callout, restore
  * the ipf_checkp hook from ipf_savep (unless NETBSD_PF is defined), call
  * ipf_fini_all(), and mark the instance with ipf_running = -2.
  * Always returns 0.
  */
 int
 ipfdetach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	if (softc->ipf_control_forwarding & 2)
 		V_ipforwarding = 0;
 
 	SPL_NET(s);
 
 #if 0
 	if (softc->ipf_slow_ch.callout != NULL)
 		untimeout(ipf_timer_func, softc, softc->ipf_slow_ch);
 	bzero(&softc->ipf_slow, sizeof(softc->ipf_slow));
 #endif
 	/* Stop the periodic timer before the instance is torn down. */
 	callout_drain(&softc->ipf_slow_ch);
 
 #ifndef NETBSD_PF
 	/* Put back whatever hook ipfattach() saved, then forget it. */
 	if (ipf_checkp != NULL)
 		ipf_checkp = ipf_savep;
 	ipf_savep = NULL;
 #endif
 
 	ipf_fini_all(softc);
 
 	/* The return value of ipf_fini_all() is not examined. */
 	softc->ipf_running = -2;
 
 	SPL_X(s);
 
 	return 0;
 }
 
 
 /*
  * Filter ioctl interface.
  *
  * Validates the request and then dispatches it to ipf_ioctlswitch() on
  * the global ipfmain instance.  Rejections, each of which also stores a
  * numeric tag in ipfmain.ipf_interror:
  *   EPERM - (BSD >= 199306 only) securelevel is at least 3 and the device
  *           was opened for writing;
  *   ENXIO - the minor number is outside 0..IPL_LOGMAX;
  *   EIO   - the filter is not running and the request is not one of the
  *           few commands permitted in that state.
  * Otherwise the value returned by ipf_ioctlswitch() is passed back.
  */
 int
 ipfioctl(dev, cmd, data, mode, p)
 	struct thread *p;
 #    define	p_cred	td_ucred
 #    define	p_uid	td_ucred->cr_ruid
 	struct cdev *dev;
 	ioctlcmd_t cmd;
 	caddr_t data;
 	int mode;
 {
 	int error, unit;
 	SPL_INT(s);
 
 #if (BSD >= 199306)
 	if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) {
 		ipfmain.ipf_interror = 130001;
 		return EPERM;
 	}
 #endif
 
 	unit = GET_MINOR(dev);
 	if (unit < 0 || unit > IPL_LOGMAX) {
 		ipfmain.ipf_interror = 130002;
 		return ENXIO;
 	}
 
 	if (ipfmain.ipf_running <= 0) {
 		/* Not running: only the IPL_LOGIPF device or an error query. */
 		if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) {
 			ipfmain.ipf_interror = 130003;
 			return EIO;
 		}
 		/* ...and only this short list of commands. */
 		if (!(cmd == SIOCIPFGETNEXT || cmd == SIOCIPFGET ||
 		      cmd == SIOCIPFSET || cmd == SIOCFRENB ||
 		      cmd == SIOCGETFS || cmd == SIOCGETFF ||
 		      cmd == SIOCIPFINTERROR)) {
 			ipfmain.ipf_interror = 130004;
 			return EIO;
 		}
 	}
 
 	SPL_NET(s);
 
 	CURVNET_SET(TD_TO_VNET(p));
 	error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p);
 	CURVNET_RESTORE();
 
 	SPL_X(s);
 
 	return error;
 }
 
 
 /*
  * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that
  * requires a large amount of setting up and isn't any more efficient.
  *
  * Builds a TCP RST in reply to the segment described by fin and hands it
  * to ipf_send_ip().  Returns -1 without sending anything when the
  * triggering segment itself carries RST, when ipf_checkl4sum() returns -1,
  * or when no mbuf can be obtained; otherwise returns whatever
  * ipf_send_ip() returns.
  */
 int
 ipf_send_reset(fin)
 	fr_info_t *fin;
 {
 	struct tcphdr *tcp, *tcp2;
 	int tlen = 0, hlen;
 	struct mbuf *m;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip;
 
 	tcp = fin->fin_dp;
 	if (tcp->th_flags & TH_RST)
 		return -1;		/* feedback loop */
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 
 	/*
 	 * Sequence space consumed by the offending segment: its payload
 	 * length plus one each for SYN and FIN.
 	 */
 	tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 			((tcp->th_flags & TH_SYN) ? 1 : 0) +
 			((tcp->th_flags & TH_FIN) ? 1 : 0);
 
 #ifdef USE_INET6
 	hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t);
 #else
 	hlen = sizeof(ip_t);
 #endif
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	/* Attach a cluster if the headers do not fit in a plain mbuf. */
 	if (sizeof(*tcp2) + hlen > MLEN) {
 		if (!(MCLGET(m, M_NOWAIT))) {
 			FREE_MB_T(m);
 			return -1;
 		}
 	}
 
 	m->m_len = sizeof(*tcp2) + hlen;
 #if (BSD >= 199103)
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #endif
 	ip = mtod(m, struct ip *);
 	bzero((char *)ip, hlen);
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 #endif
 	/* The reply swaps the ports of the segment it answers. */
 	tcp2 = (struct tcphdr *)((char *)ip + hlen);
 	tcp2->th_sport = tcp->th_dport;
 	tcp2->th_dport = tcp->th_sport;
 
 	/*
 	 * With ACK set on the incoming segment the RST takes its sequence
 	 * number from that ACK; otherwise it is sent with sequence 0 and
 	 * acknowledges the incoming sequence number plus tlen.
 	 */
 	if (tcp->th_flags & TH_ACK) {
 		tcp2->th_seq = tcp->th_ack;
 		tcp2->th_flags = TH_RST;
 		tcp2->th_ack = 0;
 	} else {
 		tcp2->th_seq = 0;
 		tcp2->th_ack = ntohl(tcp->th_seq);
 		tcp2->th_ack += tlen;
 		tcp2->th_ack = htonl(tcp2->th_ack);
 		tcp2->th_flags = TH_RST|TH_ACK;
 	}
 	TCP_X2_A(tcp2, 0);
 	TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2);
 	tcp2->th_win = tcp->th_win;
 	tcp2->th_sum = 0;
 	tcp2->th_urp = 0;
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = fin->fin_dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		tcp2->th_sum = in6_cksum(m, IPPROTO_TCP,
 					 sizeof(*ip6), sizeof(*tcp2));
 		return ipf_send_ip(fin, m);
 	}
 #endif
 	/*
 	 * The IP header was zeroed above and at this point holds only the
 	 * protocol, the TCP length and the two addresses, so checksumming
 	 * header plus TCP header together apparently stands in for the TCP
 	 * pseudo-header sum.  ip_len is set to the real total only after
 	 * the checksum has been computed.
 	 */
 	ip->ip_p = IPPROTO_TCP;
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	ip->ip_src.s_addr = fin->fin_daddr;
 	ip->ip_dst.s_addr = fin->fin_saddr;
 	tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2));
 	ip->ip_len = htons(hlen + sizeof(*tcp2));
 	return ipf_send_ip(fin, m);
 }
 
 
 /*
  * Complete the IP header of a locally generated packet held in m and send
  * it through ipf_fastroute().
  *
  * ip_len must be in network byte order when called.
  *
  * fin describes the packet being answered; it supplies the IP version,
  * the interface and, for IPv4, the TOS and IP id copied into the reply.
  * A scratch fr_info_t describing the new packet is built and passed on.
  *
  * Ownership of m passes to this function: it is either handed to
  * ipf_fastroute() or freed here.  Returns EINVAL for an unsupported IP
  * version, otherwise the result of ipf_fastroute().
  */
 static int
 ipf_send_ip(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	fr_info_t fnew;
 	ip_t *ip, *oip;
 	int hlen;
 
 	ip = mtod(m, ip_t *);
 	bzero((char *)&fnew, sizeof(fnew));
 	fnew.fin_main_soft = fin->fin_main_soft;
 
 	IP_V_A(ip, fin->fin_v);
 	switch (fin->fin_v)
 	{
 	case 4 :
 		oip = fin->fin_ip;
 		hlen = sizeof(*oip);
 		fnew.fin_v = 4;
 		fnew.fin_p = ip->ip_p;
 		fnew.fin_plen = ntohs(ip->ip_len);
 		IP_HL_A(ip, sizeof(*oip) >> 2);
 		ip->ip_tos = oip->ip_tos;
 		ip->ip_id = fin->fin_ip->ip_id;
 #if defined(FreeBSD) && (__FreeBSD_version > 460000)
 		ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0);
 #else
 		ip->ip_off = 0;
 #endif
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_sum = 0;
 		break;
 #ifdef USE_INET6
 	case 6 :
 	{
 		ip6_t *ip6 = (ip6_t *)ip;
 
 		ip6->ip6_vfc = 0x60;
 		ip6->ip6_hlim = IPDEFTTL;
 
 		hlen = sizeof(*ip6);
 		fnew.fin_p = ip6->ip6_nxt;
 		fnew.fin_v = 6;
 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
 		break;
 	}
 #endif
 	default :
 		/*
 		 * Callers give up the mbuf when they call us and never free
 		 * it afterwards, so it has to be released here or it leaks
 		 * (e.g. an IPv6 packet when USE_INET6 is not compiled in).
 		 */
 		FREE_MB_T(m);
 		return EINVAL;
 	}
 #ifdef IPSEC
 	m->m_pkthdr.rcvif = NULL;
 #endif
 
 	fnew.fin_ifp = fin->fin_ifp;
 	fnew.fin_flx = FI_NOCKSUM;
 	fnew.fin_m = m;
 	fnew.fin_ip = ip;
 	fnew.fin_mp = &m;
 	fnew.fin_hlen = hlen;
 	fnew.fin_dp = (char *)ip + hlen;
 	(void) ipf_makefrip(hlen, ip, &fnew);
 
 	return ipf_fastroute(m, &m, &fnew, NULL);
 }
 
 
 /*
  * Build an ICMP (or, for IPv6, ICMPv6) error in reply to the packet
  * described by fin and send it with ipf_send_ip().
  *
  * type - ICMP type, given in IPv4 terms; for IPv6 it is mapped through
  *        icmptoicmp6types[].
  * fin  - the offending packet; fin->fin_icode supplies the ICMP code.
  * dst  - 0: use an address of the receiving interface (looked up with
  *        ipf_ifpaddr()) as the source of the error; non-zero: use the
  *        offending packet's destination address.
  *
  * Returns -1 when the type or code is out of range, the layer 4 checksum
  * check fails, no mbuf is available, the interface address lookup fails
  * or the IP version is unsupported.  Returns 0 without sending anything
  * when the offending IPv4 packet is itself an ICMP message other than an
  * echo, timestamp, information or mask request.  Otherwise returns the
  * result of ipf_send_ip().
  */
 int
 ipf_send_icmp_err(type, fin, dst)
 	int type;
 	fr_info_t *fin;
 	int dst;
 {
 	int err, hlen, xtra, iclen, ohlen, avail, code;
 	struct in_addr dst4;
 	struct icmp *icmp;
 	struct mbuf *m;
 	i6addr_t dst6;
 	void *ifp;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip, *ip2;
 
 	if ((type < 0) || (type >= ICMP_MAXTYPE))
 		return -1;
 
 	code = fin->fin_icode;
 #ifdef USE_INET6
 #if 0
 	/* XXX Fix an off by one error: s/>/>=/
 	 was:
 	 if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int)))
 	 Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */
 #endif
 	/* code is used below as an index into icmptoicmp6unreach[]. */
 	if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
 		return -1;
 #endif
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	avail = MHLEN;
 
 	xtra = 0;
 	hlen = 0;
 	ohlen = 0;
 	dst4.s_addr = 0;
 	ifp = fin->fin_ifp;
 	if (fin->fin_v == 4) {
 		/*
 		 * Only ICMP queries may be answered with an ICMP error;
 		 * anything else is dropped silently.
 		 */
 		if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT))
 			switch (ntohs(fin->fin_data[0]) >> 8)
 			{
 			case ICMP_ECHO :
 			case ICMP_TSTAMP :
 			case ICMP_IREQ :
 			case ICMP_MASKREQ :
 				break;
 			default :
 				FREE_MB_T(m);
 				return 0;
 			}
 
 		if (dst == 0) {
 			if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			dst4 = dst6.in4;
 		} else
 			dst4.s_addr = fin->fin_daddr;
 
 		hlen = sizeof(ip_t);
 		ohlen = fin->fin_hlen;
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		/* Quote at most 8 bytes of the offending packet's payload. */
 		if (fin->fin_hlen < fin->fin_plen)
 			xtra = MIN(fin->fin_dlen, 8);
 		else
 			xtra = 0;
 	}
 
 #ifdef USE_INET6
 	else if (fin->fin_v == 6) {
 		hlen = sizeof(ip6_t);
 		ohlen = sizeof(ip6_t);
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		/* Translate the IPv4-style type and code to ICMPv6 values. */
 		type = icmptoicmp6types[type];
 		if (type == ICMP6_DST_UNREACH)
 			code = icmptoicmp6unreach[code];
 
 		if (iclen + max_linkhdr + fin->fin_plen > avail) {
 			if (!(MCLGET(m, M_NOWAIT))) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			avail = MCLBYTES;
 		}
 		xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr);
 		xtra = MIN(xtra, IPV6_MMTU - iclen);
 		if (dst == 0) {
 			if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 		} else
 			dst6 = fin->fin_dst6;
 	}
 #endif
 	else {
 		FREE_MB_T(m);
 		return -1;
 	}
 
 	/* Make sure headers fit, then clamp the quoted payload to the rest. */
 	avail -= (max_linkhdr + iclen);
 	if (avail < 0) {
 		FREE_MB_T(m);
 		return -1;
 	}
 	if (xtra > avail)
 		xtra = avail;
 	iclen += xtra;
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = iclen;
 	m->m_len = iclen;
 	ip = mtod(m, ip_t *);
 	icmp = (struct icmp *)((char *)ip + hlen);
 	ip2 = (ip_t *)&icmp->icmp_ip;
 
 	icmp->icmp_type = type;
 	/*
 	 * Use the local copy: for IPv6 it holds the code translated through
 	 * icmptoicmp6unreach[]; for IPv4 it still equals fin->fin_icode.
 	 */
 	icmp->icmp_code = code;
 	icmp->icmp_cksum = 0;
 #ifdef icmp_nextmtu
 	if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) {
 		if (fin->fin_mtu != 0) {
 			icmp->icmp_nextmtu = htons(fin->fin_mtu);
 
 		} else if (ifp != NULL) {
 			icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp));
 
 		} else {	/* make up a number... */
 			icmp->icmp_nextmtu = htons(fin->fin_plen - 20);
 		}
 	}
 #endif
 
 	/* Quote the offending packet's IP header inside the ICMP body. */
 	bcopy((char *)fin->fin_ip, (char *)ip2, ohlen);
 
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(iclen - hlen);
 		ip6->ip6_nxt = IPPROTO_ICMPV6;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 					     sizeof(*ip6), iclen - hlen);
 	} else
 #endif
 	{
 		ip->ip_p = IPPROTO_ICMP;
 		ip->ip_src.s_addr = dst4.s_addr;
 		ip->ip_dst.s_addr = fin->fin_saddr;
 
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		/*
 		 * NOTE(review): the checksum covers a fixed
 		 * sizeof(*icmp) + 8 bytes rather than iclen - hlen; confirm
 		 * this is right when ohlen or xtra differ from the usual
 		 * sizes.
 		 */
 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
 					     sizeof(*icmp) + 8);
 		ip->ip_len = htons(iclen);
 		ip->ip_p = IPPROTO_ICMP;
 	}
 	err = ipf_send_ip(fin, m);
 	return err;
 }
 
 
 
 
 /*
  * Route and transmit an IPv4 packet directly on an interface, fragmenting
  * it when it is larger than the interface MTU.  IPv6 packets are passed
  * straight to ip6_output().
  *
  * m0 - pointer to mbuf where the IP packet starts
  * mpp - pointer to the mbuf pointer that is the start of the mbuf chain
  * fin - packet information for the packet being sent
  * fdp - optional destination (interface and/or next hop); may be NULL
  *
  * The mbuf chain is consumed on every path: it is either handed to the
  * output routine or freed.  For IPv4 the function always returns 0; the
  * outcome is only recorded in ipfmain.ipf_frouteok[0] (success) or
  * ipfmain.ipf_frouteok[1] (failure).  For IPv6 the ip6_output() result is
  * returned.
  */
 int
 ipf_fastroute(m0, mpp, fin, fdp)
 	mb_t *m0, **mpp;
 	fr_info_t *fin;
 	frdest_t *fdp;
 {
 	register struct ip *ip, *mhip;
 	register struct mbuf *m = *mpp;
 	register struct route *ro;
 	int len, off, error = 0, hlen, code;
 	struct ifnet *ifp, *sifp;
 	struct sockaddr_in *dst;
 	struct route iproute;
 	u_short ip_off;
 	frdest_t node;
 	frentry_t *fr;
 
 	ro = NULL;
 
 #ifdef M_WRITABLE
 	/*
 	* HOT FIX/KLUDGE:
 	*
 	* If the mbuf we're about to send is not writable (because of
 	* a cluster reference, for example) we'll need to make a copy
 	* of it since this routine modifies the contents.
 	*
 	* If you have non-crappy network hardware that can transmit data
 	* from the mbuf, rather than making a copy, this is gonna be a
 	* problem.
 	*/
 	if (M_WRITABLE(m) == 0) {
 		m0 = m_dup(m, M_NOWAIT);
 		if (m0 != 0) {
 			FREE_MB_T(m);
 			m = m0;
 			*mpp = m;
 		} else {
 			error = ENOBUFS;
 			FREE_MB_T(m);
 			goto done;
 		}
 	}
 #endif
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		/*
 		 * currently "to <if>" and "to <if>:ip#" are not supported
 		 * for IPv6
 		 */
 		return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 
 	hlen = fin->fin_hlen;
 	ip = mtod(m0, struct ip *);
 	ifp = NULL;
 
 	/*
 	 * Route packet.
 	 */
 	ro = &iproute;
 	bzero(ro, sizeof (*ro));
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 	dst->sin_family = AF_INET;
 	dst->sin_addr = ip->ip_dst;
 
 	/*
 	 * For a destination list, pick one node from it and use that as the
 	 * destination, unless the rule keeps state.
 	 */
 	fr = fin->fin_fr;
 	if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) &&
 	    (fdp->fd_type == FRD_DSTLIST)) {
 		if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0)
 			fdp = &node;
 	}
 
 	if (fdp != NULL)
 		ifp = fdp->fd_ptr;
 	else
 		ifp = fin->fin_ifp;
 
 	if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) {
 		error = -2;
 		goto bad;
 	}
 
 	/* An explicit next-hop address overrides the packet's destination. */
 	if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0))
 		dst->sin_addr = fdp->fd_ip;
 
 	dst->sin_len = sizeof(*dst);
 	in_rtalloc(ro, M_GETFIB(m0));
 
 	if ((ifp == NULL) && (ro->ro_rt != NULL))
 		ifp = ro->ro_rt->rt_ifp;
 
 	if ((ro->ro_rt == NULL) || (ifp == NULL)) {
 		if (in_localaddr(ip->ip_dst))
 			error = EHOSTUNREACH;
 		else
 			error = ENETUNREACH;
 		goto bad;
 	}
 	if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 		dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 	if (ro->ro_rt)
 		counter_u64_add(ro->ro_rt->rt_pksent, 1);
 
 	/*
 	 * For input packets which are being "fastrouted", they won't
 	 * go back through output filtering and miss their chance to get
 	 * NAT'd and counted.  Duplicated packets aren't considered to be
 	 * part of the normal packet stream, so do not NAT them or pass
 	 * them through stateful checking, etc.
 	 */
 	if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) {
 		/* Temporarily present the packet as outbound on ifp. */
 		sifp = fin->fin_ifp;
 		fin->fin_ifp = ifp;
 		fin->fin_out = 1;
 		(void) ipf_acctpkt(fin, NULL);
 		fin->fin_fr = NULL;
 		if (!fr || !(fr->fr_flags & FR_RETMASK)) {
 			u_32_t pass;
 
 			(void) ipf_state_check(fin, &pass);
 		}
 
 		switch (ipf_nat_checkout(fin, NULL))
 		{
 		case 0 :
 			break;
 		case 1 :
 			ip->ip_sum = 0;
 			break;
 		case -1 :
 			error = -1;
 			goto bad;
 			break;
 		}
 
 		fin->fin_ifp = sifp;
 		fin->fin_out = 0;
 	} else
 		ip->ip_sum = 0;
 	/*
 	 * If small enough for interface, can just send directly.
 	 */
 	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 		if (!ip->ip_sum)
 			ip->ip_sum = in_cksum(m, hlen);
 		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
 			    ro
 			);
 		goto done;
 	}
 	/*
 	 * Too large for interface; fragment if possible.
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	ip_off = ntohs(ip->ip_off);
 	if (ip_off & IP_DF) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 	len = (ifp->if_mtu - hlen) &~ 7;
 	if (len < 8) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 
     {
 	int mhlen, firstlen = len;
 	struct mbuf **mnext = &m->m_act;
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 */
 	m0 = m;
 	mhlen = sizeof (struct ip);
 	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
 #ifdef MGETHDR
 		MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 		MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 		if (m == 0) {
 			/*
 			 * Fragments built so far hang off m0 through m_act;
 			 * go through sendorfree so that they are released
 			 * together with the original packet instead of
 			 * being leaked.
 			 */
 			error = ENOBUFS;
 			goto sendorfree;
 		}
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		bcopy((char *)ip, (char *)mhip, sizeof(*ip));
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			IP_HL_A(mhip, mhlen >> 2);
 		}
 		m->m_len = mhlen;
 		/* Kept in host order here; converted with htons() below. */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		if (off + len >= ntohs(ip->ip_len))
 			len = ntohs(ip->ip_len) - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		*mnext = m;
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == 0) {
 			error = ENOBUFS;	/* ??? */
 			goto sendorfree;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 		mhip->ip_off = htons((u_short)mhip->ip_off);
 		mhip->ip_sum = 0;
 		mhip->ip_sum = in_cksum(m, mhlen);
 		mnext = &m->m_act;
 	}
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header, then send each fragment (in order).
 	 *
 	 * ip_len is held in network byte order, so it must be converted
 	 * before being used in the (negative) trim length given to m_adj().
 	 */
 	m_adj(m0, hlen + firstlen - ntohs(ip->ip_len));
 	ip->ip_len = htons((u_short)(hlen + firstlen));
 	ip->ip_off = htons((u_short)IP_MF);
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(m0, hlen);
 sendorfree:
 	/* Send every packet on the m_act chain, or free them all on error. */
 	for (m = m0; m; m = m0) {
 		m0 = m->m_act;
 		m->m_act = 0;
 		if (error == 0)
 			error = (*ifp->if_output)(ifp, m,
 			    (struct sockaddr *)dst,
 			    ro
 			    );
 		else
 			FREE_MB_T(m);
 	}
     }
 done:
 	if (!error)
 		ipfmain.ipf_frouteok[0]++;
 	else
 		ipfmain.ipf_frouteok[1]++;
 
 	if ((ro != NULL) && (ro->ro_rt != NULL)) {
 		RTFREE(ro->ro_rt);
 	}
 	return 0;
 bad:
 	/*
 	 * When the packet could not be fragmented, report it with an ICMP
 	 * "fragmentation needed" error, temporarily substituting the output
 	 * interface and ICMP code in fin.
 	 */
 	if (error == EMSGSIZE) {
 		sifp = fin->fin_ifp;
 		code = fin->fin_icode;
 		fin->fin_icode = ICMP_UNREACH_NEEDFRAG;
 		fin->fin_ifp = ifp;
 		(void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1);
 		fin->fin_ifp = sifp;
 		fin->fin_icode = code;
 	}
 	FREE_MB_T(m);
 	goto done;
 }
 
 
 /*
  * Check whether the route back to the packet's source address goes out
  * through the interface recorded in fin->fin_ifp.
  *
  * Parameters:  fin(I) - pointer to packet information
  * Returns:     1 if the route's interface equals fin->fin_ifp,
  *              0 if it differs or no route to the source was found.
  */
 int
 ipf_verifysrc(fin)
 	fr_info_t *fin;
 {
 	struct sockaddr_in *dst;
 	struct route iproute;
 	int match;
 
 	bzero((char *)&iproute, sizeof(iproute));
 	dst = (struct sockaddr_in *)&iproute.ro_dst;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_family = AF_INET;
 	dst->sin_addr = fin->fin_src;
 	in_rtalloc(&iproute, 0);
 	if (iproute.ro_rt == NULL)
 		return 0;
 
 	match = (fin->fin_ifp == iproute.ro_rt->rt_ifp);
 
 	/*
 	 * Release the route looked up above now that the comparison is
 	 * done; without this every call leaves the route entry referenced.
 	 */
 	RTFREE(iproute.ro_rt);
 	return match;
 }
 
 
 /*
  * Find the first address of the requested IP version on an interface and
  * hand it (and its netmask) to ipf_ifpfillv4addr()/ipf_ifpfillv6addr().
  *
  * Parameters:  softc(I)   - pointer to soft context main structure (unused
  *                           in this function)
  *              v(I)       - IP version to look for (4 or 6)
  *              atype(I)   - FRI_BROADCAST selects the broadcast address,
  *                           FRI_PEERADDR the destination address, anything
  *                           else the interface address itself
  *              ifptr(I)   - interface to search; NULL and (void *)-1 are
  *                           rejected
  *              inp(O)     - where the address is stored
  *              inpmask(O) - where the netmask is stored
  * Returns:     -1 if no usable address was found, else the return value of
  *              the fill helper that was called.
  *
  * For IPv6, link-local and loopback addresses are skipped.
  */
 int
 ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask)
 	ipf_main_softc_t *softc;
 	int v, atype;
 	void *ifptr;
 	i6addr_t *inp, *inpmask;
 {
 #ifdef USE_INET6
 	struct in6_addr *inp6 = NULL;
 #endif
 	struct sockaddr *sock, *mask;
 	struct sockaddr_in *sin;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 
 	if ((ifptr == NULL) || (ifptr == (void *)-1))
 		return -1;
 
 	sin = NULL;
 	ifp = ifptr;
 
 	if (v == 4)
 		inp->in4.s_addr = 0;
 #ifdef USE_INET6
 	else if (v == 6)
 		bzero((char *)inp, sizeof(*inp));
 #endif
 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
 	/*
 	 * An interface with an empty address list has nothing to offer;
 	 * bail out before ifa is dereferenced.
 	 */
 	if (ifa == NULL)
 		return -1;
 
 	sock = ifa->ifa_addr;
 	while (sock != NULL && ifa != NULL) {
 		sin = (struct sockaddr_in *)sock;
 		if ((v == 4) && (sin->sin_family == AF_INET))
 			break;
 #ifdef USE_INET6
 		if ((v == 6) && (sin->sin_family == AF_INET6)) {
 			inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr;
 			if (!IN6_IS_ADDR_LINKLOCAL(inp6) &&
 			    !IN6_IS_ADDR_LOOPBACK(inp6))
 				break;
 		}
 #endif
 		ifa = TAILQ_NEXT(ifa, ifa_link);
 		if (ifa != NULL)
 			sock = ifa->ifa_addr;
 	}
 
 	/* Ran off the end of the list without finding a match. */
 	if (ifa == NULL || sin == NULL)
 		return -1;
 
 	mask = ifa->ifa_netmask;
 	if (atype == FRI_BROADCAST)
 		sock = ifa->ifa_broadaddr;
 	else if (atype == FRI_PEERADDR)
 		sock = ifa->ifa_dstaddr;
 
 	if (sock == NULL)
 		return -1;
 
 #ifdef USE_INET6
 	if (v == 6) {
 		return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock,
 					 (struct sockaddr_in6 *)mask,
 					 inp, inpmask);
 	}
 #endif
 	return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock,
 				 (struct sockaddr_in *)mask,
 				 &inp->in4, &inpmask->in4);
 }
 
 
 /*
  * Return a fresh 32-bit value obtained from arc4random().  Going by the
  * name this is presumably used as a TCP initial sequence number - confirm
  * against the callers.  The fin argument is not used.
  */
 u_32_t
 ipf_newisn(fin)
 	fr_info_t *fin;
 {
 	u_32_t newiss;
 	newiss = arc4random();
 	return newiss;
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* Function:    ipf_nextipid                                                */
-/* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
-/* Parameters:  fin(I) - pointer to packet information                      */
-/*                                                                          */
-/* Returns the next IPv4 ID to use for this packet.                         */
-/* ------------------------------------------------------------------------ */
-u_short
-ipf_nextipid(fin)
-	fr_info_t *fin;
-{
-	u_short id;
-
-#ifndef	RANDOM_IP_ID
-	MUTEX_ENTER(&ipfmain.ipf_rw);
-	id = ipid++;
-	MUTEX_EXIT(&ipfmain.ipf_rw);
-#else
-	id = ip_randomid();
-#endif
-
-	return id;
 }
 
 
 /*
  * Validate the layer 4 checksum of an IPv4 packet, using the checksum
  * state recorded in the mbuf packet header (when CSUM_DATA_VALID is
  * available at compile time) before falling back to ipf_checkl4sum().
  *
  * Parameters:  fin(I) - pointer to packet information
  * Returns:     0  - checksum is good, not required (FI_NOCKSUM), or has
  *                   been left to be filled in later
  *              1  - packet is short (FI_SHORT), nothing was checked
  *              -1 - checksum is bad; FI_BAD is set in fin_flx
  *
  * The result of the hardware-assisted paths is cached in fin_cksum so
  * that later calls return straight away.
  */
 INLINE int
 ipf_checkv4sum(fin)
 	fr_info_t *fin;
 {
 #ifdef CSUM_DATA_VALID
 	int manual = 0;
 	u_short sum;
 	ip_t *ip;
 	mb_t *m;
 
 	if ((fin->fin_flx & FI_NOCKSUM) != 0)
 		return 0;
 
 	if ((fin->fin_flx & FI_SHORT) != 0)
 		return 1;
 
 	/* A previous call has already decided; report the cached verdict. */
 	if (fin->fin_cksum != FI_CK_NEEDED)
 		return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1;
 
 	m = fin->fin_m;
 	if (m == NULL) {
 		manual = 1;
 		goto skipauto;
 	}
 	ip = fin->fin_ip;
 
 	/* IP header checksum was checked but not flagged as valid. */
 	if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) ==
 	    CSUM_IP_CHECKED) {
 		fin->fin_cksum = FI_CK_BAD;
 		fin->fin_flx |= FI_BAD;
 		return -1;
 	}
 	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			sum = m->m_pkthdr.csum_data;
 		else
 			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 					htonl(m->m_pkthdr.csum_data +
 					fin->fin_dlen + fin->fin_p));
 		sum ^= 0xffff;
 		if (sum != 0) {
 			/*
 			 * Report the failure now.  Falling through used to
 			 * return 0 on this call even though any later call
 			 * would return -1 from the cached FI_CK_BAD state.
 			 */
 			fin->fin_cksum = FI_CK_BAD;
 			fin->fin_flx |= FI_BAD;
 			return -1;
 		}
 		fin->fin_cksum = FI_CK_SUMOK;
 		return 0;
 	} else {
 		if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) {
 			fin->fin_cksum = FI_CK_L4FULL;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_TCP ||
 			   m->m_pkthdr.csum_flags == CSUM_UDP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_IP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else {
 			manual = 1;
 		}
 	}
 skipauto:
 	/* No usable offload information: compute the checksum in software. */
 	if (manual != 0) {
 		if (ipf_checkl4sum(fin) == -1) {
 			fin->fin_flx |= FI_BAD;
 			return -1;
 		}
 	}
 #else
 	if (ipf_checkl4sum(fin) == -1) {
 		fin->fin_flx |= FI_BAD;
 		return -1;
 	}
 #endif
 	return 0;
 }
 
 
 #ifdef USE_INET6
 /*
  * Validate the layer 4 checksum of an IPv6 packet.
  *
  * Parameters:  fin(I) - pointer to packet information
  * Returns:     0  - checksum good or not required (FI_NOCKSUM)
  *              1  - packet is short (FI_SHORT), nothing was checked
  *              -1 - checksum bad (FI_BAD is set when ipf_checkl4sum fails)
  */
 INLINE int
 ipf_checkv6sum(fin)
 	fr_info_t *fin;
 {
 	int result = 0;
 
 	if ((fin->fin_flx & FI_NOCKSUM) != 0) {
 		result = 0;
 	} else if ((fin->fin_flx & FI_SHORT) != 0) {
 		result = 1;
 	} else if (fin->fin_cksum != FI_CK_NEEDED) {
 		/* Already decided earlier; report the cached verdict. */
 		result = (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1;
 	} else if (ipf_checkl4sum(fin) == -1) {
 		fin->fin_flx |= FI_BAD;
 		result = -1;
 	}
 
 	return result;
 }
 #endif /* USE_INET6 */
 
 
 /*
  * Return the number of data bytes held in an mbuf chain.  When the first
  * mbuf carries a packet header (M_PKTHDR) the length recorded there is
  * used; otherwise the m_len of every mbuf linked by m_next is summed.
  */
 size_t
 mbufchainlen(m0)
 	struct mbuf *m0;
 {
 	struct mbuf *cur;
 	size_t total;
 
 	if ((m0->m_flags & M_PKTHDR) != 0)
 		return m0->m_pkthdr.len;
 
 	total = 0;
 	cur = m0;
 	while (cur != NULL) {
 		total += cur->m_len;
 		cur = cur->m_next;
 	}
 	return total;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pullup                                                  */
 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
 /* Parameters:  xmin(I)- pointer to buffer where data packet starts         */
 /*              fin(I) - pointer to packet information                      */
 /*              len(I) - number of bytes to pullup                          */
 /*                                                                          */
 /* Attempt to move at least len bytes (from the start of the buffer) into a */
 /* single buffer for ease of access.  Operating system native functions are */
 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
 /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */
 /* not been called.  fin_ip, fin_dp and fin_fraghdr are updated before      */
 /* exiting _IF_ and ONLY if the pullup succeeds.  On failure *fin->fin_mp   */
 /* and fin->fin_m are set to NULL.                                          */
 /*                                                                          */
 /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */
 /* of buffers that starts at *fin->fin_mp.                                  */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_pullup(xmin, fin, len)
 	mb_t *xmin;
 	fr_info_t *fin;
 	int len;
 {
 	int dpoff, ipoff, fhoff;
 	mb_t *m = xmin;
 	char *ip;
 
 	if (m == NULL)
 		return NULL;
 
 	ip = (char *)fin->fin_ip;
 	if ((fin->fin_flx & FI_COALESCE) != 0)
 		return ip;
 
 	/*
 	 * Remember where fin_dp and fin_fraghdr sit relative to the current
 	 * IP header so that they can be re-based if the data moves.  Both
 	 * offsets must be taken before fin_ip is changed below.
 	 */
 	ipoff = fin->fin_ipoff;
 	if (fin->fin_dp != NULL)
 		dpoff = (char *)fin->fin_dp - (char *)ip;
 	else
 		dpoff = 0;
 	if (fin->fin_fraghdr != NULL)
 		fhoff = (char *)fin->fin_fraghdr - (char *)ip;
 	else
 		fhoff = 0;
 
 	if (M_LEN(m) < len) {
 		mb_t *n = *fin->fin_mp;
 		/*
 		 * Assume that M_PKTHDR is set and just work with what is left
 		 * rather than check..
 		 * Should not make any real difference, anyway.
 		 */
 		if (m != n) {
 			/*
 			 * Record the mbuf that points to the mbuf that we're
 			 * about to go to work on so that we can update the
 			 * m_next appropriately later.
 			 */
 			for (; n->m_next != m; n = n->m_next)
 				;
 		} else {
 			n = NULL;
 		}
 
 #ifdef MHLEN
 		if (len > MHLEN)
 #else
 		if (len > MLEN)
 #endif
 		{
 #ifdef HAVE_M_PULLDOWN
 			if (m_pulldown(m, 0, len, NULL) == NULL)
 				m = NULL;
 #else
 			FREE_MB_T(*fin->fin_mp);
 			m = NULL;
 			n = NULL;
 #endif
 		} else
 		{
 			m = m_pullup(m, len);
 		}
 		if (n != NULL)
 			n->m_next = m;
 		if (m == NULL) {
 			/*
 			 * When n is non-NULL, it indicates that m pointed to
 			 * a sub-chain (tail) of the mbuf and that the head
 			 * of this chain has not yet been free'd.
 			 */
 			if (n != NULL) {
 				FREE_MB_T(*fin->fin_mp);
 			}
 
 			*fin->fin_mp = NULL;
 			fin->fin_m = NULL;
 			return NULL;
 		}
 
 		if (n == NULL)
 			*fin->fin_mp = m;
 
 		/* Skip any leading buffers that hold no data. */
 		while (M_LEN(m) == 0) {
 			m = m->m_next;
 		}
 		fin->fin_m = m;
 		ip = MTOD(m, char *) + ipoff;
 
 		fin->fin_ip = (ip_t *)ip;
 		if (fin->fin_dp != NULL)
 			fin->fin_dp = (char *)fin->fin_ip + dpoff;
 		/*
 		 * Re-base the fragment header with the offset saved above.
 		 * Deriving it here from fin_ip would use the new header
 		 * address and leave fin_fraghdr pointing at the old buffer.
 		 */
 		if (fin->fin_fraghdr != NULL)
 			fin->fin_fraghdr = (char *)ip + fhoff;
 	}
 
 	if (len == fin->fin_plen)
 		fin->fin_flx |= FI_COALESCE;
 	return ip;
 }
 
 
 int
 ipf_inject(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	int error = 0;
 
 	if (fin->fin_out == 0) {
 		netisr_dispatch(NETISR_IP, m);
 	} else {
 		fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len);
 		fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off);
 		error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 	}
 
 	return error;
 }
 
 /*
  * Remove the packet filter hooks from pfil.
  *
  * When NETBSD_PF is defined, the pfil head for AF_INET (and for AF_INET6
  * when USE_INET6 is defined) is looked up and, if present, the matching
  * ipf_check_wrapper/ipf_check_wrapper6 hook is removed for both
  * directions.  Without NETBSD_PF nothing is done.
  *
  * Always returns 0; the results of pfil_remove_hook() are not checked.
  *
  * NOTE(review): the ph_inet/ph_inet6 declarations are guarded by
  * NETBSD_PF && __FreeBSD_version >= 500011 while their use is guarded by
  * NETBSD_PF alone - confirm the two conditions are meant to differ.
  */
 int ipf_pfil_unhook(void) {
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 #endif
 
 #ifdef NETBSD_PF
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 	if (ph_inet != NULL)
 		pfil_remove_hook((void *)ipf_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 # ifdef USE_INET6
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 	if (ph_inet6 != NULL)
 		pfil_remove_hook((void *)ipf_check_wrapper6, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 # endif
 #endif
 
 	return (0);
 }
 
 /*
  * Install the packet filter hooks into pfil.
  *
  * When NETBSD_PF is defined, the pfil head for AF_INET (and for AF_INET6
  * when USE_INET6 is defined) is looked up.  ENODEV is returned if no head
  * was found at all (with USE_INET6: only if both lookups failed).
  * Otherwise ipf_check_wrapper/ipf_check_wrapper6 is added to each head
  * that exists, for both directions, and 0 is returned.  The results of
  * pfil_add_hook() are not checked.  Without NETBSD_PF nothing is done and
  * 0 is returned.
  *
  * NOTE(review): the ph_inet/ph_inet6 declarations are guarded by
  * NETBSD_PF && __FreeBSD_version >= 500011 while their use is guarded by
  * NETBSD_PF alone - confirm the two conditions are meant to differ.
  */
 int ipf_pfil_hook(void) {
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 #endif
 
 # ifdef NETBSD_PF
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 #    ifdef USE_INET6
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 #    endif
 	if (ph_inet == NULL
 #    ifdef USE_INET6
 	    && ph_inet6 == NULL
 #    endif
 	   ) {
 		return ENODEV;
 	}
 
 	if (ph_inet != NULL)
 		pfil_add_hook((void *)ipf_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 #  ifdef USE_INET6
 	if (ph_inet6 != NULL)
 		pfil_add_hook((void *)ipf_check_wrapper6, NULL,
 				      PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 #  endif
 # endif
 	return (0);
 }
 
 /*
  * Register ipf_ifevent (with &ipfmain as its argument) for the interface
  * arrival, interface departure and interface clone events, keeping the
  * returned tags so that ipf_event_dereg() can undo the registration.
  */
 void
 ipf_event_reg(void)
 {
 	ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 
 	ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 
 	ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Deregister the interface arrival, departure and clone event handlers
  * whose tags are currently set.  Each tag is cleared once its handler has
  * been deregistered, so calling this function again (or before any
  * registration has happened) does not deregister a stale tag.
  */
 void
 ipf_event_dereg(void)
 {
 	if (ipf_arrivetag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag);
 		ipf_arrivetag = NULL;
 	}
 	if (ipf_departtag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag);
 		ipf_departtag = NULL;
 	}
 	if (ipf_clonetag != NULL) {
 		EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag);
 		ipf_clonetag = NULL;
 	}
 }
 
 
 /*
  * Return a 32-bit random number taken from arc4random().
  */
 u_32_t
 ipf_random()
 {
 	u_32_t value;
 
 	value = arc4random();
 	return value;
 }
 
 
 /*
  * Combine a caller-supplied partial checksum with the checksum of the
  * packet data and return the folded, complemented 16-bit result.
  *
  * Parameters:  fin(I)  - pointer to packet information
  *              hlen(I) - number of bytes to step over at the front of the
  *                        first mbuf while in_cksum() runs
  *              sum(I)  - partial sum to fold the data checksum into
  *
  * The first mbuf's m_data/m_len are adjusted by hlen around the call to
  * in_cksum() and put back afterwards.
  */
 u_int
 ipf_pcksum(fin, hlen, sum)
 	fr_info_t *fin;
 	int hlen;
 	u_int sum;
 {
 	struct mbuf *mb = fin->fin_m;
 	int dataoff = (char *)fin->fin_dp - (char *)fin->fin_ip;
 	u_int datasum;
 
 	/* Temporarily hide the first hlen bytes from in_cksum(). */
 	mb->m_data += hlen;
 	mb->m_len -= hlen;
 	datasum = in_cksum(fin->fin_m, fin->fin_plen - dataoff);
 	mb->m_data -= hlen;
 	mb->m_len += hlen;
 
 	/*
 	 * Add the two partial sums, fold any carry back into the low 16
 	 * bits and return the one's complement of the result.
 	 */
 	sum += ~datasum & 0xffff;
 	for (; sum > 0xffff; )
 		sum = (sum >> 16) + (sum & 0xffff);
 
 	return ~sum & 0xffff;
 }
Index: head/sys/contrib/ipfilter/netinet/ip_nat.c
===================================================================
--- head/sys/contrib/ipfilter/netinet/ip_nat.c	(revision 280970)
+++ head/sys/contrib/ipfilter/netinet/ip_nat.c	(revision 280971)
@@ -1,8596 +1,8596 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define        KERNEL	1
 # define        _KERNEL	1
 #endif
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/file.h>
 #if defined(_KERNEL) && \
     (defined(__NetBSD_Version) && (__NetBSD_Version >= 399002000))
 # include <sys/kauth.h>
 #endif
 #if !defined(_KERNEL)
 # include <stdio.h>
 # include <string.h>
 # include <stdlib.h>
 # define KERNEL
 # ifdef _OpenBSD__
 struct file;
 # endif
 # include <sys/uio.h>
 # undef KERNEL
 #endif
 #if defined(_KERNEL) && \
     defined(__FreeBSD_version) && (__FreeBSD_version >= 220000)
 # include <sys/filio.h>
 # include <sys/fcntl.h>
 #else
 # include <sys/ioctl.h>
 #endif
 #if !defined(AIX)
 # include <sys/fcntl.h>
 #endif
 #if !defined(linux)
 # include <sys/protosw.h>
 #endif
 #include <sys/socket.h>
 #if defined(_KERNEL)
 # include <sys/systm.h>
 # if !defined(__SVR4) && !defined(__svr4__)
 #  include <sys/mbuf.h>
 # endif
 #endif
 #if defined(__SVR4) || defined(__svr4__)
 # include <sys/filio.h>
 # include <sys/byteorder.h>
 # ifdef KERNEL
 #  include <sys/dditypes.h>
 # endif
 # include <sys/stream.h>
 # include <sys/kmem.h>
 #endif
 #if __FreeBSD_version >= 300000
 # include <sys/queue.h>
 #endif
 #include <net/if.h>
 #if __FreeBSD_version >= 300000
 # include <net/if_var.h>
 #endif
 #ifdef sun
 # include <net/af.h>
 #endif
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 
 #ifdef RFC1825
 # include <vpn/md5.h>
 # include <vpn/ipsec.h>
 extern struct ifnet vpnif;
 #endif
 
 #if !defined(linux)
 # include <netinet/ip_var.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet/ip_icmp.h>
 #include "netinet/ip_compat.h"
 #include <netinet/tcpip.h>
 #include "netinet/ipl.h"
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_lookup.h"
 #include "netinet/ip_dstlist.h"
 #include "netinet/ip_sync.h"
 #if FREEBSD_GE_REV(300000)
 # include <sys/malloc.h>
 #endif
 #ifdef HAS_SYS_MD5_H
 # include <sys/md5.h>
 #else
 # include "md5.h"
 #endif
 /* END OF INCLUDES */
 
 #undef	SOCKADDR_IN
 #define	SOCKADDR_IN	struct sockaddr_in
 
 #if !defined(lint)
 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
 static const char rcsid[] = "@(#)$FreeBSD$";
 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
 #endif
 
 
 /*
  * NATFSUM(n,v,f): for v == 4 the IPv4 address in field f of n, otherwise
  * the sum of the four 32-bit words of the address in that field.
  */
 #define	NATFSUM(n,v,f)	((v) == 4 ? (n)->f.in4.s_addr : (n)->f.i6[0] + \
 			 (n)->f.i6[1] + (n)->f.i6[2] + (n)->f.i6[3])
 /*
  * Statistics helpers.  All of them expect a local variable named "softn"
  * pointing at the NAT soft context.  The variants that also invoke DT/DT1
  * pass the counter name (or z) to that macro.
  *
  * NBUMP takes a member name, so the argument must not be parenthesised in
  * the expansion: "softn->(x)++" is not valid C.
  */
 #define	NBUMP(x)	softn->x++
 #define	NBUMPD(x, y)	do { \
 				softn->x.y++; \
 				DT(y); \
 			} while (0)
 #define	NBUMPSIDE(y,x)	softn->ipf_nat_stats.ns_side[y].x++
 #define	NBUMPSIDED(y,x)	do { softn->ipf_nat_stats.ns_side[y].x++; \
 			     DT(x); } while (0)
 #define	NBUMPSIDEX(y,x,z) \
 			do { softn->ipf_nat_stats.ns_side[y].x++; \
 			     DT(z); } while (0)
 #define	NBUMPSIDEDF(y,x)do { softn->ipf_nat_stats.ns_side[y].x++; \
 			     DT1(x, fr_info_t *, fin); } while (0)
 
 frentry_t	ipfnatblock;
 
 /*
  * Template table of the NAT tuneables.  ipf_nat_soft_create() copies it
  * for each NAT soft context with ipf_tune_array_copy().
  *
  * Each entry lists, in order: the offset of the variable inside
  * ipf_nat_softc_t, the tuneable's name, two numeric limits (presumably
  * minimum and maximum - confirm against ipftuneable_t), the size of the
  * variable, flags (IPFT_RDONLY or 0) and two pointers, the last of which
  * is a function for the entries whose size change needs a rehash.
  * The table ends with an all-zero/NULL entry.
  */
 static ipftuneable_t ipf_nat_tuneables[] = {
 	/* nat */
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_lock) },
 		"nat_lock",	0,	1,
 		stsizeof(ipf_nat_softc_t, ipf_nat_lock),
 		IPFT_RDONLY,		NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_sz) },
 		"nat_table_size", 1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_table_sz),
 		0,			NULL,	ipf_nat_rehash },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_max) },
 		"nat_table_max", 1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_table_max),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maprules_sz) },
 		"nat_rules_size", 1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_maprules_sz),
 		0,			NULL,	ipf_nat_rehash_rules },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_rdrrules_sz) },
 		"rdr_rules_size", 1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_rdrrules_sz),
 		0,			NULL,	ipf_nat_rehash_rules },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_hostmap_sz) },
 		"hostmap_size",	1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_hostmap_sz),
 		0,			NULL,	ipf_nat_hostmap_rehash },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maxbucket) },
 		"nat_maxbucket",1,	0x7fffffff,
 		stsizeof(ipf_nat_softc_t, ipf_nat_maxbucket),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_logging) },
 		"nat_logging",	0,	1,
 		stsizeof(ipf_nat_softc_t, ipf_nat_logging),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_doflush) },
 		"nat_doflush",	0,	1,
 		stsizeof(ipf_nat_softc_t, ipf_nat_doflush),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_low) },
 		"nat_table_wm_low",	1,	99,
 		stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_low),
 		0,			NULL,	NULL },
 	{ { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_high) },
 		"nat_table_wm_high",	2,	100,
 		stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_high),
 		0,			NULL,	NULL },
 	{ { 0 },
 		NULL,			0,	0,
 		0,
 		0,			NULL,	NULL }
 };
 
 /* ======================================================================== */
 /* How the NAT is organised and works.                                      */
 /*                                                                          */
 /* Inside (interface y) NAT       Outside (interface x)                     */
 /* -------------------- -+- -------------------------------------           */
 /* Packet going          |   out, processed by ipf_nat_checkout() for x     */
 /* ------------>         |   ------------>                                  */
 /* src=10.1.1.1          |   src=192.1.1.1                                  */
 /*                       |                                                  */
 /*                       |   in, processed by ipf_nat_checkin() for x       */
 /* <------------         |   <------------                                  */
 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
 /* -------------------- -+- -------------------------------------           */
 /* ipf_nat_checkout() - changes ip_src and if required, sport               */
 /*             - creates a new mapping, if required.                        */
 /* ipf_nat_checkin()  - changes ip_dst and if required, dport               */
 /*                                                                          */
 /* In the NAT table, internal source is recorded as "in" and externally     */
 /* seen as "out".                                                           */
 /* ======================================================================== */
 
 
 #if SOLARIS && !defined(INSTANCES)
 extern	int		pfil_delayed_copy;
 #endif
 
 static	int	ipf_nat_flush_entry __P((ipf_main_softc_t *, void *));
 static	int	ipf_nat_getent __P((ipf_main_softc_t *, caddr_t, int));
 static	int	ipf_nat_getsz __P((ipf_main_softc_t *, caddr_t, int));
 static	int	ipf_nat_putent __P((ipf_main_softc_t *, caddr_t, int));
 static	void	ipf_nat_addmap __P((ipf_nat_softc_t *, ipnat_t *));
 static	void	ipf_nat_addrdr __P((ipf_nat_softc_t *, ipnat_t *));
 static	int	ipf_nat_builddivertmp __P((ipf_nat_softc_t *, ipnat_t *));
 static	int	ipf_nat_clearlist __P((ipf_main_softc_t *, ipf_nat_softc_t *));
 static	int	ipf_nat_cmp_rules __P((ipnat_t *, ipnat_t *));
 static	int	ipf_nat_decap __P((fr_info_t *, nat_t *));
 static	void	ipf_nat_delrule __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 				     ipnat_t *, int));
 static	int	ipf_nat_extraflush __P((ipf_main_softc_t *, ipf_nat_softc_t *, int));
 static	int	ipf_nat_finalise __P((fr_info_t *, nat_t *));
 static	int	ipf_nat_flushtable __P((ipf_main_softc_t *, ipf_nat_softc_t *));
 static	int	ipf_nat_getnext __P((ipf_main_softc_t *, ipftoken_t *,
 				     ipfgeniter_t *, ipfobj_t *));
 static	int	ipf_nat_gettable __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 				      char *));
 static	hostmap_t *ipf_nat_hostmap __P((ipf_nat_softc_t *, ipnat_t *,
 					struct in_addr, struct in_addr,
 					struct in_addr, u_32_t));
 static	int	ipf_nat_icmpquerytype __P((int));
 static	int	ipf_nat_iterator __P((ipf_main_softc_t *, ipftoken_t *,
 				      ipfgeniter_t *, ipfobj_t *));
 static	int	ipf_nat_match __P((fr_info_t *, ipnat_t *));
 static	int	ipf_nat_matcharray __P((nat_t *, int *, u_long));
 static	int	ipf_nat_matchflush __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 					caddr_t));
 static	void	ipf_nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *,
 				      u_short *));
 static	int	ipf_nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
 static	int	ipf_nat_newdivert __P((fr_info_t *, nat_t *, natinfo_t *));
 static	int	ipf_nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
 static	int	ipf_nat_newrewrite __P((fr_info_t *, nat_t *, natinfo_t *));
 static	int	ipf_nat_nextaddr __P((fr_info_t *, nat_addr_t *, u_32_t *,
 				      u_32_t *));
 static	int	ipf_nat_nextaddrinit __P((ipf_main_softc_t *, char *,
 					  nat_addr_t *, int, void *));
 static	int	ipf_nat_resolverule __P((ipf_main_softc_t *, ipnat_t *));
 static	int	ipf_nat_ruleaddrinit __P((ipf_main_softc_t *,
 					  ipf_nat_softc_t *, ipnat_t *));
 static	void	ipf_nat_rule_fini __P((ipf_main_softc_t *, ipnat_t *));
 static	int	ipf_nat_rule_init __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 				       ipnat_t *));
 static	int	ipf_nat_siocaddnat __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 					ipnat_t *, int));
 static	void	ipf_nat_siocdelnat __P((ipf_main_softc_t *, ipf_nat_softc_t *,
 					ipnat_t *, int));
 static	void	ipf_nat_tabmove __P((ipf_nat_softc_t *, nat_t *));
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_main_load                                           */
 /* Returns:     int - 0 == success, -1 == failure                           */
 /* Parameters:  Nil                                                         */
 /*                                                                          */
 /* The only global NAT structure that needs to be initialised is the filter */
 /* rule that is used with blocking packets.                                 */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_main_load()
 {
 	bzero((char *)&ipfnatblock, sizeof(ipfnatblock));
 	ipfnatblock.fr_flags = FR_BLOCK|FR_QUICK;
 	ipfnatblock.fr_ref = 1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_main_unload                                         */
 /* Returns:     int - always 0                                              */
 /* Parameters:  Nil                                                         */
 /*                                                                          */
 /* A null-op function that exists as a placeholder so that the flow in      */
 /* other functions is obvious.  It is the counterpart of                    */
 /* ipf_nat_main_load(), which allocates nothing that would need releasing.  */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_main_unload()
 {
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_soft_create                                         */
 /* Returns:     void * - NULL = failure, else pointer to NAT context        */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* Allocate and zero a NAT soft context, give it its own copy of the NAT    */
 /* tuneables (linked into softc) and fill in the default sizes and          */
 /* timeouts.  The tables themselves are not created here so that the sizes  */
 /* can still be changed before ipf_nat_soft_init() runs.  If the tuneables  */
 /* cannot be copied or linked, the context is destroyed again.              */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_nat_soft_create(softc)
 	ipf_main_softc_t *softc;
 {
 	ipf_nat_softc_t *softn;
 
 	KMALLOC(softn, ipf_nat_softc_t *);
 	if (softn == NULL)
 		return NULL;
 	bzero((char *)softn, sizeof(*softn));
 
 	/* Private copy of the tuneables, then hook it into the main list. */
 	softn->ipf_nat_tune = ipf_tune_array_copy(softn,
 	    sizeof(ipf_nat_tuneables), ipf_nat_tuneables);
 	if ((softn->ipf_nat_tune == NULL) ||
 	    (ipf_tune_array_link(softc, softn->ipf_nat_tune) == -1)) {
 		ipf_nat_soft_destroy(softc, softn);
 		return NULL;
 	}
 
 	softn->ipf_nat_list_tail = &softn->ipf_nat_list;
 
 	/* Default table sizes. */
 	softn->ipf_nat_table_sz = NAT_TABLE_SZ;
 	softn->ipf_nat_table_max = NAT_TABLE_MAX;
 	softn->ipf_nat_maprules_sz = NAT_SIZE;
 	softn->ipf_nat_rdrrules_sz = RDR_SIZE;
 	softn->ipf_nat_hostmap_sz = HOSTMAP_SIZE;
 
 	/* Default behaviour switches. */
 	softn->ipf_nat_doflush = 0;
 	softn->ipf_nat_logging = 0;
 #ifdef  IPFILTER_LOG
 	softn->ipf_nat_logging = 1;
 #endif
 
 	/* Default timeouts and table watermarks. */
 	softn->ipf_nat_defage = DEF_NAT_AGE;
 	softn->ipf_nat_defipage = IPF_TTLVAL(60);
 	softn->ipf_nat_deficmpage = IPF_TTLVAL(3);
 	softn->ipf_nat_table_wm_low = 90;
 	softn->ipf_nat_table_wm_high = 99;
 
 	return softn;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_soft_destroy                                        */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              arg(I)   - pointer to the NAT soft context to free          */
 /*                                                                          */
 /* Unlink and free the context's copy of the tuneables, if it has one, and  */
 /* then free the context itself.                                            */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_soft_destroy(softc, arg)
 	ipf_main_softc_t *softc;
 	void *arg;
 {
 	ipf_nat_softc_t *natctx = arg;
 
 	if (natctx->ipf_nat_tune == NULL) {
 		KFREE(natctx);
 		return;
 	}
 
 	ipf_tune_array_unlink(softc, natctx->ipf_nat_tune);
 	KFREES(natctx->ipf_nat_tune, sizeof(ipf_nat_tuneables));
 	natctx->ipf_nat_tune = NULL;
 
 	KFREE(natctx);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_init                                                */
 /* Returns:     int - 0 == success, -1 == failure                           */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* Initialise all of the NAT locks, tables and other structures.            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_soft_init(softc, arg)
 	ipf_main_softc_t *softc;
 	void *arg;
 {
 	ipf_nat_softc_t *softn = arg;
 	ipftq_t *tq;
 	int i;
 
 	KMALLOCS(softn->ipf_nat_table[0], nat_t **, \
 		 sizeof(nat_t *) * softn->ipf_nat_table_sz);
 
 	if (softn->ipf_nat_table[0] != NULL) {
 		bzero((char *)softn->ipf_nat_table[0],
 		      softn->ipf_nat_table_sz * sizeof(nat_t *));
 	} else {
 		return -1;
 	}
 
 	KMALLOCS(softn->ipf_nat_table[1], nat_t **, \
 		 sizeof(nat_t *) * softn->ipf_nat_table_sz);
 
 	if (softn->ipf_nat_table[1] != NULL) {
 		bzero((char *)softn->ipf_nat_table[1],
 		      softn->ipf_nat_table_sz * sizeof(nat_t *));
 	} else {
 		return -2;
 	}
 
 	KMALLOCS(softn->ipf_nat_map_rules, ipnat_t **, \
 		 sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz);
 
 	if (softn->ipf_nat_map_rules != NULL) {
 		bzero((char *)softn->ipf_nat_map_rules,
 		      softn->ipf_nat_maprules_sz * sizeof(ipnat_t *));
 	} else {
 		return -3;
 	}
 
 	KMALLOCS(softn->ipf_nat_rdr_rules, ipnat_t **, \
 		 sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz);
 
 	if (softn->ipf_nat_rdr_rules != NULL) {
 		bzero((char *)softn->ipf_nat_rdr_rules,
 		      softn->ipf_nat_rdrrules_sz * sizeof(ipnat_t *));
 	} else {
 		return -4;
 	}
 
 	KMALLOCS(softn->ipf_hm_maptable, hostmap_t **, \
 		 sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz);
 
 	if (softn->ipf_hm_maptable != NULL) {
 		bzero((char *)softn->ipf_hm_maptable,
 		      sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz);
 	} else {
 		return -5;
 	}
 	softn->ipf_hm_maplist = NULL;
 
 	KMALLOCS(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, u_int *,
 		 softn->ipf_nat_table_sz * sizeof(u_int));
 
 	if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen == NULL) {
 		return -6;
 	}
 	bzero((char *)softn->ipf_nat_stats.ns_side[0].ns_bucketlen,
 	      softn->ipf_nat_table_sz * sizeof(u_int));
 
 	KMALLOCS(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, u_int *,
 		 softn->ipf_nat_table_sz * sizeof(u_int));
 
 	if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen == NULL) {
 		return -7;
 	}
 
 	bzero((char *)softn->ipf_nat_stats.ns_side[1].ns_bucketlen,
 	      softn->ipf_nat_table_sz * sizeof(u_int));
 
 	if (softn->ipf_nat_maxbucket == 0) {
 		for (i = softn->ipf_nat_table_sz; i > 0; i >>= 1)
 			softn->ipf_nat_maxbucket++;
 		softn->ipf_nat_maxbucket *= 2;
 	}
 
 	ipf_sttab_init(softc, softn->ipf_nat_tcptq);
 	/*
 	 * Increase this because we may have "keep state" following this too
 	 * and packet storms can occur if this is removed too quickly.
 	 */
 	softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack;
 	softn->ipf_nat_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
 							&softn->ipf_nat_udptq;
 
 	IPFTQ_INIT(&softn->ipf_nat_udptq, softn->ipf_nat_defage,
 		   "nat ipftq udp tab");
 	softn->ipf_nat_udptq.ifq_next = &softn->ipf_nat_udpacktq;
 
 	IPFTQ_INIT(&softn->ipf_nat_udpacktq, softn->ipf_nat_defage,
 		   "nat ipftq udpack tab");
 	softn->ipf_nat_udpacktq.ifq_next = &softn->ipf_nat_icmptq;
 
 	IPFTQ_INIT(&softn->ipf_nat_icmptq, softn->ipf_nat_deficmpage,
 		   "nat icmp ipftq tab");
 	softn->ipf_nat_icmptq.ifq_next = &softn->ipf_nat_icmpacktq;
 
 	IPFTQ_INIT(&softn->ipf_nat_icmpacktq, softn->ipf_nat_defage,
 		   "nat icmpack ipftq tab");
 	softn->ipf_nat_icmpacktq.ifq_next = &softn->ipf_nat_iptq;
 
 	IPFTQ_INIT(&softn->ipf_nat_iptq, softn->ipf_nat_defipage,
 		   "nat ip ipftq tab");
 	softn->ipf_nat_iptq.ifq_next = &softn->ipf_nat_pending;
 
 	IPFTQ_INIT(&softn->ipf_nat_pending, 1, "nat pending ipftq tab");
 	softn->ipf_nat_pending.ifq_next = NULL;
 
 	for (i = 0, tq = softn->ipf_nat_tcptq; i < IPF_TCP_NSTATES; i++, tq++) {
 		if (tq->ifq_ttl < softn->ipf_nat_deficmpage)
 			tq->ifq_ttl = softn->ipf_nat_deficmpage;
 #ifdef LARGE_NAT
 		else if (tq->ifq_ttl > softn->ipf_nat_defage)
 			tq->ifq_ttl = softn->ipf_nat_defage;
 #endif
 	}
 
 	/*
 	 * Increase this because we may have "keep state" following
 	 * this too and packet storms can occur if this is removed
 	 * too quickly.
 	 */
 	softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack;
 
 	MUTEX_INIT(&softn->ipf_nat_new, "ipf nat new mutex");
 	MUTEX_INIT(&softn->ipf_nat_io, "ipf nat io mutex");
 
 	softn->ipf_nat_inited = 1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_soft_fini                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* Free all memory used by NAT structures allocated at runtime.             */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_soft_fini(softc, arg)
 	ipf_main_softc_t *softc;
 	void *arg;
 {
 	ipf_nat_softc_t *softn = arg;
 	ipftq_t *ifq, *ifqnext;
 
 	(void) ipf_nat_clearlist(softc, softn);
 	(void) ipf_nat_flushtable(softc, softn);
 
 	/*
 	 * Proxy timeout queues are not cleaned here because although they
 	 * exist on the NAT list, ipf_proxy_unload is called after unload
 	 * and the proxies actually are responsible for them being created.
 	 * Should the proxy timeouts have their own list?  There's no real
 	 * justification as this is the only complication.
 	 */
 	for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifqnext) {
 		ifqnext = ifq->ifq_next;
 		if (ipf_deletetimeoutqueue(ifq) == 0)
 			ipf_freetimeoutqueue(softc, ifq);
 	}
 
 	if (softn->ipf_nat_table[0] != NULL) {
 		KFREES(softn->ipf_nat_table[0],
 		       sizeof(nat_t *) * softn->ipf_nat_table_sz);
 		softn->ipf_nat_table[0] = NULL;
 	}
 	if (softn->ipf_nat_table[1] != NULL) {
 		KFREES(softn->ipf_nat_table[1],
 		       sizeof(nat_t *) * softn->ipf_nat_table_sz);
 		softn->ipf_nat_table[1] = NULL;
 	}
 	if (softn->ipf_nat_map_rules != NULL) {
 		KFREES(softn->ipf_nat_map_rules,
 		       sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz);
 		softn->ipf_nat_map_rules = NULL;
 	}
 	if (softn->ipf_nat_rdr_rules != NULL) {
 		KFREES(softn->ipf_nat_rdr_rules,
 		       sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz);
 		softn->ipf_nat_rdr_rules = NULL;
 	}
 	if (softn->ipf_hm_maptable != NULL) {
 		KFREES(softn->ipf_hm_maptable,
 		       sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz);
 		softn->ipf_hm_maptable = NULL;
 	}
 	if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen,
 		       sizeof(u_int) * softn->ipf_nat_table_sz);
 		softn->ipf_nat_stats.ns_side[0].ns_bucketlen = NULL;
 	}
 	if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen,
 		       sizeof(u_int) * softn->ipf_nat_table_sz);
 		softn->ipf_nat_stats.ns_side[1].ns_bucketlen = NULL;
 	}
 
 	if (softn->ipf_nat_inited == 1) {
 		softn->ipf_nat_inited = 0;
 		ipf_sttab_destroy(softn->ipf_nat_tcptq);
 
 		MUTEX_DESTROY(&softn->ipf_nat_new);
 		MUTEX_DESTROY(&softn->ipf_nat_io);
 
 		MUTEX_DESTROY(&softn->ipf_nat_udptq.ifq_lock);
 		MUTEX_DESTROY(&softn->ipf_nat_udpacktq.ifq_lock);
 		MUTEX_DESTROY(&softn->ipf_nat_icmptq.ifq_lock);
 		MUTEX_DESTROY(&softn->ipf_nat_icmpacktq.ifq_lock);
 		MUTEX_DESTROY(&softn->ipf_nat_iptq.ifq_lock);
 		MUTEX_DESTROY(&softn->ipf_nat_pending.ifq_lock);
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_setlock                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  arg(I) - pointer to soft state information                  */
 /*              tmp(I) - new lock value                                     */
 /*                                                                          */
 /* Set the "lock status" of NAT to the value in tmp.                        */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_setlock(arg, tmp)
 	void *arg;
 	int tmp;
 {
 	ipf_nat_softc_t *softn = arg;
 
 	softn->ipf_nat_lock = tmp;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_addrdr                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  softn(I) - pointer to NAT context structure                 */
 /*              n(I)     - pointer to NAT rule to add                       */
 /*                                                                          */
 /* Appends a redirect rule to the tail of its bucket in the redirect rule   */
 /* hash table and registers the rule's destination netmask in the set of    */
 /* netmasks in use by redirect rules.  Increments the rule's in_use count.  */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_addrdr(softn, n)
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	ipnat_t **tail;
 	u_int bucket, rhv = 0;
 	u_32_t net;
 	int bits = 0;
 
 	/*
 	 * Only a plain address/mask destination contributes a mask width
 	 * and a hash; every other address type uses width 0 and hash 0.
 	 */
 	if (n->in_odstatype == FRI_NORMAL) {
 		bits = count4bits(n->in_odstmsk);
 		net = (n->in_odstaddr & n->in_odstmsk);
 		rhv = NAT_HASH_FN(net, 0, 0xffffffff);
 	}
 	ipf_inet_mask_add(bits, &softn->ipf_nat_rdr_mask);
 
 	bucket = rhv % softn->ipf_nat_rdrrules_sz;
 
 	/* Walk to the end of the bucket's chain; new rules go last. */
 	for (tail = softn->ipf_nat_rdr_rules + bucket; *tail != NULL; )
 		tail = &(*tail)->in_rnext;
 
 	n->in_rnext = NULL;
 	n->in_prnext = tail;
 	/*
 	 * NOTE(review): the bucket index is recorded here, whereas
 	 * ipf_nat_addmap() records the unreduced hash in in_hv[1] -
 	 * confirm which of the two is intended.
 	 */
 	n->in_hv[0] = bucket;
 	n->in_use++;
 	*tail = n;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_addmap                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  softn(I) - pointer to NAT context structure                 */
 /*              n(I)     - pointer to NAT rule to add                       */
 /*                                                                          */
 /* Appends a map rule to the tail of its bucket in the map rule hash table  */
 /* and registers the rule's source netmask in the set of netmasks in use    */
 /* by map rules.  Increments the rule's in_use count.                       */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_addmap(softn, n)
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	ipnat_t **tail;
 	u_int bucket, rhv = 0;
 	u_32_t net;
 	int bits = 0;
 
 	/*
 	 * Only a plain address/mask source contributes a mask width and a
 	 * hash; every other address type uses width 0 and hash 0.
 	 */
 	if (n->in_osrcatype == FRI_NORMAL) {
 		bits = count4bits(n->in_osrcmsk);
 		net = (n->in_osrcaddr & n->in_osrcmsk);
 		rhv = NAT_HASH_FN(net, 0, 0xffffffff);
 	}
 	ipf_inet_mask_add(bits, &softn->ipf_nat_map_mask);
 
 	bucket = rhv % softn->ipf_nat_maprules_sz;
 
 	/* Walk to the end of the bucket's chain; new rules go last. */
 	for (tail = softn->ipf_nat_map_rules + bucket; *tail != NULL; )
 		tail = &(*tail)->in_mnext;
 
 	n->in_mnext = NULL;
 	n->in_pmnext = tail;
 	/*
 	 * NOTE(review): the unreduced hash is recorded here, whereas
 	 * ipf_nat_addrdr() records the bucket index in in_hv[0] -
 	 * confirm which of the two is intended.
 	 */
 	n->in_hv[1] = rhv;
 	n->in_use++;
 	*tail = n;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_delrdr                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
 /*                                                                          */
 /* Removes a redirect rule from the hash table of redirect rules.           */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_delrdr(softn, n)
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	if (n->in_odstatype == FRI_NORMAL) {
 		int k = count4bits(n->in_odstmsk);
 		ipf_inet_mask_del(k, &softn->ipf_nat_rdr_mask);
 	} else {
 		ipf_inet_mask_del(0, &softn->ipf_nat_rdr_mask);
 	}
 	if (n->in_rnext)
 		n->in_rnext->in_prnext = n->in_prnext;
 	*n->in_prnext = n->in_rnext;
 	n->in_use--;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_delmap                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  softn(I) - pointer to NAT context structure                 */
 /*              n(I)     - pointer to NAT rule to delete                    */
 /*                                                                          */
 /* Unlinks a map rule from its hash bucket, withdraws its netmask from the  */
 /* map netmask set and decrements the rule's in_use count.                  */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_delmap(softn, n)
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	int bits = 0;
 
 	/* Mirror ipf_nat_addmap(): width 0 unless a plain address/mask. */
 	if (n->in_osrcatype == FRI_NORMAL)
 		bits = count4bits(n->in_osrcmsk);
 	ipf_inet_mask_del(bits, &softn->ipf_nat_map_mask);
 
 	/* Splice the rule out of the chain it is linked on. */
 	if (n->in_mnext != NULL)
 		n->in_mnext->in_pmnext = n->in_pmnext;
 	*n->in_pmnext = n->in_mnext;
 
 	n->in_use--;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_hostmap                                             */
 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
 /*                                else a pointer to the hostmapping to use  */
 /* Parameters:  np(I)   - pointer to NAT rule                               */
 /*              real(I) - real IP address                                   */
 /*              map(I)  - mapped IP address                                 */
 /*              port(I) - destination port number                           */
 /* Write Locks: ipf_nat                                                     */
 /*                                                                          */
 /* Check if an ip address has already been allocated for a given mapping    */
 /* that is not doing port based translation.  If is not yet allocated, then */
 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
 /* ------------------------------------------------------------------------ */
 static struct hostmap *
 ipf_nat_hostmap(softn, np, src, dst, map, port)
 	ipf_nat_softc_t *softn;
 	ipnat_t *np;
 	struct in_addr src;
 	struct in_addr dst;
 	struct in_addr map;
 	u_32_t port;
 {
 	hostmap_t *hm;
 	u_int hv, rhv;
 
 	hv = (src.s_addr ^ dst.s_addr);
 	hv += src.s_addr;
 	hv += dst.s_addr;
 	rhv = hv;
 	hv %= softn->ipf_nat_hostmap_sz;
 	for (hm = softn->ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext)
 		if ((hm->hm_osrcip.s_addr == src.s_addr) &&
 		    (hm->hm_odstip.s_addr == dst.s_addr) &&
 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
 		    ((port == 0) || (port == hm->hm_port))) {
 			softn->ipf_nat_stats.ns_hm_addref++;
 			hm->hm_ref++;
 			return hm;
 		}
 
 	if (np == NULL) {
 		softn->ipf_nat_stats.ns_hm_nullnp++;
 		return NULL;
 	}
 
 	KMALLOC(hm, hostmap_t *);
 	if (hm) {
 		hm->hm_next = softn->ipf_hm_maplist;
 		hm->hm_pnext = &softn->ipf_hm_maplist;
 		if (softn->ipf_hm_maplist != NULL)
 			softn->ipf_hm_maplist->hm_pnext = &hm->hm_next;
 		softn->ipf_hm_maplist = hm;
 		hm->hm_hnext = softn->ipf_hm_maptable[hv];
 		hm->hm_phnext = softn->ipf_hm_maptable + hv;
 		if (softn->ipf_hm_maptable[hv] != NULL)
 			softn->ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
 		softn->ipf_hm_maptable[hv] = hm;
 		hm->hm_ipnat = np;
 		np->in_use++;
 		hm->hm_osrcip = src;
 		hm->hm_odstip = dst;
 		hm->hm_nsrcip = map;
 		hm->hm_ndstip.s_addr = 0;
 		hm->hm_ref = 1;
 		hm->hm_port = port;
 		hm->hm_hv = rhv;
 		hm->hm_v = 4;
 		softn->ipf_nat_stats.ns_hm_new++;
 	} else {
 		softn->ipf_nat_stats.ns_hm_newfail++;
 	}
 	return hm;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_hostmapdel                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              hmp(I)   - pointer to hostmap structure pointer             */
 /* Write Locks: ipf_nat                                                     */
 /*                                                                          */
 /* Clear the caller's pointer and drop one reference from the hostmap.  On  */
 /* the last reference, release its NAT rule, unlink it and free it.         */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_hostmapdel(softc, hmp)
 	ipf_main_softc_t *softc;
 	struct hostmap **hmp;
 {
 	struct hostmap *hm = *hmp;
 
 	/* The caller's reference is gone whether or not hm survives. */
 	*hmp = NULL;
 
 	hm->hm_ref--;
 	if (hm->hm_ref != 0)
 		return;
 
 	ipf_nat_rule_deref(softc, &hm->hm_ipnat);
 
 	/* Unlink from the hash bucket chain. */
 	if (hm->hm_hnext != NULL)
 		hm->hm_hnext->hm_phnext = hm->hm_phnext;
 	*hm->hm_phnext = hm->hm_hnext;
 
 	/* Unlink from the list of all hostmaps. */
 	if (hm->hm_next != NULL)
 		hm->hm_next->hm_pnext = hm->hm_pnext;
 	*hm->hm_pnext = hm->hm_next;
 
 	KFREE(hm);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_fix_outcksum                                            */
 /* Returns:     Nil                                                         */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              sp(I)  - location of 16bit checksum to update               */
 /*              n((I)  - amount to adjust checksum by                       */
 /*                                                                          */
 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
 /* ------------------------------------------------------------------------ */
 void
 ipf_fix_outcksum(cksum, sp, n, partial)
 	int cksum;
 	u_short *sp;
 	u_32_t n, partial;
 {
 	u_short sumshort;
 	u_32_t sum1;
 
 	if (n == 0)
 		return;
 
 	if (cksum == 4) {
 		*sp = 0;
 		return;
 	}
 	if (cksum == 2) {
 		sum1 = partial;
 		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
 		*sp = htons(sum1);
 		return;
 	}
 	sum1 = (~ntohs(*sp)) & 0xffff;
 	sum1 += (n);
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	/* Again */
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	sumshort = ~(u_short)sum1;
 	*(sp) = htons(sumshort);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_fix_incksum                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              sp(I)  - location of 16bit checksum to update               */
 /*              n((I)  - amount to adjust checksum by                       */
 /*                                                                          */
 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
 /* ------------------------------------------------------------------------ */
 void
 ipf_fix_incksum(cksum, sp, n, partial)
 	int cksum;
 	u_short *sp;
 	u_32_t n, partial;
 {
 	u_short sumshort;
 	u_32_t sum1;
 
 	if (n == 0)
 		return;
 
 	if (cksum == 4) {
 		*sp = 0;
 		return;
 	}
 	if (cksum == 2) {
 		sum1 = partial;
 		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
 		*sp = htons(sum1);
 		return;
 	}
 
 	sum1 = (~ntohs(*sp)) & 0xffff;
 	sum1 += ~(n) & 0xffff;
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	/* Again */
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	sumshort = ~(u_short)sum1;
 	*(sp) = htons(sumshort);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_fix_datacksum                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
 /*              n((I)  - amount to adjust checksum by                       */
 /*                                                                          */
 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
 /* data section of an IP packet.                                            */
 /*                                                                          */
 /* The only situation in which you need to do this is when NAT'ing an       */
 /* ICMP error message. Such a message, contains in its body the IP header   */
 /* of the original IP packet, that causes the error.                        */
 /*                                                                          */
 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
 /* kernel the data section of the ICMP error is just data, and no special   */
 /* processing like hardware cksum or ntohs processing have been done by the */
 /* kernel on the data section.                                              */
 /* ------------------------------------------------------------------------ */
 void
 ipf_fix_datacksum(sp, n)
 	u_short *sp;
 	u_32_t n;
 {
 	u_short sumshort;
 	u_32_t sum1;
 
 	if (n == 0)
 		return;
 
 	sum1 = (~ntohs(*sp)) & 0xffff;
 	sum1 += (n);
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	/* Again */
 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
 	sumshort = ~(u_short)sum1;
 	*(sp) = htons(sumshort);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_ioctl                                               */
 /* Returns:     int - 0 == success, != 0 == failure                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              data(I)  - pointer to ioctl data                            */
 /*              cmd(I)   - ioctl command integer                            */
 /*              mode(I)  - file mode bits used with open                    */
 /*              uid(I)   - uid of calling process                           */
 /*              ctx(I)   - pointer used as key for finding context          */
 /*                                                                          */
 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
 /* For the rule commands (SIOCADNAT, SIOCRMNAT, SIOCPURGENAT) the rule is   */
 /* first copied in and looked up in the rule list with ipf_nat_io held;     */
 /* each of those cases releases ipf_nat_io before leaving the switch.       */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx)
 	ipf_main_softc_t *softc;
 	ioctlcmd_t cmd;
 	caddr_t data;
 	int mode, uid;
 	void *ctx;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	int error = 0, ret, arg, getlock;
 	ipnat_t *nat, *nt, *n;
 	ipnat_t natd;
 	SPL_INT(s);
 
 #if BSD_GE_YEAR(199306) && defined(_KERNEL)
 # if NETBSD_GE_REV(399002000)
 	if ((mode & FWRITE) &&
 	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
 				     KAUTH_REQ_NETWORK_FIREWALL_FW,
 				     NULL, NULL, NULL))
 # else
 #  if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
 	if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE))
 #  else
 	if ((securelevel >= 3) && (mode & FWRITE))
 #  endif
 # endif
 	{
 		IPFERROR(60001);
 		return EPERM;
 	}
 #endif
 
 	/* getlock tells the helpers whether they must take ipf_nat. */
 #if defined(__osf__) && defined(_KERNEL)
 	getlock = 0;
 #else
 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
 #endif
 
 	n = NULL;
 	nt = NULL;
 	nat = NULL;
 
 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT) ||
 	    (cmd == (ioctlcmd_t)SIOCPURGENAT)) {
 		if (mode & NAT_SYSSPACE) {
 			bcopy(data, (char *)&natd, sizeof(natd));
 			nat = &natd;
 			error = 0;
 		} else {
 			/*
 			 * Copy in the fixed size part first to learn the
 			 * full rule size, then copy in the whole rule.
 			 */
 			bzero(&natd, sizeof(natd));
 			error = ipf_inobj(softc, data, NULL, &natd,
 					  IPFOBJ_IPNAT);
 			if (error != 0)
 				goto done;
 
 			if (natd.in_size < sizeof(ipnat_t)) {
 				error = EINVAL;
 				goto done;
 			}
 			KMALLOCS(nt, ipnat_t *, natd.in_size);
 			if (nt == NULL) {
 				IPFERROR(60070);
 				error = ENOMEM;
 				goto done;
 			}
 			bzero(nt, natd.in_size);
 			error = ipf_inobjsz(softc, data, nt, IPFOBJ_IPNAT,
 					    natd.in_size);
 			if (error)
 				goto done;
 			nat = nt;
 		}
 
 		/*
 		 * For add/delete, look to see if the NAT entry is
 		 * already present
 		 */
 		nat->in_flags &= IPN_USERFLAGS;
 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
 			if (nat->in_osrcatype == FRI_NORMAL ||
 			    nat->in_osrcatype == FRI_NONE)
 				nat->in_osrcaddr &= nat->in_osrcmsk;
 			if (nat->in_odstatype == FRI_NORMAL ||
 			    nat->in_odstatype == FRI_NONE)
 				nat->in_odstaddr &= nat->in_odstmsk;
 			if ((nat->in_flags & (IPN_SPLIT|IPN_SIPRANGE)) == 0) {
 				if (nat->in_nsrcatype == FRI_NORMAL)
 					nat->in_nsrcaddr &= nat->in_nsrcmsk;
 				if (nat->in_ndstatype == FRI_NORMAL)
 					nat->in_ndstaddr &= nat->in_ndstmsk;
 			}
 		}
 
 		error = ipf_nat_rule_init(softc, softn, nat);
 		if (error != 0)
 			goto done;
 
 		/* Held until the matching case below releases it. */
 		MUTEX_ENTER(&softn->ipf_nat_io);
 		for (n = softn->ipf_nat_list; n != NULL; n = n->in_next)
 			if (ipf_nat_cmp_rules(nat, n) == 0)
 				break;
 	}
 
 	switch (cmd)
 	{
 #ifdef  IPFILTER_LOG
 	case SIOCIPFFB :
 	{
 		int tmp;
 
 		if (!(mode & FWRITE)) {
 			IPFERROR(60002);
 			error = EPERM;
 		} else {
 			tmp = ipf_log_clear(softc, IPL_LOGNAT);
 			error = BCOPYOUT(&tmp, data, sizeof(tmp));
 			if (error != 0) {
 				IPFERROR(60057);
 				error = EFAULT;
 			}
 		}
 		break;
 	}
 
 	case SIOCSETLG :
 		if (!(mode & FWRITE)) {
 			IPFERROR(60003);
 			error = EPERM;
 		} else {
 			error = BCOPYIN(data, &softn->ipf_nat_logging,
 					sizeof(softn->ipf_nat_logging));
 			if (error != 0)
 				error = EFAULT;
 		}
 		break;
 
 	case SIOCGETLG :
 		error = BCOPYOUT(&softn->ipf_nat_logging, data,
 				 sizeof(softn->ipf_nat_logging));
 		if (error != 0) {
 			IPFERROR(60004);
 			error = EFAULT;
 		}
 		break;
 
 	case FIONREAD :
 		arg = ipf_log_bytesused(softc, IPL_LOGNAT);
 		error = BCOPYOUT(&arg, data, sizeof(arg));
 		if (error != 0) {
 			IPFERROR(60005);
 			error = EFAULT;
 		}
 		break;
 #endif
 	case SIOCADNAT :
 		if (!(mode & FWRITE)) {
 			IPFERROR(60006);
 			error = EPERM;
 		} else if (n != NULL) {
 			/* Report the line number of the existing rule. */
 			natd.in_flineno = n->in_flineno;
 			(void) ipf_outobj(softc, data, &natd, IPFOBJ_IPNAT);
 			IPFERROR(60007);
 			error = EEXIST;
 		} else if (nt == NULL) {
 			IPFERROR(60008);
 			error = ENOMEM;
 		}
 		if (error != 0) {
 			MUTEX_EXIT(&softn->ipf_nat_io);
 			break;
 		}
 		if (nat != nt)
 			bcopy((char *)nat, (char *)nt, sizeof(*n));
 		error = ipf_nat_siocaddnat(softc, softn, nt, getlock);
 		MUTEX_EXIT(&softn->ipf_nat_io);
 		if (error == 0) {
 			/* The rule is now on the list; skip the cleanup. */
 			nat = NULL;
 			nt = NULL;
 		}
 		break;
 
 	case SIOCRMNAT :
 	case SIOCPURGENAT :
 		if (!(mode & FWRITE)) {
 			IPFERROR(60009);
 			error = EPERM;
 			n = NULL;
 		} else if (n == NULL) {
 			IPFERROR(60010);
 			error = ESRCH;
 		}
 
 		if (error != 0) {
 			MUTEX_EXIT(&softn->ipf_nat_io);
 			break;
 		}
 		if (cmd == (ioctlcmd_t)SIOCPURGENAT) {
 			error = ipf_outobjsz(softc, data, n, IPFOBJ_IPNAT,
 					     n->in_size);
 			if (error) {
 				MUTEX_EXIT(&softn->ipf_nat_io);
 				goto done;
 			}
 			n->in_flags |= IPN_PURGE;
 		}
 		ipf_nat_siocdelnat(softc, softn, n, getlock);
 
 		MUTEX_EXIT(&softn->ipf_nat_io);
 		n = NULL;
 		break;
 
 	case SIOCGNATS :
 	    {
 		natstat_t *nsp = &softn->ipf_nat_stats;
 
 		nsp->ns_side[0].ns_table = softn->ipf_nat_table[0];
 		nsp->ns_side[1].ns_table = softn->ipf_nat_table[1];
 		nsp->ns_list = softn->ipf_nat_list;
 		nsp->ns_maptable = softn->ipf_hm_maptable;
 		nsp->ns_maplist = softn->ipf_hm_maplist;
 		nsp->ns_nattab_sz = softn->ipf_nat_table_sz;
 		nsp->ns_nattab_max = softn->ipf_nat_table_max;
 		nsp->ns_rultab_sz = softn->ipf_nat_maprules_sz;
 		nsp->ns_rdrtab_sz = softn->ipf_nat_rdrrules_sz;
 		nsp->ns_hostmap_sz = softn->ipf_nat_hostmap_sz;
 		nsp->ns_instances = softn->ipf_nat_instances;
 		nsp->ns_ticks = softc->ipf_ticks;
 		/*
 		 * NOTE(review): the log cases above are guarded by
 		 * IPFILTER_LOG and use IPL_LOGNAT, while this block tests
 		 * IPFILTER_LOGGING and uses IPF_LOGNAT - confirm that this
 		 * branch is ever compiled in.
 		 */
 #ifdef IPFILTER_LOGGING
 		nsp->ns_log_ok = ipf_log_logok(softc, IPF_LOGNAT);
 		nsp->ns_log_fail = ipf_log_failures(softc, IPF_LOGNAT);
 #else
 		nsp->ns_log_ok = 0;
 		nsp->ns_log_fail = 0;
 #endif
 		error = ipf_outobj(softc, data, nsp, IPFOBJ_NATSTAT);
 		break;
 	    }
 
 	case SIOCGNATL :
 	    {
 		natlookup_t nl;
 
 		error = ipf_inobj(softc, data, NULL, &nl, IPFOBJ_NATLOOKUP);
 		if (error == 0) {
 			void *ptr;
 
 			if (getlock) {
 				READ_ENTER(&softc->ipf_nat);
 			}
 
 			switch (nl.nl_v)
 			{
 			case 4 :
 				ptr = ipf_nat_lookupredir(&nl);
 				break;
 #ifdef USE_INET6
 			case 6 :
 				ptr = ipf_nat6_lookupredir(&nl);
 				break;
 #endif
 			default:
 				ptr = NULL;
 				break;
 			}
 
 			if (getlock) {
 				RWLOCK_EXIT(&softc->ipf_nat);
 			}
 			if (ptr != NULL) {
 				error = ipf_outobj(softc, data, &nl,
 						   IPFOBJ_NATLOOKUP);
 			} else {
 				IPFERROR(60011);
 				error = ESRCH;
 			}
 		}
 		break;
 	    }
 
 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
 		if (!(mode & FWRITE)) {
 			IPFERROR(60012);
 			error = EPERM;
 			break;
 		}
 		if (getlock) {
 			WRITE_ENTER(&softc->ipf_nat);
 		}
 
 		error = BCOPYIN(data, &arg, sizeof(arg));
 		if (error != 0) {
 			IPFERROR(60013);
 			error = EFAULT;
 		} else {
 			if (arg == 0)
 				ret = ipf_nat_flushtable(softc, softn);
 			else if (arg == 1)
 				ret = ipf_nat_clearlist(softc, softn);
 			else
 				ret = ipf_nat_extraflush(softc, softn, arg);
 			ipf_proxy_flush(softc->ipf_proxy_soft, arg);
 		}
 
 		if (getlock) {
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		if (error == 0) {
 			error = BCOPYOUT(&ret, data, sizeof(ret));
 			/*
 			 * Normalise a failed copy-out to EFAULT, as every
 			 * other command in this function does.
 			 */
 			if (error != 0)
 				error = EFAULT;
 		}
 		break;
 
 	case SIOCMATCHFLUSH :
 		if (!(mode & FWRITE)) {
 			IPFERROR(60014);
 			error = EPERM;
 			break;
 		}
 		if (getlock) {
 			WRITE_ENTER(&softc->ipf_nat);
 		}
 
 		error = ipf_nat_matchflush(softc, softn, data);
 
 		if (getlock) {
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		break;
 
 	case SIOCPROXY :
 		error = ipf_proxy_ioctl(softc, data, cmd, mode, ctx);
 		break;
 
 	case SIOCSTLCK :
 		if (!(mode & FWRITE)) {
 			IPFERROR(60015);
 			error = EPERM;
 		} else {
 			error = ipf_lock(data, &softn->ipf_nat_lock);
 		}
 		break;
 
 	case SIOCSTPUT :
 		if ((mode & FWRITE) != 0) {
 			error = ipf_nat_putent(softc, data, getlock);
 		} else {
 			IPFERROR(60016);
 			error = EACCES;
 		}
 		break;
 
 	case SIOCSTGSZ :
 		/* Only permitted while the NAT lock status is set. */
 		if (softn->ipf_nat_lock) {
 			error = ipf_nat_getsz(softc, data, getlock);
 		} else {
 			IPFERROR(60017);
 			error = EACCES;
 		}
 		break;
 
 	case SIOCSTGET :
 		/* Only permitted while the NAT lock status is set. */
 		if (softn->ipf_nat_lock) {
 			error = ipf_nat_getent(softc, data, getlock);
 		} else {
 			IPFERROR(60018);
 			error = EACCES;
 		}
 		break;
 
 	case SIOCGENITER :
 	    {
 		ipfgeniter_t iter;
 		ipftoken_t *token;
 		ipfobj_t obj;
 
 		error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
 		if (error != 0)
 			break;
 
 		SPL_SCHED(s);
 		token = ipf_token_find(softc, iter.igi_type, uid, ctx);
 		if (token != NULL) {
 			error  = ipf_nat_iterator(softc, token, &iter, &obj);
 			WRITE_ENTER(&softc->ipf_tokens);
 			ipf_token_deref(softc, token);
 			RWLOCK_EXIT(&softc->ipf_tokens);
 		} else {
 			/*
 			 * Without a token nothing was iterated, so do not
 			 * report success to the caller.
 			 */
 			error = ESRCH;
 		}
 		SPL_X(s);
 		break;
 	    }
 
 	case SIOCIPFDELTOK :
 		error = BCOPYIN(data, &arg, sizeof(arg));
 		if (error == 0) {
 			SPL_SCHED(s);
 			error = ipf_token_del(softc, arg, uid, ctx);
 			SPL_X(s);
 		} else {
 			IPFERROR(60019);
 			error = EFAULT;
 		}
 		break;
 
 	case SIOCGTQTAB :
 		error = ipf_outobj(softc, data, softn->ipf_nat_tcptq,
 				   IPFOBJ_STATETQTAB);
 		break;
 
 	case SIOCGTABL :
 		error = ipf_nat_gettable(softc, softn, data);
 		break;
 
 	default :
 		IPFERROR(60020);
 		error = EINVAL;
 		break;
 	}
 done:
 	if (nat != NULL)
 		ipf_nat_rule_fini(softc, nat);
 	/*
 	 * nt is only ever allocated with natd.in_size bytes.  Free it with
 	 * that size rather than nt->in_size: if the second copy-in failed,
 	 * nt may still be zero filled and its in_size field would then not
 	 * match the allocation.
 	 */
 	if (nt != NULL)
 		KFREES(nt, natd.in_size);
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_siocaddnat                                          */
 /* Returns:     int - 0 == success, != 0 == failure                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              n(I)       - pointer to new NAT rule                        */
 /*              getlock(I) - non-zero if ipf_nat must be write locked here  */
 /* Mutex Locks: ipf_nat_io                                                  */
 /*                                                                          */
 /* Handle SIOCADNAT.  Resolve the rule via ipf_nat_resolverule(), append    */
 /* it to the list of NAT rules and insert it into the redirect and/or map   */
 /* hash tables as selected by in_redir.                                     */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_siocaddnat(softc, softn, n, getlock)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 	int getlock;
 {
 	int error = 0;
 	int i;
 
 	if (ipf_nat_resolverule(softc, n) != 0) {
 		IPFERROR(60022);
 		return ENOENT;
 	}
 
 	/* Reject a rule that sets in_age[1] without setting in_age[0]. */
 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) {
 		IPFERROR(60023);
 		return EINVAL;
 	}
 
 	if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) {
 		/*
 		 * Prerecord whether or not the destination of the divert
 		 * is local or not to the interface the packet is going
 		 * to be sent out.
 		 */
 		n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1],
 						n->in_ifps[1], &n->in_ndstip6);
 	}
 
 	if (getlock) {
 		WRITE_ENTER(&softc->ipf_nat);
 	}
 
 	/* Append to the list of all NAT rules. */
 	n->in_next = NULL;
 	n->in_pnext = softn->ipf_nat_list_tail;
 	*n->in_pnext = n;
 	softn->ipf_nat_list_tail = &n->in_next;
 	n->in_use++;
 
 	if (n->in_redir & NAT_REDIRECT) {
 		n->in_flags &= ~IPN_NOTDST;
 		if (n->in_v[0] == 4)
 			ipf_nat_addrdr(softn, n);
 #ifdef USE_INET6
 		else if (n->in_v[0] == 6)
 			ipf_nat6_addrdr(softn, n);
 #endif
 		ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_rdr);
 	}
 
 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
 		n->in_flags &= ~IPN_NOTSRC;
 		if (n->in_v[0] == 4)
 			ipf_nat_addmap(softn, n);
 #ifdef USE_INET6
 		else if (n->in_v[0] == 6)
 			ipf_nat6_addmap(softn, n);
 #endif
 		ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_map);
 	}
 
 	/* A non-zero age gets a timeout queue on the ipf_nat_utqe list. */
 	for (i = 0; i < 2; i++) {
 		if (n->in_age[i] == 0)
 			continue;
 		n->in_tqehead[i] = ipf_addtimeoutqueue(softc,
 						       &softn->ipf_nat_utqe,
 						       n->in_age[i]);
 	}
 
 	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
 
 	ATOMIC_INC32(softn->ipf_nat_stats.ns_rules);
 #if SOLARIS && !defined(INSTANCES)
 	pfil_delayed_copy = 0;
 #endif
 	if (getlock) {
 		RWLOCK_EXIT(&softc->ipf_nat);			/* WRITE */
 	}
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_ruleaddrinit                                        */
 /* Returns:     int - 0 == success, != 0 == failure                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              n(I)     - pointer to NAT rule                              */
 /*                                                                          */
 /* Validate the lookup types used by the rule, mirror the addresses for a   */
 /* NAT_BIMAP rule and initialise the four NAT address structures (old and   */
 /* new source/destination).  ipf_nat_builddivertmp() is then called for     */
 /* NAT_DIVERTUDP rules.                                                     */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_ruleaddrinit(softc, softn, n)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	int ifidx, rc;
 
 	/*
 	 * A new destination or new source that is resolved through a lookup
 	 * is only accepted when the lookup type is a destination list.
 	 */
 	if ((n->in_ndst.na_atype == FRI_LOOKUP) &&
 	    (n->in_ndst.na_type != IPLT_DSTLIST)) {
 		IPFERROR(60071);
 		return EINVAL;
 	}
 	if ((n->in_nsrc.na_atype == FRI_LOOKUP) &&
 	    (n->in_nsrc.na_type != IPLT_DSTLIST)) {
 		IPFERROR(60069);
 		return EINVAL;
 	}
 
 	/*
 	 * For bimap, the destination side is filled in from the source side
 	 * with old and new swapped.
 	 */
 	if (n->in_redir == NAT_BIMAP) {
 		n->in_ndstaddr = n->in_osrcaddr;
 		n->in_ndstmsk = n->in_osrcmsk;
 		n->in_odstaddr = n->in_nsrcaddr;
 		n->in_odstmsk = n->in_nsrcmsk;
 	}
 
 	/*
 	 * Redirect rules use the second interface slot, everything else
 	 * uses the first.
 	 */
 	ifidx = (n->in_redir & NAT_REDIRECT) ? 1 : 0;
 
 	/*
 	 * Initialise the address fields in turn, stopping at the first
 	 * failure.
 	 */
 	rc = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc, 1,
 				  n->in_ifps[ifidx]);
 	if (rc == 0)
 		rc = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst, 1,
 					  n->in_ifps[ifidx]);
 	if (rc == 0)
 		rc = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc, 1,
 					  n->in_ifps[ifidx]);
 	if (rc == 0)
 		rc = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst, 1,
 					  n->in_ifps[ifidx]);
 	if (rc != 0)
 		return rc;
 
 	if (n->in_redir & NAT_DIVERTUDP)
 		ipf_nat_builddivertmp(softn, n);
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_resolverule                                         */
 /* Returns:     int - 0 == success, -1 == proxy lookup failed               */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              n(I)     - pointer to NAT rule                              */
 /*                                                                          */
 /* Resolve the interface names and, when a proxy label is present, the      */
 /* proxy referenced by a NAT rule into pointers stored in the rule.         */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_resolverule(softc, n)
 	ipf_main_softc_t *softc;
 	ipnat_t *n;
 {
 	char *names = n->in_names;
 	int pridx;
 
 	n->in_ifps[0] = ipf_resolvenic(softc, names + n->in_ifnames[0],
 				       n->in_v[0]);
 
 	/*
 	 * A second interface name of -1 means "same as the first one".
 	 */
 	if (n->in_ifnames[1] != -1) {
 		n->in_ifps[1] = ipf_resolvenic(softc, names + n->in_ifnames[1],
 					       n->in_v[1]);
 	} else {
 		n->in_ifnames[1] = n->in_ifnames[0];
 		n->in_ifps[1] = n->in_ifps[0];
 	}
 
 	/*
 	 * No proxy label, nothing more to resolve.
 	 */
 	if (n->in_plabel == -1)
 		return 0;
 
 	/*
 	 * Redirect rules look the proxy up with the first protocol slot,
 	 * all other rules with the second.
 	 */
 	pridx = (n->in_redir & NAT_REDIRECT) ? 0 : 1;
 	n->in_apr = ipf_proxy_lookup(softc->ipf_proxy_soft, n->in_pr[pridx],
 				     names + n->in_plabel);
 
 	return (n->in_apr == NULL) ? -1 : 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_siocdelnat                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              softn(I)   - pointer to NAT context structure               */
 /*              n(I)       - pointer to NAT rule to remove                  */
 /*              getlock(I) - non-zero if ipf_nat must be locked here        */
 /* Mutex Locks: ipf_nat_io                                                  */
 /*                                                                          */
 /* Remove a NAT rule by calling ipf_nat_delrule() with purging allowed,     */
 /* taking the ipf_nat write lock around the call when getlock is set.       */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_siocdelnat(softc, softn, n, getlock)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 	int getlock;
 {
 
 	if (getlock) {
 		WRITE_ENTER(&softc->ipf_nat);
 	}
 
 	/*
 	 * purge == 1: sessions created by this rule may be deleted too if
 	 * the rule asks for it (see ipf_nat_delrule()).
 	 */
 	ipf_nat_delrule(softc, softn, n, 1);
 
 	if (getlock) {
 		RWLOCK_EXIT(&softc->ipf_nat);			/* WRITE */
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_getsz                                               */
 /* Returns:     int - 0 == success, != 0 is the error value.                */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              data(I)    - pointer to natget structure with kernel        */
 /*                           pointer to get the size of.                    */
 /*              getlock(I) - non-zero if ipf_nat must be locked here        */
 /*                                                                          */
 /* Handle SIOCSTGSZ.                                                        */
 /* Return the size of the nat list entry to be copied back to user space.   */
 /* The size of the entry is stored in the ng_sz field and the entire natget */
 /* structure is copied back to the user.                                    */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_getsz(softc, data, getlock)
 	ipf_main_softc_t *softc;
 	caddr_t data;
 	int getlock;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	nat_t *target, *scan;
 	ap_session_t *proxy;
 	natget_t ng;
 	int error;
 
 	error = BCOPYIN(data, &ng, sizeof(ng));
 	if (error != 0) {
 		IPFERROR(60024);
 		return EFAULT;
 	}
 
 	if (getlock) {
 		READ_ENTER(&softc->ipf_nat);
 	}
 
 	target = ng.ng_ptr;
 	if (target != NULL) {
 		/*
 		 * Only accept a pointer that is present in the current list
 		 * of entries, so that random kernel data is never sized.
 		 */
 		scan = softn->ipf_nat_instances;
 		while ((scan != NULL) && (scan != target))
 			scan = scan->nat_next;
 		if (scan == NULL) {
 			if (getlock) {
 				RWLOCK_EXIT(&softc->ipf_nat);
 			}
 			IPFERROR(60026);
 			return ESRCH;
 		}
 	} else {
 		/*
 		 * No pointer given: report on the first entry, or a size
 		 * of 0 when the list is empty.
 		 */
 		target = softn->ipf_nat_instances;
 		ng.ng_sz = 0;
 		if (target == NULL) {
 			if (getlock) {
 				RWLOCK_EXIT(&softc->ipf_nat);
 			}
 			error = BCOPYOUT(&ng, data, sizeof(ng));
 			if (error == 0)
 				return 0;
 			IPFERROR(60025);
 			return EFAULT;
 		}
 	}
 
 	/*
 	 * Base size plus whatever the proxy session and its private data
 	 * need.
 	 */
 	ng.ng_sz = sizeof(nat_save_t);
 	proxy = target->nat_aps;
 	if (proxy != NULL) {
 		ng.ng_sz += sizeof(ap_session_t) - 4;
 		if (proxy->aps_data != 0)
 			ng.ng_sz += proxy->aps_psiz;
 	}
 	if (getlock) {
 		RWLOCK_EXIT(&softc->ipf_nat);
 	}
 
 	error = BCOPYOUT(&ng, data, sizeof(ng));
 	if (error == 0)
 		return 0;
 	IPFERROR(60027);
 	return EFAULT;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_getent                                              */
 /* Returns:     int - 0 == success, != 0 is the error value.                */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              data(I)    - pointer to natget structure with kernel pointer*/
 /*                           to NAT structure to copy out.                  */
 /*              getlock(I) - flag indicating whether or not the caller      */
 /*                           holds a lock on ipf_nat                        */
 /*                                                                          */
 /* Handle SIOCSTGET.                                                        */
 /* Copies out NAT entry to user space.  Any additional data held for a      */
 /* proxy is also copied, as to is the NAT rule which was responsible for it */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_getent(softc, data, getlock)
 	ipf_main_softc_t *softc;
 	caddr_t data;
 	int getlock;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	int error, outsize;
 	ap_session_t *aps;
 	nat_save_t *ipn, ipns;
 	nat_t *n, *nat;
 
 	error = ipf_inobj(softc, data, NULL, &ipns, IPFOBJ_NATSAVE);
 	if (error != 0)
 		return error;
 
 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) {
 		IPFERROR(60028);
 		return EINVAL;
 	}
 
 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
 	if (ipn == NULL) {
 		IPFERROR(60029);
 		return ENOMEM;
 	}
 
 	if (getlock) {
 		READ_ENTER(&softc->ipf_nat);
 	}
 
 	ipn->ipn_dsize = ipns.ipn_dsize;
 	nat = ipns.ipn_next;
 	if (nat == NULL) {
 		nat = softn->ipf_nat_instances;
 		if (nat == NULL) {
 			if (softn->ipf_nat_instances == NULL) {
 				IPFERROR(60030);
 				error = ENOENT;
 			}
 			goto finished;
 		}
 	} else {
 		/*
 		 * Make sure the pointer we're copying from exists in the
 		 * current list of entries.  Security precaution to prevent
 		 * copying of random kernel data.
 		 */
 		for (n = softn->ipf_nat_instances; n; n = n->nat_next)
 			if (n == nat)
 				break;
 		if (n == NULL) {
 			IPFERROR(60031);
 			error = ESRCH;
 			goto finished;
 		}
 	}
 	ipn->ipn_next = nat->nat_next;
 
 	/*
 	 * Copy the NAT structure.
 	 */
 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
 
 	/*
 	 * If we have a pointer to the NAT rule it belongs to, save that too.
 	 */
 	if (nat->nat_ptr != NULL)
 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
 		      ipn->ipn_ipnat.in_size);
 
 	/*
 	 * If we also know the NAT entry has an associated filter rule,
 	 * save that too.
 	 */
 	if (nat->nat_fr != NULL)
 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
 		      sizeof(ipn->ipn_fr));
 
 	/*
 	 * Last but not least, if there is an application proxy session set
 	 * up for this NAT entry, then copy that out too, including any
 	 * private data saved along side it by the proxy.
 	 */
 	aps = nat->nat_aps;
 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
 	if (aps != NULL) {
 		char *s;
 
 		if (outsize < sizeof(*aps)) {
 			IPFERROR(60032);
 			error = ENOBUFS;
 			goto finished;
 		}
 
 		s = ipn->ipn_data;
 		bcopy((char *)aps, s, sizeof(*aps));
 		s += sizeof(*aps);
 		outsize -= sizeof(*aps);
 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
 			bcopy(aps->aps_data, s, aps->aps_psiz);
 		else {
 			IPFERROR(60033);
 			error = ENOBUFS;
 		}
 	}
 	if (error == 0) {
 		if (getlock) {
 			READ_ENTER(&softc->ipf_nat);
 			getlock = 0;
 		}
 		error = ipf_outobjsz(softc, data, ipn, IPFOBJ_NATSAVE,
 				     ipns.ipn_dsize);
 	}
 
 finished:
 	if (getlock) {
 		READ_ENTER(&softc->ipf_nat);
 	}
 	if (ipn != NULL) {
 		KFREES(ipn, ipns.ipn_dsize);
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_putent                                              */
 /* Returns:     int - 0 == success, != 0 is the error value.                */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              data(I)    - pointer to natget structure with NAT           */
 /*                           structure information to load into the kernel  */
 /*              getlock(I) - flag indicating whether or not a write lock    */
 /*                           on is already held.                            */
 /*                                                                          */
 /* Handle SIOCSTPUT.                                                        */
 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
 /* firewall rule data structures, if pointers to them indicate so.          */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_putent(softc, data, getlock)
 	ipf_main_softc_t *softc;
 	caddr_t data;
 	int getlock;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	nat_save_t ipn, *ipnn;
 	ap_session_t *aps;
 	nat_t *n, *nat;
 	frentry_t *fr;
 	fr_info_t fin;
 	ipnat_t *in;
 	int error;
 
 	error = ipf_inobj(softc, data, NULL, &ipn, IPFOBJ_NATSAVE);
 	if (error != 0)
 		return error;
 
 	/*
 	 * Initialise early because of code at junkput label.
 	 */
 	n = NULL;
 	in = NULL;
 	aps = NULL;
 	nat = NULL;
 	ipnn = NULL;
 	fr = NULL;
 
 	/*
 	 * New entry, copy in the rest of the NAT entry if it's size is more
 	 * than just the nat_t structure.
 	 */
 	if (ipn.ipn_dsize > sizeof(ipn)) {
 		if (ipn.ipn_dsize > 81920) {
 			IPFERROR(60034);
 			error = ENOMEM;
 			goto junkput;
 		}
 
 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
 		if (ipnn == NULL) {
 			IPFERROR(60035);
 			return ENOMEM;
 		}
 
 		bzero(ipnn, ipn.ipn_dsize);
 		error = ipf_inobjsz(softc, data, ipnn, IPFOBJ_NATSAVE,
 				    ipn.ipn_dsize);
 		if (error != 0) {
 			goto junkput;
 		}
 	} else
 		ipnn = &ipn;
 
 	KMALLOC(nat, nat_t *);
 	if (nat == NULL) {
 		IPFERROR(60037);
 		error = ENOMEM;
 		goto junkput;
 	}
 
 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
 
 	switch (nat->nat_v[0])
 	{
 	case 4:
 #ifdef USE_INET6
 	case 6 :
 #endif
 		break;
 	default :
 		IPFERROR(60061);
 		error = EPROTONOSUPPORT;
 		goto junkput;
 		/*NOTREACHED*/
 	}
 
 	/*
 	 * Initialize all these so that ipf_nat_delete() doesn't cause a crash.
 	 */
 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
 	nat->nat_tqe.tqe_pnext = NULL;
 	nat->nat_tqe.tqe_next = NULL;
 	nat->nat_tqe.tqe_ifq = NULL;
 	nat->nat_tqe.tqe_parent = nat;
 
 	/*
 	 * Restore the rule associated with this nat session
 	 */
 	in = ipnn->ipn_nat.nat_ptr;
 	if (in != NULL) {
 		KMALLOCS(in, ipnat_t *, ipnn->ipn_ipnat.in_size);
 		nat->nat_ptr = in;
 		if (in == NULL) {
 			IPFERROR(60038);
 			error = ENOMEM;
 			goto junkput;
 		}
 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in,
 		      ipnn->ipn_ipnat.in_size);
 		in->in_use = 1;
 		in->in_flags |= IPN_DELETE;
 
 		ATOMIC_INC32(softn->ipf_nat_stats.ns_rules);
 
 		if (ipf_nat_resolverule(softc, in) != 0) {
 			IPFERROR(60039);
 			error = ESRCH;
 			goto junkput;
 		}
 	}
 
 	/*
 	 * Check that the NAT entry doesn't already exist in the kernel.
 	 *
 	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
 	 * this, we check to see if the inbound combination of addresses and
 	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
 	 *
 	 */
 	bzero((char *)&fin, sizeof(fin));
 	fin.fin_v = nat->nat_v[0];
 	fin.fin_p = nat->nat_pr[0];
 	fin.fin_rev = nat->nat_rev;
 	fin.fin_ifp = nat->nat_ifps[0];
 	fin.fin_data[0] = ntohs(nat->nat_ndport);
 	fin.fin_data[1] = ntohs(nat->nat_nsport);
 
 	switch (nat->nat_dir)
 	{
 	case NAT_OUTBOUND :
 	case NAT_DIVERTOUT :
 		if (getlock) {
 			READ_ENTER(&softc->ipf_nat);
 		}
 
 		fin.fin_v = nat->nat_v[1];
 		if (nat->nat_v[1] == 4) {
 			n = ipf_nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
 					     nat->nat_ndstip, nat->nat_nsrcip);
 #ifdef USE_INET6
 		} else if (nat->nat_v[1] == 6) {
 			n = ipf_nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
 					      &nat->nat_ndst6.in6,
 					      &nat->nat_nsrc6.in6);
 #endif
 		}
 
 		if (getlock) {
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		if (n != NULL) {
 			IPFERROR(60040);
 			error = EEXIST;
 			goto junkput;
 		}
 		break;
 
 	case NAT_INBOUND :
 	case NAT_DIVERTIN :
 		if (getlock) {
 			READ_ENTER(&softc->ipf_nat);
 		}
 
 		if (fin.fin_v == 4) {
 			n = ipf_nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
 					      nat->nat_ndstip,
 					      nat->nat_nsrcip);
 #ifdef USE_INET6
 		} else if (fin.fin_v == 6) {
 			n = ipf_nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
 					       &nat->nat_ndst6.in6,
 					       &nat->nat_nsrc6.in6);
 #endif
 		}
 
 		if (getlock) {
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		if (n != NULL) {
 			IPFERROR(60041);
 			error = EEXIST;
 			goto junkput;
 		}
 		break;
 
 	default :
 		IPFERROR(60042);
 		error = EINVAL;
 		goto junkput;
 	}
 
 	/*
 	 * Restore ap_session_t structure.  Include the private data allocated
 	 * if it was there.
 	 */
 	aps = nat->nat_aps;
 	if (aps != NULL) {
 		KMALLOC(aps, ap_session_t *);
 		nat->nat_aps = aps;
 		if (aps == NULL) {
 			IPFERROR(60043);
 			error = ENOMEM;
 			goto junkput;
 		}
 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
 		if (in != NULL)
 			aps->aps_apr = in->in_apr;
 		else
 			aps->aps_apr = NULL;
 		if (aps->aps_psiz != 0) {
 			if (aps->aps_psiz > 81920) {
 				IPFERROR(60044);
 				error = ENOMEM;
 				goto junkput;
 			}
 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
 			if (aps->aps_data == NULL) {
 				IPFERROR(60045);
 				error = ENOMEM;
 				goto junkput;
 			}
 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
 			      aps->aps_psiz);
 		} else {
 			aps->aps_psiz = 0;
 			aps->aps_data = NULL;
 		}
 	}
 
 	/*
 	 * If there was a filtering rule associated with this entry then
 	 * build up a new one.
 	 */
 	fr = nat->nat_fr;
 	if (fr != NULL) {
 		if ((nat->nat_flags & SI_NEWFR) != 0) {
 			KMALLOC(fr, frentry_t *);
 			nat->nat_fr = fr;
 			if (fr == NULL) {
 				IPFERROR(60046);
 				error = ENOMEM;
 				goto junkput;
 			}
 			ipnn->ipn_nat.nat_fr = fr;
 			fr->fr_ref = 1;
 			(void) ipf_outobj(softc, data, ipnn, IPFOBJ_NATSAVE);
 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
 
 			fr->fr_ref = 1;
 			fr->fr_dsize = 0;
 			fr->fr_data = NULL;
 			fr->fr_type = FR_T_NONE;
 
 			MUTEX_NUKE(&fr->fr_lock);
 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
 		} else {
 			if (getlock) {
 				READ_ENTER(&softc->ipf_nat);
 			}
 			for (n = softn->ipf_nat_instances; n; n = n->nat_next)
 				if (n->nat_fr == fr)
 					break;
 
 			if (n != NULL) {
 				MUTEX_ENTER(&fr->fr_lock);
 				fr->fr_ref++;
 				MUTEX_EXIT(&fr->fr_lock);
 			}
 			if (getlock) {
 				RWLOCK_EXIT(&softc->ipf_nat);
 			}
 
 			if (n == NULL) {
 				IPFERROR(60047);
 				error = ESRCH;
 				goto junkput;
 			}
 		}
 	}
 
 	if (ipnn != &ipn) {
 		KFREES(ipnn, ipn.ipn_dsize);
 		ipnn = NULL;
 	}
 
 	if (getlock) {
 		WRITE_ENTER(&softc->ipf_nat);
 	}
 
 	if (fin.fin_v == 4)
 		error = ipf_nat_finalise(&fin, nat);
 #ifdef USE_INET6
 	else
 		error = ipf_nat6_finalise(&fin, nat);
 #endif
 
 	if (getlock) {
 		RWLOCK_EXIT(&softc->ipf_nat);
 	}
 
 	if (error == 0)
 		return 0;
 
 	IPFERROR(60048);
 	error = ENOMEM;
 
 junkput:
 	if (fr != NULL) {
 		(void) ipf_derefrule(softc, &fr);
 	}
 
 	if ((ipnn != NULL) && (ipnn != &ipn)) {
 		KFREES(ipnn, ipn.ipn_dsize);
 	}
 	if (nat != NULL) {
 		if (aps != NULL) {
 			if (aps->aps_data != NULL) {
 				KFREES(aps->aps_data, aps->aps_psiz);
 			}
 			KFREE(aps);
 		}
 		if (in != NULL) {
 			if (in->in_apr)
 				ipf_proxy_deref(in->in_apr);
 			KFREES(in, in->in_size);
 		}
 		KFREE(nat);
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_delete                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I)   - pointer to soft context main structure         */
 /*              nat(I)     - pointer to NAT structure to delete             */
 /*              logtype(I) - type of LOG record to create before deleting   */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
 /* enabled then generate a NAT log record for this event.                   */
 /* The entry is only freed once no other references to it remain; until     */
 /* then it is unlinked here and counted in ns_orphans.                      */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_delete(softc, nat, logtype)
 	ipf_main_softc_t *softc;
 	struct nat *nat;
 	int logtype;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	int madeorphan = 0, bkt, removed = 0;
 	nat_stat_side_t *nss;
 	struct ipnat *ipn;
 
 	/*
 	 * A logtype of 0 suppresses the log record.
 	 */
 	if (logtype != 0 && softn->ipf_nat_logging != 0)
 		ipf_nat_log(softc, softn, nat, logtype);
 
 	/*
 	 * Take it as a general indication that all the pointers are set if
 	 * nat_pnext is set.
 	 */
 	if (nat->nat_pnext != NULL) {
 		removed = 1;
 
 		/*
 		 * Update the per-side bucket statistics: one entry less in
 		 * the bucket and, if it became empty, one bucket less in use.
 		 */
 		bkt = nat->nat_hv[0] % softn->ipf_nat_table_sz;
 		nss = &softn->ipf_nat_stats.ns_side[0];
 		nss->ns_bucketlen[bkt]--;
 		if (nss->ns_bucketlen[bkt] == 0) {
 			nss->ns_inuse--;
 		}
 
 		bkt = nat->nat_hv[1] % softn->ipf_nat_table_sz;
 		nss = &softn->ipf_nat_stats.ns_side[1];
 		nss->ns_bucketlen[bkt]--;
 		if (nss->ns_bucketlen[bkt] == 0) {
 			nss->ns_inuse--;
 		}
 
 		/*
 		 * Unlink from the nat_next/nat_pnext list.
 		 */
 		*nat->nat_pnext = nat->nat_next;
 		if (nat->nat_next != NULL) {
 			nat->nat_next->nat_pnext = nat->nat_pnext;
 			nat->nat_next = NULL;
 		}
 		nat->nat_pnext = NULL;
 
 		/*
 		 * Unlink from both hash chains.
 		 */
 		*nat->nat_phnext[0] = nat->nat_hnext[0];
 		if (nat->nat_hnext[0] != NULL) {
 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
 			nat->nat_hnext[0] = NULL;
 		}
 		nat->nat_phnext[0] = NULL;
 
 		*nat->nat_phnext[1] = nat->nat_hnext[1];
 		if (nat->nat_hnext[1] != NULL) {
 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
 			nat->nat_hnext[1] = NULL;
 		}
 		nat->nat_phnext[1] = NULL;
 
 		if ((nat->nat_flags & SI_WILDP) != 0) {
 			ATOMIC_DEC32(softn->ipf_nat_stats.ns_wilds);
 		}
 		madeorphan = 1;
 	}
 
 	/*
 	 * Clear the back pointer held through nat_me and drop the reference
 	 * that went with it.
 	 */
 	if (nat->nat_me != NULL) {
 		*nat->nat_me = NULL;
 		nat->nat_me = NULL;
 		nat->nat_ref--;
 		ASSERT(nat->nat_ref >= 0);
 	}
 
 	if (nat->nat_tqe.tqe_ifq != NULL) {
 		/*
 		 * No call to ipf_freetimeoutqueue() is made here, they are
 		 * garbage collected in ipf_nat_expire().
 		 */
 		(void) ipf_deletequeueentry(&nat->nat_tqe);
 	}
 
 	if (nat->nat_sync) {
 		ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync);
 		nat->nat_sync = NULL;
 	}
 
 	if (logtype == NL_EXPIRE)
 		softn->ipf_nat_stats.ns_expire++;
 
 	MUTEX_ENTER(&nat->nat_lock);
 	/*
 	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
 	 * This happens when a nat'd packet is blocked and we want to throw
 	 * away the NAT session.
 	 */
 	if (logtype == NL_DESTROY) {
 		if (nat->nat_ref > 2) {
 			/*
 			 * Still referenced elsewhere: drop two references
 			 * and leave the (now unlinked) entry as an orphan.
 			 */
 			nat->nat_ref -= 2;
 			MUTEX_EXIT(&nat->nat_lock);
 			if (removed)
 				softn->ipf_nat_stats.ns_orphans++;
 			return;
 		}
 	} else if (nat->nat_ref > 1) {
 		/*
 		 * Still referenced elsewhere: drop one reference and leave
 		 * the entry as an orphan if it was unlinked by this call.
 		 */
 		nat->nat_ref--;
 		MUTEX_EXIT(&nat->nat_lock);
 		if (madeorphan == 1)
 			softn->ipf_nat_stats.ns_orphans++;
 		return;
 	}
 	ASSERT(nat->nat_ref >= 0);
 	MUTEX_EXIT(&nat->nat_lock);
 
 	/*
 	 * No other references remain, the entry is going to be freed.
 	 */
 	nat->nat_ref = 0;
 
 	/*
 	 * The entry was not unlinked by this call, so presumably an earlier
 	 * call already unlinked it and counted it as an orphan.
 	 */
 	if (madeorphan == 0)
 		softn->ipf_nat_stats.ns_orphans--;
 
 	/*
 	 * At this point, nat_ref can be either 0 or -1
 	 */
 	softn->ipf_nat_stats.ns_proto[nat->nat_pr[0]]--;
 
 	if (nat->nat_fr != NULL) {
 		(void) ipf_derefrule(softc, &nat->nat_fr);
 	}
 
 	if (nat->nat_hm != NULL) {
 		ipf_nat_hostmapdel(softc, &nat->nat_hm);
 	}
 
 	/*
 	 * If there is an active reference from the nat entry to its parent
 	 * rule, decrement the rule's reference count and free it too if no
 	 * longer being used.
 	 */
 	ipn = nat->nat_ptr;
 	nat->nat_ptr = NULL;
 
 	if (ipn != NULL) {
 		ipn->in_space++;
 		ipf_nat_rule_deref(softc, &ipn);
 	}
 
 	if (nat->nat_aps != NULL) {
 		ipf_proxy_free(softc, nat->nat_aps);
 		nat->nat_aps = NULL;
 	}
 
 	MUTEX_DESTROY(&nat->nat_lock);
 
 	softn->ipf_nat_stats.ns_active--;
 
 	/*
 	 * If there's a fragment table entry too for this nat entry, then
 	 * dereference that as well.  This is after nat_lock is released
 	 * because of Tru64.
 	 */
 	ipf_frag_natforget(softc, (void *)nat);
 
 	KFREE(nat);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_flushtable                                          */
 /* Returns:     int - number of NAT sessions deleted                        */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
 /* log record should be emitted in ipf_nat_delete() if NAT logging is       */
 /* enabled.                                                                 */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_flushtable(softc, softn)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 {
 	nat_t *entry;
 	int side, count;
 
 	/*
 	 * Every mapping is going away, so wipe both hash tables up front
 	 * to make the individual deletions quicker.
 	 */
 	for (side = 0; side < 2; side++) {
 		if (softn->ipf_nat_table[side] == NULL)
 			continue;
 		bzero((char *)softn->ipf_nat_table[side],
 		      sizeof(softn->ipf_nat_table[side]) *
 		      softn->ipf_nat_table_sz);
 	}
 
 	/*
 	 * Keep deleting the head of the list until the list is empty.
 	 */
 	count = 0;
 	for (entry = softn->ipf_nat_instances; entry != NULL;
 	     entry = softn->ipf_nat_instances) {
 		ipf_nat_delete(softc, entry, NL_FLUSH);
 		count++;
 	}
 
 	return count;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_clearlist                                           */
 /* Returns:     int - number of NAT/RDR rules deleted                       */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*                                                                          */
 /* Delete all rules in the current list of rules.  There is nothing elegant */
 /* about this cleanup: simply free all entries on the list of rules and     */
 /* clear out the tables used for hashed NAT rule lookups.                   */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_clearlist(softc, softn)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 {
 	int removed = 0;
 	ipnat_t *rule;
 
 	/*
 	 * Wipe the hashed lookup tables for map and rdr rules first.
 	 */
 	if (softn->ipf_nat_map_rules != NULL) {
 		bzero((char *)softn->ipf_nat_map_rules,
 		      sizeof(*softn->ipf_nat_map_rules) *
 		      softn->ipf_nat_maprules_sz);
 	}
 	if (softn->ipf_nat_rdr_rules != NULL) {
 		bzero((char *)softn->ipf_nat_rdr_rules,
 		      sizeof(*softn->ipf_nat_rdr_rules) *
 		      softn->ipf_nat_rdrrules_sz);
 	}
 
 	/*
 	 * Remove the head of the rule list until nothing is left.  Purging
 	 * of sessions is not allowed here (last argument is 0).
 	 */
 	for (;;) {
 		rule = softn->ipf_nat_list;
 		if (rule == NULL)
 			break;
 		ipf_nat_delrule(softc, softn, rule, 0);
 		removed++;
 	}
 #if SOLARIS && !defined(INSTANCES)
 	pfil_delayed_copy = 1;
 #endif
 	return removed;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_delrule                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              np(I)    - pointer to NAT rule to delete                    */
 /*              purge(I) - 1 == allow purge, 0 == prevent purge             */
 /* Locks:       WRITE(ipf_nat)                                              */
 /*                                                                          */
 /* Preventing "purge" from occurring is allowed because when all of the NAT */
 /* rules are being removed, allowing the "purge" to walk through the list   */
 /* of NAT sessions, possibly multiple times, would be a large performance   */
 /* hit, on the order of O(N^2).                                             */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_delrule(softc, softn, np, purge)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	ipnat_t *np;
 	int purge;
 {
 	nat_t *cur, *following;
 
 	/*
 	 * Take the rule off the list of rules, moving the tail pointer back
 	 * if this rule was the last one.
 	 */
 	if (np->in_pnext != NULL) {
 		*np->in_pnext = np->in_next;
 		if (np->in_next != NULL)
 			np->in_next->in_pnext = np->in_pnext;
 		if (softn->ipf_nat_list_tail == &np->in_next)
 			softn->ipf_nat_list_tail = np->in_pnext;
 	}
 
 	/*
 	 * When purging is both allowed and requested by the rule, delete
 	 * every session that points at this rule.  The next pointer is
 	 * fetched before the delete because the session may be freed.
 	 */
 	if ((purge == 1) && ((np->in_flags & IPN_PURGE) != 0)) {
 		cur = softn->ipf_nat_instances;
 		while (cur != NULL) {
 			following = cur->nat_next;
 			if (cur->nat_ptr == np)
 				ipf_nat_delete(softc, cur, NL_PURGE);
 			cur = following;
 		}
 	}
 
 	/*
 	 * A rule not yet marked for deletion is still present in the hashed
 	 * lookup tables, so take it out of those.
 	 */
 	if ((np->in_flags & IPN_DELETE) == 0) {
 		if (np->in_redir & NAT_REDIRECT) {
 			if (np->in_v[0] == 4)
 				ipf_nat_delrdr(softn, np);
 #ifdef USE_INET6
 			else if (np->in_v[0] == 6)
 				ipf_nat6_delrdr(softn, np);
 #endif
 		}
 		if (np->in_redir & (NAT_MAPBLK|NAT_MAP)) {
 			if (np->in_v[0] == 4)
 				ipf_nat_delmap(softn, np);
 #ifdef USE_INET6
 			else if (np->in_v[0] == 6)
 				ipf_nat6_delmap(softn, np);
 #endif
 		}
 	}
 
 	np->in_flags |= IPN_DELETE;
 	ipf_nat_rule_deref(softc, &np);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_newmap                                              */
 /* Returns:     int - -1 == error, 0 == success                             */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT entry                               */
 /*              ni(I)  - pointer to structure with misc. information needed */
 /*                       to create new NAT entry.                           */
 /*                                                                          */
 /* Given an empty NAT structure, populate it with new information about a   */
 /* new NAT session, as defined by the matching NAT rule.                    */
 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
 /* to the new IP address for the translation.                               */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_newmap(fin, nat, ni)
 	fr_info_t *fin;
 	nat_t *nat;
 	natinfo_t *ni;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_short st_port, dport, sport, port, sp, dp;
 	struct in_addr in, inb;
 	hostmap_t *hm;
 	u_32_t flags;
 	u_32_t st_ip;
 	ipnat_t *np;
 	nat_t *natl;
 	int l;
 
 	/*
 	 * If it's an outbound packet which doesn't match any existing
 	 * record, then create a new port
 	 */
 	l = 0;
 	hm = NULL;
 	np = ni->nai_np;
 	st_ip = np->in_snip;
 	st_port = np->in_spnext;
 	flags = nat->nat_flags;
 
 	if (flags & IPN_ICMPQUERY) {
 		sport = fin->fin_data[1];
 		dport = 0;
 	} else {
 		sport = htons(fin->fin_data[0]);
 		dport = htons(fin->fin_data[1]);
 	}
 
 	/*
 	 * Do a loop until we either run out of entries to try or we find
 	 * a NAT mapping that isn't currently being used.  This is done
 	 * because the change to the source is not (usually) being fixed.
 	 */
 	do {
 		port = 0;
 		in.s_addr = htonl(np->in_snip);
 		if (l == 0) {
 			/*
 			 * Check to see if there is an existing NAT
 			 * setup for this IP address pair.
 			 */
 			hm = ipf_nat_hostmap(softn, np, fin->fin_src,
 					     fin->fin_dst, in, 0);
 			if (hm != NULL)
 				in.s_addr = hm->hm_nsrcip.s_addr;
 		} else if ((l == 1) && (hm != NULL)) {
 			ipf_nat_hostmapdel(softc, &hm);
 		}
 		in.s_addr = ntohl(in.s_addr);
 
 		nat->nat_hm = hm;
 
 		if ((np->in_nsrcmsk == 0xffffffff) && (np->in_spnext == 0)) {
 			if (l > 0) {
 				NBUMPSIDEX(1, ns_exhausted, ns_exhausted_1);
 				return -1;
 			}
 		}
 
 		if (np->in_redir == NAT_BIMAP &&
 		    np->in_osrcmsk == np->in_nsrcmsk) {
 			/*
 			 * map the address block in a 1:1 fashion
 			 */
 			in.s_addr = np->in_nsrcaddr;
 			in.s_addr |= fin->fin_saddr & ~np->in_osrcmsk;
 			in.s_addr = ntohl(in.s_addr);
 
 		} else if (np->in_redir & NAT_MAPBLK) {
 			if ((l >= np->in_ppip) || ((l > 0) &&
 			     !(flags & IPN_TCPUDP))) {
 				NBUMPSIDEX(1, ns_exhausted, ns_exhausted_2);
 				return -1;
 			}
 			/*
 			 * map-block - Calculate destination address.
 			 */
 			in.s_addr = ntohl(fin->fin_saddr);
 			in.s_addr &= ntohl(~np->in_osrcmsk);
 			inb.s_addr = in.s_addr;
 			in.s_addr /= np->in_ippip;
 			in.s_addr &= ntohl(~np->in_nsrcmsk);
 			in.s_addr += ntohl(np->in_nsrcaddr);
 			/*
 			 * Calculate destination port.
 			 */
 			if ((flags & IPN_TCPUDP) &&
 			    (np->in_ppip != 0)) {
 				port = ntohs(sport) + l;
 				port %= np->in_ppip;
 				port += np->in_ppip *
 					(inb.s_addr % np->in_ippip);
 				port += MAPBLK_MINPORT;
 				port = htons(port);
 			}
 
 		} else if ((np->in_nsrcaddr == 0) &&
 			   (np->in_nsrcmsk == 0xffffffff)) {
 			i6addr_t in6;
 
 			/*
 			 * 0/32 - use the interface's IP address.
 			 */
 			if ((l > 0) ||
 			    ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp,
 				       &in6, NULL) == -1) {
 				NBUMPSIDEX(1, ns_new_ifpaddr, ns_new_ifpaddr_1);
 				return -1;
 			}
 			in.s_addr = ntohl(in6.in4.s_addr);
 
 		} else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) {
 			/*
 			 * 0/0 - use the original source address/port.
 			 */
 			if (l > 0) {
 				NBUMPSIDEX(1, ns_exhausted, ns_exhausted_3);
 				return -1;
 			}
 			in.s_addr = ntohl(fin->fin_saddr);
 
 		} else if ((np->in_nsrcmsk != 0xffffffff) &&
 			   (np->in_spnext == 0) && ((l > 0) || (hm == NULL)))
 			np->in_snip++;
 
 		natl = NULL;
 
 		if ((flags & IPN_TCPUDP) &&
 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
 		    (np->in_flags & IPN_AUTOPORTMAP)) {
 			/*
 			 * "ports auto" (without map-block)
 			 */
 			if ((l > 0) && (l % np->in_ppip == 0)) {
 				if ((l > np->in_ppip) &&
 				    np->in_nsrcmsk != 0xffffffff)
 					np->in_snip++;
 			}
 			if (np->in_ppip != 0) {
 				port = ntohs(sport);
 				port += (l % np->in_ppip);
 				port %= np->in_ppip;
 				port += np->in_ppip *
 					(ntohl(fin->fin_saddr) %
 					 np->in_ippip);
 				port += MAPBLK_MINPORT;
 				port = htons(port);
 			}
 
 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
 			   (flags & IPN_TCPUDPICMP) && (np->in_spnext != 0)) {
 			/*
 			 * Standard port translation.  Select next port.
 			 */
 			if (np->in_flags & IPN_SEQUENTIAL) {
 				port = np->in_spnext;
 			} else {
 				port = ipf_random() % (np->in_spmax -
 						       np->in_spmin + 1);
 				port += np->in_spmin;
 			}
 			port = htons(port);
 			np->in_spnext++;
 
 			if (np->in_spnext > np->in_spmax) {
 				np->in_spnext = np->in_spmin;
 				if (np->in_nsrcmsk != 0xffffffff)
 					np->in_snip++;
 			}
 		}
 
 		if (np->in_flags & IPN_SIPRANGE) {
 			if (np->in_snip > ntohl(np->in_nsrcmsk))
 				np->in_snip = ntohl(np->in_nsrcaddr);
 		} else {
 			if ((np->in_nsrcmsk != 0xffffffff) &&
 			    ((np->in_snip + 1) & ntohl(np->in_nsrcmsk)) >
 			    ntohl(np->in_nsrcaddr))
 				np->in_snip = ntohl(np->in_nsrcaddr) + 1;
 		}
 
 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
 			port = sport;
 
 		/*
 		 * Here we do a lookup of the connection as seen from
 		 * the outside.  If an IP# pair already exists, try
 		 * again.  So if you have A->B becomes C->B, you can
 		 * also have D->E become C->E but not D->B causing
 		 * another C->B.  Also take protocol and ports into
 		 * account when determining whether a pre-existing
 		 * NAT setup will cause an external conflict where
 		 * this is appropriate.
 		 */
 		inb.s_addr = htonl(in.s_addr);
 		sp = fin->fin_data[0];
 		dp = fin->fin_data[1];
 		fin->fin_data[0] = fin->fin_data[1];
 		fin->fin_data[1] = ntohs(port);
 		natl = ipf_nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
 					(u_int)fin->fin_p, fin->fin_dst, inb);
 		fin->fin_data[0] = sp;
 		fin->fin_data[1] = dp;
 
 		/*
 		 * Has the search wrapped around and come back to the
 		 * start ?
 		 */
 		if ((natl != NULL) &&
 		    (np->in_spnext != 0) && (st_port == np->in_spnext) &&
 		    (np->in_snip != 0) && (st_ip == np->in_snip)) {
 			NBUMPSIDED(1, ns_wrap);
 			return -1;
 		}
 		l++;
 	} while (natl != NULL);
 
 	/* Setup the NAT table */
 	nat->nat_osrcip = fin->fin_src;
 	nat->nat_nsrcaddr = htonl(in.s_addr);
 	nat->nat_odstip = fin->fin_dst;
 	nat->nat_ndstip = fin->fin_dst;
 	if (nat->nat_hm == NULL)
 		nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src,
 					      fin->fin_dst, nat->nat_nsrcip,
 					      0);
 
 	if (flags & IPN_TCPUDP) {
 		nat->nat_osport = sport;
 		nat->nat_nsport = port;	/* sport */
 		nat->nat_odport = dport;
 		nat->nat_ndport = dport;
 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
 	} else if (flags & IPN_ICMPQUERY) {
 		nat->nat_oicmpid = fin->fin_data[1];
 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
 		nat->nat_nicmpid = port;
 	}
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_newrdr                                              */
 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT entry                               */
 /*              ni(I)  - pointer to structure with misc. information needed */
 /*                       to create new NAT entry.                           */
 /*                                                                          */
 /* Picks the new destination address and port for a packet that matched a  */
 /* redirect rule and stores the original and translated values in nat.     */
 /* For sticky rules an existing host map entry may replace ni->nai_np.      */
 /* Returns -1 when the target is 0.0.0.0 with an unchanged port, when the   */
 /* interface address lookup fails, or when ipf_nat_outlookup() finds an     */
 /* existing entry for the translated address/port pair.                     */
 /*                                                                          */
 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
 /* to the new IP address for the translation.                               */
 /* NOTE(review): the body below never assigns ni->nai_ip, so the remark     */
 /* above looks stale; the address is kept in the local 'in' - confirm.      */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_newrdr(fin, nat, ni)
 	fr_info_t *fin;
 	nat_t *nat;
 	natinfo_t *ni;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_short nport, dport, sport;
 	struct in_addr in, inb;
 	u_short sp, dp;
 	hostmap_t *hm;
 	u_32_t flags;
 	ipnat_t *np;
 	nat_t *natl;
 	int move;
 
 	/* Default result: success and the rule may be moved by the caller. */
 	move = 1;
 	hm = NULL;
 	/* 'in' accumulates the new destination address in host byte order. */
 	in.s_addr = 0;
 	np = ni->nai_np;
 	flags = nat->nat_flags;
 
 	/*
 	 * For ICMP queries fin_data[1] is used as dport without any byte
 	 * swapping and sport is zero; otherwise both values from fin_data
 	 * are converted with htons().
 	 */
 	if (flags & IPN_ICMPQUERY) {
 		dport = fin->fin_data[1];
 		sport = 0;
 	} else {
 		sport = htons(fin->fin_data[0]);
 		dport = htons(fin->fin_data[1]);
 	}
 
 	/* TRACE sport, dport */
 
 
 	/*
 	 * If the matching rule has IPN_STICKY set, then we want to have the
 	 * same rule kick in as before.  Why would this happen?  If you have
 	 * a collection of rdr rules with "round-robin sticky", the current
 	 * packet might match a different one to the previous connection but
 	 * we want the same destination to be used.
 	 */
 	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
 	    ((np->in_flags & IPN_STICKY) != 0)) {
 		hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, fin->fin_dst,
 				     in, (u_32_t)dport);
 		if (hm != NULL) {
 			/*
 			 * Reuse the destination and rule remembered in the
 			 * host map, report the switch of rule back through
 			 * ni and do not let the caller move the rule.
 			 */
 			in.s_addr = ntohl(hm->hm_ndstip.s_addr);
 			np = hm->hm_ipnat;
 			ni->nai_np = np;
 			move = 0;
 			ipf_nat_hostmapdel(softc, &hm);
 		}
 	}
 
 	/*
 	 * Otherwise, it's an inbound packet. Most likely, we don't
 	 * want to rewrite source ports and source addresses. Instead,
 	 * we want to rewrite to a fixed internal address and fixed
 	 * internal port.
 	 */
 	if (np->in_flags & IPN_SPLIT) {
 		/* in_dnip supplies the destination to use for this packet. */
 		in.s_addr = np->in_dnip;
 		inb.s_addr = htonl(in.s_addr);
 
 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
 			hm = ipf_nat_hostmap(softn, NULL, fin->fin_src,
 					     fin->fin_dst, inb, (u_32_t)dport);
 			if (hm != NULL) {
 				/*
 				 * NOTE(review): unlike the sticky case above,
 				 * no ntohl() is applied here - confirm the
 				 * intended byte order.
 				 */
 				in.s_addr = hm->hm_ndstip.s_addr;
 				move = 0;
 			}
 		}
 
 		/*
 		 * Alternate in_dnip between the values kept in in_ndstaddr
 		 * and in_ndstmsk (for split rules the mask field appears to
 		 * hold the second destination - confirm).
 		 */
 		if (hm == NULL || hm->hm_ref == 1) {
 			if (np->in_ndstaddr == htonl(in.s_addr)) {
 				np->in_dnip = ntohl(np->in_ndstmsk);
 				move = 0;
 			} else {
 				np->in_dnip = ntohl(np->in_ndstaddr);
 			}
 		}
 		if (hm != NULL)
 			ipf_nat_hostmapdel(softc, &hm);
 
 	} else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) {
 		i6addr_t in6;
 
 		/*
 		 * 0/32 - use the interface's IP address.
 		 */
 		if (ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp,
 			       &in6, NULL) == -1) {
 			NBUMPSIDEX(0, ns_new_ifpaddr, ns_new_ifpaddr_2);
 			return -1;
 		}
 		in.s_addr = ntohl(in6.in4.s_addr);
 
 	} else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk== 0)) {
 		/*
 		 * 0/0 - use the original destination address/port.
 		 */
 		in.s_addr = ntohl(fin->fin_daddr);
 
 	} else if (np->in_redir == NAT_BIMAP &&
 		   np->in_ndstmsk == np->in_odstmsk) {
 		/*
 		 * map the address block in a 1:1 fashion
 		 */
 		in.s_addr = np->in_ndstaddr;
 		in.s_addr |= fin->fin_daddr & ~np->in_ndstmsk;
 		in.s_addr = ntohl(in.s_addr);
 	} else {
 		/* Plain redirect: use the rule's new destination address. */
 		in.s_addr = ntohl(np->in_ndstaddr);
 	}
 
 	/*
 	 * Choose the new destination port.  The port is left unchanged when
 	 * the rule has no next port (in_dpnext == 0) or NAT_NOTRULEPORT is
 	 * set in the entry's flags.
 	 */
 	if ((np->in_dpnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
 		nport = dport;
 	else {
 		/*
 		 * Whilst not optimized for the case where
 		 * pmin == pmax, the gain is not significant.
 		 */
 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
 		    (np->in_odport != np->in_dtop)) {
 			/* Derive the port from the offset to in_odport. */
 			nport = ntohs(dport) - np->in_odport + np->in_dpmax;
 			nport = htons(nport);
 		} else {
 			/*
 			 * Hand out in_dpnext and advance it, wrapping from
 			 * in_dpmax back to in_dpmin.
 			 */
 			nport = htons(np->in_dpnext);
 			np->in_dpnext++;
 			if (np->in_dpnext > np->in_dpmax)
 				np->in_dpnext = np->in_dpmin;
 		}
 	}
 
 	/*
 	 * When the redirect-to address is set to 0.0.0.0, just
 	 * assume a blank `forwarding' of the packet.  We don't
 	 * setup any translation for this either.
 	 */
 	if (in.s_addr == 0) {
 		if (nport == dport) {
 			NBUMPSIDED(0, ns_xlate_null);
 			return -1;
 		}
 		in.s_addr = ntohl(fin->fin_daddr);
 	}
 
 	/*
 	 * Check to see if this redirect mapping already exists and if
 	 * it does, return "failure" (allowing it to be created will just
 	 * cause one or both of these "connections" to stop working.)
 	 *
 	 * fin_data is temporarily rewritten for the lookup (new destination
 	 * port first, original source port second) and restored right after.
 	 */
 	inb.s_addr = htonl(in.s_addr);
 	sp = fin->fin_data[0];
 	dp = fin->fin_data[1];
 	fin->fin_data[1] = fin->fin_data[0];
 	fin->fin_data[0] = ntohs(nport);
 	natl = ipf_nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
 			     (u_int)fin->fin_p, inb, fin->fin_src);
 	fin->fin_data[0] = sp;
 	fin->fin_data[1] = dp;
 	if (natl != NULL) {
 		DT2(ns_new_xlate_exists, fr_info_t *, fin, nat_t *, natl);
 		NBUMPSIDE(0, ns_xlate_exists);
 		return -1;
 	}
 
 	/*
 	 * Record the translation: only the destination address changes, the
 	 * source address is stored unchanged as both old and new.
 	 */
 	inb.s_addr = htonl(in.s_addr);
 	nat->nat_ndstaddr = htonl(in.s_addr);
 	nat->nat_odstip = fin->fin_dst;
 	nat->nat_nsrcip = fin->fin_src;
 	nat->nat_osrcip = fin->fin_src;
 	/* Sticky rules get a host map entry for the chosen destination. */
 	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
 		nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src,
 					      fin->fin_dst, inb, (u_32_t)dport);
 
 	/*
 	 * Store the ports (or ICMP ids) in the entry.  Note that the new
 	 * value is also written directly into the header at fin->fin_dp.
 	 */
 	if (flags & IPN_TCPUDP) {
 		nat->nat_odport = dport;
 		nat->nat_ndport = nport;
 		nat->nat_osport = sport;
 		nat->nat_nsport = sport;
 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
 	} else if (flags & IPN_ICMPQUERY) {
 		nat->nat_oicmpid = fin->fin_data[1];
 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
 		nat->nat_nicmpid = nport;
 	}
 
 	return move;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_add                                                 */
 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
 /*                       else pointer to new NAT structure                  */
 /* Parameters:  fin(I)       - pointer to packet information                */
 /*              np(I)        - pointer to NAT rule                          */
 /*              natsave(I)   - pointer to where to store NAT struct pointer */
 /*              flags(I)     - flags describing the current packet          */
 /*              direction(I) - direction of packet (in/out)                 */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Attempts to create a new NAT entry.  Does not actually change the packet */
 /* in any way.                                                              */
 /* NOTE(review): ipf_nat_newrdr(), called from here, stores the new port    */
 /* or ICMP id through fin->fin_dp, so the claim above may not fully hold.   */
 /*                                                                          */
 /* This function is in three main parts: (1) deal with creating a new NAT   */
 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
 /* and (3) building that structure and putting it into the NAT table(s).    */
 /*                                                                          */
 /* If a lookup finds an entry that already matches the packet, the freshly  */
 /* allocated structure is freed and the existing entry is returned instead. */
 /* When natsave is not NULL, *natsave is set to the returned pointer, which */
 /* is NULL on failure.                                                      */
 /*                                                                          */
 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct    */
 /*       as it can result in memory being corrupted.                        */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_add(fin, np, natsave, flags, direction)
 	fr_info_t *fin;
 	ipnat_t *np;
 	nat_t **natsave;
 	u_int flags;
 	int direction;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	hostmap_t *hm = NULL;
 	nat_t *nat, *natl;
 	natstat_t *nsp;
 	u_int nflags;
 	natinfo_t ni;
 	int move;
 
 	nsp = &softn->ipf_nat_stats;
 
 	/*
 	 * Set ipf_nat_doflush once the percentage of the table in use rises
 	 * above the high watermark.
 	 */
 	if ((nsp->ns_active * 100 / softn->ipf_nat_table_max) >
 	    softn->ipf_nat_table_wm_high) {
 		softn->ipf_nat_doflush = 1;
 	}
 
 	/* Refuse to create anything new while the table is at its limit. */
 	if (nsp->ns_active >= softn->ipf_nat_table_max) {
 		NBUMPSIDED(fin->fin_out, ns_table_max);
 		return NULL;
 	}
 
 	move = 1;
 	/*
 	 * Lookup flags: bits set in both the rule and the packet flags,
 	 * restricted to NAT_FROMRULE.
 	 */
 	nflags = np->in_flags & flags;
 	nflags &= NAT_FROMRULE;
 
 	ni.nai_np = np;
 	ni.nai_dport = 0;
 	ni.nai_sport = 0;
 
 	/* Give me a new nat */
 	KMALLOC(nat, nat_t *);
 	if (nat == NULL) {
 		NBUMPSIDED(fin->fin_out, ns_memfail);
 		/*
 		 * Try to automatically tune the max # of entries in the
 		 * table allowed to be less than what will cause kmem_alloc()
 		 * to fail and try to eliminate panics due to out of memory
 		 * conditions arising.
 		 */
 		if ((softn->ipf_nat_table_max > softn->ipf_nat_table_sz) &&
 		    (nsp->ns_active > 100)) {
 			softn->ipf_nat_table_max = nsp->ns_active - 100;
 			printf("table_max reduced to %d\n",
 				softn->ipf_nat_table_max);
 		}
 		return NULL;
 	}
 
 	if (flags & IPN_ICMPQUERY) {
 		/*
 		 * In the ICMP query NAT code, we translate the ICMP id fields
 		 * to make them unique. This is independent of the ICMP type
 		 * (e.g. in the unlikely event that a host sends an echo and
 		 * a tstamp request with the same id, both packets will have
 		 * their ip address/id field changed in the same way).
 		 */
 		/* The icmp_id field is used by the sender to identify the
 		 * process making the icmp request. (the receiver just
 		 * copies it back in its response). So, it closely matches
 		 * the concept of source port. We overlay sport, so we can
 		 * maximally reuse the existing code.
 		 */
 		ni.nai_sport = fin->fin_data[1];
 		ni.nai_dport = 0;
 	}
 
 	/* Start from a zeroed entry and fill in the packet-independent bits. */
 	bzero((char *)nat, sizeof(*nat));
 	nat->nat_flags = flags;
 	nat->nat_redir = np->in_redir;
 	nat->nat_dir = direction;
 	nat->nat_pr[0] = fin->fin_p;
 	nat->nat_pr[1] = fin->fin_p;
 
 	/*
 	 * Search the current table for a match and create a new mapping
 	 * if there is none found.
 	 */
 	if (np->in_redir & NAT_DIVERTUDP) {
 		move = ipf_nat_newdivert(fin, nat, &ni);
 
 	} else if (np->in_redir & NAT_REWRITE) {
 		move = ipf_nat_newrewrite(fin, nat, &ni);
 
 	} else if (direction == NAT_OUTBOUND) {
 		/*
 		 * We can now arrange to call this for the same connection
 		 * because ipf_nat_new doesn't protect the code path into
 		 * this function.
 		 */
 		natl = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p,
 				     fin->fin_src, fin->fin_dst);
 		if (natl != NULL) {
 			/* Already translated: hand back the existing entry. */
 			KFREE(nat);
 			nat = natl;
 			goto done;
 		}
 
 		move = ipf_nat_newmap(fin, nat, &ni);
 	} else {
 		/*
 		 * NAT_INBOUND is used for redirects rules
 		 */
 		natl = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p,
 					fin->fin_src, fin->fin_dst);
 		if (natl != NULL) {
 			/* Already translated: hand back the existing entry. */
 			KFREE(nat);
 			nat = natl;
 			goto done;
 		}
 
 		move = ipf_nat_newrdr(fin, nat, &ni);
 	}
 	/* -1 from any of the helpers above means no mapping was set up. */
 	if (move == -1)
 		goto badnat;
 
 	/* The helper may have substituted a different rule via ni. */
 	np = ni.nai_np;
 
 	nat->nat_mssclamp = np->in_mssclamp;
 	nat->nat_me = natsave;
 	nat->nat_fr = fin->fin_fr;
 	nat->nat_rev = fin->fin_rev;
 	nat->nat_ptr = np;
 	nat->nat_dlocal = np->in_dlocal;
 
 	/* Attach a proxy when the rule names one, except for NAT_SLAVE. */
 	if ((np->in_apr != NULL) && ((nat->nat_flags & NAT_SLAVE) == 0)) {
 		if (ipf_proxy_new(fin, nat) == -1) {
 			NBUMPSIDED(fin->fin_out, ns_appr_fail);
 			goto badnat;
 		}
 	}
 
 	/* Copy the rule's interface pointers and names into the entry. */
 	nat->nat_ifps[0] = np->in_ifps[0];
 	if (np->in_ifps[0] != NULL) {
 		COPYIFNAME(np->in_v[0], np->in_ifps[0], nat->nat_ifnames[0]);
 	}
 
 	nat->nat_ifps[1] = np->in_ifps[1];
 	if (np->in_ifps[1] != NULL) {
 		COPYIFNAME(np->in_v[1], np->in_ifps[1], nat->nat_ifnames[1]);
 	}
 
 	if (ipf_nat_finalise(fin, nat) == -1) {
 		goto badnat;
 	}
 
 	np->in_use++;
 
 	/*
 	 * Round-robin rules are deleted and re-added after use (presumably
 	 * to move the rule behind its peers so another one matches next -
 	 * confirm against ipf_nat_addrdr/ipf_nat_addmap).
 	 */
 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
 		if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_REDIRECT) {
 			ipf_nat_delrdr(softn, np);
 			ipf_nat_addrdr(softn, np);
 		} else if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_MAP) {
 			ipf_nat_delmap(softn, np);
 			ipf_nat_addmap(softn, np);
 		}
 	}
 
 	if (flags & SI_WILDP)
 		nsp->ns_wilds++;
 	nsp->ns_proto[nat->nat_pr[0]]++;
 
 	goto done;
 badnat:
 	/* Failure: drop any host map reference and free the new entry. */
 	DT2(ns_badnatnew, fr_info_t *, fin, nat_t *, nat);
 	NBUMPSIDE(fin->fin_out, ns_badnatnew);
 	if ((hm = nat->nat_hm) != NULL)
 		ipf_nat_hostmapdel(softc, &hm);
 	KFREE(nat);
 	nat = NULL;
 done:
 	/* Count a hit on the rule for both new and pre-existing entries. */
 	if (nat != NULL && np != NULL)
 		np->in_hits++;
 	if (natsave != NULL)
 		*natsave = nat;
 	return nat;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_finalise                                            */
 /* Returns:     int - 0 == success, -1 == failure                           */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT entry                               */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Last stage of building a new NAT entry: work out the checksum            */
 /* adjustments, mark the entry as IPv4, pick up interface MTUs, create the  */
 /* sync record and insert the entry into the NAT tables.  On a successful   */
 /* insert the entry is logged (if logging is on) and a reference is taken   */
 /* on the filter rule; on failure the sync record is released again.        */
 /* ------------------------------------------------------------------------ */
 /*ARGSUSED*/
 static int
 ipf_nat_finalise(fin, nat)
 	fr_info_t *fin;
 	nat_t *nat;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_32_t before, after, delta;
 	frentry_t *rule;
 	u_32_t flags;
 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
 	qpktinfo_t *qpi = fin->fin_qpi;
 #endif
 
 	flags = nat->nat_flags;
 
 	/*
 	 * Transport level checksum delta: ICMP only depends on the id
 	 * change, everything else on both address/port pairs.
 	 */
 	if (nat->nat_pr[0] == IPPROTO_ICMP) {
 		before = LONG_SUM(ntohs(nat->nat_oicmpid));
 		after = LONG_SUM(ntohs(nat->nat_nicmpid));
 		CALC_SUMD(before, after, delta);
 		nat->nat_sumd[0] = (delta & 0xffff) + (delta >> 16);
 	} else {
 		/* source address + port */
 		before = LONG_SUM(ntohl(nat->nat_osrcaddr) + \
 				  ntohs(nat->nat_osport));
 		after = LONG_SUM(ntohl(nat->nat_nsrcaddr) + \
 				 ntohs(nat->nat_nsport));
 		CALC_SUMD(before, after, delta);
 		nat->nat_sumd[0] = (delta & 0xffff) + (delta >> 16);
 
 		/* destination address + port, added on top */
 		before = LONG_SUM(ntohl(nat->nat_odstaddr) + \
 				  ntohs(nat->nat_odport));
 		after = LONG_SUM(ntohl(nat->nat_ndstaddr) + \
 				 ntohs(nat->nat_ndport));
 		CALC_SUMD(before, after, delta);
 		nat->nat_sumd[0] += (delta & 0xffff) + (delta >> 16);
 	}
 
 	/*
 	 * Partial checksum over the address pair plus protocol.  It only
 	 * ever goes into outbound packets, hence the choice of which pair
 	 * of addresses to sum depends on the entry's direction.
 	 */
 	if (nat->nat_dir != NAT_OUTBOUND) {
 		before = LONG_SUM(ntohl(nat->nat_osrcaddr)) +
 			 LONG_SUM(ntohl(nat->nat_odstaddr));
 	} else {
 		before = LONG_SUM(ntohl(nat->nat_nsrcaddr)) +
 			 LONG_SUM(ntohl(nat->nat_ndstaddr));
 	}
 	before += nat->nat_pr[1];
 	nat->nat_sumd[1] = (before & 0xffff) + (before >> 16);
 
 	/* IP header checksum delta: source change ... */
 	before = LONG_SUM(ntohl(nat->nat_osrcaddr));
 	after = LONG_SUM(ntohl(nat->nat_nsrcaddr));
 	CALC_SUMD(before, after, delta);
 	nat->nat_ipsumd = (delta & 0xffff) + (delta >> 16);
 
 	/* ... plus destination change. */
 	before = LONG_SUM(ntohl(nat->nat_odstaddr));
 	after = LONG_SUM(ntohl(nat->nat_ndstaddr));
 	CALC_SUMD(before, after, delta);
 	nat->nat_ipsumd += (delta & 0xffff) + (delta >> 16);
 
 	nat->nat_v[0] = 4;
 	nat->nat_v[1] = 4;
 
 	/* Only query the MTU of interfaces that are really resolved. */
 	if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1))
 		nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]);
 
 	if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1))
 		nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]);
 
 	if ((nat->nat_flags & SI_CLONE) == 0)
 		nat->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, nat);
 
 	if (ipf_nat_insert(softc, softn, nat) != 0) {
 		/*
 		 * The insert failed: count it and give back the sync record.
 		 */
 		NBUMPSIDED(fin->fin_out, ns_unfinalised);
 		if (nat->nat_sync != NULL)
 			ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync);
 		return -1;
 	}
 
 	if (softn->ipf_nat_logging)
 		ipf_nat_log(softc, softn, nat, NL_NEW);
 
 	/* The entry now holds a reference on the filter rule, if any. */
 	rule = nat->nat_fr;
 	if (rule != NULL) {
 		MUTEX_ENTER(&rule->fr_lock);
 		rule->fr_ref++;
 		MUTEX_EXIT(&rule->fr_lock);
 	}
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_insert                                              */
 /* Returns:     int - 0 == sucess, -1 == failure                            */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              nat(I) - pointer to NAT structure                           */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Insert a NAT entry into the hash tables for searching and add it to the  */
 /* list of active NAT entries.  Adjust global counters when complete.       */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_insert(softc, softn, nat)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	nat_t *nat;
 {
 	u_int hv0, hv1;
 	u_int sp, dp;
 	ipnat_t *in;
 
 	/*
 	 * Try and return an error as early as possible, so calculate the hash
 	 * entry numbers first and then proceed.
 	 */
 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
 		if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 			sp = nat->nat_osport;
 			dp = nat->nat_odport;
 		} else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) {
 			sp = 0;
 			dp = nat->nat_oicmpid;
 		} else {
 			sp = 0;
 			dp = 0;
 		}
 		hv0 = NAT_HASH_FN(nat->nat_osrcaddr, sp, 0xffffffff);
 		hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0 + dp, 0xffffffff);
 		/*
 		 * TRACE nat_osrcaddr, nat_osport, nat_odstaddr,
 		 * nat_odport, hv0
 		 */
 
 		if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 			sp = nat->nat_nsport;
 			dp = nat->nat_ndport;
 		} else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) {
 			sp = 0;
 			dp = nat->nat_nicmpid;
 		} else {
 			sp = 0;
 			dp = 0;
 		}
 		hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, sp, 0xffffffff);
 		hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1 + dp, 0xffffffff);
 		/*
 		 * TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr,
 		 * nat_ndport, hv1
 		 */
 	} else {
 		hv0 = NAT_HASH_FN(nat->nat_osrcaddr, 0, 0xffffffff);
 		hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0, 0xffffffff);
 		/* TRACE nat_osrcaddr, nat_odstaddr, hv0 */
 
 		hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, 0, 0xffffffff);
 		hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1, 0xffffffff);
 		/* TRACE nat_nsrcaddr, nat_ndstaddr, hv1 */
 	}
 
 	nat->nat_hv[0] = hv0;
 	nat->nat_hv[1] = hv1;
 
 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
 
 	in = nat->nat_ptr;
 	nat->nat_ref = nat->nat_me ? 2 : 1;
 
 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
 	nat->nat_ifps[0] = ipf_resolvenic(softc, nat->nat_ifnames[0], 4);
 
 	if (nat->nat_ifnames[1][0] != '\0') {
 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
 		nat->nat_ifps[1] = ipf_resolvenic(softc,
 						  nat->nat_ifnames[1], 4);
 	} else if (in->in_ifnames[1] != -1) {
 		char *name;
 
 		name = in->in_names + in->in_ifnames[1];
 		if (name[1] != '\0' && name[0] != '-' && name[0] != '*') {
 			(void) strncpy(nat->nat_ifnames[1],
 				       nat->nat_ifnames[0], LIFNAMSIZ);
 			nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
 			nat->nat_ifps[1] = nat->nat_ifps[0];
 		}
 	}
 	if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) {
 		nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]);
 	}
 	if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) {
 		nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]);
 	}
 
 	return ipf_nat_hashtab_add(softc, softn, nat);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_hashtab_add                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              nat(I) - pointer to NAT structure                           */
 /*                                                                          */
 /* Handle the insertion of a NAT entry into the table/list.                 */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_hashtab_add(softc, softn, nat)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	nat_t *nat;
 {
 	nat_t **natp;
 	u_int hv0;
 	u_int hv1;
 
 	hv0 = nat->nat_hv[0] % softn->ipf_nat_table_sz;
 	hv1 = nat->nat_hv[1] % softn->ipf_nat_table_sz;
 
 	if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) {
 		u_int swap;
 
 		swap = hv0;
 		hv0 = hv1;
 		hv1 = swap;
 	}
 
 	if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0] >=
 	    softn->ipf_nat_maxbucket) {
 		DT1(ns_bucket_max_0, int,
 		    softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0]);
 		NBUMPSIDE(0, ns_bucket_max);
 		return -1;
 	}
 
 	if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1] >=
 	    softn->ipf_nat_maxbucket) {
 		DT1(ns_bucket_max_1, int,
 		    softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1]);
 		NBUMPSIDE(1, ns_bucket_max);
 		return -1;
 	}
 
 	/*
 	 * The ordering of operations in the list and hash table insertion
 	 * is very important.  The last operation for each task should be
 	 * to update the top of the list, after all the "nexts" have been
 	 * done so that walking the list while it is being done does not
 	 * find strange pointers.
 	 *
 	 * Global list of NAT instances
 	 */
 	nat->nat_next = softn->ipf_nat_instances;
 	nat->nat_pnext = &softn->ipf_nat_instances;
 	if (softn->ipf_nat_instances)
 		softn->ipf_nat_instances->nat_pnext = &nat->nat_next;
 	softn->ipf_nat_instances = nat;
 
 	/*
 	 * Inbound hash table.
 	 */
 	natp = &softn->ipf_nat_table[0][hv0];
 	nat->nat_phnext[0] = natp;
 	nat->nat_hnext[0] = *natp;
 	if (*natp) {
 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
 	} else {
 		NBUMPSIDE(0, ns_inuse);
 	}
 	*natp = nat;
 	NBUMPSIDE(0, ns_bucketlen[hv0]);
 
 	/*
 	 * Outbound hash table.
 	 */
 	natp = &softn->ipf_nat_table[1][hv1];
 	nat->nat_phnext[1] = natp;
 	nat->nat_hnext[1] = *natp;
 	if (*natp)
 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
 	else {
 		NBUMPSIDE(1, ns_inuse);
 	}
 	*natp = nat;
 	NBUMPSIDE(1, ns_bucketlen[hv1]);
 
 	ipf_nat_setqueue(softc, softn, nat);
 
 	if (nat->nat_dir & NAT_OUTBOUND) {
 		NBUMPSIDE(1, ns_added);
 	} else {
 		NBUMPSIDE(0, ns_added);
 	}
 	softn->ipf_nat_stats.ns_active++;
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_icmperrorlookup                                     */
 /* Returns:     nat_t* - point to matching NAT structure                    */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              dir(I) - direction of packet (in/out)                       */
 /*                                                                          */
 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
 /* the required length.                                                     */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_icmperrorlookup(fin, dir)
 	fr_info_t *fin;
 	int dir;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	int flags = 0, type, minlen;
 	icmphdr_t *icmp, *orgicmp;
 	nat_stat_side_t *nside;
 	tcphdr_t *tcp = NULL;
 	u_short data[2];
 	nat_t *nat;
 	ip_t *oip;
 	u_int p;
 
 	icmp = fin->fin_dp;
 	type = icmp->icmp_type;
 	nside = &softn->ipf_nat_stats.ns_side[fin->fin_out];
 	/*
 	 * Does it at least have the return (basic) IP header ?
 	 * Only a basic IP header (no options) should be with an ICMP error
 	 * header.  Also, if it's not an error type, then return.
 	 */
 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) {
 		ATOMIC_INCL(nside->ns_icmp_basic);
 		return NULL;
 	}
 
 	/*
 	 * Check packet size
 	 */
 	oip = (ip_t *)((char *)fin->fin_dp + 8);
 	minlen = IP_HL(oip) << 2;
 	if ((minlen < sizeof(ip_t)) ||
 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) {
 		ATOMIC_INCL(nside->ns_icmp_size);
 		return NULL;
 	}
 
 	/*
 	 * Is the buffer big enough for all of it ?  It's the size of the IP
 	 * header claimed in the encapsulated part which is of concern.  It
 	 * may be too big to be in this buffer but not so big that it's
 	 * outside the ICMP packet, leading to TCP deref's causing problems.
 	 * This is possible because we don't know how big oip_hl is when we
 	 * do the pullup early in ipf_check() and thus can't gaurantee it is
 	 * all here now.
 	 */
 #ifdef  ipf_nat_KERNEL
 	{
 	mb_t *m;
 
 	m = fin->fin_m;
 # if defined(MENTAT)
 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
 	    (char *)m->b_wptr) {
 		ATOMIC_INCL(nside->ns_icmp_mbuf);
 		return NULL;
 	}
 # else
 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
 	    (char *)fin->fin_ip + M_LEN(m)) {
 		ATOMIC_INCL(nside->ns_icmp_mbuf);
 		return NULL;
 	}
 # endif
 	}
 #endif
 
 	if (fin->fin_daddr != oip->ip_src.s_addr) {
 		ATOMIC_INCL(nside->ns_icmp_address);
 		return NULL;
 	}
 
 	p = oip->ip_p;
 	if (p == IPPROTO_TCP)
 		flags = IPN_TCP;
 	else if (p == IPPROTO_UDP)
 		flags = IPN_UDP;
 	else if (p == IPPROTO_ICMP) {
 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
 
 		/* see if this is related to an ICMP query */
 		if (ipf_nat_icmpquerytype(orgicmp->icmp_type)) {
 			data[0] = fin->fin_data[0];
 			data[1] = fin->fin_data[1];
 			fin->fin_data[0] = 0;
 			fin->fin_data[1] = orgicmp->icmp_id;
 
 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
 			/*
 			 * NOTE : dir refers to the direction of the original
 			 *        ip packet. By definition the icmp error
 			 *        message flows in the opposite direction.
 			 */
 			if (dir == NAT_INBOUND)
 				nat = ipf_nat_inlookup(fin, flags, p,
 						       oip->ip_dst,
 						       oip->ip_src);
 			else
 				nat = ipf_nat_outlookup(fin, flags, p,
 							oip->ip_dst,
 							oip->ip_src);
 			fin->fin_data[0] = data[0];
 			fin->fin_data[1] = data[1];
 			return nat;
 		}
 	}
 
 	if (flags & IPN_TCPUDP) {
 		minlen += 8;		/* + 64bits of data to get ports */
 		/* TRACE (fin,minlen) */
 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) {
 			ATOMIC_INCL(nside->ns_icmp_short);
 			return NULL;
 		}
 
 		data[0] = fin->fin_data[0];
 		data[1] = fin->fin_data[1];
 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
 		fin->fin_data[0] = ntohs(tcp->th_dport);
 		fin->fin_data[1] = ntohs(tcp->th_sport);
 
 		if (dir == NAT_INBOUND) {
 			nat = ipf_nat_inlookup(fin, flags, p, oip->ip_dst,
 					       oip->ip_src);
 		} else {
 			nat = ipf_nat_outlookup(fin, flags, p, oip->ip_dst,
 					    oip->ip_src);
 		}
 		fin->fin_data[0] = data[0];
 		fin->fin_data[1] = data[1];
 		return nat;
 	}
 	if (dir == NAT_INBOUND)
 		nat = ipf_nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
 	else
 		nat = ipf_nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
 
 	return nat;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_icmperror                                           */
 /* Returns:     nat_t* - pointer to matching NAT structure, or NULL if the  */
 /*                       packet is short, a fragment body, not IPv4 or has  */
 /*                       no matching NAT session                            */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              nflags(O) - set to IPN_ICMPERR once a session is found      */
 /*              dir(I)    - direction of packet (in/out)                    */
 /*                                                                          */
 /* Fix up an ICMP packet which is an error message for an existing NAT      */
 /* session.  This will correct both packet header data and checksums.       */
 /*                                                                          */
 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
 /* a NAT'd ICMP packet gets correctly recognised.                           */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_icmperror(fin, nflags, dir)
 	fr_info_t *fin;
 	u_int *nflags;
 	int dir;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_32_t sum1, sum2, sumd, sumd2;
 	struct in_addr a1, a2, a3, a4;
 	int flags, dlen, odst;
 	icmphdr_t *icmp;
 	u_short *csump;
 	tcphdr_t *tcp;
 	nat_t *nat;
 	ip_t *oip;
 	void *dp;
 
 	/*
 	 * Short packets and fragment bodies are counted and rejected.
 	 */
 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
 		NBUMPSIDED(fin->fin_out, ns_icmp_short);
 		return NULL;
 	}
 
 	/*
 	 * ipf_nat_icmperrorlookup() will return NULL for `defective' packets.
 	 */
 	if ((fin->fin_v != 4) || !(nat = ipf_nat_icmperrorlookup(fin, dir))) {
 		NBUMPSIDED(fin->fin_out, ns_icmp_notfound);
 		return NULL;
 	}
 
 	tcp = NULL;
 	csump = NULL;
 	flags = 0;
 	sumd2 = 0;
 	*nflags = IPN_ICMPERR;
 	/*
 	 * oip is the IP header of the offending packet carried inside the
 	 * ICMP error and dp points just past that header.
 	 */
 	icmp = fin->fin_dp;
 	oip = (ip_t *)&icmp->icmp_ip;
 	dp = (((char *)oip) + (IP_HL(oip) << 2));
 	if (oip->ip_p == IPPROTO_TCP) {
 		tcp = (tcphdr_t *)dp;
 		csump = (u_short *)&tcp->th_sum;
 		flags = IPN_TCP;
 	} else if (oip->ip_p == IPPROTO_UDP) {
 		udphdr_t *udp;
 
 		/*
 		 * tcp is pointed at the UDP header as well because the port
 		 * fields are accessed through th_sport/th_dport below.
 		 */
 		udp = (udphdr_t *)dp;
 		tcp = (tcphdr_t *)dp;
 		csump = (u_short *)&udp->uh_sum;
 		flags = IPN_UDP;
 	} else if (oip->ip_p == IPPROTO_ICMP)
 		flags = IPN_ICMPQUERY;
 	/* dlen is fin_plen less the offset of dp from fin_ip. */
 	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
 
 	/*
 	 * Need to adjust ICMP header to include the real IP#'s and
 	 * port #'s.  Only apply a checksum change relative to the
 	 * IP address change as it will be modified again in ipf_nat_checkout
 	 * for both address and port.  Two checksum changes are
 	 * necessary for the two header address changes.  Be careful
 	 * to only modify the checksum once for the port # and twice
 	 * for the IP#.
 	 */
 
 	/*
 	 * Step 1
 	 * Fix the IP addresses in the offending IP packet. You also need
 	 * to adjust the IP header checksum of that offending IP packet.
 	 *
 	 * Normally, you would expect that the ICMP checksum of the
 	 * ICMP error message needs to be adjusted as well for the
 	 * IP address change in oip.
 	 * However, this is a NOP, because the ICMP checksum is
 	 * calculated over the complete ICMP packet, which includes the
 	 * changed oip IP addresses and oip->ip_sum. However, these
 	 * two changes cancel each other out (if the delta for
 	 * the IP address is x, then the delta for ip_sum is minus x),
 	 * so no change in the icmp_cksum is necessary.
 	 *
 	 * Inbound ICMP
 	 * ------------
 	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
 	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
 	 * - OIP_SRC(c)=nat_newsrcip,          OIP_DST(b)=nat_newdstip
 	 *=> OIP_SRC(c)=nat_oldsrcip,          OIP_DST(b)=nat_olddstip
 	 *
 	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
 	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
 	 * - OIP_SRC(b)=nat_olddstip,          OIP_DST(a)=nat_oldsrcip
 	 *=> OIP_SRC(b)=nat_newdstip,          OIP_DST(a)=nat_newsrcip
 	 *
 	 * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d
 	 * - response to outgoing packet (a,b)=>(c,d) (OIP_SRC=c,OIP_DST=d)
 	 * - OIP_SRC(c)=nat_newsrcip,          OIP_DST(d)=nat_newdstip
 	 *=> OIP_SRC(c)=nat_oldsrcip,          OIP_DST(d)=nat_olddstip
 	 *
 	 * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d
 	 * - response to outgoing packet (d,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
 	 * - OIP_SRC(b)=nat_olddstip,          OIP_DST(a)=nat_oldsrcip
 	 *=> OIP_SRC(b)=nat_newdstip,          OIP_DST(a)=nat_newsrcip
 	 *
 	 * Outbound ICMP
 	 * -------------
 	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
 	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
 	 * - OIP_SRC(b)=nat_olddstip,          OIP_DST(a)=nat_oldsrcip
 	 *=> OIP_SRC(b)=nat_newdstip,          OIP_DST(a)=nat_newsrcip
 	 *
 	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
 	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
 	 * - OIP_SRC(a)=nat_newsrcip,          OIP_DST(c)=nat_newdstip
 	 *=> OIP_SRC(a)=nat_oldsrcip,          OIP_DST(c)=nat_olddstip
 	 *
 	 * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d
 	 * - response to incoming packet (d,c)=>(b,a) (OIP_SRC=c,OIP_DST=d)
 	 * - OIP_SRC(c)=nat_olddstip,          OIP_DST(d)=nat_oldsrcip
 	 *=> OIP_SRC(b)=nat_newdstip,          OIP_DST(a)=nat_newsrcip
 	 *
 	 * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d
 	 * - response to incoming packet (a,b)=>(c,d) (OIP_SRC=b,OIP_DST=a)
 	 * - OIP_SRC(b)=nat_newsrcip,          OIP_DST(a)=nat_newdstip
 	 *=> OIP_SRC(a)=nat_oldsrcip,          OIP_DST(c)=nat_olddstip
 	 */
 
 	/*
 	 * a4/a2 hold the embedded source/destination as received, a1/a3 the
 	 * values written in their place.  odst records which set of NAT
 	 * fields (1 = nat_o*, 0 = nat_n*) was used so that the port fix-up
 	 * below picks the matching set.
 	 */
 	if (((fin->fin_out == 0) && ((nat->nat_redir & NAT_MAP) != 0)) ||
 	    ((fin->fin_out == 1) && ((nat->nat_redir & NAT_REDIRECT) != 0))) {
 		a1.s_addr = ntohl(nat->nat_osrcaddr);
 		a4.s_addr = ntohl(oip->ip_src.s_addr);
 		a3.s_addr = ntohl(nat->nat_odstaddr);
 		a2.s_addr = ntohl(oip->ip_dst.s_addr);
 		oip->ip_src.s_addr = htonl(a1.s_addr);
 		oip->ip_dst.s_addr = htonl(a3.s_addr);
 		odst = 1;
 	} else {
 		a1.s_addr = ntohl(nat->nat_ndstaddr);
 		a2.s_addr = ntohl(oip->ip_dst.s_addr);
 		a3.s_addr = ntohl(nat->nat_nsrcaddr);
 		a4.s_addr = ntohl(oip->ip_src.s_addr);
 		oip->ip_dst.s_addr = htonl(a3.s_addr);
 		oip->ip_src.s_addr = htonl(a1.s_addr);
 		odst = 0;
 	}
 	sum1 = 0;
 	sum2 = 0;
 	sumd = 0;
 	CALC_SUMD(a2.s_addr, a3.s_addr, sum1);
 	CALC_SUMD(a4.s_addr, a1.s_addr, sum2);
 	sumd = sum2 + sum1;
 	if (sumd != 0)
 		ipf_fix_datacksum(&oip->ip_sum, sumd);
 
 	/* Remember the address-only delta; sumd goes on to include ports. */
 	sumd2 = sumd;
 	sum1 = 0;
 	sum2 = 0;
 
 	/*
 	 * Fix UDP pseudo header checksum to compensate for the
 	 * IP address change.
 	 */
 	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
 		u_32_t sum3, sum4, sumt;
 
 		/*
 		 * Step 2 :
 		 * For offending TCP/UDP IP packets, translate the ports as
 		 * well, based on the NAT specification. Of course such
 		 * a change may be reflected in the ICMP checksum as well.
 		 *
 		 * Since the port fields are part of the TCP/UDP checksum
 		 * of the offending IP packet, you need to adjust that checksum
 		 * as well... except that the change in the port numbers should
 		 * be offset by the checksum change.  However, the TCP/UDP
 		 * checksum will also need to change if there has been an
 		 * IP address change.
 		 */
 		if (odst == 1) {
 			sum1 = ntohs(nat->nat_osport);
 			sum4 = ntohs(tcp->th_sport);
 			sum3 = ntohs(nat->nat_odport);
 			sum2 = ntohs(tcp->th_dport);
 
 			tcp->th_sport = htons(sum1);
 			tcp->th_dport = htons(sum3);
 		} else {
 			sum1 = ntohs(nat->nat_ndport);
 			sum2 = ntohs(tcp->th_dport);
 			sum3 = ntohs(nat->nat_nsport);
 			sum4 = ntohs(tcp->th_sport);
 
 			tcp->th_dport = htons(sum3);
 			tcp->th_sport = htons(sum1);
 		}
 		CALC_SUMD(sum4, sum1, sumt);
 		sumd += sumt;
 		CALC_SUMD(sum2, sum3, sumt);
 		sumd += sumt;
 
 		if (sumd != 0 || sumd2 != 0) {
 			/*
 			 * At this point, sumd is the delta to apply to the
 			 * TCP/UDP header, given the changes in both the IP
 			 * address and the ports and sumd2 is the delta to
 			 * apply to the ICMP header, given the IP address
 			 * change delta that may need to be applied to the
 			 * TCP/UDP checksum instead.
 			 *
 			 * If we fix both the IP and TCP/UDP checksums
 			 * then the ICMP checksum changes by the address
 			 * delta applied to the TCP/UDP checksum.  If we
 			 * do not change the TCP/UDP checksum then we
 			 * apply the delta in ports to the ICMP checksum.
 			 */
 			if (oip->ip_p == IPPROTO_UDP) {
 				/*
 				 * The UDP checksum is only patched when the
 				 * field lies within dlen and is non-zero.
 				 */
 				if ((dlen >= 8) && (*csump != 0)) {
 					ipf_fix_datacksum(csump, sumd);
 				} else {
 					CALC_SUMD(sum1, sum4, sumd2);
 					CALC_SUMD(sum3, sum2, sumt);
 					sumd2 += sumt;
 				}
 			} else if (oip->ip_p == IPPROTO_TCP) {
 				/*
 				 * The TCP checksum is only patched when at
 				 * least 18 bytes of the header are present.
 				 */
 				if (dlen >= 18) {
 					ipf_fix_datacksum(csump, sumd);
 				} else {
 					CALC_SUMD(sum1, sum4, sumd2);
 					CALC_SUMD(sum3, sum2, sumt);
 					sumd2 += sumt;
 				}
 			}
 			if (sumd2 != 0) {
 				/* Fold the carries back into 16 bits. */
 				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
 				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
 				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
 				ipf_fix_incksum(0, &icmp->icmp_cksum, sumd2, 0);
 			}
 		}
 	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
 		icmphdr_t *orgicmp;
 
 		/*
 		 * XXX - what if this is bogus hl and we go off the end ?
 		 * In this case, ipf_nat_icmperrorlookup() will have
 		 * returned NULL.
 		 */
 		orgicmp = (icmphdr_t *)dp;
 
 		if (odst == 1) {
 			if (orgicmp->icmp_id != nat->nat_osport) {
 
 				/*
 				 * Fix ICMP checksum (of the offending ICMP
 				 * query packet) to compensate the change
 				 * in the ICMP id of the offending ICMP
 				 * packet.
 				 *
 				 * Since you modify orgicmp->icmp_id with
 				 * a delta (say x) and you compensate that
 				 * in origicmp->icmp_cksum with a delta
 				 * minus x, you don't have to adjust the
 				 * overall icmp->icmp_cksum
 				 */
 				sum1 = ntohs(orgicmp->icmp_id);
 				sum2 = ntohs(nat->nat_oicmpid);
 				CALC_SUMD(sum1, sum2, sumd);
 				orgicmp->icmp_id = nat->nat_oicmpid;
 				ipf_fix_datacksum(&orgicmp->icmp_cksum, sumd);
 			}
 		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
 	}
 	return nat;
 }
 
 
 /*
  *       MAP-IN    MAP-OUT   RDR-IN   RDR-OUT
  * osrc    X       == src    == src      X
  * odst    X       == dst    == dst      X
  * nsrc  == dst      X         X      == dst
  * ndst  == src      X         X      == src
  * MAP = NAT_OUTBOUND, RDR = NAT_INBOUND
  */
 /*
  * NB: these lookups don't lock access to the list, it assumed that it has
  * already been done!
  */
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_inlookup                                            */
 /* Returns:     nat_t* - NULL == no match,                                  */
 /*                       else pointer to matching NAT entry                 */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              flags(I)  - NAT flags for this packet                       */
 /*              p(I)      - protocol for this packet                        */
 /*              src(I)    - source IP address                               */
 /*              mapdst(I) - destination IP address                          */
 /*                                                                          */
 /* Lookup a nat entry based on the mapped destination ip address/port and   */
 /* real source address/port.  We use this lookup when receiving a packet,   */
 /* we're looking for a table entry, based on the destination address.       */
 /*                                                                          */
 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
 /*                                                                          */
 /* NOTE: IT IS ASSUMED THAT  IS ONLY HELD WITH A READ LOCK WHEN             */
 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
 /*                                                                          */
 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
 /*            the packet is of said protocol                                */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_inlookup(fin, flags, p, src, mapdst)
 	fr_info_t *fin;
 	u_int flags, p;
 	struct in_addr src , mapdst;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_short sport, dport;
 	grehdr_t *gre;
 	ipnat_t *ipn;
 	u_int sflags;
 	nat_t *nat;
 	int nflags;
 	u_32_t dst;
 	void *ifp;
 	u_int hv, rhv;
 
 	ifp = fin->fin_ifp;
 	gre = NULL;
 	dst = mapdst.s_addr;
 	sflags = flags & NAT_TCPUDPICMP;
 
 	switch (p)
 	{
 	case IPPROTO_TCP :
 	case IPPROTO_UDP :
 		sport = htons(fin->fin_data[0]);
 		dport = htons(fin->fin_data[1]);
 		break;
 	case IPPROTO_ICMP :
 		if (flags & IPN_ICMPERR) {
 			sport = fin->fin_data[1];
 			dport = 0;
 		} else {
 			dport = fin->fin_data[1];
 			sport = 0;
 		}
 		break;
 	default :
 		sport = 0;
 		dport = 0;
 		break;
 	}
 
 
 	if ((flags & SI_WILDP) != 0)
 		goto find_in_wild_ports;
 
 	rhv = NAT_HASH_FN(dst, dport, 0xffffffff);
 	rhv = NAT_HASH_FN(src.s_addr, rhv + sport, 0xffffffff);
 	hv = rhv % softn->ipf_nat_table_sz;
 	nat = softn->ipf_nat_table[1][hv];
 	/* TRACE dst, dport, src, sport, hv, nat */
 
 	for (; nat; nat = nat->nat_hnext[1]) {
 		if (nat->nat_ifps[0] != NULL) {
 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
 				continue;
 		}
 
 		if (nat->nat_pr[0] != p)
 			continue;
 
 		switch (nat->nat_dir)
 		{
 		case NAT_INBOUND :
 		case NAT_DIVERTIN :
 			if (nat->nat_v[0] != 4)
 				continue;
 			if (nat->nat_osrcaddr != src.s_addr ||
 			    nat->nat_odstaddr != dst)
 				continue;
 			if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 				if (nat->nat_osport != sport)
 					continue;
 				if (nat->nat_odport != dport)
 					continue;
 
 			} else if (p == IPPROTO_ICMP) {
 				if (nat->nat_osport != dport) {
 					continue;
 				}
 			}
 			break;
 		case NAT_DIVERTOUT :
 			if (nat->nat_dlocal)
 				continue;
 		case NAT_OUTBOUND :
 			if (nat->nat_v[1] != 4)
 				continue;
 			if (nat->nat_dlocal)
 				continue;
 			if (nat->nat_dlocal)
 				continue;
 			if (nat->nat_ndstaddr != src.s_addr ||
 			    nat->nat_nsrcaddr != dst)
 				continue;
 			if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 				if (nat->nat_ndport != sport)
 					continue;
 				if (nat->nat_nsport != dport)
 					continue;
 
 			} else if (p == IPPROTO_ICMP) {
 				if (nat->nat_osport != dport) {
 					continue;
 				}
 			}
 			break;
 		}
 
 
 		if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 			ipn = nat->nat_ptr;
 			if ((ipn != NULL) && (nat->nat_aps != NULL))
 				if (ipf_proxy_match(fin, nat) != 0)
 					continue;
 		}
 		if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) {
 			nat->nat_ifps[0] = ifp;
 			nat->nat_mtu[0] = GETIFMTU_4(ifp);
 		}
 		return nat;
 	}
 
 	/*
 	 * So if we didn't find it but there are wildcard members in the hash
 	 * table, go back and look for them.  We do this search and update here
 	 * because it is modifying the NAT table and we want to do this only
 	 * for the first packet that matches.  The exception, of course, is
 	 * for "dummy" (FI_IGNORE) lookups.
 	 */
 find_in_wild_ports:
 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) {
 		NBUMPSIDEX(0, ns_lookup_miss, ns_lookup_miss_0);
 		return NULL;
 	}
 	if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) {
 		NBUMPSIDEX(0, ns_lookup_nowild, ns_lookup_nowild_0);
 		return NULL;
 	}
 
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
 	hv = NAT_HASH_FN(src.s_addr, hv, softn->ipf_nat_table_sz);
 	WRITE_ENTER(&softc->ipf_nat);
 
 	nat = softn->ipf_nat_table[1][hv];
 	/* TRACE dst, src, hv, nat */
 	for (; nat; nat = nat->nat_hnext[1]) {
 		if (nat->nat_ifps[0] != NULL) {
 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
 				continue;
 		}
 
 		if (nat->nat_pr[0] != fin->fin_p)
 			continue;
 
 		switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND))
 		{
 		case NAT_INBOUND :
 			if (nat->nat_v[0] != 4)
 				continue;
 			if (nat->nat_osrcaddr != src.s_addr ||
 			    nat->nat_odstaddr != dst)
 				continue;
 			break;
 		case NAT_OUTBOUND :
 			if (nat->nat_v[1] != 4)
 				continue;
 			if (nat->nat_ndstaddr != src.s_addr ||
 			    nat->nat_nsrcaddr != dst)
 				continue;
 			break;
 		}
 
 		nflags = nat->nat_flags;
 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
 			continue;
 
 		if (ipf_nat_wildok(nat, (int)sport, (int)dport, nflags,
 				   NAT_INBOUND) == 1) {
 			if ((fin->fin_flx & FI_IGNORE) != 0)
 				break;
 			if ((nflags & SI_CLONE) != 0) {
 				nat = ipf_nat_clone(fin, nat);
 				if (nat == NULL)
 					break;
 			} else {
 				MUTEX_ENTER(&softn->ipf_nat_new);
 				softn->ipf_nat_stats.ns_wilds--;
 				MUTEX_EXIT(&softn->ipf_nat_new);
 			}
 
 			if (nat->nat_dir == NAT_INBOUND) {
 				if (nat->nat_osport == 0) {
 					nat->nat_osport = sport;
 					nat->nat_nsport = sport;
 				}
 				if (nat->nat_odport == 0) {
 					nat->nat_odport = dport;
 					nat->nat_ndport = dport;
 				}
 			} else if (nat->nat_dir == NAT_OUTBOUND) {
 				if (nat->nat_osport == 0) {
 					nat->nat_osport = dport;
 					nat->nat_nsport = dport;
 				}
 				if (nat->nat_odport == 0) {
 					nat->nat_odport = sport;
 					nat->nat_ndport = sport;
 				}
 			}
 			if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) {
 				nat->nat_ifps[0] = ifp;
 				nat->nat_mtu[0] = GETIFMTU_4(ifp);
 			}
 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
 			ipf_nat_tabmove(softn, nat);
 			break;
 		}
 	}
 
 	MUTEX_DOWNGRADE(&softc->ipf_nat);
 
 	if (nat == NULL) {
 		NBUMPSIDE(0, ns_lookup_miss);
 	}
 	return nat;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_tabmove                                             */
 /* Returns:     Nil                                                         */
 /* Parameters:  softn(I) - pointer to NAT context structure                 */
 /*              nat(I)   - pointer to NAT structure                         */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* This function is only called for TCP/UDP NAT table entries where the     */
 /* original was placed in the table without hashing on the ports and we now */
 /* want to include hashing on port numbers.                                 */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_tabmove(softn, nat)
 	ipf_nat_softc_t *softn;
 	nat_t *nat;
 {
 	u_int hv0, hv1, rhv0, rhv1;
 	natstat_t *nsp;
 	nat_t **natp;
 
 	if (nat->nat_flags & SI_CLONE)
 		return;
 
 	nsp = &softn->ipf_nat_stats;
 	/*
 	 * Remove the NAT entry from the old location
 	 */
 	if (nat->nat_hnext[0])
 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
 	*nat->nat_phnext[0] = nat->nat_hnext[0];
 	nsp->ns_side[0].ns_bucketlen[nat->nat_hv[0] %
 				     softn->ipf_nat_table_sz]--;
 
 	if (nat->nat_hnext[1])
 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
 	*nat->nat_phnext[1] = nat->nat_hnext[1];
 	nsp->ns_side[1].ns_bucketlen[nat->nat_hv[1] %
 				     softn->ipf_nat_table_sz]--;
 
 	/*
 	 * Add into the NAT table in the new position
 	 */
 	rhv0 = NAT_HASH_FN(nat->nat_osrcaddr, nat->nat_osport, 0xffffffff);
 	rhv0 = NAT_HASH_FN(nat->nat_odstaddr, rhv0 + nat->nat_odport,
 			   0xffffffff);
 	rhv1 = NAT_HASH_FN(nat->nat_nsrcaddr, nat->nat_nsport, 0xffffffff);
 	rhv1 = NAT_HASH_FN(nat->nat_ndstaddr, rhv1 + nat->nat_ndport,
 			   0xffffffff);
 
 	hv0 = rhv0 % softn->ipf_nat_table_sz;
 	hv1 = rhv1 % softn->ipf_nat_table_sz;
 
 	if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) {
 		u_int swap;
 
 		swap = hv0;
 		hv0 = hv1;
 		hv1 = swap;
 	}
 
 	/* TRACE nat_osrcaddr, nat_osport, nat_odstaddr, nat_odport, hv0 */
 	/* TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr, nat_ndport, hv1 */
 
 	nat->nat_hv[0] = rhv0;
 	natp = &softn->ipf_nat_table[0][hv0];
 	if (*natp)
 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
 	nat->nat_phnext[0] = natp;
 	nat->nat_hnext[0] = *natp;
 	*natp = nat;
 	nsp->ns_side[0].ns_bucketlen[hv0]++;
 
 	nat->nat_hv[1] = rhv1;
 	natp = &softn->ipf_nat_table[1][hv1];
 	if (*natp)
 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
 	nat->nat_phnext[1] = natp;
 	nat->nat_hnext[1] = *natp;
 	*natp = nat;
 	nsp->ns_side[1].ns_bucketlen[hv1]++;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_outlookup                                           */
 /* Returns:     nat_t* - NULL == no match,                                  */
 /*                       else pointer to matching NAT entry                 */
 /* Parameters:  fin(I)   - pointer to packet information                    */
 /*              flags(I) - NAT flags for this packet                        */
 /*              p(I)     - protocol for this packet                         */
 /*              src(I)   - source IP address                                */
 /*              dst(I)   - destination IP address                           */
 /*              rw(I)    - 1 == write lock on  held, 0 == read lock.        */
 /*                                                                          */
 /* Lookup a nat entry based on the source 'real' ip address/port and        */
 /* destination address/port.  We use this lookup when sending a packet out, */
 /* we're looking for a table entry, based on the source address.            */
 /*                                                                          */
 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
 /*                                                                          */
 /* NOTE: IT IS ASSUMED THAT  IS ONLY HELD WITH A READ LOCK WHEN             */
 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
 /*                                                                          */
 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
 /*            the packet is of said protocol                                */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_outlookup(fin, flags, p, src, dst)
 	fr_info_t *fin;
 	u_int flags, p;
 	struct in_addr src , dst;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_short sport, dport;
 	u_int sflags;
 	ipnat_t *ipn;
 	nat_t *nat;
 	void *ifp;
 	u_int hv;
 
 	ifp = fin->fin_ifp;
 	sflags = flags & IPN_TCPUDPICMP;
 	sport = 0;
 	dport = 0;
 
 	switch (p)
 	{
 	case IPPROTO_TCP :
 	case IPPROTO_UDP :
 		sport = htons(fin->fin_data[0]);
 		dport = htons(fin->fin_data[1]);
 		break;
 	case IPPROTO_ICMP :
 		if (flags & IPN_ICMPERR)
 			sport = fin->fin_data[1];
 		else
 			dport = fin->fin_data[1];
 		break;
 	default :
 		break;
 	}
 
 	if ((flags & SI_WILDP) != 0)
 		goto find_out_wild_ports;
 
 	hv = NAT_HASH_FN(src.s_addr, sport, 0xffffffff);
 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, softn->ipf_nat_table_sz);
 	nat = softn->ipf_nat_table[0][hv];
 
 	/* TRACE src, sport, dst, dport, hv, nat */
 
 	for (; nat; nat = nat->nat_hnext[0]) {
 		if (nat->nat_ifps[1] != NULL) {
 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
 				continue;
 		}
 
 		if (nat->nat_pr[1] != p)
 			continue;
 
 		switch (nat->nat_dir)
 		{
 		case NAT_INBOUND :
 		case NAT_DIVERTIN :
 			if (nat->nat_v[1] != 4)
 				continue;
 			if (nat->nat_ndstaddr != src.s_addr ||
 			    nat->nat_nsrcaddr != dst.s_addr)
 				continue;
 
 			if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 				if (nat->nat_ndport != sport)
 					continue;
 				if (nat->nat_nsport != dport)
 					continue;
 
 			} else if (p == IPPROTO_ICMP) {
 				if (nat->nat_osport != dport) {
 					continue;
 				}
 			}
 			break;
 		case NAT_OUTBOUND :
 		case NAT_DIVERTOUT :
 			if (nat->nat_v[0] != 4)
 				continue;
 			if (nat->nat_osrcaddr != src.s_addr ||
 			    nat->nat_odstaddr != dst.s_addr)
 				continue;
 
 			if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 				if (nat->nat_odport != dport)
 					continue;
 				if (nat->nat_osport != sport)
 					continue;
 
 			} else if (p == IPPROTO_ICMP) {
 				if (nat->nat_osport != dport) {
 					continue;
 				}
 			}
 			break;
 		}
 
 		ipn = nat->nat_ptr;
 		if ((ipn != NULL) && (nat->nat_aps != NULL))
 			if (ipf_proxy_match(fin, nat) != 0)
 				continue;
 
 		if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) {
 			nat->nat_ifps[1] = ifp;
 			nat->nat_mtu[1] = GETIFMTU_4(ifp);
 		}
 		return nat;
 	}
 
 	/*
 	 * So if we didn't find it but there are wildcard members in the hash
 	 * table, go back and look for them.  We do this search and update here
 	 * because it is modifying the NAT table and we want to do this only
 	 * for the first packet that matches.  The exception, of course, is
 	 * for "dummy" (FI_IGNORE) lookups.
 	 */
 find_out_wild_ports:
 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) {
 		NBUMPSIDEX(1, ns_lookup_miss, ns_lookup_miss_1);
 		return NULL;
 	}
 	if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) {
 		NBUMPSIDEX(1, ns_lookup_nowild, ns_lookup_nowild_1);
 		return NULL;
 	}
 
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	hv = NAT_HASH_FN(src.s_addr, 0, 0xffffffff);
 	hv = NAT_HASH_FN(dst.s_addr, hv, softn->ipf_nat_table_sz);
 
 	WRITE_ENTER(&softc->ipf_nat);
 
 	nat = softn->ipf_nat_table[0][hv];
 	for (; nat; nat = nat->nat_hnext[0]) {
 		if (nat->nat_ifps[1] != NULL) {
 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
 				continue;
 		}
 
 		if (nat->nat_pr[1] != fin->fin_p)
 			continue;
 
 		switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND))
 		{
 		case NAT_INBOUND :
 			if (nat->nat_v[1] != 4)
 				continue;
 			if (nat->nat_ndstaddr != src.s_addr ||
 			    nat->nat_nsrcaddr != dst.s_addr)
 				continue;
 			break;
 		case NAT_OUTBOUND :
 			if (nat->nat_v[0] != 4)
 				continue;
 			if (nat->nat_osrcaddr != src.s_addr ||
 			    nat->nat_odstaddr != dst.s_addr)
 				continue;
 			break;
 		}
 
 		if (!(nat->nat_flags & (NAT_TCPUDP|SI_WILDP)))
 			continue;
 
 		if (ipf_nat_wildok(nat, (int)sport, (int)dport, nat->nat_flags,
 				   NAT_OUTBOUND) == 1) {
 			if ((fin->fin_flx & FI_IGNORE) != 0)
 				break;
 			if ((nat->nat_flags & SI_CLONE) != 0) {
 				nat = ipf_nat_clone(fin, nat);
 				if (nat == NULL)
 					break;
 			} else {
 				MUTEX_ENTER(&softn->ipf_nat_new);
 				softn->ipf_nat_stats.ns_wilds--;
 				MUTEX_EXIT(&softn->ipf_nat_new);
 			}
 
 			if (nat->nat_dir == NAT_OUTBOUND) {
 				if (nat->nat_osport == 0) {
 					nat->nat_osport = sport;
 					nat->nat_nsport = sport;
 				}
 				if (nat->nat_odport == 0) {
 					nat->nat_odport = dport;
 					nat->nat_ndport = dport;
 				}
 			} else if (nat->nat_dir == NAT_INBOUND) {
 				if (nat->nat_osport == 0) {
 					nat->nat_osport = dport;
 					nat->nat_nsport = dport;
 				}
 				if (nat->nat_odport == 0) {
 					nat->nat_odport = sport;
 					nat->nat_ndport = sport;
 				}
 			}
 			if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) {
 				nat->nat_ifps[1] = ifp;
 				nat->nat_mtu[1] = GETIFMTU_4(ifp);
 			}
 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
 			ipf_nat_tabmove(softn, nat);
 			break;
 		}
 	}
 
 	MUTEX_DOWNGRADE(&softc->ipf_nat);
 
 	if (nat == NULL) {
 		NBUMPSIDE(1, ns_lookup_miss);
 	}
 	return nat;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_lookupredir                                         */
 /* Returns:     nat_t* - NULL == no match,                                  */
 /*                       else pointer to matching NAT entry                 */
 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
 /*                      entry for.                                          */
 /*                                                                          */
 /* Lookup the NAT tables to search for a matching redirect                  */
 /* The contents of natlookup_t should imitate those found in a packet that  */
 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
 /* We can do the lookup in one of two ways, imitating an inbound or         */
 /* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
 /* For IN, the fields are set as follows:                                   */
 /*     nl_real* = source information                                        */
 /*     nl_out* = destination information (translated)                       */
 /* For an out packet, the fields are set like this:                         */
 /*     nl_in* = source information (untranslated)                           */
 /*     nl_out* = destination information (translated)                       */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_lookupredir(np)
 	natlookup_t *np;
 {
 	fr_info_t fi;
 	nat_t *nat;
 
 	bzero((char *)&fi, sizeof(fi));
 	if (np->nl_flags & IPN_IN) {
 		fi.fin_data[0] = ntohs(np->nl_realport);
 		fi.fin_data[1] = ntohs(np->nl_outport);
 	} else {
 		fi.fin_data[0] = ntohs(np->nl_inport);
 		fi.fin_data[1] = ntohs(np->nl_outport);
 	}
 	if (np->nl_flags & IPN_TCP)
 		fi.fin_p = IPPROTO_TCP;
 	else if (np->nl_flags & IPN_UDP)
 		fi.fin_p = IPPROTO_UDP;
 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
 		fi.fin_p = IPPROTO_ICMP;
 
 	/*
 	 * We can do two sorts of lookups:
 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
 	 * - default: we have the `in' and `out' address, look for `real'.
 	 */
 	if (np->nl_flags & IPN_IN) {
 		if ((nat = ipf_nat_inlookup(&fi, np->nl_flags, fi.fin_p,
 					    np->nl_realip, np->nl_outip))) {
 			np->nl_inip = nat->nat_odstip;
 			np->nl_inport = nat->nat_odport;
 		}
 	} else {
 		/*
 		 * If nl_inip is non null, this is a lookup based on the real
 		 * ip address. Else, we use the fake.
 		 */
 		if ((nat = ipf_nat_outlookup(&fi, np->nl_flags, fi.fin_p,
 					 np->nl_inip, np->nl_outip))) {
 
 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
 				fr_info_t fin;
 				bzero((char *)&fin, sizeof(fin));
 				fin.fin_p = nat->nat_pr[0];
 				fin.fin_data[0] = ntohs(nat->nat_ndport);
 				fin.fin_data[1] = ntohs(nat->nat_nsport);
 				if (ipf_nat_inlookup(&fin, np->nl_flags,
 						     fin.fin_p, nat->nat_ndstip,
 						     nat->nat_nsrcip) != NULL) {
 					np->nl_flags &= ~IPN_FINDFORWARD;
 				}
 			}
 
 			np->nl_realip = nat->nat_ndstip;
 			np->nl_realport = nat->nat_ndport;
 		}
  	}
 
 	return nat;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_match                                               */
 /* Returns:     int - 0 == no match, 1 == match                             */
 /* Parameters:  fin(I)   - pointer to packet information                    */
 /*              np(I)    - pointer to NAT rule                              */
 /*                                                                          */
 /* Test a packet against the selection criteria of one NAT rule: first the  */
 /* source address, then the destination address (each optionally negated    */
 /* by IPN_NOTSRC/IPN_NOTDST) and finally the port comparisons.              */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_match(fin, np)
 	fr_info_t *fin;
 	ipnat_t *np;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	frtuc_t *ports;
 	int differs;
 
 	/*
 	 * Source address: "differs" is non-zero when the address does not
 	 * satisfy the rule, before IPN_NOTSRC is taken into account.
 	 */
 	differs = 0;
 	if (np->in_osrcatype == FRI_NORMAL) {
 		differs = ((fin->fin_saddr & np->in_osrcmsk) !=
 			   np->in_osrcaddr);
 	} else if (np->in_osrcatype == FRI_LOOKUP) {
 		differs = (*np->in_osrcfunc)(softc, np->in_osrcptr,
 					     4, &fin->fin_saddr,
 					     fin->fin_plen);
 	}
 	differs ^= ((np->in_flags & IPN_NOTSRC) != 0);
 	if (differs)
 		return 0;
 
 	/*
 	 * Destination address, handled the same way with IPN_NOTDST.
 	 */
 	differs = 0;
 	if (np->in_odstatype == FRI_NORMAL) {
 		differs = ((fin->fin_daddr & np->in_odstmsk) !=
 			   np->in_odstaddr);
 	} else if (np->in_odstatype == FRI_LOOKUP) {
 		differs = (*np->in_odstfunc)(softc, np->in_odstptr,
 					     4, &fin->fin_daddr,
 					     fin->fin_plen);
 	}
 	differs ^= ((np->in_flags & IPN_NOTDST) != 0);
 	if (differs)
 		return 0;
 
 	/*
 	 * Ports are only compared for complete TCP/UDP packets.  Anything
 	 * else matches only when the rule has no port comparison set.
 	 */
 	ports = &np->in_tuc;
 	if ((fin->fin_flx & FI_TCPUDP) &&
 	    !(fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
 		return ipf_tcpudpchk(&fin->fin_fi, ports);
 
 	return (ports->ftu_scmp || ports->ftu_dcmp) ? 0 : 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_update                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT structure                           */
 /*                                                                          */
 /* Refresh the timeout queue membership of a NAT table entry and add the    */
 /* packet length to the byte count of the rule that created it.             */
 /*                                                                          */
 /* This *MUST* be called after ipf_nat_proto() as it expects fin_rev to     */
 /* already be set.                                                          */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_update(fin, nat)
 	fr_info_t *fin;
 	nat_t *nat;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipftqent_t *tqe = &nat->nat_tqe;
 	ipftq_t *curq = tqe->tqe_ifq;
 	ipftq_t *newq = NULL;
 	ipnat_t *rule = nat->nat_ptr;
 
 	/*
 	 * A timeout queue supplied by the NAT rule takes precedence over
 	 * the per-protocol defaults, TCP included.
 	 */
 	if (rule != NULL) {
 		rule->in_bytes[fin->fin_rev] += fin->fin_plen;
 		newq = rule->in_tqehead[fin->fin_rev];
 	}
 
 	if (newq == NULL) {
 		switch (nat->nat_pr[0])
 		{
 		case IPPROTO_TCP :
 			/*
 			 * With no rule queue, TCP entries are handed to
 			 * ipf_tcp_age() and not moved here.
 			 */
 			(void) ipf_tcp_age(&nat->nat_tqe, fin,
 					   softn->ipf_nat_tcptq, 0, 2);
 			return;
 		case IPPROTO_UDP :
 			if (fin->fin_rev)
 				newq = &softn->ipf_nat_udpacktq;
 			else
 				newq = &softn->ipf_nat_udptq;
 			break;
 		case IPPROTO_ICMP :
 		case IPPROTO_ICMPV6 :
 			if (fin->fin_rev)
 				newq = &softn->ipf_nat_icmpacktq;
 			else
 				newq = &softn->ipf_nat_icmptq;
 			break;
 		default :
 			newq = &softn->ipf_nat_iptq;
 			break;
 		}
 	}
 
 	ipf_movequeue(softc->ipf_ticks, tqe, curq, newq);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_checkout                                            */
 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
 /*                     0 == no packet translation occurred,                 */
 /*                     1 == packet was successfully translated.             */
 /* Parameters:  fin(I)   - pointer to packet information                    */
 /*              passp(I) - pointer to filtering result flags                */
 /*                                                                          */
 /* Check to see if an outgoing packet should be changed.  ICMP packets are  */
 /* first checked to see if they match an existing entry (if an error),      */
 /* otherwise a search of the current NAT table is made.  If neither results */
 /* in a match then a search for a matching NAT rule is made.  Create a new  */
 /* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
 /* packet header(s) as required.                                            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_checkout(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	ipnat_t *np = NULL, *npnext;
 	struct ifnet *ifp, *sifp;
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	icmphdr_t *icmp = NULL;
 	tcphdr_t *tcp = NULL;
 	int rval, natfailed;
 	u_int nflags = 0;
 	u_32_t ipa, iph;
 	int natadd = 1;
 	frentry_t *fr;
 	nat_t *nat;
 
 	/*
 	 * IPv6 packets are handed to ipf_nat6_checkout() when USE_INET6 is
 	 * defined; without it they are left untranslated.
 	 */
 	if (fin->fin_v == 6) {
 #ifdef USE_INET6
 		return ipf_nat6_checkout(fin, passp);
 #else
 		return 0;
 #endif
 	}
 
 	softc = fin->fin_main_soft;
 	softn = softc->ipf_nat_soft;
 
 	/*
 	 * Do nothing while ipf_nat_lock is set, or when ns_rules is zero and
 	 * the ipf_nat_instances list is empty.
 	 */
 	if (softn->ipf_nat_lock != 0)
 		return 0;
 	if (softn->ipf_nat_stats.ns_rules == 0 &&
 	    softn->ipf_nat_instances == NULL)
 		return 0;
 
 	natfailed = 0;
 	fr = fin->fin_fr;
 	/*
 	 * If the filter rule supplies an interface in fr_tifs[fin_rev], use it
 	 * for NAT rule matching.  The original fin_ifp is kept in sifp and is
 	 * put back just before returning.
 	 */
 	sifp = fin->fin_ifp;
 	if (fr != NULL) {
 		ifp = fr->fr_tifs[fin->fin_rev].fd_ptr;
 		if ((ifp != NULL) && (ifp != (void *)-1))
 			fin->fin_ifp = ifp;
 	}
 	ifp = fin->fin_ifp;
 
 	/*
 	 * Derive the NAT protocol flags.  This is skipped for packets marked
 	 * FI_SHORT or with a non-zero fin_off, leaving nflags at 0 and tcp
 	 * NULL.
 	 */
 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
 		switch (fin->fin_p)
 		{
 		case IPPROTO_TCP :
 			nflags = IPN_TCP;
 			break;
 		case IPPROTO_UDP :
 			nflags = IPN_UDP;
 			break;
 		case IPPROTO_ICMP :
 			/* icmp is assigned here but not read again below */
 			icmp = fin->fin_dp;
 
 			/*
 			 * NOTE(review): the same wording appears in
 			 * ipf_nat_checkin() ("This is an incoming packet, so
 			 * the destination is the icmp_id and the source port
 			 * equals 0").  In this function only nflags is
 			 * derived from an ICMP query; no port is taken from
 			 * icmp_id.
 			 */
 			if ((fin->fin_flx & FI_ICMPQUERY) != 0)
 				nflags = IPN_ICMPQUERY;
 			break;
 		default :
 			break;
 		}
 
 		if ((nflags & IPN_TCPUDP))
 			tcp = fin->fin_dp;
 	}
 
 	ipa = fin->fin_saddr;
 
 	/* ipf_nat is held for reading from here until after outmatchfail. */
 	READ_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Look for an existing NAT session, in this order:
 	 *  1. for ICMP that is not a query, via ipf_nat_icmperror();
 	 *  2. for packets flagged FI_FRAG, via ipf_frag_natknown() - natadd
 	 *     is cleared so ipf_nat_out() will not call ipf_frag_natnew();
 	 *  3. via ipf_nat_outlookup(), taking nflags from the session found.
 	 * Only if all of these fail, and fin_off is 0, are the map rules
 	 * searched for one that can create a new session.
 	 */
 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
 	    (nat = ipf_nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
 		/*EMPTY*/;
 	else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin)))
 		natadd = 0;
 	else if ((nat = ipf_nat_outlookup(fin, nflags|NAT_SEARCH,
 				      (u_int)fin->fin_p, fin->fin_src,
 				      fin->fin_dst))) {
 		nflags = nat->nat_flags;
 	} else if (fin->fin_off == 0) {
 		u_32_t hv, msk, nmsk = 0;
 
 		/*
 		 * If there is no current entry in the nat table for this IP#,
 		 * create one for it (if there is a matching rule).
 		 *
 		 * Each pass of maskloop hashes the source address under
 		 * ipf_nat_map_active_masks[nmsk] and walks that hash chain.
 		 */
 maskloop:
 		msk = softn->ipf_nat_map_active_masks[nmsk];
 		iph = ipa & msk;
 		hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_maprules_sz);
 retry_roundrobin:
 		for (np = softn->ipf_nat_map_rules[hv]; np; np = npnext) {
 			/*
 			 * The successor is saved up front so that a rule
 			 * which moves in the list can be detected below.
 			 */
 			npnext = np->in_mnext;
 			/* Skip rules for another interface, IP version,
 			 * protocol or (with IPN_RF) protocol flag set. */
 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
 				continue;
 			if (np->in_v[0] != 4)
 				continue;
 			if (np->in_pr[1] && (np->in_pr[1] != fin->fin_p))
 				continue;
 			if ((np->in_flags & IPN_RF) &&
 			    !(np->in_flags & nflags))
 				continue;
 			if (np->in_flags & IPN_FILTER) {
 				/*
 				 * NOTE(review): ipf_nat_match() as written in
 				 * this file returns 0, 1 or the result of
 				 * ipf_tcpudpchk(); confirm whether the -1
 				 * case can actually be reached.
 				 */
 				switch (ipf_nat_match(fin, np))
 				{
 				case 0 :
 					continue;
 				case -1 :
 					rval = -1;
 					goto outmatchfail;
 				case 1 :
 				default :
 					break;
 				}
 			} else if ((ipa & np->in_osrcmsk) != np->in_osrcaddr)
 				continue;
 
 			if ((fr != NULL) &&
 			    !ipf_matchtag(&np->in_tag, &fr->fr_nattag))
 				continue;
 
 			/*
 			 * Rules with in_plabel set (not -1) must also pass
 			 * ipf_proxy_ok(); non-IPN_FILTER rules additionally
 			 * require the destination port to equal in_odport.
 			 */
 			if (np->in_plabel != -1) {
 				if (((np->in_flags & IPN_FILTER) == 0) &&
 				    (np->in_odport != fin->fin_data[1]))
 					continue;
 				if (ipf_proxy_ok(fin, tcp, np) == 0)
 					continue;
 			}
 
 			/*
 			 * IPN_NO rule: count the hit and stop searching.  No
 			 * session is created, so nat is still NULL here.
 			 */
 			if (np->in_flags & IPN_NO) {
 				np->in_hits++;
 				break;
 			}
 			MUTEX_ENTER(&softn->ipf_nat_new);
 			/*
 			 * If we've matched a round-robin rule but it has
 			 * moved in the list since we got it, start over as
 			 * this is now no longer correct.
 			 */
 			if (npnext != np->in_mnext) {
 				if ((np->in_flags & IPN_ROUNDR) != 0) {
 					MUTEX_EXIT(&softn->ipf_nat_new);
 					goto retry_roundrobin;
 				}
 				npnext = np->in_mnext;
 			}
 
 			nat = ipf_nat_add(fin, np, NULL, nflags, NAT_OUTBOUND);
 			MUTEX_EXIT(&softn->ipf_nat_new);
 			if (nat != NULL) {
 				natfailed = 0;
 				break;
 			}
 			/*
 			 * Creation failed: remember it and keep trying the
 			 * remaining rules on this chain.
 			 */
 			natfailed = -1;
 		}
 		/*
 		 * np == NULL means the chain ended without a break above, so
 		 * move on to the next active mask (if any are left).
 		 */
 		if ((np == NULL) && (nmsk < softn->ipf_nat_map_max)) {
 			nmsk++;
 			goto maskloop;
 		}
 	}
 
 	/*
 	 * With a session in hand, translate the packet.  On success the
 	 * timeout and the [1] byte/packet counters are updated under
 	 * nat_lock.  Without a session the result is natfailed (0 or -1).
 	 */
 	if (nat != NULL) {
 		rval = ipf_nat_out(fin, nat, natadd, nflags);
 		if (rval == 1) {
 			MUTEX_ENTER(&nat->nat_lock);
 			ipf_nat_update(fin, nat);
 			nat->nat_bytes[1] += fin->fin_plen;
 			nat->nat_pkts[1]++;
 			fin->fin_pktnum = nat->nat_pkts[1];
 			MUTEX_EXIT(&nat->nat_lock);
 		}
 	} else
 		rval = natfailed;
 outmatchfail:
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	/*
 	 * Record the outcome in the statistics.  On failure the packet is
 	 * flagged FI_BADNAT and, when the caller supplied passp, *passp is
 	 * overwritten with FR_BLOCK.
 	 */
 	switch (rval)
 	{
 	case -1 :
 		if (passp != NULL) {
 			DT1(frb_natv4out, fr_info_t *, fin);
 			NBUMPSIDED(1, ns_drop);
 			*passp = FR_BLOCK;
 			fin->fin_reason = FRB_NATV4;
 		}
 		fin->fin_flx |= FI_BADNAT;
 		NBUMPSIDED(1, ns_badnat);
 		break;
 	case 0 :
 		NBUMPSIDE(1, ns_ignored);
 		break;
 	case 1 :
 		NBUMPSIDE(1, ns_translated);
 		break;
 	}
 	/* Undo the temporary interface override made above. */
 	fin->fin_ifp = sifp;
 	return rval;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_out                                                 */
 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
 /*                     1 == packet was successfully translated.             */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              nat(I)    - pointer to NAT structure                        */
 /*              natadd(I) - flag indicating if it is safe to add frag cache */
 /*              nflags(I) - NAT flags set for this packet                   */
 /*                                                                          */
 /* Translate a packet coming "out" on an interface.                         */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_out(fin, nat, natadd, nflags)
 	fr_info_t *fin;
 	nat_t *nat;
 	int natadd;
 	u_32_t nflags;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	icmphdr_t *icmp;
 	tcphdr_t *tcp;
 	ipnat_t *np;
 	int skip;
 	int i;
 
 	tcp = NULL;
 	icmp = NULL;
 	/* np is the rule that created the session; it may be NULL. */
 	np = nat->nat_ptr;
 
 	/*
 	 * For fragments, record the session via ipf_frag_natnew(), unless
 	 * the caller cleared natadd or the session has no rule.
 	 */
 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
 		(void) ipf_frag_natnew(softc, fin, 0, nat);
 
 	/*
 	 * Fix up checksums, not by recalculating them, but
 	 * simply computing adjustments.
 	 * This is only done for STREAMS based IP implementations where the
 	 * checksum has already been calculated by IP.  In all other cases,
 	 * IPFilter is called before the checksum needs calculating so there
 	 * is no call to modify whatever is in the header now.
 	 */
 	if (nflags == IPN_ICMPERR) {
 		u_32_t s1, s2, sumd, msumd;
 
 		/*
 		 * ICMP error: sum the adjustments for the source and the
 		 * destination address change and apply them to ip_sum once.
 		 */
 		s1 = LONG_SUM(ntohl(fin->fin_saddr));
 		if (nat->nat_dir == NAT_OUTBOUND) {
 			s2 = LONG_SUM(ntohl(nat->nat_nsrcaddr));
 		} else {
 			s2 = LONG_SUM(ntohl(nat->nat_odstaddr));
 		}
 		CALC_SUMD(s1, s2, sumd);
 		msumd = sumd;
 
 		s1 = LONG_SUM(ntohl(fin->fin_daddr));
 		if (nat->nat_dir == NAT_OUTBOUND) {
 			s2 = LONG_SUM(ntohl(nat->nat_ndstaddr));
 		} else {
 			s2 = LONG_SUM(ntohl(nat->nat_osrcaddr));
 		}
 		CALC_SUMD(s1, s2, sumd);
 		msumd += sumd;
 
 		ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, msumd, 0);
 	}
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
     defined(linux) || defined(BRIDGE_IPF)
 	else {
 		/*
 		 * Strictly speaking, this isn't necessary on BSD
 		 * kernels because they do checksum calculation after
 		 * this code has run BUT if ipfilter is being used
 		 * to do NAT as a bridge, that code doesn't exist.
 		 */
 		switch (nat->nat_dir)
 		{
 		case NAT_OUTBOUND :
 			ipf_fix_outcksum(fin->fin_cksum & FI_CK_L4PART,
 					 &fin->fin_ip->ip_sum,
 					 nat->nat_ipsumd, 0);
 			break;
 
 		case NAT_INBOUND :
 			ipf_fix_incksum(fin->fin_cksum & FI_CK_L4PART,
 					&fin->fin_ip->ip_sum,
 					nat->nat_ipsumd, 0);
 			break;
 
 		default :
 			break;
 		}
 	}
 #endif
 
 	/*
 	 * Address assignment is after the checksum modification because
 	 * we are using the address in the packet for determining the
 	 * correct checksum offset (the ICMP error could be coming from
 	 * anyone...)
 	 */
 	switch (nat->nat_dir)
 	{
 	case NAT_OUTBOUND :
 		fin->fin_ip->ip_src = nat->nat_nsrcip;
 		fin->fin_saddr = nat->nat_nsrcaddr;
 		fin->fin_ip->ip_dst = nat->nat_ndstip;
 		fin->fin_daddr = nat->nat_ndstaddr;
 		break;
 
 	case NAT_INBOUND :
 		/*
 		 * NOTE(review): the IP header receives the "old" addresses
 		 * (nat_odstip/nat_osrcip) while fin_saddr/fin_daddr receive
 		 * the "new" ones (nat_ndstaddr/nat_nsrcaddr) - confirm that
 		 * this difference is intended.
 		 */
 		fin->fin_ip->ip_src = nat->nat_odstip;
 		fin->fin_saddr = nat->nat_ndstaddr;
 		fin->fin_ip->ip_dst = nat->nat_osrcip;
 		fin->fin_daddr = nat->nat_nsrcaddr;
 		break;
 
 	case NAT_DIVERTIN :
 	    {
 		mb_t *m;
 
 		/*
 		 * ipf_nat_decap() yields the number of bytes to strip from
 		 * the front of the packet; a result <= 0 is a failure.
 		 */
 		skip = ipf_nat_decap(fin, nat);
 		if (skip <= 0) {
 			NBUMPSIDED(1, ns_decap_fail);
 			return -1;
 		}
 
 		m = fin->fin_m;
 
 		/* Advance the buffer's data start past the skipped bytes. */
 #if defined(MENTAT) && defined(_KERNEL)
 		m->b_rptr += skip;
 #else
 		m->m_data += skip;
 		m->m_len -= skip;
 
 # ifdef M_PKTHDR
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len -= skip;
 # endif
 #endif
 
 		/*
 		 * This case returns directly: the port, layer 4 checksum,
 		 * sync and proxy processing below is not performed.
 		 */
 		MUTEX_ENTER(&nat->nat_lock);
 		ipf_nat_update(fin, nat);
 		MUTEX_EXIT(&nat->nat_lock);
 		fin->fin_flx |= FI_NATED;
 		if (np != NULL && np->in_tag.ipt_num[0] != 0)
 			fin->fin_nattag = &np->in_tag;
 		return 1;
 		/* NOTREACHED */
 	    }
 
 	case NAT_DIVERTOUT :
 	    {
 		u_32_t s1, s2, sumd;
 		udphdr_t *uh;
 		ip_t *ip;
 		mb_t *m;
 
 		/*
 		 * Duplicate the header template stored with the rule
 		 * (in_divmp); the copy becomes the packet's new IP header,
 		 * followed by a UDP header.
 		 *
 		 * NOTE(review): np is dereferenced here without the NULL
 		 * check used elsewhere in this function - confirm that a
 		 * NAT_DIVERTOUT session always has nat_ptr set.
 		 */
 		m = M_DUP(np->in_divmp);
 		if (m == NULL) {
 			NBUMPSIDED(1, ns_divert_dup);
 			return -1;
 		}
 
 		ip = MTOD(m, ip_t *);
-		ip->ip_id = htons(ipf_nextipid(fin));
+		ip_fillid(ip);
 		s2 = ntohs(ip->ip_id);
 
 		/*
 		 * NOTE(review): s1 is taken from ip_len without ntohs(),
 		 * whereas ipf_nat_in() uses ntohs(ip->ip_len) for the
 		 * equivalent value - confirm which is intended.
 		 */
 		s1 = ip->ip_len;
 		ip->ip_len = ntohs(ip->ip_len);
 		ip->ip_len += fin->fin_plen;
 		ip->ip_len = htons(ip->ip_len);
 		s2 += ntohs(ip->ip_len);
 		CALC_SUMD(s1, s2, sumd);
 
 		/*
 		 * NOTE(review): fin_plen is added to uh_ulen before htons()
 		 * is applied, which presumes the template stores uh_ulen in
 		 * host byte order - verify against the code that builds
 		 * in_divmp.
 		 */
 		uh = (udphdr_t *)(ip + 1);
 		uh->uh_ulen += fin->fin_plen;
 		uh->uh_ulen = htons(uh->uh_ulen);
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
     defined(linux) || defined(BRIDGE_IPF)
 		ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0);
 #endif
 
 		PREP_MB_T(fin, m);
 
 		fin->fin_src = ip->ip_src;
 		fin->fin_dst = ip->ip_dst;
 		fin->fin_ip = ip;
 		fin->fin_plen += sizeof(ip_t) + 8;	/* UDP + IPv4 hdr */
 		fin->fin_dlen += sizeof(ip_t) + 8;	/* UDP + IPv4 hdr */
 
 		/* No port or ICMP id rewriting for the new outer header. */
 		nflags &= ~IPN_TCPUDPICMP;
 
 		break;
 	    }
 
 	default :
 		break;
 	}
 
 	/*
 	 * Layer 4 rewriting is only attempted on packets that are not short
 	 * and have fin_off == 0.
 	 */
 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
 		u_short *csump;
 
 		/* Ports are left alone when the session has nat_nsport == 0 */
 		if ((nat->nat_nsport != 0) && (nflags & IPN_TCPUDP)) {
 			tcp = fin->fin_dp;
 
 			switch (nat->nat_dir)
 			{
 			case NAT_OUTBOUND :
 				tcp->th_sport = nat->nat_nsport;
 				fin->fin_data[0] = ntohs(nat->nat_nsport);
 				tcp->th_dport = nat->nat_ndport;
 				fin->fin_data[1] = ntohs(nat->nat_ndport);
 				break;
 
 			case NAT_INBOUND :
 				tcp->th_sport = nat->nat_odport;
 				fin->fin_data[0] = ntohs(nat->nat_odport);
 				tcp->th_dport = nat->nat_osport;
 				fin->fin_data[1] = ntohs(nat->nat_osport);
 				break;
 			}
 		}
 
 		if ((nat->nat_nsport != 0) && (nflags & IPN_ICMPQUERY)) {
 			icmp = fin->fin_dp;
 			icmp->icmp_id = nat->nat_nicmpid;
 		}
 
 		/*
 		 * ipf_nat_proto() sets fin_rev and returns the layer 4
 		 * checksum field to adjust, or NULL if there is none.
 		 */
 		csump = ipf_nat_proto(fin, nat, nflags);
 
 		/*
 		 * The above comments do not hold for layer 4 (or higher)
 		 * checksums...
 		 */
 		if (csump != NULL) {
 			if (nat->nat_dir == NAT_OUTBOUND)
 				ipf_fix_outcksum(fin->fin_cksum, csump,
 						 nat->nat_sumd[0],
 						 nat->nat_sumd[1] +
 						 fin->fin_dlen);
 			else
 				ipf_fix_incksum(fin->fin_cksum, csump,
 						nat->nat_sumd[0],
 						nat->nat_sumd[1] +
 						fin->fin_dlen);
 		}
 	}
 
 	ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync);
 	/* ------------------------------------------------------------- */
 	/* A few quick notes:                                            */
 	/*      Following are test conditions prior to calling the       */
 	/*      ipf_proxy_check routine.                                 */
 	/*                                                               */
 	/*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
 	/*      with a redirect rule, we attempt to match the packet's   */
 	/*      source port against in_dport, otherwise we'd compare the */
 	/*      packet's destination.                                    */
 	/* ------------------------------------------------------------- */
 	/*
 	 * The return value is 1 unless a proxy is attached to the rule, in
 	 * which case ipf_proxy_check() decides: 0 is mapped to 1, -1 is
 	 * counted as a proxy failure, and any value (including -1) other
 	 * than 0 is returned unchanged.
 	 */
 	if ((np != NULL) && (np->in_apr != NULL)) {
 		i = ipf_proxy_check(fin, nat);
 		if (i == 0) {
 			i = 1;
 		} else if (i == -1) {
 			NBUMPSIDED(1, ns_ipf_proxy_fail);
 		}
 	} else {
 		i = 1;
 	}
 	fin->fin_flx |= FI_NATED;
 	return i;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_checkin                                             */
 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
 /*                     0 == no packet translation occurred,                 */
 /*                     1 == packet was successfully translated.             */
 /* Parameters:  fin(I)   - pointer to packet information                    */
 /*              passp(I) - pointer to filtering result flags                */
 /*                                                                          */
 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
 /* first checked to see if they match an existing entry (if an error),      */
 /* otherwise a search of the current NAT table is made.  If neither results */
 /* in a match then a search for a matching NAT rule is made.  Create a new  */
 /* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
 /* packet header(s) as required.                                            */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_checkin(fin, passp)
 	fr_info_t *fin;
 	u_32_t *passp;
 {
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	u_int nflags, natadd;
 	ipnat_t *np, *npnext;
 	int rval, natfailed;
 	struct ifnet *ifp;
 	struct in_addr in;
 	icmphdr_t *icmp;
 	tcphdr_t *tcp;
 	u_short dport;
 	nat_t *nat;
 	u_32_t iph;
 
 	softc = fin->fin_main_soft;
 	softn = softc->ipf_nat_soft;
 
 	/*
 	 * Do nothing while ipf_nat_lock is set, or when ns_rules is zero and
 	 * the ipf_nat_instances list is empty.
 	 */
 	if (softn->ipf_nat_lock != 0)
 		return 0;
 	if (softn->ipf_nat_stats.ns_rules == 0 &&
 	    softn->ipf_nat_instances == NULL)
 		return 0;
 
 	tcp = NULL;
 	icmp = NULL;
 	dport = 0;
 	natadd = 1;
 	nflags = 0;
 	natfailed = 0;
 	ifp = fin->fin_ifp;
 
 	/*
 	 * Derive the NAT protocol flags and the value (dport) that
 	 * non-IPN_FILTER rules compare against their port range.  This is
 	 * skipped for packets marked FI_SHORT or with a non-zero fin_off.
 	 */
 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
 		switch (fin->fin_p)
 		{
 		case IPPROTO_TCP :
 			nflags = IPN_TCP;
 			break;
 		case IPPROTO_UDP :
 			nflags = IPN_UDP;
 			break;
 		case IPPROTO_ICMP :
 			icmp = fin->fin_dp;
 
 			/*
 			 * This is an incoming packet, so the destination is
 			 * the icmp_id and the source port equals 0
 			 */
 			if ((fin->fin_flx & FI_ICMPQUERY) != 0) {
 				nflags = IPN_ICMPQUERY;
 				dport = icmp->icmp_id;
 			} break;
 		default :
 			break;
 		}
 
 		if ((nflags & IPN_TCPUDP)) {
 			tcp = fin->fin_dp;
 			dport = fin->fin_data[1];
 		}
 	}
 
 	in = fin->fin_dst;
 
 	/* ipf_nat is held for reading from here until after inmatchfail. */
 	READ_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Look for an existing NAT session, in this order:
 	 *  1. for ICMP that is not a query, via ipf_nat_icmperror();
 	 *  2. for packets flagged FI_FRAG, via ipf_frag_natknown() - natadd
 	 *     is cleared so ipf_nat_in() will not call ipf_frag_natnew();
 	 *  3. via ipf_nat_inlookup(), taking nflags from the session found.
 	 * Only if all of these fail, and fin_off is 0, are the rdr rules
 	 * searched for one that can create a new session.
 	 */
 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
 	    (nat = ipf_nat_icmperror(fin, &nflags, NAT_INBOUND)))
 		/*EMPTY*/;
 	else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin)))
 		natadd = 0;
 	else if ((nat = ipf_nat_inlookup(fin, nflags|NAT_SEARCH,
 					 (u_int)fin->fin_p,
 					 fin->fin_src, in))) {
 		nflags = nat->nat_flags;
 	} else if (fin->fin_off == 0) {
 		u_32_t hv, msk, rmsk = 0;
 
 		/*
 		 * If there is no current entry in the nat table for this IP#,
 		 * create one for it (if there is a matching rule).
 		 *
 		 * Each pass of maskloop hashes the destination address under
 		 * ipf_nat_rdr_active_masks[rmsk] and walks that hash chain.
 		 */
 maskloop:
 		msk = softn->ipf_nat_rdr_active_masks[rmsk];
 		iph = in.s_addr & msk;
 		hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_rdrrules_sz);
 retry_roundrobin:
 		/* TRACE (iph,msk,rmsk,hv,softn->ipf_nat_rdrrules_sz) */
 		for (np = softn->ipf_nat_rdr_rules[hv]; np; np = npnext) {
 			/*
 			 * The successor is saved up front so that a rule
 			 * which moves in the list can be detected below.
 			 */
 			npnext = np->in_rnext;
 			/* Skip rules for another interface, IP version,
 			 * protocol or (with IPN_RF) protocol flag set. */
 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
 				continue;
 			if (np->in_v[0] != 4)
 				continue;
 			if (np->in_pr[0] && (np->in_pr[0] != fin->fin_p))
 				continue;
 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
 				continue;
 			if (np->in_flags & IPN_FILTER) {
 				/*
 				 * NOTE(review): ipf_nat_match() as written in
 				 * this file returns 0, 1 or the result of
 				 * ipf_tcpudpchk(); confirm whether the -1
 				 * case can actually be reached.
 				 */
 				switch (ipf_nat_match(fin, np))
 				{
 				case 0 :
 					continue;
 				case -1 :
 					rval = -1;
 					goto inmatchfail;
 				case 1 :
 				default :
 					break;
 				}
 			} else {
 				/*
 				 * Plain rule: the destination must fall in
 				 * the rule's network and, when in_odport is
 				 * set, dport must lie within
 				 * [in_odport, in_dtop].
 				 */
 				if ((in.s_addr & np->in_odstmsk) !=
 				    np->in_odstaddr)
 					continue;
 				if (np->in_odport &&
 				    ((np->in_dtop < dport) ||
 				     (dport < np->in_odport)))
 					continue;
 			}
 
 			/* Rules with in_plabel set must pass ipf_proxy_ok() */
 			if (np->in_plabel != -1) {
 				if (!ipf_proxy_ok(fin, tcp, np)) {
 					continue;
 				}
 			}
 
 			/*
 			 * IPN_NO rule: count the hit and stop searching.  No
 			 * session is created, so nat is still NULL here.
 			 */
 			if (np->in_flags & IPN_NO) {
 				np->in_hits++;
 				break;
 			}
 
 			MUTEX_ENTER(&softn->ipf_nat_new);
 			/*
 			 * If we've matched a round-robin rule but it has
 			 * moved in the list since we got it, start over as
 			 * this is now no longer correct.
 			 */
 			if (npnext != np->in_rnext) {
 				if ((np->in_flags & IPN_ROUNDR) != 0) {
 					MUTEX_EXIT(&softn->ipf_nat_new);
 					goto retry_roundrobin;
 				}
 				npnext = np->in_rnext;
 			}
 
 			nat = ipf_nat_add(fin, np, NULL, nflags, NAT_INBOUND);
 			MUTEX_EXIT(&softn->ipf_nat_new);
 			if (nat != NULL) {
 				natfailed = 0;
 				break;
 			}
 			/*
 			 * Creation failed: remember it and keep trying the
 			 * remaining rules on this chain.
 			 */
 			natfailed = -1;
 		}
 		/*
 		 * np == NULL means the chain ended without a break above, so
 		 * move on to the next active mask (if any are left).
 		 */
 		if ((np == NULL) && (rmsk < softn->ipf_nat_rdr_max)) {
 			rmsk++;
 			goto maskloop;
 		}
 	}
 
 	/*
 	 * With a session in hand, translate the packet.  On success the
 	 * timeout and the [0] byte/packet counters are updated under
 	 * nat_lock.  Without a session the result is natfailed (0 or -1).
 	 */
 	if (nat != NULL) {
 		rval = ipf_nat_in(fin, nat, natadd, nflags);
 		if (rval == 1) {
 			MUTEX_ENTER(&nat->nat_lock);
 			ipf_nat_update(fin, nat);
 			nat->nat_bytes[0] += fin->fin_plen;
 			nat->nat_pkts[0]++;
 			fin->fin_pktnum = nat->nat_pkts[0];
 			MUTEX_EXIT(&nat->nat_lock);
 		}
 	} else
 		rval = natfailed;
 inmatchfail:
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	/*
 	 * Record the outcome in the statistics.  On failure the packet is
 	 * flagged FI_BADNAT and, when the caller supplied passp, *passp is
 	 * overwritten with FR_BLOCK.
 	 */
 	switch (rval)
 	{
 	case -1 :
 		if (passp != NULL) {
 			DT1(frb_natv4in, fr_info_t *, fin);
 			NBUMPSIDED(0, ns_drop);
 			*passp = FR_BLOCK;
 			fin->fin_reason = FRB_NATV4;
 		}
 		fin->fin_flx |= FI_BADNAT;
 		NBUMPSIDED(0, ns_badnat);
 		break;
 	case 0 :
 		NBUMPSIDE(0, ns_ignored);
 		break;
 	case 1 :
 		NBUMPSIDE(0, ns_translated);
 		break;
 	}
 	return rval;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_in                                                  */
 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
 /*                     1 == packet was successfully translated.             */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              nat(I)    - pointer to NAT structure                        */
 /*              natadd(I) - flag indicating if it is safe to add frag cache */
 /*              nflags(I) - NAT flags set for this packet                   */
 /* Locks Held:  ipf_nat(READ)                                               */
 /*                                                                          */
 /* Translate a packet coming "in" on an interface.                          */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_in(fin, nat, natadd, nflags)
 	fr_info_t *fin;
 	nat_t *nat;
 	int natadd;
 	u_32_t nflags;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_32_t sumd, ipsumd, sum1, sum2;
 	icmphdr_t *icmp;
 	tcphdr_t *tcp;
 	ipnat_t *np;
 	int skip;
 	int i;
 
 	tcp = NULL;
 	/* np is the rule that created the session; it may be NULL. */
 	np = nat->nat_ptr;
 	/* The packet takes on the filter rule stored with the session. */
 	fin->fin_fr = nat->nat_fr;
 
 	if (np != NULL) {
 		/*
 		 * For fragments, record the session via ipf_frag_natnew()
 		 * unless the caller cleared natadd.
 		 */
 		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
 			(void) ipf_frag_natnew(softc, fin, 0, nat);
 
 	/* ------------------------------------------------------------- */
 	/* A few quick notes:                                            */
 	/*      Following are test conditions prior to calling the       */
 	/*      ipf_proxy_check routine.                                 */
 	/*                                                               */
 	/*      A NULL tcp indicates a non TCP/UDP packet.  When dealing */
 	/*      with a map rule, we attempt to match the packet's        */
 	/*      source port against in_dport, otherwise we'd compare the */
 	/*      packet's destination.                                    */
 	/* ------------------------------------------------------------- */
 		/*
 		 * Unlike ipf_nat_out(), the proxy is consulted before any
 		 * translation; only a result of -1 rejects the packet.
 		 */
 		if (np->in_apr != NULL) {
 			i = ipf_proxy_check(fin, nat);
 			if (i == -1) {
 				NBUMPSIDED(0, ns_ipf_proxy_fail);
 				return -1;
 			}
 		}
 	}
 
 	ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync);
 
 	ipsumd = nat->nat_ipsumd;
 	/*
 	 * Fix up checksums, not by recalculating them, but
 	 * simply computing adjustments.
 	 * Why only do this for some platforms on inbound packets ?
 	 * Because for those that it is done, IP processing is yet to happen
 	 * and so the IPv4 header checksum has not yet been evaluated.
 	 * Perhaps it should always be done for the benefit of things like
 	 * fast forwarding (so that it doesn't need to be recomputed) but with
 	 * header checksum offloading, perhaps it is a moot point.
 	 */
 
 	switch (nat->nat_dir)
 	{
 	case NAT_INBOUND :
 		/*
 		 * For ICMP errors (FI_ICMPERR) the source address is left
 		 * untouched and its share is removed from the checksum
 		 * adjustment instead.
 		 */
 		if ((fin->fin_flx & FI_ICMPERR) == 0) {
 			fin->fin_ip->ip_src = nat->nat_nsrcip;
 			fin->fin_saddr = nat->nat_nsrcaddr;
 		} else {
 			sum1 = nat->nat_osrcaddr;
 			sum2 = nat->nat_nsrcaddr;
 			CALC_SUMD(sum1, sum2, sumd);
 			ipsumd -= sumd;
 		}
 		fin->fin_ip->ip_dst = nat->nat_ndstip;
 		fin->fin_daddr = nat->nat_ndstaddr;
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
      defined(__osf__) || defined(linux)
 		ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, ipsumd, 0);
 #endif
 		break;
 
 	case NAT_OUTBOUND :
 		/*
 		 * Packet travelling back through a session created on the
 		 * outbound side: restore the original addresses, swapped.
 		 * ICMP errors are handled as in the NAT_INBOUND case.
 		 */
 		if ((fin->fin_flx & FI_ICMPERR) == 0) {
 			fin->fin_ip->ip_src = nat->nat_odstip;
 			fin->fin_saddr = nat->nat_odstaddr;
 		} else {
 			sum1 = nat->nat_odstaddr;
 			sum2 = nat->nat_ndstaddr;
 			CALC_SUMD(sum1, sum2, sumd);
 			ipsumd -= sumd;
 		}
 		fin->fin_ip->ip_dst = nat->nat_osrcip;
 		fin->fin_daddr = nat->nat_osrcaddr;
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
      defined(__osf__) || defined(linux)
 		ipf_fix_incksum(0, &fin->fin_ip->ip_sum, ipsumd, 0);
 #endif
 		break;
 
 	case NAT_DIVERTIN :
 	    {
 		udphdr_t *uh;
 		ip_t *ip;
 		mb_t *m;
 
 		/*
 		 * Duplicate the header template stored with the rule
 		 * (in_divmp); the copy becomes the packet's new IP header,
 		 * followed by a UDP header.
 		 *
 		 * NOTE(review): np is dereferenced here without the NULL
 		 * check used elsewhere in this function - confirm that a
 		 * NAT_DIVERTIN session always has nat_ptr set.
 		 */
 		m = M_DUP(np->in_divmp);
 		if (m == NULL) {
 			NBUMPSIDED(0, ns_divert_dup);
 			return -1;
 		}
 
 		ip = MTOD(m, ip_t *);
-		ip->ip_id = htons(ipf_nextipid(fin));
+		ip_fillid(ip);
 		sum1 = ntohs(ip->ip_len);
 		ip->ip_len = ntohs(ip->ip_len);
 		ip->ip_len += fin->fin_plen;
 		ip->ip_len = htons(ip->ip_len);
 
 		/*
 		 * NOTE(review): fin_plen is added to uh_ulen before htons()
 		 * is applied, which presumes the template stores uh_ulen in
 		 * host byte order - verify against the code that builds
 		 * in_divmp.
 		 */
 		uh = (udphdr_t *)(ip + 1);
 		uh->uh_ulen += fin->fin_plen;
 		uh->uh_ulen = htons(uh->uh_ulen);
 
 		/*
 		 * The header checksum adjustment covers the new id, the new
 		 * length and the DF bit of ip_off.
 		 */
 		sum2 = ntohs(ip->ip_id) + ntohs(ip->ip_len);
 		sum2 += ntohs(ip->ip_off) & IP_DF;
 		CALC_SUMD(sum1, sum2, sumd);
 
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
      defined(__osf__) || defined(linux)
 		ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0);
 #endif
 		PREP_MB_T(fin, m);
 
 		fin->fin_ip = ip;
 		fin->fin_plen += sizeof(ip_t) + 8;	/* UDP + new IPv4 hdr */
 		fin->fin_dlen += sizeof(ip_t) + 8;	/* UDP + old IPv4 hdr */
 
 		/* No port or ICMP id rewriting for the new outer header. */
 		nflags &= ~IPN_TCPUDPICMP;
 
 		break;
 	    }
 
 	case NAT_DIVERTOUT :
 	    {
 		mb_t *m;
 
 		/*
 		 * ipf_nat_decap() yields the number of bytes to strip from
 		 * the front of the packet; a result <= 0 is a failure.
 		 */
 		skip = ipf_nat_decap(fin, nat);
 		if (skip <= 0) {
 			NBUMPSIDED(0, ns_decap_fail);
 			return -1;
 		}
 
 		m = fin->fin_m;
 
 		/* Advance the buffer's data start past the skipped bytes. */
 #if defined(MENTAT) && defined(_KERNEL)
 		m->b_rptr += skip;
 #else
 		m->m_data += skip;
 		m->m_len -= skip;
 
 # ifdef M_PKTHDR
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len -= skip;
 # endif
 #endif
 
 		/*
 		 * NOTE(review): ipf_nat_update() is called here without
 		 * taking nat_lock, while the matching NAT_DIVERTIN case in
 		 * ipf_nat_out() and the caller ipf_nat_checkin() both wrap
 		 * it in MUTEX_ENTER/MUTEX_EXIT - confirm whether the lock
 		 * is required here.
 		 */
 		ipf_nat_update(fin, nat);
 		nflags &= ~IPN_TCPUDPICMP;
 		fin->fin_flx |= FI_NATED;
 		if (np != NULL && np->in_tag.ipt_num[0] != 0)
 			fin->fin_nattag = &np->in_tag;
 		return 1;
 		/* NOTREACHED */
 	    }
 	}
 	if (nflags & IPN_TCPUDP)
 		tcp = fin->fin_dp;
 
 	/*
 	 * Layer 4 rewriting is only attempted on packets that are not short
 	 * and have fin_off == 0.
 	 */
 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
 		u_short *csump;
 
 		/* Ports are left alone when the session has nat_odport == 0 */
 		if ((nat->nat_odport != 0) && (nflags & IPN_TCPUDP)) {
 			switch (nat->nat_dir)
 			{
 			case NAT_INBOUND :
 				tcp->th_sport = nat->nat_nsport;
 				fin->fin_data[0] = ntohs(nat->nat_nsport);
 				tcp->th_dport = nat->nat_ndport;
 				fin->fin_data[1] = ntohs(nat->nat_ndport);
 				break;
 
 			case NAT_OUTBOUND :
 				tcp->th_sport = nat->nat_odport;
 				fin->fin_data[0] = ntohs(nat->nat_odport);
 				tcp->th_dport = nat->nat_osport;
 				fin->fin_data[1] = ntohs(nat->nat_osport);
 				break;
 			}
 		}
 
 
 		if ((nat->nat_odport != 0) && (nflags & IPN_ICMPQUERY)) {
 			icmp = fin->fin_dp;
 
 			icmp->icmp_id = nat->nat_nicmpid;
 		}
 
 		/*
 		 * ipf_nat_proto() sets fin_rev and returns the layer 4
 		 * checksum field to adjust, or NULL if there is none.
 		 */
 		csump = ipf_nat_proto(fin, nat, nflags);
 
 		/*
 		 * The above comments do not hold for layer 4 (or higher)
 		 * checksums...
 		 */
 		if (csump != NULL) {
 			if (nat->nat_dir == NAT_OUTBOUND)
 				ipf_fix_incksum(0, csump, nat->nat_sumd[0], 0);
 			else
 				ipf_fix_outcksum(0, csump, nat->nat_sumd[0], 0);
 		}
 	}
 
 	fin->fin_flx |= FI_NATED;
 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
 		fin->fin_nattag = &np->in_tag;
 	return 1;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_proto                                               */
 /* Returns:     u_short* - address of the transport checksum field that the */
 /*                        caller should adjust, or NULL when there is none */
 /* Parameters:  fin(I)    - pointer to packet information                   */
 /*              nat(I)    - pointer to NAT structure                        */
 /*              nflags(I) - NAT flags set for this packet                   */
 /*                                                                          */
 /* Return the location of the layer 4 checksum for the protocols handled    */
 /* here (TCP, UDP, ICMP queries and, with USE_INET6, ICMPv6 queries).       */
 /* Sets fin_rev from fin_out and nat_dir as a side effect, which is why     */
 /* ipf_nat_update() must be called after this function.  On a TCP SYN for a */
 /* session with nat_mssclamp set, ipf_nat_mssclamp() is also run from here. */
 /* ------------------------------------------------------------------------ */
 u_short *
 ipf_nat_proto(fin, nat, nflags)
 	fr_info_t *fin;
 	nat_t *nat;
 	u_int nflags;
 {
 	u_short *sump = NULL;
 
 	if (fin->fin_out != 0)
 		fin->fin_rev = ((nat->nat_dir & NAT_OUTBOUND) == 0);
 	else
 		fin->fin_rev = (nat->nat_dir & NAT_OUTBOUND);
 
 	switch (fin->fin_p)
 	{
 	case IPPROTO_TCP :
 	    {
 		tcphdr_t *th = fin->fin_dp;
 
 		if (nflags & IPN_TCP)
 			sump = &th->th_sum;
 
 		/*
 		 * MSS clamping is applied to SYN packets only, and only
 		 * for IPv4 at present.
 		 */
 		if ((nat->nat_mssclamp != 0) && (th->th_flags & TH_SYN) != 0)
 			ipf_nat_mssclamp(th, nat->nat_mssclamp, fin, sump);
 		break;
 	    }
 
 	case IPPROTO_UDP :
 	    {
 		udphdr_t *uh = fin->fin_dp;
 
 		/* A zero UDP checksum is left untouched. */
 		if ((nflags & IPN_UDP) && (uh->uh_sum != 0))
 			sump = &uh->uh_sum;
 		break;
 	    }
 
 	case IPPROTO_ICMP :
 	    {
 		icmphdr_t *ic = fin->fin_dp;
 
 		/* Likewise for a zero ICMP checksum. */
 		if ((nflags & IPN_ICMPQUERY) && (ic->icmp_cksum != 0))
 			sump = &ic->icmp_cksum;
 		break;
 	    }
 
 #ifdef USE_INET6
 	case IPPROTO_ICMPV6 :
 	    {
 		struct icmp6_hdr *ic6 = (struct icmp6_hdr *)fin->fin_dp;
 
 		if ((nflags & IPN_ICMPQUERY) && (ic6->icmp6_cksum != 0))
 			sump = &ic6->icmp6_cksum;
 		break;
 	    }
 #endif
 	}
 
 	return sump;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_expire                                              */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*                                                                          */
 /* Holding ipf_nat for writing, walk the ipf_nat_tcptq and ipf_nat_utqe     */
 /* queue lists and delete entries from the head of each queue until one is  */
 /* found that has not yet reached its tqe_die tick.  Queues on the          */
 /* ipf_nat_utqe list flagged IFQF_DELETE with no references are freed, and  */
 /* a pending flush request (ipf_nat_doflush) is serviced.                   */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_expire(softc)
 	ipf_main_softc_t *softc;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipftq_t *ifq, *ifqnext;
 	ipftqent_t *tqe, *tqn;
 	SPL_INT(s);
 
 	SPL_NET(s);
 	WRITE_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Expire from the head of every queue on the TCP list.  The next
 	 * entry is fetched before the current one is deleted.
 	 */
 	ifq = softn->ipf_nat_tcptq;
 	while (ifq != NULL) {
 		tqe = ifq->ifq_head;
 		while ((tqe != NULL) &&
 		       !(tqe->tqe_die > softc->ipf_ticks)) {
 			tqn = tqe->tqe_next;
 			ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE);
 			tqe = tqn;
 		}
 		ifq = ifq->ifq_next;
 	}
 
 	/*
 	 * The same again for every queue on the ipf_nat_utqe list.
 	 */
 	ifq = softn->ipf_nat_utqe;
 	while (ifq != NULL) {
 		tqe = ifq->ifq_head;
 		while ((tqe != NULL) &&
 		       !(tqe->tqe_die > softc->ipf_ticks)) {
 			tqn = tqe->tqe_next;
 			ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE);
 			tqe = tqn;
 		}
 		ifq = ifq->ifq_next;
 	}
 
 	/*
 	 * Release ipf_nat_utqe queues that are marked for deletion and are
 	 * no longer referenced.  The successor is read first because the
 	 * queue may be freed.
 	 */
 	ifq = softn->ipf_nat_utqe;
 	while (ifq != NULL) {
 		ifqnext = ifq->ifq_next;
 
 		if ((ifq->ifq_ref == 0) &&
 		    ((ifq->ifq_flags & IFQF_DELETE) != 0))
 			ipf_freetimeoutqueue(softc, ifq);
 
 		ifq = ifqnext;
 	}
 
 	if (softn->ipf_nat_doflush != 0) {
 		ipf_nat_extraflush(softc, softn, 2);
 		softn->ipf_nat_doflush = 0;
 	}
 
 	RWLOCK_EXIT(&softc->ipf_nat);
 	SPL_X(s);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_sync                                                */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              ifp(I) - pointer to network interface                       */
 /*                                                                          */
 /* Walk through all of the currently active NAT sessions, looking for those */
 /* which need to have their translated address updated.  When ifp is NULL   */
 /* every session and rule is examined, otherwise only those bound to ifp.   */
 /* The interface pointers cached in each NAT rule are re-resolved as well.  */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_sync(softc, ifp)
 	ipf_main_softc_t *softc;
 	void *ifp;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_32_t sum1, sum2, sumd;
 	i6addr_t in;
 	ipnat_t *n;
 	nat_t *nat;
 	void *ifp2;
 	int idx;
 	SPL_INT(s);
 
 	if (softc->ipf_running <= 0)
 		return;
 
 	/*
 	 * Change IP addresses for NAT sessions for any protocol except TCP
 	 * since it will break the TCP connection anyway.  The only rules
 	 * which will get changed are those which are "map ... -> 0/32",
 	 * where the rule specifies the address is taken from the interface.
 	 */
 	SPL_NET(s);
 	WRITE_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Re-check now that the lock is held.  Both the lock and the
 	 * interrupt priority level raised above must be released on this
 	 * early exit.
 	 */
 	if (softc->ipf_running <= 0) {
 		RWLOCK_EXIT(&softc->ipf_nat);
 		SPL_X(s);
 		return;
 	}
 
 	for (nat = softn->ipf_nat_instances; nat; nat = nat->nat_next) {
 		if ((nat->nat_flags & IPN_TCP) != 0)
 			continue;
 
 		n = nat->nat_ptr;
 		if (n != NULL) {
 			/*
 			 * Skip sessions whose rule names an explicit
 			 * translation address (anything other than 0/32).
 			 */
 			if (n->in_v[1] == 4) {
 				if (n->in_redir & NAT_MAP) {
 					if ((n->in_nsrcaddr != 0) ||
 					    (n->in_nsrcmsk != 0xffffffff))
 						continue;
 				} else if (n->in_redir & NAT_REDIRECT) {
 					if ((n->in_ndstaddr != 0) ||
 					    (n->in_ndstmsk != 0xffffffff))
 						continue;
 				}
 			}
 #ifdef USE_INET6
 			/*
 			 * The 128 bit comparison only makes sense for IPv6
 			 * rules; applying it to IPv4 rules would reject
 			 * every one of them because an IPv4 /32 mask is
 			 * never all ones across 128 bits.
 			 */
 			if (n->in_v[1] == 6) {
 				if (n->in_redir & NAT_MAP) {
 					if (!IP6_ISZERO(&n->in_nsrcaddr) ||
 					    !IP6_ISONES(&n->in_nsrcmsk))
 						continue;
 				} else if (n->in_redir & NAT_REDIRECT) {
 					if (!IP6_ISZERO(&n->in_ndstaddr) ||
 					    !IP6_ISONES(&n->in_ndstmsk))
 						continue;
 				}
 			}
 #endif
 		}
 
 		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
 		     (ifp == nat->nat_ifps[1]))) {
 			/*
 			 * Refresh the cached interface pointers and MTUs
 			 * from the interface names stored in the session.
 			 */
 			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0],
 						  nat->nat_v[0]);
 			if ((nat->nat_ifps[0] != NULL) &&
 			    (nat->nat_ifps[0] != (void *)-1)) {
 				nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]);
 			}
 			if (nat->nat_ifnames[1][0] != '\0') {
 				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
 							  nat->nat_v[1]);
 			} else {
 				nat->nat_ifps[1] = nat->nat_ifps[0];
 			}
 			if ((nat->nat_ifps[1] != NULL) &&
 			    (nat->nat_ifps[1] != (void *)-1)) {
 				nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]);
 			}
 			ifp2 = nat->nat_ifps[0];
 			if (ifp2 == NULL)
 				continue;
 
 			/*
 			 * Change the map-to address to be the same as the
 			 * new one.
 			 */
 			sum1 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6);
 			if (ipf_ifpaddr(softc, nat->nat_v[0], FRI_NORMAL, ifp2,
 				       &in, NULL) != -1) {
 				if (nat->nat_v[0] == 4)
 					nat->nat_nsrcip = in.in4;
 			}
 			sum2 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6);
 
 			if (sum1 == sum2)
 				continue;
 			/*
 			 * Readjust the checksum adjustment to take into
 			 * account the new IP#.
 			 */
 			CALC_SUMD(sum1, sum2, sumd);
 			/* XXX - dont change for TCP when solaris does
 			 * hardware checksumming.
 			 */
 			sumd += nat->nat_sumd[0];
 			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
 			nat->nat_sumd[1] = nat->nat_sumd[0];
 		}
 	}
 
 	/*
 	 * Now refresh the interface pointers held by the NAT rules and
 	 * re-initialise the address selection state of each rule that is
 	 * bound to a usable interface.
 	 */
 	for (n = softn->ipf_nat_list; (n != NULL); n = n->in_next) {
 		char *base = n->in_names;
 
 		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
 			n->in_ifps[0] = ipf_resolvenic(softc,
 						       base + n->in_ifnames[0],
 						       n->in_v[0]);
 		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
 			n->in_ifps[1] = ipf_resolvenic(softc,
 						       base + n->in_ifnames[1],
 						       n->in_v[1]);
 
 		if (n->in_redir & NAT_REDIRECT)
 			idx = 1;
 		else
 			idx = 0;
 
 		if (((ifp == NULL) || (n->in_ifps[idx] == ifp)) &&
 		    (n->in_ifps[idx] != NULL &&
 		     n->in_ifps[idx] != (void *)-1)) {
 
 			ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc,
 					     0, n->in_ifps[idx]);
 			ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst,
 					     0, n->in_ifps[idx]);
 			ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc,
 					     0, n->in_ifps[idx]);
 			ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst,
 					     0, n->in_ifps[idx]);
 		}
 	}
 	RWLOCK_EXIT(&softc->ipf_nat);
 	SPL_X(s);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_icmpquerytype                                       */
 /* Returns:     int - 1 == query/response type, 0 == anything else          */
 /* Parameters:  icmptype(I) - ICMP type number                              */
 /*                                                                          */
 /* Reports whether an ICMP type is one of the query/response message types  */
 /* that the ICMP query NAT code handles.                                    */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_icmpquerytype(icmptype)
 	int icmptype;
 {
 
 	/*
 	 * Both the query and its reply have to match the same NAT rule, but
 	 * a NAT structure does not record the ICMP type: one structure
 	 * serves every ICMP type sharing a source, destination and id.
 	 * Replies are therefore classified as "queries" too, which is also
 	 * how the IPv4 specification groups them.
 	 *
 	 * Router advertisement/solicitation is deliberately absent from the
 	 * list: supporting it would require rewriting the ICMP data section.
 	 */
 	if ((icmptype == ICMP_ECHOREPLY) || (icmptype == ICMP_ECHO))
 		return 1;
 	if ((icmptype == ICMP_TSTAMP) || (icmptype == ICMP_TSTAMPREPLY))
 		return 1;
 	if ((icmptype == ICMP_IREQ) || (icmptype == ICMP_IREQREPLY))
 		return 1;
 	if ((icmptype == ICMP_MASKREQ) || (icmptype == ICMP_MASKREPLY))
 		return 1;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_log                                                 */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              nat(I)    - pointer to NAT structure                        */
 /*              action(I) - action related to NAT structure being performed */
 /*                                                                          */
 /* Creates a NAT log record from the given NAT entry and passes it to       */
 /* ipf_log_items().  Does nothing unless IPFILTER_LOG is defined.           */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_log(softc, softn, nat, action)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	struct nat *nat;
 	u_int action;
 {
 #ifdef	IPFILTER_LOG
 	struct natlog record;
 	void *items[1];
 	size_t sizes[1];
 	int types[1];
 # ifndef LARGE_NAT
 	struct ipnat *rule;
 	int position;
 # endif
 
 	/* Action, type, IP versions and protocols. */
 	record.nl_action = action;
 	record.nl_type = nat->nat_redir;
 	record.nl_v[0] = nat->nat_v[0];
 	record.nl_v[1] = nat->nat_v[1];
 	record.nl_p[0] = nat->nat_pr[0];
 	record.nl_p[1] = nat->nat_pr[1];
 
 	/* Original and translated addresses. */
 	bcopy((char *)&nat->nat_osrc6, (char *)&record.nl_osrcip,
 	      sizeof(record.nl_osrcip));
 	bcopy((char *)&nat->nat_nsrc6, (char *)&record.nl_nsrcip,
 	      sizeof(record.nl_nsrcip));
 	bcopy((char *)&nat->nat_odst6, (char *)&record.nl_odstip,
 	      sizeof(record.nl_odstip));
 	bcopy((char *)&nat->nat_ndst6, (char *)&record.nl_ndstip,
 	      sizeof(record.nl_ndstip));
 
 	/* Original and translated ports. */
 	record.nl_osrcport = nat->nat_osport;
 	record.nl_odstport = nat->nat_odport;
 	record.nl_nsrcport = nat->nat_nsport;
 	record.nl_ndstport = nat->nat_ndport;
 
 	/* Traffic counters. */
 	record.nl_pkts[0] = nat->nat_pkts[0];
 	record.nl_pkts[1] = nat->nat_pkts[1];
 	record.nl_bytes[0] = nat->nat_bytes[0];
 	record.nl_bytes[1] = nat->nat_bytes[1];
 
 	/* Interface names. */
 	bcopy(nat->nat_ifnames[0], record.nl_ifnames[0],
 	      sizeof(nat->nat_ifnames[0]));
 	bcopy(nat->nat_ifnames[1], record.nl_ifnames[1],
 	      sizeof(nat->nat_ifnames[1]));
 
 	/*
 	 * The rule number is the position of the entry's rule in the NAT
 	 * rule list; it stays at -1 when there is no rule, the rule is not
 	 * on the list, or LARGE_NAT is defined.
 	 */
 	record.nl_rule = -1;
 # ifndef LARGE_NAT
 	if (nat->nat_ptr != NULL) {
 		position = 0;
 		rule = softn->ipf_nat_list;
 		while (rule != NULL) {
 			if (rule == nat->nat_ptr) {
 				record.nl_rule = position;
 				break;
 			}
 			rule = rule->in_next;
 			position++;
 		}
 	}
 # endif
 
 	types[0] = 0;
 	sizes[0] = sizeof(record);
 	items[0] = &record;
 
 	(void) ipf_log_items(softc, IPL_LOGNAT, NULL, items, sizes, types, 1);
 #endif
 }
 
 
 #if defined(__OpenBSD__)
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_ifdetach                                            */
 /* Returns:     Nil                                                         */
 /* Parameters:  ifp(I) - pointer to network interface                       */
 /*                                                                          */
 /* Compatibility interface for OpenBSD to trigger the correct updating of   */
 /* interface references within IPFilter.                                    */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_ifdetach(ifp)
 	void *ifp;
 {
 	ipf_main_softc_t *softc;
 
 	/*
 	 * This entry point receives no context argument, so look one up
 	 * with ipf_get_softc(0) and hand it to ipf_sync() along with the
 	 * interface; previously the context was fetched but never passed.
 	 */
 	softc = ipf_get_softc(0);
 
 	ipf_sync(softc, ifp);
 	return;
 }
 #endif
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_rule_deref                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              inp(I)   - pointer to pointer to NAT rule                   */
 /* Write Locks: ipf_nat                                                     */
 /*                                                                          */
 /* Dropping the reference count for a rule means that whatever held the     */
 /* pointer to this rule (*inp) is no longer interested in it and when the   */
 /* reference count drops to zero, any resources allocated for the rule can  */
 /* be released and the rule itself free'd.                                  */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_rule_deref(softc, inp)
 	ipf_main_softc_t *softc;
 	ipnat_t **inp;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipnat_t *n;
 
 	/*
 	 * The caller's pointer is always cleared, whether or not the rule
 	 * ends up being freed.
 	 */
 	n = *inp;
 	*inp = NULL;
 	n->in_use--;
 	if (n->in_use > 0)
 		return;
 
 	if (n->in_apr != NULL)
 		ipf_proxy_deref(n->in_apr);
 
 	ipf_nat_rule_fini(softc, n);
 
 	/*
 	 * Rules flagged IPN_PROXYRULE are not included in the rule counters.
 	 */
 	if (n->in_redir & NAT_REDIRECT) {
 		if ((n->in_flags & IPN_PROXYRULE) == 0) {
 			ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_rdr);
 		}
 	}
 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
 		if ((n->in_flags & IPN_PROXYRULE) == 0) {
 			ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_map);
 		}
 	}
 
 	/*
 	 * Drop this rule's hold on each of its timeout queues and free a
 	 * queue once ipf_deletetimeoutqueue() reports zero.  Each branch
 	 * must free the same queue it just released: freeing queue 1 from
 	 * the queue 0 branch would leak queue 0 and could free queue 1
 	 * twice.
 	 */
 	if (n->in_tqehead[0] != NULL) {
 		if (ipf_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
 			ipf_freetimeoutqueue(softc, n->in_tqehead[0]);
 		}
 	}
 
 	if (n->in_tqehead[1] != NULL) {
 		if (ipf_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
 			ipf_freetimeoutqueue(softc, n->in_tqehead[1]);
 		}
 	}
 
 	if ((n->in_flags & IPN_PROXYRULE) == 0) {
 		ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules);
 	}
 
 	MUTEX_DESTROY(&n->in_lock);
 
 	KFREES(n, n->in_size);
 
 #if SOLARIS && !defined(INSTANCES)
 	if (softn->ipf_nat_stats.ns_rules == 0)
 		pfil_delayed_copy = 1;
 #endif
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_deref                                               */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              natp(I)  - pointer to pointer to NAT table entry            */
 /*                                                                          */
 /* Decrement the reference counter for this NAT table entry and free it if  */
 /* there are no more things using it.                                       */
 /*                                                                          */
 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
 /* because nat_delete() will do that and send nat_ref to -1.                */
 /*                                                                          */
 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_deref(softc, natp)
 	ipf_main_softc_t *softc;
 	nat_t **natp;
 {
 	nat_t *entry;
 	int lastref;
 
 	/* Take the entry over from the caller and clear their pointer. */
 	entry = *natp;
 	*natp = NULL;
 
 	MUTEX_ENTER(&entry->nat_lock);
 	lastref = !(entry->nat_ref > 1);
 	if (!lastref) {
 		/* Others still hold it: just drop our reference. */
 		entry->nat_ref--;
 		ASSERT(entry->nat_ref >= 0);
 	}
 	MUTEX_EXIT(&entry->nat_lock);
 
 	if (!lastref)
 		return;
 
 	/*
 	 * Ours was the only reference; leave the count untouched and let
 	 * ipf_nat_delete() dispose of the entry.
 	 */
 	WRITE_ENTER(&softc->ipf_nat);
 	ipf_nat_delete(softc, entry, NL_EXPIRE);
 	RWLOCK_EXIT(&softc->ipf_nat);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_clone                                               */
 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
 /*                           else pointer to new state structure            */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              is(I)  - pointer to master state structure                  */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Create a "duplcate" state table entry from the master.                   */
 /* ------------------------------------------------------------------------ */
 nat_t *
 ipf_nat_clone(fin, nat)
 	fr_info_t *fin;
 	nat_t *nat;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	frentry_t *fr;
 	nat_t *clone;
 	ipnat_t *np;
 
 	KMALLOC(clone, nat_t *);
 	if (clone == NULL) {
 		NBUMPSIDED(fin->fin_out, ns_clone_nomem);
 		return NULL;
 	}
 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
 
 	MUTEX_NUKE(&clone->nat_lock);
 
 	clone->nat_rev = fin->fin_rev;
 	clone->nat_aps = NULL;
 	/*
 	 * Initialize all these so that ipf_nat_delete() doesn't cause a crash.
 	 */
 	clone->nat_tqe.tqe_pnext = NULL;
 	clone->nat_tqe.tqe_next = NULL;
 	clone->nat_tqe.tqe_ifq = NULL;
 	clone->nat_tqe.tqe_parent = clone;
 
 	clone->nat_flags &= ~SI_CLONE;
 	clone->nat_flags |= SI_CLONED;
 
 	if (clone->nat_hm)
 		clone->nat_hm->hm_ref++;
 
 	if (ipf_nat_insert(softc, softn, clone) == -1) {
 		KFREE(clone);
 		NBUMPSIDED(fin->fin_out, ns_insert_fail);
 		return NULL;
 	}
 
 	np = clone->nat_ptr;
 	if (np != NULL) {
 		if (softn->ipf_nat_logging)
 			ipf_nat_log(softc, softn, clone, NL_CLONE);
 		np->in_use++;
 	}
 	fr = clone->nat_fr;
 	if (fr != NULL) {
 		MUTEX_ENTER(&fr->fr_lock);
 		fr->fr_ref++;
 		MUTEX_EXIT(&fr->fr_lock);
 	}
 
 
 	/*
 	 * Because the clone is created outside the normal loop of things and
 	 * TCP has special needs in terms of state, initialise the timeout
 	 * state of the new NAT from here.
 	 */
 	if (clone->nat_pr[0] == IPPROTO_TCP) {
 		(void) ipf_tcp_age(&clone->nat_tqe, fin, softn->ipf_nat_tcptq,
 				   clone->nat_flags, 2);
 	}
 	clone->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, clone);
 	if (softn->ipf_nat_logging)
 		ipf_nat_log(softc, softn, clone, NL_CLONE);
 	return clone;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:   ipf_nat_wildok                                               */
 /* Returns:    int - 1 == packet's ports match wildcards                    */
 /*                   0 == packet's ports don't match wildcards              */
 /* Parameters: nat(I)   - NAT entry                                         */
 /*             sport(I) - source port                                       */
 /*             dport(I) - destination port                                  */
 /*             flags(I) - wildcard flags                                    */
 /*             dir(I)   - packet direction                                  */
 /*                                                                          */
 /* Use NAT entry and packet direction to determine which combination of     */
 /* wildcard flags should be used.                                           */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_wildok(nat, sport, dport, flags, dir)
 	nat_t *nat;
 	int sport, dport, flags, dir;
 {
 	int combo, srcok, dstok;
 
 	/*
 	 * dir is NAT_INBOUND (0) when called by nat_inlookup and
 	 * NAT_OUTBOUND (1) when called by nat_outlookup.  It is combined
 	 * with the direction the NAT entry was created for (nat->nat_dir)
 	 * to give one of four packet/entry pairings.
 	 */
 	combo = (dir << 1) | (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND));
 
 	switch (combo)
 	{
 	case 3: /* outbound packet / outbound entry */
 	case 0: /* inbound packet / inbound entry */
 		/* Same direction: ports are compared as they stand. */
 		srcok = (nat->nat_osport == sport) || (flags & SI_W_SPORT);
 		dstok = (nat->nat_odport == dport) || (flags & SI_W_DPORT);
 		break;
 	case 2: /* outbound packet / inbound entry */
 	case 1: /* inbound packet / outbound entry */
 		/* Opposite direction: the packet's ports are swapped. */
 		srcok = (nat->nat_osport == dport) || (flags & SI_W_SPORT);
 		dstok = (nat->nat_odport == sport) || (flags & SI_W_DPORT);
 		break;
 	default:
 		return(0);
 	}
 
 	if (srcok && dstok)
 		return 1;
 
 	return(0);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_mssclamp                                            */
 /* Returns:     Nil                                                         */
 /* Parameters:  tcp(I)    - pointer to TCP header                           */
 /*              maxmss(I) - value to clamp the TCP MSS to                   */
 /*              fin(I)    - pointer to packet information                   */
 /*              csump(I)  - pointer to TCP checksum                         */
 /*                                                                          */
 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
 /* then the TCP header checksum will be updated to reflect the change in    */
 /* the MSS.                                                                 */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_mssclamp(tcp, maxmss, fin, csump)
 	tcphdr_t *tcp;
 	u_32_t maxmss;
 	fr_info_t *fin;
 	u_short *csump;
 {
 	u_char *cursor, *limit, kind;
 	int hdrlen, optlen;
 	u_32_t oldmss, sumd;
 
 	/* A header no bigger than the fixed part carries no options. */
 	hdrlen = TCP_OFF(tcp) << 2;
 	if (!(hdrlen > sizeof(*tcp)))
 		return;
 
 	limit = (u_char *)tcp + hdrlen;
 
 	for (cursor = (u_char *)tcp + sizeof(*tcp); cursor < limit;
 	     cursor += optlen) {
 		kind = cursor[0];
 		if (kind == TCPOPT_EOL)
 			break;
 		if (kind == TCPOPT_NOP) {
 			/* NOP is a lone byte with no length field. */
 			optlen = 1;
 			continue;
 		}
 
 		/* Everything else has a length byte; validate it. */
 		if (cursor + 1 >= limit)
 			break;
 		optlen = cursor[1];
 		if ((optlen <= 0) || (cursor + optlen > limit))
 			break;
 
 		/* Only a well formed (4 byte) MSS option is of interest. */
 		if ((kind != TCPOPT_MAXSEG) || (optlen != 4))
 			continue;
 
 		oldmss = cursor[2] * 256 + cursor[3];
 		if (oldmss > maxmss) {
 			cursor[2] = maxmss / 256;
 			cursor[3] = maxmss & 0xff;
 			CALC_SUMD(oldmss, maxmss, sumd);
 			ipf_fix_outcksum(0, csump, sumd, 0);
 		}
 	}
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_setqueue                                            */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              nat(I)- pointer to NAT structure                            */
 /* Locks:       ipf_nat (read or write)                                     */
 /*                                                                          */
 /* Put the NAT entry on its default queue entry, using rev as a helper in   */
 /* determining which queue it should be placed on.                          */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_setqueue(softc, softn, nat)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	nat_t *nat;
 {
 	int rev = nat->nat_rev;
 	ipftq_t *target;
 
 	/* A queue configured on the NAT rule takes precedence. */
 	target = (nat->nat_ptr != NULL) ? nat->nat_ptr->in_tqehead[rev] : NULL;
 
 	/* Otherwise fall back to the per-protocol default queue. */
 	if (target == NULL) {
 		if (nat->nat_pr[0] == IPPROTO_UDP)
 			target = &softn->ipf_nat_udptq;
 		else if (nat->nat_pr[0] == IPPROTO_ICMP)
 			target = &softn->ipf_nat_icmptq;
 		else if (nat->nat_pr[0] == IPPROTO_TCP)
 			target = softn->ipf_nat_tcptq +
 				 nat->nat_tqe.tqe_state[rev];
 		else
 			target = &softn->ipf_nat_iptq;
 	}
 
 	/*
 	 * An entry that is not yet on any timeout queue is appended to the
 	 * chosen one; an entry that is already queued is moved across.
 	 */
 	if (nat->nat_tqe.tqe_ifq == NULL) {
 		ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, target, nat);
 		return;
 	}
 
 	ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, nat->nat_tqe.tqe_ifq,
 		      target);
 	return;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_getnext                                             */
 /* Returns:     int - 0 == ok, else error                                   */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I)   - pointer to ipftoken structure                      */
 /*              itp(I) - pointer to ipfgeniter_t structure                  */
 /*              objp(I) - pointer to ipfobj_t used for the copy out         */
 /*                                                                          */
 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
 /* copy it out to the storage space pointed to by itp_data.  The next item  */
 /* in the list to look at is put back in the ipftoken structure.            */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_getnext(softc, t, itp, objp)
 	ipf_main_softc_t *softc;
 	ipftoken_t *t;
 	ipfgeniter_t *itp;
 	ipfobj_t *objp;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	hostmap_t *hm, *nexthm = NULL, zerohm;
 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
 	nat_t *nat, *nextnat = NULL, zeronat;
 	int error = 0;
 	void *nnext;
 
 	/* Only one item may be requested per call. */
 	if (itp->igi_nitems != 1) {
 		IPFERROR(60075);
 		return ENOSPC;
 	}
 
 	READ_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Phase one, with the NAT lock held for reading.  For the requested
 	 * list: the token's data pointer is the entry returned last time
 	 * (NULL means start at the head of the list).  The entry after it has
 	 * its reference count bumped and is stored back in the token.  At the
 	 * end of the list a zeroed local structure stands in for the entry
 	 * and the token's data pointer is cleared.  nnext records whether
 	 * anything follows the entry being returned.
 	 */
 	switch (itp->igi_type)
 	{
 	case IPFGENITER_HOSTMAP :
 		hm = t->ipt_data;
 		if (hm == NULL) {
 			nexthm = softn->ipf_hm_maplist;
 		} else {
 			nexthm = hm->hm_next;
 		}
 		if (nexthm != NULL) {
 			ATOMIC_INC32(nexthm->hm_ref);
 			t->ipt_data = nexthm;
 		} else {
 			bzero(&zerohm, sizeof(zerohm));
 			nexthm = &zerohm;
 			t->ipt_data = NULL;
 		}
 		nnext = nexthm->hm_next;
 		break;
 
 	case IPFGENITER_IPNAT :
 		ipn = t->ipt_data;
 		if (ipn == NULL) {
 			nextipnat = softn->ipf_nat_list;
 		} else {
 			nextipnat = ipn->in_next;
 		}
 		if (nextipnat != NULL) {
 			ATOMIC_INC32(nextipnat->in_use);
 			t->ipt_data = nextipnat;
 		} else {
 			bzero(&zeroipn, sizeof(zeroipn));
 			nextipnat = &zeroipn;
 			t->ipt_data = NULL;
 		}
 		nnext = nextipnat->in_next;
 		break;
 
 	case IPFGENITER_NAT :
 		nat = t->ipt_data;
 		if (nat == NULL) {
 			nextnat = softn->ipf_nat_instances;
 		} else {
 			nextnat = nat->nat_next;
 		}
 		if (nextnat != NULL) {
 			/* nat_ref is adjusted under the entry's own lock. */
 			MUTEX_ENTER(&nextnat->nat_lock);
 			nextnat->nat_ref++;
 			MUTEX_EXIT(&nextnat->nat_lock);
 			t->ipt_data = nextnat;
 		} else {
 			bzero(&zeronat, sizeof(zeronat));
 			nextnat = &zeronat;
 			t->ipt_data = NULL;
 		}
 		nnext = nextnat->nat_next;
 		break;
 
 	default :
 		RWLOCK_EXIT(&softc->ipf_nat);
 		IPFERROR(60055);
 		return EINVAL;
 	}
 
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	/* The destination for the copy out is the iterator's data pointer. */
 	objp->ipfo_ptr = itp->igi_data;
 
 	/*
 	 * Phase two, without the read lock: copy the selected entry out and
 	 * then release the entry that was returned by the previous call (if
 	 * there was one).  The hostmap and rule releases take the NAT lock
 	 * for writing; ipf_nat_deref() is called without it held here.
 	 */
 	switch (itp->igi_type)
 	{
 	case IPFGENITER_HOSTMAP :
 		error = COPYOUT(nexthm, objp->ipfo_ptr, sizeof(*nexthm));
 		if (error != 0) {
 			IPFERROR(60049);
 			error = EFAULT;
 		}
 		if (hm != NULL) {
 			WRITE_ENTER(&softc->ipf_nat);
 			ipf_nat_hostmapdel(softc, &hm);
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		break;
 
 	case IPFGENITER_IPNAT :
 		/* The size copied out is taken from the rule's in_size. */
 		objp->ipfo_size = nextipnat->in_size;
 		objp->ipfo_type = IPFOBJ_IPNAT;
 		error = ipf_outobjk(softc, objp, nextipnat);
 		if (ipn != NULL) {
 			WRITE_ENTER(&softc->ipf_nat);
 			ipf_nat_rule_deref(softc, &ipn);
 			RWLOCK_EXIT(&softc->ipf_nat);
 		}
 		break;
 
 	case IPFGENITER_NAT :
 		objp->ipfo_size = sizeof(nat_t);
 		objp->ipfo_type = IPFOBJ_NAT;
 		error = ipf_outobjk(softc, objp, nextnat);
 		if (nat != NULL)
 			ipf_nat_deref(softc, &nat);
 
 		break;
 	}
 
 	/* Nothing follows the entry just returned: the walk is finished. */
 	if (nnext == NULL)
 		ipf_token_mark_complete(t);
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_extraflush                                          */
 /* Returns:     int - number of entries deleted here, or for which == 2 the */
 /*                   result of ipf_queueflush() (0 if it was not called)    */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              which(I) - how to flush the active NAT table                */
 /* Write Locks: ipf_nat                                                     */
 /*                                                                          */
 /* Flush nat tables.  Three actions currently defined:                      */
 /* which == 0 : flush all nat table entries                                 */
 /* which == 1 : flush TCP connections which have started to close but are   */
 /*	      stuck for some reason.                                        */
 /* which == 2 : flush TCP connections which have been idle for a long time, */
 /*	      starting at > 4 days idle and working back in successive half-*/
 /*	      days to at most 12 hours old.  If this fails to free enough   */
 /*            slots then work backwards in half hour slots to 30 minutes.   */
 /*            If that too fails, then work backwards in 30 second intervals */
 /*            for the last 30 minutes to at worst 30 seconds idle.          */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_extraflush(softc, softn, which)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	int which;
 {
 	nat_t *nat, **natp;
 	ipftqent_t *tqn;
 	ipftq_t *ifq;
 	int removed;
 	SPL_INT(s);
 
 	removed = 0;
 
 	SPL_NET(s);
 	switch (which)
 	{
 	case 0 :
 		softn->ipf_nat_stats.ns_flush_all++;
 		/*
 		 * Style 0 flush removes everything...
 		 */
 		/*
 		 * natp is never advanced: the loop relies on each delete
 		 * changing the head pointer that natp refers to.
 		 */
 		for (natp = &softn->ipf_nat_instances;
 		     ((nat = *natp) != NULL); ) {
 			ipf_nat_delete(softc, nat, NL_FLUSH);
 			removed++;
 		}
 		break;
 
 	case 1 :
 		softn->ipf_nat_stats.ns_flush_closing++;
 		/*
 		 * Since we're only interested in things that are closing,
 		 * we can start with the appropriate timeout queue.
 		 */
 		for (ifq = softn->ipf_nat_tcptq + IPF_TCPS_CLOSE_WAIT;
 		     ifq != NULL; ifq = ifq->ifq_next) {
 
 			for (tqn = ifq->ifq_head; tqn != NULL; ) {
 				/* Step to the next entry before deleting. */
 				nat = tqn->tqe_parent;
 				tqn = tqn->tqe_next;
 				/* A non-TCP entry ends work on this queue. */
 				if (nat->nat_pr[0] != IPPROTO_TCP ||
 				    nat->nat_pr[1] != IPPROTO_TCP)
 					break;
 				ipf_nat_delete(softc, nat, NL_EXPIRE);
 				removed++;
 			}
 		}
 
 		/*
 		 * Also need to look through the user defined queues.
 		 */
 		for (ifq = softn->ipf_nat_utqe; ifq != NULL;
 		     ifq = ifq->ifq_next) {
 			for (tqn = ifq->ifq_head; tqn != NULL; ) {
 				nat = tqn->tqe_parent;
 				tqn = tqn->tqe_next;
 				/* Here non-TCP entries are merely skipped. */
 				if (nat->nat_pr[0] != IPPROTO_TCP ||
 				    nat->nat_pr[1] != IPPROTO_TCP)
 					continue;
 
 				/* Both directions must be past ESTABLISHED. */
 				if ((nat->nat_tcpstate[0] >
 				     IPF_TCPS_ESTABLISHED) &&
 				    (nat->nat_tcpstate[1] >
 				     IPF_TCPS_ESTABLISHED)) {
 					ipf_nat_delete(softc, nat, NL_EXPIRE);
 					removed++;
 				}
 			}
 		}
 		break;
 
 		/*
 		 * Args 5-11 correspond to flushing those particular states
 		 * for TCP connections.
 		 */
 	case IPF_TCPS_CLOSE_WAIT :
 	case IPF_TCPS_FIN_WAIT_1 :
 	case IPF_TCPS_CLOSING :
 	case IPF_TCPS_LAST_ACK :
 	case IPF_TCPS_FIN_WAIT_2 :
 	case IPF_TCPS_TIME_WAIT :
 	case IPF_TCPS_CLOSED :
 		softn->ipf_nat_stats.ns_flush_state++;
 		/* which doubles as the index of the TCP state queue. */
 		tqn = softn->ipf_nat_tcptq[which].ifq_head;
 		while (tqn != NULL) {
 			nat = tqn->tqe_parent;
 			tqn = tqn->tqe_next;
 			ipf_nat_delete(softc, nat, NL_FLUSH);
 			removed++;
 		}
 		break;
 
 	default :
 		/*
 		 * Values below 30 with no case of their own do nothing in
 		 * this switch; that includes which == 2, which is handled
 		 * after the switch.
 		 */
 		if (which < 30)
 			break;
 
 		softn->ipf_nat_stats.ns_flush_timeout++;
 		/*
 		 * Take a large arbitrary number to mean the number of seconds
 		 * for which which consider to be the maximum value we'll allow
 		 * the expiration to be.
 		 */
 		which = IPF_TTLVAL(which);
 		/*
 		 * natp only advances past entries that are kept; after a
 		 * delete the same link is examined again.
 		 */
 		for (natp = &softn->ipf_nat_instances;
 		     ((nat = *natp) != NULL); ) {
 			if (softc->ipf_ticks - nat->nat_touched > which) {
 				ipf_nat_delete(softc, nat, NL_FLUSH);
 				removed++;
 			} else
 				natp = &nat->nat_next;
 		}
 		break;
 	}
 
 	/* Every mode except 2 is finished at this point. */
 	if (which != 2) {
 		SPL_X(s);
 		return removed;
 	}
 
 	softn->ipf_nat_stats.ns_flush_queue++;
 
 	/*
 	 * Asked to remove inactive entries because the table is full, try
 	 * again, 3 times, if first attempt failed with a different criteria
 	 * each time.  The order tried in must be in decreasing age.
 	 * Another alternative is to implement random drop and drop N entries
 	 * at random until N have been freed up.
 	 */
 	/*
 	 * The forced flush is only attempted when more than IPF_TTLVAL(5)
 	 * ticks have passed since the last one; otherwise removed is still 0.
 	 */
 	if (softc->ipf_ticks - softn->ipf_nat_last_force_flush >
 	    IPF_TTLVAL(5)) {
 		softn->ipf_nat_last_force_flush = softc->ipf_ticks;
 
 		removed = ipf_queueflush(softc, ipf_nat_flush_entry,
 					 softn->ipf_nat_tcptq,
 					 softn->ipf_nat_utqe,
 					 &softn->ipf_nat_stats.ns_active,
 					 softn->ipf_nat_table_sz,
 					 softn->ipf_nat_table_wm_low);
 	}
 
 	SPL_X(s);
 	return removed;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_flush_entry                                         */
 /* Returns:     0 - always succeeds                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              entry(I) - pointer to NAT entry                             */
 /* Write Locks: ipf_nat                                                     */
 /*                                                                          */
 /* This function is a stepping stone between ipf_queueflush() and           */
 /* nat_dlete().  It is used so we can provide a uniform interface via the   */
 /* ipf_queueflush() function.  Since the nat_delete() function returns void */
 /* we translate that to mean it always succeeds in deleting something.      */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_flush_entry(softc, entry)
 	ipf_main_softc_t *softc;
 	void *entry;
 {
 	ipf_nat_delete(softc, entry, NL_FLUSH);
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_iterator                                            */
 /* Returns:     int - 0 == ok, else error                                   */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              token(I) - pointer to ipftoken structure                    */
 /*              itp(I)   - pointer to ipfgeniter_t structure                */
 /*              obj(I)   - pointer to data description structure            */
 /*                                                                          */
 /* SIOCGENITER handler for the NAT subsystem.  Host map, NAT rule and NAT   */
 /* session iteration is delegated to ipf_nat_getnext(), the NAT fragment    */
 /* cache to ipf_frag_nat_next().  A request with no data pointer fails with */
 /* EFAULT and an unrecognised iterator type with EINVAL.                    */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_iterator(softc, token, itp, obj)
 	ipf_main_softc_t *softc;
 	ipftoken_t *token;
 	ipfgeniter_t *itp;
 	ipfobj_t *obj;
 {
 	/* Without somewhere to put the result there is nothing to do. */
 	if (itp->igi_data == NULL) {
 		IPFERROR(60052);
 		return EFAULT;
 	}
 
 	switch (itp->igi_type)
 	{
 	case IPFGENITER_HOSTMAP :
 	case IPFGENITER_IPNAT :
 	case IPFGENITER_NAT :
 		return ipf_nat_getnext(softc, token, itp, obj);
 
 	case IPFGENITER_NATFRAG :
 		return ipf_frag_nat_next(softc, token, itp);
 
 	default :
 		break;
 	}
 
 	/* Not an iterator type that NAT knows about. */
 	IPFERROR(60053);
 	return EINVAL;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_setpending                                          */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              nat(I)   - pointer to NAT structure                         */
 /* Locks:       ipf_nat (read or write)                                     */
 /*                                                                          */
 /* Queue the NAT entry for prompt removal by moving it from its current     */
 /* timeout queue (or appending it, if it is on none) to the pending queue.  */
 /* If an owner is registered through nat_me, the owner's pointer is cleared */
 /* and the reference it held on the entry is dropped.                       */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_setpending(softc, nat)
 	ipf_main_softc_t *softc;
 	nat_t *nat;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipftq_t *pending = &softn->ipf_nat_pending;
 	ipftq_t *oldq = nat->nat_tqe.tqe_ifq;
 
 	if (oldq == NULL) {
 		ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, pending, nat);
 	} else {
 		ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, oldq, pending);
 	}
 
 	/* No owner registered: nothing further to detach. */
 	if (nat->nat_me == NULL)
 		return;
 
 	*nat->nat_me = NULL;
 	nat->nat_me = NULL;
 	nat->nat_ref--;
 	ASSERT(nat->nat_ref >= 0);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_newrewrite                                          */
 /* Returns:     int - -1 == error, 0 == success                             */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT entry                               */
 /*              nai(I) - pointer to structure with misc. information needed */
 /*                       to create new NAT entry.                           */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* This function is responsible for setting up an active NAT session where  */
 /* we are changing both the source and destination parameters at the same   */
 /* time.  The loop in here works differently to elsewhere - each iteration  */
 /* is responsible for changing a single parameter that can be incremented.  */
 /* So one pass may increase the source IP#, next source port, next dest. IP#*/
 /* and the last destination port for a total of 4 iterations to try each.   */
 /* This is done to try and exhaustively use the translation space available.*/
 /* A candidate is rejected, and the loop repeated, while a lookup of the    */
 /* reverse direction finds an existing NAT session using the same values.   */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_newrewrite(fin, nat, nai)
 	fr_info_t *fin;
 	nat_t *nat;
 	natinfo_t *nai;
 {
 	int src_search = 1;
 	int dst_search = 1;
 	fr_info_t frnat;
 	u_32_t flags;
 	u_short swap;
 	ipnat_t *np;
 	nat_t *natl;
 	int l = 0;
 	int changed;
 
 	natl = NULL;
 	changed = -1;
 	np = nai->nai_np;
 	flags = nat->nat_flags;
 	/*
 	 * frnat is a scratch copy of fin; the candidate addresses and ports
 	 * are written into it and fin itself is left untouched.
 	 */
 	bcopy((char *)fin, (char *)&frnat, sizeof(*fin));
 
 	nat->nat_hm = NULL;
 
 	do {
 		/*
 		 * "changed" records which parameter (if any) was advanced
 		 * during this pass so that no more than one is stepped.
 		 */
 		changed = -1;
 		/* TRACE (l, src_search, dst_search, np) */
 
 		/*
 		 * Neither address is being searched and no next port is set
 		 * for either side: after the first pass there is nothing
 		 * left to vary, so give up.
 		 */
 		if ((src_search == 0) && (np->in_spnext == 0) &&
 		    (dst_search == 0) && (np->in_dpnext == 0)) {
 			if (l > 0)
 				return -1;
 		}
 
 		/*
 		 * Find a new source address
 		 */
 		if (ipf_nat_nextaddr(fin, &np->in_nsrc, &frnat.fin_saddr,
 				     &frnat.fin_saddr) == -1) {
 			return -1;
 		}
 
 		if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0xffffffff)) {
 			src_search = 0;
 			if (np->in_stepnext == 0)
 				np->in_stepnext = 1;
 
 		} else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) {
 			src_search = 0;
 			if (np->in_stepnext == 0)
 				np->in_stepnext = 1;
 
 		} else if (np->in_nsrcmsk == 0xffffffff) {
 			src_search = 0;
 			if (np->in_stepnext == 0)
 				np->in_stepnext = 1;
 
 		} else if (np->in_nsrcmsk != 0xffffffff) {
 			/* Step 0: advance the next source address. */
 			if (np->in_stepnext == 0 && changed == -1) {
 				np->in_snip++;
 				np->in_stepnext++;
 				changed = 0;
 			}
 		}
 
 		if ((flags & IPN_TCPUDPICMP) != 0) {
 			if (np->in_spnext != 0)
 				frnat.fin_data[0] = np->in_spnext;
 
 			/*
 			 * Standard port translation.  Select next port.
 			 */
 			if ((flags & IPN_FIXEDSPORT) != 0) {
 				np->in_stepnext = 2;
 			} else if ((np->in_stepnext == 1) &&
 				   (changed == -1) && (natl != NULL)) {
 				/*
 				 * Step 1: advance the next source port,
 				 * wrapping from in_spmax back to in_spmin.
 				 */
 				np->in_spnext++;
 				np->in_stepnext++;
 				changed = 1;
 				if (np->in_spnext > np->in_spmax)
 					np->in_spnext = np->in_spmin;
 			}
 		} else {
 			np->in_stepnext = 2;
 		}
 		/* Keep the step counter within 0..3. */
 		np->in_stepnext &= 0x3;
 
 		/*
 		 * Find a new destination address
 		 */
 		/* TRACE (fin, np, l, frnat) */
 
 		if (ipf_nat_nextaddr(fin, &np->in_ndst, &frnat.fin_daddr,
 				     &frnat.fin_daddr) == -1)
 			return -1;
 		if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) {
 			dst_search = 0;
 			if (np->in_stepnext == 2)
 				np->in_stepnext = 3;
 
 		} else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0)) {
 			dst_search = 0;
 			if (np->in_stepnext == 2)
 				np->in_stepnext = 3;
 
 		} else if (np->in_ndstmsk == 0xffffffff) {
 			dst_search = 0;
 			if (np->in_stepnext == 2)
 				np->in_stepnext = 3;
 
 		} else if (np->in_ndstmsk != 0xffffffff) {
 			/* Step 2: advance the next destination address. */
 			if ((np->in_stepnext == 2) && (changed == -1) &&
 			    (natl != NULL)) {
 				changed = 2;
 				np->in_stepnext++;
 				np->in_dnip++;
 			}
 		}
 
 		if ((flags & IPN_TCPUDPICMP) != 0) {
 			if (np->in_dpnext != 0)
 				frnat.fin_data[1] = np->in_dpnext;
 
 			/*
 			 * Standard port translation.  Select next port.
 			 */
 			if ((flags & IPN_FIXEDDPORT) != 0) {
 				np->in_stepnext = 0;
 			} else if (np->in_stepnext == 3 && changed == -1) {
 				/*
 				 * Step 3: advance the next destination port,
 				 * wrapping from in_dpmax back to in_dpmin.
 				 */
 				np->in_dpnext++;
 				np->in_stepnext++;
 				changed = 3;
 				if (np->in_dpnext > np->in_dpmax)
 					np->in_dpnext = np->in_dpmin;
 			}
 		} else {
 			if (np->in_stepnext == 3)
 				np->in_stepnext = 0;
 		}
 
 		/* TRACE (frnat) */
 
 		/*
 		 * Here we do a lookup of the connection as seen from
 		 * the outside.  If an IP# pair already exists, try
 		 * again.  So if you have A->B becomes C->B, you can
 		 * also have D->E become C->E but not D->B causing
 		 * another C->B.  Also take protocol and ports into
 		 * account when determining whether a pre-existing
 		 * NAT setup will cause an external conflict where
 		 * this is appropriate.
 		 *
 		 * fin_data[] is swapped around because we are doing a
 		 * lookup of the packet as if it were moving in the opposite
 		 * direction of the one we are working with now.
 		 */
 		if (flags & IPN_TCPUDP) {
 			swap = frnat.fin_data[0];
 			frnat.fin_data[0] = frnat.fin_data[1];
 			frnat.fin_data[1] = swap;
 		}
 		if (fin->fin_out == 1) {
 			natl = ipf_nat_inlookup(&frnat,
 						flags & ~(SI_WILDP|NAT_SEARCH),
 						(u_int)frnat.fin_p,
 						frnat.fin_dst, frnat.fin_src);
 
 		} else {
 			natl = ipf_nat_outlookup(&frnat,
 						 flags & ~(SI_WILDP|NAT_SEARCH),
 						 (u_int)frnat.fin_p,
 						 frnat.fin_dst, frnat.fin_src);
 		}
 		/* Undo the swap so frnat again holds source then destination. */
 		if (flags & IPN_TCPUDP) {
 			swap = frnat.fin_data[0];
 			frnat.fin_data[0] = frnat.fin_data[1];
 			frnat.fin_data[1] = swap;
 		}
 
 		/* TRACE natl, in_stepnext, l */
 
 		/* Still clashing after more than 8 passes: stop trying. */
 		if ((natl != NULL) && (l > 8))	/* XXX 8 is arbitrary */
 			return -1;
 
 		np->in_stepnext &= 0x3;
 
 		l++;
 		changed = -1;
 	} while (natl != NULL);
 
 	/*
 	 * No existing session matched the candidate: record the original
 	 * values from fin and the new values from frnat in the NAT entry.
 	 */
 	nat->nat_osrcip = fin->fin_src;
 	nat->nat_odstip = fin->fin_dst;
 	nat->nat_nsrcip = frnat.fin_src;
 	nat->nat_ndstip = frnat.fin_dst;
 
 	if ((flags & IPN_TCPUDP) != 0) {
 		nat->nat_osport = htons(fin->fin_data[0]);
 		nat->nat_odport = htons(fin->fin_data[1]);
 		nat->nat_nsport = htons(frnat.fin_data[0]);
 		nat->nat_ndport = htons(frnat.fin_data[1]);
 	} else if ((flags & IPN_ICMPQUERY) != 0) {
 		nat->nat_oicmpid = fin->fin_data[1];
 		nat->nat_nicmpid = frnat.fin_data[1];
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    nat_newdivert                                               */
 /* Returns:     int - -1 == error, 0 == success                             */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to NAT entry                               */
 /*              ni(I)  - pointer to structure with misc. information needed */
 /*                       to create new NAT entry.                           */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* Create a new NAT  divert session as defined by the NAT rule.  This is    */
 /* somewhat different to other NAT session creation routines because we     */
 /* do not iterate through either port numbers or IP addresses, searching    */
 /* for a unique mapping, however, a complimentary duplicate check is made.  */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_newdivert(fin, nat, nai)
 	fr_info_t *fin;
 	nat_t *nat;
 	natinfo_t *nai;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	fr_info_t frnat;
 	ipnat_t *np;
 	nat_t *natl;
 	int p;
 
 	np = nai->nai_np;
 	bcopy((char *)fin, (char *)&frnat, sizeof(*fin));
 
 	nat->nat_pr[0] = 0;
 	nat->nat_osrcaddr = fin->fin_saddr;
 	nat->nat_odstaddr = fin->fin_daddr;
 	frnat.fin_saddr = htonl(np->in_snip);
 	frnat.fin_daddr = htonl(np->in_dnip);
 	if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 		nat->nat_osport = htons(fin->fin_data[0]);
 		nat->nat_odport = htons(fin->fin_data[1]);
 	} else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) {
 		nat->nat_oicmpid = fin->fin_data[1];
 	}
 
 	if (np->in_redir & NAT_DIVERTUDP) {
 		frnat.fin_data[0] = np->in_spnext;
 		frnat.fin_data[1] = np->in_dpnext;
 		frnat.fin_flx |= FI_TCPUDP;
 		p = IPPROTO_UDP;
 	} else {
 		frnat.fin_flx &= ~FI_TCPUDP;
 		p = IPPROTO_IPIP;
 	}
 
 	if (fin->fin_out == 1) {
 		natl = ipf_nat_inlookup(&frnat, 0, p,
 					frnat.fin_dst, frnat.fin_src);
 
 	} else {
 		natl = ipf_nat_outlookup(&frnat, 0, p,
 					 frnat.fin_dst, frnat.fin_src);
 	}
 
 	if (natl != NULL) {
 		NBUMPSIDED(fin->fin_out, ns_divert_exist);
 		return -1;
 	}
 
 	nat->nat_nsrcaddr = frnat.fin_saddr;
 	nat->nat_ndstaddr = frnat.fin_daddr;
 	if ((nat->nat_flags & IPN_TCPUDP) != 0) {
 		nat->nat_nsport = htons(frnat.fin_data[0]);
 		nat->nat_ndport = htons(frnat.fin_data[1]);
 	} else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) {
 		nat->nat_nicmpid = frnat.fin_data[1];
 	}
 
 	nat->nat_pr[fin->fin_out] = fin->fin_p;
 	nat->nat_pr[1 - fin->fin_out] = p;
 
 	if (np->in_redir & NAT_REDIRECT)
 		nat->nat_dir = NAT_DIVERTIN;
 	else
 		nat->nat_dir = NAT_DIVERTOUT;
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    nat_builddivertmp                                           */
 /* Returns:     int - -1 == error, 0 == success                             */
 /* Parameters:  softn(I) - pointer to NAT context structure                 */
 /*              np(I)    - pointer to a NAT rule                            */
 /*                                                                          */
 /* For divert rules, a skeleton packet representing what will be prepended  */
 /* to the real packet is created.  Even though we don't have the full       */
 /* packet here, a checksum is calculated that we update later when we       */
 /* fill in the final details.  At present a 0 checksum for UDP is being set */
 /* here because it is expected that divert will be used for localhost.      */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_builddivertmp(softn, np)
 	ipf_nat_softc_t *softn;
 	ipnat_t *np;
 {
 	udphdr_t *uh;
 	size_t len;
 	ip_t *ip;
 
 	if ((np->in_redir & NAT_DIVERTUDP) != 0)
 		len = sizeof(ip_t) + sizeof(udphdr_t);
 	else
 		len = sizeof(ip_t);
 
 	ALLOC_MB_T(np->in_divmp, len);
 	if (np->in_divmp == NULL) {
 		NBUMPD(ipf_nat_stats, ns_divert_build);
 		return -1;
 	}
 
 	/*
 	 * First, the header to get the packet diverted to the new destination
 	 */
 	ip = MTOD(np->in_divmp, ip_t *);
 	IP_V_A(ip, 4);
 	IP_HL_A(ip, 5);
 	ip->ip_tos = 0;
 	if ((np->in_redir & NAT_DIVERTUDP) != 0)
 		ip->ip_p = IPPROTO_UDP;
 	else
 		ip->ip_p = IPPROTO_IPIP;
 	ip->ip_ttl = 255;
 	ip->ip_off = 0;
 	ip->ip_sum = 0;
 	ip->ip_len = htons(len);
 	ip->ip_id = 0;
 	ip->ip_src.s_addr = htonl(np->in_snip);
 	ip->ip_dst.s_addr = htonl(np->in_dnip);
 	ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
 
 	if (np->in_redir & NAT_DIVERTUDP) {
 		uh = (udphdr_t *)(ip + 1);
 		uh->uh_sum = 0;
 		uh->uh_ulen = 8;
 		uh->uh_sport = htons(np->in_spnext);
 		uh->uh_dport = htons(np->in_dpnext);
 	}
 
 	return 0;
 }
 
 
 #define	MINDECAP	(sizeof(ip_t) + sizeof(udphdr_t) + sizeof(ip_t))
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_decap                                               */
 /* Returns:     int - -1 == error, 0 == ICMP error packet (no decap),       */
 /*                    else number of outer header bytes skipped             */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              nat(I) - pointer to current NAT session                     */
 /*                                                                          */
 /* This function is responsible for undoing a packet's encapsulation in the */
 /* reverse of an encap/divert rule.  After removing the outer encapsulation */
 /* it is necessary to call ipf_makefrip() again so that the contents of 'fin'*/
 /* match the "new" packet as it may still be used by IPFilter elsewhere.    */
 /* We use "dir" here as the basis for some of the expectations about the    */
 /* outer header.  If we return an error, the goal is to leave the original  */
 /* packet information undisturbed - this falls short at the end where we'd  */
 /* need to keep a backup copy of "fin" - expensive.                         */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_decap(fin, nat)
 	fr_info_t *fin;
 	nat_t *nat;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	char *hdr;
 	int hlen;
 	int skip;
 	mb_t *m;
 
 	if ((fin->fin_flx & FI_ICMPERR) != 0) {
 		/*
 		 * ICMP packets don't get decapsulated, instead what we need
 		 * to do is change the ICMP reply from including (in the data
 		 * portion for errors) the encapsulated packet that we sent
 		 * out to something that resembles the original packet prior
 		 * to encapsulation.  This isn't done here - all we're doing
 		 * here is changing the outer address to ensure that it gets
 		 * targeted back to the correct system.
 		 */
 
 		if (nat->nat_dir & NAT_OUTBOUND) {
 			u_32_t sum1, sum2, sumd;
 
 			/*
 			 * Work out the checksum delta between the current
 			 * destination and the original source address, then
 			 * rewrite the destination in both the packet and fin.
 			 */
 			sum1 = ntohl(fin->fin_daddr);
 			sum2 = ntohl(nat->nat_osrcaddr);
 			CALC_SUMD(sum1, sum2, sumd);
 			fin->fin_ip->ip_dst = nat->nat_osrcip;
 			fin->fin_daddr = nat->nat_osrcaddr;
 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
      defined(__osf__) || defined(linux)
 			ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, sumd, 0);
 #endif
 		}
 		return 0;
 	}
 
 	m = fin->fin_m;
 	/* Start by skipping the outer IP header. */
 	skip = fin->fin_hlen;
 
 	switch (nat->nat_dir)
 	{
 	case NAT_DIVERTIN :
 	case NAT_DIVERTOUT :
 		/* Divert: a UDP header is skipped as well as the IP header. */
 		if (fin->fin_plen < MINDECAP)
 			return -1;
 		skip += sizeof(udphdr_t);
 		break;
 
 	case NAT_ENCAPIN :
 	case NAT_ENCAPOUT :
 		/* Encap: the inner IP header directly follows the outer. */
 		if (fin->fin_plen < (skip + sizeof(ip_t)))
 			return -1;
 		break;
 	default :
 		return -1;
 		/* NOTREACHED */
 	}
 
 	/*
 	 * The aim here is to keep the original packet details in "fin" for
 	 * as long as possible so that returning with an error is for the
 	 * original packet and there is little undoing work to do.
 	 */
 	if (M_LEN(m) < skip + sizeof(ip_t)) {
 		if (ipf_pr_pullup(fin, skip + sizeof(ip_t)) == -1)
 			return -1;
 	}
 
 	/* Locate the inner IP header and read its length from it. */
 	hdr = MTOD(fin->fin_m, char *);
 	fin->fin_ip = (ip_t *)(hdr + skip);
 	hlen = IP_HL(fin->fin_ip) << 2;
 
 	/*
 	 * NOTE(review): hdr and fin->fin_ip were derived before this second
 	 * pullup; if ipf_pr_pullup() can relocate the packet data they may
 	 * be stale afterwards - confirm against ipf_pr_pullup().
 	 */
 	if (ipf_pr_pullup(fin, skip + hlen) == -1) {
 		NBUMPSIDED(fin->fin_out, ns_decap_pullup);
 		return -1;
 	}
 
 	/* From here on "fin" describes the inner packet. */
 	fin->fin_hlen = hlen;
 	fin->fin_dlen -= skip;
 	fin->fin_plen -= skip;
 	fin->fin_ipoff += skip;
 
 	if (ipf_makefrip(hlen, (ip_t *)hdr, fin) == -1) {
 		NBUMPSIDED(fin->fin_out, ns_decap_bad);
 		return -1;
 	}
 
 	return skip;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    nat_nextaddr                                                */
 /* Returns:     int - -1 == bad input (no new address),                     */
 /*                     0 == success and dst has new address                 */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*              na(I)  - how to generate new address                        */
 /*              old(I) - original address being replaced                    */
 /*              dst(O) - where to put the new address                       */
 /* Write Lock:  ipf_nat                                                     */
 /*                                                                          */
 /* This function uses the contents of the "na" structure, in combination    */
 /* with "old" to produce a new address to store in "dst".  Not all of the   */
 /* possible uses of "na" will result in a new address.                      */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_nextaddr(fin, na, old, dst)
 	fr_info_t *fin;
 	nat_addr_t *na;
 	u_32_t *old, *dst;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	u_32_t amin, amax, new;
 	i6addr_t newip;
 	int error;
 
 	new = 0;
 	amin = na->na_addr[0].in4.s_addr;
 
 	switch (na->na_atype)
 	{
 	case FRI_RANGE :
 		amax = na->na_addr[1].in4.s_addr;
 		break;
 
 	case FRI_NETMASKED :
 	case FRI_DYNAMIC :
 	case FRI_NORMAL :
 		/*
 		 * Compute the maximum address by adding the inverse of the
 		 * netmask to the minimum address.
 		 */
 		amax = ~na->na_addr[1].in4.s_addr;
 		amax |= amin;
 		break;
 
 	case FRI_LOOKUP :
 		break;
 
 	case FRI_BROADCAST :
 	case FRI_PEERADDR :
 	case FRI_NETWORK :
 	default :
 		return -1;
 	}
 
 	error = -1;
 
 	if (na->na_atype == FRI_LOOKUP) {
 		if (na->na_type == IPLT_DSTLIST) {
 			error = ipf_dstlist_select_node(fin, na->na_ptr, dst,
 							NULL);
 		} else {
 			NBUMPSIDE(fin->fin_out, ns_badnextaddr);
 		}
 
 	} else if (na->na_atype == IPLT_NONE) {
 		/*
 		 * 0/0 as the new address means leave it alone.
 		 */
 		if (na->na_addr[0].in4.s_addr == 0 &&
 		    na->na_addr[1].in4.s_addr == 0) {
 			new = *old;
 
 		/*
 		 * 0/32 means get the interface's address
 		 */
 		} else if (na->na_addr[0].in4.s_addr == 0 &&
 			   na->na_addr[1].in4.s_addr == 0xffffffff) {
 			if (ipf_ifpaddr(softc, 4, na->na_atype,
 					fin->fin_ifp, &newip, NULL) == -1) {
 				NBUMPSIDED(fin->fin_out, ns_ifpaddrfail);
 				return -1;
 			}
 			new = newip.in4.s_addr;
 		} else {
 			new = htonl(na->na_nextip);
 		}
 		*dst = new;
 		error = 0;
 
 	} else {
 		NBUMPSIDE(fin->fin_out, ns_badnextaddr);
 	}
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    nat_nextaddrinit                                            */
 /* Returns:     int - 0 == success, else error number                       */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              na(I)      - NAT address information for generating new addr*/
 /*              initial(I) - flag indicating if it is the first call for    */
 /*                           this "na" structure.                           */
 /*              ifp(I)     - network interface to derive address            */
 /*                           information from.                              */
 /*                                                                          */
 /* This function is expected to be called in two scenarious: when a new NAT */
 /* rule is loaded into the kernel and when the list of NAT rules is sync'd  */
 /* up with the valid network interfaces (possibly due to them changing.)    */
 /* To distinguish between these, the "initial" parameter is used.  If it is */
 /* 1 then this indicates the rule has just been reloaded and 0 for when we  */
 /* are updating information.  This difference is important because in       */
 /* instances where we are not updating address information associated with  */
 /* a network interface, we don't want to disturb what the "next" address to */
 /* come out of ipf_nat_nextaddr() will be.                                  */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_nextaddrinit(softc, base, na, initial, ifp)
 	ipf_main_softc_t *softc;
 	char *base;
 	nat_addr_t *na;
 	int initial;
 	void *ifp;
 {
 
 	switch (na->na_atype)
 	{
 	case FRI_LOOKUP :
 		if (na->na_subtype == 0) {
 			na->na_ptr = ipf_lookup_res_num(softc, IPL_LOGNAT,
 							na->na_type,
 							na->na_num,
 							&na->na_func);
 		} else if (na->na_subtype == 1) {
 			na->na_ptr = ipf_lookup_res_name(softc, IPL_LOGNAT,
 							 na->na_type,
 							 base + na->na_num,
 							 &na->na_func);
 		}
 		if (na->na_func == NULL) {
 			IPFERROR(60060);
 			return ESRCH;
 		}
 		if (na->na_ptr == NULL) {
 			IPFERROR(60056);
 			return ESRCH;
 		}
 		break;
 
 	case FRI_DYNAMIC :
 	case FRI_BROADCAST :
 	case FRI_NETWORK :
 	case FRI_NETMASKED :
 	case FRI_PEERADDR :
 		if (ifp != NULL)
 			(void )ipf_ifpaddr(softc, 4, na->na_atype, ifp,
 					   &na->na_addr[0], &na->na_addr[1]);
 		break;
 
 	case FRI_SPLIT :
 	case FRI_RANGE :
 		if (initial)
 			na->na_nextip = ntohl(na->na_addr[0].in4.s_addr);
 		break;
 
 	case FRI_NONE :
 		na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr;
 		return 0;
 
 	case FRI_NORMAL :
 		na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr;
 		break;
 
 	default :
 		IPFERROR(60054);
 		return EINVAL;
 	}
 
 	if (initial && (na->na_atype == FRI_NORMAL)) {
 		if (na->na_addr[0].in4.s_addr == 0) {
 			if ((na->na_addr[1].in4.s_addr == 0xffffffff) ||
 			    (na->na_addr[1].in4.s_addr == 0)) {
 				return 0;
 			}
 		}
 
 		if (na->na_addr[1].in4.s_addr == 0xffffffff) {
 			na->na_nextip = ntohl(na->na_addr[0].in4.s_addr);
 		} else {
 			na->na_nextip = ntohl(na->na_addr[0].in4.s_addr) + 1;
 		}
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_matchflush                                          */
 /* Returns:     int - 0 == success, else error                              */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              data(I)  - pointer to match array object to load/update     */
 /*                                                                          */
 /* Loads a match expression array via ipf_matcharray_load(), deletes every  */
 /* NAT session for which ipf_nat_matcharray() returns 0 and copies the      */
 /* object back out with ipfo_retval set to the number of sessions deleted.  */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_matchflush(softc, softn, data)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	caddr_t data;
 {
 	nat_t *nat, *following;
 	int *array, error;
 	int flushed = 0;
 	ipfobj_t obj;
 
 	error = ipf_matcharray_load(softc, data, &obj, &array);
 	if (error != 0)
 		return error;
 
 	nat = softn->ipf_nat_instances;
 	while (nat != NULL) {
 		/* Fetch the successor first; "nat" may be about to go away. */
 		following = nat->nat_next;
 		if (ipf_nat_matcharray(nat, array, softc->ipf_ticks) == 0) {
 			ipf_nat_delete(softc, nat, NL_FLUSH);
 			flushed++;
 		}
 		nat = following;
 	}
 
 	obj.ipfo_retval = flushed;
 	error = BCOPYOUT(&obj, data, sizeof(obj));
 
 	KFREES(array, array[0] * sizeof(*array));
 
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_matcharray                                          */
 /* Returns:     int - 0 if an element did not hold (or none was evaluated), */
 /*                    else non-zero                                         */
 /* Parameters:  nat(I)   - pointer to current NAT session                   */
 /*              array(I) - match expression array, array[0] is its length   */
 /*              ticks(I) - current tick count, used for idle comparisons    */
 /*                                                                          */
 /* Walk the elements of the array, each laid out as x[0] = opcode (with an  */
 /* optional protocol in the upper 16 bits), x[1] = value XORed into the     */
 /* result, x[2] = number of ints that follow.  Evaluation stops at the      */
 /* first element whose result is 0.                                         */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_matcharray(nat, array, ticks)
 	nat_t *nat;
 	int *array;
 	u_long ticks;
 {
 	int i, n, *x, e, p;
 
 	e = 0;
 	n = array[0];
 	x = array + 1;
 
 	/* Each element occupies 3 header ints plus x[2] data ints. */
 	for (; n > 0; x += 3 + x[2]) {
 		if (x[0] == IPF_EXP_END)
 			break;
 		e = 0;
 
 		/* Stop if this element claims more ints than remain. */
 		n -= x[2] + 3;
 		if (n < 0)
 			break;
 
 		/*
 		 * A non-zero value in the upper 16 bits of the opcode must
 		 * equal nat_pr[1] or the walk ends with a result of 0.
 		 */
 		p = x[0] >> 16;
 		if (p != 0 && p != nat->nat_pr[1])
 			break;
 
 		switch (x[0])
 		{
 		case IPF_EXP_IP_PR :
 			for (i = 0; !e && i < x[2]; i++) {
 				e |= (nat->nat_pr[1] == x[i + 3]);
 			}
 			break;
 
 		/*
 		 * NOTE(review): the IPv4 address loops below advance i by
 		 * one while reading x[i + 3] as the address and x[i + 4] as
 		 * the mask; if the data is stored as address/mask pairs a
 		 * step of two would be expected - confirm against the code
 		 * that builds the array.
 		 */
 		case IPF_EXP_IP_SRCADDR :
 			if (nat->nat_v[0] == 4) {
 				for (i = 0; !e && i < x[2]; i++) {
 					e |= ((nat->nat_osrcaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 			}
 			if (nat->nat_v[1] == 4) {
 				for (i = 0; !e && i < x[2]; i++) {
 					e |= ((nat->nat_nsrcaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 			}
 			break;
 
 		case IPF_EXP_IP_DSTADDR :
 			if (nat->nat_v[0] == 4) {
 				for (i = 0; !e && i < x[2]; i++) {
 					e |= ((nat->nat_odstaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 			}
 			if (nat->nat_v[1] == 4) {
 				for (i = 0; !e && i < x[2]; i++) {
 					e |= ((nat->nat_ndstaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 			}
 			break;
 
 		case IPF_EXP_IP_ADDR :
 			/* Any of the four session addresses may match. */
 			for (i = 0; !e && i < x[2]; i++) {
 				if (nat->nat_v[0] == 4) {
 					e |= ((nat->nat_osrcaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 				if (nat->nat_v[1] == 4) {
 					e |= ((nat->nat_nsrcaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 				if (nat->nat_v[0] == 4) {
 					e |= ((nat->nat_odstaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 				if (nat->nat_v[1] == 4) {
 					e |= ((nat->nat_ndstaddr & x[i + 4]) ==
 					      x[i + 3]);
 				}
 			}
 			break;
 
 #ifdef USE_INET6
 		/*
 		 * NOTE(review): the IPv6 loops are bounded by x[3] whereas
 		 * every other case uses x[2] as the count - confirm which
 		 * is intended.
 		 */
 		case IPF_EXP_IP6_SRCADDR :
 			if (nat->nat_v[0] == 6) {
 				for (i = 0; !e && i < x[3]; i++) {
 					e |= IP6_MASKEQ(&nat->nat_osrc6,
 							x + i + 7, x + i + 3);
 				}
 			}
 			if (nat->nat_v[1] == 6) {
 				for (i = 0; !e && i < x[3]; i++) {
 					e |= IP6_MASKEQ(&nat->nat_nsrc6,
 							x + i + 7, x + i + 3);
 				}
 			}
 			break;
 
 		case IPF_EXP_IP6_DSTADDR :
 			if (nat->nat_v[0] == 6) {
 				for (i = 0; !e && i < x[3]; i++) {
 					e |= IP6_MASKEQ(&nat->nat_odst6,
 							x + i + 7,
 							x + i + 3);
 				}
 			}
 			if (nat->nat_v[1] == 6) {
 				for (i = 0; !e && i < x[3]; i++) {
 					e |= IP6_MASKEQ(&nat->nat_ndst6,
 							x + i + 7,
 							x + i + 3);
 				}
 			}
 			break;
 
 		case IPF_EXP_IP6_ADDR :
 			for (i = 0; !e && i < x[3]; i++) {
 				if (nat->nat_v[0] == 6) {
 					e |= IP6_MASKEQ(&nat->nat_osrc6,
 							x + i + 7,
 							x + i + 3);
 				}
 				if (nat->nat_v[0] == 6) {
 					e |= IP6_MASKEQ(&nat->nat_odst6,
 							x + i + 7,
 							x + i + 3);
 				}
 				if (nat->nat_v[1] == 6) {
 					e |= IP6_MASKEQ(&nat->nat_nsrc6,
 							x + i + 7,
 							x + i + 3);
 				}
 				if (nat->nat_v[1] == 6) {
 					e |= IP6_MASKEQ(&nat->nat_ndst6,
 							x + i + 7,
 							x + i + 3);
 				}
 			}
 			break;
 #endif
 
 		/* Port comparisons use the "new" ports of the session only. */
 		case IPF_EXP_UDP_PORT :
 		case IPF_EXP_TCP_PORT :
 			for (i = 0; !e && i < x[2]; i++) {
 				e |= (nat->nat_nsport == x[i + 3]) ||
 				     (nat->nat_ndport == x[i + 3]);
 			}
 			break;
 
 		case IPF_EXP_UDP_SPORT :
 		case IPF_EXP_TCP_SPORT :
 			for (i = 0; !e && i < x[2]; i++) {
 				e |= (nat->nat_nsport == x[i + 3]);
 			}
 			break;
 
 		case IPF_EXP_UDP_DPORT :
 		case IPF_EXP_TCP_DPORT :
 			for (i = 0; !e && i < x[2]; i++) {
 				e |= (nat->nat_ndport == x[i + 3]);
 			}
 			break;
 
 		case IPF_EXP_TCP_STATE :
 			for (i = 0; !e && i < x[2]; i++) {
 				e |= (nat->nat_tcpstate[0] == x[i + 3]) ||
 				     (nat->nat_tcpstate[1] == x[i + 3]);
 			}
 			break;
 
 		case IPF_EXP_IDLE_GT :
 			/* Ticks since the session was last touched. */
 			e |= (ticks - nat->nat_touched > x[3]);
 			break;
 		}
 		/* x[1] is XORed in, so a value of 1 inverts the result. */
 		e ^= x[1];
 
 		if (!e)
 			break;
 	}
 
 	return e;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_gettable                                            */
 /* Returns:     int     - 0 = success, else error                           */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              data(I)  - pointer to ioctl data                            */
 /*                                                                          */
 /* This function handles ioctl requests for tables of nat information.      */
 /* At present the only table it deals with is the hash bucket statistics.   */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_gettable(softc, softn, data)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	char *data;
 {
 	ipftable_t table;
 	int error;
 
 	error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
 	if (error != 0)
 		return error;
 
 	switch (table.ita_type)
 	{
 	case IPFTABLE_BUCKETS_NATIN :
 		error = COPYOUT(softn->ipf_nat_stats.ns_side[0].ns_bucketlen,
 				table.ita_table,
 				softn->ipf_nat_table_sz * sizeof(u_int));
 		break;
 
 	case IPFTABLE_BUCKETS_NATOUT :
 		error = COPYOUT(softn->ipf_nat_stats.ns_side[1].ns_bucketlen,
 				table.ita_table,
 				softn->ipf_nat_table_sz * sizeof(u_int));
 		break;
 
 	default :
 		IPFERROR(60058);
 		return EINVAL;
 	}
 
 	if (error != 0) {
 		IPFERROR(60059);
 		error = EFAULT;
 	}
 	return error;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_settimeout                                          */
 /* Returns:     int  - 0 = success, else failure			    */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I) - pointer to tunable                                   */
 /*              p(I) - pointer to new tuning data                           */
 /*                                                                          */
 /* Apply the timeout change to the NAT timeout queues.                      */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_settimeout(softc, t, p)
 	struct ipf_main_softc_s *softc;
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 
 	if (!strncmp(t->ipft_name, "tcp_", 4))
 		return ipf_settimeout_tcp(t, p, softn->ipf_nat_tcptq);
 
 	if (!strcmp(t->ipft_name, "udp_timeout")) {
 		ipf_apply_timeout(&softn->ipf_nat_udptq, p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
 		ipf_apply_timeout(&softn->ipf_nat_udpacktq, p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "icmp_timeout")) {
 		ipf_apply_timeout(&softn->ipf_nat_icmptq, p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
 		ipf_apply_timeout(&softn->ipf_nat_icmpacktq, p->ipftu_int);
 	} else if (!strcmp(t->ipft_name, "ip_timeout")) {
 		ipf_apply_timeout(&softn->ipf_nat_iptq, p->ipftu_int);
 	} else {
 		IPFERROR(60062);
 		return ESRCH;
 	}
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_rehash                                              */
 /* Returns:     int  - 0 = success, else failure			    */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I) - pointer to tunable                                   */
 /*              p(I) - pointer to new tuning data                           */
 /*                                                                          */
 /* To change the size of the basic NAT table, we need to first allocate the */
 /* new tables (lest it fails and we've got nowhere to store all of the NAT  */
 /* sessions currently active) and then walk through the entire list and     */
 /* insert them into the table.  There are two tables here: an inbound one   */
 /* and an outbound one.  Each NAT entry goes into each table once.          */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_rehash(softc, t, p)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	nat_t **newtab[2], *nat, **natp;
 	u_int *bucketlens[2];
 	u_int maxbucket;
 	u_int newsize;
 	int error;
 	u_int hv;
 	int i;
 
 	newsize = p->ipftu_int;
 	/*
 	 * In case there is nothing to do...
 	 */
 	if (newsize == softn->ipf_nat_table_sz)
 		return 0;
 
 	newtab[0] = NULL;
 	newtab[1] = NULL;
 	bucketlens[0] = NULL;
 	bucketlens[1] = NULL;
 	/*
 	 * 4 tables depend on the NAT table size: the inbound looking table,
 	 * the outbound lookup table and the hash chain length for each.
 	 */
 	KMALLOCS(newtab[0], nat_t **, newsize * sizeof(nat_t *));
 	if (newtab == NULL) {
 		error = 60063;
 		goto badrehash;
 	}
 
 	KMALLOCS(newtab[1], nat_t **, newsize * sizeof(nat_t *));
 	if (newtab == NULL) {
 		error = 60064;
 		goto badrehash;
 	}
 
 	KMALLOCS(bucketlens[0], u_int *, newsize * sizeof(u_int));
 	if (bucketlens[0] == NULL) {
 		error = 60065;
 		goto badrehash;
 	}
 
 	KMALLOCS(bucketlens[1], u_int *, newsize * sizeof(u_int));
 	if (bucketlens[1] == NULL) {
 		error = 60066;
 		goto badrehash;
 	}
 
 	/*
 	 * Recalculate the maximum length based on the new size.
 	 */
 	for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
 		maxbucket++;
 	maxbucket *= 2;
 
 	bzero((char *)newtab[0], newsize * sizeof(nat_t *));
 	bzero((char *)newtab[1], newsize * sizeof(nat_t *));
 	bzero((char *)bucketlens[0], newsize * sizeof(u_int));
 	bzero((char *)bucketlens[1], newsize * sizeof(u_int));
 
 	WRITE_ENTER(&softc->ipf_nat);
 
 	if (softn->ipf_nat_table[0] != NULL) {
 		KFREES(softn->ipf_nat_table[0],
 		       softn->ipf_nat_table_sz *
 		       sizeof(*softn->ipf_nat_table[0]));
 	}
 	softn->ipf_nat_table[0] = newtab[0];
 
 	if (softn->ipf_nat_table[1] != NULL) {
 		KFREES(softn->ipf_nat_table[1],
 		       softn->ipf_nat_table_sz *
 		       sizeof(*softn->ipf_nat_table[1]));
 	}
 	softn->ipf_nat_table[1] = newtab[1];
 
 	if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen,
 		       softn->ipf_nat_table_sz * sizeof(u_int));
 	}
 	softn->ipf_nat_stats.ns_side[0].ns_bucketlen = bucketlens[0];
 
 	if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen,
 		       softn->ipf_nat_table_sz * sizeof(u_int));
 	}
 	softn->ipf_nat_stats.ns_side[1].ns_bucketlen = bucketlens[1];
 
 #ifdef USE_INET6
 	if (softn->ipf_nat_stats.ns_side6[0].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side6[0].ns_bucketlen,
 		       softn->ipf_nat_table_sz * sizeof(u_int));
 	}
 	softn->ipf_nat_stats.ns_side6[0].ns_bucketlen = bucketlens[0];
 
 	if (softn->ipf_nat_stats.ns_side6[1].ns_bucketlen != NULL) {
 		KFREES(softn->ipf_nat_stats.ns_side6[1].ns_bucketlen,
 		       softn->ipf_nat_table_sz * sizeof(u_int));
 	}
 	softn->ipf_nat_stats.ns_side6[1].ns_bucketlen = bucketlens[1];
 #endif
 
 	softn->ipf_nat_maxbucket = maxbucket;
 	softn->ipf_nat_table_sz = newsize;
 	/*
 	 * Walk through the entire list of NAT table entries and put them
 	 * in the new NAT table, somewhere.  Because we have a new table,
 	 * we need to restart the counter of how many chains are in use.
 	 */
 	softn->ipf_nat_stats.ns_side[0].ns_inuse = 0;
 	softn->ipf_nat_stats.ns_side[1].ns_inuse = 0;
 #ifdef USE_INET6
 	softn->ipf_nat_stats.ns_side6[0].ns_inuse = 0;
 	softn->ipf_nat_stats.ns_side6[1].ns_inuse = 0;
 #endif
 
 	for (nat = softn->ipf_nat_instances; nat != NULL; nat = nat->nat_next) {
 		nat->nat_hnext[0] = NULL;
 		nat->nat_phnext[0] = NULL;
 		hv = nat->nat_hv[0] % softn->ipf_nat_table_sz;
 
 		natp = &softn->ipf_nat_table[0][hv];
 		if (*natp) {
 			(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
 		} else {
 			NBUMPSIDE(0, ns_inuse);
 		}
 		nat->nat_phnext[0] = natp;
 		nat->nat_hnext[0] = *natp;
 		*natp = nat;
 		NBUMPSIDE(0, ns_bucketlen[hv]);
 
 		nat->nat_hnext[1] = NULL;
 		nat->nat_phnext[1] = NULL;
 		hv = nat->nat_hv[1] % softn->ipf_nat_table_sz;
 
 		natp = &softn->ipf_nat_table[1][hv];
 		if (*natp) {
 			(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
 		} else {
 			NBUMPSIDE(1, ns_inuse);
 		}
 		nat->nat_phnext[1] = natp;
 		nat->nat_hnext[1] = *natp;
 		*natp = nat;
 		NBUMPSIDE(1, ns_bucketlen[hv]);
 	}
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	return 0;
 
 badrehash:
 	if (bucketlens[1] != NULL) {
 		KFREES(bucketlens[0], newsize * sizeof(u_int));
 	}
 	if (bucketlens[0] != NULL) {
 		KFREES(bucketlens[0], newsize * sizeof(u_int));
 	}
 	if (newtab[0] != NULL) {
 		KFREES(newtab[0], newsize * sizeof(nat_t *));
 	}
 	if (newtab[1] != NULL) {
 		KFREES(newtab[1], newsize * sizeof(nat_t *));
 	}
 	IPFERROR(error);
 	return ENOMEM;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_rehash_rules                                        */
 /* Returns:     int  - 0 = success, ENOMEM = allocation failed              */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I) - pointer to tunable                                   */
 /*              p(I) - pointer to new tuning data                           */
 /*                                                                          */
 /* All of the NAT rules hang off of a hash table that is searched with a    */
 /* hash on address after the netmask is applied.  There is a different table*/
 /* for both inbound rules (rdr) and outbound (map.)  The resizing will only */
 /* affect one of these two tables, so a rule is only ever re-linked on the  */
 /* chain (in_rnext or in_mnext) that belongs to the table being resized.    */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_rehash_rules(softc, t, p)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipnat_t **newtab, *np, ***old, **npp;
 	u_int newsize;
 	u_int mask;
 	u_int hv;
 
 	newsize = p->ipftu_int;
 	/*
 	 * In case there is nothing to do...
 	 */
 	if (newsize == *t->ipft_pint)
 		return 0;
 
 	/*
 	 * All inbound rules have the NAT_REDIRECT bit set in in_redir and
 	 * all outbound rules have either NAT_MAP or NAT_MAPBLK set.
 	 * This if statement allows for some more generic code to be below,
 	 * rather than two huge gobs of code that almost do the same thing.
 	 */
 	if (t->ipft_pint == &softn->ipf_nat_rdrrules_sz) {
 		old = &softn->ipf_nat_rdr_rules;
 		mask = NAT_REDIRECT;
 	} else {
 		old = &softn->ipf_nat_map_rules;
 		mask = NAT_MAP|NAT_MAPBLK;
 	}
 
 	KMALLOCS(newtab, ipnat_t **, newsize * sizeof(ipnat_t *));
 	if (newtab == NULL) {
 		IPFERROR(60067);
 		return ENOMEM;
 	}
 
 	bzero((char *)newtab, newsize * sizeof(ipnat_t *));
 
 	WRITE_ENTER(&softc->ipf_nat);
 
 	/*
 	 * *t->ipft_pint is still the old size at this point; the table is
 	 * an array of ipnat_t pointers.
 	 */
 	if (*old != NULL) {
 		KFREES(*old, *t->ipft_pint * sizeof(ipnat_t *));
 	}
 	*old = newtab;
 	*t->ipft_pint = newsize;
 
 	for (np = softn->ipf_nat_list; np != NULL; np = np->in_next) {
 		if ((np->in_redir & mask) == 0)
 			continue;
 
 		/*
 		 * newtab replaces exactly one table.  A rule carrying both
 		 * NAT_REDIRECT and NAT_MAP must not be linked into newtab
 		 * via the chain of the table that is not being resized, as
 		 * that would cut it out of the other table's chain and
 		 * thread foreign chain pointers through this one.
 		 */
 		if ((mask & NAT_REDIRECT) != 0) {
 			/* Append to the tail of its rdr hash chain. */
 			np->in_rnext = NULL;
 			hv = np->in_hv[0] % newsize;
 			for (npp = newtab + hv; *npp != NULL; )
 				npp = &(*npp)->in_rnext;
 			np->in_prnext = npp;
 			*npp = np;
 		} else if ((np->in_redir & NAT_MAP) != 0) {
 			/*
 			 * Append to the tail of its map hash chain.
 			 * NOTE(review): rules with only NAT_MAPBLK set pass
 			 * the mask test above but are not re-inserted here
 			 * (unchanged from before) - confirm that is wanted.
 			 */
 			np->in_mnext = NULL;
 			hv = np->in_hv[1] % newsize;
 			for (npp = newtab + hv; *npp != NULL; )
 				npp = &(*npp)->in_mnext;
 			np->in_pmnext = npp;
 			*npp = np;
 		}
 
 	}
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_hostmap_rehash                                      */
 /* Returns:     int  - 0 = success, else failure                            */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              t(I) - pointer to tunable                                   */
 /*              p(I) - pointer to new tuning data                           */
 /*                                                                          */
 /* Build a replacement hostmap hash table of the requested size and move    */
 /* every entry on the hostmap list into it.  The new table is allocated     */
 /* and zeroed before the NAT lock is taken; ENOMEM is returned if that      */
 /* allocation fails, leaving the current table as it was.                   */
 /* ------------------------------------------------------------------------ */
 int
 ipf_nat_hostmap_rehash(softc, t, p)
 	ipf_main_softc_t *softc;
 	ipftuneable_t *t;
 	ipftuneval_t *p;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	hostmap_t *hm, **newtab;
 	hostmap_t **bucket;
 	u_int newsize;
 	u_int hv;
 
 	/*
 	 * Nothing to do if the size is not actually changing.
 	 */
 	newsize = p->ipftu_int;
 	if (*t->ipft_pint == newsize)
 		return 0;
 
 	KMALLOCS(newtab, hostmap_t **, newsize * sizeof(hostmap_t *));
 	if (newtab == NULL) {
 		IPFERROR(60068);
 		return ENOMEM;
 	}
 	bzero((char *)newtab, newsize * sizeof(hostmap_t *));
 
 	WRITE_ENTER(&softc->ipf_nat);
 
 	/*
 	 * Discard the old table (sized by the old ipf_nat_hostmap_sz) and
 	 * install the new one along with its size.
 	 */
 	if (softn->ipf_hm_maptable != NULL) {
 		KFREES(softn->ipf_hm_maptable,
 		       softn->ipf_nat_hostmap_sz * sizeof(hostmap_t *));
 	}
 	softn->ipf_hm_maptable = newtab;
 	softn->ipf_nat_hostmap_sz = newsize;
 
 	/*
 	 * Insert each hostmap at the head of its new bucket, repairing the
 	 * previous head's back pointer where there is one.
 	 */
 	for (hm = softn->ipf_hm_maplist; hm != NULL; hm = hm->hm_next) {
 		hv = hm->hm_hv % softn->ipf_nat_hostmap_sz;
 		bucket = softn->ipf_hm_maptable + hv;
 
 		hm->hm_hnext = *bucket;
 		hm->hm_phnext = bucket;
 		if (*bucket != NULL)
 			(*bucket)->hm_phnext = &hm->hm_hnext;
 		*bucket = hm;
 	}
 	RWLOCK_EXIT(&softc->ipf_nat);
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_add_tq                                              */
 /* Returns:     ipftq_t * - whatever ipf_addtimeoutqueue() returns          */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              ttl(I)   - timeout value handed on to ipf_addtimeoutqueue() */
 /*                                                                          */
 /* Thin wrapper that asks ipf_addtimeoutqueue() for a queue with the given  */
 /* ttl on the NAT context's ipf_nat_utqe list.                              */
 /* ------------------------------------------------------------------------ */
 ipftq_t *
 ipf_nat_add_tq(softc, ttl)
 	ipf_main_softc_t *softc;
 	int ttl;
 {
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	ipftq_t *tq;
 
 	tq = ipf_addtimeoutqueue(softc, &softn->ipf_nat_utqe, ttl);
 	return tq;
 }
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_uncreate                                            */
 /* Returns:     Nil                                                         */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* This function is used to remove a NAT entry from the NAT table when we   */
 /* decide that the create was actually in error. It is thus assumed that    */
 /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
 /* with the translated packet (not the original), we have to reverse the    */
 /* lookup: an inbound packet is searched for with the outbound lookup and   */
 /* swapped addresses, an outbound packet with the inbound lookup. Although  */
 /* doing the lookup is expensive (relatively speaking), it is not           */
 /* anticipated that this will be a frequent occurrence for normal traffic   */
 /* patterns.                                                                */
 /* ------------------------------------------------------------------------ */
 void
 ipf_nat_uncreate(fin)
 	fr_info_t *fin;
 {
 	ipf_main_softc_t *softc = fin->fin_main_soft;
 	ipf_nat_softc_t *softn = softc->ipf_nat_soft;
 	int nflags;
 	nat_t *nat;
 
 	/*
 	 * Only TCP and UDP carry a protocol flag into the lookup.
 	 */
 	if (fin->fin_p == IPPROTO_TCP)
 		nflags = IPN_TCP;
 	else if (fin->fin_p == IPPROTO_UDP)
 		nflags = IPN_UDP;
 	else
 		nflags = 0;
 
 	WRITE_ENTER(&softc->ipf_nat);
 
 	if (fin->fin_out != 0) {
 		nat = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p,
 				       fin->fin_src, fin->fin_dst);
 	} else {
 		nat = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p,
 					fin->fin_dst, fin->fin_src);
 	}
 
 	/*
 	 * ns_uncreate[0] counts entries found and destroyed, [1] counts
 	 * lookups that found nothing.
 	 */
 	if (nat == NULL) {
 		NBUMPSIDE(fin->fin_out, ns_uncreate[1]);
 	} else {
 		NBUMPSIDE(fin->fin_out, ns_uncreate[0]);
 		ipf_nat_delete(softc, nat, NL_DESTROY);
 	}
 
 	RWLOCK_EXIT(&softc->ipf_nat);
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_cmp_rules                                           */
 /* Returns:     int   - 0 == rules match, else rules do not match.          */
 /* Parameters:  n1(I) - first rule to compare                               */
 /*              n2(I) - second rule to compare                              */
 /*                                                                          */
 /* Compare two rules using pointers to each rule. A straight bcmp will not  */
 /* work as some fields (such as in_dst, in_pkts) actually do change once    */
 /* the rule has been loaded into the kernel, so the comparison is done in   */
 /* pieces: the sizes, the span from in_v up to in_ndst, the span from       */
 /* in_tuc to the end of the rule, then the type, function and address of    */
 /* each of the four address fields. The distinct non-zero return values     */
 /* identify which piece differed and exist purely to aid debugging; callers */
 /* should only care whether the result is zero or not.                      */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_cmp_rules(n1, n2)
 	ipnat_t *n1, *n2;
 {
 	int why;
 
 	if (n1->in_size != n2->in_size) {
 		why = 1;
 	} else if (bcmp((char *)&n1->in_v, (char *)&n2->in_v,
 			offsetof(ipnat_t, in_ndst) -
 			offsetof(ipnat_t, in_v)) != 0) {
 		why = 2;
 	} else if (bcmp((char *)&n1->in_tuc, (char *)&n2->in_tuc,
 			n1->in_size - offsetof(ipnat_t, in_tuc)) != 0) {
 		why = 3;
 	} else if (n1->in_ndst.na_atype != n2->in_ndst.na_atype) {
 		why = 5;
 	} else if (n1->in_ndst.na_function != n2->in_ndst.na_function) {
 		why = 6;
 	} else if (bcmp((char *)&n1->in_ndst.na_addr,
 			(char *)&n2->in_ndst.na_addr,
 			sizeof(n1->in_ndst.na_addr))) {
 		why = 7;
 	} else if (n1->in_nsrc.na_atype != n2->in_nsrc.na_atype) {
 		why = 8;
 	} else if (n1->in_nsrc.na_function != n2->in_nsrc.na_function) {
 		why = 9;
 	} else if (bcmp((char *)&n1->in_nsrc.na_addr,
 			(char *)&n2->in_nsrc.na_addr,
 			sizeof(n1->in_nsrc.na_addr))) {
 		why = 10;
 	} else if (n1->in_odst.na_atype != n2->in_odst.na_atype) {
 		why = 11;
 	} else if (n1->in_odst.na_function != n2->in_odst.na_function) {
 		why = 12;
 	} else if (bcmp((char *)&n1->in_odst.na_addr,
 			(char *)&n2->in_odst.na_addr,
 			sizeof(n1->in_odst.na_addr))) {
 		why = 13;
 	} else if (n1->in_osrc.na_atype != n2->in_osrc.na_atype) {
 		why = 14;
 	} else if (n1->in_osrc.na_function != n2->in_osrc.na_function) {
 		why = 15;
 	} else if (bcmp((char *)&n1->in_osrc.na_addr,
 			(char *)&n2->in_osrc.na_addr,
 			sizeof(n1->in_osrc.na_addr))) {
 		why = 16;
 	} else {
 		why = 0;
 	}
 
 	return why;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_rule_init                                           */
 /* Returns:     int   - 0 == success, else the error returned by the        */
 /*                      per-address-family rule address initialisation.     */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              softn(I) - pointer to NAT context structure                 */
 /*              n(I)     - rule to initialise                               */
 /*                                                                          */
 /* Prepare the derived fields of a NAT rule: set the new source/destination */
 /* address types from the range/split flags, reset the "next port" cursors  */
 /* and step counter, run the IPv4 (or IPv6) address initialisation and, for */
 /* a rule whose in_redir is exactly NAT_DIVERTUDP|NAT_MAP, record in        */
 /* in_dlocal the result of ipf_deliverlocal() for the divert destination.   */
 /* ------------------------------------------------------------------------ */
 static int
 ipf_nat_rule_init(softc, softn, n)
 	ipf_main_softc_t *softc;
 	ipf_nat_softc_t *softn;
 	ipnat_t *n;
 {
 	int error;
 
 	if ((n->in_flags & IPN_SIPRANGE) != 0)
 		n->in_nsrcatype = FRI_RANGE;
 
 	/*
 	 * IPN_SPLIT is checked last so that it overrides IPN_DIPRANGE.
 	 */
 	if ((n->in_flags & IPN_DIPRANGE) != 0)
 		n->in_ndstatype = FRI_RANGE;
 	if ((n->in_flags & IPN_SPLIT) != 0)
 		n->in_ndstatype = FRI_SPLIT;
 
 	/*
 	 * Start the port cursors at the bottom of their ranges.
 	 */
 	if ((n->in_redir & (NAT_MAP|NAT_REWRITE|NAT_DIVERTUDP)) != 0)
 		n->in_spnext = n->in_spmin;
 
 	if ((n->in_redir & (NAT_REWRITE|NAT_DIVERTUDP)) != 0 ||
 	    n->in_redir == NAT_REDIRECT)
 		n->in_dpnext = n->in_dpmin;
 
 	n->in_stepnext = 0;
 
 	switch (n->in_v[0])
 	{
 	case 4 :
 		error = ipf_nat_ruleaddrinit(softc, softn, n);
 		break;
 #ifdef USE_INET6
 	case 6 :
 		error = ipf_nat6_ruleaddrinit(softc, softn, n);
 		break;
 #endif
 	default :
 		error = 0;
 		break;
 	}
 	if (error != 0)
 		return error;
 
 	if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) {
 		/*
 		 * Prerecord whether or not the destination of the divert
 		 * is local or not to the interface the packet is going
 		 * to be sent out.
 		 */
 		n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1],
 						n->in_ifps[1], &n->in_ndstip6);
 	}
 
 	return 0;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_nat_rule_fini                                           */
 /* Returns:     Nil                                                         */
 /* Parameters:  softc(I) - pointer to soft context main structure           */
 /*              n(I)     - rule to work on                                  */
 /*                                                                          */
 /* This function is used to release any objects that were referenced during */
 /* the rule initialisation. This is useful both when free'ing the rule and  */
 /* when handling ioctls that need to initialise these fields but not        */
 /* actually use them after the ioctl processing has finished.               */
 /* Each of the four address fields of type FRI_LOOKUP that holds a pointer  */
 /* is dereferenced with ipf_lookup_deref(), and in_divmp is freed if set.   */
 /* ------------------------------------------------------------------------ */
 static void
 ipf_nat_rule_fini(softc, n)
 	ipf_main_softc_t *softc;
 	ipnat_t *n;
 {
 	/* Original destination. */
 	if (n->in_odst.na_ptr != NULL && n->in_odst.na_atype == FRI_LOOKUP) {
 		ipf_lookup_deref(softc, n->in_odst.na_type,
 				 n->in_odst.na_ptr);
 	}
 
 	/* Original source. */
 	if (n->in_osrc.na_ptr != NULL && n->in_osrc.na_atype == FRI_LOOKUP) {
 		ipf_lookup_deref(softc, n->in_osrc.na_type,
 				 n->in_osrc.na_ptr);
 	}
 
 	/* New destination. */
 	if (n->in_ndst.na_ptr != NULL && n->in_ndst.na_atype == FRI_LOOKUP) {
 		ipf_lookup_deref(softc, n->in_ndst.na_type,
 				 n->in_ndst.na_ptr);
 	}
 
 	/* New source. */
 	if (n->in_nsrc.na_ptr != NULL && n->in_nsrc.na_atype == FRI_LOOKUP) {
 		ipf_lookup_deref(softc, n->in_nsrc.na_type,
 				 n->in_nsrc.na_ptr);
 	}
 
 	if (n->in_divmp != NULL) {
 		FREE_MB_T(n->in_divmp);
 	}
 }
Index: head/sys/netinet/ip_carp.c
===================================================================
--- head/sys/netinet/ip_carp.c	(revision 280970)
+++ head/sys/netinet/ip_carp.c	(revision 280971)
@@ -1,2187 +1,2187 @@
 /*-
  * Copyright (c) 2002 Michael Shalayeff.
  * Copyright (c) 2003 Ryan McBride.
  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bpf.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/taskqueue.h>
 #include <sys/counter.h>
 
 #include <net/ethernet.h>
 #include <net/fddi.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/iso88025.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip.h>
 #include <machine/in_cksum.h>
 #endif
 #ifdef INET
 #include <netinet/ip_var.h>
 #include <netinet/if_ether.h>
 #endif
 
 #ifdef INET6
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #include <crypto/sha1.h>
 
 /* malloc(9) type used for CARP allocations. */
 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
 
 /*
  * Per-vhid CARP state.  One softc represents one virtual host id on one
  * parent interface; it is linked both on that interface's carp_if list
  * (sc_list) and on the global list (sc_next).
  */
 struct carp_softc {
 	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
 	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
 	struct sockaddr_dl	sc_addr;	/* Our link level address. */
 	struct callout		sc_ad_tmo;	/* Advertising timeout. */
 #ifdef INET
 	struct callout		sc_md_tmo;	/* Master down timeout. */
 #endif
 #ifdef INET6
 	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
 #endif
 	/* Per-softc lock, taken with CARP_LOCK()/CARP_UNLOCK(). */
 	struct mtx		sc_mtx;
 
 	/* Virtual host id; only the low 8 bits are fed into the HMAC. */
 	int			sc_vhid;
 	/* Advertisement skew; see DEMOTE_ADVSKEW() for the demoted value. */
 	int			sc_advskew;
 	int			sc_advbase;
 
 	/*
 	 * CARP_FOREACH_IFA() walks sc_naddrs + sc_naddrs6 entries of sc_ifas.
 	 * Presumably the IPv4 and IPv6 address counts respectively, with
 	 * sc_ifasiz the allocated size of sc_ifas - TODO confirm.
 	 */
 	int			sc_naddrs;
 	int			sc_naddrs6;
 	int			sc_ifasiz;
 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
 	int			sc_suppress;
 	int			sc_sendad_errors;
 #define	CARP_SENDAD_MAX_ERRORS	3
 	int			sc_sendad_success;
 #define	CARP_SENDAD_MIN_SUCCESS 3
 
 	int			sc_init_counter;
 	uint64_t		sc_counter;
 
 	/*
 	 * authentication: carp_hmac_prepare() derives sc_pad from sc_key
 	 * and stores the precomputed start of the inner hash in sc_sha1.
 	 */
 #define	CARP_HMAC_PAD	64
 	unsigned char sc_key[CARP_KEY_LEN];
 	unsigned char sc_pad[CARP_HMAC_PAD];
 	SHA1_CTX sc_sha1;
 
 	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
 	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
 };
 
 /*
  * Per-interface CARP state, reached through ifp->if_carp.  It heads the
  * list of softcs (one per vhid) configured on that interface.
  */
 struct carp_if {
 #ifdef INET
 	int	cif_naddrs;
 #endif
 #ifdef INET6
 	int	cif_naddrs6;
 #endif
 	/* Softcs on this interface, linked through sc_list. */
 	TAILQ_HEAD(, carp_softc) cif_vrs;
 #ifdef INET
 	struct ip_moptions 	 cif_imo;
 #endif
 #ifdef INET6
 	struct ip6_moptions 	 cif_im6o;
 #endif
 	struct ifnet	*cif_ifp;
 	/* Protects traversal of cif_vrs; see CIF_LOCK()/CIF_UNLOCK(). */
 	struct mtx	cif_mtx;
 	uint32_t	cif_flags;
 #define	CIF_PROMISC	0x00000001
 };
 
 /*
  * proto_reg[] has one slot per address family, both starting out as -1.
  * Presumably indexed by CARP_INET/CARP_INET6 to record protocol
  * registration - TODO confirm against the code that fills it in.
  */
 #define	CARP_INET	0
 #define	CARP_INET6	1
 static int proto_reg[] = {-1, -1};
 
 /*
  * Brief design of carp(4).
  *
  * Any carp-capable ifnet may have a list of carp softcs hanging off
  * its ifp->if_carp pointer. Each softc represents one unique virtual
  * host id, or vhid. The softc has a back pointer to the ifnet. All
  * softcs are joined in a global list, which has quite limited use.
  *
  * Any interface address that takes part in CARP negotiation has a
  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
  * AF_INET or AF_INET6 address.
  *
  * Although, one can get the softc's backpointer to ifnet and traverse
  * through its ifp->if_addrhead queue to find all interface addresses
  * involved in CARP, we keep a growable array of ifaddr pointers. This
  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
  * do calls into the network stack, thus avoiding LORs.
  *
  * Locking:
  *
  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
  * callout-driven events and ioctl()s.
  *
  * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
  * traverse the global list we use the mutex carp_mtx.
  *
  * Known issues with locking:
  *
  * - There is no protection for races between two ioctl() requests,
  *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
  *   interface ioctl()s should be serialized right in net/if.c.
  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
  *   counting is done on the softc.
  * - On module unload we may race (?) with packet processing thread
  *   dereferencing our function pointers.
  */
 
 /*
  * Per-vnet tunables.  Each is a VNET variable with a V_ accessor macro and
  * is exported under net.inet.carp by the SYSCTL declarations further down.
  */
 
 /* Accept incoming CARP packets. */
 static VNET_DEFINE(int, carp_allow) = 1;
 #define	V_carp_allow	VNET(carp_allow)
 
 /* Preempt slower nodes. */
 static VNET_DEFINE(int, carp_preempt) = 0;
 #define	V_carp_preempt	VNET(carp_preempt)
 
 /* Log level. */
 static VNET_DEFINE(int, carp_log) = 1;
 #define	V_carp_log	VNET(carp_log)
 
 /* Global advskew demotion. */
 static VNET_DEFINE(int, carp_demotion) = 0;
 #define	V_carp_demotion	VNET(carp_demotion)
 
 /* Send error demotion factor. */
 static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
 #define	V_carp_senderr_adj	VNET(carp_senderr_adj)
 
 /* Iface down demotion factor. */
 static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
 #define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)
 
 /* Handler behind the net.inet.carp.demotion sysctl declared below. */
 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
 
 /* The net.inet.carp sysctl tree. */
 SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(carp_log), 0, "CARP log level");
 /* "demotion" goes through a handler rather than exposing the int directly. */
 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
     0, 0, carp_demote_adj_sysctl, "I",
     "Adjust demotion factor (skew of advskew)");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
     CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
     CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(carp_ifdown_adj), 0,
     "Interface down demotion factor adjustment");
 
 /* Per-CPU, per-vnet statistics counters laid out as struct carpstats. */
 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
 VNET_PCPUSTAT_SYSINIT(carpstats);
 VNET_PCPUSTAT_SYSUNINIT(carpstats);
 
 /*
  * Add "val" to the counter for field "name" of struct carpstats.  The
  * counter index is the field's byte offset divided by sizeof(uint64_t).
  */
 #define	CARPSTATS_ADD(name, val)	\
     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
 	sizeof(uint64_t)], (val))
 #define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
 
 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
 
 /* Per-softc mutex (sc_mtx) wrappers. */
 #define	CARP_LOCK_INIT(sc)	mtx_init(&(sc)->sc_mtx, "carp_softc",   \
 	NULL, MTX_DEF)
 #define	CARP_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->sc_mtx)
 #define	CARP_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
 #define	CARP_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
 #define	CARP_UNLOCK(sc)		mtx_unlock(&(sc)->sc_mtx)
 /* Per-interface mutex (cif_mtx) wrappers. */
 #define	CIF_LOCK_INIT(cif)	mtx_init(&(cif)->cif_mtx, "carp_if",   \
 	NULL, MTX_DEF)
 #define	CIF_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->cif_mtx)
 #define	CIF_LOCK_ASSERT(cif)	mtx_assert(&(cif)->cif_mtx, MA_OWNED)
 #define	CIF_LOCK(cif)		mtx_lock(&(cif)->cif_mtx)
 #define	CIF_UNLOCK(cif)		mtx_unlock(&(cif)->cif_mtx)
 /*
  * Called with the cif lock held: hand the cif to carp_free_if() if no
  * softcs remain on it, otherwise just drop the lock.
  */
 #define	CIF_FREE(cif)	do {				\
 		CIF_LOCK_ASSERT(cif);			\
 		if (TAILQ_EMPTY(&(cif)->cif_vrs))	\
 			carp_free_if(cif);		\
 		else					\
 			CIF_UNLOCK(cif);		\
 } while (0)
 
 /* Log at LOG_INFO with a "carp: " prefix when the log level is above 0. */
 #define	CARP_LOG(...)	do {				\
 	if (V_carp_log > 0)				\
 		log(LOG_INFO, "carp: " __VA_ARGS__);	\
 } while (0)
 
 /* Log at LOG_DEBUG when the log level is above 1. */
 #define	CARP_DEBUG(...)	do {				\
 	if (V_carp_log > 1)				\
 		log(LOG_DEBUG, __VA_ARGS__);		\
 } while (0)
 
 /*
  * Iteration helpers.  Each expands to a lock assertion statement followed
  * by a loop header, so they must be used where two statements are legal
  * (i.e. not as the sole body of an unbraced if/else/for).
  */
 
 /* Visit every address on ifp that has a CARP softc attached. */
 #define	IFNET_FOREACH_IFA(ifp, ifa)					\
 	IF_ADDR_LOCK_ASSERT(ifp);					\
 	TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)		\
 		if ((ifa)->ifa_carp != NULL)
 
 /*
  * Visit the first sc_naddrs + sc_naddrs6 entries of sc->sc_ifas, stopping
  * early at a NULL entry.
  */
 #define	CARP_FOREACH_IFA(sc, ifa)					\
 	CARP_LOCK_ASSERT(sc);						\
 	for (int _i = 0;						\
 		_i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&		\
 		((ifa) = (sc)->sc_ifas[_i]) != NULL;			\
 		++_i)
 
 /* Visit every softc hanging off ifp->if_carp. */
 #define	IFNET_FOREACH_CARP(ifp, sc)					\
 	CIF_LOCK_ASSERT((ifp)->if_carp);				\
 	TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
 
 /* sc_advskew plus the global demotion, capped at CARP_MAXSKEW. */
 #define	DEMOTE_ADVSKEW(sc)					\
     (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?	\
     CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
 
 /* Forward declarations for the file-local functions. */
 static void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
 static struct carp_softc
 		*carp_alloc(struct ifnet *);
 static void	carp_detach_locked(struct ifaddr *);
 static void	carp_destroy(struct carp_softc *);
 static struct carp_if
 		*carp_alloc_if(struct ifnet *);
 static void	carp_free_if(struct carp_if *);
 static void	carp_set_state(struct carp_softc *, int, const char* reason);
 static void	carp_sc_state(struct carp_softc *);
 static void	carp_setrun(struct carp_softc *, sa_family_t);
 static void	carp_master_down(void *);
 static void	carp_master_down_locked(struct carp_softc *,
     		    const char* reason);
 static void	carp_send_ad(void *);
 static void	carp_send_ad_locked(struct carp_softc *);
 static void	carp_addroute(struct carp_softc *);
 static void	carp_ifa_addroute(struct ifaddr *);
 static void	carp_delroute(struct carp_softc *);
 static void	carp_ifa_delroute(struct ifaddr *);
 static void	carp_send_ad_all(void *, int);
 static void	carp_demote_adj(int, char *);
 
 /* Global list of all softcs (linked through sc_next). */
 static LIST_HEAD(, carp_softc) carp_list;
 /* Mutex used when traversing carp_list. */
 static struct mtx carp_mtx;
 /* Task whose handler is carp_send_ad_all(). */
 static struct task carp_sendall_task =
     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
 
 /*
  * Precompute the key-dependent part of the advertisement HMAC for sc.
  *
  * sc_pad is filled with the key XORed with the inner pad byte (0x36) and
  * fed into sc_sha1, followed by the CARP version, the advertisement type,
  * the low byte of the vhid and then every address on the softc in strictly
  * ascending order (IPv4 first, then IPv6, with any embedded scope cleared).
  * On return sc_pad has been flipped to the outer pad (0x5c) form.
  *
  * Must be called with the softc lock held.
  */
 static void
 carp_hmac_prepare(struct carp_softc *sc)
 {
 	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
 	uint8_t vhid = sc->sc_vhid & 0xff;
 	struct ifaddr *ifa;
 	int i, found;
 #ifdef INET
 	struct in_addr last, cur, in;
 #endif
 #ifdef INET6
 	struct in6_addr last6, cur6, in6;
 #endif
 
 	CARP_LOCK_ASSERT(sc);
 
 	/* Compute ipad from key. */
 	memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36;
 
 	/* Precompute first part of inner hash. */
 	SHA1Init(&sc->sc_sha1);
 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
 #ifdef INET
 	/*
 	 * Selection sort by repeated scan: each pass finds the smallest
 	 * address greater than the one hashed on the previous pass.
 	 */
 	cur.s_addr = 0;
 	for (;;) {
 		found = 0;
 		last = cur;
 		cur.s_addr = 0xffffffff;
 		CARP_FOREACH_IFA(sc, ifa) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 			if (ntohl(in.s_addr) <= ntohl(last.s_addr) ||
 			    ntohl(in.s_addr) >= ntohl(cur.s_addr))
 				continue;
 			cur.s_addr = in.s_addr;
 			found = 1;
 		}
 		if (!found)
 			break;
 		SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
 	}
 #endif /* INET */
 #ifdef INET6
 	/* Same scan for IPv6, comparing addresses bytewise. */
 	memset(&cur6, 0, sizeof(cur6));
 	for (;;) {
 		found = 0;
 		last6 = cur6;
 		memset(&cur6, 0xff, sizeof(cur6));
 		CARP_FOREACH_IFA(sc, ifa) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
 			/* Hash the address without its embedded scope. */
 			if (IN6_IS_SCOPE_EMBED(&in6))
 				in6.s6_addr16[1] = 0;
 			if (memcmp(&in6, &last6, sizeof(in6)) <= 0 ||
 			    memcmp(&in6, &cur6, sizeof(in6)) >= 0)
 				continue;
 			cur6 = in6;
 			found = 1;
 		}
 		if (!found)
 			break;
 		SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
 	}
 #endif /* INET6 */
 
 	/* convert ipad to opad */
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
 }
 
 /*
  * Compute the 20-byte digest for the given counter into md, starting from
  * the context that carp_hmac_prepare() left in sc_sha1 and finishing with
  * an outer hash over sc_pad.  The softc lock must be held.
  */
 static void
 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
     unsigned char md[20])
 {
 	SHA1_CTX ctx;
 
 	CARP_LOCK_ASSERT(sc);
 
 	/* Inner hash: resume from the saved context and append the counter. */
 	bcopy(&sc->sc_sha1, &ctx, sizeof(ctx));
 	SHA1Update(&ctx, (void *)counter, sizeof(sc->sc_counter));
 	SHA1Final(md, &ctx);
 
 	/* Outer hash: sc_pad followed by the inner digest. */
 	SHA1Init(&ctx);
 	SHA1Update(&ctx, sc->sc_pad, sizeof(sc->sc_pad));
 	SHA1Update(&ctx, md, 20);
 	SHA1Final(md, &ctx);
 }
 
 /*
  * Recompute the digest for counter and compare it with md.
  * Returns 0 when the digests are equal, non-zero otherwise (bcmp result).
  * The softc lock must be held.
  */
 static int
 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
     unsigned char md[20])
 {
 	unsigned char computed[20];
 
 	CARP_LOCK_ASSERT(sc);
 
 	carp_hmac_generate(sc, counter, computed);
 	return (bcmp(md, computed, sizeof(computed)));
 }
 
 /*
  * process input packet.
  * we have rearranged checks order compared to the rfc,
  * but it seems more efficient this way or not possible otherwise.
  */
 #ifdef INET
 /*
  * IPv4 input routine for CARP packets.
  *
  * Validates TTL, length and the CARP checksum, then passes the packet to
  * carp_input_c().  The mbuf is always consumed: *mp is set to NULL and
  * IPPROTO_DONE is returned on every path.
  */
 int
 carp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct carp_header *ch;
 	int iplen, len;
 
 	iplen = *offp;
 	*mp = NULL;
 
 	CARPSTATS_INC(carps_ipackets);
 
 	/* Drop everything while CARP is administratively disabled. */
 	if (!V_carp_allow) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* verify that the IP TTL is 255.  */
 	if (ip->ip_ttl != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
 		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
 		    ip->ip_ttl,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* From here on iplen is the IP header length taken from ip_hl. */
 	iplen = ip->ip_hl << 2;
 
 	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
 		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * NOTE(review): this comparison looks inverted -- it pulls up only
 	 * when the first mbuf already holds more than the headers.  The
 	 * unconditional m_pullup() further down covers the short case, so
 	 * this appears redundant rather than harmful; confirm before changing.
 	 */
 	if (iplen + sizeof(*ch) < m->m_len) {
 		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
 			CARPSTATS_INC(carps_hdrops);
 			CARP_DEBUG("%s: pullup failed\n", __func__);
 			return (IPPROTO_DONE);
 		}
 		ip = mtod(m, struct ip *);
 	}
 	ch = (struct carp_header *)((char *)ip + iplen);
 
 	/*
 	 * verify that the received packet length is
 	 * equal to the CARP header
 	 */
 	len = iplen + sizeof(*ch);
 	if (len > m->m_pkthdr.len) {
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
 		    m->m_pkthdr.len,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* Make IP + CARP headers contiguous; refresh pointers afterwards. */
 	if ((m = m_pullup(m, len)) == NULL) {
 		CARPSTATS_INC(carps_hdrops);
 		return (IPPROTO_DONE);
 	}
 	ip = mtod(m, struct ip *);
 	ch = (struct carp_header *)((char *)ip + iplen);
 
 	/* verify the CARP checksum */
 	/* m_data is advanced past the IP header only for the in_cksum() call. */
 	m->m_data += iplen;
 	if (in_cksum(m, len - iplen)) {
 		CARPSTATS_INC(carps_badsum);
 		CARP_DEBUG("%s: checksum failed on %s\n", __func__,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 	m->m_data -= iplen;
 
 	carp_input_c(m, ch, AF_INET);
 	return (IPPROTO_DONE);
 }
 #endif
 
 #ifdef INET6
 /*
  * IPv6 input routine for CARP packets.
  *
  * Validates that the receiving interface has CARP state, that the hop
  * limit is CARP_DFLTTL, that a full CARP header is present at *offp and
  * that its checksum is correct, then hands the packet to carp_input_c().
  * Always returns IPPROTO_DONE.
  */
 int
 carp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct carp_header *ch;
 	u_int len;
 
 	CARPSTATS_INC(carps_ipackets6);
 
 	if (!V_carp_allow) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* check if received on a valid carp interface */
 	if (m->m_pkthdr.rcvif->if_carp == NULL) {
 		CARPSTATS_INC(carps_badif);
 		CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
 		    __func__, m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* verify that the IP TTL is 255 */
 	if (ip6->ip6_hlim != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
 		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
 		    ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* verify that we have a complete carp packet */
 	/* len is captured before the macro only for the debug message below. */
 	len = m->m_len;
 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
 	if (ch == NULL) {
 		/* presumably the macro has already freed m here -- TODO confirm */
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
 		return (IPPROTO_DONE);
 	}
 
 
 	/* verify the CARP checksum */
 	/* m_data is advanced to the CARP header only for the in_cksum() call. */
 	m->m_data += *offp;
 	if (in_cksum(m, sizeof(*ch))) {
 		CARPSTATS_INC(carps_badsum);
 		CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 	m->m_data -= *offp;
 
 	carp_input_c(m, ch, AF_INET6);
 	return (IPPROTO_DONE);
 }
 #endif /* INET6 */
 
 /*
  * Address-family independent part of advertisement processing.
  *
  * Finds the softc on the receiving interface whose vhid and address family
  * match the packet, verifies the version and the hash, stores the
  * received counter and then runs the state machine: a MASTER steps down
  * when the sender's advertisement interval is not longer than ours; a
  * BACKUP may take over from a slower master or restarts its master-down
  * timer.  Consumes m on every path.
  */
 static void
 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ifaddr *ifa;
 	struct carp_softc *sc;
 	uint64_t tmp_counter;
 	struct timeval sc_tv, ch_tv;
 
 	/* verify that the VHID is valid on the receiving interface */
 	/* A reference is taken on the match so ifa survives the unlock. */
 	IF_ADDR_RLOCK(ifp);
 	IFNET_FOREACH_IFA(ifp, ifa)
 		if (ifa->ifa_addr->sa_family == af &&
 		    ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
 			ifa_ref(ifa);
 			break;
 		}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/* The list walk leaves ifa NULL when nothing matched. */
 	if (ifa == NULL) {
 		CARPSTATS_INC(carps_badvhid);
 		m_freem(m);
 		return;
 	}
 
 	/* verify the CARP version. */
 	if (ch->carp_version != CARP_VERSION) {
 		CARPSTATS_INC(carps_badver);
 		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
 		    ch->carp_version);
 		ifa_free(ifa);
 		m_freem(m);
 		return;
 	}
 
 	/* Switch from the ifa reference to the softc lock. */
 	sc = ifa->ifa_carp;
 	CARP_LOCK(sc);
 	ifa_free(ifa);
 
 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
 		CARPSTATS_INC(carps_badauth);
 		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
 		    sc->sc_vhid, ifp->if_xname);
 		goto out;
 	}
 
 	/* Reassemble the 64-bit counter from its two network-order halves. */
 	tmp_counter = ntohl(ch->carp_counter[0]);
 	tmp_counter = tmp_counter<<32;
 	tmp_counter += ntohl(ch->carp_counter[1]);
 
 	/* XXX Replay protection goes here */
 
 	sc->sc_init_counter = 0;
 	sc->sc_counter = tmp_counter;
 
 	/*
 	 * Express both advertisement intervals as timevals: advbase gives
 	 * the seconds, advskew/256 of a second the microseconds.  Our own
 	 * skew includes the current demotion.
 	 */
 	sc_tv.tv_sec = sc->sc_advbase;
 	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
 	ch_tv.tv_sec = ch->carp_advbase;
 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
 
 	switch (sc->sc_state) {
 	case INIT:
 		break;
 	case MASTER:
 		/*
 		 * If we receive an advertisement from a master who's going to
 		 * be more frequent than us, go into BACKUP state.
 		 */
 		/* Note that an equal interval also makes us step down. */
 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
 			callout_stop(&sc->sc_ad_tmo);
 			carp_set_state(sc, BACKUP,
 			    "more frequent advertisement received");
 			carp_setrun(sc, 0);
 			carp_delroute(sc);
 		}
 		break;
 	case BACKUP:
 		/*
 		 * If we're pre-empting masters who advertise slower than us,
 		 * and this one claims to be slower, treat him as down.
 		 */
 		if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
 			carp_master_down_locked(sc,
 			    "preempting a slower master");
 			break;
 		}
 
 		/*
 		 *  If the master is going to advertise at such a low frequency
 		 *  that he's guaranteed to time out, we'd might as well just
 		 *  treat him as timed out now.
 		 */
 		sc_tv.tv_sec = sc->sc_advbase * 3;
 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
 			carp_master_down_locked(sc, "master will time out");
 			break;
 		}
 
 		/*
 		 * Otherwise, we reset the counter and wait for the next
 		 * advertisement.
 		 */
 		carp_setrun(sc, af);
 		break;
 	}
 
 out:
 	CARP_UNLOCK(sc);
 	m_freem(m);
 }
 
 /*
  * Fill in the counter and digest of an outgoing advertisement header and
  * tag the mbuf with a pointer to the softc (PACKET_TAG_CARP), which
  * carp_output() reads back.
  *
  * Returns 0 on success.  On tag allocation failure the mbuf is freed and
  * ENOMEM is returned, so the caller must not touch m afterwards.
  */
 static int
 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
 {
 	struct m_tag *mtag;
 
 	/* Seed the 64-bit counter randomly while flagged, else increment it. */
 	if (sc->sc_init_counter) {
 		/* this could also be seconds since unix epoch */
 		sc->sc_counter = arc4random();
 		sc->sc_counter = sc->sc_counter << 32;
 		sc->sc_counter += arc4random();
 	} else
 		sc->sc_counter++;
 
 	/* The counter goes on the wire as two network-order 32-bit words. */
 	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
 
 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
 
 	/* Tag packet for carp_output */
 	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
 	    M_NOWAIT)) == NULL) {
 		m_freem(m);
 		CARPSTATS_INC(carps_onomem);
 		return (ENOMEM);
 	}
 	/* The tag payload is the softc pointer value itself. */
 	bcopy(&sc, mtag + 1, sizeof(sc));
 	m_tag_prepend(m, mtag);
 
 	return (0);
 }
 
 /*
  * Send an advertisement from every softc on carp_list that is currently
  * MASTER.  Task handler for carp_sendall_task; both arguments are unused.
  *
  * To avoid LORs and possible recursions this function shouldn't
  * be called directly, but scheduled via taskqueue.
  */
 static void
 carp_send_ad_all(void *ctx __unused, int pending __unused)
 {
 	struct carp_softc *sc;
 
 	/* carp_mtx protects the list walk; each softc is locked in turn. */
 	mtx_lock(&carp_mtx);
 	LIST_FOREACH(sc, &carp_list, sc_next)
 		if (sc->sc_state == MASTER) {
 			CARP_LOCK(sc);
 			CURVNET_SET(sc->sc_carpdev->if_vnet);
 			carp_send_ad_locked(sc);
 			CURVNET_RESTORE();
 			CARP_UNLOCK(sc);
 		}
 	mtx_unlock(&carp_mtx);
 }
 
 /*
  * Send a periodic advertisement, executed in callout context.
  *
  * Entered with the softc lock held and releases it before returning; the
  * callout is initialised with CALLOUT_RETURNUNLOCKED in carp_alloc().
  */
 static void
 carp_send_ad(void *v)
 {
 	struct carp_softc *sc = v;
 
 	CARP_LOCK_ASSERT(sc);
 	CURVNET_SET(sc->sc_carpdev->if_vnet);
 	carp_send_ad_locked(sc);
 	CURVNET_RESTORE();
 	CARP_UNLOCK(sc);
 }
 
 static void
 carp_send_ad_error(struct carp_softc *sc, int error)
 {
 
 	if (error) {
 		if (sc->sc_sendad_errors < INT_MAX)
 			sc->sc_sendad_errors++;
 		if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 			static const char fmt[] = "send error %d on %s";
 			char msg[sizeof(fmt) + IFNAMSIZ];
 
 			sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
 			carp_demote_adj(V_carp_senderr_adj, msg);
 		}
 		sc->sc_sendad_success = 0;
 	} else {
 		if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
 		    ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
 			static const char fmt[] = "send ok on %s";
 			char msg[sizeof(fmt) + IFNAMSIZ];
 
 			sprintf(msg, fmt, sc->sc_carpdev->if_xname);
 			carp_demote_adj(-V_carp_senderr_adj, msg);
 			sc->sc_sendad_errors = 0;
 		} else
 			sc->sc_sendad_errors = 0;
 	}
 }
 
 /*
  * Build and transmit one advertisement per configured address family and
  * re-arm the advertisement callout.  The softc lock must be held.
  *
  * A header template is filled in once and copied into each packet; the
  * counter, digest and checksum are then completed in place.  Any failure
  * jumps to the reschedule step, so an IPv4 failure also skips the IPv6
  * packet for this round.
  */
 static void
 carp_send_ad_locked(struct carp_softc *sc)
 {
 	struct carp_header ch;
 	struct timeval tv;
 	struct sockaddr sa;
 	struct ifaddr *ifa;
 	struct carp_header *ch_ptr;
 	struct mbuf *m;
 	int len, advskew;
 
 	CARP_LOCK_ASSERT(sc);
 
 	/* Interval until the next advertisement; used at "resched". */
 	advskew = DEMOTE_ADVSKEW(sc);
 	tv.tv_sec = sc->sc_advbase;
 	tv.tv_usec = advskew * 1000000 / 256;
 
 	ch.carp_version = CARP_VERSION;
 	ch.carp_type = CARP_ADVERTISEMENT;
 	ch.carp_vhid = sc->sc_vhid;
 	ch.carp_advbase = sc->sc_advbase;
 	ch.carp_advskew = advskew;
 	ch.carp_authlen = 7;	/* XXX DEFINE */
 	ch.carp_pad1 = 0;	/* must be zero */
 	ch.carp_cksum = 0;
 
 	/* XXXGL: OpenBSD picks first ifaddr with needed family. */
 
 #ifdef INET
 	if (sc->sc_naddrs) {
 		struct ip *ip;
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			CARPSTATS_INC(carps_onomem);
 			goto resched;
 		}
 		len = sizeof(*ip) + sizeof(ch);
 		m->m_pkthdr.len = len;
 		m->m_pkthdr.rcvif = NULL;
 		m->m_len = len;
 		M_ALIGN(m, m->m_len);
 		m->m_flags |= M_MCAST;
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(*ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		ip->ip_len = htons(len);
-		ip->ip_id = ip_newid();
 		ip->ip_off = htons(IP_DF);
 		ip->ip_ttl = CARP_DFLTTL;
 		ip->ip_p = IPPROTO_CARP;
 		ip->ip_sum = 0;
+		ip_fillid(ip);
 
 		/* Source: an AF_INET address of the parent, else 0. */
 		bzero(&sa, sizeof(sa));
 		sa.sa_family = AF_INET;
 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
 		if (ifa != NULL) {
 			ip->ip_src.s_addr =
 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 			ifa_free(ifa);
 		} else
 			ip->ip_src.s_addr = 0;
 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
 
 		ch_ptr = (struct carp_header *)(&ip[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		/* carp_prepare_ad() frees m itself on failure. */
 		if (carp_prepare_ad(m, sc, ch_ptr))
 			goto resched;
 
 		/* Checksum the CARP header only: step m_data past the IP header. */
 		m->m_data += sizeof(*ip);
 		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
 		m->m_data -= sizeof(*ip);
 
 		CARPSTATS_INC(carps_opackets);
 
 		carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
 		    &sc->sc_carpdev->if_carp->cif_imo, NULL));
 	}
 #endif /* INET */
 #ifdef INET6
 	if (sc->sc_naddrs6) {
 		struct ip6_hdr *ip6;
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			CARPSTATS_INC(carps_onomem);
 			goto resched;
 		}
 		len = sizeof(*ip6) + sizeof(ch);
 		m->m_pkthdr.len = len;
 		m->m_pkthdr.rcvif = NULL;
 		m->m_len = len;
 		M_ALIGN(m, m->m_len);
 		m->m_flags |= M_MCAST;
 		ip6 = mtod(m, struct ip6_hdr *);
 		bzero(ip6, sizeof(*ip6));
 		ip6->ip6_vfc |= IPV6_VERSION;
 		ip6->ip6_hlim = CARP_DFLTTL;
 		ip6->ip6_nxt = IPPROTO_CARP;
 		bzero(&sa, sizeof(sa));
 
 		/* set the source address */
 		sa.sa_family = AF_INET6;
 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
 		if (ifa != NULL) {
 			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
 			    sizeof(struct in6_addr));
 			ifa_free(ifa);
 		} else
 			/* This should never happen with IPv6. */
 			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
 
 		/* Set the multicast destination. */
 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 		ip6->ip6_dst.s6_addr8[15] = 0x12;
 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
 			m_freem(m);
 			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
 			goto resched;
 		}
 
 		ch_ptr = (struct carp_header *)(&ip6[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		/* carp_prepare_ad() frees m itself on failure. */
 		if (carp_prepare_ad(m, sc, ch_ptr))
 			goto resched;
 
 		/* Checksum the CARP header only: step m_data past the IPv6 header. */
 		m->m_data += sizeof(*ip6);
 		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
 		m->m_data -= sizeof(*ip6);
 
 		CARPSTATS_INC(carps_opackets6);
 
 		carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
 		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
 	}
 #endif /* INET6 */
 
 resched:
 	/* Always re-arm, whether or not anything was sent. */
 	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
 }
 
 static void
 carp_addroute(struct carp_softc *sc)
 {
 	struct ifaddr *ifa;
 
 	CARP_FOREACH_IFA(sc, ifa)
 		carp_ifa_addroute(ifa);
 }
 
 /*
  * Install the routes for one CARP address: prefix and loopback route for
  * IPv4, loopback route and link-layer entry for IPv6.  Other address
  * families are ignored.
  */
 static void
 carp_ifa_addroute(struct ifaddr *ifa)
 {
 
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET: {
 		struct in_ifaddr *ia = ifatoia(ifa);
 
 		in_addprefix(ia, RTF_UP);
 		ifa_add_loopback_route(ifa, (struct sockaddr *)&ia->ia_addr);
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6: {
 		struct in6_ifaddr *ia6 = ifatoia6(ifa);
 
 		ifa_add_loopback_route(ifa, (struct sockaddr *)&ia6->ia_addr);
 		nd6_add_ifa_lle(ia6);
 		break;
 	}
 #endif
 	}
 }
 
 static void
 carp_delroute(struct carp_softc *sc)
 {
 	struct ifaddr *ifa;
 
 	CARP_FOREACH_IFA(sc, ifa)
 		carp_ifa_delroute(ifa);
 }
 
 /*
  * Undo carp_ifa_addroute() for one address: drop the loopback route and
  * then scrub the prefix (IPv4) or remove the link-layer entry (IPv6).
  * Other address families are ignored.
  */
 static void
 carp_ifa_delroute(struct ifaddr *ifa)
 {
 
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET: {
 		struct in_ifaddr *ia = ifatoia(ifa);
 
 		ifa_del_loopback_route(ifa, (struct sockaddr *)&ia->ia_addr);
 		in_scrubprefix(ia, LLE_STATIC);
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6: {
 		struct in6_ifaddr *ia6 = ifatoia6(ifa);
 
 		ifa_del_loopback_route(ifa, (struct sockaddr *)&ia6->ia_addr);
 		nd6_rem_ifa_lle(ia6);
 		break;
 	}
 #endif
 	}
 }
 
 /*
  * Return non-zero when the softc attached to ifa is in MASTER state.
  * ifa->ifa_carp is dereferenced unconditionally, so it must be non-NULL.
  */
 int
 carp_master(struct ifaddr *ifa)
 {
 
 	return (ifa->ifa_carp->sc_state == MASTER);
 }
 
 #ifdef INET
 /*
  * Broadcast a gratuitous ARP request containing
  * the virtual router MAC address for each IP address
  * associated with the virtual router.
  */
 static void
 carp_send_arp(struct carp_softc *sc)
 {
 	struct ifaddr *ifa;
 
 	CARP_FOREACH_IFA(sc, ifa)
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
 }
 
 /*
  * If the softc attached to ifa is MASTER, store a pointer to the virtual
  * MAC address in *enaddr and return 1; otherwise return 0 and leave
  * *enaddr untouched.
  */
 int
 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
 {
 	struct carp_softc *sc = ifa->ifa_carp;
 
 	if (sc->sc_state != MASTER)
 		return (0);
 
 	*enaddr = LLADDR(&sc->sc_addr);
 	return (1);
 }
 #endif
 
 #ifdef INET6
 static void
 carp_send_na(struct carp_softc *sc)
 {
 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 	struct ifaddr *ifa;
 	struct in6_addr *in6;
 
 	CARP_FOREACH_IFA(sc, ifa) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		in6 = IFA_IN6(ifa);
 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
 		DELAY(1000);	/* XXX */
 	}
 }
 
 /*
  * Returns ifa in case it's a carp address and it is MASTER, or if the address
  * matches and is not a carp address.  Returns NULL otherwise.
  *
  * A non-NULL result carries a reference taken with ifa_ref(); the caller
  * is responsible for dropping it.
  */
 struct ifaddr *
 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 {
 	struct ifaddr *ifa;
 
 	ifa = NULL;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
 			continue;
 		/* First address match decides; the loop is left either way. */
 		if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
 			ifa = NULL;
 		else
 			ifa_ref(ifa);
 		break;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (ifa);
 }
 
 /*
  * Look for a CARP-managed IPv6 address on ifp equal to taddr.  On a match,
  * tag m with the owning softc (PACKET_TAG_CARP) and return a pointer to
  * the softc's virtual MAC address; return NULL when nothing matches.
  * If the tag cannot be allocated the MAC address is still returned.
  */
 caddr_t
 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	IFNET_FOREACH_IFA(ifp, ifa)
 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
 		    IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
 			struct carp_softc *sc = ifa->ifa_carp;
 			struct m_tag *mtag;
 
 			/* The address lock is dropped before sc is used. */
 			IF_ADDR_RUNLOCK(ifp);
 
 			mtag = m_tag_get(PACKET_TAG_CARP,
 			    sizeof(struct carp_softc *), M_NOWAIT);
 			if (mtag == NULL)
 				/* Better a bit than nothing. */
 				return (LLADDR(&sc->sc_addr));
 
 			/* The tag payload is the softc pointer value. */
 			bcopy(&sc, mtag + 1, sizeof(sc));
 			m_tag_prepend(m, mtag);
 
 			return (LLADDR(&sc->sc_addr));
 		}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (NULL);
 }
 #endif /* INET6 */
 
 /*
  * Return 1 if dhost is the virtual MAC address of a softc on ifp that is
  * currently MASTER, 0 otherwise.
  */
 int
 carp_forus(struct ifnet *ifp, u_char *dhost)
 {
 	struct carp_softc *sc;
 	uint8_t *ena = dhost;
 
 	/* Cheap reject: a virtual MAC has the form 00:00:5e:00:01:xx. */
 	if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 		return (0);
 
 	/* ifp->if_carp is dereferenced here; callers must ensure it is set. */
 	CIF_LOCK(ifp->if_carp);
 	IFNET_FOREACH_CARP(ifp, sc) {
 		CARP_LOCK(sc);
 		if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
 		    ETHER_ADDR_LEN)) {
 			CARP_UNLOCK(sc);
 			CIF_UNLOCK(ifp->if_carp);
 			return (1);
 		}
 		CARP_UNLOCK(sc);
 	}
 	CIF_UNLOCK(ifp->if_carp);
 
 	return (0);
 }
 
 /*
  * Master down timeout event, executed in callout context.
  *
  * Entered with the softc lock held and releases it before returning.
  * Only acts when the softc is still BACKUP.
  */
 static void
 carp_master_down(void *v)
 {
 	struct carp_softc *sc = v;
 
 	CARP_LOCK_ASSERT(sc);
 
 	CURVNET_SET(sc->sc_carpdev->if_vnet);
 	if (sc->sc_state == BACKUP) {
 		carp_master_down_locked(sc, "master timed out");
 	}
 	CURVNET_RESTORE();
 
 	CARP_UNLOCK(sc);
 }
 
 /*
  * Promote a BACKUP softc to MASTER: change state, send an advertisement,
  * announce the addresses (ARP / neighbor advertisement), re-arm the timers
  * and install the routes.  "reason" is passed on to carp_set_state().
  * The softc lock must be held.
  *
  * Calling this in INIT or MASTER state panics under INVARIANTS and is a
  * no-op otherwise.
  */
 static void
 carp_master_down_locked(struct carp_softc *sc, const char *reason)
 {
 
 	CARP_LOCK_ASSERT(sc);
 
 	switch (sc->sc_state) {
 	case BACKUP:
 		carp_set_state(sc, MASTER, reason);
 		carp_send_ad_locked(sc);
 #ifdef INET
 		carp_send_arp(sc);
 #endif
 #ifdef INET6
 		carp_send_na(sc);
 #endif
 		carp_setrun(sc, 0);
 		carp_addroute(sc);
 		break;
 	case INIT:
 	case MASTER:
 #ifdef INVARIANTS
 		panic("carp: VHID %u@%s: master_down event in %s state\n",
 		    sc->sc_vhid,
 		    sc->sc_carpdev->if_xname,
 		    sc->sc_state ? "MASTER" : "INIT");
 #endif
 		break;
 	}
 }
 
 /*
  * (Re)arm the timer that matches the current state of the softc.
  *
  * When in backup state, af indicates whether to reset the master down timer
  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
  *
  * Does nothing unless the parent interface is up with link and the softc
  * has at least one address.  The softc lock must be held.
  */
 static void
 carp_setrun(struct carp_softc *sc, sa_family_t af)
 {
 	struct timeval tv;
 
 	CARP_LOCK_ASSERT(sc);
 
 	if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
 	    sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 	    (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
 		return;
 
 	switch (sc->sc_state) {
 	case INIT:
 		/* Leave INIT for BACKUP, then recurse once to arm its timers. */
 		carp_set_state(sc, BACKUP, "initialization complete");
 		carp_setrun(sc, 0);
 		break;
 	case BACKUP:
 		/* Master-down timeout: 3 * advbase seconds plus advskew/256 s. */
 		callout_stop(&sc->sc_ad_tmo);
 		tv.tv_sec = 3 * sc->sc_advbase;
 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
 #endif
 		default:
 			/* No family given: arm one timer per family in use. */
 #ifdef INET
 			if (sc->sc_naddrs)
 				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
 #endif
 #ifdef INET6
 			if (sc->sc_naddrs6)
 				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
 #endif
 			break;
 		}
 		break;
 	case MASTER:
 		/* Next advertisement after advbase seconds plus advskew/256 s. */
 		tv.tv_sec = sc->sc_advbase;
 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 		    carp_send_ad, sc);
 		break;
 	}
 }
 
 /*
  * Setup multicast structures.
  *
  * Allocates the membership array of the per-interface multicast options
  * for address family "sa" and joins the CARP group (plus, for IPv6, a
  * second group built as ff02::1:ff00:0 before scoping).  A non-NULL
  * membership pointer means "already set up" and makes the function
  * return 0 immediately.
  *
  * Every failure path therefore resets the membership pointer to NULL after
  * freeing it; leaving it dangling would make the next call report success
  * while the options still point at freed memory.
  *
  * Returns 0 on success, ENOMEM if the array cannot be allocated, or the
  * error from in_joingroup() / in6_setscope() / in6_mc_join().  Families
  * that are not compiled in are ignored and yield 0.
  * The per-interface CARP lock must be held.
  */
 static int
 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
 {
 	struct ifnet *ifp = cif->cif_ifp;
 	int error = 0;
 
 	CIF_LOCK_ASSERT(cif);
 
 	switch (sa) {
 #ifdef INET
 	case AF_INET:
 	    {
 		struct ip_moptions *imo = &cif->cif_imo;
 		struct in_addr addr;
 
 		if (imo->imo_membership)
 			return (0);
 
 		imo->imo_membership = (struct in_multi **)malloc(
 		    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
 		    M_NOWAIT);
 		if (imo->imo_membership == NULL)
 			return (ENOMEM);
 		imo->imo_mfilters = NULL;
 		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 		imo->imo_multicast_vif = -1;
 
 		addr.s_addr = htonl(INADDR_CARP_GROUP);
 		if ((error = in_joingroup(ifp, &addr, NULL,
 		    &imo->imo_membership[0])) != 0) {
 			free(imo->imo_membership, M_CARP);
 			imo->imo_membership = NULL;
 			break;
 		}
 		imo->imo_num_memberships++;
 		imo->imo_multicast_ifp = ifp;
 		imo->imo_multicast_ttl = CARP_DFLTTL;
 		imo->imo_multicast_loop = 0;
 		break;
 	    }
 #endif
 #ifdef INET6
 	case AF_INET6:
 	    {
 		struct ip6_moptions *im6o = &cif->cif_im6o;
 		struct in6_addr in6;
 		struct in6_multi *in6m;
 
 		if (im6o->im6o_membership)
 			return (0);
 
 		im6o->im6o_membership = (struct in6_multi **)malloc(
 		    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
 		    M_ZERO | M_NOWAIT);
 		if (im6o->im6o_membership == NULL)
 			return (ENOMEM);
 		im6o->im6o_mfilters = NULL;
 		im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
 		im6o->im6o_multicast_hlim = CARP_DFLTTL;
 		im6o->im6o_multicast_ifp = ifp;
 
 		/* Join IPv6 CARP multicast group. */
 		bzero(&in6, sizeof(in6));
 		in6.s6_addr16[0] = htons(0xff02);
 		in6.s6_addr8[15] = 0x12;
 		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 			free(im6o->im6o_membership, M_CARP);
 			im6o->im6o_membership = NULL;
 			break;
 		}
 		in6m = NULL;
 		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
 			free(im6o->im6o_membership, M_CARP);
 			im6o->im6o_membership = NULL;
 			break;
 		}
 		im6o->im6o_membership[0] = in6m;
 		im6o->im6o_num_memberships++;
 
 		/* Join solicited multicast address. */
 		bzero(&in6, sizeof(in6));
 		in6.s6_addr16[0] = htons(0xff02);
 		in6.s6_addr32[1] = 0;
 		in6.s6_addr32[2] = htonl(1);
 		in6.s6_addr32[3] = 0;
 		in6.s6_addr8[12] = 0xff;
 		/* From here on the first group must be left again on failure. */
 		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 			in6_mc_leave(im6o->im6o_membership[0], NULL);
 			free(im6o->im6o_membership, M_CARP);
 			im6o->im6o_membership = NULL;
 			break;
 		}
 		in6m = NULL;
 		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
 			in6_mc_leave(im6o->im6o_membership[0], NULL);
 			free(im6o->im6o_membership, M_CARP);
 			im6o->im6o_membership = NULL;
 			break;
 		}
 		im6o->im6o_membership[1] = in6m;
 		im6o->im6o_num_memberships++;
 		break;
 	    }
 #endif
 	}
 
 	return (error);
 }
 
 /*
  * Free multicast structures.
  *
  * Once the per-interface address count for family "sa" has dropped to
  * zero, leave the group(s) joined by carp_multicast_setup() and free the
  * membership array, resetting the pointer to NULL so that setup can run
  * again.  Does nothing while addresses of that family remain.
  * The per-interface CARP lock must be held.
  */
 static void
 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
 {
 
 	CIF_LOCK_ASSERT(cif);
 	switch (sa) {
 #ifdef INET
 	case AF_INET:
 		if (cif->cif_naddrs == 0) {
 			struct ip_moptions *imo = &cif->cif_imo;
 
 			in_leavegroup(imo->imo_membership[0], NULL);
 			KASSERT(imo->imo_mfilters == NULL,
 			    ("%s: imo_mfilters != NULL", __func__));
 			free(imo->imo_membership, M_CARP);
 			imo->imo_membership = NULL;
 
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (cif->cif_naddrs6 == 0) {
 			struct ip6_moptions *im6o = &cif->cif_im6o;
 
 			/* Both groups joined at setup time are left. */
 			in6_mc_leave(im6o->im6o_membership[0], NULL);
 			in6_mc_leave(im6o->im6o_membership[1], NULL);
 			KASSERT(im6o->im6o_mfilters == NULL,
 			    ("%s: im6o_mfilters != NULL", __func__));
 			free(im6o->im6o_membership, M_CARP);
 			im6o->im6o_membership = NULL;
 		}
 		break;
 #endif
 	}
 }
 
 /*
  * Output hook: if the mbuf carries a PACKET_TAG_CARP tag, overwrite the
  * link-layer source address at the start of the mbuf with the virtual
  * router MAC address derived from the tagged softc's vhid.
  *
  * Returns 0 when there is nothing to do (no destination, unsupported
  * address family, no tag) or after rewriting; EOPNOTSUPP for interface
  * types that are not handled.
  */
 int
 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
 {
 	struct m_tag *mtag;
 	struct carp_softc *sc;
 
 	if (!sa)
 		return (0);
 
 	/* Only the address families compiled in are processed. */
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		break;
 #endif
 	default:
 		return (0);
 	}
 
 	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 	if (mtag == NULL)
 		return (0);
 
 	/* The tag payload is the softc pointer stored by the tagging code. */
 	bcopy(mtag + 1, &sc, sizeof(sc));
 
 	/* Set the source MAC address to the Virtual Router MAC Address. */
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_BRIDGE:
 	case IFT_L2VLAN: {
 			struct ether_header *eh;
 
 			eh = mtod(m, struct ether_header *);
 			eh->ether_shost[0] = 0;
 			eh->ether_shost[1] = 0;
 			eh->ether_shost[2] = 0x5e;
 			eh->ether_shost[3] = 0;
 			eh->ether_shost[4] = 1;
 			eh->ether_shost[5] = sc->sc_vhid;
 		}
 		break;
 	case IFT_FDDI: {
 			struct fddi_header *fh;
 
 			fh = mtod(m, struct fddi_header *);
 			fh->fddi_shost[0] = 0;
 			fh->fddi_shost[1] = 0;
 			fh->fddi_shost[2] = 0x5e;
 			fh->fddi_shost[3] = 0;
 			fh->fddi_shost[4] = 1;
 			fh->fddi_shost[5] = sc->sc_vhid;
 		}
 		break;
 	case IFT_ISO88025: {
  			struct iso88025_header *th;
  			th = mtod(m, struct iso88025_header *);
 			/* Token ring uses a vhid-dependent bit pattern instead. */
 			th->iso88025_shost[0] = 3;
 			th->iso88025_shost[1] = 0;
 			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
 			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
 			th->iso88025_shost[4] = 0;
 			th->iso88025_shost[5] = 0;
 		}
 		break;
 	default:
 		printf("%s: carp is not supported for the %d interface type\n",
 		    ifp->if_xname, ifp->if_type);
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 /*
  * Allocate and initialise a softc for ifp and link it onto both the
  * per-interface list (cif_vrs) and the global carp_list.  The
  * per-interface structure is created first if ifp has none yet.
  * Allocations use M_WAITOK.
  *
  * The softc is returned unlocked, in INIT state, with sc_vhid set to -1;
  * the caller is expected to assign a real vhid.
  */
 static struct carp_softc*
 carp_alloc(struct ifnet *ifp)
 {
 	struct carp_softc *sc;
 	struct carp_if *cif;
 
 	if ((cif = ifp->if_carp) == NULL)
 		cif = carp_alloc_if(ifp);
 
 	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
 
 	sc->sc_advbase = CARP_DFLTINTV;
 	sc->sc_vhid = -1;	/* required setting */
 	sc->sc_init_counter = 1;
 	sc->sc_state = INIT;
 
 	/* Start with room for one address; carp_grow_ifas() doubles it. */
 	sc->sc_ifasiz = sizeof(struct ifaddr *);
 	sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
 	sc->sc_carpdev = ifp;
 
 	CARP_LOCK_INIT(sc);
 	/*
 	 * All callouts are tied to the softc mutex and flagged
 	 * CALLOUT_RETURNUNLOCKED, so their handlers release the lock.
 	 */
 #ifdef INET
 	callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 #endif
 #ifdef INET6
 	callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 #endif
 	callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 
 	CIF_LOCK(cif);
 	TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
 	CIF_UNLOCK(cif);
 
 	mtx_lock(&carp_mtx);
 	LIST_INSERT_HEAD(&carp_list, sc, sc_next);
 	mtx_unlock(&carp_mtx);
 
 	return (sc);
 }
 
 /*
  * Double the capacity of the softc's address array, preserving the
  * current contents and zero-filling the new half.
  * Returns 0 on success or ENOMEM, in which case the array is unchanged.
  * The softc lock must be held; the allocation does not sleep.
  */
 static int
 carp_grow_ifas(struct carp_softc *sc)
 {
 	struct ifaddr **bigger;
 
 	CARP_LOCK_ASSERT(sc);
 
 	bigger = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
 	if (bigger == NULL)
 		return (ENOMEM);
 
 	/* Move the old entries over, then swap the arrays. */
 	bcopy(sc->sc_ifas, bigger, sc->sc_ifasiz);
 	free(sc->sc_ifas, M_CARP);
 	sc->sc_ifas = bigger;
 	sc->sc_ifasiz *= 2;
 
 	return (0);
 }
 
 /*
  * Unlink the softc from the per-interface and global lists, undo its
  * interface-down demotion if one is active, drain its callouts and free
  * it.  The per-interface CARP lock must be held.
  */
 static void
 carp_destroy(struct carp_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_carpdev;
 	struct carp_if *cif = ifp->if_carp;
 
 	CIF_LOCK_ASSERT(cif);
 
 	TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
 
 	mtx_lock(&carp_mtx);
 	LIST_REMOVE(sc, sc_next);
 	mtx_unlock(&carp_mtx);
 
 	/*
 	 * NOTE(review): the softc lock is taken here and never released
 	 * before CARP_LOCK_DESTROY(); the callouts are also drained while
 	 * their associated mutex is held.  Confirm this is safe with the
 	 * callout and mutex implementations in use.
 	 */
 	CARP_LOCK(sc);
 	if (sc->sc_suppress)
 		carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
 	callout_drain(&sc->sc_ad_tmo);
 #ifdef INET
 	callout_drain(&sc->sc_md_tmo);
 #endif
 #ifdef INET6
 	callout_drain(&sc->sc_md6_tmo);
 #endif
 	CARP_LOCK_DESTROY(sc);
 
 	free(sc->sc_ifas, M_CARP);
 	free(sc, M_CARP);
 }
 
 /*
  * Allocate the per-interface CARP structure for ifp, try to put the
  * interface into promiscuous mode and publish the structure through
  * ifp->if_carp, taking a reference on ifp.
  *
  * A promiscuous-mode failure is only logged; CIF_PROMISC records whether
  * it succeeded so that carp_free_if() knows whether to undo it.
  */
 static struct carp_if*
 carp_alloc_if(struct ifnet *ifp)
 {
 	struct carp_if *cif;
 	int error;
 
 	cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
 
 	if ((error = ifpromisc(ifp, 1)) != 0)
 		printf("%s: ifpromisc(%s) failed: %d\n",
 		    __func__, ifp->if_xname, error);
 	else
 		cif->cif_flags |= CIF_PROMISC;
 
 	CIF_LOCK_INIT(cif);
 	cif->cif_ifp = ifp;
 	TAILQ_INIT(&cif->cif_vrs);
 
 	/* Publish under the address write lock. */
 	IF_ADDR_WLOCK(ifp);
 	ifp->if_carp = cif;
 	if_ref(ifp);
 	IF_ADDR_WUNLOCK(ifp);
 
 	return (cif);
 }
 
 /*
  * Tear down the per-interface CARP structure: detach it from the
  * interface, leave promiscuous mode if it had been enabled, drop the
  * interface reference and free the structure.  The softc list must
  * already be empty.
  */
 static void
 carp_free_if(struct carp_if *cif)
 {
 	struct ifnet *ifp = cif->cif_ifp;
 
 	CIF_LOCK_ASSERT(cif);
 	KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
 	    __func__));
 
 	IF_ADDR_WLOCK(ifp);
 	ifp->if_carp = NULL;
 	IF_ADDR_WUNLOCK(ifp);
 
 	/*
 	 * NOTE(review): the lock asserted above is destroyed without a
 	 * visible unlock in this function -- confirm this is intended.
 	 */
 	CIF_LOCK_DESTROY(cif);
 
 	if (cif->cif_flags & CIF_PROMISC)
 		ifpromisc(ifp, 0);
 	if_rele(ifp);
 
 	free(cif, M_CARP);
 }
 
 /*
  * Copy the user-visible settings of sc into carpr while holding the
  * softc lock.  The key is exported only when priv is non-zero; otherwise
  * the key field is zeroed.
  */
 static void
 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
 {
 
 	CARP_LOCK(sc);
 
 	carpr->carpr_vhid = sc->sc_vhid;
 	carpr->carpr_state = sc->sc_state;
 	carpr->carpr_advskew = sc->sc_advskew;
 	carpr->carpr_advbase = sc->sc_advbase;
 
 	if (!priv) {
 		bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
 	} else {
 		bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
 	}
 
 	CARP_UNLOCK(sc);
 }
 
 /*
  * Handle the CARP configuration ioctls for the interface named in ifr.
  *
  * SIOCSVH (requires PRIV_NETINET_CARP): create the vhid given in the
  * request if it does not exist on the interface yet, then apply advbase,
  * advskew, key and, for a softc that has left INIT, a requested state
  * change.
  *
  * SIOCGVH: with a non-zero vhid, copy out that vhid's settings; with vhid
  * 0, copy out one carpreq per vhid on the interface (the caller's
  * carpr_count must be large enough).  The key is only exported to
  * privileged callers.
  *
  * Returns 0 or an errno: the copyin/copyout error, ENXIO (no such
  * interface), EOPNOTSUPP (unsupported interface type), EADDRNOTAVAIL
  * (interface is not multicast capable), EINVAL (bad parameter or unknown
  * command), EMSGSIZE (result does not fit), ENOENT (no such vhid), or the
  * privilege check error.
  */
 int
 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
 {
 	struct carpreq carpr;
 	struct ifnet *ifp;
 	struct carp_softc *sc = NULL;
 	int error = 0, locked = 0;
 
 	if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
 		return (error);
 
 	/* Takes a reference on ifp; released at "out". */
 	ifp = ifunit_ref(ifr->ifr_name);
 	if (ifp == NULL)
 		return (ENXIO);
 
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_L2VLAN:
 	case IFT_BRIDGE:
 	case IFT_FDDI:
 	case IFT_ISO88025:
 		break;
 	default:
 		error = EOPNOTSUPP;
 		goto out;
 	}
 
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 		error = EADDRNOTAVAIL;
 		goto out;
 	}
 
 	switch (cmd) {
 	case SIOCSVH:
 		if ((error = priv_check(td, PRIV_NETINET_CARP)))
 			break;
 		if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
 		    carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
 			error = EINVAL;
 			break;
 		}
 
 		/* Look for an existing softc; sc stays NULL if none matches. */
 		if (ifp->if_carp) {
 			CIF_LOCK(ifp->if_carp);
 			IFNET_FOREACH_CARP(ifp, sc)
 				if (sc->sc_vhid == carpr.carpr_vhid)
 					break;
 			CIF_UNLOCK(ifp->if_carp);
 		}
 		if (sc == NULL) {
 			/* New vhid: virtual MAC is 00:00:5e:00:01:<vhid>. */
 			sc = carp_alloc(ifp);
 			CARP_LOCK(sc);
 			sc->sc_vhid = carpr.carpr_vhid;
 			LLADDR(&sc->sc_addr)[0] = 0;
 			LLADDR(&sc->sc_addr)[1] = 0;
 			LLADDR(&sc->sc_addr)[2] = 0x5e;
 			LLADDR(&sc->sc_addr)[3] = 0;
 			LLADDR(&sc->sc_addr)[4] = 1;
 			LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
 		} else
 			CARP_LOCK(sc);
 		/* From here on "out" must release the softc lock. */
 		locked = 1;
 		if (carpr.carpr_advbase > 0) {
 			if (carpr.carpr_advbase > 255 ||
 			    carpr.carpr_advbase < CARP_DFLTINTV) {
 				error = EINVAL;
 				break;
 			}
 			sc->sc_advbase = carpr.carpr_advbase;
 		}
 		if (carpr.carpr_advskew >= 255) {
 			error = EINVAL;
 			break;
 		}
 		sc->sc_advskew = carpr.carpr_advskew;
 		/* An empty key leaves the current key untouched. */
 		if (carpr.carpr_key[0] != '\0') {
 			bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 			carp_hmac_prepare(sc);
 		}
 		if (sc->sc_state != INIT &&
 		    carpr.carpr_state != sc->sc_state) {
 			switch (carpr.carpr_state) {
 			case BACKUP:
 				callout_stop(&sc->sc_ad_tmo);
 				carp_set_state(sc, BACKUP,
 				    "user requested via ifconfig");
 				carp_setrun(sc, 0);
 				carp_delroute(sc);
 				break;
 			case MASTER:
 				carp_master_down_locked(sc,
 				    "user requested via ifconfig");
 				break;
 			default:
 				break;
 			}
 		}
 		break;
 
 	case SIOCGVH:
 	    {
 		int priveleged;
 
 		if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
 			error = EINVAL;
 			break;
 		}
 		if (carpr.carpr_count < 1) {
 			error = EMSGSIZE;
 			break;
 		}
 		if (ifp->if_carp == NULL) {
 			error = ENOENT;
 			break;
 		}
 
 		priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
 		if (carpr.carpr_vhid != 0) {
 			/* Single vhid requested. */
 			CIF_LOCK(ifp->if_carp);
 			IFNET_FOREACH_CARP(ifp, sc)
 				if (sc->sc_vhid == carpr.carpr_vhid)
 					break;
 			CIF_UNLOCK(ifp->if_carp);
 			if (sc == NULL) {
 				error = ENOENT;
 				break;
 			}
 			carp_carprcp(&carpr, sc, priveleged);
 			error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
 		} else  {
 			int i, count;
 
 			/* vhid 0: dump all vhids into the caller's array. */
 			count = 0;
 			CIF_LOCK(ifp->if_carp);
 			IFNET_FOREACH_CARP(ifp, sc)
 				count++;
 
 			if (count > carpr.carpr_count) {
 				CIF_UNLOCK(ifp->if_carp);
 				error = EMSGSIZE;
 				break;
 			}
 
 			i = 0;
 			IFNET_FOREACH_CARP(ifp, sc) {
 				carp_carprcp(&carpr, sc, priveleged);
 				carpr.carpr_count = count;
 				error = copyout(&carpr, ifr->ifr_data +
 				    (i * sizeof(carpr)), sizeof(carpr));
 				/*
 				 * This break only leaves the list walk, so
 				 * the lock must not be dropped here: it is
 				 * released exactly once, right after the
 				 * loop.
 				 */
 				if (error)
 					break;
 				i++;
 			}
 			CIF_UNLOCK(ifp->if_carp);
 		}
 		break;
 	    }
 	default:
 		error = EINVAL;
 	}
 
 out:
 	if (locked)
 		CARP_UNLOCK(sc);
 	if_rele(ifp);
 
 	return (error);
 }
 
 /*
  * Return the vhid of the CARP softc that ifa is attached to, or 0 when
  * ifa is NULL or is not attached to any vhid.
  */
 static int
 carp_get_vhid(struct ifaddr *ifa)
 {
 
 	if (ifa != NULL && ifa->ifa_carp != NULL)
 		return (ifa->ifa_carp->sc_vhid);
 
 	return (0);
 }
 
 /*
  * Attach the address ifa to the CARP softc with the given vhid on ifa's
  * interface.
  *
  * Returns 0 on success (including when ifa is already attached to this
  * vhid), ENOPROTOOPT when the interface has no CARP state, EPROTOTYPE for
  * an address family other than the compiled-in AF_INET/AF_INET6, ENOENT
  * when no softc with that vhid exists, or the error reported by
  * carp_multicast_setup() / carp_grow_ifas().
  */
 int
 carp_attach(struct ifaddr *ifa, int vhid)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct carp_if *cif = ifp->if_carp;
 	struct carp_softc *sc;
 	int index, error;
 
 	if (ifp->if_carp == NULL)
 		return (ENOPROTOOPT);
 
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET:
 #endif
 #ifdef INET6
 	case AF_INET6:
 #endif
 		break;
 	default:
 		return (EPROTOTYPE);
 	}
 
 	/* Find the softc for this vhid; sc is NULL if the loop runs out. */
 	CIF_LOCK(cif);
 	IFNET_FOREACH_CARP(ifp, sc)
 		if (sc->sc_vhid == vhid)
 			break;
 	if (sc == NULL) {
 		CIF_UNLOCK(cif);
 		return (ENOENT);
 	}
 
 	/* Already attached: move it off a different vhid, or nothing to do. */
 	if (ifa->ifa_carp) {
 		if (ifa->ifa_carp->sc_vhid != vhid)
 			carp_detach_locked(ifa);
 		else {
 			CIF_UNLOCK(cif);
 			return (0);
 		}
 	}
 
 	/*
 	 * NOTE(review): the error paths below leave through CIF_FREE() rather
 	 * than CIF_UNLOCK(); presumably it also disposes of a cif that is no
 	 * longer in use -- confirm against the macro definition.
 	 */
 	error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
 	if (error) {
 		CIF_FREE(cif);
 		return (error);
 	}
 
 	CARP_LOCK(sc);
 	/* index is the 1-based slot the new address will occupy. */
 	index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
 	if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
 		if ((error = carp_grow_ifas(sc)) != 0) {
 			/* Undo the multicast setup done above. */
 			carp_multicast_cleanup(cif,
 			    ifa->ifa_addr->sa_family);
 			CARP_UNLOCK(sc);
 			CIF_FREE(cif);
 			return (error);
 		}
 
 	/* Account for the address both per interface and per softc. */
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET:
 		cif->cif_naddrs++;
 		sc->sc_naddrs++;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		cif->cif_naddrs6++;
 		sc->sc_naddrs6++;
 		break;
 #endif
 	}
 
 	/* The softc keeps its own reference on the address. */
 	ifa_ref(ifa);
 	sc->sc_ifas[index - 1] = ifa;
 	ifa->ifa_carp = sc;
 
 	/* The address set changed: redo the HMAC and re-evaluate state. */
 	carp_hmac_prepare(sc);
 	carp_sc_state(sc);
 
 	CARP_UNLOCK(sc);
 	CIF_UNLOCK(cif);
 
 	return (0);
 }
 
 /*
  * Detach ifa from the CARP softc it is attached to.  Takes the interface's
  * CIF lock around carp_detach_locked() and leaves through CIF_FREE().
  */
 void
 carp_detach(struct ifaddr *ifa)
 {
 	struct carp_if *cif;
 
 	cif = ifa->ifa_ifp->if_carp;
 
 	CIF_LOCK(cif);
 	carp_detach_locked(ifa);
 	CIF_FREE(cif);
 }
 
 /*
  * Detach ifa from its CARP softc.  The caller must hold the CIF lock of
  * ifa's interface (asserted below) and ifa must be attached (ifa_carp
  * non-NULL).  When the softc is left with no addresses it is destroyed.
  */
 static void
 carp_detach_locked(struct ifaddr *ifa)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct carp_if *cif = ifp->if_carp;
 	struct carp_softc *sc = ifa->ifa_carp;
 	int i, index;
 
 	KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
 
 	CIF_LOCK_ASSERT(cif);
 	CARP_LOCK(sc);
 
 	/* Shift array. */
 	/* index is the number of addresses currently stored in sc_ifas[]. */
 	index = sc->sc_naddrs + sc->sc_naddrs6;
 	for (i = 0; i < index; i++)
 		if (sc->sc_ifas[i] == ifa)
 			break;
 	KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
 	/* Close the gap left by ifa and clear the now unused last slot. */
 	for (; i < index - 1; i++)
 		sc->sc_ifas[i] = sc->sc_ifas[i+1];
 	sc->sc_ifas[index - 1] = NULL;
 
 	/* Drop the per-interface and per-softc address counts. */
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET:
 		cif->cif_naddrs--;
 		sc->sc_naddrs--;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		cif->cif_naddrs6--;
 		sc->sc_naddrs6--;
 		break;
 #endif
 	}
 
 	carp_ifa_delroute(ifa);
 	carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
 
 	/* Break the back pointer, then release the address. */
 	ifa->ifa_carp = NULL;
 	ifa_free(ifa);
 
 	/* The address set changed: redo the HMAC and re-evaluate state. */
 	carp_hmac_prepare(sc);
 	carp_sc_state(sc);
 
 	/* The softc lock is dropped before carp_destroy() is called. */
 	if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
 		CARP_UNLOCK(sc);
 		carp_destroy(sc);
 	} else
 		CARP_UNLOCK(sc);
 }
 
 /*
  * Move sc to the given state.  A real transition is logged together with
  * the reason and announced through devctl_notify() under the subsystem
  * name "<vhid>@<ifname>"; setting the current state again does nothing.
  * The softc lock must be held.
  */
 static void
 carp_set_state(struct carp_softc *sc, int state, const char *reason)
 {
 	const char *state_names[] = { CARP_STATES };
 	char tag[IFNAMSIZ+5];
 
 	CARP_LOCK_ASSERT(sc);
 
 	if (sc->sc_state == state)
 		return;
 
 	snprintf(tag, sizeof(tag), "%u@%s", sc->sc_vhid,
 	    sc->sc_carpdev->if_xname);
 
 	/* Log while sc_state still holds the old state. */
 	CARP_LOG("%s: %s -> %s (%s)\n", tag,
 	    state_names[sc->sc_state], state_names[state], reason);
 
 	sc->sc_state = state;
 
 	devctl_notify("CARP", tag, state_names[state], NULL);
 }
 
 /*
  * Re-evaluate the state of every CARP softc configured on ifp.  Each softc
  * is locked individually while the interface's CIF lock is held.
  */
 static void
 carp_linkstate(struct ifnet *ifp)
 {
 	struct carp_if *cif;
 	struct carp_softc *sc;
 
 	cif = ifp->if_carp;
 
 	CIF_LOCK(cif);
 	IFNET_FOREACH_CARP(ifp, sc) {
 		CARP_LOCK(sc);
 		carp_sc_state(sc);
 		CARP_UNLOCK(sc);
 	}
 	CIF_UNLOCK(cif);
 }
 
 /*
  * Bring sc in line with the state of its parent interface.  The softc is
  * put into INIT and rescheduled in both cases; while the interface is
  * unusable the timers are stopped first and the global demotion counter
  * is raised once, and lowered again once the interface comes back.
  * The softc lock must be held.
  */
 static void
 carp_sc_state(struct carp_softc *sc)
 {
 	int usable;
 
 	CARP_LOCK_ASSERT(sc);
 
 	/* Usable means: link is up and the interface is marked IFF_UP. */
 	usable = (sc->sc_carpdev->if_link_state == LINK_STATE_UP &&
 	    (sc->sc_carpdev->if_flags & IFF_UP));
 
 	if (usable) {
 		carp_set_state(sc, INIT, "hardware interface up");
 		carp_setrun(sc, 0);
 		/* Undo the demotion applied when the interface went down. */
 		if (sc->sc_suppress)
 			carp_demote_adj(-V_carp_ifdown_adj, "interface up");
 		sc->sc_suppress = 0;
 		return;
 	}
 
 	callout_stop(&sc->sc_ad_tmo);
 #ifdef INET
 	callout_stop(&sc->sc_md_tmo);
 #endif
 #ifdef INET6
 	callout_stop(&sc->sc_md6_tmo);
 #endif
 	carp_set_state(sc, INIT, "hardware interface down");
 	carp_setrun(sc, 0);
 	/* sc_suppress makes sure the demotion is applied only once. */
 	if (!sc->sc_suppress)
 		carp_demote_adj(V_carp_ifdown_adj, "interface down");
 	sc->sc_suppress = 1;
 }
 
 /*
  * Adjust the global demotion counter V_carp_demotion by adj (which may be
  * negative), log the change together with the reason, and queue
  * carp_sendall_task on taskqueue_swi.
  */
 static void
 carp_demote_adj(int adj, char *reason)
 {
 	atomic_add_int(&V_carp_demotion, adj);
 	CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
 	taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
 }
 
 static int
 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int new, error;
 
 	new = V_carp_demotion;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	carp_demote_adj(new, "sysctl");
 
 	return (0);
 }
 
 #ifdef INET
 /*
  * Protocol switch entry for IPPROTO_CARP raw sockets in the inet domain.
  * Input goes to carp_input(); output, ctloutput and the user requests use
  * the rip_* handlers.  Registered by carp_mod_load().
  */
 extern  struct domain inetdomain;
 static struct protosw in_carp_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_CARP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		carp_input,
 	.pr_output =		rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 #endif
 
 #ifdef INET6
 /*
  * Protocol switch entry for IPPROTO_CARP raw sockets in the inet6 domain.
  * Input goes to carp6_input(); output, ctloutput and the user requests
  * use the rip6_* handlers.  Registered by carp_mod_load().
  */
 extern	struct domain inet6domain;
 static struct protosw in6_carp_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_CARP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		carp6_input,
 	.pr_output =		rip6_output,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_usrreqs =		&rip6_usrreqs
 };
 #endif
 
 /*
  * Undo carp_mod_load(): unregister the protocols that were registered,
  * clear every carp_*_p hook, drain the pending send-all task and destroy
  * carp_mtx.
  *
  * The function ends by unlocking carp_mtx, so it expects to be entered
  * with carp_mtx held (as the MOD_UNLOAD path in carp_modevent() does).
  */
 static void
 carp_mod_cleanup(void)
 {
 
 #ifdef INET
 	/* proto_reg[] == 0 marks a registration that succeeded. */
 	if (proto_reg[CARP_INET] == 0) {
 		(void)ipproto_unregister(IPPROTO_CARP);
 		pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
 		proto_reg[CARP_INET] = -1;
 	}
 	carp_iamatch_p = NULL;
 #endif
 #ifdef INET6
 	if (proto_reg[CARP_INET6] == 0) {
 		(void)ip6proto_unregister(IPPROTO_CARP);
 		pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
 		proto_reg[CARP_INET6] = -1;
 	}
 	carp_iamatch6_p = NULL;
 	carp_macmatch6_p = NULL;
 #endif
 	/* Clear the function pointers installed by carp_mod_load(). */
 	carp_ioctl_p = NULL;
 	carp_attach_p = NULL;
 	carp_detach_p = NULL;
 	carp_get_vhid_p = NULL;
 	carp_linkstate_p = NULL;
 	carp_forus_p = NULL;
 	carp_output_p = NULL;
 	carp_demote_adj_p = NULL;
 	carp_master_p = NULL;
 	/* Drop the mutex before draining the task and destroying the mutex. */
 	mtx_unlock(&carp_mtx);
 	taskqueue_drain(taskqueue_swi, &carp_sendall_task);
 	mtx_destroy(&carp_mtx);
 }
 
 /*
  * Module load: initialise carp_mtx and the softc list, install the
  * carp_*_p hooks and register IPPROTO_CARP with the compiled-in address
  * families.
  *
  * Returns 0 on success.  On a registration failure everything done so far
  * is undone through carp_mod_cleanup() and the failing call's error is
  * returned.
  */
 static int
 carp_mod_load(void)
 {
 	int err;
 
 	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 	LIST_INIT(&carp_list);
 	carp_get_vhid_p = carp_get_vhid;
 	carp_forus_p = carp_forus;
 	carp_output_p = carp_output;
 	carp_linkstate_p = carp_linkstate;
 	carp_ioctl_p = carp_ioctl;
 	carp_attach_p = carp_attach;
 	carp_detach_p = carp_detach;
 	carp_demote_adj_p = carp_demote_adj;
 	carp_master_p = carp_master;
 	/*
 	 * carp_mod_cleanup() finishes by unlocking and destroying carp_mtx,
 	 * so every error path below takes the mutex before calling it, the
 	 * same way the MOD_UNLOAD path does.
 	 */
 #ifdef INET6
 	carp_iamatch6_p = carp_iamatch6;
 	carp_macmatch6_p = carp_macmatch6;
 	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 	    (struct protosw *)&in6_carp_protosw);
 	if (proto_reg[CARP_INET6]) {
 		printf("carp: error %d attaching to PF_INET6\n",
 		    proto_reg[CARP_INET6]);
 		mtx_lock(&carp_mtx);
 		carp_mod_cleanup();
 		return (proto_reg[CARP_INET6]);
 	}
 	err = ip6proto_register(IPPROTO_CARP);
 	if (err) {
 		printf("carp: error %d registering with INET6\n", err);
 		mtx_lock(&carp_mtx);
 		carp_mod_cleanup();
 		return (err);
 	}
 #endif
 #ifdef INET
 	carp_iamatch_p = carp_iamatch;
 	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
 	if (proto_reg[CARP_INET]) {
 		printf("carp: error %d attaching to PF_INET\n",
 		    proto_reg[CARP_INET]);
 		mtx_lock(&carp_mtx);
 		carp_mod_cleanup();
 		return (proto_reg[CARP_INET]);
 	}
 	err = ipproto_register(IPPROTO_CARP);
 	if (err) {
 		printf("carp: error %d registering with INET\n", err);
 		mtx_lock(&carp_mtx);
 		carp_mod_cleanup();
 		return (err);
 	}
 #endif
 	return (0);
 }
 
 /*
  * Module event handler.  MOD_LOAD runs carp_mod_load(); MOD_UNLOAD tears
  * the module down unless carp_list is non-empty, in which case EBUSY is
  * returned.  Any other event yields EINVAL.
  */
 static int
 carp_modevent(module_t mod, int type, void *data)
 {
 	switch (type) {
 	case MOD_LOAD:
 		return carp_mod_load();
 	case MOD_UNLOAD:
 		mtx_lock(&carp_mtx);
 		if (!LIST_EMPTY(&carp_list)) {
 			mtx_unlock(&carp_mtx);
 			return (EBUSY);
 		}
 		/* carp_mod_cleanup() releases and destroys carp_mtx. */
 		carp_mod_cleanup();
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * Module description: name "carp", events handled by carp_modevent(),
  * third field left as 0.
  */
 static moduledata_t carp_mod = {
 	"carp",
 	carp_modevent,
 	0
 };
 
 /* Declared at SI_SUB_PROTO_DOMAIN with SI_ORDER_ANY. */
 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
Index: head/sys/netinet/ip_gre.c
===================================================================
--- head/sys/netinet/ip_gre.c	(revision 280970)
+++ head/sys/netinet/ip_gre.c	(revision 280971)
@@ -1,167 +1,167 @@
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_var.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/if_gre.h>
 
 /*
  * Protocol switch entry handed to encap_attach_func() by in_gre_attach().
  * Matching IPPROTO_GRE packets go to gre_input(); the output, control and
  * user request entries use the rip_* handlers.
  */
 extern struct domain inetdomain;
 static const struct protosw in_gre_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_GRE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		gre_input,
 	.pr_output =		rip_output,
 	.pr_ctlinput =		rip_ctlinput,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 
 /*
  * Per-vnet TTL that in_gre_output() puts into the outer IPv4 header.
  * Defaults to GRE_TTL and is writable through the "grettl" sysctl under
  * _net_inet_ip.
  */
 #define	GRE_TTL			30
 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
 #define	V_ip_gre_ttl		VNET(ip_gre_ttl)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ip_gre_ttl), 0, "");
 
 /*
  * Encapsulation check callback for GRE over IPv4; arg is the gre_softc
  * registered by in_gre_attach().
  *
  * Returns 32 * 2 when the interface is up, the packet is long enough to
  * hold the GRE/IP header plus an inner IP header, the tunnel is configured
  * and the outer addresses match the tunnel endpoints (swapped, since the
  * packet is inbound).  Returns 0 otherwise.
  */
 static int
 in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct ip *ip;
 	int score;
 
 	sc = (struct gre_softc *)arg;
 	if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
 		return (0);
 
 	M_ASSERTPKTHDR(m);
 	/*
 	 * The payload has to carry at least an IPv4 or IPv6 packet,
 	 * so anything shorter cannot be ours.
 	 */
 	if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
 		return (0);
 
 	score = 0;
 	GRE_RLOCK(sc);
 	/* A gre_family of 0 means no tunnel endpoints are configured. */
 	if (sc->gre_family != 0) {
 		KASSERT(sc->gre_family == AF_INET,
 		    ("wrong gre_family: %d", sc->gre_family));
 
 		ip = mtod(m, struct ip *);
 		if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr &&
 		    sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr)
 			score = 32 * 2;
 	}
 	GRE_RUNLOCK(sc);
 
 	return (score);
 }
 
 /*
  * Finish the outer IPv4 header of a GRE packet and hand it to ip_output().
  *
  * m starts with a struct greip; af is the family of the inner packet,
  * which starts hlen bytes into the mbuf chain.  For an inner IPv4 packet
  * the TOS and ID are copied from the inner header; for an inner IPv6
  * packet TOS is 0 and the ID is filled in separately.  TTL comes from
  * V_ip_gre_ttl and the length from the packet header.
  *
  * Returns the result of ip_output().
  */
 int
 in_gre_output(struct mbuf *m, int af, int hlen)
 {
 	struct greip *gi;
 
 	gi = mtod(m, struct greip *);
 	switch (af) {
 	case AF_INET:
 		/*
 		 * gre_transmit() has used M_PREPEND() that doesn't guarantee
 		 * m_data is contiguous more than hlen bytes. Use m_copydata()
 		 * here to avoid m_pullup().
 		 */
 		m_copydata(m, hlen + offsetof(struct ip, ip_tos),
 		    sizeof(u_char), &gi->gi_ip.ip_tos);
 		m_copydata(m, hlen + offsetof(struct ip, ip_id),
 		    sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
 		break;
 #ifdef INET6
 	case AF_INET6:
 		gi->gi_ip.ip_tos = 0; /* XXX */
-		gi->gi_ip.ip_id = ip_newid();
+		ip_fillid(&gi->gi_ip);
 		break;
 #endif
 	}
 	gi->gi_ip.ip_ttl = V_ip_gre_ttl;
 	gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
 	return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
 }
 
 /*
  * Register sc with the encapsulation layer for IPPROTO_GRE over IPv4,
  * using in_gre_encapcheck() as the match function.  The cookie is stored
  * in sc->gre_ecookie, which must be NULL on entry.
  *
  * Returns 0 on success, EEXIST when encap_attach_func() returns NULL.
  */
 int
 in_gre_attach(struct gre_softc *sc)
 {
 
 	KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
 	sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
 	    in_gre_encapcheck, &in_gre_protosw, sc);
 
 	return (sc->gre_ecookie != NULL ? 0 : EEXIST);
 }
Index: head/sys/netinet/ip_id.c
===================================================================
--- head/sys/netinet/ip_id.c	(revision 280970)
+++ head/sys/netinet/ip_id.c	(revision 280971)
@@ -1,211 +1,282 @@
 
 /*-
  * Copyright (c) 2008 Michael J. Silbersack.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * IP ID generation is a fascinating topic.
  *
  * In order to avoid ID collisions during packet reassembly, common sense
  * dictates that the period between reuse of IDs be as large as possible.
  * This leads to the classic implementation of a system-wide counter, thereby
  * ensuring that IDs repeat only once every 2^16 packets.
  *
  * Subsequent security researchers have pointed out that using a global
  * counter makes ID values predictable.  This predictability allows traffic
  * analysis, idle scanning, and even packet injection in specific cases.
  * These results suggest that IP IDs should be as random as possible.
  *
  * The "searchable queues" algorithm used in this IP ID implementation was
  * proposed by Amit Klein.  It is a compromise between the above two
  * viewpoints that has provable behavior that can be tuned to the user's
  * requirements.
  *
  * The basic concept is that we supplement a standard random number generator
  * with a queue of the last L IDs that we have handed out to ensure that all
  * IDs have a period of at least L.
  *
  * To efficiently implement this idea, we keep two data structures: a
  * circular array of IDs of size L and a bitstring of 65536 bits.
  *
  * To start, we ask the RNG for a new ID.  A quick index into the bitstring
  * is used to determine if this is a recently used value.  The process is
  * repeated until a value is returned that is not in the bitstring.
  *
  * Having found a usable ID, we remove the ID stored at the current position
  * in the queue from the bitstring and replace it with our new ID.  Our new
  * ID is then added to the bitstring and the queue pointer is incremented.
  *
  * The lower limit of 512 was chosen because there doesn't seem to be much
  * point to having a smaller value.  The upper limit of 32768 was chosen for
  * two reasons.  First, every step above 32768 decreases the entropy.  Taken
  * to an extreme, 65533 would offer 1 bit of entropy.  Second, the number of
  * attempts it takes the algorithm to find an unused ID drastically
  * increases, killing performance.  The default value of 8192 was chosen
  * because it provides a good tradeoff between randomness and non-repetition.
  *
  * With L=8192, the queue will use 16K of memory.  The bitstring always
  * uses 8K of memory.  No memory is allocated until the use of random ids is
  * enabled.
  */
 
-#include <sys/types.h>
-#include <sys/malloc.h>
 #include <sys/param.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/random.h>
-#include <sys/systm.h>
+#include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/bitstring.h>
 
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/ip.h>
 #include <netinet/ip_var.h>
 
-static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
+/*
+ * By default we generate IP ID only for non-atomic datagrams, as
+ * suggested by RFC6864.  We use per-CPU counter for that, or if
+ * user wants to, we can turn on random ID generation.
+ */
+static VNET_DEFINE(int, ip_rfc6864) = 1;
+static VNET_DEFINE(int, ip_do_randomid) = 0;
+#define	V_ip_rfc6864		VNET(ip_rfc6864)
+#define	V_ip_do_randomid	VNET(ip_do_randomid)
 
+/*
+ * Random ID state engine.
+ */
+static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
 static VNET_DEFINE(uint16_t *, id_array);
 static VNET_DEFINE(bitstr_t *, id_bits);
 static VNET_DEFINE(int, array_ptr);
 static VNET_DEFINE(int, array_size);
 static VNET_DEFINE(int, random_id_collisions);
 static VNET_DEFINE(int, random_id_total);
 static VNET_DEFINE(struct mtx, ip_id_mtx);
 #define	V_id_array	VNET(id_array)
 #define	V_id_bits	VNET(id_bits)
 #define	V_array_ptr	VNET(array_ptr)
 #define	V_array_size	VNET(array_size)
 #define	V_random_id_collisions	VNET(random_id_collisions)
 #define	V_random_id_total	VNET(random_id_total)
 #define	V_ip_id_mtx	VNET(ip_id_mtx)
 
-static void	ip_initid(int);
+/*
+ * Non-random ID state engine is simply a per-cpu counter.
+ */
+static VNET_DEFINE(counter_u64_t, ip_id);
+#define	V_ip_id		VNET(ip_id)
+
+static int	sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
 static int	sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
+static void	ip_initid(int);
+static uint16_t ip_randomid(void);
 static void	ipid_sysinit(void);
 static void	ipid_sysuninit(void);
 
 SYSCTL_DECL(_net_inet_ip);
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id,
+    CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU",
+    "Assign random ip_id values");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(ip_rfc6864), 0,
+    "Use constant IP ID for atomic datagrams");
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size");
 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions,
     CTLFLAG_RD | CTLFLAG_VNET,
     &VNET_NAME(random_id_collisions), 0, "Count of IP ID collisions");
 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET,
     &VNET_NAME(random_id_total), 0, "Count of IP IDs created");
 
 /*
  * Sysctl handler for the "random_id" knob under _net_inet_ip.  Reads
  * report V_ip_do_randomid; writes accept only 0 or 1.  Switching from 0
  * to 1 sets up the random ID state with ip_initid(8192) before the flag
  * is stored.  Returns 0, the sysctl_handle_int() error, or EINVAL.
  */
 static int
+sysctl_ip_randomid(SYSCTL_HANDLER_ARGS)
+{
+	int error, new;
+
+	new = V_ip_do_randomid;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error || req->newptr == NULL)	/* failure, or a plain read */
+		return (error);
+	if (new != 0 && new != 1)
+		return (EINVAL);
+	if (new == V_ip_do_randomid)	/* no change requested */
+		return (0);
+	if (new == 1 && V_ip_do_randomid == 0)
+		ip_initid(8192);
+	/* We don't free memory when turning random ID off, due to race. */
+	V_ip_do_randomid = new;
+	return (0);
+}
+
 /*
  * Sysctl handler for "random_id_period" under _net_inet_ip.  Reads report
  * V_array_size; a write in the range 512..32768 rebuilds the random ID
  * state with that size through ip_initid(), anything else is EINVAL.
  */
+static int
 sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_array_size;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	/* Only act when a new value was actually written. */
 	if (error == 0 && req->newptr) {
 		if (new >= 512 && new <= 32768)
 			ip_initid(new);
 		else
 			error = EINVAL;
 	}
 	return (error);
 }
 
 /*
  * (Re)initialise the random IP ID state for a queue of new_size entries.
  * A zeroed ID array and a zeroed 65536-bit bitstring are allocated, then
  * swapped in under V_ip_id_mtx; any previous pair is freed and the queue
  * position and statistics are reset.
  */
 static void
 ip_initid(int new_size)
 {
 	uint16_t *ids;
 	bitstr_t *bits;
 
 	/* Both allocations are made before the mutex is taken. */
 	ids = malloc(new_size * sizeof(*ids), M_IPID, M_WAITOK | M_ZERO);
 	bits = malloc(bitstr_size(65536), M_IPID, M_WAITOK | M_ZERO);
 
 	mtx_lock(&V_ip_id_mtx);
 	/* The array and the bitstring are always installed as a pair. */
 	if (V_id_array != NULL) {
 		free(V_id_array, M_IPID);
 		free(V_id_bits, M_IPID);
 	}
 	V_id_array = ids;
 	V_id_bits = bits;
 	V_array_size = new_size;
 	V_array_ptr = 0;
 	V_random_id_collisions = 0;
 	V_random_id_total = 0;
 	mtx_unlock(&V_ip_id_mtx);
 }
 
 /*
  * Return a random, non-zero IP ID that is not among the last
  * V_array_size IDs handed out.  The ID that falls out of the circular
  * queue becomes available again.  Serialised by V_ip_id_mtx; expects the
  * state set up by ip_initid().
  */
-uint16_t
+static uint16_t
 ip_randomid(void)
 {
 	uint16_t new_id;
 
 	mtx_lock(&V_ip_id_mtx);
 	/*
 	 * To avoid a conflict with the zeros that the array is initially
 	 * filled with, we never hand out an id of zero.
 	 */
 	new_id = 0;
 	do {
 		/* A non-zero new_id here means the previous draw was in use. */
 		if (new_id != 0)
 			V_random_id_collisions++;
 		arc4rand(&new_id, sizeof(new_id), 0);
 	} while (bit_test(V_id_bits, new_id) || new_id == 0);
 	/* Retire the oldest ID in the queue and record the new one. */
 	bit_clear(V_id_bits, V_id_array[V_array_ptr]);
 	bit_set(V_id_bits, new_id);
 	V_id_array[V_array_ptr] = new_id;
 	V_array_ptr++;
 	if (V_array_ptr == V_array_size)	/* wrap the circular queue */
 		V_array_ptr = 0;
 	V_random_id_total++;
 	mtx_unlock(&V_ip_id_mtx);
 	return (new_id);
 }
 
+void
+ip_fillid(struct ip *ip)
+{
+
+	/*
+	 * Fill in ip->ip_id.  ip->ip_off is read, so it must already be
+	 * set in network byte order.
+	 *
+	 * Per RFC6864 Section 4
+	 *
+	 * o  Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0)
+	 * o  Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0)
+	 *
+	 * The test below looks at the DF bit only.  With V_ip_rfc6864 set,
+	 * such datagrams get an ID of 0; otherwise the ID comes from
+	 * ip_randomid() when V_ip_do_randomid is set, or from the low
+	 * 16 bits of this CPU's V_ip_id counter.
+	 */
+	if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF))
+		ip->ip_id = 0;
+	else if (V_ip_do_randomid)
+		ip->ip_id = ip_randomid();
+	else {
+		counter_u64_add(V_ip_id, 1);
+		ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff);
+	}
+}
+
 /*
  * Per-vnet initialisation of the IP ID state, run at SI_SUB_PROTO_DOMAIN:
  * sets up V_ip_id_mtx and the per-CPU V_ip_id counter, whose slots are
  * seeded from arc4rand().
  */
 static void
 ipid_sysinit(void)
 {
 
 	mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
-	ip_initid(8192);
+	V_ip_id = counter_u64_alloc(M_WAITOK);
+	for (int i = 0; i < mp_ncpus; i++)	/* one slot per CPU */
+		arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
 }
 VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);
 
 /*
  * Per-vnet teardown of the IP ID state: destroys V_ip_id_mtx, frees the
  * random ID array and bitstring if they were ever allocated, and frees
  * the V_ip_id counter.
  */
 static void
 ipid_sysuninit(void)
 {
 
 	mtx_destroy(&V_ip_id_mtx);
-	free(V_id_array, M_IPID);
-	free(V_id_bits, M_IPID);
+	if (V_id_array != NULL) {	/* only set once ip_initid() has run */
+		free(V_id_array, M_IPID);
+		free(V_id_bits, M_IPID);
+	}
+	counter_u64_free(V_ip_id);
 }
 VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL);
Index: head/sys/netinet/ip_input.c
===================================================================
--- head/sys/netinet/ip_input.c	(revision 280970)
+++ head/sys/netinet/ip_input.c	(revision 280971)
@@ -1,1875 +1,1868 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #include <netinet/ip_carp.h>
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 #include <netinet/in_rss.h>
 
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Where CTASSERT is available, check at compile time that struct ip is
  * exactly 20 bytes; ip_input() uses sizeof(struct ip) as the minimum
  * header length.
  */
 #ifdef CTASSERT
 CTASSERT(sizeof(struct ip) == 20);
 #endif
 
 /* Global rwlock "in_ifaddr_lock", initialised through RW_SYSINIT. */
 struct	rwlock in_ifaddr_lock;
 RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
 
 /* When non-zero, ip_input() delivers every IPPROTO_RSVP packet locally. */
 VNET_DEFINE(int, rsvp_on);
 
 /* When zero, ip_input() frees packets that are not for this host. */
 VNET_DEFINE(int, ipforwarding);
 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipforwarding), 0,
     "Enable IP forwarding between interfaces");
 
 static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
 #define	V_ipsendredirects	VNET(ipsendredirects)
 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipsendredirects), 0,
     "Enable sending IP redirects");
 
-VNET_DEFINE(int, ip_do_randomid);
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_VNET | CTLFLAG_RW,
-    &VNET_NAME(ip_do_randomid), 0,
-    "Assign random ip_id values");
-
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 static VNET_DEFINE(int, ip_checkinterface);
 #define	V_ip_checkinterface	VNET(ip_checkinterface)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_checkinterface), 0,
     "Verify packet arrives on correct interface");
 
 /* Registered in ip_init() and run over inbound packets by ip_input(). */
 VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
 
 /*
  * netisr handler that routes NETISR_IP packets to ip_input(); registered
  * in ip_init().  With RSS the CPU policy and hybrid dispatch are used,
  * otherwise the flow policy.
  */
 static struct netisr_handler ip_nh = {
 	.nh_name = "ip",
 	.nh_handler = ip_input,
 	.nh_proto = NETISR_IP,
 #ifdef	RSS
 	.nh_m2cpuid = rss_soft_m2cpuid,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 #else
 	.nh_policy = NETISR_POLICY_FLOW,
 #endif
 };
 
 #ifdef	RSS
 /*
  * Directly dispatched frames are currently assumed
  * to have a flowid already calculated.
  *
  * There should probably be something here that asserts that the
  * frame actually has valid flow details.
  */
 static struct netisr_handler ip_direct_nh = {
 	.nh_name = "ip_direct",
 	.nh_handler = ip_direct_input,
 	.nh_proto = NETISR_IP_DIRECT,
 	.nh_m2cpuid = rss_m2cpuid,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 };
 #endif
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 /* Maps an IP protocol number to an index into inetsw[]; filled by ip_init(). */
 u_char	ip_protox[IPPROTO_MAX];
 VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
 /*
  * Fragment reassembly state: a UMA zone for struct ipq entries and
  * IPREASS_NHASH per-VNET queue heads, plus a single global mutex.
  */
 static VNET_DEFINE(uma_zone_t, ipq_zone);
 static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
 static struct mtx ipqlock;
 
 #define	V_ipq_zone		VNET(ipq_zone)
 #define	V_ipq			VNET(ipq)
 
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
 #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
 #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
 
 static void	maxnipq_update(void);
 static void	ipq_zone_change(void *);
 static void	ip_drain_locked(void);
 
 static VNET_DEFINE(int, maxnipq);  /* Administrative limit on # reass queues. */
 static VNET_DEFINE(int, nipq);			/* Total # of reass queues */
 #define	V_maxnipq		VNET(maxnipq)
 #define	V_nipq			VNET(nipq)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(nipq), 0,
     "Current number of IPv4 fragment reassembly queue entries");
 
 /* Set to 16 by ip_init(); tunable at run time through the sysctl below. */
 static VNET_DEFINE(int, maxfragsperpacket);
 #define	V_maxfragsperpacket	VNET(maxfragsperpacket)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(maxfragsperpacket), 0,
     "Maximum number of IPv4 fragments allowed per packet");
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 VNET_DEFINE(int, ipstealth);
 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipstealth), 0,
     "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 /*
  * IP statistics are stored in the "array" of counter(9)s.
  */
 VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
 VNET_PCPUSTAT_SYSINIT(ipstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
     "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 /* The matching teardown is only compiled in for VIMAGE kernels. */
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(ipstat);
 #endif /* VIMAGE */
 
 /*
  * Kernel module interface for updating ipstat.  The argument is an index
  * into ipstat treated as an array.
  */
 void
 kmod_ipstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], 1);
 }
 
 void
 kmod_ipstat_dec(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], -1);
 }
 
 static int
 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
 	netisr_getqlimit(&ip_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qlimit < 1)
 		return (EINVAL);
 	return (netisr_setqlimit(&ip_nh, qlimit));
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
     "Maximum size of the IP input queue");
 
 static int
 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t qdrops_long;
 	int error, qdrops;
 
 	netisr_getqdrops(&ip_nh, &qdrops_long);
 	qdrops = qdrops_long;
 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qdrops != 0)
 		return (EINVAL);
 	netisr_clearqdrops(&ip_nh);
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
     "Number of packets dropped from the IP input queue");
 
 #ifdef	RSS
 /*
  * Sysctl handler for the length limit of the "ip_direct" netisr queue.
  * Reads report the current limit; a write must supply a value of at
  * least 1 (EINVAL otherwise) and is passed to netisr_setqlimit(), whose
  * result is returned.
  */
 static int
 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int limit, rv;
 
 	netisr_getqlimit(&ip_direct_nh, &limit);
 	rv = sysctl_handle_int(oidp, &limit, 0, req);
 	/* Finished on failure, or when the request carries no new value. */
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 
 	return (limit >= 1 ? netisr_setqlimit(&ip_direct_nh, limit) : EINVAL);
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
     "Maximum size of the IP direct input queue");
 
 /*
  * Sysctl handler for the drop count of the "ip_direct" netisr queue.
  * Reads report the count narrowed to an int.  The handler accepts a new
  * value only if it is zero, in which case the counter is cleared; any
  * other value yields EINVAL.
  */
 static int
 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t drops64;
 	int drops, rv;
 
 	netisr_getqdrops(&ip_direct_nh, &drops64);
 	drops = drops64;
 	rv = sysctl_handle_int(oidp, &drops, 0, req);
 	/* Finished on failure, or when the request carries no new value. */
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 
 	if (drops == 0) {
 		netisr_clearqdrops(&ip_direct_nh);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
     "Number of packets dropped from the IP direct input queue");
 #endif	/* RSS */
 
 /*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  *
  * The first half (address list and hash, reassembly queues and zone,
  * pfil head) is done for every VNET.  The second half (ip_protox[],
  * the nmbclusters_change event handler, the reassembly lock and the
  * netisr registrations) is global and done for the default VNET only.
  */
 void
 ip_init(void)
 {
 	struct protosw *pr;
 	int i;
-
-	V_ip_id = time_second & 0xffff;
 
 	TAILQ_INIT(&V_in_ifaddrhead);
 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
 
 	/* Initialize IP reassembly queue. */
 	for (i = 0; i < IPREASS_NHASH; i++)
 		TAILQ_INIT(&V_ipq[i]);
 	/* Default limits; maxnipq_update() pushes V_maxnipq into the zone. */
 	V_maxnipq = nmbclusters / 32;
 	V_maxfragsperpacket = 16;
 	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
 	    NULL, UMA_ALIGN_PTR, 0);
 	maxnipq_update();
 
 	/* Initialize packet filter hooks. */
 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	V_inet_pfil_hook.ph_af = AF_INET;
 	/* A registration failure is only reported; initialisation goes on. */
 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip_protox[i] = pr - inetsw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 		}
 
 	/* Have ipq_zone_change() called on nmbclusters_change events. */
 	EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
 		NULL, EVENTHANDLER_PRI_ANY);
 
 	/* Initialize various other remaining things. */
 	IPQ_LOCK_INIT();
 	netisr_register(&ip_nh);
 #ifdef	RSS
 	netisr_register(&ip_direct_nh);
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Per-VNET teardown of the state set up by ip_init(): unregister the pfil
  * head (a failure is only reported), destroy the interface address hash,
  * drain the reassembly queues under the queue lock and destroy the ipq
  * zone.
  */
 void
 ip_destroy(void)
 {
 	int error;
 
 	error = pfil_head_unregister(&V_inet_pfil_hook);
 	if (error != 0)
 		printf("%s: WARNING: unable to unregister pfil hook, "
 		    "error %d\n", __func__, error);
 
 	/* The in_ifaddr hash table is expected to be empty by now. */
 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
 
 	/* Release whatever is still sitting in the reassembly queues. */
 	IPQ_LOCK();
 	ip_drain_locked();
 	IPQ_UNLOCK();
 
 	uma_zdestroy(V_ipq_zone);
 }
 #endif
 
 #ifdef	RSS
 /*
  * IP direct input routine.
  *
  * Entry point for reinjected, fully reassembled datagrams: all header
  * checks and book-keeping have already been done, so the packet is
  * counted as delivered and passed straight to its protocol's input
  * routine.
  */
 void
 ip_direct_input(struct mbuf *m)
 {
 	struct ip *iph = mtod(m, struct ip *);
 	int hlen = iph->ip_hl << 2;	/* header length in bytes */
 
 	IPSTAT_INC(ips_delivered);
 	(*inetsw[ip_protox[iph->ip_p]].pr_input)(&m, &hlen, iph->ip_p);
 }
 #endif
 
 /*
  * IP input routine; this is the handler of the "ip" netisr (ip_nh).
  *
  * Validates the IPv4 header of the packet in 'm' (lengths, version,
  * loopback addresses, header checksum), trims data beyond ip_len, runs
  * the inet pfil hooks and then decides the packet's fate: local delivery
  * through the protocol's pr_input routine (reassembling fragments and
  * processing options first), ip_forward(), or m_freem().
  *
  * Nothing is returned; on every path the mbuf is freed here or handed to
  * another routine.
  */
 void
 ip_input(struct mbuf *m)
 {
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
 	uint16_t sum, ip_len;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
 	M_ASSERTPKTHDR(m);
 
 	/*
 	 * A packet flagged M_FASTFWD_OURS skips all of the validation
 	 * below and goes straight to local delivery.
 	 */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		ip_len = ntohs(ip->ip_len);
 		goto ours;
 	}
 
 	IPSTAT_INC(ips_total);
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	/* Make the fixed part of the header contiguous in the first mbuf. */
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		IPSTAT_INC(ips_toosmall);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		IPSTAT_INC(ips_badvers);
 		goto bad;
 	}
 
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		IPSTAT_INC(ips_badhlen);
 		goto bad;
 	}
 	/* Header with options: pull the whole of it into the first mbuf. */
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			IPSTAT_INC(ips_badhlen);
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
 
 	/* 127/8 must not appear on wire - RFC1122 */
 	ifp = m->m_pkthdr.rcvif;
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			goto bad;
 		}
 	}
 
 	/*
 	 * Header checksum: if the mbuf is flagged CSUM_IP_CHECKED, take the
 	 * verdict from CSUM_IP_VALID; otherwise compute it here.  A non-zero
 	 * 'sum' means the checksum is bad.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		IPSTAT_INC(ips_badsum);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	ip_len = ntohs(ip->ip_len);
 	if (ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip_len) {
 		/* Also reached from the minimum-length check near the top. */
 tooshort:
 		IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip_len;
 			m->m_pkthdr.len = ip_len;
 		} else
 			/* Negative count; see mbuf(9) for m_adj() semantics. */
 			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 
 #ifdef IPSEC
 	/*
 	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (ip_ipsec_filtertunnel(m))
 		goto passin;
 #endif /* IPSEC */
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	/* Reload state from the mbuf the hooks handed back. */
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 	ifp = m->m_pkthdr.rcvif;
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 	if (m->m_flags & M_IP_NEXTHOP) {
 		dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
 		if (dchg != 0) {
 			/*
 			 * Directly ship the packet on.  This allows
 			 * forwarding packets originally destined to us
 			 * to some other directly connected host.
 			 */
 			ip_forward(m, 1);
 			return;
 		}
 	}
 passin:
 
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
 	/*
 	 * Greedy RSVP: snatches any PATH packet of the RSVP protocol, no
 	 * matter if it is destined to another node or whether it is a
 	 * multicast one.  RSVP wants it, and this prevents it from being
 	 * forwarded anywhere else.  Also checks if the rsvp daemon is
 	 * running before grabbing the packet.
 	 */
 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 
 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
 	    ifp->if_carp == NULL && (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	/* IN_IFADDR_RLOCK(); */
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == ifp)) {
 			/* Account the packet to the matching address. */
 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
 			    m->m_pkthdr.len);
 			/* IN_IFADDR_RUNLOCK(); */
 			goto ours;
 		}
 	}
 	/* IN_IFADDR_RUNLOCK(); */
 
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
 		IF_ADDR_RLOCK(ifp);
 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				IF_ADDR_RUNLOCK(ifp);
 				goto ours;
 			}
 #ifdef BOOTP_COMPAT
 			/* Accept on an interface whose address is INADDR_ANY. */
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				IF_ADDR_RUNLOCK(ifp);
 				goto ours;
 			}
 #endif
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		/* No address matched; do not leave 'ia' at the last one tried. */
 		ia = NULL;
 	}
 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		if (V_ip_mrouter) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
 				IPSTAT_INC(ips_cantforward);
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP)
 				goto ours;
 			IPSTAT_INC(ips_forward);
 		}
 		/*
 		 * Assume the packet is for us, to avoid prematurely taking
 		 * a lock on the in_multi hash. Protocols must perform
 		 * their own filtering and update statistics accordingly.
 		 */
 		goto ours;
 	}
 	/* Limited broadcast and the all-zeros address are delivered locally. */
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (V_ipforwarding == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 	} else {
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		/* XXXGL: shouldn't we save & set m_flags? */
 		m = ip_reass(m);
 		/* NULL: nothing to deliver yet (fragment queued or dropped). */
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 #ifdef IPSEC
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
 	 * code - like udp/tcp/raw ip.
 	 */
 	if (ip_ipsec_input(m, ip->ip_p) != 0)
 		goto bad;
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	IPSTAT_INC(ips_delivered);
 
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
 	return;
 	/* Common drop path: the statistic was bumped where the error was found. */
 bad:
 	m_freem(m);
 }
 
 /*
  * Propagate the current V_maxnipq setting to the ipq UMA zone.  The zone
  * maximum has slightly different semantics than the sysctl, for historical
  * reasons, so the value is translated first:
  *
  *   V_maxnipq < 0   unlimited allocation; the zone maximum is set to 0.
  *   V_maxnipq > 0   a specific bound; it is used as is.
  *   V_maxnipq == 0  no further fragment queue allocation; the bound is set
  *                   very low (1), relying on the implementation elsewhere
  *                   to actually prevent allocation and reclaim current
  *                   queues.
  */
 static void
 maxnipq_update(void)
 {
 	int zone_max;
 
 	if (V_maxnipq < 0)
 		zone_max = 0;
 	else if (V_maxnipq > 0)
 		zone_max = V_maxnipq;
 	else
 		zone_max = 1;
 
 	uma_zone_set_max(V_ipq_zone, zone_max);
 }
 
 /*
  * nmbclusters_change event handler (registered in ip_init()).  If the
  * reassembly queue limit is positive but below nmbclusters / 32, raise it
  * to that value and update the zone; a limit of zero or less is left
  * alone.  'tag' is unused.
  */
 static void
 ipq_zone_change(void *tag)
 {
 	const int autolimit = nmbclusters / 32;
 
 	if (V_maxnipq <= 0 || V_maxnipq >= autolimit)
 		return;
 
 	V_maxnipq = autolimit;
 	maxnipq_update();
 }
 
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	i = V_maxnipq;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * XXXRW: Might be a good idea to sanity check the argument and place
 	 * an extreme upper bound.
 	 */
 	if (i < -1)
 		return (EINVAL);
 	V_maxnipq = i;
 	maxnipq_update();
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
 /*
  * mbuf flag used by ip_reass(): set on a fragment that arrived with IP_MF
  * and cleared otherwise, so that a queued fragment can later be checked
  * for being the last one.
  */
 #define	M_IP_FRAG	M_PROTO9
 
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
  * in between the function will return NULL and store the mbuf
  * in the fragment chain.  If the argument is the last fragment
  * the packet will be reassembled and the pointer to the new
  * mbuf returned for further processing.  Only m_tags attached
  * to the first packet/fragment are preserved.
  * The IP header is *NOT* adjusted out of iplen.
  */
 struct mbuf *
 ip_reass(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *p, *q, *nq, *t;
 	struct ipq *fp = NULL;
 	struct ipqhead *head;
 	int i, hlen, next;
 	u_int8_t ecn, ecn0;
 	u_short hash;
 #ifdef	RSS
 	uint32_t rss_hash, rss_type;
 #endif
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
 	if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
 		IPSTAT_INC(ips_fragments);
 		IPSTAT_INC(ips_fragdropped);
 		m_freem(m);
 		return (NULL);
 	}
 
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	head = &V_ipq[hash];
 	IPQ_LOCK();
 
 	/*
 	 * Look for queue of fragments
 	 * of this datagram.
 	 */
 	TAILQ_FOREACH(fp, head, ipq_list)
 		if (ip->ip_id == fp->ipq_id &&
 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
 #ifdef MAC
 		    mac_ipq_match(m, fp) &&
 #endif
 		    ip->ip_p == fp->ipq_p)
 			goto found;
 
 	fp = NULL;
 
 	/*
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
 	if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
 		 */
 		struct ipq *q = TAILQ_LAST(head, ipqhead);
 		if (q == NULL) {   /* gak */
 			for (i = 0; i < IPREASS_NHASH; i++) {
 				struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
 				if (r) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    r->ipq_nfrags);
 					ip_freef(&V_ipq[i], r);
 					break;
 				}
 			}
 		} else {
 			IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags);
 			ip_freef(head, q);
 		}
 	}
 
 found:
 	/*
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
 	ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
 	if (ip->ip_off & htons(IP_MF)) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
 		if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
 			IPSTAT_INC(ips_toosmall); /* XXX */
 			goto dropfrag;
 		}
 		m->m_flags |= M_IP_FRAG;
 	} else
 		m->m_flags &= ~M_IP_FRAG;
 	ip->ip_off = htons(ntohs(ip->ip_off) << 3);
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	IPSTAT_INC(ips_fragments);
 	m->m_pkthdr.PH_loc.ptr = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
 		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
 			uma_zfree(V_ipq_zone, fp);
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_ipq_create(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
 		V_nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
 		fp->ipq_id = ip->ip_id;
 		fp->ipq_src = ip->ip_src;
 		fp->ipq_dst = ip->ip_dst;
 		fp->ipq_frags = m;
 		m->m_nextpkt = NULL;
 		goto done;
 	} else {
 		fp->ipq_nfrags++;
 #ifdef MAC
 		mac_ipq_update(m, fp);
 #endif
 	}
 
 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT)
 			goto dropfrag;
 		if (ecn0 != IPTOS_ECN_CE)
 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
 		goto dropfrag;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
 		if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
 			break;
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us, otherwise
 	 * stick new segment in the proper place.
 	 *
 	 * If some of the data is dropped from the preceding
 	 * segment, then it's checksum is invalidated.
 	 */
 	if (p) {
 		i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
 		    ntohs(ip->ip_off);
 		if (i > 0) {
 			if (i >= ntohs(ip->ip_len))
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
 			ip->ip_off = htons(ntohs(ip->ip_off) + i);
 			ip->ip_len = htons(ntohs(ip->ip_len) - i);
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
 	} else {
 		m->m_nextpkt = fp->ipq_frags;
 		fp->ipq_frags = m;
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
 	    ntohs(GETIP(q)->ip_off); q = nq) {
 		i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
 		    ntohs(GETIP(q)->ip_off);
 		if (i < ntohs(GETIP(q)->ip_len)) {
 			GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
 			GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
 		}
 		nq = q->m_nextpkt;
 		m->m_nextpkt = nq;
 		IPSTAT_INC(ips_fragdropped);
 		fp->ipq_nfrags--;
 		m_freem(q);
 	}
 
 	/*
 	 * Check for complete reassembly and perform frag per packet
 	 * limiting.
 	 *
 	 * Frag limiting is performed here so that the nth frag has
 	 * a chance to complete the packet before we drop the packet.
 	 * As a result, n+1 frags are actually allowed per packet, but
 	 * only n will ever be stored. (n = maxfragsperpacket.)
 	 *
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		if (ntohs(GETIP(q)->ip_off) != next) {
 			if (fp->ipq_nfrags > V_maxfragsperpacket) {
 				IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 				ip_freef(head, fp);
 			}
 			goto done;
 		}
 		next += ntohs(GETIP(q)->ip_len);
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_IP_FRAG) {
 		if (fp->ipq_nfrags > V_maxfragsperpacket) {
 			IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 			ip_freef(head, fp);
 		}
 		goto done;
 	}
 
 	/*
 	 * Reassembly is complete.  Make sure the packet is a sane size.
 	 */
 	q = fp->ipq_frags;
 	ip = GETIP(q);
 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
 		IPSTAT_INC(ips_toolong);
 		IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 		ip_freef(head, fp);
 		goto done;
 	}
 
 	/*
 	 * Concatenate fragments.
 	 */
 	m = q;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = q->m_nextpkt;
 	q->m_nextpkt = NULL;
 	for (q = nq; q != NULL; q = nq) {
 		nq = q->m_nextpkt;
 		q->m_nextpkt = NULL;
 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
 		m_cat(m, q);
 	}
 	/*
 	 * In order to do checksumming faster we do 'end-around carry' here
 	 * (and not in for{} loop), though it implies we are not going to
 	 * reassemble more than 64k fragments.
 	 */
 	while (m->m_pkthdr.csum_data & 0xffff0000)
 		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
 		    (m->m_pkthdr.csum_data >> 16);
 #ifdef MAC
 	mac_ipq_reassemble(fp, m);
 	mac_ipq_destroy(fp);
 #endif
 
 	/*
 	 * Create header for new ip packet by modifying header of first
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
 	ip->ip_len = htons((ip->ip_hl << 2) + next);
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
 	V_nipq--;
 	uma_zfree(V_ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);
 	IPSTAT_INC(ips_reassembled);
 	IPQ_UNLOCK();
 
 #ifdef	RSS
 	/*
 	 * Query the RSS layer for the flowid / flowtype for the
 	 * mbuf payload.
 	 *
 	 * For now, just assume we have to calculate a new one.
 	 * Later on we should check to see if the assigned flowid matches
 	 * what RSS wants for the given IP protocol and if so, just keep it.
 	 *
 	 * We then queue into the relevant netisr so it can be dispatched
 	 * to the correct CPU.
 	 *
 	 * Note - this may return 1, which means the flowid in the mbuf
 	 * is correct for the configured RSS hash types and can be used.
 	 */
 	if (rss_mbuf_software_hash_v4(m, 0, &rss_hash, &rss_type) == 0) {
 		m->m_pkthdr.flowid = rss_hash;
 		M_HASHTYPE_SET(m, rss_type);
 	}
 
 	/*
 	 * Queue/dispatch for reprocessing.
 	 *
 	 * Note: this is much slower than just handling the frame in the
 	 * current receive context.  It's likely worth investigating
 	 * why this is.
 	 */
 	netisr_dispatch(NETISR_IP_DIRECT, m);
 	return (NULL);
 #endif
 
 	/* Handle in-line */
 	return (m);
 
 dropfrag:
 	IPSTAT_INC(ips_fragdropped);
 	if (fp != NULL)
 		fp->ipq_nfrags--;
 	m_freem(m);
 done:
 	IPQ_UNLOCK();
 	return (NULL);
 
 #undef GETIP
 }
 
 /*
  * Free a fragment reassembly header and all
  * associated datagrams.
  */
 static void
 ip_freef(struct ipqhead *fhp, struct ipq *fp)
 {
 	struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
 
 	while (fp->ipq_frags) {
 		q = fp->ipq_frags;
 		fp->ipq_frags = q->m_nextpkt;
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
 	uma_zfree(V_ipq_zone, fp);
 	V_nipq--;
 }
 
 /*
  * IP reassembly timer.  For every vnet, decrement the ttl of each
  * reassembly queue and discard the queues whose ttl has reached zero.
  * Afterwards, if V_maxnipq is non-negative and more queues exist than
  * it allows, free queues from the head of each hash bucket until the
  * count is back within the limit.
  */
 void
 ip_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct ipq *cur, *following, *victim;
 	int bucket;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	IPQ_LOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 			cur = TAILQ_FIRST(&V_ipq[bucket]);
 			while (cur != NULL) {
 				/*
 				 * Pick up the successor before aging the
 				 * entry: an expired one is released by
 				 * ip_freef() and must not be touched again.
 				 */
 				following = TAILQ_NEXT(cur, ipq_list);
 				cur->ipq_ttl--;
 				if (cur->ipq_ttl == 0) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    cur->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], cur);
 				}
 				cur = following;
 			}
 		}
 		/*
 		 * The limit may have been lowered below the current
 		 * number of queues; shed queues until we comply again.
 		 */
 		if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
 			for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 				while (V_nipq > V_maxnipq &&
 				    (victim = TAILQ_FIRST(&V_ipq[bucket])) !=
 				    NULL) {
 					IPSTAT_ADD(ips_fragdropped,
 					    victim->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], victim);
 				}
 			}
 		}
 		CURVNET_RESTORE();
 	}
 	IPQ_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Drain off all datagram fragments.
  */
 static void
 ip_drain_locked(void)
 {
 	int     i;
 
 	IPQ_LOCK_ASSERT();
 
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		while(!TAILQ_EMPTY(&V_ipq[i])) {
 			IPSTAT_ADD(ips_fragdropped,
 			    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
 			ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
 		}
 	}
 }
 
 /*
  * Drain the reassembly queues of every vnet: take the vnet list read lock
  * and the IPQ lock, then run ip_drain_locked() once with each vnet set as
  * the current one.
  */
 void
 ip_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	IPQ_LOCK();
 	VNET_FOREACH(vnet_iter) {
 		/* ip_drain_locked() works on the V_ipq of the current vnet. */
 		CURVNET_SET(vnet_iter);
 		ip_drain_locked();
 		CURVNET_RESTORE();
 	}
 	IPQ_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Point ip_protox[ipproto] at the inetsw[] entry of protocol `ipproto'.
  * The protocol must already be registered in inetsw[], either statically
  * or through pf_proto_register().
  *
  * Returns 0 on success, EPROTONOSUPPORT when ipproto is out of range or
  * no matching PF_INET entry exists, EPFNOSUPPORT when the raw IP entry
  * cannot be found, and EEXIST when the slot is already in use.
  */
 int
 ipproto_register(short ipproto)
 {
 	struct protosw *raw, *pr;
 
 	/* Only 1 .. IPPROTO_MAX - 1 are acceptable protocol numbers. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * A slot is free only while it still refers to the IPPROTO_RAW
 	 * entry; any other index means a protocol already owns it.
 	 */
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != raw - inetsw)
 		return (EEXIST);
 
 	/* Walk the inet domain's protosw table looking for the protocol. */
 	pr = inetdomain.dom_protosw;
 	while (pr < inetdomain.dom_protoswNPROTOSW) {
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol != 0 && pr->pr_protocol == ipproto) {
 			ip_protox[pr->pr_protocol] = pr - inetsw;
 			return (0);
 		}
 		pr++;
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Release the ip_protox[] slot of protocol `ipproto' by making it refer
  * to the IPPROTO_RAW entry of inetsw[] again.
  *
  * Returns 0 on success, EPROTONOSUPPORT when ipproto is out of range,
  * EPFNOSUPPORT when the raw IP entry cannot be found, and ENOENT when
  * the slot already refers to the raw entry (nothing was registered).
  */
 int
 ipproto_unregister(short ipproto)
 {
 	struct protosw *raw;
 
 	/* Only 1 .. IPPROTO_MAX - 1 are acceptable protocol numbers. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 
 	/* A slot that refers to IPPROTO_RAW was never registered. */
 	if (ip_protox[ipproto] == raw - inetsw)
 		return (ENOENT);
 
 	/* Hand the slot back to IPPROTO_RAW. */
 	ip_protox[ipproto] = raw - inetsw;
 	return (0);
 }
 
 /*
  * Look up the route towards `dst' (final destination or next hop) in
  * FIB `fibnum' and return the internet address info of the interface
  * used to get there.  A reference is taken on the returned address with
  * ifa_ref(); NULL is returned when no route is found.
  */
 struct in_ifaddr *
 ip_rtaddr(struct in_addr dst, u_int fibnum)
 {
 	struct route ro;
 	struct sockaddr_in *dstsin;
 	struct in_ifaddr *ia;
 
 	bzero(&ro, sizeof(ro));
 	dstsin = (struct sockaddr_in *)&ro.ro_dst;
 	dstsin->sin_len = sizeof(*dstsin);
 	dstsin->sin_family = AF_INET;
 	dstsin->sin_addr = dst;
 	in_rtalloc_ign(&ro, 0, fibnum);
 
 	ia = NULL;
 	if (ro.ro_rt != NULL) {
 		/* Reference the address before letting go of the route. */
 		ia = ifatoia(ro.ro_rt->rt_ifa);
 		ifa_ref(&ia->ia_ifa);
 		RTFREE(ro.ro_rt);
 	}
 	return (ia);
 }
 
 /*
  * Table of PRC_NCMDS errno values.  Twenty-two initializers are listed;
  * any further entries (if PRC_NCMDS is larger) are zero-initialized.
  * NOTE(review): presumably indexed by PRC_* control command code, with 0
  * meaning "no error to report" -- confirm against the users of the table.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * The srcrt parameter indicates whether the packet is being forwarded
  * via a source route.
  *
  * On every path the mbuf `m' is passed on to m_freem(), icmp_error() or
  * ip_output(); nothing is returned to the caller.
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia;
 	struct mbuf *mcopy;
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
 
 	/*
 	 * Link-level broadcast/multicast frames and destinations rejected
 	 * by in_canforward() are counted and dropped.
 	 */
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 #ifdef IPSEC
 	if (ip_ipsec_fwd(m) != 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 #endif /* IPSEC */
 	/*
 	 * The TTL would run out at this hop: report it to the sender.
 	 * With IPSTEALTH compiled in, the check is skipped while
 	 * V_ipstealth is set.
 	 */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		if (ip->ip_ttl <= IPTTLDEC) {
 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
 			    0, 0);
 			return;
 		}
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * ip_rtaddr() hands back a referenced address (or NULL); every
 	 * exit below that still holds a non-NULL `ia' releases it with
 	 * ifa_free().
 	 */
 	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
 #ifndef IPSEC
 	/*
 	 * 'ia' may be NULL if there is no route for this destination.
 	 * In case of IPsec, Don't discard it just yet, but pass it to
 	 * ip_output in case of outgoing IPsec policy.
 	 */
 	if (!srcrt && ia == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return;
 	}
 #endif
 
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * NOTE(review): the length actually copied below is
 	 * min(ip_len, M_TRAILINGSPACE(mcopy)), i.e. as much of the packet
 	 * as fits in the single header mbuf, not header + 8 bytes.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copy() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 
 	/* Account for this hop (the saved copy keeps the original TTL). */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && V_ipsendredirects &&
 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct sockaddr_in *sin;
 		struct rtentry *rt;
 
 		bzero(&ro, sizeof(ro));
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_addr = ip->ip_dst;
 		in_rtalloc_ign(&ro, 0, M_GETFIB(m));
 
 		rt = ro.ro_rt;
 
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			/* Redirect only when the source is on the route's subnet. */
 			if (RTA(rt) &&
 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 				if (rt->rt_flags & RTF_GATEWAY)
 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 				else
 					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
 		if (rt)
 			RTFREE(rt);
 	}
 
 	/*
 	 * Try to cache the route MTU from ip_output so we can consider it for
 	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
 	 */
 	bzero(&ro, sizeof(ro));
 
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	if (error == EMSGSIZE && ro.ro_rt)
 		mtu = ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
 
 	/*
 	 * Update the statistics.  A successful forward with no redirect
 	 * pending needs no ICMP, so clean up and leave right here.
 	 */
 	if (error)
 		IPSTAT_INC(ips_cantforward);
 	else {
 		IPSTAT_INC(ips_forward);
 		if (type)
 			IPSTAT_INC(ips_redirectsent);
 		else {
 			if (mcopy)
 				m_freem(mcopy);
 			if (ia != NULL)
 				ifa_free(&ia->ia_ifa);
 			return;
 		}
 	}
 	/* Without the saved copy there is nothing to build an ICMP from. */
 	if (mcopy == NULL) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return;
 	}
 
 	/* Pick the ICMP type and code that match the outcome of ip_output(). */
 	switch (error) {
 
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 
 #ifdef IPSEC
 		/*
 		 * If IPsec is configured for this path,
 		 * override any possibly mtu value set by ip_output.
 		 */
 		mtu = ip_ipsec_mtu(mcopy, mtu);
 #endif /* IPSEC */
 		/*
 		 * If the MTU was set before make sure we are below the
 		 * interface MTU.
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu != 0) {
 			if (ia != NULL)
 				mtu = min(mtu, ia->ia_ifp->if_mtu);
 		} else {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
 
 	case ENOBUFS:
 	case EACCES:			/* ipfw denied packet */
 		/* No ICMP is generated for these two: just clean up. */
 		m_freem(mcopy);
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return;
 	}
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 /*
  * Build the control (ancillary data) mbufs requested by the socket
  * options and inpcb flags of `inp' for a received packet.
  *
  *	inp	protocol control block; its so_options, inp_flags and
  *		inp_flags2 select which items are generated.
  *	mp	in/out link pointer: each control mbuf is stored through
  *		*mp, and mp is advanced to that mbuf's m_next only when
  *		sbcreatecontrol() returned non-NULL, so an item whose
  *		allocation failed is simply left out.
  *	ip	IP header supplying destination address, TTL and TOS.
  *	m	received packet supplying rcvif, flowid and hash type.
  */
 void
 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
     struct mbuf *m)
 {
 
 	/* One clock reading serves both timestamp formats. */
 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 		struct bintime bt;
 
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
 			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
 			    SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
 			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
 			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		if ((ifp = m->m_pkthdr.rcvif) &&
 		    ifp->if_index && ifp->if_index <= V_if_index) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if (sdp->sdl_family != AF_LINK ||
 			    sdp->sdl_len > sizeof(sdlbuf)) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:
 			/*
 			 * Report an empty link-level address.  sdlbuf lives
 			 * on the stack and is not cleared, so every member
 			 * in front of sdl_data (sdl_type included) is set
 			 * explicitly: all of them fall within the sdl_len
 			 * bytes copied into the control message below.
 			 */
 			sdl2->sdl_len =
 			    offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_type = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
 		    IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTOS) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if (inp->inp_flags2 & INP_RECVFLOWID) {
 		uint32_t flowid, flow_type;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/*
 		 * XXX should handle the failure of one or the
 		 * other - don't populate both?
 		 */
 		*mp = sbcreatecontrol((caddr_t) &flowid,
 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 		*mp = sbcreatecontrol((caddr_t) &flow_type,
 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 #ifdef	RSS
 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
 		uint32_t flowid, flow_type;
 		uint32_t rss_bucketid;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/* The bucket id is reported only when the lookup succeeds. */
 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 #endif
 }
 
 /*
  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
  * compiled.
  */
 static VNET_DEFINE(int, ip_rsvp_on);
 VNET_DEFINE(struct socket *, ip_rsvpd);
 
 #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
 
 /*
  * Record `so' as the RSVP daemon socket (V_ip_rsvpd) and switch RSVP
  * handling on.
  *
  * Returns 0 on success, EOPNOTSUPP when `so' is not a raw socket of
  * protocol IPPROTO_RSVP, and EADDRINUSE when a socket is already
  * recorded.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 
 	/* Only a raw socket of the RSVP protocol qualifies. */
 	if (!(so->so_type == SOCK_RAW &&
 	    so->so_proto->pr_protocol == IPPROTO_RSVP))
 		return (EOPNOTSUPP);
 
 	/* Just one daemon socket may be recorded at a time. */
 	if (V_ip_rsvpd != NULL)
 		return (EADDRINUSE);
 	V_ip_rsvpd = so;
 
 	/*
 	 * V_ip_rsvp_on remembers whether our contribution to V_rsvp_on
 	 * was already made, so the counter is bumped at most once.
 	 */
 	if (V_ip_rsvp_on)
 		return (0);
 	V_ip_rsvp_on = 1;
 	V_rsvp_on++;
 	return (0);
 }
 
 /*
  * Forget the recorded RSVP daemon socket and, if ip_rsvp_init() had
  * switched RSVP handling on, switch it off again.  Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 
 	V_ip_rsvpd = NULL;
 
 	/*
 	 * V_ip_rsvp_on tells whether V_rsvp_on currently includes our
 	 * increment; without it there is nothing to take back.
 	 */
 	if (V_ip_rsvp_on == 0)
 		return (0);
 	V_ip_rsvp_on = 0;
 	V_rsvp_on--;
 	return (0);
 }
 
 /*
  * Input routine for RSVP packets.  The packet is taken out of *mp and
  * then either handed to the loaded rsvp_input_p hook, handed to
  * rip_input() when RSVP is on and a daemon socket is recorded, or
  * freed.  Always returns IPPROTO_DONE.
  */
 int
 rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *pkt;
 
 	pkt = *mp;
 	*mp = NULL;
 
 	if (rsvp_input_p != NULL) {
 		/* Call the real one if loaded. */
 		*mp = pkt;
 		rsvp_input_p(mp, offp, proto);
 	} else if (V_rsvp_on && V_ip_rsvpd != NULL) {
 		*mp = pkt;
 		rip_input(mp, offp, proto);
 	} else {
 		/*
 		 * Packets can still arrive with rsvp_on = 0 if there is a
 		 * local member of the group the RSVP packet is addressed
 		 * to; those, and packets nobody listens for, are dropped.
 		 */
 		m_freem(pkt);
 	}
 	return (IPPROTO_DONE);
 }
Index: head/sys/netinet/ip_mroute.c
===================================================================
--- head/sys/netinet/ip_mroute.c	(revision 280970)
+++ head/sys/netinet/ip_mroute.c	(revision 280971)
@@ -1,2948 +1,2948 @@
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1995
  * Modified by Ahmed Helmy, SGI, June 1996
  * Modified by George Edmond Eddy (Rusty), ISI, February 1998
  * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
  * Modified by Hitoshi Asaeda, WIDE, August 2000
  * Modified by Pavlin Radoslavov, ICSI, October 2002
  *
  * MROUTING Revision: 3.5
  * and PIM-SMv2 and PIM-DM support, advanced API support,
  * bandwidth metering and signaling
  */
 
 /*
  * TODO: Prefix functions with ipmf_.
  * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
  * domain attachment (if_afdata) so we can track consumers of that service.
  * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
  * move it to socket options.
  * TODO: Cleanup LSRR removal further.
  * TODO: Push RSVP stubs into raw_ip.c.
  * TODO: Use bitstring.h for vif set.
  * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
  * TODO: Sync ip6_mroute.c with this file.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
 
 #define _PIM_VT 1
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/stddef.h>
 #include <sys/lock.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/counter.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/igmp.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/pim.h>
 #include <netinet/pim_var.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 
 #ifndef KTR_IPMF
 #define KTR_IPMF KTR_INET
 #endif
 
 #define		VIFI_INVALID	((vifi_t) -1)
 
 static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
 #define	V_last_tv_sec	VNET(last_tv_sec)
 
 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
 
 /*
  * Locking.  We use two locks: one for the virtual interface table and
  * one for the forwarding table.  These locks may be nested in which case
  * the VIF lock must always be taken first.  Note that each lock is used
  * to cover not only the specific data structure but also related data
  * structures.
  */
 
 static struct mtx mrouter_mtx;
 #define	MROUTER_LOCK()		mtx_lock(&mrouter_mtx)
 #define	MROUTER_UNLOCK()	mtx_unlock(&mrouter_mtx)
 #define	MROUTER_LOCK_ASSERT()	mtx_assert(&mrouter_mtx, MA_OWNED)
 #define	MROUTER_LOCK_INIT()						\
 	mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER_LOCK_DESTROY()	mtx_destroy(&mrouter_mtx)
 
 static int ip_mrouter_cnt;	/* # of vnets with active mrouters */
 static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
 
 static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
 VNET_PCPUSTAT_SYSINIT(mrtstat);
 VNET_PCPUSTAT_SYSUNINIT(mrtstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
     mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
     "netinet/ip_mroute.h)");
 
 static VNET_DEFINE(u_long, mfchash);
 #define	V_mfchash		VNET(mfchash)
 #define	MFCHASH(a, g)							\
 	((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
 	  ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash)
 #define	MFCHASHSIZE	256
 
 static u_long mfchashsize;			/* Hash size */
 static VNET_DEFINE(u_char *, nexpire);		/* 0..mfchashsize-1 */
 #define	V_nexpire		VNET(nexpire)
 static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
 #define	V_mfchashtbl		VNET(mfchashtbl)
 
 static struct mtx mfc_mtx;
 #define	MFC_LOCK()		mtx_lock(&mfc_mtx)
 #define	MFC_UNLOCK()		mtx_unlock(&mfc_mtx)
 #define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
 #define	MFC_LOCK_INIT()							\
 	mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
 #define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
 
 static VNET_DEFINE(vifi_t, numvifs);
 #define	V_numvifs		VNET(numvifs)
 static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
 #define	V_viftable		VNET(viftable)
 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
     "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
 
 static struct mtx vif_mtx;
 #define	VIF_LOCK()		mtx_lock(&vif_mtx)
 #define	VIF_UNLOCK()		mtx_unlock(&vif_mtx)
 #define	VIF_LOCK_ASSERT()	mtx_assert(&vif_mtx, MA_OWNED)
 #define	VIF_LOCK_INIT()							\
 	mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
 #define	VIF_LOCK_DESTROY()	mtx_destroy(&vif_mtx)
 
 static eventhandler_tag if_detach_event_tag = NULL;
 
 static VNET_DEFINE(struct callout, expire_upcalls_ch);
 #define	V_expire_upcalls_ch	VNET(expire_upcalls_ch)
 
 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
 #define		UPCALL_EXPIRE	6		/* number of timeouts	*/
 
 /*
  * Bandwidth meter variables and constants
  */
 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
 /*
  * Pending timeouts are stored in a hash table, the key being the
  * expiration time. Periodically, the entries are analysed and processed.
  */
 #define	BW_METER_BUCKETS	1024
 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
 #define	V_bw_meter_timers	VNET(bw_meter_timers)
 static VNET_DEFINE(struct callout, bw_meter_ch);
 #define	V_bw_meter_ch		VNET(bw_meter_ch)
 #define	BW_METER_PERIOD (hz)		/* periodical handling of bw meters */
 
 /*
  * Pending upcalls are stored in a vector which is flushed when
  * full, or periodically
  */
 static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
 #define	V_bw_upcalls		VNET(bw_upcalls)
 static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
 #define	V_bw_upcalls_n    	VNET(bw_upcalls_n)
 static VNET_DEFINE(struct callout, bw_upcalls_ch);
 #define	V_bw_upcalls_ch		VNET(bw_upcalls_ch)
 
 #define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
 
 static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
 VNET_PCPUSTAT_SYSINIT(pimstat);
 VNET_PCPUSTAT_SYSUNINIT(pimstat);
 
 SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
     pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
 
 static u_long	pim_squelch_wholepkt = 0;
 SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
     &pim_squelch_wholepkt, 0,
     "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
 
 extern  struct domain inetdomain;
 /*
  * Protocol switch entry for IPPROTO_PIM raw sockets in the inet domain:
  * input is handled by pim_input(), while output, ctloutput and the user
  * requests reuse the raw IP (rip_*) handlers.
  */
 static const struct protosw in_pim_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		pim_input,
 	.pr_output =		rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 static const struct encaptab *pim_encap_cookie;
 
 static int pim_encapcheck(const struct mbuf *, int, int, void *);
 
 /*
  * Note: the PIM Register encapsulation adds the following in front of a
  * data packet:
  *
  * struct pim_encap_hdr {
  *    struct ip ip;
  *    struct pim_encap_pimhdr  pim;
  * }
  *
  */
 
 /* PIM part of the Register encapsulation: a PIM header plus a flags word. */
 struct pim_encap_pimhdr {
 	struct pim pim;		/* PIM header */
 	uint32_t   flags;	/* 32-bit flags word following the header */
 };
 #define		PIM_ENCAP_TTL	64
 
 /*
  * Template outer IP header for the PIM Register encapsulation, written
  * as a positional initializer; the per-line comments name the member
  * each value is meant for.  Members after the checksum have no
  * initializer and are therefore zero.
  * NOTE(review): the total-length slot holds sizeof(struct ip) as a plain
  * constant -- confirm that the code using this template fills in the
  * real length (and byte order) before the header is sent.
  */
 static struct ip pim_encap_iphdr = {
 	/* The first two values swap places with the byte order. */
 #if BYTE_ORDER == LITTLE_ENDIAN
 	sizeof(struct ip) >> 2,	/* sizeof(struct ip) in 32-bit words */
 	IPVERSION,
 #else
 	IPVERSION,
 	sizeof(struct ip) >> 2,	/* sizeof(struct ip) in 32-bit words */
 #endif
 	0,			/* tos */
 	sizeof(struct ip),	/* total length */
 	0,			/* id */
 	0,			/* frag offset */
 	PIM_ENCAP_TTL,
 	IPPROTO_PIM,
 	0,			/* checksum */
 };
 
 /*
  * Template PIM part of the Register encapsulation: version/type set to
  * PIM_VERSION / PIM_REGISTER, everything else zero.
  */
 static struct pim_encap_pimhdr pim_encap_pimhdr = {
     {
 	PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
 	0,			/* reserved */
 	0,			/* checksum */
     },
     0				/* flags */
 };
 
 static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
 #define	V_reg_vif_num		VNET(reg_vif_num)
 static VNET_DEFINE(struct ifnet, multicast_register_if);
 #define	V_multicast_register_if	VNET(multicast_register_if)
 
 /*
  * Private variables.
  */
 
 static u_long	X_ip_mcast_src(int);
 static int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
 		    struct ip_moptions *);
 static int	X_ip_mrouter_done(void);
 static int	X_ip_mrouter_get(struct socket *, struct sockopt *);
 static int	X_ip_mrouter_set(struct socket *, struct sockopt *);
 static int	X_legal_vif_num(int);
 static int	X_mrt_ioctl(u_long, caddr_t, int);
 
 static int	add_bw_upcall(struct bw_upcall *);
 static int	add_mfc(struct mfcctl2 *);
 static int	add_vif(struct vifctl *);
 static void	bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
 static void	bw_meter_process(void);
 static void	bw_meter_receive_packet(struct bw_meter *, int,
 		    struct timeval *);
 static void	bw_upcalls_send(void);
 static int	del_bw_upcall(struct bw_upcall *);
 static int	del_mfc(struct mfcctl2 *);
 static int	del_vif(vifi_t);
 static int	del_vif_locked(vifi_t);
 static void	expire_bw_meter_process(void *);
 static void	expire_bw_upcalls_send(void *);
 static void	expire_mfc(struct mfc *);
 static void	expire_upcalls(void *);
 static void	free_bw_list(struct bw_meter *);
 static int	get_sg_cnt(struct sioc_sg_req *);
 static int	get_vif_cnt(struct sioc_vif_req *);
 static void	if_detached_event(void *, struct ifnet *);
 static int	ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
 static int	ip_mrouter_init(struct socket *, int);
 static __inline struct mfc *
 		mfc_find(struct in_addr *, struct in_addr *);
 static void	phyint_send(struct ip *, struct vif *, struct mbuf *);
 static struct mbuf *
 		pim_register_prepare(struct ip *, struct mbuf *);
 static int	pim_register_send(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_rp(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_upcall(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static void	schedule_bw_meter(struct bw_meter *, struct timeval *);
 static void	send_packet(struct vif *, struct mbuf *);
 static int	set_api_config(uint32_t *);
 static int	set_assert(int);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in *);
 static void	unschedule_bw_meter(struct bw_meter *);
 
 /*
  * Kernel multicast forwarding API capabilities and setup.
  * If more API capabilities are added to the kernel, they should be
  * recorded in `mrt_api_support'.
  */
 #define MRT_API_VERSION		0x0305
 
 /* Value copied out to userland by the MRT_VERSION getsockopt. */
 static const int mrt_api_version = MRT_API_VERSION;
 /* Capability bits offered; set_api_config() masks requests with this. */
 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
 					 MRT_MFC_FLAGS_BORDER_VIF |
 					 MRT_MFC_RP |
 					 MRT_MFC_BW_UPCALL);
 /* Capabilities currently enabled (always a subset of mrt_api_support). */
 static VNET_DEFINE(uint32_t, mrt_api_config);
 #define	V_mrt_api_config	VNET(mrt_api_config)
 /* Non-zero while PIM assert (wrong-vif upcall) processing is enabled. */
 static VNET_DEFINE(int, pim_assert_enabled);
 #define	V_pim_assert_enabled	VNET(pim_assert_enabled)
 /* Interval passed to ratecheck() to pace WRONGVIF upcalls per mfc entry. */
 static struct timeval pim_assert_interval = { 3, 0 };	/* Rate limit */
 
 /*
  * Look up the forwarding cache entry for origin `o' and group `g'.
  * Only entries whose stall queue is empty are considered a match.
  * Returns the entry, or NULL when the hash chain holds no such entry.
  * The MFC lock must be held; statistics are left to the caller.
  */
 static __inline struct mfc *
 mfc_find(struct in_addr *o, struct in_addr *g)
 {
 	struct mfc *entry;
 
 	MFC_LOCK_ASSERT();
 
 	LIST_FOREACH(entry, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
 		if (!in_hosteq(entry->mfc_origin, *o))
 			continue;
 		if (!in_hosteq(entry->mfc_mcastgrp, *g))
 			continue;
 		/* Entries still waiting on an upcall are skipped. */
 		if (TAILQ_EMPTY(&entry->mfc_stall))
 			return (entry);
 	}
 
 	return (NULL);
 }
 
 /*
  * Handle MRT setsockopt commands to modify the multicast forwarding tables.
  */
 static int
 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
     int	error, optval;
     vifi_t	vifi;
     struct	vifctl vifc;
     struct	mfcctl2 mfc;
     struct	bw_upcall bw_upcall;
     uint32_t	i;
 
     if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT)
 	return EPERM;
 
     error = 0;
     switch (sopt->sopt_name) {
     case MRT_INIT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	error = ip_mrouter_init(so, optval);
 	break;
 
     case MRT_DONE:
 	error = ip_mrouter_done();
 	break;
 
     case MRT_ADD_VIF:
 	error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 	if (error)
 	    break;
 	error = add_vif(&vifc);
 	break;
 
     case MRT_DEL_VIF:
 	error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 	if (error)
 	    break;
 	error = del_vif(vifi);
 	break;
 
     case MRT_ADD_MFC:
     case MRT_DEL_MFC:
 	/*
 	 * select data size depending on API version.
 	 */
 	if (sopt->sopt_name == MRT_ADD_MFC &&
 		V_mrt_api_config & MRT_API_FLAGS_ALL) {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
 				sizeof(struct mfcctl2));
 	} else {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
 				sizeof(struct mfcctl));
 	    bzero((caddr_t)&mfc + sizeof(struct mfcctl),
 			sizeof(mfc) - sizeof(struct mfcctl));
 	}
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_MFC)
 	    error = add_mfc(&mfc);
 	else
 	    error = del_mfc(&mfc);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	set_assert(optval);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 	if (!error)
 	    error = set_api_config(&i);
 	if (!error)
 	    error = sooptcopyout(sopt, &i, sizeof i);
 	break;
 
     case MRT_ADD_BW_UPCALL:
     case MRT_DEL_BW_UPCALL:
 	error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
 				sizeof bw_upcall);
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
 	    error = add_bw_upcall(&bw_upcall);
 	else
 	    error = del_bw_upcall(&bw_upcall);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle MRT getsockopt commands.
  *
  * Copies the requested value (API version, assert flag, supported or
  * configured capability mask) out through `sopt'.  Returns the result of
  * sooptcopyout(), or EOPNOTSUPP for an option that is not recognised.
  */
 static int
 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
     switch (sopt->sopt_name) {
     case MRT_VERSION:
 	return sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
 
     case MRT_ASSERT:
 	return sooptcopyout(sopt, &V_pim_assert_enabled,
 	    sizeof V_pim_assert_enabled);
 
     case MRT_API_SUPPORT:
 	return sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
 
     case MRT_API_CONFIG:
 	return sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config);
 
     default:
 	break;
     }
 
     return EOPNOTSUPP;
 }
 
 /*
  * Handle ioctl commands to obtain information from the cache.
  *
  * SIOCGETVIFCNT and SIOCGETSGCNT are served; any other command returns
  * EINVAL.  A failed privilege check is returned to the caller unchanged.
  */
 static int
 X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
 {
     int rc;
 
     /*
      * Currently the only function calling this ioctl routine is rtioctl().
      * Typically, only root can create the raw socket in order to execute
      * this ioctl method, however the request might be coming from a prison
      */
     rc = priv_check(curthread, PRIV_NETINET_MROUTE);
     if (rc != 0)
 	return (rc);
 
     if (cmd == SIOCGETVIFCNT)
 	rc = get_vif_cnt((struct sioc_vif_req *)data);
     else if (cmd == SIOCGETSGCNT)
 	rc = get_sg_cnt((struct sioc_sg_req *)data);
     else
 	rc = EINVAL;
 
     return rc;
 }
 
 /*
  * Report the packet, byte and wrong-interface counters of the forwarding
  * cache entry for the (source, group) pair in `req'.
  *
  * Returns 0 on success.  When no entry exists all three counters are set
  * to 0xffffffff and EADDRNOTAVAIL is returned.
  */
 static int
 get_sg_cnt(struct sioc_sg_req *req)
 {
     struct mfc *entry;
 
     MFC_LOCK();
     entry = mfc_find(&req->src, &req->grp);
     if (entry != NULL) {
 	req->pktcnt = entry->mfc_pkt_cnt;
 	req->bytecnt = entry->mfc_byte_cnt;
 	req->wrong_if = entry->mfc_wrong_if;
 	MFC_UNLOCK();
 	return 0;
     }
     MFC_UNLOCK();
 
     /* No such route: flag every counter as invalid. */
     req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
     return EADDRNOTAVAIL;
 }
 
 /*
  * returns the input and output packet and byte counts on the vif provided
  */
 static int
 get_vif_cnt(struct sioc_vif_req *req)
 {
     vifi_t vifi = req->vifi;
 
     VIF_LOCK();
     if (vifi >= V_numvifs) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
 
     req->icount = V_viftable[vifi].v_pkt_in;
     req->ocount = V_viftable[vifi].v_pkt_out;
     req->ibytes = V_viftable[vifi].v_bytes_in;
     req->obytes = V_viftable[vifi].v_bytes_out;
     VIF_UNLOCK();
 
     return 0;
 }
 
 /*
  * Tear down the multicast forwarding state that refers to `ifp'.
  * Nothing is done when no routing socket is registered
  * (V_ip_mrouter == NULL).  Locks are taken in the order
  * MROUTER, VIF, MFC and released in reverse.
  * NOTE(review): the name suggests this is an ifnet departure event
  * handler; the registration is not visible here -- confirm.
  */
 static void
 if_detached_event(void *arg __unused, struct ifnet *ifp)
 {
     vifi_t vifi;
     u_long i;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return;
     }
 
     VIF_LOCK();
     MFC_LOCK();
 
     /*
      * Tear down multicast forwarder state associated with this ifnet.
      * 1. Walk the vif list, matching vifs against this ifnet.
      * 2. Walk the multicast forwarding cache (mfc) looking for
      *    inner matches with this vif's index.
      * 3. Expire any matching multicast forwarding cache entries.
      * 4. Free vif state. This should disable ALLMULTI on the interface.
      */
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	if (V_viftable[vifi].v_ifp != ifp)
 		continue;
 	for (i = 0; i < mfchashsize; i++) {
 		struct mfc *rt, *nrt;
 
 		/* _SAFE variant: expire_mfc() unlinks and frees rt. */
 		LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 			if (rt->mfc_parent == vifi) {
 				expire_mfc(rt);
 			}
 		}
 	}
 	/* May lower V_numvifs, which the outer loop re-reads each pass. */
 	del_vif_locked(vifi);
     }
 
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     MROUTER_UNLOCK();
 }
                         
 /*
  * Enable multicast forwarding.
  */
 static int
 ip_mrouter_init(struct socket *so, int version)
 {
 
     CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
         so->so_type, so->so_proto->pr_protocol);
 
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
 	return EOPNOTSUPP;
 
     if (version != 1)
 	return ENOPROTOOPT;
 
     MROUTER_LOCK();
 
     if (ip_mrouter_unloading) {
 	MROUTER_UNLOCK();
 	return ENOPROTOOPT;
     }
 
     if (V_ip_mrouter != NULL) {
 	MROUTER_UNLOCK();
 	return EADDRINUSE;
     }
 
     V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
 	HASH_NOWAIT);
 
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	curvnet);
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
 	curvnet);
 
     V_ip_mrouter = so;
     ip_mrouter_cnt++;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Disable multicast forwarding.
  *
  * Unregisters the routing socket, releases every vif and forwarding cache
  * entry, stops the periodic callouts and resets the related counters.
  * Returns 0 on success or EINVAL when no routing socket is registered.
  */
 static int
 X_ip_mrouter_done(void)
 {
     struct ifnet *ifp;
     u_long i;
     vifi_t vifi;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return EINVAL;
     }
 
     /*
      * Detach/disable hooks to the rest of the system.
      */
     V_ip_mrouter = NULL;
     ip_mrouter_cnt--;
     V_mrt_api_config = 0;
 
     VIF_LOCK();
 
     /*
      * For each phyint in use, disable promiscuous reception of all IP
      * multicasts.
      */
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
 		!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
 	    ifp = V_viftable[vifi].v_ifp;
 	    if_allmulti(ifp, 0);
 	}
     }
     /* Wipe the whole vif table and the state derived from it. */
     bzero((caddr_t)V_viftable, sizeof(V_viftable));
     V_numvifs = 0;
     V_pim_assert_enabled = 0;
     
     VIF_UNLOCK();
 
     callout_stop(&V_expire_upcalls_ch);
     callout_stop(&V_bw_upcalls_ch);
     callout_stop(&V_bw_meter_ch);
 
     MFC_LOCK();
 
     /*
      * Free all multicast forwarding cache entries.
      * Do not use hashdestroy(), as we must perform other cleanup.
      */
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	/* expire_mfc() unlinks and frees rt, hence the _SAFE iterator. */
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		expire_mfc(rt);
 	}
     }
     free(V_mfchashtbl, M_MRTABLE);
     V_mfchashtbl = NULL;
 
     /* Reset the per-bucket pending-upcall counters. */
     bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
 
     V_bw_upcalls_n = 0;
     bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 
     MFC_UNLOCK();
 
     V_reg_vif_num = VIFI_INVALID;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Set the PIM assert processing global.
  * Only 0 and 1 are accepted; any other value returns EINVAL and leaves
  * the setting untouched.
  */
 static int
 set_assert(int i)
 {
     switch (i) {
     case 0:
     case 1:
 	V_pim_assert_enabled = i;
 	return 0;
     default:
 	break;
     }
 
     return EINVAL;
 }
 
 /*
  * Configure API capabilities.
  *
  * On entry `*apival' holds the capability bits requested by the caller.
  * On success it is rewritten with the subset that was enabled (the request
  * masked with mrt_api_support) and 0 is returned.  On failure `*apival' is
  * cleared and EPERM is returned.
  */
 static int
 set_api_config(uint32_t *apival)
 {
     u_long i;
 
     /*
      * We can set the API capabilities only if it is the first operation
      * after MRT_INIT. I.e.:
      *  - there are no vifs installed
      *  - pim_assert is not enabled
      *  - the MFC table is empty
      */
     if (V_numvifs > 0) {
 	*apival = 0;
 	return EPERM;
     }
     if (V_pim_assert_enabled) {
 	*apival = 0;
 	return EPERM;
     }
 
     MFC_LOCK();
 
     /* Any non-empty hash bucket means the MFC table is already in use. */
     for (i = 0; i < mfchashsize; i++) {
 	if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
 	    MFC_UNLOCK();
 	    *apival = 0;
 	    return EPERM;
 	}
     }
 
     MFC_UNLOCK();
 
     V_mrt_api_config = *apival & mrt_api_support;
     *apival = V_mrt_api_config;
 
     return 0;
 }
 
 /*
  * Add a vif to the vif table
  *
  * Installs the virtual interface described by `vifcp' in slot
  * vifcp->vifc_vifi.  Returns 0 on success, otherwise:
  *   EINVAL        - index not below MAXVIFS, or a rate limit was requested
  *   EADDRINUSE    - the slot already has a local address
  *   EADDRNOTAVAIL - no local address given, or no interface owns it
  *   EOPNOTSUPP    - tunnel vif requested, or interface lacks IFF_MULTICAST
  *   (other)       - error returned by if_allmulti()
  */
 static int
 add_vif(struct vifctl *vifcp)
 {
     struct vif *vifp = V_viftable + vifcp->vifc_vifi;
     struct sockaddr_in sin = {sizeof sin, AF_INET};
     struct ifaddr *ifa;
     struct ifnet *ifp;
     int error;
 
     VIF_LOCK();
     /* vifp is only dereferenced after this bounds check. */
     if (vifcp->vifc_vifi >= MAXVIFS) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
     /* rate limiting is no longer supported by this code */
     if (vifcp->vifc_rate_limit != 0) {
 	log(LOG_ERR, "rate limiting is no longer supported\n");
 	VIF_UNLOCK();
 	return EINVAL;
     }
     if (!in_nullhost(vifp->v_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRINUSE;
     }
     if (in_nullhost(vifcp->vifc_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /* Find the interface with an address in AF_INET family */
     if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/*
 	 * XXX: Because VIFF_REGISTER does not really need a valid
 	 * local interface (e.g. it could be 127.0.0.2), we don't
 	 * check its address.
 	 */
 	ifp = NULL;
     } else {
 	sin.sin_addr = vifcp->vifc_lcl_addr;
 	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 	if (ifa == NULL) {
 	    VIF_UNLOCK();
 	    return EADDRNOTAVAIL;
 	}
 	ifp = ifa->ifa_ifp;
 	ifa_free(ifa);
     }
 
     if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
 	CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__);
 	VIF_UNLOCK();
 	return EOPNOTSUPP;
     } else if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/* Register vifs all share the pseudo-interface below. */
 	ifp = &V_multicast_register_if;
 	CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp);
 	if (V_reg_vif_num == VIFI_INVALID) {
 	    if_initname(&V_multicast_register_if, "register_vif", 0);
 	    V_multicast_register_if.if_flags = IFF_LOOPBACK;
 	    V_reg_vif_num = vifcp->vifc_vifi;
 	}
     } else {		/* Make sure the interface supports multicast */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 	    VIF_UNLOCK();
 	    return EOPNOTSUPP;
 	}
 
 	/* Enable promiscuous reception of all IP multicasts from the if */
 	error = if_allmulti(ifp, 1);
 	if (error) {
 	    VIF_UNLOCK();
 	    return error;
 	}
     }
 
     vifp->v_flags     = vifcp->vifc_flags;
     vifp->v_threshold = vifcp->vifc_threshold;
     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
     vifp->v_ifp       = ifp;
     /* initialize per vif pkt counters */
     vifp->v_pkt_in    = 0;
     vifp->v_pkt_out   = 0;
     vifp->v_bytes_in  = 0;
     vifp->v_bytes_out = 0;
 
     /* Adjust numvifs up if the vifi is higher than numvifs */
     if (V_numvifs <= vifcp->vifc_vifi)
 	V_numvifs = vifcp->vifc_vifi + 1;
 
     VIF_UNLOCK();
 
     CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__,
 	(int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr),
 	(int)vifcp->vifc_threshold);
 
     return 0;
 }
 
 /*
  * Delete a vif from the vif table.  The VIF lock must already be held.
  *
  * Returns 0 on success, EINVAL when `vifi' is not below the current vif
  * count, or EADDRNOTAVAIL when the slot has no local address.
  */
 static int
 del_vif_locked(vifi_t vifi)
 {
     struct vif *slot;
     vifi_t top;
 
     VIF_LOCK_ASSERT();
 
     if (vifi >= V_numvifs)
 	return EINVAL;
 
     slot = V_viftable + vifi;
     if (in_nullhost(slot->v_lcl_addr))
 	return EADDRNOTAVAIL;
 
     /* Physical interfaces had all-multicast reception turned on. */
     if ((slot->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)) == 0)
 	if_allmulti(slot->v_ifp, 0);
 
     if ((slot->v_flags & VIFF_REGISTER) != 0)
 	V_reg_vif_num = VIFI_INVALID;
 
     bzero((caddr_t)slot, sizeof (*slot));
 
     CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi);
 
     /* Shrink numvifs to just past the highest slot still in use. */
     top = V_numvifs;
     while (top > 0 && in_nullhost(V_viftable[top - 1].v_lcl_addr))
 	top--;
     V_numvifs = top;
 
     return 0;
 }
 
 /*
  * Locking wrapper around del_vif_locked(): takes the VIF lock, deletes
  * the vif and hands back the result unchanged.
  */
 static int
 del_vif(vifi_t vifi)
 {
     int error;
 
     VIF_LOCK();
     error = del_vif_locked(vifi);
     VIF_UNLOCK();
 
     return (error);
 }
 
 /*
  * Refresh the parent vif, per-vif TTLs/flags and RP address of an mfc
  * entry from `mfccp', leaving its counters and S,G addresses alone.
  */
 static void
 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     int v;
 
     rt->mfc_parent = mfccp->mfcc_parent;
 
     v = 0;
     while (v < V_numvifs) {
 	rt->mfc_ttls[v] = mfccp->mfcc_ttls[v];
 	/* Keep only flag bits that are both known and enabled. */
 	rt->mfc_flags[v] = mfccp->mfcc_flags[v] & V_mrt_api_config &
 	    MRT_MFC_FLAGS_ALL;
 	v++;
     }
 
     /* The RP address is honoured only when MRT_MFC_RP is enabled. */
     if ((V_mrt_api_config & MRT_MFC_RP) == 0)
 	rt->mfc_rp.s_addr = INADDR_ANY;
     else
 	rt->mfc_rp = mfccp->mfcc_rp;
 }
 
 /*
  * Fully initialize an mfc entry from `mfccp': zero the statistics and
  * assert timestamp, set the S,G addresses, then apply the updatable
  * parameters via update_mfc_params().
  */
 static void
 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     /* start the per src-grp statistics from scratch */
     rt->mfc_wrong_if = 0;
     rt->mfc_byte_cnt = 0;
     rt->mfc_pkt_cnt = 0;
     timevalclear(&rt->mfc_last_assert);
 
     rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
     rt->mfc_origin = mfccp->mfcc_origin;
 
     update_mfc_params(rt, mfccp);
 }
 
 static void
 expire_mfc(struct mfc *rt)
 {
 	struct rtdetq *rte, *nrte;
 
 	MFC_LOCK_ASSERT();
 
 	free_bw_list(rt->mfc_bw_meter);
 
 	TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 		m_freem(rte->m);
 		TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 		free(rte, M_MRTABLE);
 	}
 
 	LIST_REMOVE(rt, mfc_hash);
 	free(rt, M_MRTABLE);
 }
 
 /*
  * Add an mfc entry
  *
  * Three cases are handled, in this order:
  *  1. a resolved entry (empty stall queue) exists: update it in place;
  *  2. entries with stalled packets exist: initialise them and flush their
  *     queues, attempting to forward each packet first;
  *  3. otherwise reuse any entry with the same S,G or allocate a new one.
  * Returns 0 on success or ENOBUFS when a new entry cannot be allocated.
  */
 static int
 add_mfc(struct mfcctl2 *mfccp)
 {
     struct mfc *rt;
     struct rtdetq *rte, *nrte;
     u_long hash = 0;
     u_short nstl;
 
     VIF_LOCK();
     MFC_LOCK();
 
     rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
 
     /* If an entry already exists, just update the fields */
     if (rt) {
 	CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x",
 	    __func__, inet_ntoa(mfccp->mfcc_origin),
 	    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 	    mfccp->mfcc_parent);
 	update_mfc_params(rt, mfccp);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return (0);
     }
 
     /*
      * Find the entry for which the upcall was made and update
      */
     nstl = 0;	/* number of stalled entries matched */
     hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
     LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 	if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 	    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
 	    !TAILQ_EMPTY(&rt->mfc_stall)) {
 		CTR5(KTR_IPMF,
 		    "%s: add mfc orig %s group %lx parent %x qh %p",
 		    __func__, inet_ntoa(mfccp->mfcc_origin),
 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		    mfccp->mfcc_parent,
 		    TAILQ_FIRST(&rt->mfc_stall));
 		if (nstl++)
 			CTR1(KTR_IPMF, "%s: multiple matches", __func__);
 
 		init_mfc_params(rt, mfccp);
 		rt->mfc_expire = 0;	/* Don't clean this guy up */
 		V_nexpire[hash]--;
 
 		/* Free queued packets, but attempt to forward them first. */
 		TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 			if (rte->ifp != NULL)
 				ip_mdq(rte->m, rte->ifp, rt, -1);
 			m_freem(rte->m);
 			TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 			rt->mfc_nstall--;
 			free(rte, M_MRTABLE);
 		}
 	}
     }
 
     /*
      * It is possible that an entry is being inserted without an upcall
      */
     if (nstl == 0) {
 	CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__);
 	/* rt is NULL after this loop unless a matching entry was found. */
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 		    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
 			init_mfc_params(rt, mfccp);
 			if (rt->mfc_expire)
 			    V_nexpire[hash]--;
 			rt->mfc_expire = 0;
 			break; /* XXX */
 		}
 	}
 
 	if (rt == NULL) {		/* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL) {
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (ENOBUFS);
 	    }
 
 	    init_mfc_params(rt, mfccp);
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    rt->mfc_expire     = 0;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* insert new entry at head of hash chain */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	}
     }
 
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     return (0);
 }
 
 /*
  * Delete an mfc entry
  */
 static int
 del_mfc(struct mfcctl2 *mfccp)
 {
     struct in_addr	origin;
     struct in_addr	mcastgrp;
     struct mfc		*rt;
 
     origin = mfccp->mfcc_origin;
     mcastgrp = mfccp->mfcc_mcastgrp;
 
     CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__,
 	inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr));
 
     MFC_LOCK();
 
     rt = mfc_find(&origin, &mcastgrp);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /*
      * free the bw_meter entries
      */
     free_bw_list(rt->mfc_bw_meter);
     rt->mfc_bw_meter = NULL;
 
     LIST_REMOVE(rt, mfc_hash);
     free(rt, M_MRTABLE);
 
     MFC_UNLOCK();
 
     return (0);
 }
 
 /*
  * Send a message to the routing daemon on the multicast routing socket.
  *
  * Appends `mm' (tagged with source address `src') to the receive buffer
  * of `s' and wakes the reader.  Returns 0 on success.  Returns -1, after
  * freeing `mm', when `s' is NULL or the append fails.
  */
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 {
     if (s == NULL) {
 	m_freem(mm);
 	return -1;
     }
 
     SOCKBUF_LOCK(&s->so_rcv);
     if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
 	NULL) == 0) {
 	SOCKBUF_UNLOCK(&s->so_rcv);
 	m_freem(mm);
 	return -1;
     }
 
     /*
      * No explicit unlock on this path; presumably sorwakeup_locked()
      * drops the sockbuf lock -- confirm against its definition.
      */
     sorwakeup_locked(s);
     return 0;
 }
 
 /*
  * IP multicast forwarding function. This function assumes that the packet
  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IP multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  *
  * When no resolved forwarding entry exists, a copy of the packet is queued
  * on a stalled entry; for the first such packet of a flow an
  * IGMPMSG_NOCACHE upcall is also sent to the routing daemon.  ENOBUFS is
  * returned when an allocation fails or the upcall cannot be queued.
  */
 
 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
 
 static int
 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
     struct ip_moptions *imo)
 {
     struct mfc *rt;
     int error;
     vifi_t vifi;
 
     CTR3(KTR_IPMF, "ip_mforward: orig %s group %lx ifp %p",
 	inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp);
 
     if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
 		((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
 	/*
 	 * Packet arrived via a physical interface or
 	 * an encapsulated tunnel or a register_vif.
 	 */
     } else {
 	/*
 	 * Packet arrived through a source-route tunnel.
 	 * Source-route tunnels are no longer supported.
 	 */
 	return (1);
     }
 
     VIF_LOCK();
     MFC_LOCK();
     /* The caller named an explicit outgoing vif: send on that one only. */
     if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) {
 	if (ip->ip_ttl < MAXTTL)
 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
 	error = ip_mdq(m, ifp, NULL, vifi);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     }
 
     /*
      * Don't forward a packet with time-to-live of zero or one,
      * or a packet destined to a local-only group.
      */
     if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return 0;
     }
 
     /*
      * Determine forwarding vifs from the forwarding cache table
      */
     MRTSTAT_INC(mrts_mfc_lookups);
     rt = mfc_find(&ip->ip_src, &ip->ip_dst);
 
     /* Entry exists, so forward if necessary */
     if (rt != NULL) {
 	error = ip_mdq(m, ifp, rt, -1);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     } else {
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon
 	 */
 
 	struct mbuf *mb0;
 	struct rtdetq *rte;
 	u_long hash;
 	int hlen = ip->ip_hl << 2;
 
 	MRTSTAT_INC(mrts_mfc_misses);
 	MRTSTAT_INC(mrts_no_route);
 	CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)",
 	    inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we are
 	 * just going to fail anyway.  Make sure to pullup the header so
 	 * that other people can't step on it.
 	 */
 	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
 	    M_NOWAIT|M_ZERO);
 	if (rte == NULL) {
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	mb0 = m_copypacket(m, M_NOWAIT);
 	if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen))
 	    mb0 = m_pullup(mb0, hlen);
 	if (mb0 == NULL) {
 	    free(rte, M_MRTABLE);
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	/* is there an upcall waiting for this flow ? */
 	hash = MFCHASH(ip->ip_src, ip->ip_dst);
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
 		    in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
 		    !TAILQ_EMPTY(&rt->mfc_stall))
 			break;
 	}
 
 	if (rt == NULL) {
 	    int i;
 	    struct igmpmsg *im;
 	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 	    struct mbuf *mm;
 
 	    /*
 	     * Locate the vifi for the incoming interface for this packet.
 	     * If none found, drop packet.
 	     */
 	    for (vifi = 0; vifi < V_numvifs &&
 		    V_viftable[vifi].v_ifp != ifp; vifi++)
 		;
 	    if (vifi >= V_numvifs)	/* vif not found, drop packet */
 		goto non_fatal;
 
 	    /* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL)
 		goto fail;
 
 	    /* Make a copy of the header to send to the user level process */
 	    mm = m_copy(mb0, 0, hlen);
 	    if (mm == NULL)
 		goto fail1;
 
 	    /*
 	     * Send message to routing daemon to install
 	     * a route into the kernel table
 	     */
 
 	    im = mtod(mm, struct igmpmsg *);
 	    im->im_msgtype = IGMPMSG_NOCACHE;
 	    im->im_mbz = 0;
 	    im->im_vif = vifi;
 
 	    MRTSTAT_INC(mrts_upcalls);
 
 	    k_igmpsrc.sin_addr = ip->ip_src;
 	    if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		CTR0(KTR_IPMF, "ip_mforward: socket queue full");
 		MRTSTAT_INC(mrts_upq_sockfull);
 		/* Shared error exit: the labels below unwind in order. */
 fail1:
 		free(rt, M_MRTABLE);
 fail:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return ENOBUFS;
 	    }
 
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
 	    rt->mfc_expire	      = UPCALL_EXPIRE;
 	    V_nexpire[hash]++;
 	    for (i = 0; i < V_numvifs; i++) {
 		rt->mfc_ttls[i] = 0;
 		rt->mfc_flags[i] = 0;
 	    }
 	    rt->mfc_parent = -1;
 
 	    /* clear the RP address */
 	    rt->mfc_rp.s_addr = INADDR_ANY;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* initialize pkt counters per src-grp */
 	    rt->mfc_pkt_cnt = 0;
 	    rt->mfc_byte_cnt = 0;
 	    rt->mfc_wrong_if = 0;
 	    timevalclear(&rt->mfc_last_assert);
 
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    /* link into table */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	    TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 
 	} else {
 	    /* determine if queue has overflowed */
 	    if (rt->mfc_nstall > MAX_UPQ) {
 		MRTSTAT_INC(mrts_upq_ovflw);
 		/* Packet is dropped without reporting an error. */
 non_fatal:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (0);
 	    }
 	    TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 	}
 
 	/* rte is already queued; record the packet copy it carries. */
 	rte->m			= mb0;
 	rte->ifp		= ifp;
 
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 
 	return 0;
     }
 }
 
 /*
  * Clean up the cache entry if upcall is not serviced
  *
  * Callout handler; `arg' is the vnet to operate in.  Each run decrements
  * the expiry counter of every stalled entry, destroys those reaching zero,
  * and re-arms the callout for EXPIRE_TIMEOUT.
  */
 static void
 expire_upcalls(void *arg)
 {
     u_long i;
 
     CURVNET_SET((struct vnet *) arg);
 
     MFC_LOCK();
 
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	/* Skip buckets with no entries awaiting an upcall reply. */
 	if (V_nexpire[i] == 0)
 	    continue;
 
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		if (TAILQ_EMPTY(&rt->mfc_stall))
 			continue;
 
 		/* Not subject to expiry, or still has time left. */
 		if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
 			continue;
 
 		/*
 		 * free the bw_meter entries
 		 */
 		while (rt->mfc_bw_meter != NULL) {
 		    struct bw_meter *x = rt->mfc_bw_meter;
 
 		    rt->mfc_bw_meter = x->bm_mfc_next;
 		    free(x, M_BWMETER);
 		}
 
 		MRTSTAT_INC(mrts_cache_cleanups);
 		CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
 		    (u_long)ntohl(rt->mfc_origin.s_addr),
 		    (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
 
 		expire_mfc(rt);
 	    }
     }
 
     MFC_UNLOCK();
 
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
 
     CURVNET_RESTORE();
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made
  *
  * With a valid `xmt_vif' the packet is sent on that vif only and 1 is
  * returned.  Otherwise the packet must have arrived on the parent vif of
  * `rt'; if it did not, the wrong-interface counters are bumped, an
  * IGMPMSG_WRONGVIF upcall may be sent (rate limited, only when PIM assert
  * processing is enabled) and the packet is not forwarded.  A packet from
  * the right interface is copied to every vif whose TTL threshold it
  * exceeds.  Returns 0, or ENOBUFS when the upcall cannot be built/queued.
  * The VIF lock must be held.
  */
 static int
 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
 {
     struct ip  *ip = mtod(m, struct ip *);
     vifi_t vifi;
     int plen = ntohs(ip->ip_len);
 
     VIF_LOCK_ASSERT();
 
     /*
      * If xmt_vif is not -1, send on only the requested vif.
      *
      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
      */
     if (xmt_vif < V_numvifs) {
 	if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + xmt_vif, m, rt);
 	else
 		phyint_send(ip, V_viftable + xmt_vif, m);
 	return 1;
     }
 
     /*
      * Don't forward if it didn't arrive from the parent vif for its origin.
      */
     vifi = rt->mfc_parent;
     if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) {
 	/*
 	 * vifi may lie outside the vif table here (e.g. parent -1), so
 	 * only index the table for the trace when it is in range.
 	 */
 	CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)",
 	    __func__, ifp, (int)vifi,
 	    (vifi < V_numvifs) ? V_viftable[vifi].v_ifp : NULL);
 	MRTSTAT_INC(mrts_wrong_if);
 	++rt->mfc_wrong_if;
 	/*
 	 * If we are doing PIM assert processing, send a message
 	 * to the routing daemon.
 	 *
 	 * XXX: A PIM-SM router needs the WRONGVIF detection so it
 	 * can complete the SPT switch, regardless of the type
 	 * of the iif (broadcast media, GRE tunnel, etc).
 	 */
 	if (V_pim_assert_enabled && (vifi < V_numvifs) &&
 	    V_viftable[vifi].v_ifp) {
 
 	    if (ifp == &V_multicast_register_if)
 		PIMSTAT_INC(pims_rcv_registers_wrongiif);
 
 	    /* Get vifi for the incoming packet */
 	    for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp;
 		vifi++)
 		;
 	    if (vifi >= V_numvifs)
 		return 0;	/* The iif is not found: ignore the packet. */
 
 	    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
 		return 0;	/* WRONGVIF disabled: ignore the packet */
 
 	    if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
 		struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 		struct igmpmsg *im;
 		int hlen = ip->ip_hl << 2;
 		struct mbuf *mm = m_copy(m, 0, hlen);
 
 		if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen))
 		    mm = m_pullup(mm, hlen);
 		if (mm == NULL)
 		    return ENOBUFS;
 
 		im = mtod(mm, struct igmpmsg *);
 		im->im_msgtype	= IGMPMSG_WRONGVIF;
 		im->im_mbz		= 0;
 		im->im_vif		= vifi;
 
 		MRTSTAT_INC(mrts_upcalls);
 
 		k_igmpsrc.sin_addr = im->im_src;
 		if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		    CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 		    MRTSTAT_INC(mrts_upq_sockfull);
 		    return ENOBUFS;
 		}
 	    }
 	}
 	return 0;
     }
 
 
     /* If I sourced this packet, it counts as output, else it was input. */
     if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) {
 	V_viftable[vifi].v_pkt_out++;
 	V_viftable[vifi].v_bytes_out += plen;
     } else {
 	V_viftable[vifi].v_pkt_in++;
 	V_viftable[vifi].v_bytes_in += plen;
     }
     rt->mfc_pkt_cnt++;
     rt->mfc_byte_cnt += plen;
 
     /*
      * For each vif, decide if a copy of the packet should be forwarded.
      * Forward if:
      *		- the ttl exceeds the vif's threshold
      *		- there are group members downstream on interface
      */
     for (vifi = 0; vifi < V_numvifs; vifi++)
 	if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
 	    V_viftable[vifi].v_pkt_out++;
 	    V_viftable[vifi].v_bytes_out += plen;
 	    if (V_viftable[vifi].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + vifi, m, rt);
 	    else
 		phyint_send(ip, V_viftable + vifi, m);
 	}
 
     /*
      * Perform upcall-related bw measuring.
      */
     if (rt->mfc_bw_meter != NULL) {
 	struct bw_meter *x;
 	struct timeval now;
 
 	microtime(&now);
 	MFC_LOCK_ASSERT();
 	for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
 	    bw_meter_receive_packet(x, plen, &now);
     }
 
     return 0;
 }
 
 /*
  * Check if a vif number is legal/ok. This is used by in_mcast.c.
  * Returns 1 when 0 <= vif < number of vifs, 0 otherwise.
  */
 static int
 X_legal_vif_num(int vif)
 {
 	int legal;
 
 	/* Negative indices are rejected without taking the lock. */
 	if (vif < 0)
 		return (0);
 
 	VIF_LOCK();
 	legal = (vif < V_numvifs) ? 1 : 0;
 	VIF_UNLOCK();
 
 	return (legal);
 }
 
 /*
  * Return the local address used by this vif, or INADDR_ANY when `vifi'
  * is negative or not below the current number of vifs.
  */
 static u_long
 X_ip_mcast_src(int vifi)
 {
 	in_addr_t src = INADDR_ANY;
 
 	if (vifi >= 0) {
 		VIF_LOCK();
 		if (vifi < V_numvifs)
 			src = V_viftable[vifi].v_lcl_addr.s_addr;
 		VIF_UNLOCK();
 	}
 
 	return (src);
 }
 
 /*
  * Send a copy of packet `m' out through vif `vifp'.  The packet is
  * silently dropped when the copy cannot be made.  The VIF lock must be
  * held.
  */
 static void
 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
 {
     const int iphlen = ip->ip_hl << 2;
     struct mbuf *copy;
 
     VIF_LOCK_ASSERT();
 
     /*
      * Make a new reference to the packet; make sure that
      * the IP header is actually copied, not just referenced,
      * so that ip_output() only scribbles on the copy.
      */
     copy = m_copypacket(m, M_NOWAIT);
     if (copy == NULL)
 	return;
 
     if (!M_WRITABLE(copy) || copy->m_len < iphlen) {
 	copy = m_pullup(copy, iphlen);
 	if (copy == NULL)
 	    return;
     }
 
     send_packet(vifp, copy);
 }
 
 /*
  * Transmit `m' on the interface of `vifp' through ip_output(), using a
  * temporary set of multicast options built on the stack.  The result of
  * ip_output() is only traced, not returned.  The VIF lock must be held.
  */
 static void
 send_packet(struct vif *vifp, struct mbuf *m)
 {
 	struct ip_moptions imo;
 	struct in_multi *imm[2];
 	int error;
 
 	VIF_LOCK_ASSERT();
 
 	imo.imo_multicast_ifp  = vifp->v_ifp;
 	/* Outgoing TTL is one less than the TTL in the packet header. */
 	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
 	imo.imo_multicast_loop = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_num_memberships = 0;
 	imo.imo_max_memberships = 2;
 	imo.imo_membership  = &imm[0];
 
 	/*
 	 * Re-entrancy should not be a problem here, because
 	 * the packets that we send out and are looped back at us
 	 * should get rejected because they appear to come from
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
 	CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
 	    (ptrdiff_t)(vifp - V_viftable), error);
 }
 
 /*
  * Stubs for old RSVP socket shim implementation.
  */
 
 /* RSVP vif socket option stub: both arguments are ignored. */
 static int
 X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
 {
 	/* Always reports that the operation is not supported. */
 	return (EOPNOTSUPP);
 }
 
 /* RSVP teardown stub: intentionally does nothing with 'so'. */
 static void
 X_ip_rsvp_force_done(struct socket *so __unused)
 {
 	/* Nothing to do. */
 }
 
 /*
  * RSVP input stub.  Takes the packet away from the caller (*mp is
  * cleared), discards it and returns IPPROTO_DONE.  'offp' and 'proto'
  * are not used.
  */
 static int
 X_rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m;
 
 	m = *mp;
 	*mp = NULL;
 
 	/*
 	 * Nothing in this stub consumes the packet, so it has to be
 	 * released regardless of V_rsvp_on.  Freeing it only when RSVP
 	 * was off leaked the chain whenever V_rsvp_on was set, because
 	 * the caller's reference has already been cleared above.
 	 */
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Code for bandwidth monitors
  */
 
 /*
  * Define common interface for timeval-related methods.
  * All arguments are pointers to struct timeval, except 'cmp', which is
  * the comparison operator token handed through to timevalcmp().
  */
 /* Compare *tvp with *uvp using the operator 'cmp'. */
 #define	BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp)
 /* Thin wrapper around timevalsub(vvp, uvp). */
 #define	BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp))
 /* Thin wrapper around timevaladd(vvp, uvp). */
 #define	BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp))
 
 /*
  * Translate the BW_UPCALL_* unit/comparison bits of a request into the
  * corresponding BW_METER_* bits.  Other request bits are ignored.
  */
 static uint32_t
 compute_bw_meter_flags(struct bw_upcall *req)
 {
     uint32_t meter_flags;
 
     /* Measurement unit. */
     meter_flags = (req->bu_flags & BW_UPCALL_UNIT_PACKETS) ?
 	BW_METER_UNIT_PACKETS : 0;
     meter_flags |= (req->bu_flags & BW_UPCALL_UNIT_BYTES) ?
 	BW_METER_UNIT_BYTES : 0;
 
     /* Comparison direction. */
     meter_flags |= (req->bu_flags & BW_UPCALL_GEQ) ? BW_METER_GEQ : 0;
     meter_flags |= (req->bu_flags & BW_UPCALL_LEQ) ? BW_METER_LEQ : 0;
 
     return meter_flags;
 }
 
 /*
  * Add a bw_meter entry to the MFC entry selected by the request's
  * source and group.
  *
  * Returns 0 on success (also when an identical meter is already
  * installed), EOPNOTSUPP when the bandwidth upcall API is not enabled,
  * EINVAL for inconsistent flags or a too short threshold interval,
  * EADDRNOTAVAIL when no MFC entry matches, and ENOBUFS when the meter
  * cannot be allocated.
  */
 static int
 add_bw_upcall(struct bw_upcall *req)
 {
     struct timeval min_interval = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
 		BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
     struct timeval tv_now;
     struct bw_meter *meter;
     struct mfc *entry;
     uint32_t meter_flags;
 
     if ((V_mrt_api_config & MRT_MFC_BW_UPCALL) == 0)
 	return EOPNOTSUPP;
 
     /* A unit must be given, and exactly one of GEQ / LEQ. */
     if ((req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)) == 0)
 	return EINVAL;
     if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) == 0)
 	return EINVAL;
     if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	    == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	return EINVAL;
 
     /* The threshold interval may not be below the minimum. */
     if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &min_interval, <))
 	return EINVAL;
 
     meter_flags = compute_bw_meter_flags(req);
 
     MFC_LOCK();
     entry = mfc_find(&req->bu_src, &req->bu_dst);
     if (entry == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /* Look for a meter with the same threshold and user flags. */
     for (meter = entry->mfc_bw_meter; meter != NULL;
 	 meter = meter->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&meter->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (meter->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (meter->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (meter->bm_flags & BW_METER_USER_FLAGS) == meter_flags)  {
 	    MFC_UNLOCK();
 	    return 0;		/* XXX Already installed */
 	}
     }
 
     /* None found: allocate a fresh one (no sleeping, lock is held). */
     meter = malloc(sizeof(*meter), M_BWMETER, M_NOWAIT);
     if (meter == NULL) {
 	MFC_UNLOCK();
 	return ENOBUFS;
     }
 
     /* Thresholds come from the request; measurement starts from zero now. */
     microtime(&tv_now);
     meter->bm_start_time = tv_now;
     meter->bm_threshold.b_time = req->bu_threshold.b_time;
     meter->bm_threshold.b_packets = req->bu_threshold.b_packets;
     meter->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
     meter->bm_measured.b_packets = 0;
     meter->bm_measured.b_bytes = 0;
     meter->bm_flags = meter_flags;
 
     /* BW_METER_BUCKETS as a hash value marks the meter as not scheduled. */
     meter->bm_time_next = NULL;
     meter->bm_time_hash = BW_METER_BUCKETS;
 
     /* Link at the head of this MFC entry's meter list and schedule it. */
     meter->bm_mfc = entry;
     meter->bm_mfc_next = entry->mfc_bw_meter;
     entry->mfc_bw_meter = meter;
     schedule_bw_meter(meter, &tv_now);
     MFC_UNLOCK();
 
     return 0;
 }
 
 /*
  * Unschedule and free every bw_meter on a list linked through
  * bm_mfc_next.
  */
 static void
 free_bw_list(struct bw_meter *list)
 {
     struct bw_meter *cur, *next;
 
     for (cur = list; cur != NULL; cur = next) {
 	/* Fetch the successor before the node is released. */
 	next = cur->bm_mfc_next;
 	unschedule_bw_meter(cur);
 	free(cur, M_BWMETER);
     }
 }
 
 /*
  * Delete one or multiple bw_meter entries of the MFC entry selected by
  * the request's source and group.
  *
  * With BW_UPCALL_DELETE_ALL every meter of that entry is removed;
  * otherwise only the meter whose threshold and user flags equal the
  * request's.  Returns 0 on success, EOPNOTSUPP when the bandwidth
  * upcall API is not enabled, EADDRNOTAVAIL when no MFC entry matches
  * and EINVAL when no meter matches.
  */
 static int
 del_bw_upcall(struct bw_upcall *req)
 {
     struct bw_meter *victim, *before;
     struct mfc *entry;
     uint32_t wanted;
 
     if ((V_mrt_api_config & MRT_MFC_BW_UPCALL) == 0)
 	return EOPNOTSUPP;
 
     MFC_LOCK();
 
     entry = mfc_find(&req->bu_src, &req->bu_dst);
     if (entry == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
 	/* Detach the whole list first, then release its members. */
 	struct bw_meter *all = entry->mfc_bw_meter;
 
 	entry->mfc_bw_meter = NULL;
 	free_bw_list(all);
 	MFC_UNLOCK();
 	return 0;
     }
 
     /* Single deletion: locate the meter described by the request. */
     wanted = compute_bw_meter_flags(req);
     before = NULL;
     for (victim = entry->mfc_bw_meter; victim != NULL;
 	 before = victim, victim = victim->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&victim->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (victim->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (victim->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (victim->bm_flags & BW_METER_USER_FLAGS) == wanted)
 	    break;
     }
     if (victim == NULL) {
 	MFC_UNLOCK();
 	return EINVAL;
     }
 
     /* Unlink from the MFC entry's list: either mid-list or at the head. */
     if (before != NULL)
 	before->bm_mfc_next = victim->bm_mfc_next;
     else
 	victim->bm_mfc->mfc_bw_meter = victim->bm_mfc_next;
 
     unschedule_bw_meter(victim);
     MFC_UNLOCK();
 
     /* The meter is no longer reachable; free it outside the lock. */
     free(victim, M_BWMETER);
     return 0;
 }
 
 /*
  * Perform bandwidth measurement processing that may result in an upcall.
  *
  * x:    the bw_meter to account the packet against
  * plen: amount added to the meter's byte counter for this packet
  * nowp: current time, used both to measure the elapsed interval and
  *       as the new start time when the interval is restarted
  *
  * Must be called with the MFC lock held.
  */
 static void
 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
 {
     struct timeval delta;
 
     MFC_LOCK_ASSERT();
 
     /* delta = time elapsed since the current measuring interval began */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     if (x->bm_flags & BW_METER_GEQ) {
 	/*
 	 * Processing for ">=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /* Reset the bw_meter entry */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should deliver an upcall
 	 * (at most one per measuring interval, tracked by
 	 * BW_METER_UPCALL_DELIVERED)
 	 */
 	if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 		x->bm_flags |= BW_METER_UPCALL_DELIVERED;
 	    }
 	}
     } else if (x->bm_flags & BW_METER_LEQ) {
 	/*
 	 * Processing for "<=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /*
 	     * We are behind time with the multicast forwarding table
 	     * scanning for "<=" type of bw_meter entries, so test now
 	     * if we should deliver an upcall.
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 	    }
 	    /* Reschedule the bw_meter entry (this also resets its counters) */
 	    unschedule_bw_meter(x);
 	    schedule_bw_meter(x, nowp);
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should restart the measuring interval:
 	 * as long as a counter is still at or below its threshold
 	 * the interval keeps running.
 	 */
 	if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
 	     x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
 	    (x->bm_flags & BW_METER_UNIT_BYTES &&
 	     x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
 	    /* Don't restart the measuring interval */
 	} else {
 	    /* Do restart the measuring interval */
 	    /*
 	     * XXX: note that we don't unschedule and schedule, because this
 	     * might be too much overhead per packet. Instead, when we process
 	     * all entries for a given timer hash bin, we check whether it is
 	     * really a timeout. If not, we reschedule at that time.
 	     */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
     }
 }
 
 /*
  * Prepare a bandwidth-related upcall: append a record describing the
  * meter 'x' (thresholds, measured values and the time elapsed since the
  * meter's start time) to the pending upcall array.
  * Must be called with the MFC lock held.
  */
 static void
 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 {
     struct timeval elapsed;
     struct bw_upcall *rec;
 
     MFC_LOCK_ASSERT();
 
     /* Length of the measured interval: now - start time. */
     elapsed = *nowp;
     BW_TIMEVALDECR(&elapsed, &x->bm_start_time);
 
     /* Too many pending upcalls: deliver them now to make room. */
     if (V_bw_upcalls_n >= BW_UPCALLS_MAX)
 	bw_upcalls_send();
 
     /* Claim the next slot and fill it in. */
     rec = &V_bw_upcalls[V_bw_upcalls_n++];
 
     rec->bu_src = x->bm_mfc->mfc_origin;
     rec->bu_dst = x->bm_mfc->mfc_mcastgrp;
 
     rec->bu_threshold.b_time = x->bm_threshold.b_time;
     rec->bu_threshold.b_packets = x->bm_threshold.b_packets;
     rec->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
 
     rec->bu_measured.b_time = elapsed;
     rec->bu_measured.b_packets = x->bm_measured.b_packets;
     rec->bu_measured.b_bytes = x->bm_measured.b_bytes;
 
     /* Map the meter's unit/comparison bits back to upcall bits. */
     rec->bu_flags = 0;
     rec->bu_flags |= (x->bm_flags & BW_METER_UNIT_PACKETS) ?
 	BW_UPCALL_UNIT_PACKETS : 0;
     rec->bu_flags |= (x->bm_flags & BW_METER_UNIT_BYTES) ?
 	BW_UPCALL_UNIT_BYTES : 0;
     rec->bu_flags |= (x->bm_flags & BW_METER_GEQ) ? BW_UPCALL_GEQ : 0;
     rec->bu_flags |= (x->bm_flags & BW_METER_LEQ) ? BW_UPCALL_LEQ : 0;
 }
 
 /*
  * Send the pending bandwidth-related upcalls to the multicast routing
  * socket as one message: an igmpmsg header of type IGMPMSG_BW_UPCALL
  * followed by the queued bw_upcall records.  The pending count is reset
  * before the mbuf is allocated, so the records are dropped if that
  * allocation fails.  Must be called with the MFC lock held.
  */
 static void
 bw_upcalls_send(void)
 {
     static struct igmpmsg igmpmsg = { 0,		/* unused1 */
 				      0,		/* unused2 */
 				      IGMPMSG_BW_UPCALL,/* im_msgtype */
 				      0,		/* im_mbz  */
 				      0,		/* im_vif  */
 				      0,		/* unused3 */
 				      { 0 },		/* im_src  */
 				      { 0 } };		/* im_dst  */
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
     struct mbuf *m;
     int len;
 
     MFC_LOCK_ASSERT();
 
     /* No pending upcalls */
     if (V_bw_upcalls_n == 0)
 	return;
 
     /* Size of the payload, taken before the pending count is reset. */
     len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]);
     V_bw_upcalls_n = 0;
 
     m = m_gethdr(M_NOWAIT, MT_DATA);
     if (m == NULL) {
 	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
 	return;
     }
 
     /* Header first, then the upcall records right behind it. */
     m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
     m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
 
     /*
      * Send the upcalls
      * XXX do we need to set the address in k_igmpsrc ?
      */
     MRTSTAT_INC(mrts_upcalls);
     if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) {
 	log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
 	MRTSTAT_INC(mrts_upq_sockfull);
     }
 }
 
 /*
  * Compute the timeout hash value for the bw_meter entries.
  *
  * The expiry time is bm_start_time + bm_threshold.b_time; 'hash' is set
  * to its seconds part, rounded up when there is a non-zero microsecond
  * part, modulo BW_METER_BUCKETS.  'hash' must be an assignable lvalue
  * and is evaluated several times.
  */
 #define	BW_METER_TIMEHASH(bw_meter, hash)				\
     do {								\
 	struct timeval next_timeval = (bw_meter)->bm_start_time;	\
 									\
 	BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
 	(hash) = next_timeval.tv_sec;					\
 	if (next_timeval.tv_usec)					\
 	    (hash)++; /* XXX: make sure we don't timeout early */	\
 	(hash) %= BW_METER_BUCKETS;					\
     } while (0)
 
 /*
  * Schedule a timer to process periodically a bw_meter entry of type
  * "<=": reset the entry's measurement state to start at *nowp and link
  * it at the head of the timer hash bucket for its expiry time.
  * Entries that are not of type "<=" are left untouched.
  * Must be called with the MFC lock held.
  */
 static void
 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
 {
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     /* XXX: we schedule timers only for "<=" entries */
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;
 
     /* Start a fresh measuring interval. */
     x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
     x->bm_measured.b_bytes = 0;
     x->bm_measured.b_packets = 0;
     x->bm_start_time = *nowp;
 
     /* The bucket depends on the start time just stored. */
     BW_METER_TIMEHASH(x, bucket);
     x->bm_time_hash = bucket;
     x->bm_time_next = V_bw_meter_timers[bucket];
     V_bw_meter_timers[bucket] = x;
 }
 
 /*
  * Unschedule the periodic timer that processes a bw_meter entry of type
  * "<=" by removing the entry from its timer hash bucket.
  * Entries that are not of type "<=", or that are marked as unscheduled
  * (bm_time_hash >= BW_METER_BUCKETS), are left untouched.  Panics if the
  * entry claims a bucket but is not found on it.
  * Must be called with the MFC lock held.
  */
 static void
 unschedule_bw_meter(struct bw_meter *x)
 {
     struct bw_meter **linkp;
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     /* XXX: we schedule timers only for "<=" entries */
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;
 
     /* Entry was not scheduled */
     bucket = x->bm_time_hash;
     if (bucket >= BW_METER_BUCKETS)
 	return;
 
     /* Walk the links of the bucket until the one pointing at x. */
     linkp = &V_bw_meter_timers[bucket];
     while (*linkp != NULL && *linkp != x)
 	linkp = &(*linkp)->bm_time_next;
 
     if (*linkp == NULL)
 	panic("unschedule_bw_meter: bw_meter entry not found");
 
     /* Splice x out and mark it as unscheduled. */
     *linkp = x->bm_time_next;
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 }
 
 
 /*
  * Process all "<=" type of bw_meter that should be processed now,
  * and for each entry prepare an upcall if necessary. Each processed
  * entry is rescheduled again for the (periodic) processing.
  *
  * This is run periodically (once per second normally). On each round,
  * all the potentially matching entries are in the hash slot that we are
  * looking at.
  *
  * Takes and releases the MFC lock itself.  Nothing is done when the
  * current second equals the second of the previous run.
  */
 static void
 bw_meter_process(void)
 {
     uint32_t loops;
     int i;
     struct timeval now, process_endtime;
 
     microtime(&now);
     if (V_last_tv_sec == now.tv_sec)
 	return;		/* nothing to do */
 
     /* One bucket per elapsed second, but never more than all of them. */
     loops = now.tv_sec - V_last_tv_sec;
     V_last_tv_sec = now.tv_sec;
     if (loops > BW_METER_BUCKETS)
 	loops = BW_METER_BUCKETS;
 
     MFC_LOCK();
     /*
      * Process all bins of bw_meter entries from the one after the last
      * processed to the current one. On entry, i points to the last bucket
      * visited, so we need to increment i at the beginning of the loop.
      */
     for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
 	struct bw_meter *x, *tmp_list;
 
 	if (++i >= BW_METER_BUCKETS)
 	    i = 0;
 
 	/* Disconnect the list of bw_meter entries from the bin */
 	tmp_list = V_bw_meter_timers[i];
 	V_bw_meter_timers[i] = NULL;
 
 	/* Process the list of bw_meter entries */
 	while (tmp_list != NULL) {
 	    x = tmp_list;
 	    tmp_list = tmp_list->bm_time_next;
 
 	    /* Test if the time interval is over */
 	    process_endtime = x->bm_start_time;
 	    BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
 	    if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
 		/* Not yet: reschedule, but don't reset */
 		int time_hash;
 
 		BW_METER_TIMEHASH(x, time_hash);
 		if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
 		    /*
 		     * XXX: somehow the bin processing is a bit ahead of time.
 		     * Put the entry in the next bin.
 		     */
 		    if (++time_hash >= BW_METER_BUCKETS)
 			time_hash = 0;
 		}
 		/* Relink by hand so that the measured values are kept. */
 		x->bm_time_next = V_bw_meter_timers[time_hash];
 		V_bw_meter_timers[time_hash] = x;
 		x->bm_time_hash = time_hash;
 
 		continue;
 	    }
 
 	    /*
 	     * Test if we should deliver an upcall
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, &now);
 	    }
 
 	    /*
 	     * Reschedule for next processing
 	     */
 	    schedule_bw_meter(x, &now);
 	}
     }
 
     /* Send all upcalls that are pending delivery */
     bw_upcalls_send();
 
     MFC_UNLOCK();
 }
 
 /*
  * Callout handler that periodically flushes the pending bandwidth
  * upcalls.  'arg' is the vnet to run in; the handler re-arms its own
  * callout for BW_UPCALLS_PERIOD before returning.
  */
 static void
 expire_bw_upcalls_send(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     /* The flush itself requires the MFC lock. */
     MFC_LOCK();
     bw_upcalls_send();
     MFC_UNLOCK();
 
     /* Schedule the next run. */
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD,
 	expire_bw_upcalls_send, curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * Callout handler for the periodic scanning of the "<=" bw_meter
  * entries.  'arg' is the vnet to run in.  The scan is only performed
  * while the bandwidth upcall API is enabled, but the callout is re-armed
  * for BW_METER_PERIOD either way.
  */
 static void
 expire_bw_meter_process(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     if ((V_mrt_api_config & MRT_MFC_BW_UPCALL) != 0)
 	bw_meter_process();
 
     /* Schedule the next run. */
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD,
 	expire_bw_meter_process, curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * End of bandwidth monitoring code
  */
 
 /*
  * Send the packet up to the user daemon, or eventually do kernel
  * encapsulation.
  *
  * A copy of 'm' is prepared for PIM Register use (possibly as a chain
  * of fragments) and each resulting packet is either encapsulated and
  * sent to the rendezvous point of 'rt', or passed to the daemon as a
  * whole-packet upcall.  Returns 0, or ENOBUFS when the copy cannot be
  * prepared; the results of the individual sends are not reported.
  */
 static int
 pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
     struct mfc *rt)
 {
     struct mbuf *pending, *pkt;
 
     /*
      * Do not send IGMP_WHOLEPKT notifications to userland, if the
      * rendezvous point was unspecified, and we were told not to.
      */
     if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) &&
 	in_nullhost(rt->mfc_rp))
 	return 0;
 
     pending = pim_register_prepare(ip, m);
     if (pending == NULL)
 	return ENOBUFS;
 
     /*
      * Send all the fragments. Note that the mbuf for each fragment
      * is freed by the sending machinery.
      */
     while ((pkt = pending) != NULL) {
 	/* Detach the packet from the m_nextpkt chain. */
 	pending = pkt->m_nextpkt;
 	pkt->m_nextpkt = 0;
 
 	/* The senders read the IP header through a plain pointer. */
 	pkt = m_pullup(pkt, sizeof(struct ip));
 	if (pkt == NULL)
 	    continue;
 
 	ip = mtod(pkt, struct ip *);
 	if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp))
 	    pim_register_send_rp(ip, vifp, pkt, rt);
 	else
 	    pim_register_send_upcall(ip, vifp, pkt, rt);
     }
 
     return 0;
 }
 
 /*
  * Return a copy of the data packet that is ready for PIM Register
  * encapsulation, or NULL on failure.
  * The copy has its TTL decremented.  If it would not fit once the
  * encapsulation headers are added it is handed to ip_fragment();
  * otherwise its IP header checksum is recomputed.
  * XXX: Note that in the returned copy the IP header is a valid one.
  */
 static struct mbuf *
 pim_register_prepare(struct ip *ip, struct mbuf *m)
 {
     struct mbuf *copy = NULL;
     int max_len;
 
     /* Finish a delayed data checksum on the original first. */
     if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 	in_delayed_cksum(m);
 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
     }
 
     /*
      * Copy the old packet & pullup its IP header into the
      * new mbuf so we can modify it.
      */
     copy = m_copypacket(m, M_NOWAIT);
     if (copy == NULL)
 	return NULL;
     copy = m_pullup(copy, ip->ip_hl << 2);
     if (copy == NULL)
 	return NULL;
 
     /* From here on 'ip' refers to the header of the copy. */
     ip = mtod(copy, struct ip *);
     --ip->ip_ttl;
 
     /* Largest inner packet that still fits after encapsulation. */
     max_len = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
 
     if (ntohs(ip->ip_len) > max_len) {
 	/* Too big: fragment the packet */
 	copy->m_pkthdr.csum_flags |= CSUM_IP;
 	if (ip_fragment(ip, &copy, max_len, 0) != 0) {
 	    m_freem(copy);
 	    return NULL;
 	}
 	return copy;
     }
 
     /* Fits as is: turn the IP header into a valid one */
     ip->ip_sum = 0;
     ip->ip_sum = in_cksum(copy, ip->ip_hl << 2);
     return copy;
 }
 
 /*
  * Send an upcall with the data packet to the user-level process.
  *
  * A new mbuf holding an igmpmsg header of type IGMPMSG_WHOLEPKT is put
  * in front of 'mb_copy' and the result is passed to the multicast
  * routing socket.  Returns 0 on success and ENOBUFS when the header
  * mbuf cannot be allocated (in which case 'mb_copy' is freed here) or
  * when socket_send() reports failure.  'rt' is not used.
  * Must be called with the VIF lock held.
  */
 static int
 pim_register_send_upcall(struct ip *ip, struct vif *vifp,
     struct mbuf *mb_copy, struct mfc *rt)
 {
     struct mbuf *mb_first;
     int len = ntohs(ip->ip_len);
     struct igmpmsg *im;
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 
     VIF_LOCK_ASSERT();
 
     /*
      * Add a new mbuf with an upcall header
      */
     mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg);
     mb_first->m_len = sizeof(struct igmpmsg);
     mb_first->m_next = mb_copy;
 
     /* Send message to routing daemon */
     im = mtod(mb_first, struct igmpmsg *);
     /*
      * Clear the whole header first: the mbuf data area is not
      * initialized, and the unused fields of struct igmpmsg are not
      * assigned below, so they would otherwise be passed to userland
      * with whatever bytes the buffer happened to contain.
      */
     bzero(im, sizeof(*im));
     im->im_msgtype	= IGMPMSG_WHOLEPKT;
     im->im_mbz		= 0;
     im->im_vif		= vifp - V_viftable;
     im->im_src		= ip->ip_src;
     im->im_dst		= ip->ip_dst;
 
     k_igmpsrc.sin_addr	= ip->ip_src;
 
     MRTSTAT_INC(mrts_upcalls);
 
     if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) {
 	CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 	MRTSTAT_INC(mrts_upq_sockfull);
 	return ENOBUFS;
     }
 
     /* Keep statistics */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, len);
 
     return 0;
 }
 
 /*
  * Encapsulate the data packet in PIM Register message and send it to the RP.
  *
  * ip:      IP header of the packet in 'mb_copy'
  * vifp:    vif handed on to send_packet()
  * mb_copy: the inner packet; freed here on every failure path
  * rt:      MFC entry supplying the RP address (mfc_rp), the incoming
  *          vif (mfc_parent) and the per-vif flags
  *
  * Returns 0 once the packet was handed to send_packet(),
  * EADDRNOTAVAIL when the incoming vif is out of range or has no local
  * address, and ENOBUFS when the header mbuf cannot be allocated.
  * Must be called with the VIF lock held.
  */
 static int
 pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
     struct mfc *rt)
 {
     struct mbuf *mb_first;
     struct ip *ip_outer;
     struct pim_encap_pimhdr *pimhdr;
     int len = ntohs(ip->ip_len);
     vifi_t vifi = rt->mfc_parent;
 
     VIF_LOCK_ASSERT();
 
     if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) {
 	m_freem(mb_copy);
 	return EADDRNOTAVAIL;		/* The iif vif is invalid */
     }
 
     /*
      * Add a new mbuf with the encapsulating header
      */
     mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
     mb_first->m_next = mb_copy;
 
     mb_first->m_pkthdr.len = len + mb_first->m_len;
 
     /*
      * Fill in the encapsulating IP and PIM header
      * (both start out as copies of the pim_encap_* templates)
      */
     ip_outer = mtod(mb_first, struct ip *);
     *ip_outer = pim_encap_iphdr;
-    ip_outer->ip_id = ip_newid();
     ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
 	sizeof(pim_encap_pimhdr));
     ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
     ip_outer->ip_dst = rt->mfc_rp;
     /*
      * Copy the inner header TOS to the outer header, and take care of the
      * IP_DF bit.
      */
     ip_outer->ip_tos = ip->ip_tos;
     if (ip->ip_off & htons(IP_DF))
 	ip_outer->ip_off |= htons(IP_DF);
+    ip_fillid(ip_outer);
     pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
 					 + sizeof(pim_encap_iphdr));
     *pimhdr = pim_encap_pimhdr;
     /* If the iif crosses a border, set the Border-bit */
     if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config)
 	pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
 
     /*
      * Temporarily step over the outer IP header so that in_cksum()
      * starts at the PIM header and covers sizeof(pim_encap_pimhdr)
      * bytes only; m_data is put back right afterwards.
      */
     mb_first->m_data += sizeof(pim_encap_iphdr);
     pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
     mb_first->m_data -= sizeof(pim_encap_iphdr);
 
     send_packet(vifp, mb_first);
 
     /* Keep statistics */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, len);
 
     return 0;
 }
 
 /*
  * pim_encapcheck() is called by the encap4_input() path at runtime to
  * determine if a packet is for PIM; allowing PIM to be dynamically loaded
  * into the kernel.
  *
  * Only 'proto' is examined: the result is 64 for IPPROTO_PIM (claim
  * the datagram) and 0 for anything else (not for us; reject it).
  */
 static int
 pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 
 #ifdef DIAGNOSTIC
     KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
 #endif
     return (proto == IPPROTO_PIM) ? 64 : 0;
 }
 
 /*
  * PIM-SMv2 and PIM-DM messages processing.
  * Receives and verifies the PIM control messages, and passes them
  * up to the listening socket, using rip_input().
  * The only message with special processing is the PIM_REGISTER message
  * (used by PIM-SM): the PIM header is stripped off, and the inner packet
  * is passed to if_simloop().
  *
  * mp:    in/out packet pointer; cleared on entry and only set again
  *        for the final rip_input() call
  * offp:  *offp is the length of the outer IP header
  * proto: passed through to rip_input()
  *
  * Always returns IPPROTO_DONE; packets that fail validation are freed.
  */
 int
 pim_input(struct mbuf **mp, int *offp, int proto)
 {
     struct mbuf *m = *mp;
     struct ip *ip = mtod(m, struct ip *);
     struct pim *pim;
     int iphlen = *offp;
     int minlen;
     int datalen = ntohs(ip->ip_len) - iphlen;	/* length of the PIM message */
     int ip_tos;
 
     *mp = NULL;
 
     /* Keep statistics */
     PIMSTAT_INC(pims_rcv_total_msgs);
     PIMSTAT_ADD(pims_rcv_total_bytes, datalen);
 
     /*
      * Validate lengths
      */
     if (datalen < PIM_MINLEN) {
 	PIMSTAT_INC(pims_rcv_tooshort);
 	CTR3(KTR_IPMF, "%s: short packet (%d) from %s",
 	    __func__, datalen, inet_ntoa(ip->ip_src));
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /*
      * If the packet is at least as big as a REGISTER, go ahead
      * and grab the PIM REGISTER header size, to avoid another
      * possible m_pullup() later.
      *
      * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
      * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
      */
     minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
     /*
      * Get the IP and PIM headers in contiguous memory, and
      * possibly the PIM REGISTER header.
      */
     if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) {
 	CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
 	return (IPPROTO_DONE);
     }
 
     /* m_pullup() may have given us a new mbuf so reset ip. */
     ip = mtod(m, struct ip *);
     /* Remember the outer TOS; it is copied into the inner header below. */
     ip_tos = ip->ip_tos;
 
     /* adjust mbuf to point to the PIM header */
     m->m_data += iphlen;
     m->m_len  -= iphlen;
     pim = mtod(m, struct pim *);
 
     /*
      * Validate checksum. If PIM REGISTER, exclude the data packet.
      *
      * XXX: some older PIMv2 implementations don't make this distinction,
      * so for compatibility reason perform the checksum over part of the
      * message, and if error, then over the whole message.
      */
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
 	/* do nothing, checksum okay */
     } else if (in_cksum(m, datalen)) {
 	PIMSTAT_INC(pims_rcv_badsum);
 	CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* PIM version check */
     if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
 	PIMSTAT_INC(pims_rcv_badversion);
 	CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
 	    (int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* restore mbuf back to the outer IP */
     m->m_data -= iphlen;
     m->m_len  += iphlen;
 
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
 	/*
 	 * Since this is a REGISTER, we'll make a copy of the register
 	 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
 	 * routing daemon.
 	 */
 	struct sockaddr_in dst = { sizeof(dst), AF_INET };
 	struct mbuf *mcp;
 	struct ip *encap_ip;
 	u_int32_t *reghdr;
 	struct ifnet *vifp;
 
 	/* Look up the register vif's interface under the VIF lock. */
 	VIF_LOCK();
 	if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) {
 	    VIF_UNLOCK();
 	    CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
 		(int)V_reg_vif_num);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 	/* XXX need refcnt? */
 	vifp = V_viftable[V_reg_vif_num].v_ifp;
 	VIF_UNLOCK();
 
 	/*
 	 * Validate length
 	 * (this must come before the register and inner IP headers
 	 * are read: only PIM_MINLEN bytes were pulled up for shorter
 	 * messages)
 	 */
 	if (datalen < PIM_REG_MINLEN) {
 	    PIMSTAT_INC(pims_rcv_tooshort);
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* The register header follows the PIM header, then the inner IP. */
 	reghdr = (u_int32_t *)(pim + 1);
 	encap_ip = (struct ip *)(reghdr + 1);
 
 	CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d",
 	    __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len));
 
 	/* verify the version number of the inner packet */
 	if (encap_ip->ip_v != IPVERSION) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* verify the inner packet is destined to a mcast group */
 	if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__,
 		inet_ntoa(encap_ip->ip_dst));
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* If a NULL_REGISTER, pass it to the daemon */
 	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 	    goto pim_input_to_daemon;
 
 	/*
 	 * Copy the TOS from the outer IP header to the inner IP header.
 	 */
 	if (encap_ip->ip_tos != ip_tos) {
 	    /* Outer TOS -> inner TOS */
 	    encap_ip->ip_tos = ip_tos;
 	    /* Recompute the inner header checksum. Sigh... */
 
 	    /* adjust mbuf to point to the inner IP header */
 	    m->m_data += (iphlen + PIM_MINLEN);
 	    m->m_len  -= (iphlen + PIM_MINLEN);
 
 	    encap_ip->ip_sum = 0;
 	    encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
 
 	    /* restore mbuf to point back to the outer IP header */
 	    m->m_data -= (iphlen + PIM_MINLEN);
 	    m->m_len  += (iphlen + PIM_MINLEN);
 	}
 
 	/*
 	 * Decapsulate the inner IP packet and loopback to forward it
 	 * as a normal multicast packet. Also, make a copy of the
 	 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
 	 * to pass to the daemon later, so it can take the appropriate
 	 * actions (e.g., send back PIM_REGISTER_STOP).
 	 * XXX: here m->m_data points to the outer IP header.
 	 */
 	mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN);
 	if (mcp == NULL) {
 	    CTR1(KTR_IPMF, "%s: m_copy() failed", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* Keep statistics */
 	/* XXX: registers_bytes include only the encap. mcast pkt */
 	PIMSTAT_INC(pims_rcv_registers_msgs);
 	PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len));
 
 	/*
 	 * forward the inner ip packet; point m_data at the inner ip.
 	 */
 	m_adj(m, iphlen + PIM_MINLEN);
 
 	CTR4(KTR_IPMF,
 	    "%s: forward decap'd REGISTER: src %lx dst %lx vif %d",
 	    __func__,
 	    (u_long)ntohl(encap_ip->ip_src.s_addr),
 	    (u_long)ntohl(encap_ip->ip_dst.s_addr),
 	    (int)V_reg_vif_num);
 
 	/* NB: vifp was collected above; can it change on us? */
 	if_simloop(vifp, m, dst.sin_family, 0);
 
 	/* prepare the register head to send to the mrouting daemon */
 	m = mcp;
     }
 
 pim_input_to_daemon:
     /*
      * Pass the PIM message up to the daemon; if it is a Register message,
      * pass the 'head' only up to the daemon. This includes the
      * outer IP header, PIM header, PIM-Register header and the
      * inner IP header.
      * XXX: the outer IP header pkt size of a Register is not adjusted to
      * reflect the fact that the inner multicast data is truncated.
      */
     *mp = m;
     rip_input(mp, offp, proto);
 
     return (IPPROTO_DONE);
 }
 
 /*
  * Read-only sysctl handler that copies out every struct mfc found in
  * the multicast forwarding hash table.  Attempts to write fail with
  * EPERM; when the table does not exist nothing is copied and 0 is
  * returned.  Copying stops at the first SYSCTL_OUT() error, which is
  * then returned.
  */
 static int
 sysctl_mfctable(SYSCTL_HANDLER_ARGS)
 {
 	struct mfc	*entry;
 	int		 rc, bucket;
 
 	if (req->newptr)
 		return (EPERM);
 	if (V_mfchashtbl == NULL)	/* XXX unlocked */
 		return (0);
 
 	/* Wire the user buffer before the MFC lock is taken. */
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc)
 		return (rc);
 
 	MFC_LOCK();
 	for (bucket = 0; bucket < mfchashsize && rc == 0; bucket++) {
 		LIST_FOREACH(entry, &V_mfchashtbl[bucket], mfc_hash) {
 			rc = SYSCTL_OUT(req, entry, sizeof(struct mfc));
 			if (rc)
 				break;
 		}
 	}
 	MFC_UNLOCK();
 	return (rc);
 }
 
 /*
  * Read-only (CTLFLAG_RD) sysctl node "mfctable" below _net_inet_ip,
  * served by sysctl_mfctable().
  */
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
     sysctl_mfctable, "IPv4 Multicast Forwarding Table "
     "(struct *mfc[mfchashsize], netinet/ip_mroute.h)");
 
 /*
  * Per-vnet initialization: allocate the zeroed V_nexpire array
  * (mfchashsize bytes), clear the bw_meter timer buckets and initialize
  * the three callouts used by the multicast routing code.
  */
 static void
 vnet_mroute_init(const void *unused __unused)
 {
 
 	/* Callouts are only initialized here, not started. */
 	callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE);
 	callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE);
 	callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE);
 
 	bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 	MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
 }
 
 VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
 	NULL);
 
 /*
  * Per-vnet teardown: release the V_nexpire array allocated by
  * vnet_mroute_init() and clear the pointer.
  */
 static void
 vnet_mroute_uninit(const void *unused __unused)
 {
 
 	FREE(V_nexpire, M_MRTABLE);
 	/* Do not leave a dangling pointer behind. */
 	V_nexpire = NULL;
 }
 
 VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE,
 	vnet_mroute_uninit, NULL);
 
 /*
  * Module event handler for ip_mroute.
  *
  * MOD_LOAD:   create the locks, register the interface departure
  *             handler, read the tunables, attach the PIM encapsulation
  *             hook and install the X_* entry points in the kernel's
  *             function pointers.  Returns EINVAL if the event handler
  *             or the encapsulation hook cannot be set up; everything
  *             acquired up to that point is released again.
  * MOD_UNLOAD: refuse with EINVAL while ip_mrouter_cnt is non-zero,
  *             otherwise undo everything MOD_LOAD did.
  * Any other event type yields EOPNOTSUPP.
  */
 static int
 ip_mroute_modevent(module_t mod, int type, void *unused)
 {
 
     switch (type) {
     case MOD_LOAD:
 	MROUTER_LOCK_INIT();
 
 	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 
 	    if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
 	if (if_detach_event_tag == NULL) {
 		printf("ip_mroute: unable to register "
 		    "ifnet_departure_event handler\n");
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	MFC_LOCK_INIT();
 	VIF_LOCK_INIT();
 
 	/* The hash size tunable is only accepted when it is a power of 2. */
 	mfchashsize = MFCHASHSIZE;
 	if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
 	    !powerof2(mfchashsize)) {
 		printf("WARNING: %s not a power of 2; using default\n",
 		    "net.inet.ip.mfchashsize");
 		mfchashsize = MFCHASHSIZE;
 	}
 
 	pim_squelch_wholepkt = 0;
 	TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
 	    &pim_squelch_wholepkt);
 
 	pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
 	    pim_encapcheck, &in_pim_protosw, NULL);
 	if (pim_encap_cookie == NULL) {
 		printf("ip_mroute: unable to attach pim encap\n");
 		/*
 		 * The load is being aborted, so the departure handler
 		 * registered above must not stay behind: it refers to
 		 * if_detached_event() in this module.  Remove it before
 		 * the locks are destroyed.
 		 */
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    if_detach_event_tag);
 		VIF_LOCK_DESTROY();
 		MFC_LOCK_DESTROY();
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	/* Publish the entry points only after all setup has succeeded. */
 	ip_mcast_src = X_ip_mcast_src;
 	ip_mforward = X_ip_mforward;
 	ip_mrouter_done = X_ip_mrouter_done;
 	ip_mrouter_get = X_ip_mrouter_get;
 	ip_mrouter_set = X_ip_mrouter_set;
 
 	ip_rsvp_force_done = X_ip_rsvp_force_done;
 	ip_rsvp_vif = X_ip_rsvp_vif;
 
 	legal_vif_num = X_legal_vif_num;
 	mrt_ioctl = X_mrt_ioctl;
 	rsvp_input_p = X_rsvp_input;
 	break;
 
     case MOD_UNLOAD:
 	/*
 	 * Typically module unload happens after the user-level
 	 * process has shutdown the kernel services (the check
 	 * below ensures someone can't just yank the module out
 	 * from under a running process).  But if the module is
 	 * just loaded and then unloaded w/o starting up a user
 	 * process we still need to cleanup.
 	 */
 	MROUTER_LOCK();
 	if (ip_mrouter_cnt != 0) {
 	    MROUTER_UNLOCK();
 	    return (EINVAL);
 	}
 	ip_mrouter_unloading = 1;
 	MROUTER_UNLOCK();
 
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 
 	if (pim_encap_cookie) {
 	    encap_detach(pim_encap_cookie);
 	    pim_encap_cookie = NULL;
 	}
 
 	/* Withdraw the entry points before the locks go away. */
 	ip_mcast_src = NULL;
 	ip_mforward = NULL;
 	ip_mrouter_done = NULL;
 	ip_mrouter_get = NULL;
 	ip_mrouter_set = NULL;
 
 	ip_rsvp_force_done = NULL;
 	ip_rsvp_vif = NULL;
 
 	legal_vif_num = NULL;
 	mrt_ioctl = NULL;
 	rsvp_input_p = NULL;
 
 	VIF_LOCK_DESTROY();
 	MFC_LOCK_DESTROY();
 	MROUTER_LOCK_DESTROY();
 	break;
 
     default:
 	return EOPNOTSUPP;
     }
     return 0;
 }
 
 /* Module description: name, event handler and (unused) private data. */
 static moduledata_t ip_mroutemod = {
     "ip_mroute",
     ip_mroute_modevent,
     0
 };
 
 /* Register the module with the kernel. */
 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
Index: head/sys/netinet/ip_output.c
===================================================================
--- head/sys/netinet/ip_output.c	(revision 280970)
+++ head/sys/netinet/ip_output.c	(revision 280971)
@@ -1,1376 +1,1363 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mpath.h"
 #include "opt_route.h"
 #include "opt_sctp.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_llatbl.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/flowtable.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef SCTP
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #endif
 
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #include <netipsec/ipsec.h>
 #endif /* IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
-VNET_DEFINE(uint32_t, ip_id);
-
 #ifdef MBUF_STRESS_TEST
 /*
  * Stress-test knob, exported read/write under _net_inet_ip as
  * "mbuf_frag_size".  When non-zero, ip_output() passes any outgoing
  * packet whose m_pkthdr.len exceeds this value through m_fragment().
  */
 static int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 /*
  * Forward declaration; ip_output() calls this to loop a copy of an
  * outgoing multicast datagram back to the local host.
  */
 static void	ip_mloopback
 	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
 
 
 /*
  * Consulted by ip_output() to decide on multicast loopback when the
  * caller supplied no multicast options (imo == NULL).
  */
 extern int in_mcast_loop;
 extern	struct protosw inetsw[];
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route ro is present and has ro_rt initialized, route lookup would be
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  *
  * If ro is NULL, a zeroed on-stack route is used and released before
  * returning.  If inp is non-NULL it must be locked; its FIB number and,
  * unless IP_NODEFAULTFLOWID is set in flags, its flow id and flow type are
  * copied into the mbuf packet header.
  *
  * Returns 0 or an errno value: ENETUNREACH, EHOSTUNREACH, EADDRNOTAVAIL,
  * EACCES or EMSGSIZE from the checks below, or whatever the packet filter
  * hooks, netisr_queue(), ip_fragment() or the interface output routine
  * return.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
     struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int mtu;
 	int error = 0;
 	struct sockaddr_in *dst;
 	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
 	struct route iproute;
 	struct rtentry *rte;	/* cache for ro->ro_rt */
 	struct in_addr odst;
 	struct m_tag *fwd_tag = NULL;
 	uint32_t fibnum;
 	int have_ia_ref;
 	int needfiblookup;
 #ifdef IPSEC
 	int no_route_but_check_spd = 0;
 #endif
 	M_ASSERTPKTHDR(m);
 
 	/* Inherit FIB and (optionally) flow id/type from the pcb. */
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
 		}
 	}
 
 	/* No caller-supplied route: use a local one, released at done:. */
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 
 #ifdef FLOWTABLE
 	if (ro->ro_rt == NULL)
 		(void )flowtable_lookup(AF_INET, m, ro);
 #endif
 
 	if (opt) {
 		int len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
-	/*
-	 * Fill in IP header.  If we are not allowing fragmentation,
-	 * then the ip_id field is meaningless, but we don't set it
-	 * to zero.  Doing so causes various problems when devices along
-	 * the path (routers, load balancers, firewalls, etc.) illegally
-	 * disable DF on our packet.  Note that a 16-bit counter
-	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
-	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
-	 * for Counting NATted Hosts", Proc. IMW'02, available at
-	 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
-	 */
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
-		ip->ip_id = ip_newid();
+		ip_fillid(ip);
 		IPSTAT_INC(ips_localout);
 	} else {
 		/* Header already set, fetch hlen from there */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * dst/gw handling:
 	 *
 	 * dst can be rewritten but always points to &ro->ro_dst.
 	 * gw is readonly but can point either to dst OR rt_gateway,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
 	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 again:
 	/*
 	 * Re-entered (via goto) when the packet filter rewrote the
 	 * destination, changed the FIB, or attached a next-hop tag.
 	 */
 	ia = NULL;
 	have_ia_ref = 0;
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing
 	 * the cache with IPv6.
 	 */
 	rte = ro->ro_rt;
 	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
 		    rte->rt_ifp == NULL ||
 		    !RT_LINK_IS_UP(rte->rt_ifp) ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
 		RO_RTFREE(ro);
 		ro->ro_lle = NULL;
 		rte = NULL;
 		gw = dst;
 	}
 	/*
 	 * (Re)build dst from the packet, except when a forward tag has
 	 * already supplied the next hop in dst.
 	 */
 	if (rte == NULL && fwd_tag == NULL) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 	/*
 	 * If routing to interface only, short circuit routing lookup.
 	 * The use of an all-ones broadcast address implies this; an
 	 * interface is specified by the broadcast address of an interface,
 	 * or the destination address of a ptp interface.
 	 */
 	if (flags & IP_SENDONES) {
 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
 						      M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
 						M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
 		if (ia)
 			have_ia_ref = 1;
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * We want to do any cloning requested by the link layer,
 		 * as this is probably required in all cases for correct
 		 * operation (as it is for ARP).
 		 */
 		if (rte == NULL) {
 #ifdef RADIX_MPATH
 			rtalloc_mpath_fib(ro,
 			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
 			    fibnum);
 #else
 			in_rtalloc_ign(ro, 0, fibnum);
 #endif
 			rte = ro->ro_rt;
 		}
 		if (rte == NULL ||
 		    rte->rt_ifp == NULL ||
 		    !RT_LINK_IS_UP(rte->rt_ifp)) {
 #ifdef IPSEC
 			/*
 			 * There is no route for this packet, but it is
 			 * possible that a matching SPD entry exists.
 			 */
 			no_route_but_check_spd = 1;
 			mtu = 0; /* Silence GCC warning. */
 			goto sendit;
 #endif
 			/*
 			 * With IPSEC defined the goto above makes the
 			 * statements below unreachable; the same error is
 			 * then reported after ip_ipsec_output() at sendit:.
 			 */
 			IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(rte->rt_ifa);
 		ifp = rte->rt_ifp;
 		counter_u64_add(rte->rt_pksent, 1);
 		if (rte->rt_flags & RTF_GATEWAY)
 			gw = (struct sockaddr_in *)rte->rt_gateway;
 		if (rte->rt_flags & RTF_HOST)
 			isbroadcast = (rte->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(gw->sin_addr, ifp);
 	}
 	/*
 	 * Calculate MTU.  If we have a route that is up, use that,
 	 * otherwise use the interface's MTU.
 	 */
 	if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
 		mtu = rte->rt_mtu;
 	else
 		mtu = ifp->if_mtu;
 	/* Catch a possible divide by zero later. */
 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
 	    __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "gw"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		gw = dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				IPSTAT_INC(ips_noroute);
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		if ((imo == NULL && in_mcast_loop) ||
 		    (imo && imo->imo_multicast_loop)) {
 			/*
 			 * Loop back multicast datagram if not expressly
 			 * forbidden to do so, even if we are not a member
 			 * of the group; ip_input() will filter it later,
 			 * thus deferring a hash lookup and mutex acquisition
 			 * at the expense of a cheap copy using m_copym().
 			 */
 			ip_mloopback(ifp, m, dst, hlen);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!V_rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy. ip_input() will drop the copy if
 		 * this host does not belong to the destination group on
 		 * the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outgoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #ifdef IPSEC
 	switch(ip_ipsec_output(&m, inp, &error)) {
 	case 1:
 		goto bad;
 	case -1:
 		goto done;
 	case 0:
 	default:
 		break;	/* Continue with packet processing. */
 	}
 	/*
 	 * Check if there was a route for this packet; return error if not.
 	 */
 	if (no_route_but_check_spd) {
 		IPSTAT_INC(ips_noroute);
 		error = EHOSTUNREACH;
 		goto bad;
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
 		goto passout;
 
 	/* Run through list of hooks for output packets. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 
 	/* The hooks may have replaced the mbuf; refetch the header. */
 	ip = mtod(m, struct ip *);
 	needfiblookup = 0;
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = V_loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 #ifdef SCTP
 			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 			error = netisr_queue(NETISR_IP, m);
 			goto done;
 		} else {
 			if (have_ia_ref)
 				ifa_free(&ia->ia_ifa);
 			needfiblookup = 1; /* Redo the routing table lookup. */
 		}
 	}
 	/* See if fib was changed by packet filter. */
 	/*
 	 * NOTE(review): if only the FIB changed (destination untouched)
 	 * while have_ia_ref is set, nothing calls ifa_free() before
 	 * "goto again" resets have_ia_ref to 0 -- this looks like a
 	 * possible ifaddr reference leak; confirm.
 	 */
 	if (fibnum != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		fibnum = M_GETFIB(m);
 		RO_RTFREE(ro);
 		needfiblookup = 1;
 	}
 	if (needfiblookup)
 		goto again;
 
 	/* See if local, if yes, send it to netisr with M_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		error = netisr_queue(NETISR_IP, m);
 		goto done;
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		/* The next-hop sockaddr_in is stored right after the tag. */
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 		if (have_ia_ref)
 			ifa_free(&ia->ia_ifa);
 		goto again;
 	}
 
 passout:
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	/*
 	 * Compute in software any delayed checksum the interface does
 	 * not advertise in if_hwassist.
 	 */
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
 		ip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 
 		/*
 		 * Record statistics for this interface address.
 		 * With CSUM_TSO the byte/packet count will be slightly
 		 * incorrect because we count the IP+TCP headers only
 		 * once instead of for every generated packet.
 		 */
 		if (!(flags & IP_FORWARDING) && ia) {
 			if (m->m_pkthdr.csum_flags & CSUM_TSO)
 				counter_u64_add(ia->ia_ifa.ifa_opackets,
 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
 			else
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 
 			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m_clrprotoflags(m);
 		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = (*ifp->if_output)(ifp, m,
 		    (const struct sockaddr *)gw, ro);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		IPSTAT_INC(ips_cantfrag);
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 	/*
 	 * Send the fragments in order.  Once one send fails, the
 	 * remaining fragments are freed instead of being transmitted.
 	 */
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing upper layers.
 			 */
 			m_clrprotoflags(m);
 
 			IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 			error = (*ifp->if_output)(ifp, m,
 			    (const struct sockaddr *)gw, ro);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		IPSTAT_INC(ips_fragmented);
 
 done:
 	/* Release the local route (if we supplied it) and any ifaddr ref. */
 	if (ro == &iproute)
 		RO_RTFREE(ro);
 	if (have_ia_ref)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 bad:
 	/* Error exit: free the packet, then share the cleanup at done:. */
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  *
  * ip is the IP header to replicate into each fragment; ip_output() passes
  * the header at the front of *m_frag.
  *
  * Errors: EMSGSIZE if IP_DF is set in ip->ip_off or if fewer than 8 payload
  * bytes fit in a fragment; ENOBUFS if allocating or copying an mbuf fails.
  * On the ENOBUFS path the first fragment (the original packet) is left
  * untrimmed and its header is not updated.
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
     u_long if_hwassist_flags)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 	uint16_t ip_len, ip_off;
 
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
 		IPSTAT_INC(ips_cantfrag);
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
 		sctp_delayed_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 
 		off = MIN(mtu, m0->m_pkthdr.len);
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	/* off now marks the end of the first fragment within the packet. */
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/* make sure the flowid is the same for the fragmented mbufs */
 		M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
 		m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
 		/* copy multicast flag, if any */
 		m->m_flags |= (m0->m_flags & M_MCAST);
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copym().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip_off below ? */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		/* Last fragment gets the remainder; all others set IP_MF. */
 		if (off + len >= ip_len)
 			len = ip_len - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		/* ip_off was built in host order above; convert it now. */
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 			mhip->ip_sum = in_cksum(m, mhlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	IPSTAT_ADD(ips_ofragments, nfrags);
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 * (The m_adj() length is negative whenever the loop above ran,
 	 * since ip_len then exceeds hlen + firstlen; presumably a
 	 * negative length trims from the tail -- see mbuf(9).)
 	 */
 	m_adj(m0, hlen + firstlen - ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off = htons(ip_off | IP_MF);
 	ip->ip_sum = 0;
 	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 		ip->ip_sum = in_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 	}
 
 done:
 	/* Hand back the chain head on both the success and error paths. */
 	*m_frag = m0;
 	return error;
 }
 
 /*
  * Compute a delayed checksum for the packet in m and store it in the
  * packet.  The sum is taken with in_cksum_skip(m, ip_len, offset), where
  * offset is the IP header length (presumably skipping the IP header), and
  * is written m_pkthdr.csum_data bytes past the end of the IP header.
  * When CSUM_UDP is set, a computed value of 0 is stored as 0xffff.
  */
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *ip;
 	uint16_t csum, offset, ip_len;
 
 	ip = mtod(m, struct ip *);
 	offset = ip->ip_hl << 2 ;
 	ip_len = ntohs(ip->ip_len);
 	csum = in_cksum_skip(m, ip_len, offset);
 	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
 		csum = 0xffff;
 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
 
 	/* find the mbuf in the chain where the checksum starts*/
 	while ((m != NULL) && (offset >= m->m_len)) {
 		offset -= m->m_len;
 		m = m->m_next;
 	}
 	/* The checksum field must lie entirely inside a single mbuf. */
 	KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
 	KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
 	*(u_short *)(m->m_data + offset) = csum;
 }
 
 /*
  * IP socket option processing.
  */
 int
 ip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 #ifdef	RSS
 	uint32_t rss_bucket;
 	int retval;
 #endif
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		error = EINVAL;
 
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_dir == SOPT_SET) {
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEADDR) != 0)
 					inp->inp_flags2 |= INP_REUSEADDR;
 				else
 					inp->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_REUSEPORT:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEPORT) != 0)
 					inp->inp_flags2 |= INP_REUSEPORT;
 				else
 					inp->inp_flags2 &= ~INP_REUSEPORT;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_SETFIB:
 				INP_WLOCK(inp);
 				inp->inp_inc.inc_fibnum = so->so_fibnum;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			default:
 				break;
 			}
 		}
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				m_free(m);
 				break;
 			}
 			INP_WLOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_WUNLOCK(inp);
 			return (error);
 		}
 
 		case IP_BINDANY:
 			if (sopt->sopt_td != NULL) {
 				error = priv_check(sopt->sopt_td,
 				    PRIV_NETINET_BINDANY);
 				if (error)
 					break;
 			}
 			/* FALLTHROUGH */
 		case IP_BINDMULTI:
 #ifdef	RSS
 		case IP_RSS_LISTEN_BUCKET:
 #endif
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_RECVTOS:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RECVRSSBUCKETID:
 #endif
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval >= 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 #define	OPTSET(bit) do {						\
 	INP_WLOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 #define	OPTSET2(bit, val) do {						\
 	INP_WLOCK(inp);							\
 	if (val)							\
 		inp->inp_flags2 |= bit;					\
 	else								\
 		inp->inp_flags2 &= ~bit;				\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				OPTSET(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				OPTSET(INP_RECVTOS);
 				break;
 			case IP_BINDMULTI:
 				OPTSET2(INP_BINDMULTI, optval);
 				break;
 			case IP_RECVFLOWID:
 				OPTSET2(INP_RECVFLOWID, optval);
 				break;
 #ifdef	RSS
 			case IP_RSS_LISTEN_BUCKET:
 				if ((optval >= 0) &&
 				    (optval < rss_getnumbuckets())) {
 					inp->inp_rss_listen_bucket = optval;
 					OPTSET2(INP_RSS_BUCKET_SET, 1);
 				} else {
 					error = EINVAL;
 				}
 				break;
 			case IP_RECVRSSBUCKETID:
 				OPTSET2(INP_RECVRSSBUCKETID, optval);
 				break;
 #endif
 			}
 			break;
 #undef OPTSET
 #undef OPTSET2
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 		case IP_ADD_SOURCE_MEMBERSHIP:
 		case IP_DROP_SOURCE_MEMBERSHIP:
 		case IP_BLOCK_SOURCE:
 		case IP_UNBLOCK_SOURCE:
 		case IP_MSFILTER:
 		case MCAST_JOIN_GROUP:
 		case MCAST_LEAVE_GROUP:
 		case MCAST_JOIN_SOURCE_GROUP:
 		case MCAST_LEAVE_SOURCE_GROUP:
 		case MCAST_BLOCK_SOURCE:
 		case MCAST_UNBLOCK_SOURCE:
 			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			INP_WLOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			struct mbuf *m;
 
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			req = mtod(m, caddr_t);
 			error = ipsec_set_policy(inp, sopt->sopt_name, req,
 			    m->m_len, (sopt->sopt_td != NULL) ?
 			    sopt->sopt_td->td_ucred : NULL);
 			m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			if (inp->inp_options)
 				error = sooptcopyout(sopt,
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_BINDANY:
 		case IP_RECVTOS:
 		case IP_BINDMULTI:
 		case IP_FLOWID:
 		case IP_FLOWTYPE:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RSSBUCKETID:
 		case IP_RECVRSSBUCKETID:
 #endif
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 #define	OPTBIT2(bit)	(inp->inp_flags2 & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				optval = OPTBIT(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				optval = OPTBIT(INP_RECVTOS);
 				break;
 			case IP_FLOWID:
 				optval = inp->inp_flowid;
 				break;
 			case IP_FLOWTYPE:
 				optval = inp->inp_flowtype;
 				break;
 			case IP_RECVFLOWID:
 				optval = OPTBIT2(INP_RECVFLOWID);
 				break;
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				retval = rss_hash2bucket(inp->inp_flowid,
 				    inp->inp_flowtype,
 				    &rss_bucket);
 				if (retval == 0)
 					optval = rss_bucket;
 				else
 					error = EINVAL;
 				break;
 			case IP_RECVRSSBUCKETID:
 				optval = OPTBIT2(INP_RECVRSSBUCKETID);
 				break;
 #endif
 			case IP_BINDMULTI:
 				optval = OPTBIT2(INP_BINDMULTI);
 				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_MSFILTER:
 			error = inp_getmoptions(inp, sopt);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			if (m != 0) {
 				req = mtod(m, caddr_t);
 				len = m->m_len;
 			}
 			error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Loop a copy of an outgoing IP multicast packet back to the input queue
  * of the given interface.  Called from ip_output().  The copy is handed to
  * the output routine of the loopback "driver" through if_simloop(), even
  * though ifp might NOT be a loopback interface -- evil, but easier than
  * replicating that code here.
  *
  * m is only read (it is duplicated); hlen is the number of leading bytes
  * covered by the IP header checksum computed below.  If the copy cannot
  * be obtained the function silently does nothing.
  */
 static void
 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
     int hlen)
 {
 	struct mbuf *mcopy;
 	struct ip *iph;
 
 	/*
 	 * Make a deep copy of the packet because we're going to
 	 * modify the packet in order to generate checksums.
 	 */
 	mcopy = m_dup(m, M_NOWAIT);
 	if (mcopy == NULL)
 		return;
 
 	/*
 	 * Pull up when the copy is not writable or its first mbuf holds
 	 * fewer than hlen bytes; give up if that fails.
 	 */
 	if (!M_WRITABLE(mcopy) || mcopy->m_len < hlen) {
 		mcopy = m_pullup(mcopy, hlen);
 		if (mcopy == NULL)
 			return;
 	}
 
 	/* If needed, compute the checksum and mark it as valid. */
 	if ((mcopy->m_pkthdr.csum_flags & CSUM_DELAY_DATA) != 0) {
 		in_delayed_cksum(mcopy);
 		mcopy->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		mcopy->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		mcopy->m_pkthdr.csum_data = 0xffff;
 	}
 
 	/*
 	 * We don't bother to fragment if the IP length is greater
 	 * than the interface's MTU.  Can this possibly matter?
 	 */
 	iph = mtod(mcopy, struct ip *);
 	iph->ip_sum = 0;
 	iph->ip_sum = in_cksum(mcopy, hlen);
 #if 1 /* XXX */
 	/* Complain about, then repair, a destination that is not AF_INET. */
 	if (dst->sin_family != AF_INET) {
 		printf("ip_mloopback: bad address family %d\n",
 		    dst->sin_family);
 		dst->sin_family = AF_INET;
 	}
 #endif
 	if_simloop(ifp, mcopy, dst->sin_family, 0);
 }
Index: head/sys/netinet/ip_var.h
===================================================================
--- head/sys/netinet/ip_var.h	(revision 280970)
+++ head/sys/netinet/ip_var.h	(revision 280971)
@@ -1,323 +1,306 @@
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IP_VAR_H_
 #define	_NETINET_IP_VAR_H_
 
 #include <sys/queue.h>
 
 /*
  * Overlay for ip header used by other protocols (tcp, udp).
  *
  * NOTE(review): the members add up to 9 + 1 + 2 + 4 + 4 = 20 bytes,
  * presumably so the overlay has the size of an option-less IP header --
  * confirm against struct ip before relying on it.
  */
 struct ipovly {
 	u_char	ih_x1[9];		/* (unused) */
 	u_char	ih_pr;			/* protocol */
 	u_short	ih_len;			/* protocol length */
 	struct	in_addr ih_src;		/* source internet address */
 	struct	in_addr ih_dst;		/* destination internet address */
 };
 
 #ifdef _KERNEL
 /*
  * Ip reassembly queue structure.  Each fragment
  * being reassembled is attached to one of these structures.
  * They are timed out after ipq_ttl drops to 0, and may also
  * be reclaimed if memory becomes tight.
  *
  * Queue headers are chained to one another through ipq_list (a TAILQ
  * entry).  ipq_nfrags is a u_char, so the per-packet fragment count it
  * records cannot exceed 255.
  */
 struct ipq {
 	TAILQ_ENTRY(ipq) ipq_list;	/* to other reass headers */
 	u_char	ipq_ttl;		/* time for reass q to live */
 	u_char	ipq_p;			/* protocol of this fragment */
 	u_short	ipq_id;			/* sequence id for reassembly */
 	struct mbuf *ipq_frags;		/* to ip headers of fragments */
 	struct	in_addr ipq_src,ipq_dst; /* source and destination addresses */
 	u_char	ipq_nfrags;		/* # frags in this packet */
 	struct label *ipq_label;	/* MAC label */
 };
 #endif /* _KERNEL */
 
 /*
  * Structure stored in mbuf in inpcb.ip_options
  * and passed to ip_output when ip options are in use.
  * The actual length of the options (including ipopt_dst)
  * is in m_len.
  *
  * MAX_IPOPTLEN is the capacity, in bytes, of the ipopt_list buffer.
  */
 #define MAX_IPOPTLEN	40
 
 struct ipoption {
 	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
 	char	ipopt_list[MAX_IPOPTLEN];	/* options proper */
 };
 
 /*
  * Structure attached to inpcb.ip_moptions and
  * passed to ip_output when IP multicast options are in use.
  * This structure is lazy-allocated.
  *
  * imo_num_memberships and imo_max_memberships are both u_short, which
  * bounds the number of memberships one socket can record.
  */
 struct ip_moptions {
 	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
 	struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
 	u_long	imo_multicast_vif;	/* vif num outgoing multicasts */
 	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
 	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
 	u_short	imo_num_memberships;	/* no. memberships this socket */
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
 	/* STAILQ linkage; the list that uses it is not visible in this header */
 	STAILQ_ENTRY(ip_moptions) imo_link;
 };
 
 /*
  * IP statistics counters.
  *
  * Every member is a uint64_t and must stay that way: the KMOD_IPSTAT_INC
  * and KMOD_IPSTAT_DEC macros in this header turn a member name into a
  * counter index with offsetof(struct ipstat, name) / sizeof(uint64_t).
  */
 struct	ipstat {
 	uint64_t ips_total;		/* total packets received */
 	uint64_t ips_badsum;		/* checksum bad */
 	uint64_t ips_tooshort;		/* packet too short */
 	uint64_t ips_toosmall;		/* not enough data */
 	uint64_t ips_badhlen;		/* ip header length < data size */
 	uint64_t ips_badlen;		/* ip length < ip header length */
 	uint64_t ips_fragments;		/* fragments received */
 	uint64_t ips_fragdropped;	/* frags dropped (dups, out of space) */
 	uint64_t ips_fragtimeout;	/* fragments timed out */
 	uint64_t ips_forward;		/* packets forwarded */
 	uint64_t ips_fastforward;	/* packets fast forwarded */
 	uint64_t ips_cantforward;	/* packets rcvd for unreachable dest */
 	uint64_t ips_redirectsent;	/* packets forwarded on same net */
 	uint64_t ips_noproto;		/* unknown or unsupported protocol */
 	uint64_t ips_delivered;		/* datagrams delivered to upper level*/
 	uint64_t ips_localout;		/* total ip packets generated here */
 	uint64_t ips_odropped;		/* lost packets due to nobufs, etc. */
 	uint64_t ips_reassembled;	/* total packets reassembled ok */
 	uint64_t ips_fragmented;	/* datagrams successfully fragmented */
 	uint64_t ips_ofragments;	/* output fragments created */
 	uint64_t ips_cantfrag;		/* don't fragment flag was set, etc. */
 	uint64_t ips_badoptions;		/* error in option processing */
 	uint64_t ips_noroute;		/* packets discarded due to no route */
 	uint64_t ips_badvers;		/* ip version != 4 */
 	uint64_t ips_rawout;		/* total raw ip packets generated */
 	uint64_t ips_toolong;		/* ip length > max ip packet size */
 	uint64_t ips_notmember;		/* multicasts for unregistered grps */
 	uint64_t ips_nogif;		/* no match gif found */
 	uint64_t ips_badaddr;		/* invalid address on header */
 };
 
 #ifdef _KERNEL
 
 #include <sys/counter.h>
 #include <net/vnet.h>
 
 VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  *
  * 'name' is a member of struct ipstat.  IPSTAT_SUB is implemented as an
  * IPSTAT_ADD of the negated value, and IPSTAT_INC / IPSTAT_DEC are the
  * same two macros with a value of 1.
  */
 #define	IPSTAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
 #define	IPSTAT_SUB(name, val)	IPSTAT_ADD(name, -(val))
 #define	IPSTAT_INC(name)	IPSTAT_ADD(name, 1)
 #define	IPSTAT_DEC(name)	IPSTAT_SUB(name, 1)
 
 /*
  * Kernel module consumers must use this accessor macro.
  *
  * The member name is converted to a counter index (its byte offset in
  * struct ipstat divided by sizeof(uint64_t)) and passed to the
  * kmod_ipstat_inc() / kmod_ipstat_dec() functions declared here.
  */
 void	kmod_ipstat_inc(int statnum);
 #define	KMOD_IPSTAT_INC(name)	\
     kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
 void	kmod_ipstat_dec(int statnum);
 #define	KMOD_IPSTAT_DEC(name)	\
     kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
 
 /* flags passed to ip_output as last parameter */
 #define	IP_FORWARDING		0x1		/* most of ip header exists */
 #define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
 #define	IP_SENDONES		0x4		/* send all-ones broadcast */
 #define	IP_SENDTOIF		0x8		/* send on specific ifnet */
 #define IP_ROUTETOIF		SO_DONTROUTE	/* 0x10 bypass routing tables */
 #define IP_ALLOWBROADCAST	SO_BROADCAST	/* 0x20 can send broadcast packets */
 #define	IP_NODEFAULTFLOWID	0x40		/* Don't set the flowid from inp */
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP_HDR_ALIGNED_P(ip)	1
 #else
 #define IP_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
 #endif
 
 struct ip;
 struct inpcb;
 struct route;
 struct sockopt;
 
-VNET_DECLARE(uint32_t, ip_id);			/* ip packet ctr, for ids */
 VNET_DECLARE(int, ip_defttl);			/* default IP ttl */
 VNET_DECLARE(int, ipforwarding);		/* ip forwarding */
 #ifdef IPSTEALTH
 VNET_DECLARE(int, ipstealth);			/* stealth forwarding */
 #endif
 extern u_char	ip_protox[];
 VNET_DECLARE(struct socket *, ip_rsvpd);	/* reservation protocol daemon*/
 VNET_DECLARE(struct socket *, ip_mrouter);	/* multicast routing daemon */
 extern int	(*legal_vif_num)(int);
 extern u_long	(*ip_mcast_src)(int);
 VNET_DECLARE(int, rsvp_on);
 VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
 #define	V_ip_id			VNET(ip_id)
 #define	V_ip_defttl		VNET(ip_defttl)
 #define	V_ipforwarding		VNET(ipforwarding)
 #ifdef IPSTEALTH
 #define	V_ipstealth		VNET(ipstealth)
 #endif
 #define	V_ip_rsvpd		VNET(ip_rsvpd)
 #define	V_ip_mrouter		VNET(ip_mrouter)
 #define	V_rsvp_on		VNET(rsvp_on)
 #define	V_drop_redirect		VNET(drop_redirect)
 
 void	inp_freemoptions(struct ip_moptions *);
 int	inp_getmoptions(struct inpcb *, struct sockopt *);
 int	inp_setmoptions(struct inpcb *, struct sockopt *);
 
 int	ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	ip_drain(void);
 int	ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags);
 void	ip_forward(struct mbuf *m, int srcrt);
 void	ip_init(void);
 #ifdef VIMAGE
 void	ip_destroy(void);
 #endif
 extern int
 	(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 	    struct ip_moptions *);
 int	ip_output(struct mbuf *,
 	    struct mbuf *, struct route *, int, struct ip_moptions *,
 	    struct inpcb *);
 int	ipproto_register(short);
 int	ipproto_unregister(short);
 struct mbuf *
 	ip_reass(struct mbuf *);
 struct in_ifaddr *
 	ip_rtaddr(struct in_addr, u_int fibnum);
 void	ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
 	    struct mbuf *);
 void	ip_slowtimo(void);
-uint16_t	ip_randomid(void);
+void	ip_fillid(struct ip *);
 int	rip_ctloutput(struct socket *, struct sockopt *);
 void	rip_ctlinput(int, struct sockaddr *, void *);
 void	rip_init(void);
 #ifdef VIMAGE
 void	rip_destroy(void);
 #endif
 int	rip_input(struct mbuf **, int *, int);
 int	rip_output(struct mbuf *, struct socket *, ...);
 int	ipip_input(struct mbuf **, int *, int);
 int	rsvp_input(struct mbuf **, int *, int);
 int	ip_rsvp_init(struct socket *);
 int	ip_rsvp_done(void);
 extern int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
 extern void	(*ip_rsvp_force_done)(struct socket *);
 extern int	(*rsvp_input_p)(struct mbuf **, int *, int);
 
 VNET_DECLARE(struct pfil_head, inet_pfil_hook);	/* packet filter hooks */
 #define	V_inet_pfil_hook	VNET(inet_pfil_hook)
 
 void	in_delayed_cksum(struct mbuf *m);
 
 /* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
 /*
  * Reference to an ipfw or packet filter rule that can be carried
  * outside critical sections.
  * A rule is identified by rulenum:rule_id which is ordered.
  * In version chain_id the rule can be found in slot 'slot', so
  * we don't need a lookup if chain_id == chain->id.
  *
  * On exit from the firewall this structure refers to the rule after
  * the matching one (slot points to the new rule; rulenum:rule_id-1
  * is the matching rule), and additional info (e.g. info often contains
  * the insn argument or tablearg in the low 16 bits, in host format).
  * On entry, the structure is valid if slot>0, and refers to the starting
  * rules. 'info' contains the reason for reinject, e.g. divert port,
  * divert direction, and so on.
  */
 /* All five members are 32-bit; 'info' is decoded with the IPFW_* enum below. */
 struct ipfw_rule_ref {
 	uint32_t	slot;		/* slot for matching rule	*/
 	uint32_t	rulenum;	/* matching rule number		*/
 	uint32_t	rule_id;	/* matching rule id		*/
 	uint32_t	chain_id;	/* ruleset id			*/
 	uint32_t	info;		/* see below			*/
 };
 
 /*
  * Bit layout of ipfw_rule_ref.info.  The low 16 bits (IPFW_INFO_MASK)
  * carry the argument; the remaining values are flag bits in the top byte.
  * IPFW_IS_MASK is the union of IPFW_IS_DIVERT and IPFW_IS_DUMMYNET.
  */
 enum {
 	IPFW_INFO_MASK	= 0x0000ffff,
 	IPFW_INFO_OUT	= 0x00000000,	/* outgoing, just for convenience */
 	IPFW_INFO_IN	= 0x80000000,	/* incoming, overloads dir */
 	IPFW_ONEPASS	= 0x40000000,	/* One-pass, do not reinject */
 	IPFW_IS_MASK	= 0x30000000,	/* which source ? */
 	IPFW_IS_DIVERT	= 0x20000000,
 	IPFW_IS_DUMMYNET =0x10000000,
 	IPFW_IS_PIPE	= 0x08000000,	/* pipe=1, queue = 0 */
 };
 #define MTAG_IPFW	1148380143	/* IPFW-tagged cookie */
 #define MTAG_IPFW_RULE	1262273568	/* rule reference */
 #define	MTAG_IPFW_CALL	1308397630	/* call stack */
 
 struct ip_fw_args;
 typedef int	(*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
 typedef int	(*ip_fw_ctl_ptr_t)(struct sockopt *);
 VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
 #define	V_ip_fw_ctl_ptr		VNET(ip_fw_ctl_ptr)
 
 /* Divert hooks. */
 extern void	(*ip_divert_ptr)(struct mbuf *m, int incoming);
 /* ng_ipfw hooks -- XXX make it the same as divert and dummynet */
 extern int	(*ng_ipfw_input_p)(struct mbuf **, int,
 			struct ip_fw_args *, int);
 
 extern int	(*ip_dn_ctl_ptr)(struct sockopt *);
 extern int	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
-
-VNET_DECLARE(int, ip_do_randomid);
-#define	V_ip_do_randomid	VNET(ip_do_randomid)
-static __inline uint16_t
-ip_newid(void)
-{
-	uint16_t res;
-
-	if (V_ip_do_randomid != 0)
-		return (ip_randomid());
-	else {
-		res = atomic_fetchadd_32(&V_ip_id, 1) & 0xFFFF;
-		return (htons(res));
-	}
-}
-
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IP_VAR_H_ */
Index: head/sys/netinet/raw_ip.c
===================================================================
--- head/sys/netinet/raw_ip.c	(revision 280970)
+++ head/sys/netinet/raw_ip.c	(revision 280971)
@@ -1,1125 +1,1129 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #endif /*IPSEC*/
 
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
     "Maximum TTL on IP packets");
 
 VNET_DEFINE(struct inpcbhead, ripcb);
 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
 
 #define	V_ripcb			VNET(ripcb)
 #define	V_ripcbinfo		VNET(ripcbinfo)
 
 /*
  * Control and data hooks for ipfw, dummynet, divert and so on.
  * The data hooks are not used here but it is convenient
  * to keep them all in one place.
  */
 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
 
 int	(*ip_dn_ctl_ptr)(struct sockopt *);
 int	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
 void	(*ip_divert_ptr)(struct mbuf *, int);
 int	(*ng_ipfw_input_p)(struct mbuf **, int,
 			struct ip_fw_args *, int);
 
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip_mrouter);
 
 /*
  * The various mrouter and rsvp functions.
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 #endif /* INET */
 
 u_long	rip_sendspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 
 u_long	rip_recvspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 /*
  * Hash functions
  *
  * INP_PCBHASH_RAW_SIZE is the hash size handed to in_pcbinfo_init() by
  * rip_init().
  *
  * INP_PCBHASH_RAW() sums protocol, local address and foreign address,
  * reduces the sum modulo 'mask' and adds 1 ('%' binds tighter than '+'),
  * so for the unsigned operands used in this file it never yields 0.
  * Bucket 0 is therefore left free: rip_inshash() puts sockets that are not
  * fully specified there, and rip_input() scans it as a second pass.
  */
 
 #define INP_PCBHASH_RAW_SIZE	256
 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
         (((proto) + (laddr) + (faddr)) % (mask) + 1)
 
 #ifdef INET
 /*
  * Link a raw inpcb into the hash table of its pcbinfo.
  *
  * A socket with a protocol number and both a local and a foreign address
  * goes into the bucket chosen by INP_PCBHASH_RAW(); every other socket
  * goes into bucket 0.  Both the pcbinfo write lock and the inpcb write
  * lock are asserted.
  */
 static void
 rip_inshash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	int bucket;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Default to the catch-all bucket unless the socket is fully bound. */
 	bucket = 0;
 	if (inp->inp_ip_p != 0 &&
 	    inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != INADDR_ANY)
 		bucket = INP_PCBHASH_RAW(inp->inp_ip_p,
 		    inp->inp_laddr.s_addr, inp->inp_faddr.s_addr,
 		    pcbinfo->ipi_hashmask);
 
 	LIST_INSERT_HEAD(&pcbinfo->ipi_hashbase[bucket], inp, inp_hash);
 }
 
 /*
  * Unlink a raw inpcb from whichever hash bucket it is currently on.
  * Both the pcbinfo write lock and the inpcb write lock are asserted.
  */
 static void
 rip_delhash(struct inpcb *inp)
 {
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	LIST_REMOVE(inp, inp_hash);
 }
 #endif /* INET */
 
 /*
  * Raw interface to IP protocol.
  */
 
 /*
  * Initialize raw connection block q.
  */
 /*
  * Handler registered by rip_init() for the maxsockets_change event: sets
  * the limit of the raw inpcb zone to the current value of maxsockets.
  * The 'tag' argument is not used.
  */
 static void
 rip_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
 }
 
 /*
  * Per-item init routine that rip_init() hands to in_pcbinfo_init():
  * initializes the lock of the inpcb stored at 'mem'.  'size' and 'flags'
  * are not used.  Always returns 0.
  */
 static int
 rip_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *pcb;
 
 	pcb = mem;
 	INP_LOCK_INIT(pcb, "inp", "rawinp");
 	return (0);
 }
 
 /*
  * Set up the raw IP pcbinfo: initialize V_ripcbinfo with
  * INP_PCBHASH_RAW_SIZE hash buckets and rip_inpcb_init() as the per-inpcb
  * init routine, then register rip_zone_change() so it runs on every
  * maxsockets_change event.
  */
 void
 rip_init(void)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
 	    1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
 	    IPI_HASHFIELDS_NONE);
 	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
 
 #ifdef VIMAGE
 /*
  * Counterpart of rip_init(), compiled only with VIMAGE: tears down
  * V_ripcbinfo through in_pcbinfo_destroy().
  */
 void
 rip_destroy(void)
 {
 
 	in_pcbinfo_destroy(&V_ripcbinfo);
 }
 #endif
 
 #ifdef INET
 /*
  * Offer one datagram to a single raw socket.
  *
  * 'last' is the receiving inpcb (its lock is asserted), 'ip' the IP header
  * of the datagram, 'n' the mbuf chain to deliver and 'ripsrc' the source
  * address reported to the receiver.  The chain 'n' is always consumed:
  * it is either appended to the socket's receive buffer or freed.
  *
  * Returns 1 when the datagram was rejected by a policy check (IPSEC, MAC
  * or the socket's minimum TTL) and 0 otherwise -- including the case
  * where the receive buffer refused it.
  */
 static int
 rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
     struct sockaddr_in *ripsrc)
 {
 	struct socket *so;
 	struct mbuf *ctl;
 	int rejected;
 
 	INP_LOCK_ASSERT(last);
 
 	rejected = 0;
 #ifdef IPSEC
 	/* check AH/ESP integrity. */
 	if (ipsec4_in_reject(n, last))
 		rejected = 1;
 #endif /* IPSEC */
 #ifdef MAC
 	if (rejected == 0 && mac_inpcb_check_deliver(last, n) != 0)
 		rejected = 1;
 #endif
 	/* Check the minimum TTL for socket. */
 	if (last->inp_ip_minttl != 0 && last->inp_ip_minttl > ip->ip_ttl)
 		rejected = 1;
 	if (rejected != 0) {
 		m_freem(n);
 		return (rejected);
 	}
 
 	/* Collect control data only if the socket asked for some. */
 	ctl = NULL;
 	so = last->inp_socket;
 	if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
 	    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)) != 0)
 		ip_savecontrol(last, &ctl, ip, n);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n,
 	    ctl) != 0) {
 		/* Queued; wake the reader with the buffer lock still held. */
 		sorwakeup_locked(so);
 		return (0);
 	}
 
 	/* should notify about lost packet */
 	m_freem(n);
 	if (ctl != NULL)
 		m_freem(ctl);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	return (0);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  *
  * The mbuf chain in *mp is taken over (and *mp is set to NULL): it is
  * either delivered to the last matching raw socket or freed.  'offp' is
  * not used.  Always returns IPPROTO_DONE.
  *
  * Receivers are collected in two passes over the raw pcb hash:
  *   1. the bucket selected by (proto, source, destination), where the
  *      protocol and both addresses must match exactly;
  *   2. bucket 0, where an unset protocol or address on the socket acts as
  *      a wildcard and multicast membership is additionally checked.
  * Every match except the final one receives a copy made with m_copy();
  * the final match receives the original chain.
  */
 int
 rip_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct inpcb *inp, *last;
 	struct sockaddr_in ripsrc;
 	int hash;
 
 	*mp = NULL;
 
 	bzero(&ripsrc, sizeof(ripsrc));
 	ripsrc.sin_len = sizeof(ripsrc);
 	ripsrc.sin_family = AF_INET;
 	ripsrc.sin_addr = ip->ip_src;
 	last = NULL;
 
 	ifp = m->m_pkthdr.rcvif;
 
 	/* Pass 1: sockets bound to exactly this protocol/address pair. */
 	hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
 	    ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
 		if (inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 			continue;
 		if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 			continue;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * XXX: If faddr was bound to multicast group,
 			 * jailed raw socket will drop datagram.
 			 */
 			if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				continue;
 		}
 		/*
 		 * Delivery runs one socket behind the scan: the previous
 		 * match gets a copy now, so the final match can be given
 		 * the original chain after both loops.
 		 */
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copy(m, 0, (int)M_COPYALL);
 			if (n != NULL)
 				(void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 		}
 		INP_RLOCK(inp);
 		last = inp;
 	}
 	/* Pass 2: the catch-all bucket, with wildcard matching. */
 	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
 		if (inp->inp_ip_p && inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (!in_nullhost(inp->inp_laddr) &&
 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
 			continue;
 		if (!in_nullhost(inp->inp_faddr) &&
 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
 			continue;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 			    prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				continue;
 		}
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->inp_moptions != NULL &&
 		    IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 			/*
 			 * If the incoming datagram is for IGMP, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * In the case of IGMPv2, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. imo_multi_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * to filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if (proto != IPPROTO_IGMP) {
 				struct sockaddr_in group;
 
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ip->ip_dst;
 
 				blocked = imo_multi_filter(inp->inp_moptions,
 				    ifp,
 				    (struct sockaddr *)&group,
 				    (struct sockaddr *)&ripsrc);
 			}
 
 			if (blocked != MCAST_PASS) {
 				IPSTAT_INC(ips_notmember);
 				continue;
 			}
 		}
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copy(m, 0, (int)M_COPYALL);
 			if (n != NULL)
 				(void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 		}
 		INP_RLOCK(inp);
 		last = inp;
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	if (last != NULL) {
 		/*
 		 * A non-zero return means a policy check rejected the
 		 * datagram and rip_append() freed it, so it was not
 		 * delivered.  Take it back out of ips_delivered, exactly
 		 * as the no-receiver branch below does.
 		 * NOTE(review): this assumes the caller has already counted
 		 * the datagram in ips_delivered, which that branch implies.
 		 */
 		if (rip_append(last, ip, m, &ripsrc) != 0)
 			IPSTAT_DEC(ips_delivered);
 		INP_RUNLOCK(last);
 	} else {
 		/* Nobody wanted it: free the chain and fix up the counters. */
 		m_freem(m);
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.  Tack on options user may
  * have setup with control call.
  *
  * 'm' is the datagram to send and 'so' the raw socket; the single variadic
  * argument is read as a u_long and stored unchanged into ip_dst.s_addr
  * when the header is built here.
  *
  * Returns 0 or an errno value: EMSGSIZE when the packet would exceed
  * IP_MAXPACKET, ENOBUFS when no room for the header could be prepended,
  * EINVAL for an inconsistent user-supplied header, an error from the jail
  * address checks, or whatever ip_output() returns.  On the error paths
  * that still hold the chain it is freed before returning.
  */
 int
 rip_output(struct mbuf *m, struct socket *so, ...)
 {
 	struct ip *ip;
 	int error;
 	struct inpcb *inp = sotoinpcb(so);
 	va_list ap;
 	u_long dst;
 	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 
 	va_start(ap, so);
 	dst = va_arg(ap, u_long);
 	va_end(ap);
 
 	/*
 	 * If the user handed us a complete IP packet, use it.  Otherwise,
 	 * allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		/* The read lock is held from here until after ip_output(). */
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = htons(IP_DF);
 		else
 			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = dst;
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
 			 * want to see from jails.
 			 */
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
 				    inp->inp_cred);
 			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
 			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (error);
 			}
 		}
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		INP_RLOCK(inp);
 		/*
 		 * NOTE(review): the header is dereferenced without a visible
 		 * check that the first mbuf holds sizeof(struct ip) bytes --
 		 * confirm that the caller guarantees this.
 		 */
 		ip = mtod(m, struct ip *);
 		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
 		if (error != 0) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 
 		/*
 		 * Don't allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash.
 		 */
 		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
 		    || (ntohs(ip->ip_len) > m->m_pkthdr.len)
 		    || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (EINVAL);
 		}
+		/*
+		 * This doesn't allow application to specify ID of zero,
+		 * but we got this limitation from the beginning of history.
+		 */
 		if (ip->ip_id == 0)
-			ip->ip_id = ip_newid();
+			ip_fillid(ip);
 
 		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
 		IPSTAT_INC(ips_rawout);
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	/* Socket options and multicast options come from the inpcb. */
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	INP_RUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  *
  * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	/*
 	 * Outside IPPROTO_IP the only request handled here is
 	 * SOL_SOCKET/SO_SETFIB, which copies the socket's FIB number into
 	 * the pcb.  Everything else at a foreign level is rejected.
 	 */
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	/*
 	 * error stays 0 if sopt_dir is neither SOPT_GET nor SOPT_SET, since
 	 * the outer switch has no default case.
 	 */
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			/*
 			 * Reports the masked flag bit itself, i.e. either 0
 			 * or the value of INP_HDRINCL, not a normalized 0/1.
 			 */
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		/*
 		 * Firewall requests are forwarded to the ipfw control hook
 		 * when one is installed; ENOPROTOOPT otherwise.
 		 */
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		/* Same dispatch pattern for the dummynet control hook. */
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_GET:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		/*
 		 * Multicast routing queries need PRIV_NETINET_MROUTE.  A
 		 * failed privilege check returns immediately; otherwise the
 		 * request goes to ip_mrouter_get if that hook is set, else
 		 * EOPNOTSUPP.
 		 */
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		/* Anything else is an ordinary IP option. */
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			/*
 			 * Copy in an int and set or clear INP_HDRINCL.  No
 			 * inpcb lock is taken around the flag update here.
 			 */
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		/* Firewall updates: forwarded to the ipfw control hook. */
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		/* Dummynet updates: forwarded to the dummynet control hook. */
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		/*
 		 * The RSVP cases below are gated by the same
 		 * PRIV_NETINET_MROUTE check as the MRT_* requests, and
 		 * likewise return directly when the check fails.
 		 */
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			/* A missing hook yields EINVAL here, not EOPNOTSUPP. */
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		/* Multicast routing updates; see the SOPT_GET counterpart. */
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		/* Anything else is an ordinary IP option. */
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * This function exists solely to receive the PRC_IFDOWN messages which are
  * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
  * in_ifadown() to remove all routes corresponding to that address.  It also
  * receives the PRC_IFUP messages from if_up() and reinstalls the interface
  * routes.
  */
 void
 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct in_ifaddr *ia;
 	struct ifnet *ifp;
 	int err;
 	int flags;
 
 	/*
 	 * Only PRC_IFDOWN and PRC_IFUP are acted upon; any other cmd falls
 	 * out of the switch untouched.  The vip argument is not used.
 	 */
 	switch (cmd) {
 	case PRC_IFDOWN:
 		IN_IFADDR_RLOCK();
 		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			/*
 			 * The match is on pointer identity with sa, not on
 			 * address contents, and only addresses that
 			 * currently have IFA_ROUTE set are considered.
 			 */
 			if (ia->ia_ifa.ifa_addr == sa
 			    && (ia->ia_flags & IFA_ROUTE)) {
 				/*
 				 * Take a reference so the address list lock
 				 * can be dropped before the route teardown
 				 * calls below.
 				 */
 				ifa_ref(&ia->ia_ifa);
 				IN_IFADDR_RUNLOCK();
 				/*
 				 * in_scrubprefix() kills the interface route.
 				 */
 				in_scrubprefix(ia, 0);
 				/*
 				 * in_ifadown gets rid of all the rest of the
 				 * routes.  This is not quite the right thing
 				 * to do, but at least if we are running a
 				 * routing process they will come back.
 				 */
 				in_ifadown(&ia->ia_ifa, 0);
 				ifa_free(&ia->ia_ifa);
 				break;
 			}
 		}
 		if (ia == NULL)		/* If ia matched, already unlocked. */
 			IN_IFADDR_RUNLOCK();
 		break;
 
 	case PRC_IFUP:
 		IN_IFADDR_RLOCK();
 		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			if (ia->ia_ifa.ifa_addr == sa)
 				break;
 		}
 		/* Nothing to do if sa is unknown or its route already exists. */
 		if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
 			IN_IFADDR_RUNLOCK();
 			return;
 		}
 		/* Hold a reference across the unlocked route operations. */
 		ifa_ref(&ia->ia_ifa);
 		IN_IFADDR_RUNLOCK();
 		flags = RTF_UP;
 		ifp = ia->ia_ifa.ifa_ifp;
 
 		/* Loopback and point-to-point interfaces get a host route. */
 		if ((ifp->if_flags & IFF_LOOPBACK)
 		    || (ifp->if_flags & IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		/*
 		 * The result of the loopback-route removal is overwritten by
 		 * the rtinit() call that follows, so it is effectively
 		 * ignored.
 		 */
 		err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
 
 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
 		if (err == 0)
 			ia->ia_flags |= IFA_ROUTE;
 
 		/* The result of re-adding the loopback route is not checked. */
 		err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
 
 		ifa_free(&ia->ia_ifa);
 		break;
 	}
 }
 
 /*
  * Attach a new raw IP pcb to socket 'so' for protocol number 'proto'.
  * Returns 0 on success or an errno value from the privilege check, the
  * protocol range check, buffer reservation or pcb allocation.
  */
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *pcb;
 	int rc;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb == NULL, ("rip_attach: inp != NULL"));
 
 	/* Creating a raw socket requires PRIV_NETINET_RAW. */
 	rc = priv_check(td, PRIV_NETINET_RAW);
 	if (rc != 0)
 		return (rc);
 
 	/* Only protocol numbers in [0, IPPROTO_MAX) are accepted. */
 	if (proto < 0 || proto >= IPPROTO_MAX)
 		return (EPROTONOSUPPORT);
 
 	rc = soreserve(so, rip_sendspace, rip_recvspace);
 	if (rc != 0)
 		return (rc);
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	rc = in_pcballoc(so, &V_ripcbinfo);
 	if (rc != 0) {
 		INP_INFO_WUNLOCK(&V_ripcbinfo);
 		return (rc);
 	}
 
 	/*
 	 * Initialize the fresh pcb and hash it.  The pcb is only ever
 	 * unlocked in this function, so in_pcballoc() presumably hands it
 	 * back write-locked -- confirm against in_pcballoc().
 	 */
 	pcb = (struct inpcb *)so->so_pcb;
 	pcb->inp_vflag |= INP_IPV4;
 	pcb->inp_ip_p = proto;
 	pcb->inp_ip_ttl = V_ip_defttl;
 	rip_inshash(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(pcb);
 	return (0);
 }
 
 /*
  * Tear down the raw IP pcb attached to 'so': unhash it, shut down multicast
  * routing / RSVP state owned by this socket, then detach and free the pcb.
  */
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(pcb->inp_faddr.s_addr == INADDR_ANY, 
 	    ("rip_detach: not closed"));
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	rip_delhash(pcb);
 
 	/* If this socket is the multicast router, stop multicast routing. */
 	if (ip_mrouter_done && so == V_ip_mrouter)
 		ip_mrouter_done();
 	/* Optional hook: let RSVP drop any state tied to this socket. */
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	/* If this socket is the RSVP daemon's, turn RSVP off. */
 	if (so == V_ip_rsvpd)
 		ip_rsvp_done();
 
 	in_pcbdetach(pcb);
 	in_pcbfree(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 }
 
 /*
  * Common disconnect path: clear the pcb's foreign address, rehash it and
  * drop SS_ISCONNECTED from the socket state.
  */
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 	struct inpcbinfo *info = inp->inp_pcbinfo;
 
 	INP_INFO_WLOCK(info);
 	INP_WLOCK(inp);
 
 	/* The pcb is unhashed before and rehashed after the address change. */
 	rip_delhash(inp);
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	rip_inshash(inp);
 
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(info);
 }
 
 /*
  * pru_abort handler: simply disconnects the socket's pcb.
  */
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_abort: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /*
  * pru_close handler: simply disconnects the socket's pcb.
  */
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_close: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /*
  * pru_disconnect handler.  Returns ENOTCONN if the socket is not marked
  * connected; otherwise disconnects the pcb and returns 0.
  */
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	/* The connected state is tested before the pcb is even looked up. */
 	if (!(so->so_state & SS_ISCONNECTED))
 		return (ENOTCONN);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_disconnect: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 
 	return (0);
 }
 
 /*
  * pru_bind handler: set the pcb's local address from 'nam'.
  *
  * Returns EINVAL for a wrongly sized sockaddr, the prison_check_ip4() error
  * if the address is not permitted for the caller's credential,
  * EADDRNOTAVAIL if the address cannot be bound, and 0 on success.
  */
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct inpcb *pcb;
 	int rc;
 
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 
 	rc = prison_check_ip4(td->td_ucred, &sin->sin_addr);
 	if (rc != 0)
 		return (rc);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_bind: inp == NULL"));
 
 	/* No interfaces configured at all: nothing can be bound. */
 	if (TAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 
 	/* Only AF_INET and AF_IMPLINK addresses are accepted. */
 	if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * A non-wildcard address must belong to a local interface unless
 	 * the pcb has INP_BINDANY set.
 	 */
 	if (sin->sin_addr.s_addr != 0 &&
 	    (pcb->inp_flags & INP_BINDANY) == 0 &&
 	    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
 		return (EADDRNOTAVAIL);
 
 	/* Rehash the pcb around the local address change. */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	rip_delhash(pcb);
 	pcb->inp_laddr = sin->sin_addr;
 	rip_inshash(pcb);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	return (0);
 }
 
 /*
  * pru_connect handler: record 'nam' as the pcb's foreign address and mark
  * the socket connected.
  *
  * Returns EINVAL for a wrongly sized sockaddr, EADDRNOTAVAIL when no
  * interfaces exist, EAFNOSUPPORT for an unsupported family, 0 on success.
  * The 'td' argument is not used.
  */
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct inpcb *pcb;
 
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 	if (TAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
 		return (EAFNOSUPPORT);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_connect: inp == NULL"));
 
 	/* Rehash the pcb around the foreign address change. */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	rip_delhash(pcb);
 	pcb->inp_faddr = sin->sin_addr;
 	rip_inshash(pcb);
 	soisconnected(so);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	return (0);
 }
 
 /*
  * pru_shutdown handler: mark the socket as unable to send any more data.
  * Always returns 0.
  */
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_shutdown: inp == NULL"));
 
 	/* socantsendmore() is called with the pcb write-locked. */
 	INP_WLOCK(pcb);
 	socantsendmore(so);
 	INP_WUNLOCK(pcb);
 
 	return (0);
 }
 
 /*
  * pru_send handler: pick the destination address and hand the mbuf chain
  * to rip_output().
  *
  * A connected socket must not be given an explicit address (EISCONN) and
  * sends to its recorded foreign address; an unconnected socket must be
  * given one (ENOTCONN).  On either error the mbuf chain is freed here.
  * The 'flags', 'control' and 'td' arguments are not used.
  */
 static int
 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *pcb;
 	u_long dst;
 	int connected;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_send: inp == NULL"));
 
 	/* Socket state and foreign address are read without any lock held. */
 	connected = (so->so_state & SS_ISCONNECTED) != 0;
 
 	if (connected && nam != NULL) {
 		m_freem(m);
 		return (EISCONN);
 	}
 	if (!connected && nam == NULL) {
 		m_freem(m);
 		return (ENOTCONN);
 	}
 
 	if (connected)
 		dst = pcb->inp_faddr.s_addr;	/* Unlocked read. */
 	else
 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 
 	return (rip_output(m, so, dst));
 }
 #endif /* INET */
 
 /*
  * Sysctl handler that exports a snapshot of the raw IP pcbs.
  *
  * The output is a struct xinpgen header, one struct xinpcb per pcb the
  * caller is allowed to see, and (when everything was copied out without
  * error) a trailing struct xinpgen carrying the then-current generation
  * and count so the consumer can detect concurrent changes.
  *
  * With no output buffer (req->oldptr == 0) only a size estimate is
  * returned.  Attempts to write (req->newptr != 0) fail with EPERM.
  * Returns 0 or the first error reported while copying data out.
  */
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request, so a size
 	 * probe only gets an estimate: the current count plus some slack
 	 * (an eighth of it, but at least 10 entries) and two headers.
 	 */
 	if (req->oldptr == 0) {
 		n = V_ripcbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.  Sample the
 	 * generation count and pcb count together under the info lock.
 	 */
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	gencnt = V_ripcbinfo.ipi_gencnt;
 	n = V_ripcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	/*
 	 * An M_WAITOK allocation sleeps until it succeeds and does not
 	 * return NULL, so no failure check is needed here.
 	 */
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * Pass 1: under the info lock, collect and reference up to n pcbs
 	 * that are no newer than the sampled generation and visible to the
 	 * requesting credential.
 	 */
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			in_pcbref(inp);
 			inp_list[i++] = inp;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	n = i;
 
 	/*
 	 * Pass 2: copy each collected pcb out.  Stop at the first copy-out
 	 * failure so that a later successful SYSCTL_OUT() cannot overwrite
 	 * the error and cause a trailer to be appended to truncated output.
 	 * The references taken above are still dropped for every entry in
 	 * pass 3 below.
 	 */
 	error = 0;
 	for (i = 0; i < n && error == 0; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 
 			bzero(&xi, sizeof(xi));
 			xi.xi_len = sizeof xi;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xi.xi_inp, sizeof *inp);
 			if (inp->inp_socket)
 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
 			/* Unlock before copying out to the requester. */
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 
 	/*
 	 * Pass 3: drop the references taken in pass 1.  The pcb is unlocked
 	 * here only when in_pcbrele_rlocked() returns zero; a non-zero
 	 * return presumably means the pcb (and its lock) is already gone --
 	 * confirm against in_pcbrele_rlocked().
 	 */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&V_ripcbinfo);
 		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_ripcbinfo.ipi_count;
 		INP_INFO_RUNLOCK(&V_ripcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 /*
  * Register rip_pcblist() as the handler for the read-only, opaque
  * "pcblist" node under _net_inet_raw; the format string advertises the
  * payload as struct xinpcb records.
  */
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 
 #ifdef INET
 /*
  * User-request dispatch table for raw IP sockets, only built when INET is
  * defined.  The rip_* entries are the handlers defined in this file; the
  * in_* entries (control, peeraddr, sockaddr, sosetlabel) are not defined in
  * this part of the file.
  */
 struct pr_usrreqs rip_usrreqs = {
 	.pru_abort =		rip_abort,
 	.pru_attach =		rip_attach,
 	.pru_bind =		rip_bind,
 	.pru_connect =		rip_connect,
 	.pru_control =		in_control,
 	.pru_detach =		rip_detach,
 	.pru_disconnect =	rip_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		rip_send,
 	.pru_shutdown =		rip_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		rip_close,
 };
 #endif /* INET */
Index: head/sys/netinet/sctp_output.c
===================================================================
--- head/sys/netinet/sctp_output.c	(revision 280970)
+++ head/sys/netinet/sctp_output.c	(revision 280971)
@@ -1,13473 +1,13473 @@
 /*-
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_crc32.h>
 #if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
 #endif
 #include <netinet/udp_var.h>
 #include <machine/in_cksum.h>
 
 
 
 /* Capacity of the gaps[] array in struct sack_track. */
 #define SCTP_MAX_GAPS_INARRAY 4
 /*
  * One entry of the 256-element sack_array lookup table, which is indexed by
  * a byte value.  Judging by the table's initializers, each entry describes
  * the runs of consecutive 1 bits in its index: gaps[] holds one
  * {first bit, last bit} pair per run, lowest run first, and num_entries is
  * the number of pairs in use (e.g. index 0x05 has two runs, {0, 0} and
  * {2, 2}).  The member names of struct sctp_gap_ack_block are not visible
  * here -- confirm which field is the start and which the end.
  */
 struct sack_track {
 	uint8_t right_edge;	/* mergeable on the right edge; 1 in the
 				 * table when bit 0 of the index is set */
 	uint8_t left_edge;	/* mergeable on the left edge; 1 in the
 				 * table when bit 7 of the index is set */
 	uint8_t num_entries;	/* number of gaps[] pairs in use */
 	uint8_t spare;		/* always 0 in the table initializers */
 	struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
 };
 
 struct sack_track sack_array[256] = {
 	{0, 0, 0, 0,		/* 0x00 */
 		{{0, 0},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x01 */
 		{{0, 0},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x02 */
 		{{1, 1},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x03 */
 		{{0, 1},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x04 */
 		{{2, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x05 */
 		{{0, 0},
 		{2, 2},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x06 */
 		{{1, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x07 */
 		{{0, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x08 */
 		{{3, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x09 */
 		{{0, 0},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x0a */
 		{{1, 1},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x0b */
 		{{0, 1},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x0c */
 		{{2, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x0d */
 		{{0, 0},
 		{2, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x0e */
 		{{1, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x0f */
 		{{0, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x10 */
 		{{4, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x11 */
 		{{0, 0},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x12 */
 		{{1, 1},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x13 */
 		{{0, 1},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x14 */
 		{{2, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x15 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x16 */
 		{{1, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x17 */
 		{{0, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x18 */
 		{{3, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x19 */
 		{{0, 0},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x1a */
 		{{1, 1},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x1b */
 		{{0, 1},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x1c */
 		{{2, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x1d */
 		{{0, 0},
 		{2, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x1e */
 		{{1, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x1f */
 		{{0, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x20 */
 		{{5, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x21 */
 		{{0, 0},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x22 */
 		{{1, 1},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x23 */
 		{{0, 1},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x24 */
 		{{2, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x25 */
 		{{0, 0},
 		{2, 2},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x26 */
 		{{1, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x27 */
 		{{0, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x28 */
 		{{3, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x29 */
 		{{0, 0},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x2a */
 		{{1, 1},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x2b */
 		{{0, 1},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x2c */
 		{{2, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x2d */
 		{{0, 0},
 		{2, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x2e */
 		{{1, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x2f */
 		{{0, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x30 */
 		{{4, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x31 */
 		{{0, 0},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x32 */
 		{{1, 1},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x33 */
 		{{0, 1},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x34 */
 		{{2, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x35 */
 		{{0, 0},
 		{2, 2},
 		{4, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x36 */
 		{{1, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x37 */
 		{{0, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x38 */
 		{{3, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x39 */
 		{{0, 0},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x3a */
 		{{1, 1},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x3b */
 		{{0, 1},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x3c */
 		{{2, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x3d */
 		{{0, 0},
 		{2, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x3e */
 		{{1, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x3f */
 		{{0, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x40 */
 		{{6, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x41 */
 		{{0, 0},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x42 */
 		{{1, 1},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x43 */
 		{{0, 1},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x44 */
 		{{2, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x45 */
 		{{0, 0},
 		{2, 2},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x46 */
 		{{1, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x47 */
 		{{0, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x48 */
 		{{3, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x49 */
 		{{0, 0},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x4a */
 		{{1, 1},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x4b */
 		{{0, 1},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x4c */
 		{{2, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x4d */
 		{{0, 0},
 		{2, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x4e */
 		{{1, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x4f */
 		{{0, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x50 */
 		{{4, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x51 */
 		{{0, 0},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x52 */
 		{{1, 1},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x53 */
 		{{0, 1},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x54 */
 		{{2, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 4, 0,		/* 0x55 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{6, 6}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x56 */
 		{{1, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x57 */
 		{{0, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x58 */
 		{{3, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x59 */
 		{{0, 0},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x5a */
 		{{1, 1},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x5b */
 		{{0, 1},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x5c */
 		{{2, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x5d */
 		{{0, 0},
 		{2, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x5e */
 		{{1, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x5f */
 		{{0, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x60 */
 		{{5, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x61 */
 		{{0, 0},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x62 */
 		{{1, 1},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x63 */
 		{{0, 1},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x64 */
 		{{2, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x65 */
 		{{0, 0},
 		{2, 2},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x66 */
 		{{1, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x67 */
 		{{0, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x68 */
 		{{3, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x69 */
 		{{0, 0},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x6a */
 		{{1, 1},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x6b */
 		{{0, 1},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x6c */
 		{{2, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x6d */
 		{{0, 0},
 		{2, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x6e */
 		{{1, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x6f */
 		{{0, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x70 */
 		{{4, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x71 */
 		{{0, 0},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x72 */
 		{{1, 1},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x73 */
 		{{0, 1},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x74 */
 		{{2, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x75 */
 		{{0, 0},
 		{2, 2},
 		{4, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x76 */
 		{{1, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x77 */
 		{{0, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x78 */
 		{{3, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x79 */
 		{{0, 0},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x7a */
 		{{1, 1},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x7b */
 		{{0, 1},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x7c */
 		{{2, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x7d */
 		{{0, 0},
 		{2, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x7e */
 		{{1, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x7f */
 		{{0, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0x80 */
 		{{7, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x81 */
 		{{0, 0},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x82 */
 		{{1, 1},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x83 */
 		{{0, 1},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x84 */
 		{{2, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x85 */
 		{{0, 0},
 		{2, 2},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x86 */
 		{{1, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x87 */
 		{{0, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x88 */
 		{{3, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x89 */
 		{{0, 0},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x8a */
 		{{1, 1},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x8b */
 		{{0, 1},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x8c */
 		{{2, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x8d */
 		{{0, 0},
 		{2, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x8e */
 		{{1, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x8f */
 		{{0, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x90 */
 		{{4, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x91 */
 		{{0, 0},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x92 */
 		{{1, 1},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x93 */
 		{{0, 1},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x94 */
 		{{2, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0x95 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x96 */
 		{{1, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x97 */
 		{{0, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x98 */
 		{{3, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x99 */
 		{{0, 0},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x9a */
 		{{1, 1},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x9b */
 		{{0, 1},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x9c */
 		{{2, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x9d */
 		{{0, 0},
 		{2, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x9e */
 		{{1, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x9f */
 		{{0, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xa0 */
 		{{5, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa1 */
 		{{0, 0},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa2 */
 		{{1, 1},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa3 */
 		{{0, 1},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa4 */
 		{{2, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xa5 */
 		{{0, 0},
 		{2, 2},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa6 */
 		{{1, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa7 */
 		{{0, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa8 */
 		{{3, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xa9 */
 		{{0, 0},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 4, 0,		/* 0xaa */
 		{{1, 1},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xab */
 		{{0, 1},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xac */
 		{{2, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xad */
 		{{0, 0},
 		{2, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xae */
 		{{1, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xaf */
 		{{0, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xb0 */
 		{{4, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb1 */
 		{{0, 0},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb2 */
 		{{1, 1},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb3 */
 		{{0, 1},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb4 */
 		{{2, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xb5 */
 		{{0, 0},
 		{2, 2},
 		{4, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb6 */
 		{{1, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb7 */
 		{{0, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xb8 */
 		{{3, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb9 */
 		{{0, 0},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xba */
 		{{1, 1},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xbb */
 		{{0, 1},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xbc */
 		{{2, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xbd */
 		{{0, 0},
 		{2, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xbe */
 		{{1, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xbf */
 		{{0, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xc0 */
 		{{6, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc1 */
 		{{0, 0},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc2 */
 		{{1, 1},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc3 */
 		{{0, 1},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc4 */
 		{{2, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xc5 */
 		{{0, 0},
 		{2, 2},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc6 */
 		{{1, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc7 */
 		{{0, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc8 */
 		{{3, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xc9 */
 		{{0, 0},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xca */
 		{{1, 1},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xcb */
 		{{0, 1},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xcc */
 		{{2, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xcd */
 		{{0, 0},
 		{2, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xce */
 		{{1, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xcf */
 		{{0, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xd0 */
 		{{4, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd1 */
 		{{0, 0},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd2 */
 		{{1, 1},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd3 */
 		{{0, 1},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd4 */
 		{{2, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xd5 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{6, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd6 */
 		{{1, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd7 */
 		{{0, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xd8 */
 		{{3, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd9 */
 		{{0, 0},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xda */
 		{{1, 1},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xdb */
 		{{0, 1},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xdc */
 		{{2, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xdd */
 		{{0, 0},
 		{2, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xde */
 		{{1, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xdf */
 		{{0, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xe0 */
 		{{5, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe1 */
 		{{0, 0},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe2 */
 		{{1, 1},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe3 */
 		{{0, 1},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe4 */
 		{{2, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xe5 */
 		{{0, 0},
 		{2, 2},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe6 */
 		{{1, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe7 */
 		{{0, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe8 */
 		{{3, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xe9 */
 		{{0, 0},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xea */
 		{{1, 1},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xeb */
 		{{0, 1},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xec */
 		{{2, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xed */
 		{{0, 0},
 		{2, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xee */
 		{{1, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xef */
 		{{0, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xf0 */
 		{{4, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf1 */
 		{{0, 0},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf2 */
 		{{1, 1},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf3 */
 		{{0, 1},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf4 */
 		{{2, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xf5 */
 		{{0, 0},
 		{2, 2},
 		{4, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf6 */
 		{{1, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf7 */
 		{{0, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xf8 */
 		{{3, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf9 */
 		{{0, 0},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xfa */
 		{{1, 1},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xfb */
 		{{0, 1},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xfc */
 		{{2, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xfd */
 		{{0, 0},
 		{2, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xfe */
 		{{1, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 1, 0,		/* 0xff */
 		{{0, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	}
 };
 
 
 /*
  * Decide whether a local address may be used under the given scoping rules.
  *
  * ifa       - candidate local address
  * scope     - scoping flags the address is tested against
  * do_update - when non-zero, the interface-address flags are refreshed via
  *             sctp_gather_internal_ifa_flags() before they are examined
  *             (IPv6 addresses only)
  *
  * Returns 1 when the address is in scope, 0 otherwise.  Addresses of any
  * family other than the compiled-in INET/INET6 ones are never in scope.
  */
 int
 sctp_is_address_in_scope(struct sctp_ifa *ifa,
     struct sctp_scoping *scope,
     int do_update)
 {
 	if (scope->loopback_scope == 0) {
 		/* loopback interfaces are excluded unless loopback is scoped */
 		if ((ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
 			return (0);
 		}
 	}
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct sockaddr_in *v4;
 
 			if (!scope->ipv4_addr_legal) {
 				/* IPv4 is not allowed at all */
 				return (0);
 			}
 			v4 = &ifa->address.sin;
 			if (v4->sin_addr.s_addr == 0) {
 				/* the unspecified address is never in scope */
 				return (0);
 			}
 			if ((scope->ipv4_local_scope == 0) &&
 			    (IN4_ISPRIVATE_ADDRESS(&v4->sin_addr))) {
 				/* private address, but local scope is off */
 				return (0);
 			}
 			break;
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sockaddr_in6 *v6;
 
 			if (!scope->ipv6_addr_legal) {
 				/* IPv6 is not allowed at all */
 				return (0);
 			}
 			/*
 			 * The flags have to be current before they are
 			 * tested, so any IFA locking has to happen here.
 			 */
 			if (do_update) {
 				sctp_gather_internal_ifa_flags(ifa);
 			}
 			if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 				return (0);
 			}
 			v6 = &ifa->address.sin6;
 			if (IN6_IS_ADDR_UNSPECIFIED(&v6->sin6_addr)) {
 				/* the unspecified address is never in scope */
 				return (0);
 			}
 			if (IN6_IS_ADDR_LINKLOCAL(&v6->sin6_addr)) {
 				/* link-local is rejected unconditionally */
 				return (0);
 			}
 			if ((scope->site_scope == 0) &&
 			    (IN6_IS_ADDR_SITELOCAL(&v6->sin6_addr))) {
 				/* site-local address, but site scope is off */
 				return (0);
 			}
 			break;
 		}
 #endif
 	default:
 		return (0);
 	}
 	return (1);
 }
 
 /*
  * Append one IPv4/IPv6 address parameter for `ifa' to an mbuf chain.
  *
  * m   - mbuf the parameter is written into when it has enough trailing
  *       space; otherwise a new mbuf is linked to the end of m's chain
  * ifa - local address to encode
  * len - optional running length; incremented by the parameter size when
  *       a parameter was added (may be NULL)
  *
  * Returns the mbuf that received the parameter.  `m' is returned untouched
  * when the address family is not handled or no mbuf could be allocated.
  */
 static struct mbuf *
 sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
 {
 #if defined(INET) || defined(INET6)
 	struct sctp_paramhdr *hdr;
 	struct mbuf *tail;
 	uint16_t plen;
 
 #endif
 
 	/* the parameter size depends on the family; unknown families bail */
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		plen = (uint16_t) sizeof(struct sctp_ipv4addr_param);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		plen = (uint16_t) sizeof(struct sctp_ipv6addr_param);
 		break;
 #endif
 	default:
 		return (m);
 	}
 #if defined(INET) || defined(INET6)
 	if (M_TRAILINGSPACE(m) < plen) {
 		/* no room in m: walk to the end of the chain and extend it */
 		for (tail = m; SCTP_BUF_NEXT(tail) != NULL;
 		    tail = SCTP_BUF_NEXT(tail)) {
 			continue;
 		}
 		SCTP_BUF_NEXT(tail) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA);
 		if (SCTP_BUF_NEXT(tail) == NULL) {
 			/* allocation failed, no further addresses can go in */
 			return (m);
 		}
 		tail = SCTP_BUF_NEXT(tail);
 		hdr = mtod(tail, struct sctp_paramhdr *);
 	} else {
 		/* enough room: the parameter goes right behind m's data */
 		tail = m;
 		hdr = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
 	}
 	/* fill in the parameter header and the address itself */
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct sctp_ipv4addr_param *v4param;
 			struct sockaddr_in *v4;
 
 			v4param = (struct sctp_ipv4addr_param *)hdr;
 			v4 = &ifa->address.sin;
 			hdr->param_type = htons(SCTP_IPV4_ADDRESS);
 			hdr->param_length = htons(plen);
 			v4param->addr = v4->sin_addr.s_addr;
 			break;
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sctp_ipv6addr_param *v6param;
 			struct sockaddr_in6 *v6;
 
 			v6param = (struct sctp_ipv6addr_param *)hdr;
 			v6 = &ifa->address.sin6;
 			hdr->param_type = htons(SCTP_IPV6_ADDRESS);
 			hdr->param_length = htons(plen);
 			memcpy(v6param->addr, &v6->sin6_addr,
 			    sizeof(v6param->addr));
 			/* the embedded scope must not go out on the wire */
 			in6_clearscope((struct in6_addr *)v6param->addr);
 			break;
 		}
 #endif
 	default:
 		return (m);
 	}
 	/* account for the bytes just written */
 	SCTP_BUF_LEN(tail) += plen;
 	if (len != NULL) {
 		*len += plen;
 	}
 	return (tail);
 #endif
 }
 
 
 /*
  * Append address parameters for the endpoint's usable local addresses to an
  * mbuf chain, one sctp_add_addr_to_mbuf() call per address.
  *
  * inp          - endpoint; supplies the VRF id, the bound-all flag, the
  *                credential used for the prison checks and, when bound to
  *                specific addresses, the address list
  * stcb         - passed to sctp_is_addr_restricted() to filter addresses
  * scope        - scoping rules handed to sctp_is_address_in_scope()
  * m_at         - mbuf to append to; updated to whatever
  *                sctp_add_addr_to_mbuf() returns
  * cnt_inits_to - initial value of the address counter
  * padding_len  - optional; if *padding_len > 0 when an address is about to
  *                be added, that many zero bytes are appended first and
  *                *padding_len is reset to 0
  * chunk_len    - optional running length, advanced for padding and for each
  *                added parameter
  *
  * Addresses are only listed when the counter ends up greater than one.
  * The whole walk is done with SCTP_IPI_ADDR_RLOCK() held.
  *
  * Returns the (possibly advanced) m_at.
  */
 struct mbuf *
 sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_scoping *scope,
     struct mbuf *m_at, int cnt_inits_to,
     uint16_t * padding_len, uint16_t * chunk_len)
 {
 	struct sctp_vrf *vrf = NULL;
 	int cnt, limit_out = 0, total_count;
 	uint32_t vrf_id;
 
 	vrf_id = inp->def_vrf_id;
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		/* unknown VRF: nothing to add */
 		SCTP_IPI_ADDR_RUNLOCK();
 		return (m_at);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		struct sctp_ifa *sctp_ifap;
 		struct sctp_ifn *sctp_ifnp;
 
 		cnt = cnt_inits_to;
 		if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) {
 			/*
 			 * Too many addresses to count: skip the counting
 			 * pass and switch the listing pass into limited
 			 * mode.
 			 */
 			limit_out = 1;
 			cnt = SCTP_ADDRESS_LIMIT;
 			goto skip_count;
 		}
 		/* Pass 1: count the eligible addresses. */
 		LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
 			if ((scope->loopback_scope == 0) &&
 			    SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
 				/*
 				 * Skip loopback devices if loopback_scope
 				 * not set
 				 */
 				continue;
 			}
 			LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
 				/*
 				 * Skip addresses for which the prison check
 				 * on the endpoint's credential returns
 				 * non-zero.
 				 */
 #ifdef INET
 				if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
 				    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 				    &sctp_ifap->address.sin.sin_addr) != 0)) {
 					continue;
 				}
 #endif
 #ifdef INET6
 				if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
 				    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 				    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
 					continue;
 				}
 #endif
 				if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 					continue;
 				}
 				/* do_update = 1: refresh the ifa flags here */
 				if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) {
 					continue;
 				}
 				cnt++;
 				if (cnt > SCTP_ADDRESS_LIMIT) {
 					break;
 				}
 			}
 			if (cnt > SCTP_ADDRESS_LIMIT) {
 				break;
 			}
 		}
 skip_count:
 		/* Pass 2: list the addresses, but only if more than one. */
 		if (cnt > 1) {
 			total_count = 0;
 			LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
 				/* from here on cnt counts per interface */
 				cnt = 0;
 				if ((scope->loopback_scope == 0) &&
 				    SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
 					/*
 					 * Skip loopback devices if
 					 * loopback_scope not set
 					 */
 					continue;
 				}
 				LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
 #ifdef INET
 					if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
 					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifap->address.sin.sin_addr) != 0)) {
 						continue;
 					}
 #endif
 #ifdef INET6
 					if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
 					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
 						continue;
 					}
 #endif
 					if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 						continue;
 					}
 					/* do_update = 0: no flag refresh */
 					if (sctp_is_address_in_scope(sctp_ifap,
 					    scope, 0) == 0) {
 						continue;
 					}
 					/*
 					 * Pending padding is written as zero
 					 * bytes at offset *chunk_len in m_at
 					 * before the address goes in; it is
 					 * cleared afterwards so this happens
 					 * at most once.
 					 */
 					if ((chunk_len != NULL) &&
 					    (padding_len != NULL) &&
 					    (*padding_len > 0)) {
 						memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
 						SCTP_BUF_LEN(m_at) += *padding_len;
 						*chunk_len += *padding_len;
 						*padding_len = 0;
 					}
 					m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len);
 					if (limit_out) {
 						cnt++;
 						total_count++;
 						if (cnt >= 2) {
 							/*
 							 * at most two
 							 * addresses from each
 							 * interface
 							 */
 							break;
 						}
 						/*
 						 * NOTE(review): this break
 						 * only leaves the inner
 						 * loop; the interface loop
 						 * keeps going - confirm
 						 * that is intended.
 						 */
 						if (total_count > SCTP_ADDRESS_LIMIT) {
 							/* No more addresses */
 							break;
 						}
 					}
 				}
 			}
 		}
 	} else {
 		struct sctp_laddr *laddr;
 
 		cnt = cnt_inits_to;
 		/* First, how many ? */
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (laddr->ifa == NULL) {
 				continue;
 			}
 			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
 				/*
 				 * Address being deleted by the system, dont
 				 * list.
 				 */
 				continue;
 			if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 				/*
 				 * Address being deleted on this ep don't
 				 * list.
 				 */
 				continue;
 			}
 			if (sctp_is_address_in_scope(laddr->ifa,
 			    scope, 1) == 0) {
 				continue;
 			}
 			cnt++;
 		}
 		/*
 		 * To get through a NAT we only list addresses if we have
 		 * more than one. That way if you just bind a single address
 		 * we let the source of the init dictate our address.
 		 */
 		if (cnt > 1) {
 			cnt = cnt_inits_to;
 			/*
 			 * NOTE(review): unlike the counting loop above, this
 			 * loop does not skip entries whose action is
 			 * SCTP_DEL_IP_ADDRESS - confirm that is intended.
 			 */
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 				if (laddr->ifa == NULL) {
 					continue;
 				}
 				if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
 					continue;
 				}
 				if (sctp_is_address_in_scope(laddr->ifa,
 				    scope, 0) == 0) {
 					continue;
 				}
 				/* flush pending padding once, as above */
 				if ((chunk_len != NULL) &&
 				    (padding_len != NULL) &&
 				    (*padding_len > 0)) {
 					memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
 					SCTP_BUF_LEN(m_at) += *padding_len;
 					*chunk_len += *padding_len;
 					*padding_len = 0;
 				}
 				m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len);
 				cnt++;
 				if (cnt >= SCTP_ADDRESS_LIMIT) {
 					break;
 				}
 			}
 		}
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (m_at);
 }
 
 /*
  * Test whether a local address is a "preferred" source for a destination.
  *
  * ifa          - candidate source address
  * dest_is_loop - non-zero when the destination is a loopback address
  * dest_is_priv - non-zero when the destination is a private address
  * fam          - address family the candidate has to match
  *
  * A destination that is neither loopback nor private counts as global.
  * With L = loopback, P = private, G = global the decision is:
  *
  *     src | dest | preferred
  *     ----+------+-------------------
  *      L  |  L   | yes
  *      P  |  L   | yes (v4), no (v6)
  *      G  |  L   | yes (v4), no (v6)
  *      L  |  P   | no
  *      P  |  P   | yes
  *      G  |  P   | no
  *      L  |  G   | no
  *      P  |  G   | no
  *      G  |  G   | yes
  *
  * IPv6 addresses flagged SCTP_ADDR_IFA_UNUSEABLE are never preferred.
  *
  * Returns ifa when it is preferred, NULL otherwise.
  */
 static struct sctp_ifa *
 sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	uint8_t dest_is_global;
 
 	if (ifa->address.sa.sa_family != fam) {
 		/* wrong family, not a candidate at all */
 		return (NULL);
 	}
 	dest_is_global = ((dest_is_priv == 0) && (dest_is_loop == 0)) ? 1 : 0;
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa);
 #ifdef INET6
 	if (fam == AF_INET6) {
 		/* unusable (e.g. deprecated) addresses are not preferred */
 		if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n");
 			return (NULL);
 		}
 		/* private, non-loopback source towards a loopback dest */
 		if (dest_is_loop && ifa->src_is_priv && !ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n");
 			return (NULL);
 		}
 		/* global source towards a loopback dest */
 		if (dest_is_loop && ifa->src_is_glob) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n");
 			return (NULL);
 		}
 	}
 #endif
 	/*
 	 * The remaining rows of the table are spelled out one by one; it
 	 * is not the most compact form, but it is easy to check against
 	 * the table.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n",
 	    ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob);
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n",
 	    dest_is_loop, dest_is_priv, dest_is_global);
 
 	if (dest_is_priv) {
 		/* a private destination wants a private source */
 		if (ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n");
 			return (NULL);
 		}
 		if (ifa->src_is_glob) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n");
 			return (NULL);
 		}
 	}
 	if (dest_is_global) {
 		/* a global destination wants a global source */
 		if (ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n");
 			return (NULL);
 		}
 		if (ifa->src_is_priv) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n");
 			return (NULL);
 		}
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n");
 	return (ifa);
 }
 
 /*
  * Test whether a local address is an "acceptable" source for a destination.
  * This is a looser test than the preferred one: to allow for NAT, a source
  * whose scope differs from the destination's is tolerated, except that a
  * loopback source is only good for a loopback destination.
  *
  * ifa          - candidate source address
  * dest_is_loop - non-zero when the destination is a loopback address
  * dest_is_priv - non-zero when the destination is a private address
  * fam          - address family the candidate has to match
  *
  * A destination that is neither loopback nor private counts as global.
  * With L = loopback, P = private, G = global the decision is:
  *
  *     src | dest | acceptable
  *     ----+------+-------------------
  *      L  |  L   | yes
  *      P  |  L   | yes (v4), no (v6)
  *      G  |  L   | yes
  *      L  |  P   | no
  *      P  |  P   | yes
  *      G  |  P   | yes (may not work)
  *      L  |  G   | no
  *      P  |  G   | yes (may not work)
  *      G  |  G   | yes
  *
  * IPv6 addresses flagged SCTP_ADDR_IFA_UNUSEABLE are never acceptable.
  *
  * Returns ifa when it is acceptable, NULL otherwise.
  */
 static struct sctp_ifa *
 sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	uint8_t dest_is_global;
 
 	if (ifa->address.sa.sa_family != fam) {
 		/* wrong family, not a candidate at all */
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa_fam:%d fam:%d\n",
 		    ifa->address.sa.sa_family, fam);
 		return (NULL);
 	}
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, &ifa->address.sa);
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst_is_loop:%d dest_is_priv:%d\n",
 	    dest_is_loop, dest_is_priv);
 	dest_is_global = ((dest_is_loop == 0) && (dest_is_priv == 0)) ? 1 : 0;
 #ifdef INET6
 	if (fam == AF_INET6) {
 		/* unusable (e.g. deprecated) addresses are rejected */
 		if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 			return (NULL);
 		}
 		/* special case: link-local source to a loopback dest */
 		if (ifa->src_is_priv && dest_is_loop) {
 			return (NULL);
 		}
 	}
 #endif
 	/*
 	 * The remaining "no" rows of the table all have a loopback source;
 	 * they are checked one at a time so the debug output shows which
 	 * test was applied.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_priv:%d\n",
 	    ifa->src_is_loop,
 	    dest_is_priv);
 	if ((ifa->src_is_loop == 1) && (dest_is_priv)) {
 		return (NULL);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_glob:%d\n",
 	    ifa->src_is_loop,
 	    dest_is_global);
 	if ((ifa->src_is_loop == 1) && (dest_is_global)) {
 		return (NULL);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "address is acceptable\n");
 	return (ifa);
 }
 
 /*
  * Check whether `ifa' is on the association's restricted-address list.
  *
  * stcb - association to consult; NULL means there are no restrictions
  * ifa  - address to look for (compared by pointer)
  *
  * Returns 1 when the address is on the list, 0 otherwise.
  */
 int
 sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *entry;
 
 	if (stcb == NULL) {
 		/* without a TCB nothing can be restricted */
 		return (0);
 	}
 	LIST_FOREACH(entry, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
 		if (entry->ifa == NULL) {
 			/* entry without an address: report it and move on */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
 			    __FUNCTION__);
 		} else if (entry->ifa == ifa) {
 			/* found it */
 			return (1);
 		}
 	}
 	return (0);
 }
 
 
 /*
  * Check whether `ifa' is on the endpoint's address list with no action
  * pending on its list entry.
  *
  * inp - endpoint whose address list is searched
  * ifa - address to look for (compared by pointer); NULL is never found
  *
  * Returns 1 when a matching entry with action == 0 exists, 0 otherwise.
  */
 int
 sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *entry;
 
 	if (ifa != NULL) {
 		LIST_FOREACH(entry, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (entry->ifa == NULL) {
 				/* entry without an address: report, skip */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
 				    __FUNCTION__);
 			} else if ((entry->ifa == ifa) && (entry->action == 0)) {
 				/* same pointer, nothing pending on it */
 				return (1);
 			}
 		}
 	}
 	return (0);
 }
 
 
 
 /*
  * Pick a source address from the endpoint's own address list for a
  * destination reached via route `ro'.
  *
  * inp              - endpoint; supplies the address list, the credential
  *                    for the prison checks and the rotating start position
  *                    inp->next_addr_touse
  * ro               - route; identifies the outgoing interface
  * vrf_id           - VRF to use; NULL is returned if it does not exist
  * non_asoc_addr_ok - when 0, addresses flagged SCTP_ADDR_DEFER_USE are
  *                    skipped in the interface search
  * dest_is_priv     - non-zero when the destination is a private address
  * dest_is_loop     - non-zero when the destination is a loopback address
  * fam              - address family wanted
  *
  * Search order:
  *   1. a preferred address on the outgoing interface that is also in the
  *      endpoint's address list;
  *   2. a preferred address anywhere in the endpoint's list;
  *   3. an acceptable address anywhere in the endpoint's list.
  * Steps 2 and 3 start at inp->next_addr_touse and wrap around to the head
  * of the list once.
  *
  * Side effect: inp->next_addr_touse may be rewritten while searching.
  *
  * Returns the chosen address with its refcount incremented, or NULL when
  * nothing fits.
  */
 static struct sctp_ifa *
 sctp_choose_boundspecific_inp(struct sctp_inpcb *inp,
     sctp_route_t * ro,
     uint32_t vrf_id,
     int non_asoc_addr_ok,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     sa_family_t fam)
 {
 	struct sctp_laddr *laddr, *starting_point;
 	void *ifn;
 	int resettotop = 0;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	struct sctp_vrf *vrf;
 	uint32_t ifn_index;
 
 	/* the VRF is only looked up to make sure it exists */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	/* find the interface the route sends out on */
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 	/*
 	 * first question, is the ifn we will emit on in our list, if so, we
 	 * want such an address. Note that we first looked for a preferred
 	 * address.
 	 */
 	if (sctp_ifn) {
 		/* is a preferred one on the interface we route out? */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 			/*
 			 * Skip addresses for which the prison check on the
 			 * endpoint's credential returns non-zero.
 			 */
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
 			sifa = sctp_is_ifa_addr_preferred(sctp_ifa,
 			    dest_is_loop,
 			    dest_is_priv, fam);
 			if (sifa == NULL)
 				continue;
 			/* it must also be one of the endpoint's addresses */
 			if (sctp_is_addr_in_ep(inp, sifa)) {
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 	}
 	/*
 	 * ok, now we now need to find one on the list of the addresses. We
 	 * can't get one on the emitting interface so let's find first a
 	 * preferred one. If not that an acceptable one otherwise... we
 	 * return NULL.
 	 */
 	/* remembered so the acceptable pass starts at the same place */
 	starting_point = inp->next_addr_touse;
 once_again:
 	if (inp->next_addr_touse == NULL) {
 		/* start (or restart) from the head; no further wrap needed */
 		inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
 		resettotop = 1;
 	}
 	for (laddr = inp->next_addr_touse; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (resettotop == 0) {
 		/* began mid-list: wrap around and scan from the head once */
 		inp->next_addr_touse = NULL;
 		goto once_again;
 	}
 	/* no preferred address; rewind for the acceptable pass */
 	inp->next_addr_touse = starting_point;
 	resettotop = 0;
 once_again_too:
 	if (inp->next_addr_touse == NULL) {
 		inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
 		resettotop = 1;
 	}
 	/* ok, what about an acceptable address in the inp */
 	for (laddr = inp->next_addr_touse; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (resettotop == 0) {
 		/* began mid-list: wrap around and scan from the head once */
 		inp->next_addr_touse = NULL;
 		goto once_again_too;
 	}
 	/*
 	 * no address bound can be a source for the destination we are in
 	 * trouble
 	 */
 	return (NULL);
 }
 
 
 
 /*
  * Pick a source address from the endpoint's own address list for an
  * existing association and a destination reached via route `ro'.
  *
  * inp              - endpoint; supplies the address list and the credential
  *                    for the prison checks
  * stcb             - association; dereferenced unconditionally, so it must
  *                    not be NULL.  Supplies the restricted/pending address
  *                    state and the rotating start position
  *                    stcb->asoc.last_used_address
  * ro               - route; identifies the outgoing interface
  * vrf_id           - VRF to use; NULL is returned if it does not exist
  * dest_is_priv     - non-zero when the destination is a private address
  * dest_is_loop     - non-zero when the destination is a loopback address
  * non_asoc_addr_ok - when 0, addresses flagged SCTP_ADDR_DEFER_USE and all
  *                    restricted addresses are skipped; when non-zero, a
  *                    restricted address is still allowed if it is pending
  * fam              - address family wanted
  *
  * Search order:
  *   1. a preferred address on the outgoing interface that is in the
  *      endpoint's list;
  *   2. an acceptable address on the outgoing interface that is in the
  *      endpoint's list;
  *   3. a preferred address anywhere in the endpoint's list;
  *   4. an acceptable address anywhere in the endpoint's list.
  * Steps 3 and 4 start at stcb->asoc.last_used_address and wrap around to
  * the head of the list once.
  *
  * Side effect: stcb->asoc.last_used_address is set to the chosen list entry
  * in steps 3 and 4, and may be rewritten while searching.
  *
  * Returns the chosen address with its refcount incremented, or NULL when
  * nothing fits.
  */
 static struct sctp_ifa *
 sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     sctp_route_t * ro,
     uint32_t vrf_id,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     int non_asoc_addr_ok,
     sa_family_t fam)
 {
 	struct sctp_laddr *laddr, *starting_point;
 	void *ifn;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	uint8_t start_at_beginning = 0;
 	struct sctp_vrf *vrf;
 	uint32_t ifn_index;
 
 	/*
 	 * first question, is the ifn we will emit on in our list, if so, we
 	 * want that one.
 	 */
 	/* the VRF is only looked up to make sure it exists */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	/* find the interface the route sends out on */
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 
 	/*
 	 * first question, is the ifn we will emit on in our list?  If so,
 	 * we want that one. First we look for a preferred. Second, we go
 	 * for an acceptable.
 	 */
 	if (sctp_ifn) {
 		/* first try for a preferred address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 			/*
 			 * Skip addresses for which the prison check on the
 			 * endpoint's credential returns non-zero.
 			 */
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
 				sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam);
 				if (sifa == NULL)
 					continue;
 				/*
 				 * Reject a restricted address unless
 				 * non_asoc_addr_ok is set and the address
 				 * is pending.
 				 */
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/* on the no-no list */
 					continue;
 				}
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 		/* next try for an acceptable address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
 				sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam);
 				if (sifa == NULL)
 					continue;
 				/* same restricted/pending filter as above */
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/* on the no-no list */
 					continue;
 				}
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 
 	}
 	/*
 	 * if we can't find one like that then we must look at all addresses
 	 * bound to pick one at first preferable then secondly acceptable.
 	 */
 	/* remembered so the acceptable pass starts at the same place */
 	starting_point = stcb->asoc.last_used_address;
 sctp_from_the_top:
 	if (stcb->asoc.last_used_address == NULL) {
 		/* start (or restart) from the head; no further wrap needed */
 		start_at_beginning = 1;
 		stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
 	}
 	/* search beginning with the last used address */
 	for (laddr = stcb->asoc.last_used_address; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		/* same restricted/pending filter as above */
 		if (((non_asoc_addr_ok == 0) &&
 		    (sctp_is_addr_restricted(stcb, sifa))) ||
 		    (non_asoc_addr_ok &&
 		    (sctp_is_addr_restricted(stcb, sifa)) &&
 		    (!sctp_is_addr_pending(stcb, sifa)))) {
 			/* on the no-no list */
 			continue;
 		}
 		/* remember where we stopped for the next selection */
 		stcb->asoc.last_used_address = laddr;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (start_at_beginning == 0) {
 		/* began mid-list: wrap around and scan from the head once */
 		stcb->asoc.last_used_address = NULL;
 		goto sctp_from_the_top;
 	}
 	/* now try for any higher scope than the destination */
 	stcb->asoc.last_used_address = starting_point;
 	start_at_beginning = 0;
 sctp_from_the_top2:
 	if (stcb->asoc.last_used_address == NULL) {
 		start_at_beginning = 1;
 		stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
 	}
 	/* search beginning with the last used address */
 	for (laddr = stcb->asoc.last_used_address; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		/* same restricted/pending filter as above */
 		if (((non_asoc_addr_ok == 0) &&
 		    (sctp_is_addr_restricted(stcb, sifa))) ||
 		    (non_asoc_addr_ok &&
 		    (sctp_is_addr_restricted(stcb, sifa)) &&
 		    (!sctp_is_addr_pending(stcb, sifa)))) {
 			/* on the no-no list */
 			continue;
 		}
 		/* remember where we stopped for the next selection */
 		stcb->asoc.last_used_address = laddr;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (start_at_beginning == 0) {
 		/* began mid-list: wrap around and scan from the head once */
 		stcb->asoc.last_used_address = NULL;
 		goto sctp_from_the_top2;
 	}
 	return (NULL);
 }
 
 /*
  * Walk the addresses of ifn and return the addr_wanted-th (0-based) one
  * that qualifies as a preferred source for the destination described by
  * fam/dest_is_loop/dest_is_priv/ro.  Returns NULL when fewer than
  * addr_wanted + 1 addresses qualify.
  *
  * An address is skipped when the jail check on inp's credential rejects
  * it, when it is flagged SCTP_ADDR_DEFER_USE and non_asoc_addr_ok is 0,
  * when sctp_is_ifa_addr_preferred() rejects it, or when one of the IPv6
  * loopback/link-local, mobility next-hop, scope, or restricted-list tests
  * below fails.  stcb may be NULL; the association-specific tests are then
  * not applied.  This function does not modify the address refcount.
  */
 static struct sctp_ifa *
 sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
     struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     int addr_wanted,
     sa_family_t fam,
     sctp_route_t * ro
 )
 {
 	struct sctp_ifa *cand, *pref;
 	int eligible_seen;
 
 #ifdef INET6
 	struct sockaddr_in6 dst6, src6;
 
 	/* Recover the destination's scope once, for the link-local test. */
 	if (fam == AF_INET6) {
 		memcpy(&dst6, &ro->ro_dst, sizeof(struct sockaddr_in6));
 		(void)sa6_recoverscope(&dst6);
 	}
 #endif				/* INET6 */
 	eligible_seen = 0;
 	LIST_FOREACH(cand, &ifn->ifalist, next_ifa) {
 		/* Addresses not visible inside the caller's jail are out. */
 #ifdef INET
 		if ((cand->address.sa.sa_family == AF_INET) &&
 		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 		    &cand->address.sin.sin_addr) != 0)) {
 			continue;
 		}
 #endif
 #ifdef INET6
 		if ((cand->address.sa.sa_family == AF_INET6) &&
 		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 		    &cand->address.sin6.sin6_addr) != 0)) {
 			continue;
 		}
 #endif
 		if ((non_asoc_addr_ok == 0) &&
 		    (cand->localifa_flags & SCTP_ADDR_DEFER_USE)) {
 			continue;
 		}
 		pref = sctp_is_ifa_addr_preferred(cand, dest_is_loop,
 		    dest_is_priv, fam);
 		if (pref == NULL) {
 			continue;
 		}
 #ifdef INET6
 		if (fam == AF_INET6) {
 			if (dest_is_loop &&
 			    pref->src_is_loop && pref->src_is_priv) {
 				/*
 				 * fe80::1 must not source traffic to ::1:
 				 * it is not listed to the peer, so the
 				 * peer would abort.
 				 */
 				continue;
 			}
 			if (IN6_IS_ADDR_LINKLOCAL(&pref->address.sin6.sin6_addr) &&
 			    IN6_IS_ADDR_LINKLOCAL(&dst6.sin6_addr)) {
 				/*
 				 * Link-local to link-local is only valid
 				 * within one and the same scope.
 				 */
 				memcpy(&src6, &pref->address.sin6, sizeof(struct sockaddr_in6));
 				(void)sa6_recoverscope(&src6);
 				if (dst6.sin6_scope_id != src6.sin6_scope_id) {
 					continue;
 				}
 			}
 		}
 #endif				/* INET6 */
 
 		/*
 		 * With mobility enabled an interface may still carry an old
 		 * address next to the new one; only the address matching the
 		 * next hop is usable.  (by micchie)
 		 */
 #ifdef INET6
 		if (stcb && fam == AF_INET6 &&
 		    sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) &&
 		    (sctp_v6src_match_nexthop(&pref->address.sin6, ro) == 0)) {
 			continue;
 		}
 #endif
 #ifdef INET
 		/* Same idea for IPv4: avoid topologically incorrect sources. */
 		if (stcb && fam == AF_INET &&
 		    sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) &&
 		    (sctp_v4src_match_nexthop(pref, ro) == 0)) {
 			continue;
 		}
 #endif
 		if (stcb) {
 			if (sctp_is_address_in_scope(cand, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			/*
 			 * A restricted address is unusable, except that a
 			 * pending one is tolerated when non-association
 			 * addresses are allowed.
 			 */
 			if (sctp_is_addr_restricted(stcb, pref) &&
 			    ((non_asoc_addr_ok == 0) ||
 			    (!sctp_is_addr_pending(stcb, pref)))) {
 				continue;
 			}
 		}
 		/* This one qualifies; hand it out once enough were skipped. */
 		if (eligible_seen++ >= addr_wanted) {
 			return (pref);
 		}
 	}
 	return (NULL);
 }
 
 
 /*
  * Count the addresses on ifn that qualify as a preferred source for a
  * destination of family fam with the given loopback/private properties.
  *
  * An address is not counted when the jail check on inp's credential
  * rejects it, when it is flagged SCTP_ADDR_DEFER_USE and
  * non_asoc_addr_ok is 0, when sctp_is_ifa_addr_preferred() rejects it,
  * or (only if stcb is non-NULL) when it is out of the association's scope
  * or on its restricted list.
  *
  * The IPv6 jail check used to be applied only when stcb != NULL although
  * it depends solely on inp.  That made the count disagree with the IPv4
  * check right above it and with the jail checks done by the selection
  * code, so it is now applied unconditionally.
  */
 static int
 sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
     struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	struct sctp_ifa *ifa, *sifa;
 	int num_eligible_addr = 0;
 
 	LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
 #ifdef INET
 		if ((ifa->address.sa.sa_family == AF_INET) &&
 		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 		    &ifa->address.sin.sin_addr) != 0)) {
 			continue;
 		}
 #endif
 #ifdef INET6
 		if ((ifa->address.sa.sa_family == AF_INET6) &&
 		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 		    &ifa->address.sin6.sin6_addr) != 0)) {
 			continue;
 		}
 #endif
 		if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL) {
 			continue;
 		}
 		if (stcb) {
 			if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
 			    (sctp_is_addr_restricted(stcb, sifa))) ||
 			    (non_asoc_addr_ok &&
 			    (sctp_is_addr_restricted(stcb, sifa)) &&
 			    (!sctp_is_addr_pending(stcb, sifa)))) {
 				/*
 				 * It is restricted for some reason..
 				 * probably not yet added.
 				 */
 				continue;
 			}
 		}
 		num_eligible_addr++;
 	}
 	return (num_eligible_addr);
 }
 
 /*
  * Pick a source address for an endpoint that is bound to all addresses.
  *
  * The search proceeds in stages and stops at the first hit:
  *   Plan A: a preferred address on the interface the route emits on,
  *           rotating through them via net->indx_of_eligible_next_to_use.
  *   Plan B: a preferred address on any other interface of the VRF.
  *   Plan C: an acceptable address on the emit interface.
  *   Plan D: an acceptable address on any interface of the VRF.
  * With INET, if an association is given and nothing was found while IPv4
  * private addresses were out of scope, Plans C and D are retried once
  * with ipv4_local_scope temporarily enabled.
  *
  * stcb and net may be NULL.  Returns NULL when the VRF is unknown or no
  * usable address exists.  A non-NULL result has had its refcount
  * incremented exactly once on every path.
  *
  * Fixes relative to the previous version:
  *   - the private-address retry dereferenced stcb without a NULL check;
  *   - Plan C incremented the refcount and then fell into the common
  *     exit, which incremented it again under INET, while Plan D got no
  *     increment at all when INET was not defined;
  *   - the Plan B debug line printed the route's ifn instead of the
  *     interface being examined.
  */
 static struct sctp_ifa *
 sctp_choose_boundall(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_nets *net,
     sctp_route_t * ro,
     uint32_t vrf_id,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     int non_asoc_addr_ok,
     sa_family_t fam)
 {
 	int cur_addr_num = 0, num_preferred = 0;
 	void *ifn;
 	struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	uint32_t ifn_index;
 	struct sctp_vrf *vrf;
 
 #ifdef INET
 	int retried = 0;
 
 #endif
 
 	/*-
 	 * For boundall we can use any address in the association.
 	 * If non_asoc_addr_ok is set we can use any address (at least in
 	 * theory). So we look for preferred addresses first. If we find one,
 	 * we use it. Otherwise we next try to get an address on the
 	 * interface, which we should be able to do (unless non_asoc_addr_ok
 	 * is false and we are routed out that way). In these cases where we
 	 * can't use the address of the interface we go through all the
 	 * ifn's looking for an address we can use and fill that in. Punting
 	 * means we send back address 0, which will probably cause problems
 	 * actually since then IP will fill in the address of the route ifn,
 	 * which means we probably already rejected it.. i.e. here comes an
 	 * abort :-<.
 	 */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn from route:%p ifn_index:%d\n", ifn, ifn_index);
 	emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 	if (sctp_ifn == NULL) {
 		/* ?? We don't have this guy ?? */
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n");
 		goto bound_all_plan_b;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n",
 	    ifn_index, sctp_ifn->ifn_name);
 
 	if (net) {
 		cur_addr_num = net->indx_of_eligible_next_to_use;
 	}
 	num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
 	    inp, stcb,
 	    non_asoc_addr_ok,
 	    dest_is_loop,
 	    dest_is_priv, fam);
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n",
 	    num_preferred, sctp_ifn->ifn_name);
 	if (num_preferred == 0) {
 		/*
 		 * no eligible addresses, we must use some other interface
 		 * address if we can find one.
 		 */
 		goto bound_all_plan_b;
 	}
 	/*
 	 * Ok we have num_preferred set with how many we can use, this may
 	 * vary from call to call due to addresses being deprecated etc..
 	 */
 	if (cur_addr_num >= num_preferred) {
 		cur_addr_num = 0;
 	}
 	/*
 	 * select the nth address from the list (where cur_addr_num is the
 	 * nth) and 0 is the first one, 1 is the second one etc...
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
 
 	sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 	    dest_is_priv, cur_addr_num, fam, ro);
 
 	/* if sctp_ifa is NULL something changed??, fall to plan b. */
 	if (sctp_ifa) {
 		atomic_add_int(&sctp_ifa->refcount, 1);
 		if (net) {
 			/* save off where the next one we will want */
 			net->indx_of_eligible_next_to_use = cur_addr_num + 1;
 		}
 		return (sctp_ifa);
 	}
 	/*
 	 * plan_b: Look at all interfaces and find a preferred address. If
 	 * no preferred fall through to plan_c.
 	 */
 bound_all_plan_b:
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n");
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n",
 		    sctp_ifn->ifn_name);
 		if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* wrong base scope */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n");
 			continue;
 		}
 		if ((sctp_ifn == looked_at) && looked_at) {
 			/* already looked at this guy */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
 			continue;
 		}
 		num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok,
 		    dest_is_loop, dest_is_priv, fam);
 		/* Report the interface under examination, not the route's. */
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "Found ifn:%p %d preferred source addresses\n",
 		    (void *)sctp_ifn, num_preferred);
 		if (num_preferred == 0) {
 			/* None on this interface. */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefered -- skipping to next\n");
 			continue;
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "num preferred:%d on interface:%p cur_addr_num:%d\n",
 		    num_preferred, (void *)sctp_ifn, cur_addr_num);
 
 		/*
 		 * Ok we have num_preferred set with how many we can use,
 		 * this may vary from call to call due to addresses being
 		 * deprecated etc..
 		 */
 		if (cur_addr_num >= num_preferred) {
 			cur_addr_num = 0;
 		}
 		sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 		    dest_is_priv, cur_addr_num, fam, ro);
 		if (sifa == NULL)
 			continue;
 		if (net) {
 			net->indx_of_eligible_next_to_use = cur_addr_num + 1;
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n",
 			    cur_addr_num);
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa);
 		}
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 #ifdef INET
 again_with_private_addresses_allowed:
 #endif
 	/* plan_c: do we have an acceptable address on the emit interface */
 	sifa = NULL;
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n");
 	if (emit_ifn == NULL) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jump to Plan D - no emit_ifn\n");
 		goto plan_d;
 	}
 	LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa);
 #ifdef INET
 		if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 		    &sctp_ifa->address.sin.sin_addr) != 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
 			continue;
 		}
 #endif
 #ifdef INET6
 		if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 		    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
 			continue;
 		}
 #endif
 		if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n");
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "IFA not acceptable\n");
 			continue;
 		}
 		if (stcb) {
 			if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n");
 				sifa = NULL;
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
 			    (sctp_is_addr_restricted(stcb, sifa))) ||
 			    (non_asoc_addr_ok &&
 			    (sctp_is_addr_restricted(stcb, sifa)) &&
 			    (!sctp_is_addr_pending(stcb, sifa)))) {
 				/*
 				 * It is restricted for some reason..
 				 * probably not yet added.
 				 */
 				SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its resticted\n");
 				sifa = NULL;
 				continue;
 			}
 		} else {
 			SCTP_PRINTF("Stcb is null - no print\n");
 		}
 		/* The reference is taken once, at the common exit below. */
 		goto out;
 	}
 plan_d:
 	/*
 	 * plan_d: We are in trouble. No preferred address on the emit
 	 * interface. And not even a preferred address on all interfaces. Go
 	 * out and see if we can find an acceptable address somewhere
 	 * amongst all interfaces.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at);
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* wrong base scope */
 			continue;
 		}
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
 			sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
 			    dest_is_loop,
 			    dest_is_priv, fam);
 			if (sifa == NULL)
 				continue;
 			if (stcb) {
 				if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 					sifa = NULL;
 					continue;
 				}
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/*
 					 * It is restricted for some
 					 * reason.. probably not yet added.
 					 */
 					sifa = NULL;
 					continue;
 				}
 			}
 			goto out;
 		}
 	}
 #ifdef INET
 	/*
 	 * Nothing found.  If there is an association whose scope excludes
 	 * IPv4 private addresses, allow them temporarily and retry Plans C
 	 * and D once; restore the scope if the retry failed as well.
 	 * Without an association there is no scope to relax.
 	 */
 	if (stcb != NULL) {
 		if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
 			stcb->asoc.scope.ipv4_local_scope = 1;
 			retried = 1;
 			goto again_with_private_addresses_allowed;
 		} else if (retried == 1) {
 			stcb->asoc.scope.ipv4_local_scope = 0;
 		}
 	}
 #endif
 out:
 	if (sifa) {
 #ifdef INET
 		/*
 		 * retried == 1 implies stcb != NULL.  The scope now admits
 		 * private IPv4 addresses, so put every other usable private
 		 * IPv4 address on the association's restricted list.
 		 */
 		if (retried == 1) {
 			LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 				if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 					/* wrong base scope */
 					continue;
 				}
 				LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 					struct sctp_ifa *tmp_sifa;
 
 					if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifa->address.sin.sin_addr) != 0)) {
 						continue;
 					}
 #ifdef INET6
 					if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 						continue;
 					}
 #endif
 					if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 					    (non_asoc_addr_ok == 0))
 						continue;
 					tmp_sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
 					    dest_is_loop,
 					    dest_is_priv, fam);
 					if (tmp_sifa == NULL) {
 						continue;
 					}
 					if (tmp_sifa == sifa) {
 						continue;
 					}
 					if (stcb) {
 						if (sctp_is_address_in_scope(tmp_sifa,
 						    &stcb->asoc.scope, 0) == 0) {
 							continue;
 						}
 						if (((non_asoc_addr_ok == 0) &&
 						    (sctp_is_addr_restricted(stcb, tmp_sifa))) ||
 						    (non_asoc_addr_ok &&
 						    (sctp_is_addr_restricted(stcb, tmp_sifa)) &&
 						    (!sctp_is_addr_pending(stcb, tmp_sifa)))) {
 							/*
 							 * It is restricted
 							 * for some reason..
 							 * probably not yet
 							 * added.
 							 */
 							continue;
 						}
 					}
 					if ((tmp_sifa->address.sin.sin_family == AF_INET) &&
 					    (IN4_ISPRIVATE_ADDRESS(&(tmp_sifa->address.sin.sin_addr)))) {
 						sctp_add_local_addr_restricted(stcb, tmp_sifa);
 					}
 				}
 			}
 		}
 #endif
 		/* Single reference for the caller, whichever plan matched. */
 		atomic_add_int(&sifa->refcount, 1);
 	}
 	return (sifa);
 }
 
 
 
 /*
  * Choose the local source address for traffic towards ro->ro_dst.
  *
  * stcb and net may be NULL.  A route is allocated into ro when none is
  * cached yet; NULL is returned if no route can be obtained.  Otherwise
  * the result is whatever the bound-all or bound-specific chooser returns,
  * selected while holding the address read lock.
  */
 struct sctp_ifa *
 sctp_source_address_selection(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     sctp_route_t * ro,
     struct sctp_nets *net,
     int non_asoc_addr_ok, uint32_t vrf_id)
 {
 	struct sctp_ifa *chosen;
 	uint8_t is_priv, is_loop;
 	sa_family_t family;
 
 #ifdef INET
 	struct sockaddr_in *dst4 = (struct sockaddr_in *)&ro->ro_dst;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)&ro->ro_dst;
 
 #endif
 
 	/**
 	 * Rules:
 	 * - Find the route if needed and cache it when possible.
 	 * - Look at the interface address in the route.  If it is in the
 	 *   bound list we have the best source.
 	 * - If not, rotate amongst the addresses.
 	 *
 	 * Caveats and issues
 	 *
 	 * Scope matters: both the source and the destination may be a
 	 * private or a global address.
 	 *
 	 * For V4
 	 * ------------------------------------------
 	 *      source     *      dest  *  result
 	 * ------------------------------------------
 	 * <a>  Private    *    Global  *  NAT
 	 * <b>  Private    *    Private *  No problem
 	 * <c>  Global     *    Private *  Huh, how will this work?
 	 * <d>  Global     *    Global  *  No problem
 	 * ------------------------------------------
 	 *
 	 * For V6
 	 * ------------------------------------------
 	 *      source     *      dest  *  result
 	 * ------------------------------------------
 	 * <a>  Linklocal  *    Global  *
 	 * <b>  Linklocal  * Linklocal  *  No problem
 	 * <c>  Global     * Linklocal  *  Huh, how will this work?
 	 * <d>  Global     *    Global  *  No problem
 	 * ------------------------------------------
 	 *
 	 * On top of that an interface can carry several addresses (the
 	 * ifa's of an ifn form a linked list), possibly both private and
 	 * global ones.  Should the destination decide which one is best?
 	 * Sending P->G may get a NAT translation; or should the G on the
 	 * interface be preferred?
 	 *
 	 * Decisions:
 	 *
 	 * - Count the number of addresses on the interface.
 	 * - If there is one, no problem except case <c>.  For <a> a NAT is
 	 *   assumed to be out there.
 	 * - If there is more than one, prefer G -> G and P -> P where
 	 *   possible and fall back to mixed types, G -> P being the last
 	 *   resort.
 	 * - All of the above holds for bound-all.  For bound-specific the
 	 *   same concept applies but only bound addresses are considered;
 	 *   if the bound set is NOT assigned to the interface, rotate
 	 *   amongst the bound addresses.
 	 */
 	if (ro->ro_rt == NULL) {
 		/* No cached route yet: look one up, give up if none exists. */
 		SCTP_RTALLOC(ro, vrf_id);
 		if (ro->ro_rt == NULL) {
 			return (NULL);
 		}
 	}
 	family = ro->ro_dst.sa_family;
 	is_loop = 0;
 	is_priv = 0;
 	/* Classify the destination: loopback, private/link-local or other. */
 	switch (family) {
 #ifdef INET
 	case AF_INET:
 		if (IN4_ISLOOPBACK_ADDRESS(&dst4->sin_addr)) {
 			is_loop = 1;
 		} else if ((IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) {
 			is_priv = 1;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * "::1" and "fe80::1%lo0" both count as loopback scope;
 		 * such a destination is never flagged private (link-local).
 		 */
 		if (IN6_IS_ADDR_LOOPBACK(&dst6->sin6_addr) ||
 		    SCTP_ROUTE_IS_REAL_LOOP(ro)) {
 			is_loop = 1;
 		} else if (IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) {
 			is_priv = 1;
 		}
 		break;
 #endif
 	default:
 		break;
 	}
 	if (is_loop && (net != NULL)) {
 		/* A loopback destination is marked local on its net. */
 		net->addr_is_local = 1;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&ro->ro_dst);
 
 	SCTP_IPI_ADDR_RLOCK();
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/* Endpoint is bound to all addresses. */
 		chosen = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
 		    is_priv, is_loop,
 		    non_asoc_addr_ok, family);
 	} else if (stcb != NULL) {
 		/* Bound to a subset, association known. */
 		chosen = sctp_choose_boundspecific_stcb(inp, stcb, ro,
 		    vrf_id, is_priv,
 		    is_loop,
 		    non_asoc_addr_ok, family);
 	} else {
 		/* Bound to a subset, no association yet. */
 		chosen = sctp_choose_boundspecific_inp(inp, ro, vrf_id,
 		    non_asoc_addr_ok,
 		    is_priv,
 		    is_loop, family);
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (chosen);
 }
 
 /*
  * Scan the control messages in the first SCTP_BUF_LEN(control) bytes of
  * control for an IPPROTO_SCTP message of type c_type and copy its payload
  * into data.
  *
  * - For an exact type match, cpsize bytes are copied to data and 1 is
  *   returned immediately.
  * - For c_type == SCTP_SNDRCV, SCTP_SNDINFO, SCTP_PRINFO and
  *   SCTP_AUTHINFO messages are additionally folded into the
  *   struct sctp_sndrcvinfo that data points to (zeroed on the first
  *   hit); scanning continues so several of them can be combined.
  *
  * Returns 1 if anything was copied, 0 otherwise.  A truncated or
  * malformed message stops the scan and returns what was found so far.
  *
  * The "complete CMSG" test compares cmsg_len as an unsigned quantity
  * against the bytes remaining.  The former "(int)cmsg_len + at > tlen"
  * form let a cmsg_len that is negative when cast to int slip through and
  * reach m_copydata() with an out-of-range request.
  */
 static int
 sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize)
 {
 	struct cmsghdr cmh;
 	int tlen, at, found;
 	int data_off;
 	size_t data_len;
 	struct sctp_sndinfo sndinfo;
 	struct sctp_prinfo prinfo;
 	struct sctp_authinfo authinfo;
 
 	tlen = SCTP_BUF_LEN(control);
 	at = 0;
 	found = 0;
 	/*
 	 * Independent of how many mbufs, find the c_type inside the control
 	 * structure and copy out the data.
 	 */
 	while (at < tlen) {
 		if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
 			/* There is not enough room for one more. */
 			return (found);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
 		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			return (found);
 		}
 		/* tlen - at is positive here, so the cast is value-preserving. */
 		if ((size_t)cmh.cmsg_len > (size_t)(tlen - at)) {
 			/* We don't have the complete CMSG. */
 			return (found);
 		}
 		/* Payload location and size of this message. */
 		data_off = at + (int)CMSG_ALIGN(sizeof(cmh));
 		data_len = (size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh)));
 		if ((cmh.cmsg_level == IPPROTO_SCTP) &&
 		    ((c_type == cmh.cmsg_type) ||
 		    ((c_type == SCTP_SNDRCV) &&
 		    ((cmh.cmsg_type == SCTP_SNDINFO) ||
 		    (cmh.cmsg_type == SCTP_PRINFO) ||
 		    (cmh.cmsg_type == SCTP_AUTHINFO))))) {
 			if (c_type == cmh.cmsg_type) {
 				if (data_len < cpsize) {
 					return (found);
 				}
 				/* It is exactly what we want. Copy it out. */
 				m_copydata(control, data_off, cpsize, (caddr_t)data);
 				return (1);
 			} else {
 				struct sctp_sndrcvinfo *sndrcvinfo;
 
 				sndrcvinfo = (struct sctp_sndrcvinfo *)data;
 				if (found == 0) {
 					if (cpsize < sizeof(struct sctp_sndrcvinfo)) {
 						return (found);
 					}
 					memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo));
 				}
 				switch (cmh.cmsg_type) {
 				case SCTP_SNDINFO:
 					if (data_len < sizeof(struct sctp_sndinfo)) {
 						return (found);
 					}
 					m_copydata(control, data_off, sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
 					sndrcvinfo->sinfo_stream = sndinfo.snd_sid;
 					sndrcvinfo->sinfo_flags = sndinfo.snd_flags;
 					sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid;
 					sndrcvinfo->sinfo_context = sndinfo.snd_context;
 					sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id;
 					break;
 				case SCTP_PRINFO:
 					if (data_len < sizeof(struct sctp_prinfo)) {
 						return (found);
 					}
 					m_copydata(control, data_off, sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
 					if (prinfo.pr_policy != SCTP_PR_SCTP_NONE) {
 						sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
 					} else {
 						sndrcvinfo->sinfo_timetolive = 0;
 					}
 					sndrcvinfo->sinfo_flags |= prinfo.pr_policy;
 					break;
 				case SCTP_AUTHINFO:
 					if (data_len < sizeof(struct sctp_authinfo)) {
 						return (found);
 					}
 					m_copydata(control, data_off, sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
 					sndrcvinfo->sinfo_keynumber_valid = 1;
 					sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber;
 					break;
 				default:
 					return (found);
 				}
 				found = 1;
 			}
 		}
 		at += CMSG_ALIGN(cmh.cmsg_len);
 	}
 	return (found);
 }
 
 static int
 sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error)
 {
 	struct cmsghdr cmh;
 	int tlen, at;
 	struct sctp_initmsg initmsg;
 
 #ifdef INET
 	struct sockaddr_in sin;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 
 #endif
 
 	tlen = SCTP_BUF_LEN(control);
 	at = 0;
 	while (at < tlen) {
 		if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
 			/* There is not enough room for one more. */
 			*error = EINVAL;
 			return (1);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
 		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			*error = EINVAL;
 			return (1);
 		}
 		if (((int)cmh.cmsg_len + at) > tlen) {
 			/* We don't have the complete CMSG. */
 			*error = EINVAL;
 			return (1);
 		}
 		if (cmh.cmsg_level == IPPROTO_SCTP) {
 			switch (cmh.cmsg_type) {
 			case SCTP_INIT:
 				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) {
 					*error = EINVAL;
 					return (1);
 				}
 				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
 				if (initmsg.sinit_max_attempts)
 					stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
 				if (initmsg.sinit_num_ostreams)
 					stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams;
 				if (initmsg.sinit_max_instreams)
 					stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams;
 				if (initmsg.sinit_max_init_timeo)
 					stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo;
 				if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) {
 					struct sctp_stream_out *tmp_str;
 					unsigned int i;
 
 #if defined(SCTP_DETAILED_STR_STATS)
 					int j;
 
 #endif
 
 					/* Default is NOT correct */
 					SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
 					    stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_MALLOC(tmp_str,
 					    struct sctp_stream_out *,
 					    (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)),
 					    SCTP_M_STRMO);
 					SCTP_TCB_LOCK(stcb);
 					if (tmp_str != NULL) {
 						SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO);
 						stcb->asoc.strmout = tmp_str;
 						stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams;
 					} else {
 						stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt;
 					}
 					for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 						TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 						stcb->asoc.strmout[i].chunks_on_queues = 0;
 						stcb->asoc.strmout[i].next_sequence_send = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 						for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 							stcb->asoc.strmout[i].abandoned_sent[j] = 0;
 							stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
 						}
 #else
 						stcb->asoc.strmout[i].abandoned_sent[0] = 0;
 						stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
 #endif
 						stcb->asoc.strmout[i].stream_no = i;
 						stcb->asoc.strmout[i].last_msg_incomplete = 0;
 						stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
 					}
 				}
 				break;
 #ifdef INET
 			case SCTP_DSTADDRV4:
 				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 				memset(&sin, 0, sizeof(struct sockaddr_in));
 				sin.sin_family = AF_INET;
 				sin.sin_len = sizeof(struct sockaddr_in);
 				sin.sin_port = stcb->rport;
 				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 				if ((sin.sin_addr.s_addr == INADDR_ANY) ||
 				    (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 				    IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 					*error = EINVAL;
 					return (1);
 				}
 				if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL,
 				    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 					*error = ENOBUFS;
 					return (1);
 				}
 				break;
 #endif
 #ifdef INET6
 			case SCTP_DSTADDRV6:
 				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 				memset(&sin6, 0, sizeof(struct sockaddr_in6));
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_len = sizeof(struct sockaddr_in6);
 				sin6.sin6_port = stcb->rport;
 				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 				if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
 				    IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 #ifdef INET
 				if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
 					in6_sin6_2_sin(&sin, &sin6);
 					if ((sin.sin_addr.s_addr == INADDR_ANY) ||
 					    (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 					    IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 						*error = EINVAL;
 						return (1);
 					}
 					if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL,
 					    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 						*error = ENOBUFS;
 						return (1);
 					}
 				} else
 #endif
 					if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL,
 				    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 					*error = ENOBUFS;
 					return (1);
 				}
 				break;
 #endif
 			default:
 				break;
 			}
 		}
 		at += CMSG_ALIGN(cmh.cmsg_len);
 	}
 	return (0);
 }
 
 /*
  * Look for an existing association matching one of the destination
  * addresses given as SCTP_DSTADDRV4 / SCTP_DSTADDRV6 control messages in
  * the first SCTP_BUF_LEN(control) bytes of control.  port is stored
  * unchanged as the port of each address that is looked up.
  *
  * Returns the first association sctp_findassociation_ep_addr() finds
  * (inp_p and net_p are passed through to it), or NULL if none matches.
  * NULL with *error set to EINVAL is returned for a malformed control
  * message; *error is left untouched otherwise.
  *
  * The "complete CMSG" test compares cmsg_len as an unsigned quantity
  * against the bytes remaining.  The former "(int)cmsg_len + at > tlen"
  * form let a cmsg_len that is negative when cast to int slip through and
  * reach m_copydata() with an out-of-range request.
  */
 static struct sctp_tcb *
 sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
     uint16_t port,
     struct mbuf *control,
     struct sctp_nets **net_p,
     int *error)
 {
 	struct cmsghdr cmh;
 	int tlen, at;
 	struct sctp_tcb *stcb;
 	struct sockaddr *addr;
 
 #ifdef INET
 	struct sockaddr_in sin;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 
 #endif
 
 	tlen = SCTP_BUF_LEN(control);
 	at = 0;
 	while (at < tlen) {
 		if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
 			/* There is not enough room for one more. */
 			*error = EINVAL;
 			return (NULL);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
 		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			*error = EINVAL;
 			return (NULL);
 		}
 		/* tlen - at is positive here, so the cast is value-preserving. */
 		if ((size_t)cmh.cmsg_len > (size_t)(tlen - at)) {
 			/* We don't have the complete CMSG. */
 			*error = EINVAL;
 			return (NULL);
 		}
 		if (cmh.cmsg_level == IPPROTO_SCTP) {
 			switch (cmh.cmsg_type) {
 #ifdef INET
 			case SCTP_DSTADDRV4:
 				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
 					*error = EINVAL;
 					return (NULL);
 				}
 				memset(&sin, 0, sizeof(struct sockaddr_in));
 				sin.sin_family = AF_INET;
 				sin.sin_len = sizeof(struct sockaddr_in);
 				sin.sin_port = port;
 				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 				addr = (struct sockaddr *)&sin;
 				break;
 #endif
 #ifdef INET6
 			case SCTP_DSTADDRV6:
 				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
 					*error = EINVAL;
 					return (NULL);
 				}
 				memset(&sin6, 0, sizeof(struct sockaddr_in6));
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_len = sizeof(struct sockaddr_in6);
 				sin6.sin6_port = port;
 				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 #ifdef INET
 				if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
 					/* Look a v4-mapped address up as IPv4. */
 					in6_sin6_2_sin(&sin, &sin6);
 					addr = (struct sockaddr *)&sin;
 				} else
 #endif
 					addr = (struct sockaddr *)&sin6;
 				break;
 #endif
 			default:
 				/* Not an address message: nothing to look up. */
 				addr = NULL;
 				break;
 			}
 			if (addr) {
 				stcb = sctp_findassociation_ep_addr(inp_p, addr, net_p, NULL, NULL);
 				if (stcb != NULL) {
 					return (stcb);
 				}
 			}
 		}
 		at += CMSG_ALIGN(cmh.cmsg_len);
 	}
 	return (NULL);
 }
 
 /*
  * Build a STATE-COOKIE parameter as one mbuf chain laid out as
  *
  *   parameter header | state cookie | copy of INIT | copy of INIT-ACK |
  *   zeroed signature
  *
  * init/initack are copied starting at init_offset/initack_offset.  The
  * parameter length is set to the sum of all mbuf lengths in the chain,
  * accumulated in a uint16_t.  *signature is pointed at the zero-filled
  * signature bytes inside the last mbuf.
  *
  * Returns the head of the chain, or NULL if any allocation or copy
  * fails; everything allocated so far is freed in that case and
  * *signature is left untouched.
  */
 static struct mbuf *
 sctp_add_cookie(struct mbuf *init, int init_offset,
     struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature)
 {
 	struct mbuf *cookie, *init_copy, *initack_copy, *sig_mbuf, *tail;
 	struct sctp_paramhdr *ph;
 	struct sctp_state_cookie *stc;
 	uint8_t *sig_bytes;
 	uint16_t cookie_sz;
 
 	/* Head mbuf: room for the parameter header plus the cookie itself. */
 	cookie = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
 	    sizeof(struct sctp_paramhdr)), 0,
 	    M_NOWAIT, 1, MT_DATA);
 	if (cookie == NULL) {
 		return (NULL);
 	}
 	init_copy = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT);
 	if (init_copy == NULL) {
 		sctp_m_freem(cookie);
 		return (NULL);
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(init_copy, SCTP_MBUF_ICOPY);
 	}
 #endif
 	initack_copy = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
 	    M_NOWAIT);
 	if (initack_copy == NULL) {
 		sctp_m_freem(cookie);
 		sctp_m_freem(init_copy);
 		return (NULL);
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(initack_copy, SCTP_MBUF_ICOPY);
 	}
 #endif
 	/* Lay out header and cookie in the head mbuf. */
 	ph = mtod(cookie, struct sctp_paramhdr *);
 	stc = (struct sctp_state_cookie *)((caddr_t)ph +
 	    sizeof(struct sctp_paramhdr));
 	SCTP_BUF_LEN(cookie) = sizeof(struct sctp_state_cookie) +
 	    sizeof(struct sctp_paramhdr);
 	ph->param_type = htons(SCTP_STATE_COOKIE);
 	ph->param_length = 0;	/* patched once the total size is known */
 	memcpy(stc, stc_in, sizeof(struct sctp_state_cookie));
 
 	/*
 	 * Walk each piece to its last mbuf, adding up the lengths, and hook
 	 * the next piece on: head -> INIT copy -> INIT-ACK copy.
 	 */
 	tail = cookie;
 	cookie_sz = SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		cookie_sz += SCTP_BUF_LEN(tail);
 	}
 	SCTP_BUF_NEXT(tail) = init_copy;
 
 	tail = init_copy;
 	cookie_sz += SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		cookie_sz += SCTP_BUF_LEN(tail);
 	}
 	SCTP_BUF_NEXT(tail) = initack_copy;
 
 	tail = initack_copy;
 	cookie_sz += SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		cookie_sz += SCTP_BUF_LEN(tail);
 	}
 
 	/* Trailing mbuf that will carry the signature. */
 	sig_mbuf = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_NOWAIT, 1, MT_DATA);
 	if (sig_mbuf == NULL) {
 		/* The copies are already linked in, so this frees them too. */
 		sctp_m_freem(cookie);
 		return (NULL);
 	}
 	SCTP_BUF_NEXT(tail) = sig_mbuf;
 	sig_bytes = (uint8_t *) mtod(sig_mbuf, caddr_t);
 	memset(sig_bytes, 0, SCTP_SIGNATURE_SIZE);
 	SCTP_BUF_LEN(sig_mbuf) = SCTP_SIGNATURE_SIZE;
 	*signature = sig_bytes;
 
 	cookie_sz += SCTP_SIGNATURE_SIZE;
 	ph->param_length = htons(cookie_sz);
 	return (cookie);
 }
 
 
 static uint8_t
 sctp_get_ect(struct sctp_tcb *stcb)
 {
 	if ((stcb != NULL) && (stcb->asoc.ecn_supported == 1)) {
 		return (SCTP_ECT0_BIT);
 	} else {
 		return (0);
 	}
 }
 
 #if defined(INET) || defined(INET6)
 static void
 sctp_handle_no_route(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     int so_locked)
 {
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "dropped packet - no valid source addr\n");
 
 	if (net) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination was ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa);
 		if (net->dest_state & SCTP_ADDR_CONFIRMED) {
 			if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net);
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
 				    stcb, 0,
 				    (void *)net,
 				    so_locked);
 				net->dest_state &= ~SCTP_ADDR_REACHABLE;
 				net->dest_state &= ~SCTP_ADDR_PF;
 			}
 		}
 		if (stcb) {
 			if (net == stcb->asoc.primary_destination) {
 				/* need a new primary */
 				struct sctp_nets *alt;
 
 				alt = sctp_find_alternate_net(stcb, net, 0);
 				if (alt != net) {
 					if (stcb->asoc.alternate) {
 						sctp_free_remote_addr(stcb->asoc.alternate);
 					}
 					stcb->asoc.alternate = alt;
 					atomic_add_int(&stcb->asoc.alternate->ref_count, 1);
 					if (net->ro._s_addr) {
 						sctp_free_ifa(net->ro._s_addr);
 						net->ro._s_addr = NULL;
 					}
 					net->src_addr_selected = 0;
 				}
 			}
 		}
 	}
 }
 
 #endif
 
 static int
 sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,	/* may be NULL */
     struct sctp_nets *net,
     struct sockaddr *to,
     struct mbuf *m,
     uint32_t auth_offset,
     struct sctp_auth_chunk *auth,
     uint16_t auth_keyid,
     int nofragment_flag,
     int ecn_ok,
     int out_of_asoc_ok,
     uint16_t src_port,
     uint16_t dest_port,
     uint32_t v_tag,
     uint16_t port,
     union sctp_sockstore *over_addr,
     uint8_t mflowtype, uint32_t mflowid,
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     int so_locked SCTP_UNUSED
 #else
     int so_locked
 #endif
 )
 /* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
 {
 	/**
 	 * Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
 	 * WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
 	 * - fill in the HMAC digest of any AUTH chunk in the packet.
 	 * - calculate and fill in the SCTP checksum.
 	 * - prepend an IP address header.
 	 * - if boundall use INADDR_ANY.
 	 * - if boundspecific do source address selection.
 	 * - set fragmentation option for ipV4.
 	 * - On return from IP output, check/adjust mtu size of output
 	 *   interface and smallest_mtu size as well.
 	 */
 	/* Will need ifdefs around this */
 	struct mbuf *newm;
 	struct sctphdr *sctphdr;
 	int packet_length;
 	int ret;
 
 #if defined(INET) || defined(INET6)
 	uint32_t vrf_id;
 
 #endif
 #if defined(INET) || defined(INET6)
 	struct mbuf *o_pak;
 	sctp_route_t *ro = NULL;
 	struct udphdr *udp = NULL;
 
 #endif
 	uint8_t tos_value;
 
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so = NULL;
 
 #endif
 
 	if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) {
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		sctp_m_freem(m);
 		return (EFAULT);
 	}
 #if defined(INET) || defined(INET6)
 	if (stcb) {
 		vrf_id = stcb->asoc.vrf_id;
 	} else {
 		vrf_id = inp->def_vrf_id;
 	}
 #endif
 	/* fill in the HMAC digest for any AUTH chunk in the packet */
 	if ((auth != NULL) && (stcb != NULL)) {
 		sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
 	}
 	if (net) {
 		tos_value = net->dscp;
 	} else if (stcb) {
 		tos_value = stcb->asoc.default_dscp;
 	} else {
 		tos_value = inp->sctp_ep.default_dscp;
 	}
 
 	switch (to->sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct ip *ip = NULL;
 			sctp_route_t iproute;
 			int len;
 
 			len = sizeof(struct ip) + sizeof(struct sctphdr);
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
 			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ALIGN_TO_END(newm, len);
 			SCTP_BUF_LEN(newm) = len;
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
 				m->m_pkthdr.flowid = net->flowid;
 				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
 				m->m_pkthdr.flowid = mflowid;
 				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 			ip = mtod(m, struct ip *);
 			ip->ip_v = IPVERSION;
 			ip->ip_hl = (sizeof(struct ip) >> 2);
 			if (tos_value == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				tos_value = inp->ip_inp.inp.inp_ip_tos;
 			}
 			tos_value &= 0xfc;
 			if (ecn_ok) {
 				tos_value |= sctp_get_ect(stcb);
 			}
 			if ((nofragment_flag) && (port == 0)) {
 				ip->ip_off = htons(IP_DF);
 			} else {
 				ip->ip_off = htons(0);
 			}
 			/* FreeBSD has a function for ip_id's */
-			ip->ip_id = ip_newid();
+			ip_fillid(ip);
 
 			ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
 			ip->ip_len = htons(packet_length);
 			ip->ip_tos = tos_value;
 			if (port) {
 				ip->ip_p = IPPROTO_UDP;
 			} else {
 				ip->ip_p = IPPROTO_SCTP;
 			}
 			ip->ip_sum = 0;
 			if (net == NULL) {
 				ro = &iproute;
 				memset(&iproute, 0, sizeof(iproute));
 				memcpy(&ro->ro_dst, to, to->sa_len);
 			} else {
 				ro = (sctp_route_t *) & net->ro;
 			}
 			/* Now the address selection part */
 			ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr;
 
 			/* call the routine to select the src address */
 			if (net && out_of_asoc_ok == 0) {
 				if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
 					sctp_free_ifa(net->ro._s_addr);
 					net->ro._s_addr = NULL;
 					net->src_addr_selected = 0;
 					if (ro->ro_rt) {
 						RTFREE(ro->ro_rt);
 						ro->ro_rt = NULL;
 					}
 				}
 				if (net->src_addr_selected == 0) {
 					/* Cache the source address */
 					net->ro._s_addr = sctp_source_address_selection(inp, stcb,
 					    ro, net, 0,
 					    vrf_id);
 					net->src_addr_selected = 1;
 				}
 				if (net->ro._s_addr == NULL) {
 					/* No route to host */
 					net->src_addr_selected = 0;
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				ip->ip_src = net->ro._s_addr->address.sin.sin_addr;
 			} else {
 				if (over_addr == NULL) {
 					struct sctp_ifa *_lsrc;
 
 					_lsrc = sctp_source_address_selection(inp, stcb, ro,
 					    net,
 					    out_of_asoc_ok,
 					    vrf_id);
 					if (_lsrc == NULL) {
 						sctp_handle_no_route(stcb, net, so_locked);
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 						sctp_m_freem(m);
 						return (EHOSTUNREACH);
 					}
 					ip->ip_src = _lsrc->address.sin.sin_addr;
 					sctp_free_ifa(_lsrc);
 				} else {
 					ip->ip_src = over_addr->sin.sin_addr;
 					SCTP_RTALLOC(ro, vrf_id);
 				}
 			}
 			if (port) {
 				if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
 				udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 				udp->uh_dport = port;
 				udp->uh_ulen = htons(packet_length - sizeof(struct ip));
 				if (V_udp_cksum) {
 					udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
 				} else {
 					udp->uh_sum = 0;
 				}
 				sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
 			} else {
 				sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip));
 			}
 
 			sctphdr->src_port = src_port;
 			sctphdr->dest_port = dest_port;
 			sctphdr->v_tag = v_tag;
 			sctphdr->checksum = 0;
 
 			/*
 			 * If source address selection fails and we find no
 			 * route then the ip_output should fail as well with
 			 * a NO_ROUTE_TO_HOST type error. We probably should
 			 * catch that somewhere and abort the association
 			 * right away (assuming this is an INIT being sent).
 			 */
 			if (ro->ro_rt == NULL) {
 				/*
 				 * src addr selection failed to find a route
 				 * (or valid source addr), so we can't get
 				 * there from here (yet)!
 				 */
 				sctp_handle_no_route(stcb, net, so_locked);
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 				sctp_m_freem(m);
 				return (EHOSTUNREACH);
 			}
 			if (ro != &iproute) {
 				memcpy(&iproute, ro, sizeof(*ro));
 			}
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n",
 			    (uint32_t) (ntohl(ip->ip_src.s_addr)));
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
 			    (uint32_t) (ntohl(ip->ip_dst.s_addr)));
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
 			    (void *)ro->ro_rt);
 
 			if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 				/* failed to prepend data, give up */
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				sctp_m_freem(m);
 				return (ENOMEM);
 			}
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr));
 				SCTP_STAT_INCR(sctps_sendswcrc);
 #endif
 				if (V_udp_cksum) {
 					SCTP_ENABLE_UDP_CSUM(o_pak);
 				}
 			} else {
 #if defined(SCTP_WITH_NO_CSUM)
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				m->m_pkthdr.csum_flags = CSUM_SCTP;
 				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 			}
 #ifdef SCTP_PACKET_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
 				sctp_packet_log(o_pak);
 #endif
 			/* send it out.  table id is taken from stcb */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				so = SCTP_INP_SO(inp);
 				SCTP_SOCKET_UNLOCK(so, 0);
 			}
 #endif
 			SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
 				SCTP_SOCKET_LOCK(so, 0);
 				SCTP_TCB_LOCK(stcb);
 				atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			}
 #endif
 			SCTP_STAT_INCR(sctps_sendpackets);
 			SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 			if (ret)
 				SCTP_STAT_INCR(sctps_senderrors);
 
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
 			if (net == NULL) {
 				/* free tempy routes */
 				RO_RTFREE(ro);
 			} else {
 				/*
 				 * PMTU check versus smallest asoc MTU goes
 				 * here
 				 */
 				if ((ro->ro_rt != NULL) &&
 				    (net->ro._s_addr)) {
 					uint32_t mtu;
 
 					mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
 					if (net->port) {
 						mtu -= sizeof(struct udphdr);
 					}
 					if (mtu && (stcb->asoc.smallest_mtu > mtu)) {
 						sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
 						net->mtu = mtu;
 					}
 				} else if (ro->ro_rt == NULL) {
 					/* route was freed */
 					if (net->ro._s_addr &&
 					    net->src_addr_selected) {
 						sctp_free_ifa(net->ro._s_addr);
 						net->ro._s_addr = NULL;
 					}
 					net->src_addr_selected = 0;
 				}
 			}
 			return (ret);
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			uint32_t flowlabel, flowinfo;
 			struct ip6_hdr *ip6h;
 			struct route_in6 ip6route;
 			struct ifnet *ifp;
 			struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp;
 			int prev_scope = 0;
 			struct sockaddr_in6 lsa6_storage;
 			int error;
 			u_short prev_port = 0;
 			int len;
 
 			if (net) {
 				flowlabel = net->flowlabel;
 			} else if (stcb) {
 				flowlabel = stcb->asoc.default_flowlabel;
 			} else {
 				flowlabel = inp->sctp_ep.default_flowlabel;
 			}
 			if (flowlabel == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
 			}
 			flowlabel &= 0x000fffff;
 			len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
 			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ALIGN_TO_END(newm, len);
 			SCTP_BUF_LEN(newm) = len;
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
 				m->m_pkthdr.flowid = net->flowid;
 				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
 				m->m_pkthdr.flowid = mflowid;
 				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 
 			ip6h = mtod(m, struct ip6_hdr *);
 			/* protect *sin6 from overwrite */
 			sin6 = (struct sockaddr_in6 *)to;
 			tmp = *sin6;
 			sin6 = &tmp;
 
 			/* KAME hack: embed scopeid */
 			if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 				return (EINVAL);
 			}
 			if (net == NULL) {
 				memset(&ip6route, 0, sizeof(ip6route));
 				ro = (sctp_route_t *) & ip6route;
 				memcpy(&ro->ro_dst, sin6, sin6->sin6_len);
 			} else {
 				ro = (sctp_route_t *) & net->ro;
 			}
 			/*
 			 * We assume here that inp_flow is in host byte
 			 * order within the TCB!
 			 */
 			if (tos_value == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				tos_value = (ntohl(((struct in6pcb *)inp)->in6p_flowinfo) >> 20) & 0xff;
 			}
 			tos_value &= 0xfc;
 			if (ecn_ok) {
 				tos_value |= sctp_get_ect(stcb);
 			}
 			flowinfo = 0x06;
 			flowinfo <<= 8;
 			flowinfo |= tos_value;
 			flowinfo <<= 20;
 			flowinfo |= flowlabel;
 			ip6h->ip6_flow = htonl(flowinfo);
 			if (port) {
 				ip6h->ip6_nxt = IPPROTO_UDP;
 			} else {
 				ip6h->ip6_nxt = IPPROTO_SCTP;
 			}
 			ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr));
 			ip6h->ip6_dst = sin6->sin6_addr;
 
 			/*
 			 * Add SRC address selection here: we can only reuse
 			 * to a limited degree the kame src-addr-sel, since
 			 * we can try their selection but it may not be
 			 * bound.
 			 */
 			bzero(&lsa6_tmp, sizeof(lsa6_tmp));
 			lsa6_tmp.sin6_family = AF_INET6;
 			lsa6_tmp.sin6_len = sizeof(lsa6_tmp);
 			lsa6 = &lsa6_tmp;
 			if (net && out_of_asoc_ok == 0) {
 				if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
 					sctp_free_ifa(net->ro._s_addr);
 					net->ro._s_addr = NULL;
 					net->src_addr_selected = 0;
 					if (ro->ro_rt) {
 						RTFREE(ro->ro_rt);
 						ro->ro_rt = NULL;
 					}
 				}
 				if (net->src_addr_selected == 0) {
 					sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 					/* KAME hack: embed scopeid */
 					if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 						return (EINVAL);
 					}
 					/* Cache the source address */
 					net->ro._s_addr = sctp_source_address_selection(inp,
 					    stcb,
 					    ro,
 					    net,
 					    0,
 					    vrf_id);
 					(void)sa6_recoverscope(sin6);
 					net->src_addr_selected = 1;
 				}
 				if (net->ro._s_addr == NULL) {
 					SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n");
 					net->src_addr_selected = 0;
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr;
 			} else {
 				sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
 				/* KAME hack: embed scopeid */
 				if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 					return (EINVAL);
 				}
 				if (over_addr == NULL) {
 					struct sctp_ifa *_lsrc;
 
 					_lsrc = sctp_source_address_selection(inp, stcb, ro,
 					    net,
 					    out_of_asoc_ok,
 					    vrf_id);
 					if (_lsrc == NULL) {
 						sctp_handle_no_route(stcb, net, so_locked);
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 						sctp_m_freem(m);
 						return (EHOSTUNREACH);
 					}
 					lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr;
 					sctp_free_ifa(_lsrc);
 				} else {
 					lsa6->sin6_addr = over_addr->sin6.sin6_addr;
 					SCTP_RTALLOC(ro, vrf_id);
 				}
 				(void)sa6_recoverscope(sin6);
 			}
 			lsa6->sin6_port = inp->sctp_lport;
 
 			if (ro->ro_rt == NULL) {
 				/*
 				 * src addr selection failed to find a route
 				 * (or valid source addr), so we can't get
 				 * there from here!
 				 */
 				sctp_handle_no_route(stcb, net, so_locked);
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 				sctp_m_freem(m);
 				return (EHOSTUNREACH);
 			}
 			/*
 			 * XXX: sa6 may not have a valid sin6_scope_id in
 			 * the non-SCOPEDROUTING case.
 			 */
 			bzero(&lsa6_storage, sizeof(lsa6_storage));
 			lsa6_storage.sin6_family = AF_INET6;
 			lsa6_storage.sin6_len = sizeof(lsa6_storage);
 			lsa6_storage.sin6_addr = lsa6->sin6_addr;
 			if ((error = sa6_recoverscope(&lsa6_storage)) != 0) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error);
 				sctp_m_freem(m);
 				return (error);
 			}
 			/* XXX */
 			lsa6_storage.sin6_addr = lsa6->sin6_addr;
 			lsa6_storage.sin6_port = inp->sctp_lport;
 			lsa6 = &lsa6_storage;
 			ip6h->ip6_src = lsa6->sin6_addr;
 
 			if (port) {
 				if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
 				udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 				udp->uh_dport = port;
 				udp->uh_ulen = htons(packet_length - sizeof(struct ip6_hdr));
 				udp->uh_sum = 0;
 				sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
 			} else {
 				sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
 			}
 
 			sctphdr->src_port = src_port;
 			sctphdr->dest_port = dest_port;
 			sctphdr->v_tag = v_tag;
 			sctphdr->checksum = 0;
 
 			/*
 			 * We set the hop limit now since there is a good
 			 * chance that our ro pointer is now filled
 			 */
 			ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro);
 			ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 
 #ifdef SCTP_DEBUG
 			/* Copy to be sure something bad is not happening */
 			sin6->sin6_addr = ip6h->ip6_dst;
 			lsa6->sin6_addr = ip6h->ip6_src;
 #endif
 
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n");
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: ");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6);
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: ");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6);
 			if (net) {
 				sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 				/*
 				 * preserve the port and scope for link
 				 * local send
 				 */
 				prev_scope = sin6->sin6_scope_id;
 				prev_port = sin6->sin6_port;
 			}
 			if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 				/* failed to prepend data, give up */
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
 				SCTP_STAT_INCR(sctps_sendswcrc);
 #endif
 				if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) {
 					udp->uh_sum = 0xffff;
 				}
 			} else {
 #if defined(SCTP_WITH_NO_CSUM)
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
 				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 			}
 			/* send it out. table id is taken from stcb */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				so = SCTP_INP_SO(inp);
 				SCTP_SOCKET_UNLOCK(so, 0);
 			}
 #endif
 #ifdef SCTP_PACKET_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
 				sctp_packet_log(o_pak);
 #endif
 			SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
 				SCTP_SOCKET_LOCK(so, 0);
 				SCTP_TCB_LOCK(stcb);
 				atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			}
 #endif
 			if (net) {
 				/* for link local this must be done */
 				sin6->sin6_scope_id = prev_scope;
 				sin6->sin6_port = prev_port;
 			}
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
 			SCTP_STAT_INCR(sctps_sendpackets);
 			SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 			if (ret) {
 				SCTP_STAT_INCR(sctps_senderrors);
 			}
 			if (net == NULL) {
 				/* Now if we had a temp route free it */
 				RO_RTFREE(ro);
 			} else {
 				/*
 				 * PMTU check versus smallest asoc MTU goes
 				 * here
 				 */
 				if (ro->ro_rt == NULL) {
 					/* Route was freed */
 					if (net->ro._s_addr &&
 					    net->src_addr_selected) {
 						sctp_free_ifa(net->ro._s_addr);
 						net->ro._s_addr = NULL;
 					}
 					net->src_addr_selected = 0;
 				}
 				if ((ro->ro_rt != NULL) &&
 				    (net->ro._s_addr)) {
 					uint32_t mtu;
 
 					mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
 					if (mtu &&
 					    (stcb->asoc.smallest_mtu > mtu)) {
 						sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
 						net->mtu = mtu;
 						if (net->port) {
 							net->mtu -= sizeof(struct udphdr);
 						}
 					}
 				} else if (ifp) {
 					if (ND_IFINFO(ifp)->linkmtu &&
 					    (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) {
 						sctp_mtu_size_reset(inp,
 						    &stcb->asoc,
 						    ND_IFINFO(ifp)->linkmtu);
 					}
 				}
 			}
 			return (ret);
 		}
 #endif
 	default:
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
 		    ((struct sockaddr *)to)->sa_family);
 		sctp_m_freem(m);
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		return (EFAULT);
 	}
 }
 
 
 void
 sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	struct mbuf *m, *m_last;
 	struct sctp_nets *net;
 	struct sctp_init_chunk *init;
 	struct sctp_supported_addr_param *sup_addr;
 	struct sctp_adaptation_layer_indication *ali;
 	struct sctp_supported_chunk_types_param *pr_supported;
 	struct sctp_paramhdr *ph;
 	int cnt_inits_to = 0;
 	int ret;
 	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
 	/* INIT's always go to the primary (and usually ONLY address) */
 	net = stcb->asoc.primary_destination;
 	if (net == NULL) {
 		net = TAILQ_FIRST(&stcb->asoc.nets);
 		if (net == NULL) {
 			/* TSNH */
 			return;
 		}
 		/* we confirm any address we send an INIT to */
 		net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 		(void)sctp_set_primary_addr(stcb, NULL, net);
 	} else {
 		/* we confirm any address we send an INIT to */
 		net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
 #ifdef INET6
 	if (net->ro._l_addr.sa.sa_family == AF_INET6) {
 		/*
 		 * special hook, if we are sending to link local it will not
 		 * show up in our private address count.
 		 */
 		if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr))
 			cnt_inits_to = 1;
 	}
 #endif
 	if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 		/* This case should not happen */
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n");
 		return;
 	}
 	/* start the INIT timer */
 	sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
 
 	m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
 		return;
 	}
 	chunk_len = (uint16_t) sizeof(struct sctp_init_chunk);
 	padding_len = 0;
 	/* Now lets put the chunk header in place */
 	init = mtod(m, struct sctp_init_chunk *);
 	/* now the chunk header */
 	init->ch.chunk_type = SCTP_INITIATION;
 	init->ch.chunk_flags = 0;
 	/* fill in later from mbuf we build */
 	init->ch.chunk_length = 0;
 	/* place in my tag */
 	init->init.initiate_tag = htonl(stcb->asoc.my_vtag);
 	/* set up some of the credits. */
 	init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0,
 	    SCTP_MINIMAL_RWND));
 	init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
 	init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
 	init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
 
 	/* Adaptation layer indication parameter */
 	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
 		parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
 		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
 		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
 		ali->ph.param_length = htons(parameter_len);
 		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
 		chunk_len += parameter_len;
 	}
 	/* ECN parameter */
 	if (stcb->asoc.ecn_supported == 1) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_ECN_CAPABLE);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* PR-SCTP supported parameter */
 	if (stcb->asoc.prsctp_supported == 1) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* Add NAT friendly parameter. */
 	if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* And now tell the peer which extensions we support */
 	num_ext = 0;
 	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
 	if (stcb->asoc.prsctp_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
 	}
 	if (stcb->asoc.auth_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
 	}
 	if (stcb->asoc.asconf_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
 	}
 	if (stcb->asoc.reconfig_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
 	}
 	if (stcb->asoc.nrsack_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
 	}
 	if (stcb->asoc.pktdrop_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
 	}
 	if (num_ext > 0) {
 		parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
 		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
 		pr_supported->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 	/* add authentication parameters */
 	if (stcb->asoc.auth_supported) {
 		/* attach RANDOM parameter, if available */
 		if (stcb->asoc.authinfo.random != NULL) {
 			struct sctp_auth_random *randp;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t) sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len;
 			/* random key already contains the header */
 			memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 		/* add HMAC_ALGO parameter */
 		if (stcb->asoc.local_hmacs != NULL) {
 			struct sctp_auth_hmac_algo *hmacs;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t) (sizeof(struct sctp_auth_hmac_algo) +
 			    stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t));
 			hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
 			hmacs->ph.param_length = htons(parameter_len);
 			sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *) hmacs->hmac_ids);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 		/* add CHUNKS parameter */
 		if (stcb->asoc.local_auth_chunks != NULL) {
 			struct sctp_auth_chunk_list *chunks;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t) (sizeof(struct sctp_auth_chunk_list) +
 			    sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks));
 			chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
 			chunks->ph.param_length = htons(parameter_len);
 			sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 	}
 	/* now any cookie time extensions */
 	if (stcb->asoc.cookie_preserve_req) {
 		struct sctp_cookie_perserve_param *cookie_preserve;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param);
 		cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
 		cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
 		cookie_preserve->ph.param_length = htons(parameter_len);
 		cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
 		stcb->asoc.cookie_preserve_req = 0;
 		chunk_len += parameter_len;
 	}
 	if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
 		uint8_t i;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		if (stcb->asoc.scope.ipv4_addr_legal) {
 			parameter_len += (uint16_t) sizeof(uint16_t);
 		}
 		if (stcb->asoc.scope.ipv6_addr_legal) {
 			parameter_len += (uint16_t) sizeof(uint16_t);
 		}
 		sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
 		sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
 		sup_addr->ph.param_length = htons(parameter_len);
 		i = 0;
 		if (stcb->asoc.scope.ipv4_addr_legal) {
 			sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
 		}
 		if (stcb->asoc.scope.ipv6_addr_legal) {
 			sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
 		}
 		padding_len = 4 - 2 * i;
 		chunk_len += parameter_len;
 	}
 	SCTP_BUF_LEN(m) = chunk_len;
 	/* now the addresses */
 	/*
 	 * To optimize this we could put the scoping stuff into a structure
 	 * and remove the individual uint8's from the assoc structure. Then
 	 * we could just sifa in the address within the stcb. But for now
 	 * this is a quick hack to get the address stuff teased apart.
 	 */
 	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope,
 	    m, cnt_inits_to,
 	    &padding_len, &chunk_len);
 
 	init->ch.chunk_length = htons(chunk_len);
 	if (padding_len > 0) {
 		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n");
 	ret = sctp_lowlevel_chunk_output(inp, stcb, net,
 	    (struct sockaddr *)&net->ro._l_addr,
 	    m, 0, NULL, 0, 0, 0, 0,
 	    inp->sctp_lport, stcb->rport, htonl(0),
 	    net->port, NULL,
 	    0, 0,
 	    so_locked);
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 }
 
 struct mbuf *
 sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
     int param_offset, int *abort_processing, struct sctp_chunkhdr *cp, int *nat_friendly)
 {
 	/*
 	 * Given a mbuf containing an INIT or INIT-ACK with the param_offset
 	 * being equal to the beginning of the params i.e. (iphlen +
 	 * sizeof(struct sctp_init_msg) parse through the parameters to the
 	 * end of the chunk (as given by cp->chunk_length) verifying that all
 	 * parameters are known and have a plausible size.
 	 *
 	 * For unknown parameters that have the "report" bit (0x4000) set,
 	 * build and return a mbuf with UNRECOGNIZED_PARAMETER errors. If the
 	 * "skip" bit (0x8000) is clear, processing of the chunk stops at that
 	 * parameter and whatever error mbuf was built so far is returned.
 	 *
 	 * *abort_processing is set to 1 when a parameter has an invalid size
 	 * or a HOSTNAME address parameter is seen; otherwise it is 0 on
 	 * return. *nat_friendly is set to 1 when a SCTP_HAS_NAT_SUPPORT
 	 * parameter is seen and is left untouched otherwise.
 	 *
 	 * The return value is the error mbuf (possibly NULL). The caller
 	 * owns it.
 	 *
 	 * By having param_offset be pre-set to where parameters begin it is
 	 * hoped that this routine may be reused in the future by new
 	 * features.
 	 */
 	struct sctp_paramhdr *phdr, params;
 
 	struct mbuf *mat, *op_err;
 	char tempbuf[SCTP_PARAM_BUFFER_SIZE];
 	int at, limit, pad_needed;
 	uint16_t ptype, plen, padded_size;
 	int err_at;
 
 	*abort_processing = 0;
 	mat = in_initpkt;
 	/* err_at is the write offset into op_err for the next error cause */
 	err_at = 0;
 	/* limit counts the parameter bytes still left in the chunk */
 	limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
 	at = param_offset;
 	op_err = NULL;
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
 	phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
 	while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) {
 			/* wacked parameter */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen);
 			goto invalid_size;
 		}
 		limit -= SCTP_SIZE32(plen);
 		/*-
 		 * All parameters for all chunks that we know/understand are
 		 * listed here. We process them other places and make
 		 * appropriate stop actions per the upper bits. However this
 		 * is the generic routine processor's can call to get back
 		 * an operr.. to either incorporate (init-ack) or send.
 		 */
 		padded_size = SCTP_SIZE32(plen);
 		switch (ptype) {
 			/* Param's with variable size */
 		case SCTP_HEARTBEAT_INFO:
 		case SCTP_STATE_COOKIE:
 		case SCTP_UNRECOG_PARAM:
 		case SCTP_ERROR_CAUSE_IND:
 			/* ok skip fwd */
 			at += padded_size;
 			break;
 			/* Param's with variable size within a range */
 		case SCTP_CHUNK_LIST:
 		case SCTP_SUPPORTED_CHUNK_EXT:
 			if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SUPPORTED_ADDRTYPE:
 			if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_RANDOM:
 			if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SET_PRIM_ADDR:
 		case SCTP_DEL_IP_ADDRESS:
 		case SCTP_ADD_IP_ADDRESS:
 			if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) &&
 			    (padded_size != sizeof(struct sctp_asconf_addr_param))) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 			/* Param's with a fixed size */
 		case SCTP_IPV4_ADDRESS:
 			if (padded_size != sizeof(struct sctp_ipv4addr_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_IPV6_ADDRESS:
 			if (padded_size != sizeof(struct sctp_ipv6addr_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_COOKIE_PRESERVE:
 			if (padded_size != sizeof(struct sctp_cookie_perserve_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_HAS_NAT_SUPPORT:
 			*nat_friendly = 1;
 			/* fall through */
 		case SCTP_PRSCTP_SUPPORTED:
 			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_ECN_CAPABLE:
 			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_ULP_ADAPTATION:
 			if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adapatation %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SUCCESS_REPORT:
 			if (padded_size != sizeof(struct sctp_asconf_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_HOSTNAME_ADDRESS:
 			{
 				/* We can NOT handle HOST NAME addresses!! */
 				int l_len;
 
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
 				*abort_processing = 1;
 				if (op_err == NULL) {
 					/* Ok need to try to get a mbuf */
 #ifdef INET6
 					l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #else
 					l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #endif
 					l_len += plen;
 					l_len += sizeof(struct sctp_paramhdr);
 					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 					if (op_err) {
 						SCTP_BUF_LEN(op_err) = 0;
 						/*
 						 * pre-reserve space for ip
 						 * and sctp header  and
 						 * chunk hdr
 						 */
 #ifdef INET6
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 					}
 				}
 				if (op_err) {
 					/* If we have space */
 					struct sctp_paramhdr s;
 					uint16_t copy_len;
 
 					if (err_at % 4) {
 						uint32_t cpthis = 0;
 
 						/* zero-pad the previous cause to a 4 byte boundary */
 						pad_needed = 4 - (err_at % 4);
 						m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
 						err_at += pad_needed;
 					}
 					/*
 					 * Only up to sizeof(tempbuf) bytes are
 					 * requested from sctp_get_next_param()
 					 * below, so never copy (or advertise)
 					 * more than that; copying the full plen
 					 * could read past the end of tempbuf.
 					 */
 					copy_len = plen;
 					if (copy_len > sizeof(tempbuf)) {
 						copy_len = sizeof(tempbuf);
 					}
 					s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
 					s.param_length = htons(sizeof(s) + copy_len);
 					m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
 					err_at += sizeof(s);
 					phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, copy_len);
 					if (phdr == NULL) {
 						sctp_m_freem(op_err);
 						/*
 						 * The parameter could not be
 						 * fetched; drop the partial
 						 * error. *abort_processing is
 						 * already set, so the caller
 						 * still aborts.
 						 */
 						return (NULL);
 					}
 					m_copyback(op_err, err_at, copy_len, (caddr_t)phdr);
 				}
 				return (op_err);
 			}
 		default:
 			/*
 			 * we do not recognize the parameter figure out what
 			 * we do.
 			 */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype);
 			if ((ptype & 0x4000) == 0x4000) {
 				/* Report bit is set?? */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n");
 				if (op_err == NULL) {
 					int l_len;
 
 					/* Ok need to try to get an mbuf */
 #ifdef INET6
 					l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #else
 					l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #endif
 					l_len += plen;
 					l_len += sizeof(struct sctp_paramhdr);
 					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 					if (op_err) {
 						SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 					}
 				}
 				if (op_err) {
 					/* If we have space */
 					struct sctp_paramhdr s;
 					uint16_t copy_len;
 
 					if (err_at % 4) {
 						uint32_t cpthis = 0;
 
 						/* zero-pad the previous cause to a 4 byte boundary */
 						pad_needed = 4 - (err_at % 4);
 						m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
 						err_at += pad_needed;
 					}
 					/*
 					 * At most sizeof(tempbuf) bytes of the
 					 * offending parameter are echoed back.
 					 * Keep the clamp in a separate variable
 					 * so plen/padded_size still describe
 					 * the real parameter when we skip over
 					 * it below, and advertise exactly the
 					 * number of bytes that are copied.
 					 */
 					copy_len = plen;
 					if (copy_len > sizeof(tempbuf)) {
 						copy_len = sizeof(tempbuf);
 					}
 					s.param_type = htons(SCTP_UNRECOG_PARAM);
 					s.param_length = htons(sizeof(s) + copy_len);
 					m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
 					err_at += sizeof(s);
 					phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, copy_len);
 					if (phdr == NULL) {
 						sctp_m_freem(op_err);
 						/*
 						 * The parameter could not be
 						 * fetched. Drop the error mbuf
 						 * but still honour the skip/stop
 						 * bit below. Reset err_at so a
 						 * later report starts at the
 						 * beginning of a fresh mbuf.
 						 */
 						op_err = NULL;
 						err_at = 0;
 						goto more_processing;
 					}
 					m_copyback(op_err, err_at, copy_len, (caddr_t)phdr);
 					err_at += copy_len;
 				}
 			}
 	more_processing:
 			if ((ptype & 0x8000) == 0x0000) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n");
 				return (op_err);
 			} else {
 				/*
 				 * skip this parameter and continue
 				 * processing; advance by its real padded
 				 * size, matching what was taken off limit.
 				 */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n");
 				at += padded_size;
 			}
 			break;
 
 		}
 		phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
 	}
 	return (op_err);
 invalid_size:
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
 	*abort_processing = 1;
 	if ((op_err == NULL) && phdr) {
 		int l_len;
 
 #ifdef INET6
 		l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #else
 		l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 #endif
 		l_len += (2 * sizeof(struct sctp_paramhdr));
 		op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (op_err) {
 			SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 		}
 	}
 	if ((op_err) && phdr) {
 		struct sctp_paramhdr s;
 
 		if (err_at % 4) {
 			uint32_t cpthis = 0;
 
 			/* zero-pad the previous cause to a 4 byte boundary */
 			pad_needed = 4 - (err_at % 4);
 			m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
 			err_at += pad_needed;
 		}
 		s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
 		s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr));
 		m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
 		err_at += sizeof(s);
 		/* Only copy back the p-hdr that caused the issue */
 		m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr);
 	}
 	return (op_err);
 }
 
 static int
 sctp_are_there_new_addresses(struct sctp_association *asoc,
     struct mbuf *in_initpkt, int offset, struct sockaddr *src)
 {
 	/*
 	 * Given a INIT packet, look through the packet to verify that there
 	 * are NO new addresses. As we go through the parameters add reports
 	 * of any un-understood parameters that require an error.  Also we
 	 * must return (1) to drop the packet if we see a un-understood
 	 * parameter that tells us to drop the chunk.
 	 */
 	struct sockaddr *sa_touse;
 	struct sockaddr *sa;
 	struct sctp_paramhdr *phdr, params;
 	uint16_t ptype, plen;
 	uint8_t fnd;
 	struct sctp_nets *net;
 
 #ifdef INET
 	struct sockaddr_in sin4, *sa4;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6, *sa6;
 
 #endif
 
 #ifdef INET
 	memset(&sin4, 0, sizeof(sin4));
 	sin4.sin_family = AF_INET;
 	sin4.sin_len = sizeof(sin4);
 #endif
 #ifdef INET6
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(sin6);
 #endif
 	/* First what about the src address of the pkt ? */
 	fnd = 0;
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		sa = (struct sockaddr *)&net->ro._l_addr;
 		if (sa->sa_family == src->sa_family) {
 #ifdef INET
 			if (sa->sa_family == AF_INET) {
 				struct sockaddr_in *src4;
 
 				sa4 = (struct sockaddr_in *)sa;
 				src4 = (struct sockaddr_in *)src;
 				if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
 					fnd = 1;
 					break;
 				}
 			}
 #endif
 #ifdef INET6
 			if (sa->sa_family == AF_INET6) {
 				struct sockaddr_in6 *src6;
 
 				sa6 = (struct sockaddr_in6 *)sa;
 				src6 = (struct sockaddr_in6 *)src;
 				if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
 					fnd = 1;
 					break;
 				}
 			}
 #endif
 		}
 	}
 	if (fnd == 0) {
 		/* New address added! no need to look futher. */
 		return (1);
 	}
 	/* Ok so far lets munge through the rest of the packet */
 	offset += sizeof(struct sctp_init_chunk);
 	phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
 	while (phdr) {
 		sa_touse = NULL;
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		switch (ptype) {
 #ifdef INET
 		case SCTP_IPV4_ADDRESS:
 			{
 				struct sctp_ipv4addr_param *p4, p4_buf;
 
 				phdr = sctp_get_next_param(in_initpkt, offset,
 				    (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
 				if (plen != sizeof(struct sctp_ipv4addr_param) ||
 				    phdr == NULL) {
 					return (1);
 				}
 				p4 = (struct sctp_ipv4addr_param *)phdr;
 				sin4.sin_addr.s_addr = p4->addr;
 				sa_touse = (struct sockaddr *)&sin4;
 				break;
 			}
 #endif
 #ifdef INET6
 		case SCTP_IPV6_ADDRESS:
 			{
 				struct sctp_ipv6addr_param *p6, p6_buf;
 
 				phdr = sctp_get_next_param(in_initpkt, offset,
 				    (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
 				if (plen != sizeof(struct sctp_ipv6addr_param) ||
 				    phdr == NULL) {
 					return (1);
 				}
 				p6 = (struct sctp_ipv6addr_param *)phdr;
 				memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
 				    sizeof(p6->addr));
 				sa_touse = (struct sockaddr *)&sin6;
 				break;
 			}
 #endif
 		default:
 			sa_touse = NULL;
 			break;
 		}
 		if (sa_touse) {
 			/* ok, sa_touse points to one to check */
 			fnd = 0;
 			TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 				sa = (struct sockaddr *)&net->ro._l_addr;
 				if (sa->sa_family != sa_touse->sa_family) {
 					continue;
 				}
 #ifdef INET
 				if (sa->sa_family == AF_INET) {
 					sa4 = (struct sockaddr_in *)sa;
 					if (sa4->sin_addr.s_addr ==
 					    sin4.sin_addr.s_addr) {
 						fnd = 1;
 						break;
 					}
 				}
 #endif
 #ifdef INET6
 				if (sa->sa_family == AF_INET6) {
 					sa6 = (struct sockaddr_in6 *)sa;
 					if (SCTP6_ARE_ADDR_EQUAL(
 					    sa6, &sin6)) {
 						fnd = 1;
 						break;
 					}
 				}
 #endif
 			}
 			if (!fnd) {
 				/* New addr added! no need to look further */
 				return (1);
 			}
 		}
 		offset += SCTP_SIZE32(plen);
 		phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
 	}
 	return (0);
 }
 
 /*
  * Given a MBUF chain that was sent into us containing an INIT. Build a
  * INIT-ACK with COOKIE and send back. We assume that the in_initpkt has done
  * a pullup to include IPv6/4header, SCTP header and initial part of INIT
  * message (i.e. the struct sctp_init_msg).
  */
 void
 sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct mbuf *init_pkt, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_init_chunk *init_chk,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port, int hold_inp_lock)
 {
 	struct sctp_association *asoc;
 	struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err;
 	struct sctp_init_ack_chunk *initack;
 	struct sctp_adaptation_layer_indication *ali;
 	struct sctp_supported_chunk_types_param *pr_supported;
 	struct sctp_paramhdr *ph;
 	union sctp_sockstore *over_addr;
 	struct sctp_scoping scp;
 
 #ifdef INET
 	struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
 	struct sockaddr_in *src4 = (struct sockaddr_in *)src;
 	struct sockaddr_in *sin;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
 	struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;
 	struct sockaddr_in6 *sin6;
 
 #endif
 	struct sockaddr *to;
 	struct sctp_state_cookie stc;
 	struct sctp_nets *net = NULL;
 	uint8_t *signature = NULL;
 	int cnt_inits_to = 0;
 	uint16_t his_limit, i_want;
 	int abort_flag;
 	int nat_friendly = 0;
 	struct socket *so;
 	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
 	if (stcb) {
 		asoc = &stcb->asoc;
 	} else {
 		asoc = NULL;
 	}
 	if ((asoc != NULL) &&
 	    (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
 	    (sctp_are_there_new_addresses(asoc, init_pkt, offset, src))) {
 		/* new addresses, out of here in non-cookie-wait states */
 		/*
 		 * Send a ABORT, we don't add the new address error clause
 		 * though we even set the T bit and copy in the 0 tag.. this
 		 * looks no different than if no listener was present.
 		 */
 		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 		    "Address added");
 		sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		return;
 	}
 	abort_flag = 0;
 	op_err = sctp_arethere_unrecognized_parameters(init_pkt,
 	    (offset + sizeof(struct sctp_init_chunk)),
 	    &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
 	if (abort_flag) {
 do_a_abort:
 		if (op_err == NULL) {
 			char msg[SCTP_DIAG_INFO_LEN];
 
 			snprintf(msg, sizeof(msg), "%s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 		}
 		sctp_send_abort(init_pkt, iphlen, src, dst, sh,
 		    init_chk->init.initiate_tag, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		return;
 	}
 	m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		if (op_err)
 			sctp_m_freem(op_err);
 		return;
 	}
 	chunk_len = (uint16_t) sizeof(struct sctp_init_ack_chunk);
 	padding_len = 0;
 
 	/*
 	 * We might not overwrite the identification[] completely and on
 	 * some platforms time_entered will contain some padding. Therefore
 	 * zero out the cookie to avoid putting uninitialized memory on the
 	 * wire.
 	 */
 	memset(&stc, 0, sizeof(struct sctp_state_cookie));
 
 	/* the time I built cookie */
 	(void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
 
 	/* populate any tie tags */
 	if (asoc != NULL) {
 		/* unlock before tag selections */
 		stc.tie_tag_my_vtag = asoc->my_vtag_nonce;
 		stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce;
 		stc.cookie_life = asoc->cookie_life;
 		net = asoc->primary_destination;
 	} else {
 		stc.tie_tag_my_vtag = 0;
 		stc.tie_tag_peer_vtag = 0;
 		/* life I will award this cookie */
 		stc.cookie_life = inp->sctp_ep.def_cookie_life;
 	}
 
 	/* copy in the ports for later check */
 	stc.myport = sh->dest_port;
 	stc.peerport = sh->src_port;
 
 	/*
 	 * If we wanted to honor cookie life extentions, we would add to
 	 * stc.cookie_life. For now we should NOT honor any extension
 	 */
 	stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		stc.ipv6_addr_legal = 1;
 		if (SCTP_IPV6_V6ONLY(inp)) {
 			stc.ipv4_addr_legal = 0;
 		} else {
 			stc.ipv4_addr_legal = 1;
 		}
 	} else {
 		stc.ipv6_addr_legal = 0;
 		stc.ipv4_addr_legal = 1;
 	}
 #ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
 	stc.ipv4_scope = 1;
 #else
 	stc.ipv4_scope = 0;
 #endif
 	if (net == NULL) {
 		to = src;
 		switch (dst->sa_family) {
 #ifdef INET
 		case AF_INET:
 			{
 				/* lookup address */
 				stc.address[0] = src4->sin_addr.s_addr;
 				stc.address[1] = 0;
 				stc.address[2] = 0;
 				stc.address[3] = 0;
 				stc.addr_type = SCTP_IPV4_ADDRESS;
 				/* local from address */
 				stc.laddress[0] = dst4->sin_addr.s_addr;
 				stc.laddress[1] = 0;
 				stc.laddress[2] = 0;
 				stc.laddress[3] = 0;
 				stc.laddr_type = SCTP_IPV4_ADDRESS;
 				/* scope_id is only for v6 */
 				stc.scope_id = 0;
 #ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
 				if (IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) {
 					stc.ipv4_scope = 1;
 				}
 #else
 				stc.ipv4_scope = 1;
 #endif				/* SCTP_DONT_DO_PRIVADDR_SCOPE */
 				/* Must use the address in this case */
 				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.ipv4_scope = 1;
 					stc.site_scope = 1;
 					stc.local_scope = 0;
 				}
 				break;
 			}
 #endif
 #ifdef INET6
 		case AF_INET6:
 			{
 				stc.addr_type = SCTP_IPV6_ADDRESS;
 				memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
 				stc.scope_id = in6_getscope(&src6->sin6_addr);
 				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.local_scope = 0;
 					stc.site_scope = 1;
 					stc.ipv4_scope = 1;
 				} else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr)) {
 					/*
 					 * If the new destination is a
 					 * LINK_LOCAL we must have common
 					 * both site and local scope. Don't
 					 * set local scope though since we
 					 * must depend on the source to be
 					 * added implicitly. We cannot
 					 * assure just because we share one
 					 * link that all links are common.
 					 */
 					stc.local_scope = 0;
 					stc.site_scope = 1;
 					stc.ipv4_scope = 1;
 					/*
 					 * we start counting for the private
 					 * address stuff at 1. since the
 					 * link local we source from won't
 					 * show up in our scoped count.
 					 */
 					cnt_inits_to = 1;
 					/*
 					 * pull out the scope_id from
 					 * incoming pkt
 					 */
 				} else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr)) {
 					/*
 					 * If the new destination is
 					 * SITE_LOCAL then we must have site
 					 * scope in common.
 					 */
 					stc.site_scope = 1;
 				}
 				memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr));
 				stc.laddr_type = SCTP_IPV6_ADDRESS;
 				break;
 			}
 #endif
 		default:
 			/* TSNH */
 			goto do_a_abort;
 			break;
 		}
 	} else {
 		/* set the scope per the existing tcb */
 
 #ifdef INET6
 		struct sctp_nets *lnet;
 
 #endif
 
 		stc.loopback_scope = asoc->scope.loopback_scope;
 		stc.ipv4_scope = asoc->scope.ipv4_local_scope;
 		stc.site_scope = asoc->scope.site_scope;
 		stc.local_scope = asoc->scope.local_scope;
 #ifdef INET6
 		/* Why do we not consider IPv4 LL addresses? */
 		TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
 			if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) {
 				if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) {
 					/*
 					 * if we have a LL address, start
 					 * counting at 1.
 					 */
 					cnt_inits_to = 1;
 				}
 			}
 		}
 #endif
 		/* use the net pointer */
 		to = (struct sockaddr *)&net->ro._l_addr;
 		switch (to->sa_family) {
 #ifdef INET
 		case AF_INET:
 			sin = (struct sockaddr_in *)to;
 			stc.address[0] = sin->sin_addr.s_addr;
 			stc.address[1] = 0;
 			stc.address[2] = 0;
 			stc.address[3] = 0;
 			stc.addr_type = SCTP_IPV4_ADDRESS;
 			if (net->src_addr_selected == 0) {
 				/*
 				 * strange case here, the INIT should have
 				 * did the selection.
 				 */
 				net->ro._s_addr = sctp_source_address_selection(inp,
 				    stcb, (sctp_route_t *) & net->ro,
 				    net, 0, vrf_id);
 				if (net->ro._s_addr == NULL)
 					return;
 
 				net->src_addr_selected = 1;
 
 			}
 			stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr;
 			stc.laddress[1] = 0;
 			stc.laddress[2] = 0;
 			stc.laddress[3] = 0;
 			stc.laddr_type = SCTP_IPV4_ADDRESS;
 			/* scope_id is only for v6 */
 			stc.scope_id = 0;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			sin6 = (struct sockaddr_in6 *)to;
 			memcpy(&stc.address, &sin6->sin6_addr,
 			    sizeof(struct in6_addr));
 			stc.addr_type = SCTP_IPV6_ADDRESS;
 			stc.scope_id = sin6->sin6_scope_id;
 			if (net->src_addr_selected == 0) {
 				/*
 				 * strange case here, the INIT should have
 				 * done the selection.
 				 */
 				net->ro._s_addr = sctp_source_address_selection(inp,
 				    stcb, (sctp_route_t *) & net->ro,
 				    net, 0, vrf_id);
 				if (net->ro._s_addr == NULL)
 					return;
 
 				net->src_addr_selected = 1;
 			}
 			memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr,
 			    sizeof(struct in6_addr));
 			stc.laddr_type = SCTP_IPV6_ADDRESS;
 			break;
 #endif
 		}
 	}
 	/* Now lets put the SCTP header in place */
 	initack = mtod(m, struct sctp_init_ack_chunk *);
 	/* Save it off for quick ref */
 	stc.peers_vtag = init_chk->init.initiate_tag;
 	/* who are we */
 	memcpy(stc.identification, SCTP_VERSION_STRING,
 	    min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
 	memset(stc.reserved, 0, SCTP_RESERVE_SPACE);
 	/* now the chunk header */
 	initack->ch.chunk_type = SCTP_INITIATION_ACK;
 	initack->ch.chunk_flags = 0;
 	/* fill in later from mbuf we build */
 	initack->ch.chunk_length = 0;
 	/* place in my tag */
 	if ((asoc != NULL) &&
 	    ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
 		/* re-use the v-tags and init-seq here */
 		initack->init.initiate_tag = htonl(asoc->my_vtag);
 		initack->init.initial_tsn = htonl(asoc->init_seq_number);
 	} else {
 		uint32_t vtag, itsn;
 
 		if (hold_inp_lock) {
 			SCTP_INP_INCR_REF(inp);
 			SCTP_INP_RUNLOCK(inp);
 		}
 		if (asoc) {
 			atomic_add_int(&asoc->refcnt, 1);
 			SCTP_TCB_UNLOCK(stcb);
 	new_tag:
 			vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
 			if ((asoc->peer_supports_nat) && (vtag == asoc->my_vtag)) {
 				/*
 				 * Got a duplicate vtag on some guy behind a
 				 * nat make sure we don't use it.
 				 */
 				goto new_tag;
 			}
 			initack->init.initiate_tag = htonl(vtag);
 			/* get a TSN to use too */
 			itsn = sctp_select_initial_TSN(&inp->sctp_ep);
 			initack->init.initial_tsn = htonl(itsn);
 			SCTP_TCB_LOCK(stcb);
 			atomic_add_int(&asoc->refcnt, -1);
 		} else {
 			vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
 			initack->init.initiate_tag = htonl(vtag);
 			/* get a TSN to use too */
 			initack->init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep));
 		}
 		if (hold_inp_lock) {
 			SCTP_INP_RLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 		}
 	}
 	/* save away my tag to */
 	stc.my_vtag = initack->init.initiate_tag;
 
 	/* set up some of the credits. */
 	so = inp->sctp_socket;
 	if (so == NULL) {
 		/* memory problem */
 		sctp_m_freem(m);
 		return;
 	} else {
 		initack->init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND));
 	}
 	/* set what I want */
 	his_limit = ntohs(init_chk->init.num_inbound_streams);
 	/* choose what I want */
 	if (asoc != NULL) {
 		if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
 			i_want = asoc->streamoutcnt;
 		} else {
 			i_want = inp->sctp_ep.pre_open_stream_count;
 		}
 	} else {
 		i_want = inp->sctp_ep.pre_open_stream_count;
 	}
 	if (his_limit < i_want) {
 		/* I Want more :< */
 		initack->init.num_outbound_streams = init_chk->init.num_inbound_streams;
 	} else {
 		/* I can have what I want :> */
 		initack->init.num_outbound_streams = htons(i_want);
 	}
 	/* tell him his limit. */
 	initack->init.num_inbound_streams =
 	    htons(inp->sctp_ep.max_open_streams_intome);
 
 	/* adaptation layer indication parameter */
 	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
 		parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
 		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
 		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
 		ali->ph.param_length = htons(parameter_len);
 		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
 		chunk_len += parameter_len;
 	}
 	/* ECN parameter */
 	if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
 	    ((asoc == NULL) && (inp->ecn_supported == 1))) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_ECN_CAPABLE);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* PR-SCTP supported parameter */
 	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
 	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* Add NAT friendly parameter */
 	if (nat_friendly) {
 		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 	/* And now tell the peer which extensions we support */
 	num_ext = 0;
 	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
 	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
 	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
 	}
 	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
 	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
 	}
 	if (((asoc != NULL) && (asoc->asconf_supported == 1)) ||
 	    ((asoc == NULL) && (inp->asconf_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
 	}
 	if (((asoc != NULL) && (asoc->reconfig_supported == 1)) ||
 	    ((asoc == NULL) && (inp->reconfig_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
 	}
 	if (((asoc != NULL) && (asoc->nrsack_supported == 1)) ||
 	    ((asoc == NULL) && (inp->nrsack_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
 	}
 	if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) ||
 	    ((asoc == NULL) && (inp->pktdrop_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
 	}
 	if (num_ext > 0) {
 		parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
 		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
 		pr_supported->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 	/* add authentication parameters */
 	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
 	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		struct sctp_auth_random *randp;
 		struct sctp_auth_hmac_algo *hmacs;
 		struct sctp_auth_chunk_list *chunks;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* generate and add RANDOM parameter */
 		randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t) sizeof(struct sctp_auth_random) +
 		    SCTP_AUTH_RANDOM_SIZE_DEFAULT;
 		randp->ph.param_type = htons(SCTP_RANDOM);
 		randp->ph.param_length = htons(parameter_len);
 		SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* add HMAC_ALGO parameter */
 		hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t) sizeof(struct sctp_auth_hmac_algo) +
 		    sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
 		    (uint8_t *) hmacs->hmac_ids);
 		hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
 		hmacs->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* add CHUNKS parameter */
 		chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t) sizeof(struct sctp_auth_chunk_list) +
 		    sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
 		    chunks->chunk_types);
 		chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
 		chunks->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 	SCTP_BUF_LEN(m) = chunk_len;
 	m_last = m;
 	/* now the addresses */
 	/*
 	 * To optimize this we could put the scoping stuff into a structure
 	 * and remove the individual uint8's from the stc structure. Then we
 	 * could just pass in the address within the stc.. but for now this
 	 * is a quick hack to get the address stuff teased apart.
 	 */
 	scp.ipv4_addr_legal = stc.ipv4_addr_legal;
 	scp.ipv6_addr_legal = stc.ipv6_addr_legal;
 	scp.loopback_scope = stc.loopback_scope;
 	scp.ipv4_local_scope = stc.ipv4_scope;
 	scp.local_scope = stc.local_scope;
 	scp.site_scope = stc.site_scope;
 	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last,
 	    cnt_inits_to,
 	    &padding_len, &chunk_len);
 	/* padding_len can only be positive, if no addresses have been added */
 	if (padding_len > 0) {
 		memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 		chunk_len += padding_len;
 		SCTP_BUF_LEN(m) += padding_len;
 		padding_len = 0;
 	}
 	/* tack on the operational error if present */
 	if (op_err) {
 		parameter_len = 0;
 		for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
 			parameter_len += SCTP_BUF_LEN(m_tmp);
 		}
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		SCTP_BUF_NEXT(m_last) = op_err;
 		while (SCTP_BUF_NEXT(m_last) != NULL) {
 			m_last = SCTP_BUF_NEXT(m_last);
 		}
 		chunk_len += parameter_len;
 	}
 	if (padding_len > 0) {
 		m_last = sctp_add_pad_tombuf(m_last, padding_len);
 		if (m_last == NULL) {
 			/* Houston we have a problem, no space */
 			sctp_m_freem(m);
 			return;
 		}
 		chunk_len += padding_len;
 		padding_len = 0;
 	}
 	/* Now we must build a cookie */
 	m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature);
 	if (m_cookie == NULL) {
 		/* memory problem */
 		sctp_m_freem(m);
 		return;
 	}
 	/* Now append the cookie to the end and update the space/size */
 	SCTP_BUF_NEXT(m_last) = m_cookie;
 	parameter_len = 0;
 	for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
 		parameter_len += SCTP_BUF_LEN(m_tmp);
 		if (SCTP_BUF_NEXT(m_tmp) == NULL) {
 			m_last = m_tmp;
 		}
 	}
 	padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 	chunk_len += parameter_len;
 
 	/*
 	 * Place in the size, but we don't include the last pad (if any) in
 	 * the INIT-ACK.
 	 */
 	initack->ch.chunk_length = htons(chunk_len);
 
 	/*
 	 * Time to sign the cookie, we don't sign over the cookie signature
 	 * though thus we set trailer.
 	 */
 	(void)sctp_hmac_m(SCTP_HMAC,
 	    (uint8_t *) inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)],
 	    SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr),
 	    (uint8_t *) signature, SCTP_SIGNATURE_SIZE);
 	/*
 	 * We pass 0 here to NOT set IP_DF if it's IPv4; we ignore the return
 	 * here since the timer will drive a retransmission.
 	 */
 	if (padding_len > 0) {
 		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
 	}
 	if (stc.loopback_scope) {
 		over_addr = (union sctp_sockstore *)dst;
 	} else {
 		over_addr = NULL;
 	}
 
 	(void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
 	    0, 0,
 	    inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
 	    port, over_addr,
 	    mflowtype, mflowid,
 	    SCTP_SO_NOT_LOCKED);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 }
 
 
 /*
  * Release PR-SCTP chunks that use the buffer-limited policy in order to
  * make room for a new message.
  *
  * The sent queue is scanned first and the send queue second.  A chunk is
  * handed to sctp_release_pr_sctp_chunk() when PR_SCTP_BUF_ENABLED() holds
  * for its flags, its timetodrop.tv_sec value is greater than or equal to
  * srcv->sinfo_timetolive (for this policy the field carries a priority,
  * lower numbers meaning higher priority) and it still owns data.
  * Scanning stops as soon as the summed return values of
  * sctp_release_pr_sctp_chunk() reach dataout.
  *
  * Nothing is done unless asoc->prsctp_supported is set and
  * asoc->sent_queue_cnt_removeable is positive.
  */
 static void
 sctp_prune_prsctp(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_sndrcvinfo *srcv,
     int dataout)
 {
 	struct sctp_tmit_chunk *chk, *nchk;
 	int reclaimed = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (!(asoc->prsctp_supported) ||
 	    !(asoc->sent_queue_cnt_removeable > 0)) {
 		/* PR-SCTP is off or nothing is marked as removable. */
 		return;
 	}
 	/* Pass 1: chunks sitting on the sent queue. */
 	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
 		uint8_t was_sent;
 
 		if (!(PR_SCTP_BUF_ENABLED(chk->flags))) {
 			/* Not a buffer-limited PR-SCTP chunk. */
 			continue;
 		}
 		if (chk->rec.data.timetodrop.tv_sec < (long)srcv->sinfo_timetolive) {
 			/* Higher priority than the new message: keep it. */
 			continue;
 		}
 		if (chk->data == NULL) {
 			/* No mbuf left, so there is no space to give back. */
 			continue;
 		}
 		was_sent = (chk->sent > SCTP_DATAGRAM_UNSENT) ? 1 : 0;
 		reclaimed += sctp_release_pr_sctp_chunk(stcb, chk,
 		    was_sent,
 		    SCTP_SO_LOCKED);
 		if (reclaimed >= dataout) {
 			return;
 		}
 	}
 
 	/* Pass 2: chunks on the send queue; these are released as unsent. */
 	TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 		if (!(PR_SCTP_BUF_ENABLED(chk->flags))) {
 			continue;
 		}
 		if (chk->rec.data.timetodrop.tv_sec < (long)srcv->sinfo_timetolive) {
 			continue;
 		}
 		if (chk->data == NULL) {
 			continue;
 		}
 		reclaimed += sctp_release_pr_sctp_chunk(stcb, chk,
 		    0, SCTP_SO_LOCKED);
 		if (reclaimed >= dataout) {
 			return;
 		}
 	}
 }
 
 /*
  * Compute the fragmentation point for an association.
  *
  * The smaller of stcb->asoc.sctp_frag_point and asoc->smallest_mtu is
  * reduced by the header overhead, then by the AUTH chunk length when DATA
  * chunks must be authenticated, and finally rounded down to a multiple
  * of four.
  */
 int
 sctp_get_frag_point(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	int overhead, frag;
 
 	/*
 	 * Endpoints bound for v6 must leave room for the IPv6 header;
 	 * v4-only endpoints get the smaller overhead and therefore a
 	 * larger fragmentation point.
 	 */
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		overhead = SCTP_MED_OVERHEAD;
 	} else {
 		overhead = SCTP_MED_V4_OVERHEAD;
 	}
 
 	if (stcb->asoc.sctp_frag_point <= asoc->smallest_mtu) {
 		frag = (stcb->asoc.sctp_frag_point - overhead);
 	} else {
 		frag = asoc->smallest_mtu - overhead;
 	}
 	/*
 	 * Note: no clamp to (MCLBYTES - sizeof(struct sctp_data_chunk)) is
 	 * applied here.
 	 */
 
 	/* leave room for an AUTH chunk if DATA requires auth */
 	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 		frag -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 	}
 
 	/* trim to a 4-byte boundary (subtracts zero when already aligned) */
 	frag -= (frag % 4);
 	return (frag);
 }
 
 /*
  * Derive the PR-SCTP policy of a pending message and initialise sp->ts
  * according to that policy.
  *
  * If the user selected a policy in sp->sinfo_flags it is copied into
  * sp->act_flags.  If no policy was selected but sp->timetolive is
  * positive, SCTP_PR_SCTP_TTL is assumed.  Otherwise nothing is changed.
  *
  * Meaning of sp->ts afterwards:
  *   - BUF policy: tv_sec holds sp->timetolive (used as a priority).
  *   - TTL policy: current time plus sp->timetolive, which is converted
  *     from milliseconds.
  *   - RTX policy: tv_sec holds sp->timetolive (a retransmission count).
  */
 static void
 sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp)
 {
 	/*
 	 * We assume that the user wants PR_SCTP_TTL if the user provides a
 	 * positive lifetime but does not specify any PR_SCTP policy.
 	 */
 	if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
 		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
 	} else if (sp->timetolive > 0) {
 		sp->sinfo_flags |= SCTP_PR_SCTP_TTL;
 		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
 	} else {
 		return;
 	}
 	switch (PR_SCTP_POLICY(sp->sinfo_flags)) {
 	case CHUNK_FLAGS_PR_SCTP_BUF:
 		/*
 		 * Time to live is a priority stored in tv_sec when doing
 		 * the buffer drop thing.
 		 */
 		sp->ts.tv_sec = sp->timetolive;
 		sp->ts.tv_usec = 0;
 		break;
 	case CHUNK_FLAGS_PR_SCTP_TTL:
 		{
 			struct timeval tv;
 
 			(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 			tv.tv_sec = sp->timetolive / 1000;
 			/*
 			 * Take the sub-second remainder before scaling to
 			 * microseconds: the product is then at most
 			 * 999000 and cannot overflow, whereas
 			 * (timetolive * 1000) could wrap for large
 			 * lifetimes and yield a wrong tv_usec.
 			 */
 			tv.tv_usec = (sp->timetolive % 1000) * 1000;
 			/*
 			 * TODO sctp_constants.h needs alternative time
 			 * macros when _KERNEL is undefined.
 			 */
 			timevaladd(&sp->ts, &tv);
 		}
 		break;
 	case CHUNK_FLAGS_PR_SCTP_RTX:
 		/*
 		 * Time to live is the number of retransmissions stored in
 		 * tv_sec.
 		 */
 		sp->ts.tv_sec = sp->timetolive;
 		sp->ts.tv_usec = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_USRREQ1,
 		    "Unknown PR_SCTP policy %u.\n",
 		    PR_SCTP_POLICY(sp->sinfo_flags));
 		break;
 	}
 }
 
 static int
 sctp_msg_append(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     struct mbuf *m,
     struct sctp_sndrcvinfo *srcv, int hold_stcb_lock)
 {
 	int error = 0;
 	struct mbuf *at;
 	struct sctp_stream_queue_pending *sp = NULL;
 	struct sctp_stream_out *strm;
 
 	/*
 	 * Given an mbuf chain, put it into the association send queue and
 	 * place it on the wheel
 	 */
 	if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) {
 		/* Invalid stream number */
 		SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto out_now;
 	}
 	if ((stcb->asoc.stream_locked) &&
 	    (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) {
 		SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto out_now;
 	}
 	strm = &stcb->asoc.strmout[srcv->sinfo_stream];
 	/* Now can we send this? */
 	if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		/* got data while shutting down */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 		error = ECONNRESET;
 		goto out_now;
 	}
 	sctp_alloc_a_strmoq(stcb, sp);
 	if (sp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		error = ENOMEM;
 		goto out_now;
 	}
 	sp->sinfo_flags = srcv->sinfo_flags;
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
 	if (sp->sinfo_flags & SCTP_ADDR_OVER) {
 		sp->net = net;
 		atomic_add_int(&sp->net->ref_count, 1);
 	} else {
 		sp->net = NULL;
 	}
 	(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 	sp->stream = srcv->sinfo_stream;
 	sp->msg_is_complete = 1;
 	sp->sender_all_done = 1;
 	sp->some_taken = 0;
 	sp->data = m;
 	sp->tail_mbuf = NULL;
 	sctp_set_prsctp_policy(sp);
 	/*
 	 * We could in theory (for sendall) sifa the length in, but we would
 	 * still have to hunt through the chain since we need to setup the
 	 * tail_mbuf
 	 */
 	sp->length = 0;
 	for (at = m; at; at = SCTP_BUF_NEXT(at)) {
 		if (SCTP_BUF_NEXT(at) == NULL)
 			sp->tail_mbuf = at;
 		sp->length += SCTP_BUF_LEN(at);
 	}
 	if (srcv->sinfo_keynumber_valid) {
 		sp->auth_keyid = srcv->sinfo_keynumber;
 	} else {
 		sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
 	}
 	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 		sctp_auth_key_acquire(stcb, sp->auth_keyid);
 		sp->holds_key_ref = 1;
 	}
 	if (hold_stcb_lock == 0) {
 		SCTP_TCB_SEND_LOCK(stcb);
 	}
 	sctp_snd_sb_alloc(stcb, sp->length);
 	atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
 	TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
 	stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp, 1);
 	m = NULL;
 	if (hold_stcb_lock == 0) {
 		SCTP_TCB_SEND_UNLOCK(stcb);
 	}
 out_now:
 	if (m) {
 		sctp_m_freem(m);
 	}
 	return (error);
 }
 
 
 /*
  * Append the data of clonechain to outchain and return the head of the
  * resulting chain.
  *
  * clonechain     data to append
  * outchain       chain being built, may be NULL
  * endofchain     in/out pointer to the last mbuf of outchain; must not be
  *                NULL itself, *endofchain may be NULL
  * can_take_mbuf  when non-zero the clonechain mbufs themselves are linked
  *                in instead of being copied
  * sizeofcpy      number of bytes to copy on the byte-copy path
  * copy_by_ref    when non-zero the byte-copy path is skipped and
  *                SCTP_M_COPYM() is used
  *
  * Three strategies are used:
  *   1. can_take_mbuf: link clonechain directly.
  *   2. !copy_by_ref and sizeofcpy within the sctp_mbuf_threshold_count
  *      based limit: copy the bytes with m_copydata() into the trailing
  *      space of *endofchain, allocating mbufs as needed.
  *   3. otherwise: link a copy produced by SCTP_M_COPYM().
  *
  * On success *endofchain points at the last mbuf.  On failure NULL is
  * returned and outchain (if any) has been freed; clonechain is not freed
  * by this function.
  */
 static struct mbuf *
 sctp_copy_mbufchain(struct mbuf *clonechain,
     struct mbuf *outchain,
     struct mbuf **endofchain,
     int can_take_mbuf,
     int sizeofcpy,
     uint8_t copy_by_ref)
 {
 	struct mbuf *m;
 	struct mbuf *appendchain;
 	caddr_t cp;
 	int len;
 
 	if (endofchain == NULL) {
 		/* error */
 		/* Shared failure exit; also reached by goto from below. */
 error_out:
 		if (outchain)
 			sctp_m_freem(outchain);
 		return (NULL);
 	}
 	if (can_take_mbuf) {
 		/* Strategy 1: take over the caller's mbufs as they are. */
 		appendchain = clonechain;
 	} else {
 		if (!copy_by_ref &&
 		    (sizeofcpy <= (int)((((SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) - 1) * MLEN) + MHLEN)))
 		    ) {
 			/* Strategy 2: small enough, copy the bytes. */
 			/* Its not in a cluster */
 			if (*endofchain == NULL) {
 				/* lets get a mbuf cluster */
 				if (outchain == NULL) {
 					/* This is the general case */
 			new_mbuf:
 					outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 					if (outchain == NULL) {
 						goto error_out;
 					}
 					SCTP_BUF_LEN(outchain) = 0;
 					*endofchain = outchain;
 					/* get the prepend space */
 					SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4));
 				} else {
 					/*
 					 * We really should not get a NULL
 					 * in endofchain
 					 */
 					/* find end */
 					m = outchain;
 					while (m) {
 						if (SCTP_BUF_NEXT(m) == NULL) {
 							*endofchain = m;
 							break;
 						}
 						m = SCTP_BUF_NEXT(m);
 					}
 					/* sanity */
 					if (*endofchain == NULL) {
 						/*
 						 * huh, TSNH XXX maybe we
 						 * should panic
 						 */
 						sctp_m_freem(outchain);
 						goto new_mbuf;
 					}
 				}
 				/* get the new end of length */
 				len = M_TRAILINGSPACE(*endofchain);
 			} else {
 				/* how much is left at the end? */
 				len = M_TRAILINGSPACE(*endofchain);
 			}
 			/* Find the end of the data, for appending */
 			cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
 
 			/* Now lets copy it out */
 			if (len >= sizeofcpy) {
 				/* It all fits, copy it in */
 				m_copydata(clonechain, 0, sizeofcpy, cp);
 				SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
 			} else {
 				/* fill up the end of the chain */
 				if (len > 0) {
 					m_copydata(clonechain, 0, len, cp);
 					SCTP_BUF_LEN((*endofchain)) += len;
 					/* now we need another one */
 					sizeofcpy -= len;
 				}
 				m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 				if (m == NULL) {
 					/* We failed */
 					goto error_out;
 				}
 				SCTP_BUF_NEXT((*endofchain)) = m;
 				*endofchain = m;
 				cp = mtod((*endofchain), caddr_t);
 				/*
 				 * Copy the remainder, starting at offset
 				 * len in clonechain, into the new mbuf.
 				 */
 				m_copydata(clonechain, len, sizeofcpy, cp);
 				SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
 			}
 			return (outchain);
 		} else {
 			/* Strategy 3: let SCTP_M_COPYM() produce a copy. */
 			/* copy the old fashion way */
 			appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT);
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 				sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY);
 			}
 #endif
 		}
 	}
 	if (appendchain == NULL) {
 		/* error */
 		if (outchain)
 			sctp_m_freem(outchain);
 		return (NULL);
 	}
 	if (outchain) {
 		/* tack on to the end */
 		if (*endofchain != NULL) {
 			SCTP_BUF_NEXT(((*endofchain))) = appendchain;
 		} else {
 			/* No cached tail: walk outchain to find its last mbuf. */
 			m = outchain;
 			while (m) {
 				if (SCTP_BUF_NEXT(m) == NULL) {
 					SCTP_BUF_NEXT(m) = appendchain;
 					break;
 				}
 				m = SCTP_BUF_NEXT(m);
 			}
 		}
 		/*
 		 * save off the end and update the end-chain position
 		 */
 		m = appendchain;
 		while (m) {
 			if (SCTP_BUF_NEXT(m) == NULL) {
 				*endofchain = m;
 				break;
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 		return (outchain);
 	} else {
 		/* save off the end and update the end-chain position */
 		m = appendchain;
 		while (m) {
 			if (SCTP_BUF_NEXT(m) == NULL) {
 				*endofchain = m;
 				break;
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 		/* There was no outchain, so the appended chain is the result. */
 		return (appendchain);
 	}
 }
 
 /*
  * Forward declaration: sctp_sendall_iterator() below calls this function
  * before its definition appears.  The so_locked parameter is tagged
  * SCTP_UNUSED unless building for __APPLE__ or with SCTP_SO_LOCK_TESTING.
  */
 static int
 sctp_med_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *num_out,
     int *reason_code,
     int control_only, int from_where,
     struct timeval *now, int *now_filled, int frag_point, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 );
 
 /*
  * Per-association callback of the iterator started by sctp_sendall().
  *
  * ptr is the struct sctp_copy_all prepared by sctp_sendall().  For the
  * association stcb this function
  *   - makes a private copy of the message (when ca->sndlen > 0),
  *   - aborts the association when SCTP_ABORT is set, using the copy as
  *     the user supplied abort reason, or
  *   - queues the copy with sctp_msg_append() and, when SCTP_EOF is set,
  *     starts the shutdown (or marks it pending while data is queued),
  *   - triggers chunk output where appropriate.
  *
  * ca->cnt_sent or ca->cnt_failed is bumped to record the outcome.
  * The TCB lock is expected to be held on entry (SCTP_TCB_LOCK_ASSERT).
  */
 static void
 sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
     uint32_t val SCTP_UNUSED)
 {
 	struct sctp_copy_all *ca;
 	struct mbuf *m;
 	int ret = 0;
 	int added_control = 0;
 	int un_sent, do_chunk_output = 1;
 	struct sctp_association *asoc;
 	struct sctp_nets *net;
 
 	ca = (struct sctp_copy_all *)ptr;
 	if (ca->m == NULL) {
 		return;
 	}
 	if (ca->inp != inp) {
 		/* TSNH */
 		return;
 	}
 	if (ca->sndlen > 0) {
 		/* Each association gets its own copy of the message. */
 		m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT);
 		if (m == NULL) {
 			/* can't copy so we are done */
 			ca->cnt_failed++;
 			return;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(m, SCTP_MBUF_ICOPY);
 		}
 #endif
 	} else {
 		m = NULL;
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* Prefer the alternate destination when one is set. */
 	if (stcb->asoc.alternate) {
 		net = stcb->asoc.alternate;
 	} else {
 		net = stcb->asoc.primary_destination;
 	}
 	if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
 		/* Abort this assoc with m as the user defined reason */
 		if (m != NULL) {
 			SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_NOWAIT);
 		} else {
 			m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
 			    0, M_NOWAIT, 1, MT_DATA);
 			/*
 			 * The M_NOWAIT allocation may fail; only touch the
 			 * mbuf when we actually got one.  Without a reason
 			 * mbuf the abort is still sent, just without a
 			 * cause.
 			 */
 			if (m != NULL) {
 				SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
 			}
 		}
 		/* m is re-checked: both branches above may leave it NULL. */
 		if (m != NULL) {
 			struct sctp_paramhdr *ph;
 
 			ph = mtod(m, struct sctp_paramhdr *);
 			ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
 			ph->param_length = htons(sizeof(struct sctp_paramhdr) + ca->sndlen);
 		}
 		/*
 		 * We add one here to keep the assoc from dis-appearing on
 		 * us.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
 		/*
 		 * sctp_abort_an_association calls sctp_free_asoc() free
 		 * association will NOT free it since we incremented the
 		 * refcnt .. we do this to prevent it being freed and things
 		 * getting tricky since we could end up (from free_asoc)
 		 * calling inpcb_free which would get a recursive lock call
 		 * to the iterator lock.. But as a consequence of that the
 		 * stcb will return to us un-locked.. since free_asoc
 		 * returns with either no TCB or the TCB unlocked, we must
 		 * relock.. to unlock in the iterator timer :-0
 		 */
 		SCTP_TCB_LOCK(stcb);
 		atomic_add_int(&stcb->asoc.refcnt, -1);
 		goto no_chunk_output;
 	} else {
 		if (m) {
 			/* 1: no TCB send lock is taken inside sctp_msg_append() */
 			ret = sctp_msg_append(stcb, net, m,
 			    &ca->sndrcv, 1);
 		}
 		asoc = &stcb->asoc;
 		if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
 			/* shutdown this assoc */
 			int cnt;
 
 			cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
 
 			if (TAILQ_EMPTY(&asoc->send_queue) &&
 			    TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (cnt == 0)) {
 				if (asoc->locked_on_sending) {
 					goto abort_anyway;
 				}
 				/*
 				 * there is nothing queued to send, so I'm
 				 * done...
 				 */
 				if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 				    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					/*
 					 * only send SHUTDOWN the first time
 					 * through
 					 */
 					if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 					sctp_stop_timers_for_shutdown(stcb);
 					sctp_send_shutdown(stcb, net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 					    net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 					    asoc->primary_destination);
 					/*
 					 * Only the SHUTDOWN control chunk
 					 * has to go out; see the
 					 * added_control handling below.
 					 */
 					added_control = 1;
 					do_chunk_output = 0;
 				}
 			} else {
 				/*
 				 * we still got (or just got) data to send,
 				 * so set SHUTDOWN_PENDING
 				 */
 				/*
 				 * XXX sockets draft says that SCTP_EOF
 				 * should be sent with no data.  currently,
 				 * we will allow user data to be sent first
 				 * and move to SHUTDOWN-PENDING
 				 */
 				if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 				    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					if (asoc->locked_on_sending) {
 						/*
 						 * Locked to send out the
 						 * data
 						 */
 						struct sctp_stream_queue_pending *sp;
 
 						sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
 						if (sp) {
 							if ((sp->length == 0) && (sp->msg_is_complete == 0))
 								asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
 						}
 					}
 					asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
 					if (TAILQ_EMPTY(&asoc->send_queue) &&
 					    TAILQ_EMPTY(&asoc->sent_queue) &&
 					    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 				abort_anyway:
 						/*
 						 * NOTE(review): unlike the
 						 * SCTP_ABORT path above, the
 						 * TCB is not re-locked after
 						 * the abort here -- confirm
 						 * this is intended.
 						 */
 						atomic_add_int(&stcb->asoc.refcnt, 1);
 						sctp_abort_an_association(stcb->sctp_ep, stcb,
 						    NULL, SCTP_SO_NOT_LOCKED);
 						atomic_add_int(&stcb->asoc.refcnt, -1);
 						goto no_chunk_output;
 					}
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 					    asoc->primary_destination);
 				}
 			}
 
 		}
 	}
 	un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
 	    (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
 
 	/*
 	 * With NODELAY off, data in flight and less than roughly one
 	 * packet's worth unsent, hold the output back.
 	 */
 	if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 	    (stcb->asoc.total_flight > 0) &&
 	    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
 		do_chunk_output = 0;
 	}
 	if (do_chunk_output)
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
 	else if (added_control) {
 		int num_out, reason, now_filled = 0;
 		struct timeval now;
 		int frag_point;
 
 		/* Push out the control chunk only (control_only == 1). */
 		frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
 		(void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
 		    &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_NOT_LOCKED);
 	}
 no_chunk_output:
 	if (ret) {
 		ca->cnt_failed++;
 	} else {
 		ca->cnt_sent++;
 	}
 }
 
 /*
  * Completion callback of the send-to-all iterator: releases the message
  * chain and the struct sctp_copy_all that carried it.
  *
  * No notification is raised for failed sends.  The failures seen here
  * are memory related, and if no mbuf could be had for the data there is
  * little hope of getting one for a notification either.
  */
 static void
 sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED)
 {
 	struct sctp_copy_all *copy_all = (struct sctp_copy_all *)ptr;
 
 	/* the chain first, then the descriptor that owns it */
 	sctp_m_freem(copy_all->m);
 	SCTP_FREE(copy_all, SCTP_M_COPYAL);
 }
 
 /*
  * Copy len bytes described by uio into a freshly allocated mbuf chain.
  *
  * Returns the head of the chain, or NULL if an allocation or uiomove()
  * failed.  On failure every mbuf allocated so far is released.
  */
 static struct mbuf *
 sctp_copy_out_all(struct uio *uio, int len)
 {
 	struct mbuf *ret, *at;
 	int left, willcpy, cancpy, error;
 
 	ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
 	if (ret == NULL) {
 		/* TSNH */
 		return (NULL);
 	}
 	left = len;
 	SCTP_BUF_LEN(ret) = 0;
 	/* Fill each mbuf up to its trailing space. */
 	cancpy = M_TRAILINGSPACE(ret);
 	willcpy = min(cancpy, left);
 	at = ret;
 	while (left > 0) {
 		error = uiomove(mtod(at, caddr_t), willcpy, uio);
 		if (error) {
 	err_out_now:
 			/*
 			 * Free from the head: "at" is linked into the
 			 * chain that starts at "ret", so freeing only "at"
 			 * would leak every mbuf in front of it.
 			 */
 			sctp_m_freem(ret);
 			return (NULL);
 		}
 		SCTP_BUF_LEN(at) = willcpy;
 		SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
 		left -= willcpy;
 		if (left > 0) {
 			/* More to copy: chain another mbuf and continue. */
 			SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA);
 			if (SCTP_BUF_NEXT(at) == NULL) {
 				goto err_out_now;
 			}
 			at = SCTP_BUF_NEXT(at);
 			SCTP_BUF_LEN(at) = 0;
 			cancpy = M_TRAILINGSPACE(at);
 			willcpy = min(cancpy, left);
 		}
 	}
 	return (ret);
 }
 
 /*
  * Start sending one message to the associations of an endpoint.
  *
  * A struct sctp_copy_all is filled in with the send parameters (with
  * SCTP_SENDALL cleared) and the message, and an iterator is started that
  * runs sctp_sendall_iterator() per association and
  * sctp_sendall_completes() at the end.
  *
  * inp   endpoint to send from
  * uio   user data; when non-NULL it is copied into an mbuf chain
  * m     used when uio is NULL; only its length is gathered here
  * srcv  send parameters, may be NULL
  *
  * Returns 0 when the iterator was started, ENOMEM on allocation failure
  * and EFAULT when the iterator could not be started.
  */
 static int
 sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
     struct sctp_sndrcvinfo *srcv)
 {
 	int ret;
 	struct sctp_copy_all *ca;
 
 	SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
 	    SCTP_M_COPYAL);
 	if (ca == NULL) {
 		sctp_m_freem(m);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	memset(ca, 0, sizeof(struct sctp_copy_all));
 
 	ca->inp = inp;
 	if (srcv) {
 		memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
 	}
 	/*
 	 * take off the sendall flag, it would be bad if we failed to do
 	 * this :-0
 	 */
 	ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
 	/* get length and mbuf chain */
 	if (uio) {
 		ca->sndlen = uio->uio_resid;
 		ca->m = sctp_copy_out_all(uio, ca->sndlen);
 		if (ca->m == NULL) {
 			SCTP_FREE(ca, SCTP_M_COPYAL);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 			return (ENOMEM);
 		}
 	} else {
 		/* Gather the length of the send */
 		struct mbuf *mat;
 
 		/*
 		 * NOTE(review): ca->m is left NULL in this branch, and
 		 * sctp_sendall_iterator() returns early when ca->m is
 		 * NULL -- confirm whether m was meant to be stored here.
 		 */
 		ca->sndlen = 0;
 		for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
 			ca->sndlen += SCTP_BUF_LEN(mat);
 		}
 	}
 	ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
 	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
 	    SCTP_ASOC_ANY_STATE,
 	    (void *)ca, 0,
 	    sctp_sendall_completes, inp, 1);
 	if (ret) {
 		SCTP_PRINTF("Failed to initiate iterator for sendall\n");
 		/*
 		 * ca is released right here, so nothing else will free the
 		 * chain copied from the uio; drop it before ca goes away.
 		 */
 		if (ca->m != NULL) {
 			sctp_m_freem(ca->m);
 			ca->m = NULL;
 		}
 		SCTP_FREE(ca, SCTP_M_COPYAL);
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		return (EFAULT);
 	}
 	return (0);
 }
 
 
 /*
  * Remove every COOKIE-ECHO chunk from the association's control send
  * queue, freeing its data and the chunk itself and decrementing
  * asoc->ctrl_queue_cnt for each one.
  */
 void
 sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *cookie, *next_chk;
 
 	TAILQ_FOREACH_SAFE(cookie, &asoc->control_send_queue, sctp_next, next_chk) {
 		if (cookie->rec.chunk_id.id != SCTP_COOKIE_ECHO) {
 			/* some other control chunk, leave it queued */
 			continue;
 		}
 		TAILQ_REMOVE(&asoc->control_send_queue, cookie, sctp_next);
 		if (cookie->data != NULL) {
 			sctp_m_freem(cookie->data);
 			cookie->data = NULL;
 		}
 		asoc->ctrl_queue_cnt--;
 		sctp_free_a_chunk(stcb, cookie, SCTP_SO_NOT_LOCKED);
 	}
 }
 
 /*
  * Drop ASCONF chunks from the ASCONF send queue.
  *
  * The queue is walked from the front.  The walk stops at the first
  * ASCONF chunk whose serial number is greater than
  * asoc->asconf_seq_out_acked; every ASCONF chunk met before that point
  * is unlinked and freed, and asoc->ctrl_queue_cnt is decremented for it.
  */
 void
 sctp_toss_old_asconf(struct sctp_tcb *stcb)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *chk, *nchk;
 	struct sctp_asconf_chunk *hdr;
 
 	TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) {
 		if (chk->rec.chunk_id.id != SCTP_ASCONF) {
 			/* only SCTP_ASCONF chunks are of interest */
 			continue;
 		}
 		if (chk->data != NULL) {
 			hdr = mtod(chk->data, struct sctp_asconf_chunk *);
 			if (SCTP_TSN_GT(ntohl(hdr->serial_number), asoc->asconf_seq_out_acked)) {
 				/* Not Acked yet */
 				break;
 			}
 		}
 		TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next);
 		if (chk->data != NULL) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		asoc->ctrl_queue_cnt--;
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 	}
 }
 
 
 /*
  * Book-keeping for the first bundle_at DATA chunks of data_list.
  *
  * Each chunk is moved from the send queue to the sent queue (kept in
  * TSN order), stamped with net->last_sent_time and net->cwnd, marked
  * SCTP_DATAGRAM_SENT, and accounted for in the flight size and in the
  * peer's receive window.  Afterwards the congestion control module is
  * told that a packet was transmitted, if it registered a callback.
  */
 static void
 sctp_clean_up_datalist(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_tmit_chunk **data_list,
     int bundle_at,
     struct sctp_nets *net)
 {
 	struct sctp_tmit_chunk *chk, *pos;
 	int i;
 
 	for (i = 0; i < bundle_at; i++) {
 		chk = data_list[i];
 
 		/* off of the send queue */
 		TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 		asoc->send_queue_cnt--;
 		if (i > 0) {
 			/*
 			 * Only chunk 0 may keep its do_rtt setting; it is
 			 * cleared for every other chunk of the bundle.
 			 */
 			chk->do_rtt = 0;
 		}
 		/* record time */
 		chk->sent_rcv_time = net->last_sent_time;
 		chk->rec.data.cwnd_at_send = net->cwnd;
 		chk->rec.data.fast_retran_tsn = chk->rec.data.TSN_seq;
 		if (chk->whoTo == NULL) {
 			chk->whoTo = net;
 			atomic_add_int(&net->ref_count, 1);
 		}
 		/*
 		 * on to the sent queue: append at the tail unless the tail
 		 * carries a greater TSN, in which case walk backwards to
 		 * the right spot.
 		 */
 		pos = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead);
 		if ((pos == NULL) ||
 		    !(SCTP_TSN_GT(pos->rec.data.TSN_seq, chk->rec.data.TSN_seq))) {
 			TAILQ_INSERT_TAIL(&asoc->sent_queue,
 			    chk,
 			    sctp_next);
 		} else {
 			struct sctp_tmit_chunk *prev;
 
 			for (;;) {
 				prev = TAILQ_PREV(pos, sctpchunk_listhead, sctp_next);
 				if (prev == NULL) {
 					/* reached the head: goes in front */
 					TAILQ_INSERT_BEFORE(pos, chk, sctp_next);
 					break;
 				}
 				pos = prev;
 				if (!(SCTP_TSN_GT(pos->rec.data.TSN_seq, chk->rec.data.TSN_seq))) {
 					TAILQ_INSERT_AFTER(&asoc->sent_queue, pos, chk, sctp_next);
 					break;
 				}
 			}
 		}
 		/* This does not lower until the cum-ack passes it */
 		asoc->sent_queue_cnt++;
 		if ((asoc->peers_rwnd <= 0) &&
 		    (asoc->total_flight == 0) &&
 		    (bundle_at == 1)) {
 			/* Mark the chunk as being a window probe */
 			SCTP_STAT_INCR(sctps_windowprobed);
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_audit_log(0xC2, 3);
 #endif
 		chk->sent = SCTP_DATAGRAM_SENT;
 		chk->snd_count = 1;
 		chk->rec.data.chunk_was_revoked = 0;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 			sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
 			    chk->whoTo->flight_size,
 			    chk->book_size,
 			    (uintptr_t) chk->whoTo,
 			    chk->rec.data.TSN_seq);
 		}
 		sctp_flight_size_increase(chk);
 		sctp_total_flight_increase(stcb, chk);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
 			sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
 			    asoc->peers_rwnd, chk->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
 		}
 		asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
 		    (uint32_t) (chk->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
 		if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
 			/* SWS sender side engages */
 			asoc->peers_rwnd = 0;
 		}
 	}
 	if (asoc->cc_functions.sctp_cwnd_update_packet_transmitted) {
 		(*asoc->cc_functions.sctp_cwnd_update_packet_transmitted) (stcb, net);
 	}
 }
 
 /*
  * Sweep asoc->control_send_queue and release "stray" control chunks.
  * A chunk is stray when its chunk id is one of the types listed in the
  * switch below, or when it is a STREAM_RESET chunk other than the one
  * recorded in asoc->str_reset.  Stray chunks are unlinked, their mbuf
  * chain is freed, the queue counters are adjusted and the chunk itself
  * is released.  All other chunks are left on the queue.
  */
 static void
 sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	struct sctp_tmit_chunk *chk, *nchk;
 	int stray;
 
 	TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) {
 		switch (chk->rec.chunk_id.id) {
 		case SCTP_SELECTIVE_ACK:
 		case SCTP_NR_SELECTIVE_ACK:	/* EY */
 		case SCTP_HEARTBEAT_REQUEST:
 		case SCTP_HEARTBEAT_ACK:
 		case SCTP_FORWARD_CUM_TSN:
 		case SCTP_SHUTDOWN:
 		case SCTP_SHUTDOWN_ACK:
 		case SCTP_OPERATION_ERROR:
 		case SCTP_PACKET_DROPPED:
 		case SCTP_COOKIE_ACK:
 		case SCTP_ECN_CWR:
 		case SCTP_ASCONF_ACK:
 			/* Always cleaned up. */
 			stray = 1;
 			break;
 		case SCTP_STREAM_RESET:
 			/* Keep only the chunk the association still tracks. */
 			stray = (chk != asoc->str_reset) ? 1 : 0;
 			break;
 		default:
 			stray = 0;
 			break;
 		}
 		if (stray == 0) {
 			continue;
 		}
 		TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
 		if (chk->data) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		asoc->ctrl_queue_cnt--;
 		if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 			asoc->fwd_tsn_cnt--;
 		}
 		sctp_free_a_chunk(stcb, chk, so_locked);
 	}
 }
 
 
 /*
  * Decide how many bytes of an incomplete message may be taken now.
  *
  * length     - bytes currently queued for the message
  * goal_mtu   - room left in the packet being built
  * frag_point - fragmentation point supplied by the caller
  * eeor_on    - non-zero when the caller is operating in EEOR mode
  *
  * Returns the number of bytes to take, or 0 when nothing should be
  * taken at this time.
  */
 static int
 sctp_can_we_split_this(struct sctp_tcb *stcb,
     uint32_t length,
     uint32_t goal_mtu, uint32_t frag_point, int eeor_on)
 {
 	if (eeor_on) {
 		if (goal_mtu < length) {
 			/* More queued than fits: fill the rest of the packet. */
 			return (goal_mtu);
 		}
 		/*
 		 * Everything queued fits.  With data outstanding we get
 		 * another chance when the SACK arrives, so wait for more
 		 * data; with nothing in flight, send all of it now since
 		 * it may be all the sender is going to hand us.
 		 */
 		if (stcb->asoc.total_flight != 0) {
 			return (0);
 		}
 		return (length);
 	}
 	/*
 	 * A send buffer smaller than the fragmentation point can never
 	 * hold a full message, so splitting has to be allowed.
 	 */
 	if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) {
 		return (length);
 	}
 	/* It all fits already: no split in non-EEOR mode. */
 	if (length <= goal_mtu) {
 		return (0);
 	}
 	/* The piece left behind would be a sub-optimal residual. */
 	if ((length - goal_mtu) < SCTP_BASE_SYSCTL(sctp_min_residual)) {
 		return (0);
 	}
 	/*
 	 * length exceeds goal_mtu here.  Split only when the room on offer
 	 * reaches the smaller of the minimum split point and frag_point.
 	 */
 	if (goal_mtu < min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) {
 		return (0);
 	}
 	return (min(goal_mtu, frag_point));
 }
 
 /*
  * Take (part of) the first pending message on strq->outqueue, wrap it in
  * a DATA chunk and append that chunk to the association's send_queue.
  *
  * goal_mtu   - room left in the packet; only consulted (through
  *              sctp_can_we_split_this()) when the message is incomplete
  * frag_point - upper bound on the payload taken from a complete message
  * locked     - out: set to 1 when the caller must stay on this stream
  *              (message not finished), 0 when it is free to move on
  * giveup     - out: set to 1 when nothing (more) can be taken right now
  * eeor_mode  - passed through to sctp_can_we_split_this()
  * bail       - out: set to 1 on an mbuf allocation/copy/prepend failure
  * so_locked  - passed through to the chunk / stream-queue free routines
  *
  * Returns the number of payload bytes moved; 0 whenever *giveup or *bail
  * is set or the stream queue is empty.  The TCB send lock is taken on
  * demand (tracked in send_lock_up) and always dropped before returning.
  */
 static uint32_t
 sctp_move_to_outqueue(struct sctp_tcb *stcb,
     struct sctp_stream_out *strq,
     uint32_t goal_mtu,
     uint32_t frag_point,
     int *locked,
     int *giveup,
     int eeor_mode,
     int *bail,
     int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	/* Move from the stream to the send_queue keeping track of the total */
 	struct sctp_association *asoc;
 	struct sctp_stream_queue_pending *sp;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_data_chunk *dchkh;
 	uint32_t to_move, length;
 	uint8_t rcv_flags = 0;
 	uint8_t some_taken;	/* saved sp->some_taken, restored on failure */
 	uint8_t send_lock_up = 0;	/* 1 while we hold the TCB send lock */
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	asoc = &stcb->asoc;
 one_more_time:
 	/* sa_ignore FREED_MEMORY */
 	sp = TAILQ_FIRST(&strq->outqueue);
 	if (sp == NULL) {
 		/*
 		 * Queue looked empty without the send lock; take the lock
 		 * and look again before concluding there is nothing here.
 		 */
 		*locked = 0;
 		if (send_lock_up == 0) {
 			SCTP_TCB_SEND_LOCK(stcb);
 			send_lock_up = 1;
 		}
 		sp = TAILQ_FIRST(&strq->outqueue);
 		if (sp) {
 			goto one_more_time;
 		}
 		if (strq->last_msg_incomplete) {
 			SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
 			    strq->stream_no,
 			    strq->last_msg_incomplete);
 			strq->last_msg_incomplete = 0;
 		}
 		to_move = 0;
 		if (send_lock_up) {
 			SCTP_TCB_SEND_UNLOCK(stcb);
 			send_lock_up = 0;
 		}
 		goto out_of;
 	}
 	if ((sp->msg_is_complete) && (sp->length == 0)) {
 		if (sp->sender_all_done) {
 			/*
 			 * We are doing deferred cleanup. Last time through
 			 * when we took all the data the sender_all_done was
 			 * not set.
 			 */
 			if ((sp->put_last_out == 0) && (sp->discard_rest == 0)) {
 				SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
 				SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
 				    sp->sender_all_done,
 				    sp->length,
 				    sp->msg_is_complete,
 				    sp->put_last_out,
 				    send_lock_up);
 			}
 			/* Removing the last queue entry is done under the send lock. */
 			if ((TAILQ_NEXT(sp, next) == NULL) && (send_lock_up == 0)) {
 				SCTP_TCB_SEND_LOCK(stcb);
 				send_lock_up = 1;
 			}
 			atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 			TAILQ_REMOVE(&strq->outqueue, sp, next);
 			stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
 			if (sp->net) {
 				sctp_free_remote_addr(sp->net);
 				sp->net = NULL;
 			}
 			if (sp->data) {
 				sctp_m_freem(sp->data);
 				sp->data = NULL;
 			}
 			sctp_free_a_strmoq(stcb, sp, so_locked);
 			/* we can't be locked to it */
 			*locked = 0;
 			stcb->asoc.locked_on_sending = NULL;
 			if (send_lock_up) {
 				SCTP_TCB_SEND_UNLOCK(stcb);
 				send_lock_up = 0;
 			}
 			/* back to get the next msg */
 			goto one_more_time;
 		} else {
 			/*
 			 * sender just finished this but still holds a
 			 * reference
 			 */
 			*locked = 1;
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	} else {
 		/* is there some to get */
 		if (sp->length == 0) {
 			/* no */
 			*locked = 1;
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		} else if (sp->discard_rest) {
 			/*
 			 * The remainder of this message is to be thrown
 			 * away: drop the queued bytes and their accounting,
 			 * then stay locked on the stream.
 			 */
 			if (send_lock_up == 0) {
 				SCTP_TCB_SEND_LOCK(stcb);
 				send_lock_up = 1;
 			}
 			/* Whack down the size */
 			atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
 			if ((stcb->sctp_socket != NULL) && \
 			    ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 				atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
 			}
 			if (sp->data) {
 				sctp_m_freem(sp->data);
 				sp->data = NULL;
 				sp->tail_mbuf = NULL;
 			}
 			sp->length = 0;
 			sp->some_taken = 1;
 			*locked = 1;
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	}
 	/* Remember the fragmentation state so failures can roll it back. */
 	some_taken = sp->some_taken;
 	if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 		/* Socket is gone: whatever is queued is the whole message. */
 		sp->msg_is_complete = 1;
 	}
 re_look:
 	length = sp->length;
 	if (sp->msg_is_complete) {
 		/* The message is complete */
 		to_move = min(length, frag_point);
 		if (to_move == length) {
 			/* All of it fits in the MTU */
 			if (sp->some_taken) {
 				rcv_flags |= SCTP_DATA_LAST_FRAG;
 				sp->put_last_out = 1;
 			} else {
 				rcv_flags |= SCTP_DATA_NOT_FRAG;
 				sp->put_last_out = 1;
 			}
 		} else {
 			/* Not all of it fits, we fragment */
 			if (sp->some_taken == 0) {
 				rcv_flags |= SCTP_DATA_FIRST_FRAG;
 			}
 			sp->some_taken = 1;
 		}
 	} else {
 		to_move = sctp_can_we_split_this(stcb, length, goal_mtu, frag_point, eeor_mode);
 		if (to_move) {
 			/*-
 			 * We use a snapshot of length in case it
 			 * is expanding during the compare.
 			 */
 			uint32_t llen;
 
 			llen = length;
 			if (to_move >= llen) {
 				to_move = llen;
 				if (send_lock_up == 0) {
 					/*-
 					 * We are taking all of an incomplete msg
 					 * thus we need a send lock.
 					 */
 					SCTP_TCB_SEND_LOCK(stcb);
 					send_lock_up = 1;
 					if (sp->msg_is_complete) {
 						/*
 						 * the sender finished the
 						 * msg
 						 */
 						goto re_look;
 					}
 				}
 			}
 			if (sp->some_taken == 0) {
 				rcv_flags |= SCTP_DATA_FIRST_FRAG;
 				sp->some_taken = 1;
 			}
 		} else {
 			/* Nothing to take. */
 			if (sp->some_taken) {
 				*locked = 1;
 			}
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	}
 
 	/* If we reach here, we can copy out a chunk */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* No chunk memory */
 		*giveup = 1;
 		to_move = 0;
 		goto out_of;
 	}
 	/*
 	 * Setup for unordered if needed by looking at the user sent info
 	 * flags.
 	 */
 	if (sp->sinfo_flags & SCTP_UNORDERED) {
 		rcv_flags |= SCTP_DATA_UNORDERED;
 	}
 	if ((SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) && ((sp->sinfo_flags & SCTP_EOF) == SCTP_EOF)) ||
 	    ((sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) == SCTP_SACK_IMMEDIATELY)) {
 		rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
 	}
 	/* clear out the chunk before setting up */
 	memset(chk, 0, sizeof(*chk));
 	chk->rec.data.rcv_flags = rcv_flags;
 
 	if (to_move >= length) {
 		/* we think we can steal the whole thing */
 		if ((sp->sender_all_done == 0) && (send_lock_up == 0)) {
 			SCTP_TCB_SEND_LOCK(stcb);
 			send_lock_up = 1;
 		}
 		if (to_move < sp->length) {
 			/* bail, it changed */
 			goto dont_do_it;
 		}
 		chk->data = sp->data;
 		chk->last_mbuf = sp->tail_mbuf;
 		/* register the stealing */
 		sp->data = sp->tail_mbuf = NULL;
 	} else {
 		struct mbuf *m;
 
 dont_do_it:
 		/* Partial take: copy to_move bytes, then trim them off sp. */
 		chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT);
 		chk->last_mbuf = NULL;
 		if (chk->data == NULL) {
 			sp->some_taken = some_taken;
 			sctp_free_a_chunk(stcb, chk, so_locked);
 			*bail = 1;
 			to_move = 0;
 			goto out_of;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY);
 		}
 #endif
 		/* Pull off the data */
 		m_adj(sp->data, to_move);
 		/* Now lets work our way down and compact it */
 		m = sp->data;
 		while (m && (SCTP_BUF_LEN(m) == 0)) {
 			sp->data = SCTP_BUF_NEXT(m);
 			SCTP_BUF_NEXT(m) = NULL;
 			if (sp->tail_mbuf == m) {
 				/*-
 				 * Freeing tail? TSNH since
 				 * we supposedly were taking less
 				 * than the sp->length.
 				 */
 #ifdef INVARIANTS
 				panic("Huh, freing tail? - TSNH");
 #else
 				SCTP_PRINTF("Huh, freeing tail? - TSNH\n");
 				sp->tail_mbuf = sp->data = NULL;
 				sp->length = 0;
 #endif
 
 			}
 			sctp_m_free(m);
 			m = sp->data;
 		}
 	}
 	if (SCTP_BUF_IS_EXTENDED(chk->data)) {
 		chk->copy_by_ref = 1;
 	} else {
 		chk->copy_by_ref = 0;
 	}
 	/*
 	 * Find last_mbuf by walking the chain when it is not already known
 	 * (the copy path leaves it NULL). This is ugly but hopefully it is
 	 * only one mbuf.
 	 */
 	if (chk->last_mbuf == NULL) {
 		chk->last_mbuf = chk->data;
 		while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) {
 			chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
 		}
 	}
 	if (to_move > length) {
 		/*- This should not happen either
 		 * since we always lower to_move to the size
 		 * of sp->length if its larger.
 		 */
 #ifdef INVARIANTS
 		panic("Huh, how can to_move be larger?");
 #else
 		SCTP_PRINTF("Huh, how can to_move be larger?\n");
 		sp->length = 0;
 #endif
 	} else {
 		atomic_subtract_int(&sp->length, to_move);
 	}
 	if (M_LEADINGSPACE(chk->data) < (int)sizeof(struct sctp_data_chunk)) {
 		/* Not enough room for a chunk header, get some */
 		struct mbuf *m;
 
 		m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 0, MT_DATA);
 		if (m == NULL) {
 			/*
 			 * we're in trouble here. _PREPEND below will free
 			 * all the data if there is no leading space, so we
 			 * must put the data back and restore.
 			 */
 			if (send_lock_up == 0) {
 				SCTP_TCB_SEND_LOCK(stcb);
 				send_lock_up = 1;
 			}
 			if (sp->data == NULL) {
 				/* unsteal the data */
 				sp->data = chk->data;
 				sp->tail_mbuf = chk->last_mbuf;
 			} else {
 				struct mbuf *m_tmp;
 
 				/* reassemble the data */
 				m_tmp = sp->data;
 				sp->data = chk->data;
 				SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp;
 			}
 			sp->some_taken = some_taken;
 			atomic_add_int(&sp->length, to_move);
 			/* The mbufs went back to sp; do not free them with chk. */
 			chk->data = NULL;
 			*bail = 1;
 			sctp_free_a_chunk(stcb, chk, so_locked);
 			to_move = 0;
 			goto out_of;
 		} else {
 			/* Chain an empty mbuf in front to hold the header. */
 			SCTP_BUF_LEN(m) = 0;
 			SCTP_BUF_NEXT(m) = chk->data;
 			chk->data = m;
 			M_ALIGN(chk->data, 4);
 		}
 	}
 	SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_NOWAIT);
 	if (chk->data == NULL) {
 		/* HELP, TSNH since we assured it would not above? */
 #ifdef INVARIANTS
 		panic("prepend failes HELP?");
 #else
 		SCTP_PRINTF("prepend fails HELP?\n");
 		sctp_free_a_chunk(stcb, chk, so_locked);
 #endif
 		*bail = 1;
 		to_move = 0;
 		goto out_of;
 	}
 	/* Account for the header bytes, then fill in the chunk bookkeeping. */
 	sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
 	chk->book_size = chk->send_size = (to_move + sizeof(struct sctp_data_chunk));
 	chk->book_size_scale = 0;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->pad_inplace = 0;
 	chk->no_fr_allowed = 0;
 	chk->rec.data.stream_seq = strq->next_sequence_send;
 	/* The stream sequence advances only after the last ordered fragment. */
 	if ((rcv_flags & SCTP_DATA_LAST_FRAG) &&
 	    !(rcv_flags & SCTP_DATA_UNORDERED)) {
 		strq->next_sequence_send++;
 	}
 	chk->rec.data.stream_number = sp->stream;
 	chk->rec.data.payloadtype = sp->ppid;
 	chk->rec.data.context = sp->context;
 	chk->rec.data.doing_fast_retransmit = 0;
 
 	chk->rec.data.timetodrop = sp->ts;
 	chk->flags = sp->act_flags;
 
 	if (sp->net) {
 		/* Message is directed: inherit the net and take a reference. */
 		chk->whoTo = sp->net;
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	} else
 		chk->whoTo = NULL;
 
 	if (sp->holds_key_ref) {
 		chk->auth_keyid = sp->auth_keyid;
 		sctp_auth_key_acquire(stcb, chk->auth_keyid);
 		chk->holds_key_ref = 1;
 	}
 	/* Assign the next TSN. */
 	chk->rec.data.TSN_seq = atomic_fetchadd_int(&asoc->sending_seq, 1);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_OUTQ) {
 		sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
 		    (uintptr_t) stcb, sp->length,
 		    (uint32_t) ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq),
 		    chk->rec.data.TSN_seq);
 	}
 	dchkh = mtod(chk->data, struct sctp_data_chunk *);
 	/*
 	 * Put the rest of the things in place now. Size was done earlier in
 	 * previous loop prior to padding.
 	 */
 
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) {
 		asoc->tsn_out_at = 0;
 		asoc->tsn_out_wrapped = 1;
 	}
 	asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.TSN_seq;
 	asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.stream_number;
 	asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.stream_seq;
 	asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size;
 	asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags;
 	asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb;
 	asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at;
 	asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
 	asoc->tsn_out_at++;
 #endif
 
 	/* Fill in the on-the-wire DATA chunk header (network byte order). */
 	dchkh->ch.chunk_type = SCTP_DATA;
 	dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
 	dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
 	dchkh->dp.stream_id = htons(strq->stream_no);
 	dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq);
 	dchkh->dp.protocol_id = chk->rec.data.payloadtype;
 	dchkh->ch.chunk_length = htons(chk->send_size);
 	/* Now advance the chk->send_size by the actual pad needed. */
 	if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
 		/* need a pad */
 		struct mbuf *lm;
 		int pads;
 
 		pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
 		lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf);
 		if (lm != NULL) {
 			chk->last_mbuf = lm;
 			chk->pad_inplace = 1;
 		}
 		/* send_size grows by the pad even when lm is NULL. */
 		chk->send_size += pads;
 	}
 	if (PR_SCTP_ENABLED(chk->flags)) {
 		asoc->pr_sctp_cnt++;
 	}
 	if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
 		/* All done pull and kill the message */
 		atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 		if (sp->put_last_out == 0) {
 			SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
 			SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
 			    sp->sender_all_done,
 			    sp->length,
 			    sp->msg_is_complete,
 			    sp->put_last_out,
 			    send_lock_up);
 		}
 		/* Removing the last queue entry is done under the send lock. */
 		if ((send_lock_up == 0) && (TAILQ_NEXT(sp, next) == NULL)) {
 			SCTP_TCB_SEND_LOCK(stcb);
 			send_lock_up = 1;
 		}
 		TAILQ_REMOVE(&strq->outqueue, sp, next);
 		stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
 		if (sp->net) {
 			sctp_free_remote_addr(sp->net);
 			sp->net = NULL;
 		}
 		if (sp->data) {
 			sctp_m_freem(sp->data);
 			sp->data = NULL;
 		}
 		sctp_free_a_strmoq(stcb, sp, so_locked);
 
 		/* we can't be locked to it */
 		*locked = 0;
 		stcb->asoc.locked_on_sending = NULL;
 	} else {
 		/* more to go, we are locked */
 		*locked = 1;
 	}
 	/* Finally hand the chunk to the association's send queue. */
 	asoc->chunks_on_out_queue++;
 	strq->chunks_on_queues++;
 	TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
 	asoc->send_queue_cnt++;
 out_of:
 	/* Common exit: drop the send lock if any path above took it. */
 	if (send_lock_up) {
 		SCTP_TCB_SEND_UNLOCK(stcb);
 	}
 	return (to_move);
 }
 
 
 /*
  * Pull up to roughly one MTU worth of user data for net out of the
  * stream queues and onto the association's send_queue.
  *
  * Streams are picked by the stream scheduler (ss_functions) unless the
  * association is locked on a stream with an unfinished message, in which
  * case that stream is serviced exclusively.  Each round hands the
  * remaining room (goal_mtu) to sctp_move_to_outqueue().
  *
  * frag_point - fragmentation point passed on to sctp_move_to_outqueue()
  * eeor_mode  - passed on to sctp_move_to_outqueue()
  * quit_now   - out: set to 1 when the last move reported a bail
  *              (resource failure); otherwise left untouched
  * so_locked  - passed on to sctp_move_to_outqueue()
  */
 static void
 sctp_fill_outqueue(struct sctp_tcb *stcb,
     struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	struct sctp_association *asoc;
 	struct sctp_stream_out *strq;
 	int goal_mtu, moved_how_much, total_moved = 0, bail = 0;
 	int locked, giveup;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	asoc = &stcb->asoc;
 	/* Start from the path MTU less the per-family packet overhead. */
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
 		break;
 #endif
 	default:
 		/* TSNH */
 		goal_mtu = net->mtu;
 		break;
 	}
 	/* Need an allowance for the data chunk header too */
 	goal_mtu -= sizeof(struct sctp_data_chunk);
 
 	/* must make even word boundary */
 	goal_mtu &= 0xfffffffc;
 	if (asoc->locked_on_sending) {
 		/* We are stuck on one stream until the message completes. */
 		strq = asoc->locked_on_sending;
 		locked = 1;
 	} else {
 		strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
 		locked = 0;
 	}
 	while ((goal_mtu > 0) && strq) {
 		giveup = 0;
 		bail = 0;
 		moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point, &locked,
 		    &giveup, eeor_mode, &bail, so_locked);
 		if (moved_how_much)
 			stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved_how_much);
 
 		/*
 		 * Account for the move right away.  Doing this only at the
 		 * bottom of the loop missed the final move whenever the
 		 * scheduler had no further stream to offer (the break on
 		 * strq == NULL below), which made the "ran dry" statistics
 		 * fire even though data had been queued.  Moves that ended
 		 * in giveup or bail return 0, so they add nothing here.
 		 */
 		total_moved += moved_how_much;
 
 		if (locked) {
 			asoc->locked_on_sending = strq;
 			if ((moved_how_much == 0) || (giveup) || bail)
 				/* no more to move for now */
 				break;
 		} else {
 			asoc->locked_on_sending = NULL;
 			if ((giveup) || bail) {
 				break;
 			}
 			strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
 			if (strq == NULL) {
 				break;
 			}
 		}
 		/* Shrink the remaining room by payload plus chunk header. */
 		goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk));
 		goal_mtu &= 0xfffffffc;
 	}
 	if (bail)
 		*quit_now = 1;
 
 	stcb->asoc.ss_functions.sctp_ss_packet_done(stcb, net, asoc);
 
 	if (total_moved == 0) {
 		if ((stcb->asoc.sctp_cmt_on_off == 0) &&
 		    (net == stcb->asoc.primary_destination)) {
 			/* ran dry for primary network net */
 			SCTP_STAT_INCR(sctps_primary_randry);
 		} else if (stcb->asoc.sctp_cmt_on_off > 0) {
 			/* ran dry with CMT on */
 			SCTP_STAT_INCR(sctps_cmt_randry);
 		}
 	}
 }
 
 void
 sctp_fix_ecn_echo(struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *chk;
 
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
 			chk->sent = SCTP_DATAGRAM_UNSENT;
 		}
 	}
 }
 
 void
 sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_stream_queue_pending *sp;
 	unsigned int i;
 
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 		TAILQ_FOREACH(sp, &stcb->asoc.strmout[i].outqueue, next) {
 			if (sp->net == net) {
 				sctp_free_remote_addr(sp->net);
 				sp->net = NULL;
 			}
 		}
 	}
 	TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
 		if (chk->whoTo == net) {
 			sctp_free_remote_addr(chk->whoTo);
 			chk->whoTo = NULL;
 		}
 	}
 }
 
 int
 sctp_med_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *num_out,
     int *reason_code,
     int control_only, int from_where,
     struct timeval *now, int *now_filled, int frag_point, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	/**
 	 * Ok this is the generic chunk service queue. we must do the
 	 * following: - Service the stream queue that is next, moving any
 	 * message (note I must get a complete message i.e. FIRST/MIDDLE and
 	 * LAST to the out queue in one pass) and assigning TSN's - Check to
 	 * see if the cwnd/rwnd allows any output, if so we go ahead and
 	 * fomulate and send the low level chunks. Making sure to combine
 	 * any control in the control chunk queue also.
 	 */
 	struct sctp_nets *net, *start_at, *sack_goes_to = NULL, *old_start_at = NULL;
 	struct mbuf *outchain, *endoutchain;
 	struct sctp_tmit_chunk *chk, *nchk;
 
 	/* temp arrays for unlinking */
 	struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
 	int no_fragmentflg, error;
 	unsigned int max_rwnd_per_dest, max_send_per_dest;
 	int one_chunk, hbflag, skip_data_for_this_net;
 	int asconf, cookie, no_out_cnt;
 	int bundle_at, ctl_cnt, no_data_chunks, eeor_mode;
 	unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
 	int tsns_sent = 0;
 	uint32_t auth_offset = 0;
 	struct sctp_auth_chunk *auth = NULL;
 	uint16_t auth_keyid;
 	int override_ok = 1;
 	int skip_fill_up = 0;
 	int data_auth_reqd = 0;
 
 	/*
 	 * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the
 	 * destination.
 	 */
 	int quit_now = 0;
 
 	*num_out = 0;
 	*reason_code = 0;
 	auth_keyid = stcb->asoc.authinfo.active_keyid;
 	if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
 		eeor_mode = 1;
 	} else {
 		eeor_mode = 0;
 	}
 	ctl_cnt = no_out_cnt = asconf = cookie = 0;
 	/*
 	 * First lets prime the pump. For each destination, if there is room
 	 * in the flight size, attempt to pull an MTU's worth out of the
 	 * stream queues into the general send_queue
 	 */
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xC2, 2);
 #endif
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	hbflag = 0;
 	if ((control_only) || (asoc->stream_reset_outstanding))
 		no_data_chunks = 1;
 	else
 		no_data_chunks = 0;
 
 	/* Nothing to possible to send? */
 	if ((TAILQ_EMPTY(&asoc->control_send_queue) ||
 	    (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) &&
 	    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 	    TAILQ_EMPTY(&asoc->send_queue) &&
 	    stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
 nothing_to_send:
 		*reason_code = 9;
 		return (0);
 	}
 	if (asoc->peers_rwnd == 0) {
 		/* No room in peers rwnd */
 		*reason_code = 1;
 		if (asoc->total_flight > 0) {
 			/* we are allowed one chunk in flight */
 			no_data_chunks = 1;
 		}
 	}
 	if (stcb->asoc.ecn_echo_cnt_onq) {
 		/* Record where a sack goes, if any */
 		if (no_data_chunks &&
 		    (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) {
 			/* Nothing but ECNe to send - we don't do that */
 			goto nothing_to_send;
 		}
 		TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 			if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 			    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) {
 				sack_goes_to = chk->whoTo;
 				break;
 			}
 		}
 	}
 	max_rwnd_per_dest = ((asoc->peers_rwnd + asoc->total_flight) / asoc->numnets);
 	if (stcb->sctp_socket)
 		max_send_per_dest = SCTP_SB_LIMIT_SND(stcb->sctp_socket) / asoc->numnets;
 	else
 		max_send_per_dest = 0;
 	if (no_data_chunks == 0) {
 		/* How many non-directed chunks are there? */
 		TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
 			if (chk->whoTo == NULL) {
 				/*
 				 * We already have non-directed chunks on
 				 * the queue, no need to do a fill-up.
 				 */
 				skip_fill_up = 1;
 				break;
 			}
 		}
 
 	}
 	if ((no_data_chunks == 0) &&
 	    (skip_fill_up == 0) &&
 	    (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc))) {
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			/*
 			 * This for loop we are in takes in each net, if
 			 * its's got space in cwnd and has data sent to it
 			 * (when CMT is off) then it calls
 			 * sctp_fill_outqueue for the net. This gets data on
 			 * the send queue for that network.
 			 * 
 			 * In sctp_fill_outqueue TSN's are assigned and data is
 			 * copied out of the stream buffers. Note mostly
 			 * copy by reference (we hope).
 			 */
 			net->window_probe = 0;
 			if ((net != stcb->asoc.alternate) &&
 			    ((net->dest_state & SCTP_ADDR_PF) ||
 			    (!(net->dest_state & SCTP_ADDR_REACHABLE)) ||
 			    (net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 					sctp_log_cwnd(stcb, net, 1,
 					    SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 				}
 				continue;
 			}
 			if ((stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) &&
 			    (net->flight_size == 0)) {
 				(*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net);
 			}
 			if (net->flight_size >= net->cwnd) {
 				/* skip this network, no room - can't fill */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 					sctp_log_cwnd(stcb, net, 3,
 					    SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 				}
 				continue;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 4, SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 			}
 			sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now, so_locked);
 			if (quit_now) {
 				/* memory alloc failure */
 				no_data_chunks = 1;
 				break;
 			}
 		}
 	}
 	/* now service each destination and send out what we can for it */
 	/* Nothing to send? */
 	if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 	    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 	    TAILQ_EMPTY(&asoc->send_queue)) {
 		*reason_code = 8;
 		return (0);
 	}
 	if (asoc->sctp_cmt_on_off > 0) {
 		/* get the last start point */
 		start_at = asoc->last_net_cmt_send_started;
 		if (start_at == NULL) {
 			/* null so to beginning */
 			start_at = TAILQ_FIRST(&asoc->nets);
 		} else {
 			start_at = TAILQ_NEXT(asoc->last_net_cmt_send_started, sctp_next);
 			if (start_at == NULL) {
 				start_at = TAILQ_FIRST(&asoc->nets);
 			}
 		}
 		asoc->last_net_cmt_send_started = start_at;
 	} else {
 		start_at = TAILQ_FIRST(&asoc->nets);
 	}
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->whoTo == NULL) {
 			if (asoc->alternate) {
 				chk->whoTo = asoc->alternate;
 			} else {
 				chk->whoTo = asoc->primary_destination;
 			}
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 	}
 	old_start_at = NULL;
 again_one_more_time:
 	for (net = start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
 		/* how much can we send? */
 		/* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */
 		if (old_start_at && (old_start_at == net)) {
 			/* through list ocmpletely. */
 			break;
 		}
 		tsns_sent = 0xa;
 		if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 		    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 		    (net->flight_size >= net->cwnd)) {
 			/*
 			 * Nothing on control or asconf and flight is full,
 			 * we can skip even in the CMT case.
 			 */
 			continue;
 		}
 		bundle_at = 0;
 		endoutchain = outchain = NULL;
 		no_fragmentflg = 1;
 		one_chunk = 0;
 		if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 			skip_data_for_this_net = 1;
 		} else {
 			skip_data_for_this_net = 0;
 		}
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
 			mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
 			break;
 #endif
 		default:
 			/* TSNH */
 			mtu = net->mtu;
 			break;
 		}
 		mx_mtu = mtu;
 		to_out = 0;
 		if (mtu > asoc->peers_rwnd) {
 			if (asoc->total_flight > 0) {
 				/* We have a packet in flight somewhere */
 				r_mtu = asoc->peers_rwnd;
 			} else {
 				/* We are always allowed to send one MTU out */
 				one_chunk = 1;
 				r_mtu = mtu;
 			}
 		} else {
 			r_mtu = mtu;
 		}
 		/************************/
 		/* ASCONF transmission */
 		/************************/
 		/* Now first lets go through the asconf queue */
 		TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) {
 			if (chk->rec.chunk_id.id != SCTP_ASCONF) {
 				continue;
 			}
 			if (chk->whoTo == NULL) {
 				if (asoc->alternate == NULL) {
 					if (asoc->primary_destination != net) {
 						break;
 					}
 				} else {
 					if (asoc->alternate != net) {
 						break;
 					}
 				}
 			} else {
 				if (chk->whoTo != net) {
 					break;
 				}
 			}
 			if (chk->data == NULL) {
 				break;
 			}
 			if (chk->sent != SCTP_DATAGRAM_UNSENT &&
 			    chk->sent != SCTP_DATAGRAM_RESEND) {
 				break;
 			}
 			/*
 			 * if no AUTH is yet included and this chunk
 			 * requires it, make sure to account for it.  We
 			 * don't apply the size until the AUTH chunk is
 			 * actually added below in case there is no room for
 			 * this chunk. NOTE: we overload the use of "omtu"
 			 * here
 			 */
 			if ((auth == NULL) &&
 			    sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks)) {
 				omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 			} else
 				omtu = 0;
 			/* Here we do NOT factor the r_mtu */
 			if ((chk->send_size < (int)(mtu - omtu)) ||
 			    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 				/*
 				 * We probably should glom the mbuf chain
 				 * from the chk->data for control but the
 				 * problem is it becomes yet one more level
 				 * of tracking to do if for some reason
 				 * output fails. Then I have got to
 				 * reconstruct the merged control chain.. el
 				 * yucko.. for now we take the easy way and
 				 * do the copy
 				 */
 				/*
 				 * Add an AUTH chunk, if chunk requires it
 				 * save the offset into the chain for AUTH
 				 */
 				if ((auth == NULL) &&
 				    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 				    stcb->asoc.peer_auth_chunks))) {
 					outchain = sctp_add_auth_chunk(outchain,
 					    &endoutchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    chk->rec.chunk_id.id);
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				}
 				outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
 				    (int)chk->rec.chunk_id.can_take_data,
 				    chk->send_size, chk->copy_by_ref);
 				if (outchain == NULL) {
 					*reason_code = 8;
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 					return (ENOMEM);
 				}
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				/* update our MTU size */
 				if (mtu > (chk->send_size + omtu))
 					mtu -= (chk->send_size + omtu);
 				else
 					mtu = 0;
 				to_out += (chk->send_size + omtu);
 				/* Do clear IP_DF ? */
 				if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 					no_fragmentflg = 0;
 				}
 				if (chk->rec.chunk_id.can_take_data)
 					chk->data = NULL;
 				/*
 				 * set hb flag since we can use these for
 				 * RTO
 				 */
 				hbflag = 1;
 				asconf = 1;
 				/*
 				 * should sysctl this: don't bundle data
 				 * with ASCONF since it requires AUTH
 				 */
 				no_data_chunks = 1;
 				chk->sent = SCTP_DATAGRAM_SENT;
 				if (chk->whoTo == NULL) {
 					chk->whoTo = net;
 					atomic_add_int(&net->ref_count, 1);
 				}
 				chk->snd_count++;
 				if (mtu == 0) {
 					/*
 					 * Ok we are out of room but we can
 					 * output without effecting the
 					 * flight size since this little guy
 					 * is a control only packet.
 					 */
 					sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
 					/*
 					 * do NOT clear the asconf flag as
 					 * it is used to do appropriate
 					 * source address selection.
 					 */
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain, auth_offset, auth,
 					    stcb->asoc.authinfo.active_keyid,
 					    no_fragmentflg, 0, asconf,
 					    inp->sctp_lport, stcb->rport,
 					    htonl(stcb->asoc.peer_vtag),
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
 						if (from_where == 0) {
 							SCTP_STAT_INCR(sctps_lowlevelerrusr);
 						}
 						if (*now_filled == 0) {
 							(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 							*now_filled = 1;
 							*now = net->last_sent_time;
 						} else {
 							net->last_sent_time = *now;
 						}
 						hbflag = 0;
 						/* error, could not output */
 						if (error == EHOSTUNREACH) {
 							/*
 							 * Destination went
 							 * unreachable
 							 * during this send
 							 */
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
 						continue;
 					} else
 						asoc->ifp_had_enobuf = 0;
 					if (*now_filled == 0) {
 						(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 						*now_filled = 1;
 						*now = net->last_sent_time;
 					} else {
 						net->last_sent_time = *now;
 					}
 					hbflag = 0;
 					/*
 					 * increase the number we sent, if a
 					 * cookie is sent we don't tell them
 					 * any was sent out.
 					 */
 					outchain = endoutchain = NULL;
 					auth = NULL;
 					auth_offset = 0;
 					if (!no_out_cnt)
 						*num_out += ctl_cnt;
 					/* recalc a clean slate and setup */
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 						mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						mtu = net->mtu - SCTP_MIN_OVERHEAD;
 						break;
 #endif
 					default:
 						/* TSNH */
 						mtu = net->mtu;
 						break;
 					}
 					to_out = 0;
 					no_fragmentflg = 1;
 				}
 			}
 		}
 		/************************/
 		/* Control transmission */
 		/************************/
 		/* Now first lets go through the control queue */
 		TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) {
 			if ((sack_goes_to) &&
 			    (chk->rec.chunk_id.id == SCTP_ECN_ECHO) &&
 			    (chk->whoTo != sack_goes_to)) {
 				/*
 				 * if we have a sack in queue, and we are
 				 * looking at an ecn echo that is NOT queued
 				 * to where the sack is going..
 				 */
 				if (chk->whoTo == net) {
 					/*
 					 * Don't transmit it to where its
 					 * going (current net)
 					 */
 					continue;
 				} else if (sack_goes_to == net) {
 					/*
 					 * But do transmit it to this
 					 * address
 					 */
 					goto skip_net_check;
 				}
 			}
 			if (chk->whoTo == NULL) {
 				if (asoc->alternate == NULL) {
 					if (asoc->primary_destination != net) {
 						continue;
 					}
 				} else {
 					if (asoc->alternate != net) {
 						continue;
 					}
 				}
 			} else {
 				if (chk->whoTo != net) {
 					continue;
 				}
 			}
 	skip_net_check:
 			if (chk->data == NULL) {
 				continue;
 			}
 			if (chk->sent != SCTP_DATAGRAM_UNSENT) {
 				/*
 				 * It must be unsent. Cookies and ASCONF's
 				 * hang around but there timers will force
 				 * when marked for resend.
 				 */
 				continue;
 			}
 			/*
 			 * if no AUTH is yet included and this chunk
 			 * requires it, make sure to account for it.  We
 			 * don't apply the size until the AUTH chunk is
 			 * actually added below in case there is no room for
 			 * this chunk. NOTE: we overload the use of "omtu"
 			 * here
 			 */
 			if ((auth == NULL) &&
 			    sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks)) {
 				omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 			} else
 				omtu = 0;
 			/* Here we do NOT factor the r_mtu */
 			if ((chk->send_size <= (int)(mtu - omtu)) ||
 			    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 				/*
 				 * We probably should glom the mbuf chain
 				 * from the chk->data for control but the
 				 * problem is it becomes yet one more level
 				 * of tracking to do if for some reason
 				 * output fails. Then I have got to
 				 * reconstruct the merged control chain.. el
 				 * yucko.. for now we take the easy way and
 				 * do the copy
 				 */
 				/*
 				 * Add an AUTH chunk, if chunk requires it
 				 * save the offset into the chain for AUTH
 				 */
 				if ((auth == NULL) &&
 				    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 				    stcb->asoc.peer_auth_chunks))) {
 					outchain = sctp_add_auth_chunk(outchain,
 					    &endoutchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    chk->rec.chunk_id.id);
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				}
 				outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
 				    (int)chk->rec.chunk_id.can_take_data,
 				    chk->send_size, chk->copy_by_ref);
 				if (outchain == NULL) {
 					*reason_code = 8;
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 					return (ENOMEM);
 				}
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				/* update our MTU size */
 				if (mtu > (chk->send_size + omtu))
 					mtu -= (chk->send_size + omtu);
 				else
 					mtu = 0;
 				to_out += (chk->send_size + omtu);
 				/* Do clear IP_DF ? */
 				if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 					no_fragmentflg = 0;
 				}
 				if (chk->rec.chunk_id.can_take_data)
 					chk->data = NULL;
 				/* Mark things to be removed, if needed */
 				if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) ||	/* EY */
 				    (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
 				    (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
 				    (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
 				    (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
 				    (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
 				    (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
 					if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) {
 						hbflag = 1;
 					}
 					/* remove these chunks at the end */
 					if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 					    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) {
 						/* turn off the timer */
 						if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 							    inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
 						}
 					}
 					ctl_cnt++;
 				} else {
 					/*
 					 * Other chunks, since they have
 					 * timers running (i.e. COOKIE) we
 					 * just "trust" that it gets sent or
 					 * retransmitted.
 					 */
 					ctl_cnt++;
 					if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
 						cookie = 1;
 						no_out_cnt = 1;
 					} else if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
 						/*
 						 * Increment ecne send count
 						 * here this means we may be
 						 * over-zealous in our
 						 * counting if the send
 						 * fails, but its the best
 						 * place to do it (we used
 						 * to do it in the queue of
 						 * the chunk, but that did
 						 * not tell how many times
 						 * it was sent.
 						 */
 						SCTP_STAT_INCR(sctps_sendecne);
 					}
 					chk->sent = SCTP_DATAGRAM_SENT;
 					if (chk->whoTo == NULL) {
 						chk->whoTo = net;
 						atomic_add_int(&net->ref_count, 1);
 					}
 					chk->snd_count++;
 				}
 				if (mtu == 0) {
 					/*
 					 * Ok we are out of room but we can
 					 * output without effecting the
 					 * flight size since this little guy
 					 * is a control only packet.
 					 */
 					if (asconf) {
 						sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
 						/*
 						 * do NOT clear the asconf
 						 * flag as it is used to do
 						 * appropriate source
 						 * address selection.
 						 */
 					}
 					if (cookie) {
 						sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
 						cookie = 0;
 					}
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain,
 					    auth_offset, auth,
 					    stcb->asoc.authinfo.active_keyid,
 					    no_fragmentflg, 0, asconf,
 					    inp->sctp_lport, stcb->rport,
 					    htonl(stcb->asoc.peer_vtag),
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
 						if (from_where == 0) {
 							SCTP_STAT_INCR(sctps_lowlevelerrusr);
 						}
 						/* error, could not output */
 						if (hbflag) {
 							if (*now_filled == 0) {
 								(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 								*now_filled = 1;
 								*now = net->last_sent_time;
 							} else {
 								net->last_sent_time = *now;
 							}
 							hbflag = 0;
 						}
 						if (error == EHOSTUNREACH) {
 							/*
 							 * Destination went
 							 * unreachable
 							 * during this send
 							 */
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
 						continue;
 					} else
 						asoc->ifp_had_enobuf = 0;
 					/* Only HB or ASCONF advances time */
 					if (hbflag) {
 						if (*now_filled == 0) {
 							(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 							*now_filled = 1;
 							*now = net->last_sent_time;
 						} else {
 							net->last_sent_time = *now;
 						}
 						hbflag = 0;
 					}
 					/*
 					 * increase the number we sent, if a
 					 * cookie is sent we don't tell them
 					 * any was sent out.
 					 */
 					outchain = endoutchain = NULL;
 					auth = NULL;
 					auth_offset = 0;
 					if (!no_out_cnt)
 						*num_out += ctl_cnt;
 					/* recalc a clean slate and setup */
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 						mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						mtu = net->mtu - SCTP_MIN_OVERHEAD;
 						break;
 #endif
 					default:
 						/* TSNH */
 						mtu = net->mtu;
 						break;
 					}
 					to_out = 0;
 					no_fragmentflg = 1;
 				}
 			}
 		}
 		/* JRI: if dest is in PF state, do not send data to it */
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (net != stcb->asoc.alternate) &&
 		    (net->dest_state & SCTP_ADDR_PF)) {
 			goto no_data_fill;
 		}
 		if (net->flight_size >= net->cwnd) {
 			goto no_data_fill;
 		}
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_RECV_BUFFER_SPLITTING) &&
 		    (net->flight_size > max_rwnd_per_dest)) {
 			goto no_data_fill;
 		}
 		/*
 		 * We need a specific accounting for the usage of the send
 		 * buffer. We also need to check the number of messages per
 		 * net. For now, this is better than nothing and it disabled
 		 * by default...
 		 */
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_SEND_BUFFER_SPLITTING) &&
 		    (max_send_per_dest > 0) &&
 		    (net->flight_size > max_send_per_dest)) {
 			goto no_data_fill;
 		}
 		/*********************/
 		/* Data transmission */
 		/*********************/
 		/*
 		 * if AUTH for DATA is required and no AUTH has been added
 		 * yet, account for this in the mtu now... if no data can be
 		 * bundled, this adjustment won't matter anyways since the
 		 * packet will be going out...
 		 */
 		data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA,
 		    stcb->asoc.peer_auth_chunks);
 		if (data_auth_reqd && (auth == NULL)) {
 			mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 		}
 		/* now lets add any data within the MTU constraints */
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
 				omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
 			else
 				omtu = 0;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
 				omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
 			else
 				omtu = 0;
 			break;
 #endif
 		default:
 			/* TSNH */
 			omtu = 0;
 			break;
 		}
 		if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) &&
 		    (skip_data_for_this_net == 0)) ||
 		    (cookie)) {
 			TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 				if (no_data_chunks) {
 					/* let only control go out */
 					*reason_code = 1;
 					break;
 				}
 				if (net->flight_size >= net->cwnd) {
 					/* skip this net, no room for data */
 					*reason_code = 2;
 					break;
 				}
 				if ((chk->whoTo != NULL) &&
 				    (chk->whoTo != net)) {
 					/* Don't send the chunk on this net */
 					continue;
 				}
 				if (asoc->sctp_cmt_on_off == 0) {
 					if ((asoc->alternate) &&
 					    (asoc->alternate != net) &&
 					    (chk->whoTo == NULL)) {
 						continue;
 					} else if ((net != asoc->primary_destination) &&
 						    (asoc->alternate == NULL) &&
 					    (chk->whoTo == NULL)) {
 						continue;
 					}
 				}
 				if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) {
 					/*-
 					 * strange, we have a chunk that is
 					 * to big for its destination and
 					 * yet no fragment ok flag.
 					 * Something went wrong when the
 					 * PMTU changed...we did not mark
 					 * this chunk for some reason?? I
 					 * will fix it here by letting IP
 					 * fragment it for now and printing
 					 * a warning. This really should not
 					 * happen ...
 					 */
 					SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n",
 					    chk->send_size, mtu);
 					chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
 				    ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) == SCTP_STATE_SHUTDOWN_PENDING)) {
 					struct sctp_data_chunk *dchkh;
 
 					dchkh = mtod(chk->data, struct sctp_data_chunk *);
 					dchkh->ch.chunk_flags |= SCTP_DATA_SACK_IMMEDIATELY;
 				}
 				if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) ||
 				    ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) {
 					/* ok we will add this one */
 
 					/*
 					 * Add an AUTH chunk, if chunk
 					 * requires it, save the offset into
 					 * the chain for AUTH
 					 */
 					if (data_auth_reqd) {
 						if (auth == NULL) {
 							outchain = sctp_add_auth_chunk(outchain,
 							    &endoutchain,
 							    &auth,
 							    &auth_offset,
 							    stcb,
 							    SCTP_DATA);
 							auth_keyid = chk->auth_keyid;
 							override_ok = 0;
 							SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 						} else if (override_ok) {
 							/*
 							 * use this data's
 							 * keyid
 							 */
 							auth_keyid = chk->auth_keyid;
 							override_ok = 0;
 						} else if (auth_keyid != chk->auth_keyid) {
 							/*
 							 * different keyid,
 							 * so done bundling
 							 */
 							break;
 						}
 					}
 					outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0,
 					    chk->send_size, chk->copy_by_ref);
 					if (outchain == NULL) {
 						SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n");
 						if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 							sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 						}
 						*reason_code = 3;
 						SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 						return (ENOMEM);
 					}
 					/* upate our MTU size */
 					/* Do clear IP_DF ? */
 					if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 						no_fragmentflg = 0;
 					}
 					/* unsigned subtraction of mtu */
 					if (mtu > chk->send_size)
 						mtu -= chk->send_size;
 					else
 						mtu = 0;
 					/* unsigned subtraction of r_mtu */
 					if (r_mtu > chk->send_size)
 						r_mtu -= chk->send_size;
 					else
 						r_mtu = 0;
 
 					to_out += chk->send_size;
 					if ((to_out > mx_mtu) && no_fragmentflg) {
 #ifdef INVARIANTS
 						panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out);
 #else
 						SCTP_PRINTF("Exceeding mtu of %d out size is %d\n",
 						    mx_mtu, to_out);
 #endif
 					}
 					chk->window_probe = 0;
 					data_list[bundle_at++] = chk;
 					if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
 						break;
 					}
 					if (chk->sent == SCTP_DATAGRAM_UNSENT) {
 						if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
 							SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks);
 						} else {
 							SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks);
 						}
 						if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) &&
 						    ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0))
 							/*
 							 * Count number of
 							 * user msg's that
 							 * were fragmented
 							 * we do this by
 							 * counting when we
 							 * see a LAST
 							 * fragment only.
 							 */
 							SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs);
 					}
 					if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) {
 						if ((one_chunk) && (stcb->asoc.total_flight == 0)) {
 							data_list[0]->window_probe = 1;
 							net->window_probe = 1;
 						}
 						break;
 					}
 				} else {
 					/*
 					 * Must be sent in order of the
 					 * TSN's (on a network)
 					 */
 					break;
 				}
 			}	/* for (chunk gather loop for this net) */
 		}		/* if asoc.state OPEN */
 no_data_fill:
 		/* Is there something to send for this destination? */
 		if (outchain) {
 			/* We may need to start a control timer or two */
 			if (asconf) {
 				sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
 				    stcb, net);
 				/*
 				 * do NOT clear the asconf flag as it is
 				 * used to do appropriate source address
 				 * selection.
 				 */
 			}
 			if (cookie) {
 				sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
 				cookie = 0;
 			}
 			/* must start a send timer if data is being sent */
 			if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
 				/*
 				 * no timer running on this destination
 				 * restart it.
 				 */
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 			}
 			/* Now send it, if there is anything to send :> */
 			if ((error = sctp_lowlevel_chunk_output(inp,
 			    stcb,
 			    net,
 			    (struct sockaddr *)&net->ro._l_addr,
 			    outchain,
 			    auth_offset,
 			    auth,
 			    auth_keyid,
 			    no_fragmentflg,
 			    bundle_at,
 			    asconf,
 			    inp->sctp_lport, stcb->rport,
 			    htonl(stcb->asoc.peer_vtag),
 			    net->port, NULL,
 			    0, 0,
 			    so_locked))) {
 				/* error, we could not output */
 				if (error == ENOBUFS) {
 					SCTP_STAT_INCR(sctps_lowlevelerr);
 					asoc->ifp_had_enobuf = 1;
 				}
 				if (from_where == 0) {
 					SCTP_STAT_INCR(sctps_lowlevelerrusr);
 				}
 				SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 				if (hbflag) {
 					if (*now_filled == 0) {
 						(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 						*now_filled = 1;
 						*now = net->last_sent_time;
 					} else {
 						net->last_sent_time = *now;
 					}
 					hbflag = 0;
 				}
 				if (error == EHOSTUNREACH) {
 					/*
 					 * Destination went unreachable
 					 * during this send
 					 */
 					sctp_move_chunks_from_net(stcb, net);
 				}
 				*reason_code = 6;
 				/*-
 				 * I add this line to be paranoid. As far as
 				 * I can tell the continue, takes us back to
 				 * the top of the for, but just to make sure
 				 * I will reset these again here.
 				 */
 				ctl_cnt = bundle_at = 0;
 				continue;	/* This takes us back to the
 						 * for() for the nets. */
 			} else {
 				asoc->ifp_had_enobuf = 0;
 			}
 			endoutchain = NULL;
 			auth = NULL;
 			auth_offset = 0;
 			if (bundle_at || hbflag) {
 				/* For data/asconf and hb set time */
 				if (*now_filled == 0) {
 					(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 					*now_filled = 1;
 					*now = net->last_sent_time;
 				} else {
 					net->last_sent_time = *now;
 				}
 			}
 			if (!no_out_cnt) {
 				*num_out += (ctl_cnt + bundle_at);
 			}
 			if (bundle_at) {
 				/* setup for a RTO measurement */
 				tsns_sent = data_list[0]->rec.data.TSN_seq;
 				/* fill time if not already filled */
 				if (*now_filled == 0) {
 					(void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
 					*now_filled = 1;
 					*now = asoc->time_last_sent;
 				} else {
 					asoc->time_last_sent = *now;
 				}
 				if (net->rto_needed) {
 					data_list[0]->do_rtt = 1;
 					net->rto_needed = 0;
 				}
 				SCTP_STAT_INCR_BY(sctps_senddata, bundle_at);
 				sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net);
 			}
 			if (one_chunk) {
 				break;
 			}
 		}
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 			sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND);
 		}
 	}
 	if (old_start_at == NULL) {
 		old_start_at = start_at;
 		start_at = TAILQ_FIRST(&asoc->nets);
 		if (old_start_at)
 			goto again_one_more_time;
 	}
 	/*
 	 * At the end there should be no NON timed chunks hanging on this
 	 * queue.
 	 */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 		sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND);
 	}
 	if ((*num_out == 0) && (*reason_code == 0)) {
 		*reason_code = 4;
 	} else {
 		*reason_code = 5;
 	}
 	sctp_clean_up_ctl(stcb, asoc, so_locked);
 	return (0);
 }
 
 void
 sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
 {
 	/*-
 	 * Prepend an OPERATION-ERROR chunk header to the mbuf chain op_err
 	 * and put the resulting chunk on the end of the control chunk queue
 	 * of stcb's association.
 	 *
 	 * The TCB lock must be held (asserted below).  op_err is consumed:
 	 * it is freed here when no chunk descriptor can be allocated, and
 	 * otherwise becomes the data of the queued chunk.  Nothing is
 	 * queued when either the descriptor or the header space cannot be
 	 * obtained.
 	 */
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 	struct mbuf *mat;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(op_err);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	/*
 	 * The control queue transmit loop inspects the chunk id, the
 	 * can_take_data flag and the fragment-ok flag of every queued
 	 * chunk, so they must be set explicitly here just like the other
 	 * control chunk senders do.
 	 */
 	chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT);
 	if (op_err == NULL) {
 		/* the prepend left us without a chain, give the chunk back */
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	/* The chunk length is the sum of all mbuf lengths in the chain. */
 	chk->send_size = 0;
 	for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) {
 		chk->send_size += SCTP_BUF_LEN(mat);
 	}
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = op_err;
 	/* no destination chosen here; the output path picks one */
 	chk->whoTo = NULL;
 	hdr = mtod(op_err, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_OPERATION_ERROR;
 	hdr->chunk_flags = 0;
 	hdr->chunk_length = htons(chk->send_size);
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
 	    chk,
 	    sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 }
 
 int
 sctp_send_cookie_echo(struct mbuf *m,
     int offset,
     struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	/*-
 	 * pull out the cookie and put it at the front of the control chunk
 	 * queue.
 	 */
 	int at;
 	struct mbuf *cookie;
 	struct sctp_paramhdr parm, *phdr;
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 	uint16_t ptype, plen;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* First find the cookie in the param area */
 	cookie = NULL;
 	at = offset + sizeof(struct sctp_init_chunk);
 	for (;;) {
 		phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
 		if (phdr == NULL) {
 			return (-3);
 		}
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if (ptype == SCTP_STATE_COOKIE) {
 			int pad;
 
 			/* found the cookie */
 			if ((pad = (plen % 4))) {
 				plen += 4 - pad;
 			}
 			cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
 			if (cookie == NULL) {
 				/* No memory */
 				return (-2);
 			}
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 				sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
 			}
 #endif
 			break;
 		}
 		at += SCTP_SIZE32(plen);
 	}
 	/* ok, we got the cookie lets change it into a cookie echo chunk */
 	/* first the change from param to cookie */
 	hdr = mtod(cookie, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_COOKIE_ECHO;
 	hdr->chunk_flags = 0;
 	/* get the chunk stuff now and place it in the FRONT of the queue */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(cookie);
 		return (-5);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->send_size = plen;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = cookie;
 	chk->whoTo = net;
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return (0);
 }
 
 void
 sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
     struct mbuf *m,
     int offset,
     int chk_length,
     struct sctp_nets *net)
 {
 	/*
 	 * take a HB request and make it into a HB ack and send it.
 	 */
 	struct mbuf *outchain;
 	struct sctp_chunkhdr *chdr;
 	struct sctp_tmit_chunk *chk;
 
 
 	if (net == NULL)
 		/* must have a net pointer */
 		return;
 
 	outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT);
 	if (outchain == NULL) {
 		/* gak out of memory */
 		return;
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(outchain, SCTP_MBUF_ICOPY);
 	}
 #endif
 	chdr = mtod(outchain, struct sctp_chunkhdr *);
 	chdr->chunk_type = SCTP_HEARTBEAT_ACK;
 	chdr->chunk_flags = 0;
 	if (chk_length % 4) {
 		/* need pad */
 		uint32_t cpthis = 0;
 		int padlen;
 
 		padlen = 4 - (chk_length % 4);
 		m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis);
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(outchain);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->send_size = chk_length;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = outchain;
 	chk->whoTo = net;
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 }
 
 void
 sctp_send_cookie_ack(struct sctp_tcb *stcb)
 {
 	/*
 	 * Build a COOKIE-ACK (a bare chunk header) and queue it at the tail
 	 * of the control chunk queue.  It is addressed to the net the last
 	 * control chunk came from when that is known, otherwise the
 	 * destination is left open.  Nothing is queued when memory is
 	 * short.
 	 */
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ack_hdr;
 	struct mbuf *ack_m;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	ack_m = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (ack_m == NULL) {
 		/* out of mbufs */
 		return;
 	}
 	SCTP_BUF_RESV_UF(ack_m, SCTP_MIN_OVERHEAD);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no chunk descriptor, release the mbuf again */
 		sctp_m_freem(ack_m);
 		return;
 	}
 	chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->copy_by_ref = 0;
 	chk->flags = 0;
 	chk->snd_count = 0;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->data = ack_m;
 	chk->asoc = &stcb->asoc;
 	/* take a reference on the destination only when there is one */
 	chk->whoTo = stcb->asoc.last_control_chunk_from;
 	if (chk->whoTo != NULL) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	ack_hdr = mtod(ack_m, struct sctp_chunkhdr *);
 	ack_hdr->chunk_type = SCTP_COOKIE_ACK;
 	ack_hdr->chunk_flags = 0;
 	ack_hdr->chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(ack_m) = chk->send_size;
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	stcb->asoc.ctrl_queue_cnt++;
 }
 
 
 void
 sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* formulate and queue a SHUTDOWN-ACK back to the sender */
 	struct mbuf *m_shutdown_ack;
 	struct sctp_shutdown_ack_chunk *ack_cp;
 	struct sctp_tmit_chunk *chk;
 
 	m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown_ack == NULL) {
 		/* no mbuf's */
 		return;
 	}
 	SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(m_shutdown_ack);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = m_shutdown_ack;
 	chk->whoTo = net;
 	if (chk->whoTo) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *);
 	ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK;
 	ack_cp->ch.chunk_flags = 0;
 	ack_cp->ch.chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size;
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* formulate and queue a SHUTDOWN to the sender */
 	struct mbuf *m_shutdown;
 	struct sctp_shutdown_chunk *shutdown_cp;
 	struct sctp_tmit_chunk *chk;
 
 	m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown == NULL) {
 		/* no mbuf's */
 		return;
 	}
 	SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(m_shutdown);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_SHUTDOWN;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->send_size = sizeof(struct sctp_shutdown_chunk);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = m_shutdown;
 	chk->whoTo = net;
 	if (chk->whoTo) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *);
 	shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN;
 	shutdown_cp->ch.chunk_flags = 0;
 	shutdown_cp->ch.chunk_length = htons(chk->send_size);
 	shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
 	SCTP_BUF_LEN(m_shutdown) = chk->send_size;
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
 {
 	/*
 	 * formulate and queue an ASCONF to the peer. ASCONF parameters
 	 * should be queued on the assoc queue.
 	 */
 	struct sctp_tmit_chunk *chk;
 	struct mbuf *m_asconf;
 	int len;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	if ((!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) &&
 	    (!sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS))) {
 		/* can't send a new one if there is one in flight already */
 		return;
 	}
 	/* compose an ASCONF chunk, maximum length is PMTU */
 	m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
 	if (m_asconf == NULL) {
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(m_asconf);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_ASCONF;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->data = m_asconf;
 	chk->send_size = len;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->whoTo = net;
 	if (chk->whoTo) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	TAILQ_INSERT_TAIL(&chk->asoc->asconf_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_asconf_ack(struct sctp_tcb *stcb)
 {
 	/*
 	 * formulate and queue a asconf-ack back to sender. the asconf-ack
 	 * must be stored in the tcb.
 	 */
 	struct sctp_tmit_chunk *chk;
 	struct sctp_asconf_ack *ack, *latest_ack;
 	struct mbuf *m_ack;
 	struct sctp_nets *net = NULL;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* Get the latest ASCONF-ACK */
 	latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead);
 	if (latest_ack == NULL) {
 		return;
 	}
 	if (latest_ack->last_sent_to != NULL &&
 	    latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) {
 		/* we're doing a retransmission */
 		net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0);
 		if (net == NULL) {
 			/* no alternate */
 			if (stcb->asoc.last_control_chunk_from == NULL) {
 				if (stcb->asoc.alternate) {
 					net = stcb->asoc.alternate;
 				} else {
 					net = stcb->asoc.primary_destination;
 				}
 			} else {
 				net = stcb->asoc.last_control_chunk_from;
 			}
 		}
 	} else {
 		/* normal case */
 		if (stcb->asoc.last_control_chunk_from == NULL) {
 			if (stcb->asoc.alternate) {
 				net = stcb->asoc.alternate;
 			} else {
 				net = stcb->asoc.primary_destination;
 			}
 		} else {
 			net = stcb->asoc.last_control_chunk_from;
 		}
 	}
 	latest_ack->last_sent_to = net;
 
 	TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) {
 		if (ack->data == NULL) {
 			continue;
 		}
 		/* copy the asconf_ack */
 		m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
 		if (m_ack == NULL) {
 			/* couldn't copy it */
 			return;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY);
 		}
 #endif
 
 		sctp_alloc_a_chunk(stcb, chk);
 		if (chk == NULL) {
 			/* no memory */
 			if (m_ack)
 				sctp_m_freem(m_ack);
 			return;
 		}
 		chk->copy_by_ref = 0;
 		chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
 		chk->rec.chunk_id.can_take_data = 1;
 		chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 		chk->whoTo = net;
 		if (chk->whoTo) {
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 		chk->data = m_ack;
 		chk->send_size = ack->len;
 		chk->sent = SCTP_DATAGRAM_UNSENT;
 		chk->snd_count = 0;
 		chk->asoc = &stcb->asoc;
 
 		TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 		chk->asoc->ctrl_queue_cnt++;
 	}
 	return;
 }
 
 
 static int
 sctp_chunk_retransmission(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	/*-
 	 * send out one MTU of retransmission. If fast_retransmit is
 	 * happening we ignore the cwnd. Otherwise we obey the cwnd and
 	 * rwnd. For a Cookie or Asconf in the control chunk queue we
 	 * retransmit them by themselves.
 	 *
 	 * For data chunks we will pick out the lowest TSN's in the sent_queue
 	 * marked for resend and bundle them all together (up to a MTU of
 	 * destination). The address to send to should have been
 	 * selected/changed where the retransmission was marked (i.e. in FR
 	 * or t3-timeout routines).
 	 *
 	 * Parameters:
 	 *   inp, stcb, asoc - endpoint, association control block and the
 	 *                     association state whose queues are scanned.
 	 *   cnt_out    - out: zeroed on entry, then incremented by the
 	 *                number of chunks handed to the lower layer.
 	 *   now,
 	 *   now_filled - timestamp cache: when *now_filled is 0 the current
 	 *                time is fetched, stored in *now and *now_filled is
 	 *                set to 1; otherwise *now is reused.
 	 *   fr_done    - out: set to 1 when the chunk chosen for output is
 	 *                marked doing_fast_retransmit. Never cleared here.
 	 *   so_locked  - passed through to the routines called from here.
 	 *
 	 * Return values visible in this function:
 	 *   0                - both queues were empty, a packet was sent, or
 	 *                      the sent_queue scan found nothing eligible.
 	 *   1                - association still in COOKIE_WAIT/COOKIE_ECHOED,
 	 *                      peer rwnd too small, nothing fit, or a single
 	 *                      chunk was sent because rwnd was below the MTU.
 	 *   SCTP_RETRAN_DONE - no control chunk to resend and sent_queue is
 	 *                      empty.
 	 *   SCTP_RETRAN_EXIT - the association was aborted because a chunk
 	 *                      reached sctp_max_retran_chunk sends.
 	 *   ENOMEM           - copying a data chunk's mbuf chain failed.
 	 *   other            - error returned by sctp_lowlevel_chunk_output().
 	 */
 	struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
 	struct sctp_tmit_chunk *chk, *fwd;
 	struct mbuf *m, *endofchain;
 	struct sctp_nets *net = NULL;
 	uint32_t tsns_sent = 0;
 	int no_fragmentflg, bundle_at, cnt_thru;
 	unsigned int mtu;
 	int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started;
 	struct sctp_auth_chunk *auth = NULL;
 	uint32_t auth_offset = 0;
 	uint16_t auth_keyid;
 	int override_ok = 1;
 	int data_auth_reqd = 0;
 	uint32_t dmtu = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	tmr_started = ctl_cnt = bundle_at = error = 0;
 	no_fragmentflg = 1;
 	fwd_tsn = 0;
 	*cnt_out = 0;
 	fwd = NULL;
 	endofchain = m = NULL;
 	auth_keyid = stcb->asoc.authinfo.active_keyid;
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xC3, 1);
 #endif
 	if ((TAILQ_EMPTY(&asoc->sent_queue)) &&
 	    (TAILQ_EMPTY(&asoc->control_send_queue))) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n",
 		    asoc->sent_queue_retran_cnt);
 		asoc->sent_queue_cnt = 0;
 		asoc->sent_queue_cnt_removeable = 0;
 		/* send back 0/0 so we enter normal transmission */
 		*cnt_out = 0;
 		return (0);
 	}
 	/*
 	 * Pass 1: look for the first COOKIE-ECHO, STREAM-RESET or FORWARD-TSN
 	 * on the control queue that is marked for resend. At most one such
 	 * chunk is picked (the loop breaks after copying it into m).
 	 */
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) ||
 		    (chk->rec.chunk_id.id == SCTP_STREAM_RESET) ||
 		    (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) {
 			if (chk->sent != SCTP_DATAGRAM_RESEND) {
 				continue;
 			}
 			if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
 				if (chk != asoc->str_reset) {
 					/*
 					 * not eligible for retran if its
 					 * not ours
 					 */
 					continue;
 				}
 			}
 			ctl_cnt++;
 			if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 				fwd_tsn = 1;
 			}
 			/*
 			 * Add an AUTH chunk, if chunk requires it save the
 			 * offset into the chain for AUTH
 			 */
 			if ((auth == NULL) &&
 			    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks))) {
 				m = sctp_add_auth_chunk(m, &endofchain,
 				    &auth, &auth_offset,
 				    stcb,
 				    chk->rec.chunk_id.id);
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 			}
 			m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
 			break;
 		}
 	}
 	one_chunk = 0;
 	cnt_thru = 0;
 	/* do we have control chunks to retransmit? */
 	if (m != NULL) {
 		/*
 		 * m is only set inside the loop above right before its break,
 		 * so chk still points at the selected control chunk here.
 		 */
 		/* Start a timer no matter if we succeed or fail */
 		if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
 			sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
 		} else if (chk->rec.chunk_id.id == SCTP_ASCONF)
 			/*
 			 * NOTE(review): the selection loop above never picks
 			 * an SCTP_ASCONF chunk, so this branch looks
 			 * unreachable from here - confirm.
 			 */
 			sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo);
 		chk->snd_count++;	/* update our count */
 		if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo,
 		    (struct sockaddr *)&chk->whoTo->ro._l_addr, m,
 		    auth_offset, auth, stcb->asoc.authinfo.active_keyid,
 		    no_fragmentflg, 0, 0,
 		    inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
 		    chk->whoTo->port, NULL,
 		    0, 0,
 		    so_locked))) {
 			SCTP_STAT_INCR(sctps_lowlevelerr);
 			return (error);
 		}
 		endofchain = NULL;
 		auth = NULL;
 		auth_offset = 0;
 		/*
 		 * We don't want to mark the net->sent time here since this
 		 * we use this for HB and retrans cannot measure RTT
 		 */
 		/* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */
 		*cnt_out += 1;
 		chk->sent = SCTP_DATAGRAM_SENT;
 		sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt);
 		if (fwd_tsn == 0) {
 			return (0);
 		} else {
 			/* Clean up the fwd-tsn list */
 			sctp_clean_up_ctl(stcb, asoc, so_locked);
 			return (0);
 		}
 	}
 	/*
 	 * Ok, it is just data retransmission we need to do or that and a
 	 * fwd-tsn with it all.
 	 */
 	if (TAILQ_EMPTY(&asoc->sent_queue)) {
 		return (SCTP_RETRAN_DONE);
 	}
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
 		/* not yet open, resend the cookie and that is it */
 		return (1);
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_auditing(20, inp, stcb, NULL);
 #endif
 	data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks);
 	/*
 	 * Pass 2: find the first data chunk marked for resend, bundle what
 	 * fits behind it for the same destination and send one packet.
 	 * Every path through the loop body ends in continue, break or
 	 * return, so at most one packet is sent per call.
 	 */
 	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
 		if (chk->sent != SCTP_DATAGRAM_RESEND) {
 			/* No, not sent to this net or not ready for rtx */
 			continue;
 		}
 		if (chk->data == NULL) {
 			SCTP_PRINTF("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n",
 			    chk->rec.data.TSN_seq, chk->snd_count, chk->sent);
 			continue;
 		}
 		if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
 		    (chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
 			/* Gak, we have exceeded max unlucky retran, abort! */
 			SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
 			    chk->snd_count,
 			    SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
 			/*
 			 * Bump the association refcnt around the abort.
 			 * NOTE(review): presumably this keeps the tcb alive
 			 * so it can be re-locked below - confirm against
 			 * sctp_abort_an_association().
 			 */
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 			sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked);
 			SCTP_TCB_LOCK(stcb);
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			return (SCTP_RETRAN_EXIT);
 		}
 		/* pick up the net */
 		net = chk->whoTo;
 		/* payload budget: path MTU minus the per-family overhead */
 		switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			mtu = net->mtu - SCTP_MIN_OVERHEAD;
 			break;
 #endif
 		default:
 			/* TSNH */
 			mtu = net->mtu;
 			break;
 		}
 
 		if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) {
 			/* No room in peers rwnd */
 			uint32_t tsn;
 
 			tsn = asoc->last_acked_seq + 1;
 			if (tsn == chk->rec.data.TSN_seq) {
 				/*
 				 * we make a special exception for this
 				 * case. The peer has no rwnd but is missing
 				 * the lowest chunk.. which is probably what
 				 * is holding up the rwnd.
 				 */
 				goto one_chunk_around;
 			}
 			return (1);
 		}
 one_chunk_around:
 		if (asoc->peers_rwnd < mtu) {
 			/* rwnd is tight: send this chunk alone, no bundling */
 			one_chunk = 1;
 			if ((asoc->peers_rwnd == 0) &&
 			    (asoc->total_flight == 0)) {
 				chk->window_probe = 1;
 				chk->whoTo->window_probe = 1;
 			}
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_audit_log(0xC3, 2);
 #endif
 		bundle_at = 0;
 		m = NULL;
 		net->fast_retran_ip = 0;
 		if (chk->rec.data.doing_fast_retransmit == 0) {
 			/*
 			 * if no FR in progress skip destination that have
 			 * flight_size > cwnd.
 			 */
 			if (net->flight_size >= net->cwnd) {
 				continue;
 			}
 		} else {
 			/*
 			 * Mark the destination net to have FR recovery
 			 * limits put on it.
 			 */
 			*fr_done = 1;
 			net->fast_retran_ip = 1;
 		}
 
 		/*
 		 * if no AUTH is yet included and this chunk requires it,
 		 * make sure to account for it.  We don't apply the size
 		 * until the AUTH chunk is actually added below in case
 		 * there is no room for this chunk.
 		 */
 		if (data_auth_reqd && (auth == NULL)) {
 			dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 		} else
 			dmtu = 0;
 
 		/*
 		 * NOTE(review): mtu and dmtu are both unsigned, so
 		 * (mtu - dmtu) wraps to a large value if dmtu > mtu - confirm
 		 * that cannot happen for valid path MTUs.
 		 */
 		if ((chk->send_size <= (mtu - dmtu)) ||
 		    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 			/* ok we will add this one */
 			if (data_auth_reqd) {
 				if (auth == NULL) {
 					m = sctp_add_auth_chunk(m,
 					    &endofchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    SCTP_DATA);
 					auth_keyid = chk->auth_keyid;
 					override_ok = 0;
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				} else if (override_ok) {
 					auth_keyid = chk->auth_keyid;
 					override_ok = 0;
 				} else if (chk->auth_keyid != auth_keyid) {
 					/* different keyid, so done bundling */
 					break;
 				}
 			}
 			m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
 			if (m == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			/* Do clear IP_DF ? */
 			if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 				no_fragmentflg = 0;
 			}
 			/* update our MTU size */
 			if (mtu > (chk->send_size + dmtu))
 				mtu -= (chk->send_size + dmtu);
 			else
 				mtu = 0;
 			data_list[bundle_at++] = chk;
 			if (one_chunk && (asoc->total_flight <= 0)) {
 				SCTP_STAT_INCR(sctps_windowprobed);
 			}
 		}
 		if (one_chunk == 0) {
 			/*
 			 * now are there anymore forward from chk to pick
 			 * up?
 			 */
 			for (fwd = TAILQ_NEXT(chk, sctp_next); fwd != NULL; fwd = TAILQ_NEXT(fwd, sctp_next)) {
 				if (fwd->sent != SCTP_DATAGRAM_RESEND) {
 					/* Nope, not for retran */
 					continue;
 				}
 				if (fwd->whoTo != net) {
 					/* Nope, not the net in question */
 					continue;
 				}
 				if (data_auth_reqd && (auth == NULL)) {
 					dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 				} else
 					dmtu = 0;
 				/* same unsigned (mtu - dmtu) caveat as above */
 				if (fwd->send_size <= (mtu - dmtu)) {
 					if (data_auth_reqd) {
 						if (auth == NULL) {
 							m = sctp_add_auth_chunk(m,
 							    &endofchain,
 							    &auth,
 							    &auth_offset,
 							    stcb,
 							    SCTP_DATA);
 							auth_keyid = fwd->auth_keyid;
 							override_ok = 0;
 							SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 						} else if (override_ok) {
 							auth_keyid = fwd->auth_keyid;
 							override_ok = 0;
 						} else if (fwd->auth_keyid != auth_keyid) {
 							/*
 							 * different keyid,
 							 * so done bundling
 							 */
 							break;
 						}
 					}
 					m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref);
 					if (m == NULL) {
 						SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 						return (ENOMEM);
 					}
 					/* Do clear IP_DF ? */
 					if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 						no_fragmentflg = 0;
 					}
 					/* update our MTU size */
 					if (mtu > (fwd->send_size + dmtu))
 						mtu -= (fwd->send_size + dmtu);
 					else
 						mtu = 0;
 					data_list[bundle_at++] = fwd;
 					/* data_list[] is full; stop bundling */
 					if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
 						break;
 					}
 				} else {
 					/* can't fit so we are done */
 					break;
 				}
 			}
 		}
 		/* Is there something to send for this destination? */
 		if (m) {
 			/*
 			 * No matter if we fail/or succeed we should start a
 			 * timer. A failure is like a lost IP packet :-)
 			 */
 			if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 				/*
 				 * no timer running on this destination
 				 * restart it.
 				 */
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 				tmr_started = 1;
 			}
 			/* Now lets send it, if there is anything to send :> */
 			if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 			    (struct sockaddr *)&net->ro._l_addr, m,
 			    auth_offset, auth, auth_keyid,
 			    no_fragmentflg, 0, 0,
 			    inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
 			    net->port, NULL,
 			    0, 0,
 			    so_locked))) {
 				/* error, we could not output */
 				SCTP_STAT_INCR(sctps_lowlevelerr);
 				return (error);
 			}
 			endofchain = NULL;
 			auth = NULL;
 			auth_offset = 0;
 			/* For HB's */
 			/*
 			 * We don't want to mark the net->sent time here
 			 * since this we use this for HB and retrans cannot
 			 * measure RTT
 			 */
 			/* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */
 
 			/* For auto-close */
 			cnt_thru++;
 			if (*now_filled == 0) {
 				(void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
 				*now = asoc->time_last_sent;
 				*now_filled = 1;
 			} else {
 				asoc->time_last_sent = *now;
 			}
 			*cnt_out += bundle_at;
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_audit_log(0xC4, bundle_at);
 #endif
 			if (bundle_at) {
 				tsns_sent = data_list[0]->rec.data.TSN_seq;
 			}
 			/* Post-send bookkeeping for every chunk in the packet. */
 			for (i = 0; i < bundle_at; i++) {
 				SCTP_STAT_INCR(sctps_sendretransdata);
 				data_list[i]->sent = SCTP_DATAGRAM_SENT;
 				/*
 				 * When we have a revoked data, and we
 				 * retransmit it, then we clear the revoked
 				 * flag since this flag dictates if we
 				 * subtracted from the fs
 				 */
 				if (data_list[i]->rec.data.chunk_was_revoked) {
 					/* Deflate the cwnd */
 					data_list[i]->whoTo->cwnd -= data_list[i]->book_size;
 					data_list[i]->rec.data.chunk_was_revoked = 0;
 				}
 				data_list[i]->snd_count++;
 				sctp_ucount_decr(asoc->sent_queue_retran_cnt);
 				/* record the time */
 				data_list[i]->sent_rcv_time = asoc->time_last_sent;
 				if (data_list[i]->book_size_scale) {
 					/*
 					 * need to double the book size on
 					 * this one
 					 */
 					data_list[i]->book_size_scale = 0;
 					/*
 					 * Since we double the booksize, we
 					 * must also double the output queue
 					 * size, since this get shrunk when
 					 * we free by this amount.
 					 */
 					atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size);
 					data_list[i]->book_size *= 2;
 
 
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
 						sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
 						    asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
 					}
 					asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
 					    (uint32_t) (data_list[i]->send_size +
 					    SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 					sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
 					    data_list[i]->whoTo->flight_size,
 					    data_list[i]->book_size,
 					    (uintptr_t) data_list[i]->whoTo,
 					    data_list[i]->rec.data.TSN_seq);
 				}
 				sctp_flight_size_increase(data_list[i]);
 				sctp_total_flight_increase(stcb, data_list[i]);
 				if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
 					/* SWS sender side engages */
 					asoc->peers_rwnd = 0;
 				}
 				if ((i == 0) &&
 				    (data_list[i]->rec.data.doing_fast_retransmit)) {
 					SCTP_STAT_INCR(sctps_sendfastretrans);
 					if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) &&
 					    (tmr_started == 0)) {
 						/*-
 						 * ok we just fast-retrans'd
 						 * the lowest TSN, i.e the
 						 * first on the list. In
 						 * this case we want to give
 						 * some more time to get a
 						 * SACK back without a
 						 * t3-expiring.
 						 */
 						sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
 						    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
 						sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 					}
 				}
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND);
 			}
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(21, inp, stcb, NULL);
 #endif
 		} else {
 			/* None will fit */
 			return (1);
 		}
 		if (asoc->sent_queue_retran_cnt <= 0) {
 			/* all done we have no more to retran */
 			asoc->sent_queue_retran_cnt = 0;
 			break;
 		}
 		if (one_chunk) {
 			/* No more room in rwnd */
 			return (1);
 		}
 		/* stop the for loop here. we sent out a packet */
 		break;
 	}
 	return (0);
 }
 
 static void
 sctp_timer_validation(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	/*
 	 * Make sure a retransmission (SEND) timer is pending on at least
 	 * one destination of the association.  If none is found, start one
 	 * on asoc->alternate when that is set, otherwise on the primary
 	 * destination.
 	 */
 	struct sctp_nets *dest;
 
 	TAILQ_FOREACH(dest, &asoc->nets, sctp_next) {
 		if (SCTP_OS_TIMER_PENDING(&dest->rxt_timer.timer)) {
 			/* a timer is already running; nothing to do */
 			return;
 		}
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* no destination had a timer pending */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n");
 	dest = asoc->alternate ? asoc->alternate : asoc->primary_destination;
 	sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, dest);
 }
 
 void
 sctp_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int from_where,
     int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	/*-
 	 * Ok this is the generic chunk service queue. we must do the
 	 * following:
 	 * - See if there are retransmits pending, if so we must
 	 *   do these first.
 	 * - Service the stream queue that is next, moving any
 	 *   message (note I must get a complete message i.e.
 	 *   FIRST/MIDDLE and LAST to the out queue in one pass) and assigning
 	 *   TSN's
 	 * - Check to see if the cwnd/rwnd allows any output, if so we
 	 *   go ahead and formulate and send the low level chunks. Making sure
 	 *   to combine any control in the control chunk queue also.
 	 *
 	 * Parameters:
 	 *   inp, stcb  - endpoint and association to service.
 	 *   from_where - one of the SCTP_OUTPUT_FROM_* codes identifying the
 	 *                caller; it selects Nagle handling and how pending
 	 *                retransmissions are treated below.
 	 *   so_locked  - passed through to the routines called from here.
 	 */
 	struct sctp_association *asoc;
 	struct sctp_nets *net;
 	int error = 0, num_out, tot_out = 0, ret = 0, reason_code;
 	unsigned int burst_cnt = 0;
 	struct timeval now;
 	int now_filled = 0;
 	int nagle_on;
 	int frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
 	int un_sent = 0;
 	int fr_done;
 	unsigned int tot_frs = 0;
 
 	asoc = &stcb->asoc;
 	/* The Nagle algorithm is only applied when handling a send call. */
 	if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
 			nagle_on = 0;
 		} else {
 			nagle_on = 1;
 		}
 	} else {
 		nagle_on = 0;
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* bytes queued for output that are not currently in flight */
 	un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
 
 	if ((un_sent <= 0) &&
 	    (TAILQ_EMPTY(&asoc->control_send_queue)) &&
 	    (TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
 	    (asoc->sent_queue_retran_cnt == 0)) {
 		/* Nothing to do unless there is something to be sent left */
 		return;
 	}
 	/*
 	 * Do we have something to send, data or control AND a sack timer
 	 * running, if so piggy-back the sack.
 	 */
 	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 		sctp_send_sack(stcb, so_locked);
 		(void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
 	}
 	/* Phase 1: drain pending retransmissions, one packet per pass. */
 	while (asoc->sent_queue_retran_cnt) {
 		/*-
 		 * Ok, it is retransmission time only, we send out only ONE
 		 * packet with a single call off to the retran code.
 		 */
 		if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) {
 			/*-
 			 * Special hook for handling cookies discarded
 			 * by peer that carried data. Send cookie-ack only
 			 * and then the next call will get the retran's.
 			 */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
 			    from_where,
 			    &now, &now_filled, frag_point, so_locked);
 			return;
 		} else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) {
 			/* if its not from a HB then do it */
 			fr_done = 0;
 			ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked);
 			if (fr_done) {
 				tot_frs++;
 			}
 		} else {
 			/*
 			 * called from the heartbeat timer: we don't allow
 			 * retran output (only control)
 			 */
 			ret = 1;
 		}
 		if (ret > 0) {
 			/* Can't send anymore */
 			/*-
 			 * now lets push out control by calling med-level
 			 * output once. this assures that we WILL send HB's
 			 * if queued too.
 			 */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
 			    from_where,
 			    &now, &now_filled, frag_point, so_locked);
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(8, inp, stcb, NULL);
 #endif
 			sctp_timer_validation(inp, stcb, asoc);
 			return;
 		}
 		if (ret < 0) {
 			/*-
 			 * The count was off.. retran is not happening so do
 			 * the normal transmission below, unless the retran
 			 * code returned SCTP_RETRAN_EXIT, in which case we
 			 * stop right here.
 			 */
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(9, inp, stcb, NULL);
 #endif
 			if (ret == SCTP_RETRAN_EXIT) {
 				return;
 			}
 			break;
 		}
 		if (from_where == SCTP_OUTPUT_FROM_T3) {
 			/* Only one transmission allowed out of a timeout */
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(10, inp, stcb, NULL);
 #endif
 			/* Push out any control */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where,
 			    &now, &now_filled, frag_point, so_locked);
 			return;
 		}
 		if ((asoc->fr_max_burst > 0) && (tot_frs >= asoc->fr_max_burst)) {
 			/* Hit FR burst limit */
 			return;
 		}
 		if ((num_out == 0) && (ret == 0)) {
 			/* No more retrans to send */
 			break;
 		}
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_auditing(12, inp, stcb, NULL);
 #endif
 	/* Check for bad destinations, if they exist move chunks around. */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if (!(net->dest_state & SCTP_ADDR_REACHABLE)) {
 			/*-
 			 * if possible move things off of this address we
 			 * still may send below due to the dormant state but
 			 * we try to find an alternate address to send to
 			 * and if we have one we move all queued data on the
 			 * out wheel to this alternate address.
 			 */
 			if (net->ref_count > 1)
 				sctp_move_chunks_from_net(stcb, net);
 		} else {
 			/*-
 			 * if ((asoc->sat_network) || (net->addr_is_local))
 			 * { burst_limit = asoc->max_burst *
 			 * SCTP_SAT_NETWORK_BURST_INCR; }
 			 */
 			if (asoc->max_burst > 0) {
 				if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) {
 					if ((net->flight_size + (asoc->max_burst * net->mtu)) < net->cwnd) {
 						/*
 						 * JRS - Use the congestion
 						 * control given in the
 						 * congestion control module
 						 */
 						asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, asoc->max_burst);
 						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 							sctp_log_maxburst(stcb, net, 0, asoc->max_burst, SCTP_MAX_BURST_APPLIED);
 						}
 						SCTP_STAT_INCR(sctps_maxburstqueued);
 					}
 					net->fast_retran_ip = 0;
 				} else {
 					if (net->flight_size == 0) {
 						/*
 						 * Should be decaying the
 						 * cwnd here
 						 */
 						;
 					}
 				}
 			}
 		}
 
 	}
 	/*
 	 * Phase 2: normal transmission. Call the medium-level output
 	 * repeatedly until it sends nothing, reports an error, nothing is
 	 * left to send, Nagle says stop, or the count-based max burst is
 	 * reached.
 	 */
 	burst_cnt = 0;
 	do {
 		error = sctp_med_chunk_output(inp, stcb, asoc, &num_out,
 		    &reason_code, 0, from_where,
 		    &now, &now_filled, frag_point, so_locked);
 		if (error) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 				sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP);
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES);
 				sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES);
 			}
 			break;
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out);
 
 		tot_out += num_out;
 		burst_cnt++;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 			sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES);
 			if (num_out == 0) {
 				sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES);
 			}
 		}
 		if (nagle_on) {
 			/*
 			 * When the Nagle algorithm is used, look at how
 			 * much is unsent, then if its smaller than an MTU
 			 * and we have data in flight we stop, except if we
 			 * are handling a fragmented user message.
 			 */
 			un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
 			    (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
 			if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
 			    (stcb->asoc.total_flight > 0) &&
 			    ((stcb->asoc.locked_on_sending == NULL) ||
 			    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
 				break;
 			}
 		}
 		if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 		    TAILQ_EMPTY(&asoc->send_queue) &&
 		    stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
 			/* Nothing left to send */
 			break;
 		}
 		if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) {
 			/* Nothing left to send */
 			break;
 		}
 	} while (num_out &&
 	    ((asoc->max_burst == 0) ||
 	    SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) ||
 	    (burst_cnt < asoc->max_burst)));
 
 	/* Record whether the count-based burst limit ended the loop. */
 	if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) == 0) {
 		if ((asoc->max_burst > 0) && (burst_cnt >= asoc->max_burst)) {
 			SCTP_STAT_INCR(sctps_maxburstqueued);
 			asoc->burst_limit_applied = 1;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 				sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED);
 			}
 		} else {
 			asoc->burst_limit_applied = 0;
 		}
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 		sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n",
 	    tot_out);
 
 	/*-
 	 * Now we need to clean up the control chunk chain if a ECNE is on
 	 * it. It must be marked as UNSENT again so next call will continue
 	 * to send it until such time that we get a CWR, to remove it.
 	 */
 	if (stcb->asoc.ecn_echo_cnt_onq)
 		sctp_fix_ecn_echo(asoc);
 	return;
 }
 
 
 int
 sctp_output(
     struct sctp_inpcb *inp,
     struct mbuf *m,
     struct sockaddr *addr,
     struct mbuf *control,
     struct thread *p,
     int flags)
 {
 	/*
 	 * Thin front end to sctp_sosend(): hand the mbuf chain, address and
 	 * control data to the endpoint's socket with no uio.  Returns
 	 * EINVAL when there is no endpoint or the endpoint has no socket,
 	 * otherwise whatever sctp_sosend() returns.
 	 */
 	struct socket *so;
 
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	so = inp->sctp_socket;
 	if (so == NULL) {
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	return (sctp_sosend(so, addr, (struct uio *)NULL, m, control, flags, p));
 }
 
 void
 send_forward_tsn(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *chk;
 	struct sctp_forward_tsn_chunk *fwdtsn;
 	uint32_t advance_peer_ack_point;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 			/* mark it to unsent */
 			chk->sent = SCTP_DATAGRAM_UNSENT;
 			chk->snd_count = 0;
 			/* Do we correct its output location? */
 			if (chk->whoTo) {
 				sctp_free_remote_addr(chk->whoTo);
 				chk->whoTo = NULL;
 			}
 			goto sctp_fill_in_rest;
 		}
 	}
 	/* Ok if we reach here we must build one */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	asoc->fwd_tsn_cnt++;
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = asoc;
 	chk->whoTo = NULL;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 sctp_fill_in_rest:
 	/*-
 	 * Here we go through and fill out the part that deals with
 	 * stream/seq of the ones we skip.
 	 */
 	SCTP_BUF_LEN(chk->data) = 0;
 	{
 		struct sctp_tmit_chunk *at, *tp1, *last;
 		struct sctp_strseq *strseq;
 		unsigned int cnt_of_space, i, ovh;
 		unsigned int space_needed;
 		unsigned int cnt_of_skipped = 0;
 
 		TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
 			if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
 			    (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
 				/* no more to look at */
 				break;
 			}
 			if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
 				/* We don't report these */
 				continue;
 			}
 			cnt_of_skipped++;
 		}
 		space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
 		    (cnt_of_skipped * sizeof(struct sctp_strseq)));
 
 		cnt_of_space = M_TRAILINGSPACE(chk->data);
 
 		if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 			ovh = SCTP_MIN_OVERHEAD;
 		} else {
 			ovh = SCTP_MIN_V4_OVERHEAD;
 		}
 		if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
 			/* trim to a mtu size */
 			cnt_of_space = asoc->smallest_mtu - ovh;
 		}
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 			sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 			    0xff, 0, cnt_of_skipped,
 			    asoc->advanced_peer_ack_point);
 
 		}
 		advance_peer_ack_point = asoc->advanced_peer_ack_point;
 		if (cnt_of_space < space_needed) {
 			/*-
 			 * ok we must trim down the chunk by lowering the
 			 * advance peer ack point.
 			 */
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 				sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 				    0xff, 0xff, cnt_of_space,
 				    space_needed);
 			}
 			cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk);
 			cnt_of_skipped /= sizeof(struct sctp_strseq);
 			/*-
 			 * Go through and find the TSN that will be the one
 			 * we report.
 			 */
 			at = TAILQ_FIRST(&asoc->sent_queue);
 			if (at != NULL) {
 				for (i = 0; i < cnt_of_skipped; i++) {
 					tp1 = TAILQ_NEXT(at, sctp_next);
 					if (tp1 == NULL) {
 						break;
 					}
 					at = tp1;
 				}
 			}
 			if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 				sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 				    0xff, cnt_of_skipped, at->rec.data.TSN_seq,
 				    asoc->advanced_peer_ack_point);
 			}
 			last = at;
 			/*-
 			 * last now points to last one I can report, update
 			 * peer ack point
 			 */
 			if (last)
 				advance_peer_ack_point = last->rec.data.TSN_seq;
 			space_needed = sizeof(struct sctp_forward_tsn_chunk) +
 			    cnt_of_skipped * sizeof(struct sctp_strseq);
 		}
 		chk->send_size = space_needed;
 		/* Setup the chunk */
 		fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
 		fwdtsn->ch.chunk_length = htons(chk->send_size);
 		fwdtsn->ch.chunk_flags = 0;
 		fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
 		fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point);
 		SCTP_BUF_LEN(chk->data) = chk->send_size;
 		fwdtsn++;
 		/*-
 		 * Move pointer to after the fwdtsn and transfer to the
 		 * strseq pointer.
 		 */
 		strseq = (struct sctp_strseq *)fwdtsn;
 		/*-
 		 * Now populate the strseq list. This is done blindly
 		 * without pulling out duplicate stream info. This is
 		 * inefficent but won't harm the process since the peer will
 		 * look at these in sequence and will thus release anything.
 		 * It could mean we exceed the PMTU and chop off some that
 		 * we could have included.. but this is unlikely (aka 1432/4
 		 * would mean 300+ stream seq's would have to be reported in
 		 * one FWD-TSN. With a bit of work we can later FIX this to
 		 * optimize and pull out duplcates.. but it does add more
 		 * overhead. So for now... not!
 		 */
 		at = TAILQ_FIRST(&asoc->sent_queue);
 		for (i = 0; i < cnt_of_skipped; i++) {
 			tp1 = TAILQ_NEXT(at, sctp_next);
 			if (tp1 == NULL)
 				break;
 			if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
 				/* We don't report these */
 				i--;
 				at = tp1;
 				continue;
 			}
 			if (at->rec.data.TSN_seq == advance_peer_ack_point) {
 				at->rec.data.fwd_tsn_cnt = 0;
 			}
 			strseq->stream = ntohs(at->rec.data.stream_number);
 			strseq->sequence = ntohs(at->rec.data.stream_seq);
 			strseq++;
 			at = tp1;
 		}
 	}
 	return;
 }
 
 /*
  * Queue a SACK chunk, or an NR-SACK chunk when the association has
  * nrsack_supported set to 1, on the association's control send queue.
  *
  * stcb:      association control block; checked with
  *            SCTP_TCB_LOCK_ASSERT().
  * so_locked: handed to sctp_free_a_chunk() on the failure path.
  *
  * Nothing is queued when no data chunk has been received yet
  * (asoc->last_data_chunk_from is NULL).  When the chunk structure or its
  * mbuf cannot be obtained, the RECV timer is restarted if delayed_ack is
  * set, otherwise asoc.send_sack is set to 1.
  */
 void
 sctp_send_sack(struct sctp_tcb *stcb, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	/*-
 	 * Queue up a SACK or NR-SACK in the control queue.
 	 * We must first check to see if a SACK or NR-SACK is
 	 * somehow on the control queue.
 	 * If so, we will take and remove the old one.
 	 */
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk, *a_chk;
 	struct sctp_sack_chunk *sack;
 	struct sctp_nr_sack_chunk *nr_sack;
 	struct sctp_gap_ack_block *gap_descriptor;
 	struct sack_track *selector;
 	int mergeable = 0;
 	int offset;
 	caddr_t limit;
 	uint32_t *dup;
 	int limit_reached = 0;
 	unsigned int i, siz, j;
 	unsigned int num_gap_blocks = 0, num_nr_gap_blocks = 0, space;
 	int num_dups = 0;
 	int space_req;
 	uint32_t highest_tsn;
 	uint8_t flags;
 	uint8_t type;
 	uint8_t tsn_map;
 
 	if (stcb->asoc.nrsack_supported == 1) {
 		type = SCTP_NR_SELECTIVE_ACK;
 	} else {
 		type = SCTP_SELECTIVE_ACK;
 	}
 	a_chk = NULL;
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (asoc->last_data_chunk_from == NULL) {
 		/* Hmm we never received anything */
 		return;
 	}
 	sctp_slide_mapping_arrays(stcb);
 	sctp_set_rwnd(stcb, asoc);
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == type) {
 			/* Hmm, found a sack already on queue, remove it */
 			TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
 			asoc->ctrl_queue_cnt--;
 			a_chk = chk;
 			/* Drop the stale payload and destination; the chunk itself is reused. */
 			if (a_chk->data) {
 				sctp_m_freem(a_chk->data);
 				a_chk->data = NULL;
 			}
 			if (a_chk->whoTo) {
 				sctp_free_remote_addr(a_chk->whoTo);
 				a_chk->whoTo = NULL;
 			}
 			break;
 		}
 	}
 	if (a_chk == NULL) {
 		sctp_alloc_a_chunk(stcb, a_chk);
 		if (a_chk == NULL) {
 			/* No memory so we drop the idea, and set a timer */
 			if (stcb->asoc.delayed_ack) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 				    stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
 				sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 				    stcb->sctp_ep, stcb, NULL);
 			} else {
 				stcb->asoc.send_sack = 1;
 			}
 			return;
 		}
 		a_chk->copy_by_ref = 0;
 		a_chk->rec.chunk_id.id = type;
 		a_chk->rec.chunk_id.can_take_data = 1;
 	}
 	/* Clear our pkt counts */
 	asoc->data_pkts_seen = 0;
 
 	a_chk->flags = 0;
 	a_chk->asoc = asoc;
 	a_chk->snd_count = 0;
 	a_chk->send_size = 0;	/* fill in later */
 	a_chk->sent = SCTP_DATAGRAM_UNSENT;
 	a_chk->whoTo = NULL;
 
 	if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) {
 		/*-
 		 * Ok, the destination for the SACK is unreachable, lets see if
 		 * we can select an alternate to asoc->last_data_chunk_from
 		 */
 		a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
 		if (a_chk->whoTo == NULL) {
 			/* Nope, no alternate */
 			a_chk->whoTo = asoc->last_data_chunk_from;
 		}
 	} else {
 		a_chk->whoTo = asoc->last_data_chunk_from;
 	}
 	/* The queued chunk holds its own reference on the chosen destination. */
 	if (a_chk->whoTo) {
 		atomic_add_int(&a_chk->whoTo->ref_count, 1);
 	}
 	/* highest_tsn is the greater (per SCTP_TSN_GT) of the two map high marks. */
 	if (SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map)) {
 		highest_tsn = asoc->highest_tsn_inside_map;
 	} else {
 		highest_tsn = asoc->highest_tsn_inside_nr_map;
 	}
 	if (highest_tsn == asoc->cumulative_tsn) {
 		/* no gaps */
 		if (type == SCTP_SELECTIVE_ACK) {
 			space_req = sizeof(struct sctp_sack_chunk);
 		} else {
 			space_req = sizeof(struct sctp_nr_sack_chunk);
 		}
 	} else {
 		/* gaps get a cluster */
 		space_req = MCLBYTES;
 	}
 	/* Ok now lets formulate a MBUF with our sack */
 	a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA);
 	if ((a_chk->data == NULL) ||
 	    (a_chk->whoTo == NULL)) {
 		/* rats, no mbuf memory */
 		if (a_chk->data) {
 			/* was a problem with the destination */
 			sctp_m_freem(a_chk->data);
 			a_chk->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, a_chk, so_locked);
 		/* sa_ignore NO_NULL_CHK */
 		if (stcb->asoc.delayed_ack) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 			    stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
 			sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 			    stcb->sctp_ep, stcb, NULL);
 		} else {
 			stcb->asoc.send_sack = 1;
 		}
 		return;
 	}
 	/* ok, lets go through and fill it in */
 	SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
 	/*
 	 * limit ends up pointing just past the area we may fill: the mbuf's
 	 * trailing space, capped at the destination MTU less
 	 * SCTP_MIN_OVERHEAD.
 	 */
 	space = M_TRAILINGSPACE(a_chk->data);
 	if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
 		space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
 	}
 	limit = mtod(a_chk->data, caddr_t);
 	limit += space;
 
 	flags = 0;
 
 	if ((asoc->sctp_cmt_on_off > 0) &&
 	    SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
 		/*-
 		 * CMT DAC algorithm: If 2 (i.e., 0x10) packets have been
 		 * received, then set high bit to 1, else 0. Reset
 		 * pkts_rcvd.
 		 */
 		flags |= (asoc->cmt_dac_pkts_rcvd << 6);
 		asoc->cmt_dac_pkts_rcvd = 0;
 	}
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn;
 	stcb->asoc.cumack_log_atsnt++;
 	if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) {
 		stcb->asoc.cumack_log_atsnt = 0;
 	}
 #endif
 	/* reset the readers interpretation */
 	stcb->freed_by_sorcv_sincelast = 0;
 
 	/*
 	 * Point gap_descriptor at the first slot behind the fixed header and
 	 * compute siz, the number of mapping-array bytes the scan below
 	 * visits.
 	 */
 	if (type == SCTP_SELECTIVE_ACK) {
 		sack = mtod(a_chk->data, struct sctp_sack_chunk *);
 		nr_sack = NULL;
 		gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk));
 		if (highest_tsn > asoc->mapping_array_base_tsn) {
 			siz = (((highest_tsn - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - highest_tsn) + 1) + highest_tsn + 7) / 8;
 		}
 	} else {
 		sack = NULL;
 		nr_sack = mtod(a_chk->data, struct sctp_nr_sack_chunk *);
 		gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)nr_sack + sizeof(struct sctp_nr_sack_chunk));
 		if (asoc->highest_tsn_inside_map > asoc->mapping_array_base_tsn) {
 			siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_map + 7) / 8;
 		}
 	}
 
 	/*
 	 * offset is added to every gap start/end taken from sack_array.  It
 	 * starts from the distance between mapping_array_base_tsn and
 	 * cumulative_tsn and advances by 8 for each map byte processed.
 	 */
 	if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) {
 		offset = 1;
 	} else {
 		offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
 	}
 	if (((type == SCTP_SELECTIVE_ACK) &&
 	    SCTP_TSN_GT(highest_tsn, asoc->cumulative_tsn)) ||
 	    ((type == SCTP_NR_SELECTIVE_ACK) &&
 	    SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->cumulative_tsn))) {
 		/* we have a gap .. maybe */
 		for (i = 0; i < siz; i++) {
 			tsn_map = asoc->mapping_array[i];
 			/* A plain SACK reports both maps merged together. */
 			if (type == SCTP_SELECTIVE_ACK) {
 				tsn_map |= asoc->nr_mapping_array[i];
 			}
 			if (i == 0) {
 				/*
 				 * Clear all bits corresponding to TSNs
 				 * smaller or equal to the cumulative TSN.
 				 */
 				tsn_map &= (~0 << (1 - offset));
 			}
 			selector = &sack_array[tsn_map];
 			if (mergeable && selector->right_edge) {
 				/*
 				 * Backup, left and right edges were ok to
 				 * merge.
 				 */
 				num_gap_blocks--;
 				gap_descriptor--;
 			}
 			if (selector->num_entries == 0)
 				mergeable = 0;
 			else {
 				for (j = 0; j < selector->num_entries; j++) {
 					if (mergeable && selector->right_edge) {
 						/*
 						 * do a merge by NOT setting
 						 * the left side
 						 */
 						mergeable = 0;
 					} else {
 						/*
 						 * no merge, set the left
 						 * side
 						 */
 						mergeable = 0;
 						gap_descriptor->start = htons((selector->gaps[j].start + offset));
 					}
 					gap_descriptor->end = htons((selector->gaps[j].end + offset));
 					num_gap_blocks++;
 					gap_descriptor++;
 					/* Stop once another block would no longer fit below limit. */
 					if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
 						/* no more room */
 						limit_reached = 1;
 						break;
 					}
 				}
 				if (selector->left_edge) {
 					mergeable = 1;
 				}
 			}
 			if (limit_reached) {
 				/* Reached the limit stop */
 				break;
 			}
 			offset += 8;
 		}
 	}
 	/*
 	 * NR-SACK only: a second pass, over nr_mapping_array alone, appends
 	 * the NR gap blocks directly behind the regular ones.
 	 */
 	if ((type == SCTP_NR_SELECTIVE_ACK) &&
 	    (limit_reached == 0)) {
 
 		mergeable = 0;
 
 		if (asoc->highest_tsn_inside_nr_map > asoc->mapping_array_base_tsn) {
 			siz = (((asoc->highest_tsn_inside_nr_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_nr_map + 7) / 8;
 		}
 
 		if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) {
 			offset = 1;
 		} else {
 			offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
 		}
 		if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn)) {
 			/* we have a gap .. maybe */
 			for (i = 0; i < siz; i++) {
 				tsn_map = asoc->nr_mapping_array[i];
 				if (i == 0) {
 					/*
 					 * Clear all bits corresponding to
 					 * TSNs smaller or equal to the
 					 * cumulative TSN.
 					 */
 					tsn_map &= (~0 << (1 - offset));
 				}
 				selector = &sack_array[tsn_map];
 				if (mergeable && selector->right_edge) {
 					/*
 					 * Backup, left and right edges were
 					 * ok to merge.
 					 */
 					num_nr_gap_blocks--;
 					gap_descriptor--;
 				}
 				if (selector->num_entries == 0)
 					mergeable = 0;
 				else {
 					for (j = 0; j < selector->num_entries; j++) {
 						if (mergeable && selector->right_edge) {
 							/*
 							 * do a merge by NOT
 							 * setting the left
 							 * side
 							 */
 							mergeable = 0;
 						} else {
 							/*
 							 * no merge, set the
 							 * left side
 							 */
 							mergeable = 0;
 							gap_descriptor->start = htons((selector->gaps[j].start + offset));
 						}
 						gap_descriptor->end = htons((selector->gaps[j].end + offset));
 						num_nr_gap_blocks++;
 						gap_descriptor++;
 						if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
 							/* no more room */
 							limit_reached = 1;
 							break;
 						}
 					}
 					if (selector->left_edge) {
 						mergeable = 1;
 					}
 				}
 				if (limit_reached) {
 					/* Reached the limit stop */
 					break;
 				}
 				offset += 8;
 			}
 		}
 	}
 	/* now we must add any dups we are going to report. */
 	if ((limit_reached == 0) && (asoc->numduptsns)) {
 		/* The duplicate TSN list starts right behind the last gap block. */
 		dup = (uint32_t *) gap_descriptor;
 		for (i = 0; i < asoc->numduptsns; i++) {
 			*dup = htonl(asoc->dup_tsns[i]);
 			dup++;
 			num_dups++;
 			if (((caddr_t)dup + sizeof(uint32_t)) > limit) {
 				/* no more room */
 				break;
 			}
 		}
 		/* The pending list is reset even if the loop above stopped early. */
 		asoc->numduptsns = 0;
 	}
 	/*
 	 * now that the chunk is prepared queue it to the control chunk
 	 * queue.
 	 */
 	/* The fixed header is filled in last, once all counts are known. */
 	if (type == SCTP_SELECTIVE_ACK) {
 		a_chk->send_size = sizeof(struct sctp_sack_chunk) +
 		    (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
 		    num_dups * sizeof(int32_t);
 		SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
 		sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
 		sack->sack.a_rwnd = htonl(asoc->my_rwnd);
 		sack->sack.num_gap_ack_blks = htons(num_gap_blocks);
 		sack->sack.num_dup_tsns = htons(num_dups);
 		sack->ch.chunk_type = type;
 		sack->ch.chunk_flags = flags;
 		sack->ch.chunk_length = htons(a_chk->send_size);
 	} else {
 		a_chk->send_size = sizeof(struct sctp_nr_sack_chunk) +
 		    (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
 		    num_dups * sizeof(int32_t);
 		SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
 		nr_sack->nr_sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
 		nr_sack->nr_sack.a_rwnd = htonl(asoc->my_rwnd);
 		nr_sack->nr_sack.num_gap_ack_blks = htons(num_gap_blocks);
 		nr_sack->nr_sack.num_nr_gap_ack_blks = htons(num_nr_gap_blocks);
 		nr_sack->nr_sack.num_dup_tsns = htons(num_dups);
 		nr_sack->nr_sack.reserved = 0;
 		nr_sack->ch.chunk_type = type;
 		nr_sack->ch.chunk_flags = flags;
 		nr_sack->ch.chunk_length = htons(a_chk->send_size);
 	}
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next);
 	/* Remember the window we just advertised and clear the pending-SACK flag. */
 	asoc->my_last_reported_rwnd = asoc->my_rwnd;
 	asoc->ctrl_queue_cnt++;
 	asoc->send_sack = 0;
 	SCTP_STAT_INCR(sctps_sendsacks);
 	return;
 }
 
 /*
  * Build an ABORT chunk for an existing association and hand it straight
  * to sctp_lowlevel_chunk_output().
  *
  * stcb:      association control block (asserted locked).
  * operr:     optional error-cause mbuf chain; it is linked behind the
  *            ABORT header, or freed if the header mbuf cannot be obtained.
  * so_locked: passed through to sctp_lowlevel_chunk_output().
  *
  * An AUTH chunk is placed in front when the peer requires ABORT to be
  * authenticated.  The packet goes to asoc.alternate when that is set,
  * otherwise to asoc.primary_destination.
  */
 void
 sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	struct sctp_abort_chunk *abort_hdr;
 	struct sctp_auth_chunk *auth_chk = NULL;
 	struct sctp_nets *dest;
 	struct mbuf *chain = NULL, *chain_tail = NULL;
 	struct mbuf *abort_mbuf, *cur, *cause_tail = NULL;
 	uint32_t tag;
 	uint32_t auth_off = 0;
 	uint16_t cause_bytes = 0, chunk_bytes, pad_bytes;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* Start the chain with an AUTH chunk when ABORT must be authenticated. */
 	if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
 	    stcb->asoc.peer_auth_chunks)) {
 		chain = sctp_add_auth_chunk(NULL, &chain_tail, &auth_chk, &auth_off,
 		    stcb, SCTP_ABORT_ASSOCIATION);
 		SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	}
 
 	abort_mbuf = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (abort_mbuf == NULL) {
 		/* Nothing can be sent; release whatever has been gathered so far. */
 		if (chain != NULL) {
 			sctp_m_freem(chain);
 		}
 		if (operr != NULL) {
 			sctp_m_freem(operr);
 		}
 		return;
 	}
 
 	/* Hang the error causes behind the header, measuring them as we go. */
 	SCTP_BUF_NEXT(abort_mbuf) = operr;
 	for (cur = operr; cur != NULL; cur = SCTP_BUF_NEXT(cur)) {
 		cause_bytes += (uint16_t) SCTP_BUF_LEN(cur);
 		/* After the loop this is the final mbuf of the cause chain. */
 		cause_tail = cur;
 	}
 	SCTP_BUF_LEN(abort_mbuf) = sizeof(struct sctp_abort_chunk);
 	chunk_bytes = (uint16_t) sizeof(struct sctp_abort_chunk) + cause_bytes;
 	pad_bytes = SCTP_SIZE32(chunk_bytes) - chunk_bytes;
 
 	if (chain != NULL) {
 		/* AUTH chunk leads the chain; ABORT follows it. */
 		SCTP_BUF_NEXT(chain_tail) = abort_mbuf;
 	} else {
 		/* No AUTH chunk in front, so reserve leading space here. */
 		SCTP_BUF_RESV_UF(abort_mbuf, SCTP_MIN_OVERHEAD);
 		chain = abort_mbuf;
 	}
 
 	dest = (stcb->asoc.alternate != NULL) ?
 	    stcb->asoc.alternate : stcb->asoc.primary_destination;
 
 	/* ABORT chunk header. */
 	abort_hdr = mtod(abort_mbuf, struct sctp_abort_chunk *);
 	abort_hdr->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
 	if (stcb->asoc.peer_vtag != 0) {
 		tag = stcb->asoc.peer_vtag;
 		abort_hdr->ch.chunk_flags = 0;
 	} else {
 		/* This happens iff the assoc is in COOKIE-WAIT state. */
 		tag = stcb->asoc.my_vtag;
 		abort_hdr->ch.chunk_flags = SCTP_HAD_NO_TCB;
 	}
 	abort_hdr->ch.chunk_length = htons(chunk_bytes);
 
 	/* Pad the tail of the cause chain; give up if that is not possible. */
 	if ((pad_bytes > 0) &&
 	    ((cause_tail == NULL) ||
 	    (sctp_add_pad_tombuf(cause_tail, pad_bytes) == NULL))) {
 		sctp_m_freem(chain);
 		return;
 	}
 
 	(void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, dest,
 	    (struct sockaddr *)&dest->ro._l_addr,
 	    chain, auth_off, auth_chk, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
 	    stcb->sctp_ep->sctp_lport, stcb->rport, htonl(tag),
 	    stcb->asoc.primary_destination->port, NULL,
 	    0, 0,
 	    so_locked);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 }
 
 /*
  * Formulate a SHUTDOWN-COMPLETE chunk and send it immediately to net via
  * sctp_lowlevel_chunk_output().
  *
  * reflect_vtag: when non-zero the packet carries asoc.my_vtag and the
  *               chunk flags are SCTP_HAD_NO_TCB; otherwise it carries
  *               asoc.peer_vtag with flags 0.
  *
  * Returns silently when no mbuf is available.
  */
 void
 sctp_send_shutdown_complete(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     int reflect_vtag)
 {
 	struct sctp_shutdown_complete_chunk *sc_chunk;
 	struct mbuf *pkt;
 	uint32_t tag;
 	uint8_t chunk_flags;
 
 	pkt = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (pkt == NULL) {
 		/* no mbuf's */
 		return;
 	}
 
 	/* Pick verification tag and flags together, depending on reflect_vtag. */
 	tag = reflect_vtag ? stcb->asoc.my_vtag : stcb->asoc.peer_vtag;
 	chunk_flags = reflect_vtag ? SCTP_HAD_NO_TCB : 0;
 
 	sc_chunk = mtod(pkt, struct sctp_shutdown_complete_chunk *);
 	sc_chunk->ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
 	sc_chunk->ch.chunk_flags = chunk_flags;
 	sc_chunk->ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
 	SCTP_BUF_LEN(pkt) = sizeof(struct sctp_shutdown_complete_chunk);
 
 	(void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
 	    (struct sockaddr *)&net->ro._l_addr,
 	    pkt, 0, NULL, 0, 1, 0, 0,
 	    stcb->sctp_ep->sctp_lport, stcb->rport,
 	    htonl(tag),
 	    net->port, NULL,
 	    0, 0,
 	    SCTP_SO_NOT_LOCKED);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	return;
 }
 
 /*
  * Build and send a stand-alone packet carrying one chunk of the given type
  * (with an optional cause appended) in response to a received packet
  * described by src, dst and sh.
  *
  * The reply is addressed back to the sender: the IP source address is
  * taken from dst and the IP destination address from src, and the SCTP
  * ports from sh are swapped.  When vtag is non-zero it is used as the
  * verification tag and the chunk flags are 0; otherwise sh->v_tag is
  * reflected and the chunk flags are SCTP_HAD_NO_TCB.  A non-zero port
  * requests UDP encapsulation towards that port.
  *
  * The cause chain is consumed: it is either linked behind the header mbuf
  * or freed on the early-exit paths.
  */
 static void
 sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag,
     uint8_t type, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct mbuf *o_pak;
 	struct mbuf *mout;
 	struct sctphdr *shout;
 	struct sctp_chunkhdr *ch;
 
 #if defined(INET) || defined(INET6)
 	struct udphdr *udp;
 	int ret;
 
 #endif
 	int len, cause_len, padding_len;
 
 #ifdef INET
 	struct sockaddr_in *src_sin, *dst_sin;
 	struct ip *ip;
 
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *src_sin6, *dst_sin6;
 	struct ip6_hdr *ip6;
 
 #endif
 
 	/* Compute the length of the cause and add final padding. */
 	cause_len = 0;
 	if (cause != NULL) {
 		struct mbuf *m_at, *m_last = NULL;
 
 		for (m_at = cause; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
 			if (SCTP_BUF_NEXT(m_at) == NULL)
 				m_last = m_at;
 			cause_len += SCTP_BUF_LEN(m_at);
 		}
 		/* Round the cause up to a multiple of 4 bytes. */
 		padding_len = cause_len % 4;
 		if (padding_len != 0) {
 			padding_len = 4 - padding_len;
 		}
 		if (padding_len != 0) {
 			if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 				sctp_m_freem(cause);
 				return;
 			}
 		}
 	} else {
 		padding_len = 0;
 	}
 	/* Get an mbuf for the header. */
 	len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		len += sizeof(struct ip);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		len += sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		break;
 	}
 #if defined(INET) || defined(INET6)
 	if (port) {
 		len += sizeof(struct udphdr);
 	}
 #endif
 	mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA);
 	if (mout == NULL) {
 		if (cause) {
 			sctp_m_freem(cause);
 		}
 		return;
 	}
 	/* Presumably reserves max_linkhdr bytes of leading space - TODO confirm macro. */
 	SCTP_BUF_RESV_UF(mout, max_linkhdr);
 	SCTP_BUF_LEN(mout) = len;
 	/* From here on the cause travels (and is freed) with mout. */
 	SCTP_BUF_NEXT(mout) = cause;
 	mout->m_pkthdr.flowid = mflowid;
 	M_HASHTYPE_SET(mout, mflowtype);
 #ifdef INET
 	ip = NULL;
 #endif
 #ifdef INET6
 	ip6 = NULL;
 #endif
 	/* Fill in the network header; len restarts as the running header size. */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		src_sin = (struct sockaddr_in *)src;
 		dst_sin = (struct sockaddr_in *)dst;
 		ip = mtod(mout, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = (sizeof(struct ip) >> 2);
 		ip->ip_tos = 0;
-		ip->ip_id = ip_newid();
 		ip->ip_off = 0;
+		ip_fillid(ip);
 		ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
 		if (port) {
 			ip->ip_p = IPPROTO_UDP;
 		} else {
 			ip->ip_p = IPPROTO_SCTP;
 		}
 		/* Addresses are swapped: we answer from dst back to src. */
 		ip->ip_src.s_addr = dst_sin->sin_addr.s_addr;
 		ip->ip_dst.s_addr = src_sin->sin_addr.s_addr;
 		ip->ip_sum = 0;
 		len = sizeof(struct ip);
 		shout = (struct sctphdr *)((caddr_t)ip + len);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		src_sin6 = (struct sockaddr_in6 *)src;
 		dst_sin6 = (struct sockaddr_in6 *)dst;
 		ip6 = mtod(mout, struct ip6_hdr *);
 		ip6->ip6_flow = htonl(0x60000000);
 		if (V_ip6_auto_flowlabel) {
 			ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 		}
 		ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
 		if (port) {
 			ip6->ip6_nxt = IPPROTO_UDP;
 		} else {
 			ip6->ip6_nxt = IPPROTO_SCTP;
 		}
 		/* Addresses are swapped: we answer from dst back to src. */
 		ip6->ip6_src = dst_sin6->sin6_addr;
 		ip6->ip6_dst = src_sin6->sin6_addr;
 		len = sizeof(struct ip6_hdr);
 		shout = (struct sctphdr *)((caddr_t)ip6 + len);
 		break;
 #endif
 	default:
 		len = 0;
 		shout = mtod(mout, struct sctphdr *);
 		break;
 	}
 #if defined(INET) || defined(INET6)
 	if (port) {
 		/* Encapsulation was requested but no local tunneling port is set: drop. */
 		if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 			sctp_m_freem(mout);
 			return;
 		}
 		udp = (struct udphdr *)shout;
 		udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 		/* NOTE(review): port is stored as-is; presumably already in network byte order - confirm against callers. */
 		udp->uh_dport = port;
 		udp->uh_sum = 0;
 		udp->uh_ulen = htons(sizeof(struct udphdr) +
 		    sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr) +
 		    cause_len + padding_len);
 		len += sizeof(struct udphdr);
 		/* The SCTP common header follows the UDP header. */
 		shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr));
 	} else {
 		udp = NULL;
 	}
 #endif
 	/* SCTP common header: ports swapped relative to the received header. */
 	shout->src_port = sh->dest_port;
 	shout->dest_port = sh->src_port;
 	shout->checksum = 0;
 	if (vtag) {
 		shout->v_tag = htonl(vtag);
 	} else {
 		/* No tag supplied: reflect the one from the received packet. */
 		shout->v_tag = sh->v_tag;
 	}
 	len += sizeof(struct sctphdr);
 	ch = (struct sctp_chunkhdr *)((caddr_t)shout + sizeof(struct sctphdr));
 	ch->chunk_type = type;
 	if (vtag) {
 		ch->chunk_flags = 0;
 	} else {
 		ch->chunk_flags = SCTP_HAD_NO_TCB;
 	}
 	/* chunk_length leaves the padding out; the packet length below includes it. */
 	ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len);
 	len += sizeof(struct sctp_chunkhdr);
 	len += cause_len + padding_len;
 
 	if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 		sctp_m_freem(mout);
 		return;
 	}
 	SCTP_ATTACH_CHAIN(o_pak, mout, len);
 	/* Finish lengths and checksums per address family, then transmit. */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (port) {
 			if (V_udp_cksum) {
 				udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
 			} else {
 				udp->uh_sum = 0;
 			}
 		}
 		ip->ip_len = htons(len);
 		if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			/* Encapsulated: the SCTP checksum is computed here in software. */
 			shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip) + sizeof(struct udphdr));
 			SCTP_STAT_INCR(sctps_sendswcrc);
 #endif
 			if (V_udp_cksum) {
 				SCTP_ENABLE_UDP_CSUM(o_pak);
 			}
 		} else {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			/* Not encapsulated: only request the checksum via csum_flags. */
 			mout->m_pkthdr.csum_flags = CSUM_SCTP;
 			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 		}
 #ifdef SCTP_PACKET_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
 			sctp_packet_log(o_pak);
 		}
 #endif
 		SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		/* NOTE(review): stored without htons(), unlike ip_len in the AF_INET case - confirm whether the output path rewrites it. */
 		ip6->ip6_plen = len - sizeof(struct ip6_hdr);
 		if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
 			SCTP_STAT_INCR(sctps_sendswcrc);
 #endif
 			/* A computed UDP checksum of 0 is transmitted as 0xffff. */
 			if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
 				udp->uh_sum = 0xffff;
 			}
 		} else {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
 			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 		}
 #ifdef SCTP_PACKET_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
 			sctp_packet_log(o_pak);
 		}
 #endif
 		SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
 		break;
 #endif
 	default:
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
 		    dst->sa_family);
 		sctp_m_freem(mout);
 		SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		return;
 	}
 	SCTP_STAT_INCR(sctps_sendpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	return;
 }
 
 /*
  * Send a SHUTDOWN-COMPLETE in reply to a received packet without going
  * through an association control block.  This simply calls
  * sctp_send_resp_msg() with chunk type SCTP_SHUTDOWN_COMPLETE, a vtag of 0
  * and no cause; all other arguments are passed through unchanged.
  */
 void
 sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	/* vtag == 0 and cause == NULL */
 	sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
 	    mflowtype, mflowid,
 	    vrf_id, port);
 }
 
 /*
  * Queue a HEARTBEAT request for destination net on the association's
  * control send queue.
  *
  * stcb:      association control block (asserted locked).
  * net:       destination to probe.  A NULL net, or an address family
  *            other than the compiled-in AF_INET/AF_INET6, makes this a
  *            no-op.
  * so_locked: passed to sctp_free_a_chunk() on the failure paths.
  *
  * The heartbeat info carries the current time, the destination's address
  * family, length and address and, for unconfirmed addresses only, two
  * random values that are also recorded in net->heartbeat_random1/2 (both
  * are 0 for confirmed addresses).
  */
 void
 sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	struct sctp_tmit_chunk *chk;
 	struct sctp_heartbeat_chunk *hb;
 	struct timeval now;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (net == NULL) {
 		return;
 	}
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	/* Reject unsupported address families before allocating anything. */
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		break;
 #endif
 	default:
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->send_size = sizeof(struct sctp_heartbeat_chunk);
 
 	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* The queued chunk holds its own reference on the destination. */
 	chk->whoTo = net;
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	/* Now we have a mbuf that we can fill in with the details */
 	hb = mtod(chk->data, struct sctp_heartbeat_chunk *);
 	memset(hb, 0, sizeof(struct sctp_heartbeat_chunk));
 	/* fill out chunk header */
 	hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST;
 	hb->ch.chunk_flags = 0;
 	hb->ch.chunk_length = htons(chk->send_size);
 	/* Fill out hb parameter */
 	hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO);
 	hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param));
 	hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
 	hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
 	/* Did our user request this one, put it in */
 	hb->heartbeat.hb_info.addr_family = (uint8_t) net->ro._l_addr.sa.sa_family;
 	hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len;
 	if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 		/*
 		 * we only take from the entropy pool if the address is not
 		 * confirmed.
 		 */
 		net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
 		net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
 	} else {
 		net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0;
 		net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0;
 	}
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		memcpy(hb->heartbeat.hb_info.address,
 		    &net->ro._l_addr.sin.sin_addr,
 		    sizeof(net->ro._l_addr.sin.sin_addr));
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		memcpy(hb->heartbeat.hb_info.address,
 		    &net->ro._l_addr.sin6.sin6_addr,
 		    sizeof(net->ro._l_addr.sin6.sin6_addr));
 		break;
 #endif
 	default:
 		/*
 		 * The family was already checked above, so this is only a
 		 * safety net.  By now the chunk owns an mbuf and a reference
 		 * on net, so release everything instead of leaking it.
 		 */
 		if (chk->data) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		if (chk->whoTo) {
 			sctp_free_remote_addr(chk->whoTo);
 			chk->whoTo = NULL;
 		}
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
 	}
 	net->hb_responded = 0;
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	stcb->asoc.ctrl_queue_cnt++;
 	SCTP_STAT_INCR(sctps_sendheartbeat);
 	return;
 }
 
 /*
  * Queue an ECN-ECHO chunk for destination net reporting high_tsn.
  *
  * If an ECN-ECHO for the same destination is already on the control send
  * queue it is updated in place: its TSN is raised to high_tsn when that is
  * greater (per SCTP_TSN_GT) and its packet counter is incremented.
  * Otherwise a new chunk with a count of 1 is built and inserted at the
  * head of the control send queue.  A NULL net, or failure to obtain a
  * chunk or mbuf, makes the call a no-op.
  */
 void
 sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
     uint32_t high_tsn)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *tchk;
 	struct sctp_ecne_chunk *echo;
 	uint32_t queued_tsn;
 
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* First try to fold this report into an ECN-ECHO already queued for net. */
 	TAILQ_FOREACH(tchk, &asoc->control_send_queue, sctp_next) {
 		if ((tchk->rec.chunk_id.id != SCTP_ECN_ECHO) || (tchk->whoTo != net)) {
 			continue;
 		}
 		echo = mtod(tchk->data, struct sctp_ecne_chunk *);
 		queued_tsn = ntohl(echo->tsn);
 		if (SCTP_TSN_GT(high_tsn, queued_tsn)) {
 			echo->tsn = htonl(high_tsn);
 			SCTP_STAT_INCR(sctps_queue_upd_ecne);
 		}
 		/* One more packet reported by this chunk. */
 		echo->num_pkts_since_cwr = htonl(ntohl(echo->num_pkts_since_cwr) + 1);
 		return;
 	}
 
 	/* Nothing to update, so a fresh chunk has to be built. */
 	sctp_alloc_a_chunk(stcb, tchk);
 	if (tchk == NULL) {
 		return;
 	}
 	SCTP_STAT_INCR(sctps_queue_upd_ecne);
 	tchk->asoc = asoc;
 	tchk->flags = 0;
 	tchk->copy_by_ref = 0;
 	tchk->rec.chunk_id.id = SCTP_ECN_ECHO;
 	tchk->rec.chunk_id.can_take_data = 0;
 	tchk->send_size = sizeof(struct sctp_ecne_chunk);
 	tchk->data = sctp_get_mbuf_for_msg(tchk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (tchk->data == NULL) {
 		sctp_free_a_chunk(stcb, tchk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(tchk->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(tchk->data) = tchk->send_size;
 	tchk->snd_count = 0;
 	tchk->sent = SCTP_DATAGRAM_UNSENT;
 	/* The queued chunk holds its own reference on the destination. */
 	tchk->whoTo = net;
 	atomic_add_int(&tchk->whoTo->ref_count, 1);
 
 	asoc->ecn_echo_cnt_onq++;
 
 	/* Fill in the chunk itself. */
 	echo = mtod(tchk->data, struct sctp_ecne_chunk *);
 	echo->ch.chunk_type = SCTP_ECN_ECHO;
 	echo->ch.chunk_flags = 0;
 	echo->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk));
 	echo->tsn = htonl(high_tsn);
 	echo->num_pkts_since_cwr = htonl(1);
 
 	TAILQ_INSERT_HEAD(&asoc->control_send_queue, tchk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Queue a PACKET-DROPPED chunk on the association's control send queue,
  * echoing (part of) the received packet in m back to the peer.
  *
  * stcb    - association; nothing is done when it is NULL.
  * net     - destination stored in the chunk's whoTo (a reference is taken);
  *           may be NULL, in which case whoTo is left NULL.
  * m       - mbuf chain of the offending packet; the SCTP common header is
  *           expected at offset iphlen.
  * len     - packet length including the iphlen bytes in front of the SCTP
  *           header (iphlen is subtracted below).
  * iphlen  - number of bytes preceding the SCTP common header in m.
  * bad_crc - non-zero sets SCTP_BADCRC in the chunk flags.
  *
  * Nothing is queued when the peer has not declared PKTDROP support, the
  * socket is gone, an allocation fails, or the packet contains a
  * PACKET-DROPPED, ABORT or INIT-ACK chunk.
  */
 void
 sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
     struct mbuf *m, int len, int iphlen, int bad_crc)
 {
 	struct sctp_association *asoc;
 	struct sctp_pktdrop_chunk *drp;
 	struct sctp_tmit_chunk *chk;
 	uint8_t *datap;
 	int was_trunc = 0;
 	int fullsz = 0;
 	long spc;
 	int offset;
 	struct sctp_chunkhdr *ch, chunk_buf;
 	unsigned int chk_length;
 
 	if (!stcb) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (asoc->pktdrop_supported == 0) {
 		/*-
 		 * peer must declare support before I send one.
 		 */
 		return;
 	}
 	if (stcb->sctp_socket == NULL) {
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	/* From here on len counts only the bytes starting at the SCTP header. */
 	len -= iphlen;
 	chk->send_size = len;
 	/* Validate that we do not have an ABORT in here. */
 	offset = iphlen + sizeof(struct sctphdr);
 	ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
 	    sizeof(*ch), (uint8_t *) & chunk_buf);
 	/* Walk the chunk headers until sctp_m_getptr() yields no more. */
 	while (ch != NULL) {
 		chk_length = ntohs(ch->chunk_length);
 		if (chk_length < sizeof(*ch)) {
 			/* break to abort land */
 			break;
 		}
 		switch (ch->chunk_type) {
 		case SCTP_PACKET_DROPPED:
 		case SCTP_ABORT_ASSOCIATION:
 		case SCTP_INITIATION_ACK:
 			/**
 			 * We don't respond with a PKT-DROP to an ABORT
 			 * or PKT-DROP. We also do not respond to an
 			 * INIT-ACK, because we can't know if the initiation
 			 * tag is correct or not.
 			 */
 			sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 			return;
 		default:
 			break;
 		}
 		/* Advance by the chunk length rounded up to a 4-byte boundary. */
 		offset += SCTP_SIZE32(chk_length);
 		ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
 		    sizeof(*ch), (uint8_t *) & chunk_buf);
 	}
 
 	if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) >
 	    min(stcb->asoc.smallest_mtu, MCLBYTES)) {
 		/*
 		 * only send 1 mtu worth, trim off the excess on the end.
 		 */
 		fullsz = len;
 		len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
 		was_trunc = 1;
 	}
 	chk->asoc = &stcb->asoc;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		/*
 		 * Shared failure exit: also entered by the goto below, after
 		 * chk->data has been released and cleared.
 		 */
 jump_out:
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	drp = mtod(chk->data, struct sctp_pktdrop_chunk *);
 	if (drp == NULL) {
 		sctp_m_freem(chk->data);
 		chk->data = NULL;
 		goto jump_out;
 	}
 	/*
 	 * Note: book_size is computed from send_size as set before any
 	 * truncation adjustment made below.
 	 */
 	chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) +
 	    sizeof(struct sctphdr) + SCTP_MED_OVERHEAD));
 	chk->book_size_scale = 0;
 	if (was_trunc) {
 		drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED;
 		/* Report the untruncated length to the peer. */
 		drp->trunc_len = htons(fullsz);
 		/*
 		 * Len is already adjusted to size minus overhead above take
 		 * out the pkt_drop chunk itself from it.
 		 */
 		chk->send_size = len - sizeof(struct sctp_pktdrop_chunk);
 		len = chk->send_size;
 	} else {
 		/* no truncation needed */
 		drp->ch.chunk_flags = 0;
 		drp->trunc_len = htons(0);
 	}
 	if (bad_crc) {
 		drp->ch.chunk_flags |= SCTP_BADCRC;
 	}
 	/* send_size now covers the PKTDROP header plus the echoed bytes. */
 	chk->send_size += sizeof(struct sctp_pktdrop_chunk);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	if (net) {
 		/* we should hit here */
 		chk->whoTo = net;
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	} else {
 		chk->whoTo = NULL;
 	}
 	drp->ch.chunk_type = SCTP_PACKET_DROPPED;
 	drp->ch.chunk_length = htons(chk->send_size);
 	/* bottle_bw carries the receive socket buffer limit, floored at 0. */
 	spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
 	if (spc < 0) {
 		spc = 0;
 	}
 	drp->bottle_bw = htonl(spc);
 	if (asoc->my_rwnd) {
 		drp->current_onq = htonl(asoc->size_on_reasm_queue +
 		    asoc->size_on_all_streams +
 		    asoc->my_rwnd_control_len +
 		    stcb->sctp_socket->so_rcv.sb_cc);
 	} else {
 		/*-
 		 * If my rwnd is 0, possibly from mbuf depletion as well as
 		 * space used, tell the peer there is NO space aka onq == bw
 		 */
 		drp->current_onq = htonl(spc);
 	}
 	drp->reserved = 0;
 	datap = drp->data;
 	/* Echo len bytes of the packet, starting at the SCTP common header. */
 	m_copydata(m, iphlen, len, (caddr_t)datap);
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Queue an ECN CWR chunk towards destination net.
  *
  * When a CWR chunk for the same destination is already sitting on the
  * control send queue it is refreshed in place: its TSN is advanced to
  * high_tsn if that is newer, and SCTP_CWR_REDUCE_OVERRIDE is OR-ed into
  * its flags when present in override.  Otherwise a fresh chunk is built,
  * with override as its chunk flags, and appended to the control send queue.
  *
  * Silently does nothing when net is NULL or when the chunk or its mbuf
  * cannot be allocated.
  */
 void
 sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, uint8_t override)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *tp;
 	struct sctp_cwr_chunk *cwr_hdr;
 	uint32_t queued_tsn;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 
 	/* First try to piggy-back on a CWR already queued for this net. */
 	TAILQ_FOREACH(tp, &asoc->control_send_queue, sctp_next) {
 		if (tp->rec.chunk_id.id != SCTP_ECN_CWR) {
 			continue;
 		}
 		if (tp->whoTo != net) {
 			continue;
 		}
 		cwr_hdr = mtod(tp->data, struct sctp_cwr_chunk *);
 		queued_tsn = ntohl(cwr_hdr->tsn);
 		if (SCTP_TSN_GT(high_tsn, queued_tsn)) {
 			cwr_hdr->tsn = htonl(high_tsn);
 		}
 		if (override & SCTP_CWR_REDUCE_OVERRIDE) {
 			/* The override request must not get lost. */
 			cwr_hdr->ch.chunk_flags |= SCTP_CWR_REDUCE_OVERRIDE;
 		}
 		return;
 	}
 
 	/* Nothing queued yet: build a new CWR chunk. */
 	sctp_alloc_a_chunk(stcb, tp);
 	if (tp == NULL) {
 		return;
 	}
 	tp->send_size = sizeof(struct sctp_cwr_chunk);
 	tp->copy_by_ref = 0;
 	tp->flags = 0;
 	tp->asoc = asoc;
 	tp->rec.chunk_id.id = SCTP_ECN_CWR;
 	tp->rec.chunk_id.can_take_data = 1;
 	tp->data = sctp_get_mbuf_for_msg(tp->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (tp->data == NULL) {
 		sctp_free_a_chunk(stcb, tp, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(tp->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(tp->data) = tp->send_size;
 	tp->snd_count = 0;
 	tp->sent = SCTP_DATAGRAM_UNSENT;
 
 	/* The chunk holds a reference on its destination. */
 	tp->whoTo = net;
 	atomic_add_int(&tp->whoTo->ref_count, 1);
 
 	/* Fill in the on-wire CWR chunk. */
 	cwr_hdr = mtod(tp->data, struct sctp_cwr_chunk *);
 	cwr_hdr->ch.chunk_type = SCTP_ECN_CWR;
 	cwr_hdr->ch.chunk_flags = override;
 	cwr_hdr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk));
 	cwr_hdr->tsn = htonl(high_tsn);
 
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, tp, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Append an SCTP_STR_RESET_OUT_REQUEST parameter to the chunk in chk.
  *
  * The parameter is written at the current chunk length rounded up to a
  * 4-byte boundary and carries seq, resp_seq, last_sent and the
  * number_entries stream ids taken from list (converted to network order).
  * The chunk header length, chk->book_size, chk->send_size and the mbuf
  * length are updated to include the new parameter.
  *
  * The caller must have made sure the mbuf is large enough.
  */
 void
 sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
     int number_entries, uint16_t * list,
     uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
 {
 	struct sctp_chunkhdr *hdr;
 	struct sctp_stream_reset_out_request *req;
 	uint16_t prev_len, param_len, idx;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	req = (struct sctp_stream_reset_out_request *)((caddr_t)hdr + prev_len);
 
 	/* Fixed part plus one 16-bit entry per listed stream. */
 	param_len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
 
 	req->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
 	req->ph.param_length = htons(param_len);
 	req->request_seq = htonl(seq);
 	req->response_seq = htonl(resp_seq);
 	req->send_reset_at_tsn = htonl(last_sent);
 	for (idx = 0; idx < number_entries; idx++) {
 		req->list_of_streams[idx] = htons(list[idx]);
 	}
 	if (SCTP_SIZE32(param_len) > param_len) {
 		/*-
 		 * An odd number of entries leaves the parameter 2 bytes
 		 * short of a 4-byte boundary; zero the slot that becomes
 		 * the padding.
 		 */
 		req->list_of_streams[number_entries] = 0;
 	}
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->book_size = param_len + prev_len;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 }
 
 /*
  * Append an SCTP_STR_RESET_IN_REQUEST parameter to the chunk in chk.
  *
  * The parameter is placed at the current chunk length rounded up to a
  * 4-byte boundary and carries seq plus the number_entries stream ids from
  * list (converted to network order).  The chunk header length,
  * chk->book_size, chk->send_size and the mbuf length are updated.
  */
 static void
 sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
     int number_entries, uint16_t * list,
     uint32_t seq)
 {
 	struct sctp_chunkhdr *hdr;
 	struct sctp_stream_reset_in_request *req;
 	uint16_t prev_len, param_len, idx;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	req = (struct sctp_stream_reset_in_request *)((caddr_t)hdr + prev_len);
 
 	/* Fixed part plus one 16-bit entry per listed stream. */
 	param_len = (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
 
 	req->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
 	req->ph.param_length = htons(param_len);
 	req->request_seq = htonl(seq);
 	for (idx = 0; idx < number_entries; idx++) {
 		req->list_of_streams[idx] = htons(list[idx]);
 	}
 	if (SCTP_SIZE32(param_len) > param_len) {
 		/*-
 		 * An odd number of entries leaves the parameter 2 bytes
 		 * short of a 4-byte boundary; zero the slot that becomes
 		 * the padding.
 		 */
 		req->list_of_streams[number_entries] = 0;
 	}
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->book_size = param_len + prev_len;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 }
 
 /*
  * Append an SCTP_STR_RESET_TSN_REQUEST parameter, carrying request
  * sequence number seq, to the chunk in chk and update the chunk header
  * length, chk->send_size, chk->book_size and the mbuf length.
  */
 static void
 sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
     uint32_t seq)
 {
 	struct sctp_chunkhdr *hdr;
 	struct sctp_stream_reset_tsn_request *req;
 	uint16_t prev_len, param_len;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	req = (struct sctp_stream_reset_tsn_request *)((caddr_t)hdr + prev_len);
 	param_len = sizeof(struct sctp_stream_reset_tsn_request);
 
 	req->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST);
 	req->ph.param_length = htons(param_len);
 	req->request_seq = htonl(seq);
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->send_size = param_len + prev_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Append an SCTP_STR_RESET_RESPONSE parameter to the chunk in chk.
  *
  * resp_seq - sequence number of the request being answered.
  * result   - result code, in host byte order.
  *
  * The parameter is placed at the current chunk length rounded up to a
  * 4-byte boundary; the chunk header length, chk->book_size,
  * chk->send_size and the mbuf length are updated to include it.
  */
 void
 sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result)
 {
 	uint16_t len, old_len;
 	struct sctp_stream_reset_response *resp;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* get to new offset for the param. */
 	resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len);
 	/* now how long will this param be? */
 	len = sizeof(struct sctp_stream_reset_response);
 	resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
 	resp->ph.param_length = htons(len);
 	resp->response_seq = htonl(resp_seq);
 	/*
 	 * Host-to-network conversion: the value is being written to the
 	 * wire, so htonl() is the correct direction (matches
 	 * sctp_add_stream_reset_result_tsn()).
 	 */
 	resp->result = htonl(result);
 
 	/* now fix the chunk length */
 	ch->chunk_length = htons(len + old_len);
 	chk->book_size = len + old_len;
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	return;
 }
 
 /*
  * Append an SCTP_STR_RESET_RESPONSE parameter in its extended form, which
  * additionally carries send_una as the sender's next TSN and recv_next as
  * the receiver's next TSN, to the chunk in chk.
  *
  * The chunk header length, chk->book_size, chk->send_size and the mbuf
  * length are updated to include the new parameter.
  */
 void
 sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result,
     uint32_t send_una, uint32_t recv_next)
 {
 	struct sctp_chunkhdr *hdr;
 	struct sctp_stream_reset_response_tsn *rsp;
 	uint16_t prev_len, param_len;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	rsp = (struct sctp_stream_reset_response_tsn *)((caddr_t)hdr + prev_len);
 	param_len = sizeof(struct sctp_stream_reset_response_tsn);
 
 	rsp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
 	rsp->ph.param_length = htons(param_len);
 	rsp->response_seq = htonl(resp_seq);
 	rsp->result = htonl(result);
 	rsp->senders_next_tsn = htonl(send_una);
 	rsp->receivers_next_tsn = htonl(recv_next);
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->book_size = param_len + prev_len;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 }
 
 /*
  * Append an SCTP_STR_RESET_ADD_OUT_STREAMS parameter asking for `adding'
  * more outgoing streams, with request sequence number seq, to the chunk in
  * chk.  The chunk header length, chk->send_size, chk->book_size and the
  * mbuf length are updated to include the new parameter.
  */
 static void
 sctp_add_an_out_stream(struct sctp_tmit_chunk *chk,
     uint32_t seq,
     uint16_t adding)
 {
 	struct sctp_stream_reset_add_strm *param;
 	struct sctp_chunkhdr *hdr;
 	uint16_t prev_len, param_len;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	param = (struct sctp_stream_reset_add_strm *)((caddr_t)hdr + prev_len);
 	param_len = sizeof(struct sctp_stream_reset_add_strm);
 
 	param->ph.param_type = htons(SCTP_STR_RESET_ADD_OUT_STREAMS);
 	param->ph.param_length = htons(param_len);
 	param->request_seq = htonl(seq);
 	param->number_of_streams = htons(adding);
 	param->reserved = 0;
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->send_size = param_len + prev_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Append an SCTP_STR_RESET_ADD_IN_STREAMS parameter asking for `adding'
  * more incoming streams, with request sequence number seq, to the chunk in
  * chk.  The chunk header length, chk->send_size, chk->book_size and the
  * mbuf length are updated to include the new parameter.
  */
 static void
 sctp_add_an_in_stream(struct sctp_tmit_chunk *chk,
     uint32_t seq,
     uint16_t adding)
 {
 	struct sctp_stream_reset_add_strm *param;
 	struct sctp_chunkhdr *hdr;
 	uint16_t prev_len, param_len;
 
 	hdr = mtod(chk->data, struct sctp_chunkhdr *);
 	prev_len = SCTP_SIZE32(ntohs(hdr->chunk_length));
 
 	/* The new parameter starts right after the padded existing content. */
 	param = (struct sctp_stream_reset_add_strm *)((caddr_t)hdr + prev_len);
 	param_len = sizeof(struct sctp_stream_reset_add_strm);
 
 	param->ph.param_type = htons(SCTP_STR_RESET_ADD_IN_STREAMS);
 	param->ph.param_length = htons(param_len);
 	param->request_seq = htonl(seq);
 	param->number_of_streams = htons(adding);
 	param->reserved = 0;
 
 	/* Account for the new parameter in the chunk and its bookkeeping. */
 	hdr->chunk_length = htons(param_len + prev_len);
 	chk->book_size_scale = 0;
 	chk->send_size = param_len + prev_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Build a STREAM-RESET chunk holding the requested parameters, queue it on
  * the control send queue and start the stream reset timer.
  *
  * stcb           - association to send on.
  * number_entries - number of stream ids in list (used by the out and in
  *                  reset requests).
  * list           - stream ids to reset.
  * send_out_req   - non-zero adds an outgoing reset request.
  * send_in_req    - non-zero adds an incoming reset request.
  * send_tsn_req   - non-zero adds a TSN reset request; may not be combined
  *                  with send_out_req or send_in_req.
  * add_stream     - bit 0 requests adding_o outgoing streams, bit 1 requests
  *                  adding_i incoming streams.
  * adding_o       - number of outgoing streams to add.
  * adding_i       - number of incoming streams to add.
  * peer_asked     - stored in asoc->peer_req_out when outgoing streams are
  *                  added.
  *
  * Returns 0 on success, EBUSY when a request is already outstanding,
  * EINVAL for an empty or contradictory request and ENOMEM when the list is
  * too long for one cluster or an allocation fails.
  */
 int
 sctp_send_str_reset_req(struct sctp_tcb *stcb,
     uint16_t number_entries, uint16_t * list,
     uint8_t send_out_req,
     uint8_t send_in_req,
     uint8_t send_tsn_req,
     uint8_t add_stream,
     uint16_t adding_o,
     uint16_t adding_i, uint8_t peer_asked)
 {
 
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
 	uint32_t seq;
 
 	asoc = &stcb->asoc;
 	if (asoc->stream_reset_outstanding) {
 		/*-
 		 * Already one pending, must get ACK back to clear the flag.
 		 */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
 		return (EBUSY);
 	}
 	if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) &&
 	    (add_stream == 0)) {
 		/* nothing to do */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	if (send_tsn_req && (send_out_req || send_in_req)) {
 		/* error, can't do that */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	/* The whole request has to fit into the single cluster used below. */
 	if (number_entries > (MCLBYTES -
 	    SCTP_MIN_OVERHEAD -
 	    sizeof(struct sctp_chunkhdr) -
 	    sizeof(struct sctp_stream_reset_out_request)) /
 	    sizeof(uint16_t)) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->book_size = sizeof(struct sctp_chunkhdr);
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->book_size_scale = 0;
 
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
 	/* setup chunk parameters */
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* Prefer the alternate destination when one is set. */
 	if (stcb->asoc.alternate) {
 		chk->whoTo = stcb->asoc.alternate;
 	} else {
 		chk->whoTo = stcb->asoc.primary_destination;
 	}
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	/* Start with a bare chunk header; each helper below appends to it. */
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	ch->chunk_type = SCTP_STREAM_RESET;
 	ch->chunk_flags = 0;
 	ch->chunk_length = htons(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 
 	/* Every parameter added consumes one request sequence number. */
 	seq = stcb->asoc.str_reset_seq_out;
 	if (send_out_req) {
 		sctp_add_stream_reset_out(chk, number_entries, list,
 		    seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
 		asoc->stream_reset_out_is_outstanding = 1;
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if ((add_stream & 1) &&
 	    ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) {
 		/* Need to allocate more */
 		struct sctp_stream_out *oldstream;
 		struct sctp_stream_queue_pending *sp, *nsp;
 		int i;
 
 #if defined(SCTP_DETAILED_STR_STATS)
 		int j;
 
 #endif
 
 		oldstream = stcb->asoc.strmout;
 		/* get some more */
 		SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
 		    ((stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out)),
 		    SCTP_M_STRMO);
 		if (stcb->asoc.strmout == NULL) {
 			uint8_t x;
 
 			/* Keep the old array and drop the add-out request. */
 			stcb->asoc.strmout = oldstream;
 			/* Turn off the bit */
 			x = add_stream & 0xfe;
 			add_stream = x;
 			goto skip_stuff;
 		}
 		/*
 		 * Ok now we proceed with copying the old out stuff and
 		 * initializing the new stuff.
 		 */
 		SCTP_TCB_SEND_LOCK(stcb);
 		stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 0, 1);
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 			stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues;
 			stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send;
 			stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
 			/*
 			 * Carry the abandoned counters over as well; the new
 			 * array is not otherwise initialized for existing
 			 * streams, so they would be lost without this copy.
 			 */
 #if defined(SCTP_DETAILED_STR_STATS)
 			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 				stcb->asoc.strmout[i].abandoned_sent[j] = oldstream[i].abandoned_sent[j];
 				stcb->asoc.strmout[i].abandoned_unsent[j] = oldstream[i].abandoned_unsent[j];
 			}
 #else
 			stcb->asoc.strmout[i].abandoned_sent[0] = oldstream[i].abandoned_sent[0];
 			stcb->asoc.strmout[i].abandoned_unsent[0] = oldstream[i].abandoned_unsent[0];
 #endif
 			stcb->asoc.strmout[i].stream_no = i;
 			stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], &oldstream[i]);
 			/* now anything on those queues? */
 			TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) {
 				TAILQ_REMOVE(&oldstream[i].outqueue, sp, next);
 				TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next);
 			}
 			/* Now move assoc pointers too */
 			if (stcb->asoc.last_out_stream == &oldstream[i]) {
 				stcb->asoc.last_out_stream = &stcb->asoc.strmout[i];
 			}
 			if (stcb->asoc.locked_on_sending == &oldstream[i]) {
 				stcb->asoc.locked_on_sending = &stcb->asoc.strmout[i];
 			}
 		}
 		/* now the new streams */
 		stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
 		for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) {
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 			stcb->asoc.strmout[i].chunks_on_queues = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 				stcb->asoc.strmout[i].abandoned_sent[j] = 0;
 				stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
 			}
 #else
 			stcb->asoc.strmout[i].abandoned_sent[0] = 0;
 			stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
 #endif
 			stcb->asoc.strmout[i].next_sequence_send = 0x0;
 			stcb->asoc.strmout[i].stream_no = i;
 			stcb->asoc.strmout[i].last_msg_incomplete = 0;
 			stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
 		}
 		stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o;
 		SCTP_FREE(oldstream, SCTP_M_STRMO);
 		SCTP_TCB_SEND_UNLOCK(stcb);
 	}
 skip_stuff:
 	if ((add_stream & 1) && (adding_o > 0)) {
 		asoc->strm_pending_add_size = adding_o;
 		asoc->peer_req_out = peer_asked;
 		sctp_add_an_out_stream(chk, seq, adding_o);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if ((add_stream & 2) && (adding_i > 0)) {
 		sctp_add_an_in_stream(chk, seq, adding_i);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if (send_in_req) {
 		sctp_add_stream_reset_in(chk, number_entries, list, seq);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if (send_tsn_req) {
 		sctp_add_stream_reset_tsn(chk, seq);
 		asoc->stream_reset_outstanding++;
 	}
 	/* Remember the chunk as the association's pending reset request. */
 	asoc->str_reset = chk;
 	/* insert the chunk for sending */
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue,
 	    chk,
 	    sctp_next);
 	asoc->ctrl_queue_cnt++;
 	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
 	return (0);
 }
 
 /*
  * Answer the packet in m with an ABORT carrying the optional error cause.
  *
  * No ABORT is sent when the packet itself contains an ABORT; in that case
  * a non-NULL cause is freed here.  Otherwise cause is handed on to
  * sctp_send_resp_msg().
  */
 void
 sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	if (!sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
 		sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION,
 		    cause, mflowtype, mflowid, vrf_id, port);
 		return;
 	}
 	/* Never answer an ABORT with an ABORT; just drop the cause. */
 	if (cause != NULL) {
 		sctp_m_freem(cause);
 	}
 }
 
 /*
  * Send an OPERATION-ERROR chunk carrying cause; this simply forwards all
  * arguments to sctp_send_resp_msg() with the SCTP_OPERATION_ERROR type.
  */
 void
 sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR,
 	    cause, mflowtype, mflowid, vrf_id, port);
 }
 
 /*
  * Copy up to max_send_len bytes from uio into a new mbuf chain.
  *
  * The chain is requested with M_PKTHDR, plus M_EOR when user_marks_eor is
  * non-zero.  On success the chain is returned, *sndout receives its total
  * length and *new_tail its last mbuf.  On failure NULL is returned and
  * *error is set to ENOBUFS; *sndout and *new_tail are left untouched.
  */
 static struct mbuf *
 sctp_copy_resume(struct uio *uio,
     int max_send_len,
     int user_marks_eor,
     int *error,
     uint32_t * sndout,
     struct mbuf **new_tail)
 {
 	struct mbuf *chain;
 	int mflags;
 
 	mflags = M_PKTHDR;
 	if (user_marks_eor) {
 		mflags |= M_EOR;
 	}
 	chain = m_uiotombuf(uio, M_WAITOK, max_send_len, 0, mflags);
 	if (chain != NULL) {
 		*sndout = m_length(chain, NULL);
 		*new_tail = m_last(chain);
 		return (chain);
 	}
 	SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
 	*error = ENOBUFS;
 	return (NULL);
 }
 
 /*
  * Copy sp->length bytes from uio into a fresh mbuf chain stored in
  * sp->data, asking m_uiotombuf() for resv_upfront bytes of alignment
  * space, and record the last mbuf of the chain in sp->tail_mbuf.
  *
  * Returns 0 on success or ENOBUFS when no chain could be built.
  */
 static int
 sctp_copy_one(struct sctp_stream_queue_pending *sp,
     struct uio *uio,
     int resv_upfront)
 {
 	sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
 	    resv_upfront, 0);
 	if (sp->data == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
 		return (ENOBUFS);
 	}
 	sp->tail_mbuf = m_last(sp->data);
 	return (0);
 }
 
 
 
 static struct sctp_stream_queue_pending *
 sctp_copy_it_in(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_sndrcvinfo *srcv,
     struct uio *uio,
     struct sctp_nets *net,
     int max_send_len,
     int user_marks_eor,
     int *error)
 {
 	/*-
 	 * This routine must be very careful in its work. Protocol
 	 * processing is up and running so care must be taken to spl...()
 	 * when you need to do something that may effect the stcb/asoc. The
 	 * sb is locked however. When data is copied the protocol processing
 	 * should be enabled since this is a slower operation...
 	 */
 	struct sctp_stream_queue_pending *sp = NULL;
 	int resv_in_first;
 
 	*error = 0;
 	/* Now can we send this? */
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		/* got data while shutting down */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 		*error = ECONNRESET;
 		goto out_now;
 	}
 	sctp_alloc_a_strmoq(stcb, sp);
 	if (sp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		*error = ENOMEM;
 		goto out_now;
 	}
 	sp->act_flags = 0;
 	sp->sender_all_done = 0;
 	sp->sinfo_flags = srcv->sinfo_flags;
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
 	(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 
 	sp->stream = srcv->sinfo_stream;
 	sp->length = min(uio->uio_resid, max_send_len);
 	if ((sp->length == (uint32_t) uio->uio_resid) &&
 	    ((user_marks_eor == 0) ||
 	    (srcv->sinfo_flags & SCTP_EOF) ||
 	    (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
 		sp->msg_is_complete = 1;
 	} else {
 		sp->msg_is_complete = 0;
 	}
 	sp->sender_all_done = 0;
 	sp->some_taken = 0;
 	sp->put_last_out = 0;
 	resv_in_first = sizeof(struct sctp_data_chunk);
 	sp->data = sp->tail_mbuf = NULL;
 	if (sp->length == 0) {
 		*error = 0;
 		goto skip_copy;
 	}
 	if (srcv->sinfo_keynumber_valid) {
 		sp->auth_keyid = srcv->sinfo_keynumber;
 	} else {
 		sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
 	}
 	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 		sctp_auth_key_acquire(stcb, sp->auth_keyid);
 		sp->holds_key_ref = 1;
 	}
 	*error = sctp_copy_one(sp, uio, resv_in_first);
 skip_copy:
 	if (*error) {
 		sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED);
 		sp = NULL;
 	} else {
 		if (sp->sinfo_flags & SCTP_ADDR_OVER) {
 			sp->net = net;
 			atomic_add_int(&sp->net->ref_count, 1);
 		} else {
 			sp->net = NULL;
 		}
 		sctp_set_prsctp_policy(sp);
 	}
 out_now:
 	return (sp);
 }
 
 
 /*
  * Socket-layer send entry point: a thin front end for sctp_lower_sosend().
  *
  * Extracts an SCTP_SNDRCV ancillary record from control, if there is one,
  * and (when built with both INET and INET6) rewrites a v4-mapped IPv6
  * destination as a plain IPv4 address before passing everything on.
  * Returns whatever sctp_lower_sosend() returns.
  */
 int
 sctp_sosend(struct socket *so,
     struct sockaddr *addr,
     struct uio *uio,
     struct mbuf *top,
     struct mbuf *control,
     int flags,
     struct thread *p
 )
 {
 	struct sctp_sndrcvinfo sndrcvninfo;
 	struct sctp_sndrcvinfo *sinfo = NULL;
 	struct sockaddr *addr_to_use = addr;
 
 #if defined(INET) && defined(INET6)
 	struct sockaddr_in sin;
 
 #endif
 
 	/* Only hand the sndrcv info down when the cmsg actually carried one. */
 	if ((control != NULL) &&
 	    sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control,
 	    sizeof(sndrcvninfo))) {
 		sinfo = &sndrcvninfo;
 	}
 #if defined(INET) && defined(INET6)
 	if ((addr != NULL) && (addr->sa_family == AF_INET6)) {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
 
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			/* Use the embedded IPv4 address instead. */
 			in6_sin6_2_sin(&sin, sin6);
 			addr_to_use = (struct sockaddr *)&sin;
 		}
 	}
 #endif
 	return (sctp_lower_sosend(so, addr_to_use, uio, top, control, flags,
 	    sinfo, p));
 }
 
 
 int
 sctp_lower_sosend(struct socket *so,
     struct sockaddr *addr,
     struct uio *uio,
     struct mbuf *i_pak,
     struct mbuf *control,
     int flags,
     struct sctp_sndrcvinfo *srcv
     ,
     struct thread *p
 )
 {
 	unsigned int sndlen = 0, max_len;
 	int error, len;
 	struct mbuf *top = NULL;
 	int queue_only = 0, queue_only_for_init = 0;
 	int free_cnt_applied = 0;
 	int un_sent;
 	int now_filled = 0;
 	unsigned int inqueue_bytes = 0;
 	struct sctp_block_entry be;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb = NULL;
 	struct timeval now;
 	struct sctp_nets *net;
 	struct sctp_association *asoc;
 	struct sctp_inpcb *t_inp;
 	int user_marks_eor;
 	int create_lock_applied = 0;
 	int nagle_applies = 0;
 	int some_on_control = 0;
 	int got_all_of_the_send = 0;
 	int hold_tcblock = 0;
 	int non_blocking = 0;
 	uint32_t local_add_more, local_soresv = 0;
 	uint16_t port;
 	uint16_t sinfo_flags;
 	sctp_assoc_t sinfo_assoc_id;
 
 	error = 0;
 	net = NULL;
 	stcb = NULL;
 	asoc = NULL;
 
 	t_inp = inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		if (i_pak) {
 			SCTP_RELEASE_PKT(i_pak);
 		}
 		return (error);
 	}
 	if ((uio == NULL) && (i_pak == NULL)) {
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
 	atomic_add_int(&inp->total_sends, 1);
 	if (uio) {
 		if (uio->uio_resid < 0) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			return (EINVAL);
 		}
 		sndlen = uio->uio_resid;
 	} else {
 		top = SCTP_HEADER_TO_CHAIN(i_pak);
 		sndlen = SCTP_HEADER_LEN(i_pak);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
 	    (void *)addr,
 	    sndlen);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_socket->so_qlimit)) {
 		/* The listener can NOT send */
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
 		error = ENOTCONN;
 		goto out_unlocked;
 	}
 	/**
 	 * Pre-screen address, if one is given the sin-len
 	 * must be set correctly!
 	 */
 	if (addr) {
 		union sctp_sockstore *raddr = (union sctp_sockstore *)addr;
 
 		switch (raddr->sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) {
 				SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 				error = EINVAL;
 				goto out_unlocked;
 			}
 			port = raddr->sin.sin_port;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) {
 				SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 				error = EINVAL;
 				goto out_unlocked;
 			}
 			port = raddr->sin6.sin6_port;
 			break;
 #endif
 		default:
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAFNOSUPPORT);
 			error = EAFNOSUPPORT;
 			goto out_unlocked;
 		}
 	} else
 		port = 0;
 
 	if (srcv) {
 		sinfo_flags = srcv->sinfo_flags;
 		sinfo_assoc_id = srcv->sinfo_assoc_id;
 		if (INVALID_SINFO_FLAG(sinfo_flags) ||
 		    PR_SCTP_INVALID_POLICY(sinfo_flags)) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		if (srcv->sinfo_flags)
 			SCTP_STAT_INCR(sctps_sends_with_flags);
 	} else {
 		sinfo_flags = inp->def_send.sinfo_flags;
 		sinfo_assoc_id = inp->def_send.sinfo_assoc_id;
 	}
 	if (sinfo_flags & SCTP_SENDALL) {
 		/* its a sendall */
 		error = sctp_sendall(inp, uio, top, srcv);
 		top = NULL;
 		goto out_unlocked;
 	}
 	if ((sinfo_flags & SCTP_ADDR_OVER) && (addr == NULL)) {
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	/* now we must find the assoc */
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		SCTP_INP_RLOCK(inp);
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb) {
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = 1;
 		}
 		SCTP_INP_RUNLOCK(inp);
 	} else if (sinfo_assoc_id) {
 		stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0);
 	} else if (addr) {
 		/*-
 		 * Since we did not use findep we must
 		 * increment it, and if we don't find a tcb
 		 * decrement it.
 		 */
 		SCTP_INP_WLOCK(inp);
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_WUNLOCK(inp);
 		stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
 		if (stcb == NULL) {
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		} else {
 			hold_tcblock = 1;
 		}
 	}
 	if ((stcb == NULL) && (addr)) {
 		/* Possible implicit send? */
 		SCTP_ASOC_CREATE_LOCK(inp);
 		create_lock_applied = 1;
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 			/* Should I really unlock ? */
 			SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out_unlocked;
 
 		}
 		if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
 		    (addr->sa_family == AF_INET6)) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		SCTP_INP_WLOCK(inp);
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_WUNLOCK(inp);
 		/* With the lock applied look again */
 		stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
 		if ((stcb == NULL) && (control != NULL) && (port > 0)) {
 			stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error);
 		}
 		if (stcb == NULL) {
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		} else {
 			hold_tcblock = 1;
 		}
 		if (error) {
 			goto out_unlocked;
 		}
 		if (t_inp != inp) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
 			error = ENOTCONN;
 			goto out_unlocked;
 		}
 	}
 	if (stcb == NULL) {
 		if (addr == NULL) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
 			error = ENOENT;
 			goto out_unlocked;
 		} else {
 			/* We must go ahead and start the INIT process */
 			uint32_t vrf_id;
 
 			if ((sinfo_flags & SCTP_ABORT) ||
 			    ((sinfo_flags & SCTP_EOF) && (sndlen == 0))) {
 				/*-
 				 * User asks to abort a non-existant assoc,
 				 * or EOF a non-existant assoc with no data
 				 */
 				SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
 				error = ENOENT;
 				goto out_unlocked;
 			}
 			/* get an asoc/stcb struct */
 			vrf_id = inp->def_vrf_id;
 #ifdef INVARIANTS
 			if (create_lock_applied == 0) {
 				panic("Error, should hold create lock and I don't?");
 			}
 #endif
 			stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
 			    p
 			    );
 			if (stcb == NULL) {
 				/* Error is setup for us in the call */
 				goto out_unlocked;
 			}
 			if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
 				stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
 				/*
 				 * Set the connected flag so we can queue
 				 * data
 				 */
 				soisconnecting(so);
 			}
 			hold_tcblock = 1;
 			if (create_lock_applied) {
 				SCTP_ASOC_CREATE_UNLOCK(inp);
 				create_lock_applied = 0;
 			} else {
 				SCTP_PRINTF("Huh-3? create lock should have been on??\n");
 			}
 			/*
 			 * Turn on queue only flag to prevent data from
 			 * being sent
 			 */
 			queue_only = 1;
 			asoc = &stcb->asoc;
 			SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
 			(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
 
 			/* initialize authentication params for the assoc */
 			sctp_initialize_auth_params(inp, stcb);
 
 			if (control) {
 				if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
 					sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7);
 					hold_tcblock = 0;
 					stcb = NULL;
 					goto out_unlocked;
 				}
 			}
 			/* out with the INIT */
 			queue_only_for_init = 1;
 			/*-
 			 * we may want to dig in after this call and adjust the MTU
 			 * value. It defaulted to 1500 (constant) but the ro
 			 * structure may now have an update and thus we may need to
 			 * change it BEFORE we append the message.
 			 */
 		}
 	} else
 		asoc = &stcb->asoc;
 	if (srcv == NULL)
 		srcv = (struct sctp_sndrcvinfo *)&asoc->def_send;
 	if (srcv->sinfo_flags & SCTP_ADDR_OVER) {
 		if (addr)
 			net = sctp_findnet(stcb, addr);
 		else
 			net = NULL;
 		if ((net == NULL) ||
 		    ((port != 0) && (port != stcb->rport))) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out_unlocked;
 		}
 	} else {
 		if (stcb->asoc.alternate) {
 			net = stcb->asoc.alternate;
 		} else {
 			net = stcb->asoc.primary_destination;
 		}
 	}
 	atomic_add_int(&stcb->total_sends, 1);
 	/* Keep the stcb from being freed under our feet */
 	atomic_add_int(&asoc->refcnt, 1);
 	free_cnt_applied = 1;
 
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
 		if (sndlen > asoc->smallest_mtu) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
 			error = EMSGSIZE;
 			goto out_unlocked;
 		}
 	}
 	if (SCTP_SO_IS_NBIO(so)
 	    || (flags & MSG_NBIO)
 	    ) {
 		non_blocking = 1;
 	}
 	/* would we block? */
 	if (non_blocking) {
 		if (hold_tcblock == 0) {
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = 1;
 		}
 		inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
 		if ((SCTP_SB_LIMIT_SND(so) < (sndlen + inqueue_bytes + stcb->asoc.sb_send_resv)) ||
 		    (stcb->asoc.chunks_on_out_queue >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK);
 			if (sndlen > SCTP_SB_LIMIT_SND(so))
 				error = EMSGSIZE;
 			else
 				error = EWOULDBLOCK;
 			goto out_unlocked;
 		}
 		stcb->asoc.sb_send_resv += sndlen;
 		SCTP_TCB_UNLOCK(stcb);
 		hold_tcblock = 0;
 	} else {
 		atomic_add_int(&stcb->asoc.sb_send_resv, sndlen);
 	}
 	local_soresv = sndlen;
 	if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 		error = ECONNRESET;
 		goto out_unlocked;
 	}
 	if (create_lock_applied) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 		create_lock_applied = 0;
 	}
 	if (asoc->stream_reset_outstanding) {
 		/*
 		 * Can't queue any data while stream reset is underway.
 		 */
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
 		error = EAGAIN;
 		goto out_unlocked;
 	}
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
 		queue_only = 1;
 	}
 	/* we are now done with all control */
 	if (control) {
 		sctp_m_freem(control);
 		control = NULL;
 	}
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		if (srcv->sinfo_flags & SCTP_ABORT) {
 			;
 		} else {
 			SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 			error = ECONNRESET;
 			goto out_unlocked;
 		}
 	}
 	/* Ok, we will attempt a msgsnd :> */
 	if (p) {
 		p->td_ru.ru_msgsnd++;
 	}
 	/* Are we aborting? */
 	if (srcv->sinfo_flags & SCTP_ABORT) {
 		struct mbuf *mm;
 		int tot_demand, tot_out = 0, max_out;
 
 		SCTP_STAT_INCR(sctps_sends_with_abort);
 		if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
 		    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
 			/* It has to be up before we abort */
 			/* how big is the user initiated abort? */
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out;
 		}
 		if (hold_tcblock) {
 			SCTP_TCB_UNLOCK(stcb);
 			hold_tcblock = 0;
 		}
 		if (top) {
 			struct mbuf *cntm = NULL;
 
 			mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAITOK, 1, MT_DATA);
 			if (sndlen != 0) {
 				for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) {
 					tot_out += SCTP_BUF_LEN(cntm);
 				}
 			}
 		} else {
 			/* Must fit in a MTU */
 			tot_out = sndlen;
 			tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
 			if (tot_demand > SCTP_DEFAULT_ADD_MORE) {
 				/* To big */
 				SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
 				error = EMSGSIZE;
 				goto out;
 			}
 			mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA);
 		}
 		if (mm == NULL) {
 			SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 			error = ENOMEM;
 			goto out;
 		}
 		max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr);
 		max_out -= sizeof(struct sctp_abort_msg);
 		if (tot_out > max_out) {
 			tot_out = max_out;
 		}
 		if (mm) {
 			struct sctp_paramhdr *ph;
 
 			/* now move forward the data pointer */
 			ph = mtod(mm, struct sctp_paramhdr *);
 			ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
 			ph->param_length = htons(sizeof(struct sctp_paramhdr) + tot_out);
 			ph++;
 			SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
 			if (top == NULL) {
 				error = uiomove((caddr_t)ph, (int)tot_out, uio);
 				if (error) {
 					/*-
 					 * Here if we can't get his data we
 					 * still abort we just don't get to
 					 * send the users note :-0
 					 */
 					sctp_m_freem(mm);
 					mm = NULL;
 				}
 			} else {
 				if (sndlen != 0) {
 					SCTP_BUF_NEXT(mm) = top;
 				}
 			}
 		}
 		if (hold_tcblock == 0) {
 			SCTP_TCB_LOCK(stcb);
 		}
 		atomic_add_int(&stcb->asoc.refcnt, -1);
 		free_cnt_applied = 0;
 		/* release this lock, otherwise we hang on ourselves */
 		sctp_abort_an_association(stcb->sctp_ep, stcb, mm, SCTP_SO_LOCKED);
 		/* now relock the stcb so everything is sane */
 		hold_tcblock = 0;
 		stcb = NULL;
 		/*
 		 * In this case top is already chained to mm avoid double
 		 * free, since we free it below if top != NULL and driver
 		 * would free it after sending the packet out
 		 */
 		if (sndlen != 0) {
 			top = NULL;
 		}
 		goto out_unlocked;
 	}
 	/* Calculate the maximum we can send */
 	inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
 	if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
 		if (non_blocking) {
 			/* we already checked for non-blocking above. */
 			max_len = sndlen;
 		} else {
 			max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 		}
 	} else {
 		max_len = 0;
 	}
 	if (hold_tcblock) {
 		SCTP_TCB_UNLOCK(stcb);
 		hold_tcblock = 0;
 	}
 	/* Is the stream no. valid? */
 	if (srcv->sinfo_stream >= asoc->streamoutcnt) {
 		/* Invalid stream number */
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	if (asoc->strmout == NULL) {
 		/* huh? software error */
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		error = EFAULT;
 		goto out_unlocked;
 	}
 	/* Unless E_EOR mode is on, we must make a send FIT in one call. */
 	if ((user_marks_eor == 0) &&
 	    (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
 		/* It will NEVER fit */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
 		error = EMSGSIZE;
 		goto out_unlocked;
 	}
 	if ((uio == NULL) && user_marks_eor) {
 		/*-
 		 * We do not support eeor mode for
 		 * sending with mbuf chains (like sendfile).
 		 */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	if (user_marks_eor) {
 		local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
 	} else {
 		/*-
 		 * For non-eeor the whole message must fit in
 		 * the socket send buffer.
 		 */
 		local_add_more = sndlen;
 	}
 	len = 0;
 	if (non_blocking) {
 		goto skip_preblock;
 	}
 	if (((max_len <= local_add_more) &&
 	    (SCTP_SB_LIMIT_SND(so) >= local_add_more)) ||
 	    (max_len == 0) ||
 	    ((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 		/* No room right now ! */
 		SOCKBUF_LOCK(&so->so_snd);
 		inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
 		while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + local_add_more)) ||
 		    ((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %d) || (%d+%d > %d)\n",
 			    (unsigned int)SCTP_SB_LIMIT_SND(so),
 			    inqueue_bytes,
 			    local_add_more,
 			    stcb->asoc.stream_queue_cnt,
 			    stcb->asoc.chunks_on_out_queue,
 			    SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue));
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, asoc, sndlen);
 			}
 			be.error = 0;
 			stcb->block_entry = &be;
 			error = sbwait(&so->so_snd);
 			stcb->block_entry = NULL;
 			if (error || so->so_error || be.error) {
 				if (error == 0) {
 					if (so->so_error)
 						error = so->so_error;
 					if (be.error) {
 						error = be.error;
 					}
 				}
 				SOCKBUF_UNLOCK(&so->so_snd);
 				goto out_unlocked;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
 				    asoc, stcb->asoc.total_output_queue_size);
 			}
 			if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				goto out_unlocked;
 			}
 			inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
 		}
 		if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
 			max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 		} else {
 			max_len = 0;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 	}
 skip_preblock:
 	if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 		goto out_unlocked;
 	}
 	/*
 	 * sndlen covers for mbuf case uio_resid covers for the non-mbuf
 	 * case NOTE: uio will be null when top/mbuf is passed
 	 */
 	if (sndlen == 0) {
 		if (srcv->sinfo_flags & SCTP_EOF) {
 			got_all_of_the_send = 1;
 			goto dataless_eof;
 		} else {
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out;
 		}
 	}
 	if (top == NULL) {
 		struct sctp_stream_queue_pending *sp;
 		struct sctp_stream_out *strm;
 		uint32_t sndout;
 
 		SCTP_TCB_SEND_LOCK(stcb);
 		if ((asoc->stream_locked) &&
 		    (asoc->stream_locked_on != srcv->sinfo_stream)) {
 			SCTP_TCB_SEND_UNLOCK(stcb);
 			SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			error = EINVAL;
 			goto out;
 		}
 		SCTP_TCB_SEND_UNLOCK(stcb);
 
 		strm = &stcb->asoc.strmout[srcv->sinfo_stream];
 		if (strm->last_msg_incomplete == 0) {
 	do_a_copy_in:
 			sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error);
 			if ((sp == NULL) || (error)) {
 				goto out;
 			}
 			SCTP_TCB_SEND_LOCK(stcb);
 			if (sp->msg_is_complete) {
 				strm->last_msg_incomplete = 0;
 				asoc->stream_locked = 0;
 			} else {
 				/*
 				 * Just got locked to this guy in case of an
 				 * interrupt.
 				 */
 				strm->last_msg_incomplete = 1;
 				asoc->stream_locked = 1;
 				asoc->stream_locked_on = srcv->sinfo_stream;
 				sp->sender_all_done = 0;
 			}
 			sctp_snd_sb_alloc(stcb, sp->length);
 			atomic_add_int(&asoc->stream_queue_cnt, 1);
 			if (srcv->sinfo_flags & SCTP_UNORDERED) {
 				SCTP_STAT_INCR(sctps_sends_with_unord);
 			}
 			TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
 			stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, asoc, strm, sp, 1);
 			SCTP_TCB_SEND_UNLOCK(stcb);
 		} else {
 			SCTP_TCB_SEND_LOCK(stcb);
 			sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead);
 			SCTP_TCB_SEND_UNLOCK(stcb);
 			if (sp == NULL) {
 				/* ???? Huh ??? last msg is gone */
 #ifdef INVARIANTS
 				panic("Warning: Last msg marked incomplete, yet nothing left?");
 #else
 				SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n");
 				strm->last_msg_incomplete = 0;
 #endif
 				goto do_a_copy_in;
 
 			}
 		}
 		while (uio->uio_resid > 0) {
 			/* How much room do we have? */
 			struct mbuf *new_tail, *mm;
 
 			if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
 				max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
 			else
 				max_len = 0;
 
 			if ((max_len > SCTP_BASE_SYSCTL(sctp_add_more_threshold)) ||
 			    (max_len && (SCTP_SB_LIMIT_SND(so) < SCTP_BASE_SYSCTL(sctp_add_more_threshold))) ||
 			    (uio->uio_resid && (uio->uio_resid <= (int)max_len))) {
 				sndout = 0;
 				new_tail = NULL;
 				if (hold_tcblock) {
 					SCTP_TCB_UNLOCK(stcb);
 					hold_tcblock = 0;
 				}
 				mm = sctp_copy_resume(uio, max_len, user_marks_eor, &error, &sndout, &new_tail);
 				if ((mm == NULL) || error) {
 					if (mm) {
 						sctp_m_freem(mm);
 					}
 					goto out;
 				}
 				/* Update the mbuf and count */
 				SCTP_TCB_SEND_LOCK(stcb);
 				if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 					/*
 					 * we need to get out. Peer probably
 					 * aborted.
 					 */
 					sctp_m_freem(mm);
 					if (stcb->asoc.state & SCTP_PCB_FLAGS_WAS_ABORTED) {
 						SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 						error = ECONNRESET;
 					}
 					SCTP_TCB_SEND_UNLOCK(stcb);
 					goto out;
 				}
 				if (sp->tail_mbuf) {
 					/* tack it to the end */
 					SCTP_BUF_NEXT(sp->tail_mbuf) = mm;
 					sp->tail_mbuf = new_tail;
 				} else {
 					/* A stolen mbuf */
 					sp->data = mm;
 					sp->tail_mbuf = new_tail;
 				}
 				sctp_snd_sb_alloc(stcb, sndout);
 				atomic_add_int(&sp->length, sndout);
 				len += sndout;
 
 				/* Did we reach EOR? */
 				if ((uio->uio_resid == 0) &&
 				    ((user_marks_eor == 0) ||
 				    (srcv->sinfo_flags & SCTP_EOF) ||
 				    (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
 					sp->msg_is_complete = 1;
 				} else {
 					sp->msg_is_complete = 0;
 				}
 				SCTP_TCB_SEND_UNLOCK(stcb);
 			}
 			if (uio->uio_resid == 0) {
 				/* got it all? */
 				continue;
 			}
 			/* PR-SCTP? */
 			if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) {
 				/*
 				 * This is ugly but we must assure locking
 				 * order
 				 */
 				if (hold_tcblock == 0) {
 					SCTP_TCB_LOCK(stcb);
 					hold_tcblock = 1;
 				}
 				sctp_prune_prsctp(stcb, asoc, srcv, sndlen);
 				inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
 				if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
 					max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 				else
 					max_len = 0;
 				if (max_len > 0) {
 					continue;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 				hold_tcblock = 0;
 			}
 			/* wait for space now */
 			if (non_blocking) {
 				/* Non-blocking io in place out */
 				goto skip_out_eof;
 			}
 			/* What about the INIT, send it maybe */
 			if (queue_only_for_init) {
 				if (hold_tcblock == 0) {
 					SCTP_TCB_LOCK(stcb);
 					hold_tcblock = 1;
 				}
 				if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
 					/* a collision took us forward? */
 					queue_only = 0;
 				} else {
 					sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 					SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
 					queue_only = 1;
 				}
 			}
 			if ((net->flight_size > net->cwnd) &&
 			    (asoc->sctp_cmt_on_off == 0)) {
 				SCTP_STAT_INCR(sctps_send_cwnd_avoid);
 				queue_only = 1;
 			} else if (asoc->ifp_had_enobuf) {
 				SCTP_STAT_INCR(sctps_ifnomemqueued);
 				if (net->flight_size > (2 * net->mtu)) {
 					queue_only = 1;
 				}
 				asoc->ifp_had_enobuf = 0;
 			}
 			un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
 			    (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
 			if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 			    (stcb->asoc.total_flight > 0) &&
 			    (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
 			    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
 
 				/*-
 				 * Ok, Nagle is set on and we have data outstanding.
 				 * Don't send anything and let SACKs drive out the
 				 * data unless wen have a "full" segment to send.
 				 */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 					sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
 				}
 				SCTP_STAT_INCR(sctps_naglequeued);
 				nagle_applies = 1;
 			} else {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 					if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
 						sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
 				}
 				SCTP_STAT_INCR(sctps_naglesent);
 				nagle_applies = 0;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 
 				sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
 				    nagle_applies, un_sent);
 				sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
 				    stcb->asoc.total_flight,
 				    stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
 			}
 			if (queue_only_for_init)
 				queue_only_for_init = 0;
 			if ((queue_only == 0) && (nagle_applies == 0)) {
 				/*-
 				 * need to start chunk output
 				 * before blocking.. note that if
 				 * a lock is already applied, then
 				 * the input via the net is happening
 				 * and I don't need to start output :-D
 				 */
 				if (hold_tcblock == 0) {
 					if (SCTP_TCB_TRYLOCK(stcb)) {
 						hold_tcblock = 1;
 						sctp_chunk_output(inp,
 						    stcb,
 						    SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 					}
 				} else {
 					sctp_chunk_output(inp,
 					    stcb,
 					    SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 				}
 				if (hold_tcblock == 1) {
 					SCTP_TCB_UNLOCK(stcb);
 					hold_tcblock = 0;
 				}
 			}
 			SOCKBUF_LOCK(&so->so_snd);
 			/*-
 			 * This is a bit strange, but I think it will
 			 * work. The total_output_queue_size is locked and
 			 * protected by the TCB_LOCK, which we just released.
 			 * There is a race that can occur between releasing it
 			 * above, and me getting the socket lock, where sacks
 			 * come in but we have not put the SB_WAIT on the
 			 * so_snd buffer to get the wakeup. After the LOCK
 			 * is applied the sack_processing will also need to
 			 * LOCK the so->so_snd to do the actual sowwakeup(). So
 			 * once we have the socket buffer lock if we recheck the
 			 * size we KNOW we will get to sleep safely with the
 			 * wakeup flag in place.
 			 */
 			if (SCTP_SB_LIMIT_SND(so) <= (stcb->asoc.total_output_queue_size +
 			    min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 					sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
 					    asoc, uio->uio_resid);
 				}
 				be.error = 0;
 				stcb->block_entry = &be;
 				error = sbwait(&so->so_snd);
 				stcb->block_entry = NULL;
 
 				if (error || so->so_error || be.error) {
 					if (error == 0) {
 						if (so->so_error)
 							error = so->so_error;
 						if (be.error) {
 							error = be.error;
 						}
 					}
 					SOCKBUF_UNLOCK(&so->so_snd);
 					goto out_unlocked;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 					sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
 					    asoc, stcb->asoc.total_output_queue_size);
 				}
 			}
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				goto out_unlocked;
 			}
 		}
 		SCTP_TCB_SEND_LOCK(stcb);
 		if (sp) {
 			if (sp->msg_is_complete == 0) {
 				strm->last_msg_incomplete = 1;
 				asoc->stream_locked = 1;
 				asoc->stream_locked_on = srcv->sinfo_stream;
 			} else {
 				sp->sender_all_done = 1;
 				strm->last_msg_incomplete = 0;
 				asoc->stream_locked = 0;
 			}
 		} else {
 			SCTP_PRINTF("Huh no sp TSNH?\n");
 			strm->last_msg_incomplete = 0;
 			asoc->stream_locked = 0;
 		}
 		SCTP_TCB_SEND_UNLOCK(stcb);
 		if (uio->uio_resid == 0) {
 			got_all_of_the_send = 1;
 		}
 	} else {
 		/* We send in a 0, since we do NOT have any locks */
 		error = sctp_msg_append(stcb, net, top, srcv, 0);
 		top = NULL;
 		if (srcv->sinfo_flags & SCTP_EOF) {
 			/*
 			 * This should only happen for Panda for the mbuf
 			 * send case, which does NOT yet support EEOR mode.
 			 * Thus, we can just set this flag to do the proper
 			 * EOF handling.
 			 */
 			got_all_of_the_send = 1;
 		}
 	}
 	if (error) {
 		goto out;
 	}
 dataless_eof:
 	/* EOF thing ? */
 	if ((srcv->sinfo_flags & SCTP_EOF) &&
 	    (got_all_of_the_send == 1)) {
 		int cnt;
 
 		SCTP_STAT_INCR(sctps_sends_with_eof);
 		error = 0;
 		if (hold_tcblock == 0) {
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = 1;
 		}
 		cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED);
 		if (TAILQ_EMPTY(&asoc->send_queue) &&
 		    TAILQ_EMPTY(&asoc->sent_queue) &&
 		    (cnt == 0)) {
 			if (asoc->locked_on_sending) {
 				goto abort_anyway;
 			}
 			/* there is nothing queued to send, so I'm done... */
 			if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 			    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 			    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 				struct sctp_nets *netp;
 
 				/* only send SHUTDOWN the first time through */
 				if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 				}
 				SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 				SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 				sctp_stop_timers_for_shutdown(stcb);
 				if (stcb->asoc.alternate) {
 					netp = stcb->asoc.alternate;
 				} else {
 					netp = stcb->asoc.primary_destination;
 				}
 				sctp_send_shutdown(stcb, netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 				    netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 				    asoc->primary_destination);
 			}
 		} else {
 			/*-
 			 * we still got (or just got) data to send, so set
 			 * SHUTDOWN_PENDING
 			 */
 			/*-
 			 * XXX sockets draft says that SCTP_EOF should be
 			 * sent with no data.  currently, we will allow user
 			 * data to be sent first and move to
 			 * SHUTDOWN-PENDING
 			 */
 			if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 			    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 			    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 				if (hold_tcblock == 0) {
 					SCTP_TCB_LOCK(stcb);
 					hold_tcblock = 1;
 				}
 				if (asoc->locked_on_sending) {
 					/* Locked to send out the data */
 					struct sctp_stream_queue_pending *sp;
 
 					sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
 					if (sp) {
 						if ((sp->length == 0) && (sp->msg_is_complete == 0))
 							asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
 					}
 				}
 				asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
 				if (TAILQ_EMPTY(&asoc->send_queue) &&
 				    TAILQ_EMPTY(&asoc->sent_queue) &&
 				    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 			abort_anyway:
 					if (free_cnt_applied) {
 						atomic_add_int(&stcb->asoc.refcnt, -1);
 						free_cnt_applied = 0;
 					}
 					sctp_abort_an_association(stcb->sctp_ep, stcb,
 					    NULL, SCTP_SO_LOCKED);
 					/*
 					 * now relock the stcb so everything
 					 * is sane
 					 */
 					hold_tcblock = 0;
 					stcb = NULL;
 					goto out;
 				}
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 				    asoc->primary_destination);
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY);
 			}
 		}
 	}
 skip_out_eof:
 	if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
 		some_on_control = 1;
 	}
 	if (queue_only_for_init) {
 		if (hold_tcblock == 0) {
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = 1;
 		}
 		if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
 			/* a collision took us forward? */
 			queue_only = 0;
 		} else {
 			sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 			SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
 			queue_only = 1;
 		}
 	}
 	if ((net->flight_size > net->cwnd) &&
 	    (stcb->asoc.sctp_cmt_on_off == 0)) {
 		SCTP_STAT_INCR(sctps_send_cwnd_avoid);
 		queue_only = 1;
 	} else if (asoc->ifp_had_enobuf) {
 		SCTP_STAT_INCR(sctps_ifnomemqueued);
 		if (net->flight_size > (2 * net->mtu)) {
 			queue_only = 1;
 		}
 		asoc->ifp_had_enobuf = 0;
 	}
 	un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
 	    (stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
 	if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 	    (stcb->asoc.total_flight > 0) &&
 	    (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
 	    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
 		/*-
 		 * Ok, Nagle is set on and we have data outstanding.
 		 * Don't send anything and let SACKs drive out the
 		 * data unless wen have a "full" segment to send.
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 			sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
 		}
 		SCTP_STAT_INCR(sctps_naglequeued);
 		nagle_applies = 1;
 	} else {
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 			if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
 				sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
 		}
 		SCTP_STAT_INCR(sctps_naglesent);
 		nagle_applies = 0;
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 		sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
 		    nagle_applies, un_sent);
 		sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
 		    stcb->asoc.total_flight,
 		    stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
 	}
 	if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) {
 		/* we can attempt to send too. */
 		if (hold_tcblock == 0) {
 			/*
 			 * If there is activity recv'ing sacks no need to
 			 * send
 			 */
 			if (SCTP_TCB_TRYLOCK(stcb)) {
 				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 				hold_tcblock = 1;
 			}
 		} else {
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 		}
 	} else if ((queue_only == 0) &&
 		    (stcb->asoc.peers_rwnd == 0) &&
 	    (stcb->asoc.total_flight == 0)) {
 		/* We get to have a probe outstanding */
 		if (hold_tcblock == 0) {
 			hold_tcblock = 1;
 			SCTP_TCB_LOCK(stcb);
 		}
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 	} else if (some_on_control) {
 		int num_out, reason, frag_point;
 
 		/* Here we do control only */
 		if (hold_tcblock == 0) {
 			hold_tcblock = 1;
 			SCTP_TCB_LOCK(stcb);
 		}
 		frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
 		(void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
 		    &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_LOCKED);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d\n",
 	    queue_only, stcb->asoc.peers_rwnd, un_sent,
 	    stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue,
 	    stcb->asoc.total_output_queue_size, error);
 
 out:
 out_unlocked:
 
 	if (local_soresv && stcb) {
 		atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen);
 	}
 	if (create_lock_applied) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 	}
 	if ((stcb) && hold_tcblock) {
 		SCTP_TCB_UNLOCK(stcb);
 	}
 	if (stcb && free_cnt_applied) {
 		atomic_add_int(&stcb->asoc.refcnt, -1);
 	}
 #ifdef INVARIANTS
 	if (stcb) {
 		if (mtx_owned(&stcb->tcb_mtx)) {
 			panic("Leaving with tcb mtx owned?");
 		}
 		if (mtx_owned(&stcb->tcb_send_mtx)) {
 			panic("Leaving with tcb send mtx owned?");
 		}
 	}
 #endif
 #ifdef INVARIANTS
 	if (inp) {
 		sctp_validate_no_locks(inp);
 	} else {
 		SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n");
 	}
 #endif
 	if (top) {
 		sctp_m_freem(top);
 	}
 	if (control) {
 		sctp_m_freem(control);
 	}
 	return (error);
 }
 
 
 /*
  * Build an AUTH chunk header for 'chunk' and append it to the chain 'm'.
  *
  * The chain is handed back untouched when any of m_end, auth_ret, offset
  * or stcb is NULL, when the association has auth_supported cleared, when
  * sctp_auth_is_required_chunk() reports that this chunk type needs no
  * authentication, or when no mbuf can be obtained for the new chunk.
  *
  * Otherwise *offset receives the summed buffer lengths of the chain as it
  * was before the append, *auth_ret points at the new chunk header, and the
  * value produced by sctp_copy_mbufchain() is returned.
  */
 struct mbuf *
 sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
     struct sctp_auth_chunk **auth_ret, uint32_t * offset,
     struct sctp_tcb *stcb, uint8_t chunk)
 {
 	struct sctp_auth_chunk *hdr;
 	struct mbuf *auth_mbuf;
 	struct mbuf *walk;
 	int total_len;
 
 	/* all three output pointers and the association are mandatory */
 	if (m_end == NULL || auth_ret == NULL || offset == NULL ||
 	    stcb == NULL) {
 		return (m);
 	}
 	/* nothing to add unless AUTH is on and this chunk type requires it */
 	if ((stcb->asoc.auth_supported == 0) ||
 	    !sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
 		return (m);
 	}
 	auth_mbuf = sctp_get_mbuf_for_msg(sizeof(*hdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (auth_mbuf == NULL) {
 		/* no mbuf available: return the chain as it came in */
 		return (m);
 	}
 	if (m == NULL) {
 		/* this will be the first mbuf, so reserve some leading space */
 		SCTP_BUF_RESV_UF(auth_mbuf, SCTP_MIN_OVERHEAD);
 	}
 
 	/*
 	 * Fill in the chunk header.  The key id and the HMAC digest are
 	 * left zeroed here; they are computed and filled in upon send.
 	 */
 	hdr = mtod(auth_mbuf, struct sctp_auth_chunk *);
 	bzero(hdr, sizeof(*hdr));
 	total_len = sizeof(*hdr) +
 	    sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
 	hdr->ch.chunk_type = SCTP_AUTHENTICATION;
 	hdr->ch.chunk_flags = 0;
 	hdr->ch.chunk_length = htons(total_len);
 	hdr->hmac_id = htons(stcb->asoc.peer_hmac_id);
 
 	/* record where in the chain the AUTH chunk is being inserted */
 	*offset = 0;
 	walk = m;
 	while (walk != NULL) {
 		*offset += SCTP_BUF_LEN(walk);
 		walk = SCTP_BUF_NEXT(walk);
 	}
 
 	/* set the mbuf length, link it in and report the chunk header */
 	SCTP_BUF_LEN(auth_mbuf) = total_len;
 	m = sctp_copy_mbufchain(auth_mbuf, m, m_end, 1, total_len, 0);
 	/* auth_ret was already checked against NULL on entry */
 	*auth_ret = hdr;
 
 	return (m);
 }
 
 #ifdef INET6
 /*
  * Report whether the gateway of route 'ro' is one of the routers recorded
  * for the prefix that covers the IPv6 source address 'src6'.
  *
  * Returns 0 when ro or ro->ro_rt is NULL, when src6 is not AF_INET6, when
  * no prefix entry without NDPRF_DETACHED matches src6 under its mask, or
  * when none of that prefix's routers compares equal to the route gateway.
  * Returns 1 as soon as sctp_cmpaddr() reports a match.
  */
 int
 sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro)
 {
 	struct nd_prefix *prefix = NULL;
 	struct nd_pfxrouter *adv_rtr = NULL;
 	struct sockaddr_in6 rtr_addr;
 
 	if (ro == NULL) {
 		return (0);
 	}
 	if (ro->ro_rt == NULL) {
 		return (0);
 	}
 	if (src6->sin6_family != AF_INET6) {
 		return (0);
 	}
 
 	/* stop on the first non-detached prefix that covers the address */
 	LIST_FOREACH(prefix, &MODULE_GLOBAL(nd_prefix), ndpr_entry) {
 		if (((prefix->ndpr_stateflags & NDPRF_DETACHED) == 0) &&
 		    IN6_ARE_MASKED_ADDR_EQUAL(&prefix->ndpr_prefix.sin6_addr,
 		    &src6->sin6_addr, &prefix->ndpr_mask)) {
 			break;
 		}
 	}
 	if (prefix == NULL) {
 		/* the walk reached the end of the list without a match */
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 		return (0);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 
 	/* compare each router listed for the prefix with the route gateway */
 	LIST_FOREACH(adv_rtr, &prefix->ndpr_advrtrs, pfr_entry) {
 		/* rebuild a full sockaddr_in6 around the router's address */
 		memset(&rtr_addr, 0, sizeof(struct sockaddr_in6));
 		rtr_addr.sin6_family = AF_INET6;
 		rtr_addr.sin6_len = sizeof(struct sockaddr_in6);
 		memcpy(&rtr_addr.sin6_addr, &adv_rtr->router->rtaddr,
 		    sizeof(struct in6_addr));
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&rtr_addr);
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
 		if (!sctp_cmpaddr((struct sockaddr *)&rtr_addr,
 		    ro->ro_rt->rt_gateway)) {
 			continue;
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
 		return (1);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
 	return (0);
 }
 
 #endif
 
 /*
  * Report whether the IPv4 source address 'sifa' and the gateway of route
  * 'ro' fall in the same network once both are masked with the netmask of
  * the interface address behind sifa.
  *
  * Returns 1 when the two masked addresses are equal.  Returns 0 when they
  * differ, when ro or ro->ro_rt is NULL, when sifa is not AF_INET, or when
  * the kernel is built without INET.
  */
 int
 sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro)
 {
 #ifdef INET
 	struct sockaddr_in *src_sin, *gw_sin, *netmask;
 	struct ifaddr *if_addr;
 	struct in_addr src_net, gw_net;
 
 	if (ro == NULL) {
 		return (0);
 	}
 	if (ro->ro_rt == NULL) {
 		return (0);
 	}
 	if (sifa->address.sa.sa_family != AF_INET) {
 		return (0);
 	}
 
 	if_addr = (struct ifaddr *)sifa->ifa;
 	netmask = (struct sockaddr_in *)(if_addr->ifa_netmask);
 
 	/*
 	 * NOTE(review): the text below is logged at SCTP_DEBUG_OUTPUT1 while
 	 * the addresses use SCTP_DEBUG_OUTPUT2 -- confirm this is intended.
 	 */
 	src_sin = &sifa->address.sin;
 	src_net.s_addr = (src_sin->sin_addr.s_addr & netmask->sin_addr.s_addr);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", src_net.s_addr);
 
 	/* the gateway is masked with the source interface's netmask too */
 	gw_sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 	gw_net.s_addr = (gw_sin->sin_addr.s_addr & netmask->sin_addr.s_addr);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gw_net.s_addr);
 
 	return ((src_net.s_addr == gw_net.s_addr) ? 1 : 0);
 #else
 	return (0);
 #endif
 }
Index: head/sys/netipsec/xform_ipip.c
===================================================================
--- head/sys/netipsec/xform_ipip.c	(revision 280970)
+++ head/sys/netipsec/xform_ipip.c	(revision 280971)
@@ -1,309 +1,307 @@
 /*	$FreeBSD$	*/
 /*	$OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */
 /*-
  * The authors of this code are John Ioannidis (ji@tla.org),
  * Angelos D. Keromytis (kermit@csd.uch.gr) and
  * Niels Provos (provos@physnet.uni-hamburg.de).
  *
  * The original version of this code was written by John Ioannidis
  * for BSD/OS in Athens, Greece, in November 1995.
  *
  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
  * by Angelos D. Keromytis.
  *
  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
  * and Niels Provos.
  *
  * Additional features in 1999 by Angelos D. Keromytis.
  *
  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
  * Angelos D. Keromytis and Niels Provos.
  * Copyright (c) 2001, Angelos D. Keromytis.
  *
  * Permission to use, copy, and modify this software with or without fee
  * is hereby granted, provided that this entire notice is included in
  * all copies of any software which is or includes a copy or
  * modification of this software.
  * You may use this code under the GNU public license if you so wish. Please
  * contribute changes back to the authors under this freer than GPL license
  * so that we may further the use of strong encryption without limitations to
  * all.
  *
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
  * PURPOSE.
  */
 
 /*
  * IP-inside-IP processing
  */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_enc.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/pfil.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_ecn.h>
 #include <netinet/ip_var.h>
 
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netipsec/ipsec6.h>
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netipsec/key.h>
 #include <netipsec/key_debug.h>
 
 int
 ipip_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp,
     int skip, int protoff)
 {
 	struct secasvar *sav;
 	u_int8_t tp, otos;
 	struct secasindex *saidx;
 	int error;
 #if defined(INET) || defined(INET6)
 	u_int8_t itos;
 #endif
 #ifdef INET
 	struct ip *ipo;
 #endif /* INET */
 #ifdef INET6
 	struct ip6_hdr *ip6, *ip6o;
 #endif /* INET6 */
 
 	sav = isr->sav;
 	IPSEC_ASSERT(sav != NULL, ("null SA"));
 	IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));
 
 	/* XXX Deal with empty TDB source/destination addresses. */
 
 	m_copydata(m, 0, 1, &tp);
 	tp = (tp >> 4) & 0xff;  /* Get the IP version number. */
 
 	saidx = &sav->sah->saidx;
 	switch (saidx->dst.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (saidx->src.sa.sa_family != AF_INET ||
 		    saidx->src.sin.sin_addr.s_addr == INADDR_ANY ||
 		    saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) {
 			DPRINTF(("%s: unspecified tunnel endpoint "
 			    "address in SA %s/%08lx\n", __func__,
 			    ipsec_address(&saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 			error = EINVAL;
 			goto bad;
 		}
 
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == 0) {
 			DPRINTF(("%s: M_PREPEND failed\n", __func__));
 			error = ENOBUFS;
 			goto bad;
 		}
 
 		ipo = mtod(m, struct ip *);
 
 		ipo->ip_v = IPVERSION;
 		ipo->ip_hl = 5;
 		ipo->ip_len = htons(m->m_pkthdr.len);
 		ipo->ip_ttl = V_ip_defttl;
 		ipo->ip_sum = 0;
 		ipo->ip_src = saidx->src.sin.sin_addr;
 		ipo->ip_dst = saidx->dst.sin.sin_addr;
-
-		ipo->ip_id = ip_newid();
-
 		/* If the inner protocol is IP... */
 		switch (tp) {
 		case IPVERSION:
 			/* Save ECN notification */
 			m_copydata(m, sizeof(struct ip) +
 			    offsetof(struct ip, ip_tos),
 			    sizeof(u_int8_t), (caddr_t) &itos);
 
 			ipo->ip_p = IPPROTO_IPIP;
 
 			/*
 			 * We should be keeping tunnel soft-state and
 			 * send back ICMPs if needed.
 			 */
 			m_copydata(m, sizeof(struct ip) +
 			    offsetof(struct ip, ip_off),
 			    sizeof(u_int16_t), (caddr_t) &ipo->ip_off);
 			ipo->ip_off = ntohs(ipo->ip_off);
 			ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK);
 			ipo->ip_off = htons(ipo->ip_off);
 			break;
 #ifdef INET6
 		case (IPV6_VERSION >> 4):
 		{
 			u_int32_t itos32;
 
 			/* Save ECN notification. */
 			m_copydata(m, sizeof(struct ip) +
 			    offsetof(struct ip6_hdr, ip6_flow),
 			    sizeof(u_int32_t), (caddr_t) &itos32);
 			itos = ntohl(itos32) >> 20;
 			ipo->ip_p = IPPROTO_IPV6;
 			ipo->ip_off = 0;
 			break;
 		}
 #endif /* INET6 */
 		default:
 			goto nofamily;
 		}
+		ip_fillid(ipo);
 
 		otos = 0;
 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
 		ipo->ip_tos = otos;
 		break;
 #endif /* INET */
 
 #ifdef INET6
 	case AF_INET6:
 		if (IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr) ||
 		    saidx->src.sa.sa_family != AF_INET6 ||
 		    IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr)) {
 			DPRINTF(("%s: unspecified tunnel endpoint "
 			    "address in SA %s/%08lx\n", __func__,
 			    ipsec_address(&saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 			error = ENOBUFS;
 			goto bad;
 		}
 
 		/* scoped address handling */
 		ip6 = mtod(m, struct ip6_hdr *);
 		in6_clearscope(&ip6->ip6_src);
 		in6_clearscope(&ip6->ip6_dst);
 		M_PREPEND(m, sizeof(struct ip6_hdr), M_NOWAIT);
 		if (m == 0) {
 			DPRINTF(("%s: M_PREPEND failed\n", __func__));
 			error = ENOBUFS;
 			goto bad;
 		}
 
 		/* Initialize IPv6 header */
 		ip6o = mtod(m, struct ip6_hdr *);
 		ip6o->ip6_flow = 0;
 		ip6o->ip6_vfc &= ~IPV6_VERSION_MASK;
 		ip6o->ip6_vfc |= IPV6_VERSION;
 		ip6o->ip6_hlim = IPV6_DEFHLIM;
 		ip6o->ip6_dst = saidx->dst.sin6.sin6_addr;
 		ip6o->ip6_src = saidx->src.sin6.sin6_addr;
 		ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 
 		switch (tp) {
 #ifdef INET
 		case IPVERSION:
 			/* Save ECN notification */
 			m_copydata(m, sizeof(struct ip6_hdr) +
 			    offsetof(struct ip, ip_tos), sizeof(u_int8_t),
 			    (caddr_t) &itos);
 
 			/* This is really IPVERSION. */
 			ip6o->ip6_nxt = IPPROTO_IPIP;
 			break;
 #endif /* INET */
 		case (IPV6_VERSION >> 4):
 		{
 			u_int32_t itos32;
 
 			/* Save ECN notification. */
 			m_copydata(m, sizeof(struct ip6_hdr) +
 			    offsetof(struct ip6_hdr, ip6_flow),
 			    sizeof(u_int32_t), (caddr_t) &itos32);
 			itos = ntohl(itos32) >> 20;
 
 			ip6o->ip6_nxt = IPPROTO_IPV6;
 			break;
 		}
 		default:
 			goto nofamily;
 		}
 
 		otos = 0;
 		ip_ecn_ingress(V_ip6_ipsec_ecn, &otos, &itos);
 		ip6o->ip6_flow |= htonl((u_int32_t) otos << 20);
 		break;
 #endif /* INET6 */
 
 	default:
 nofamily:
 		DPRINTF(("%s: unsupported protocol family %u\n", __func__,
 		    saidx->dst.sa.sa_family));
 		error = EAFNOSUPPORT;		/* XXX diffs from openbsd */
 		goto bad;
 	}
 
 	*mp = m;
 	return (0);
 bad:
 	if (m)
 		m_freem(m);
 	*mp = NULL;
 	return (error);
 }
 
 static int
 ipe4_init(struct secasvar *sav, struct xformsw *xsp)
 {
 	sav->tdb_xform = xsp;
 	return 0;
 }
 
 static int
 ipe4_zeroize(struct secasvar *sav)
 {
 	sav->tdb_xform = NULL;
 	return 0;
 }
 
 static int
 ipe4_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
 {
 	/* This is a rather serious mistake, so no conditional printing. */
 	printf("%s: should never be called\n", __func__);
 	if (m)
 		m_freem(m);
 	return EOPNOTSUPP;
 }
 
 static struct xformsw ipe4_xformsw = {
 	XF_IP4,		0,		"IPv4 Simple Encapsulation",
 	ipe4_init,	ipe4_zeroize,	ipe4_input,	ipip_output,
 };
 
 static void
 ipe4_attach(void)
 {
 
 	xform_register(&ipe4_xformsw);
 }
 SYSINIT(ipe4_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipe4_attach, NULL);
Index: head/sys/netpfil/pf/if_pfsync.c
===================================================================
--- head/sys/netpfil/pf/if_pfsync.c	(revision 280970)
+++ head/sys/netpfil/pf/if_pfsync.c	(revision 280971)
@@ -1,2421 +1,2421 @@
 /*-
  * Copyright (c) 2002 Michael Shalayeff
  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /*
  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
  *
  * Revisions picked from OpenBSD after revision 1.110 import:
  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
  * 1.120, 1.175 - use monotonic time_uptime
  * 1.122 - reduce number of updates for non-TCP sessions
  * 1.125, 1.127 - rewrite merge or stale processing
  * 1.128 - cleanups
  * 1.146 - bzero() mbuf before sparsely filling it with data
  * 1.170 - SIOCSIFMTU checks
  * 1.126, 1.142 - deferred packets processing
  * 1.173 - correct expire time processing
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 #include <net/if_pfsync.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 
 #define PFSYNC_MINPKT ( \
 	sizeof(struct ip) + \
 	sizeof(struct pfsync_header) + \
 	sizeof(struct pfsync_subheader) )
 
 struct pfsync_pkt {
 	struct ip *ip;
 	struct in_addr src;
 	u_int8_t flags;
 };
 
 static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
 		    struct pfsync_state_peer *);
 static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
 
 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
 	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
 	pfsync_in_ins,			/* PFSYNC_ACT_INS */
 	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
 	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
 	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
 	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
 	pfsync_in_del,			/* PFSYNC_ACT_DEL */
 	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
 	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
 	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
 	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
 	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
 	pfsync_in_eof			/* PFSYNC_ACT_EOF */
 };
 
 struct pfsync_q {
 	void		(*write)(struct pf_state *, void *);
 	size_t		len;
 	u_int8_t	action;
 };
 
 /* we have one of these for every PFSYNC_S_ */
 static void	pfsync_out_state(struct pf_state *, void *);
 static void	pfsync_out_iack(struct pf_state *, void *);
 static void	pfsync_out_upd_c(struct pf_state *, void *);
 static void	pfsync_out_del(struct pf_state *, void *);
 
 static struct pfsync_q pfsync_qs[] = {
 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
 };
 
 static void	pfsync_q_ins(struct pf_state *, int);
 static void	pfsync_q_del(struct pf_state *);
 
 static void	pfsync_update_state(struct pf_state *);
 
 struct pfsync_upd_req_item {
 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
 	struct pfsync_upd_req			ur_msg;
 };
 
 struct pfsync_deferral {
 	struct pfsync_softc		*pd_sc;
 	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
 	u_int				pd_refs;
 	struct callout			pd_tmo;
 
 	struct pf_state			*pd_st;
 	struct mbuf			*pd_m;
 };
 
 struct pfsync_softc {
 	/* Configuration */
 	struct ifnet		*sc_ifp;
 	struct ifnet		*sc_sync_if;
 	struct ip_moptions	sc_imo;
 	struct in_addr		sc_sync_peer;
 	uint32_t		sc_flags;
 #define	PFSYNCF_OK		0x00000001
 #define	PFSYNCF_DEFER		0x00000002
 #define	PFSYNCF_PUSH		0x00000004
 	uint8_t			sc_maxupdates;
 	struct ip		sc_template;
 	struct callout		sc_tmo;
 	struct mtx		sc_mtx;
 
 	/* Queued data */
 	size_t			sc_len;
 	TAILQ_HEAD(, pf_state)			sc_qs[PFSYNC_S_COUNT];
 	TAILQ_HEAD(, pfsync_upd_req_item)	sc_upd_req_list;
 	TAILQ_HEAD(, pfsync_deferral)		sc_deferrals;
 	u_int			sc_deferred;
 	void			*sc_plus;
 	size_t			sc_pluslen;
 
 	/* Bulk update info */
 	struct mtx		sc_bulk_mtx;
 	uint32_t		sc_ureq_sent;
 	int			sc_bulk_tries;
 	uint32_t		sc_ureq_received;
 	int			sc_bulk_hashid;
 	uint64_t		sc_bulk_stateid;
 	uint32_t		sc_bulk_creatorid;
 	struct callout		sc_bulk_tmo;
 	struct callout		sc_bulkfail_tmo;
 };
 
 #define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
 #define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
 #define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
 
 #define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
 #define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
 #define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
 
 static const char pfsyncname[] = "pfsync";
 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
 static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
 #define	V_pfsyncif		VNET(pfsyncif)
 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
 #define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
 static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
 #define	V_pfsyncstats		VNET(pfsyncstats)
 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
 #define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
 
 static void	pfsync_timeout(void *);
 static void	pfsync_push(struct pfsync_softc *);
 static void	pfsyncintr(void *);
 static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
 		    void *);
 static void	pfsync_multicast_cleanup(struct pfsync_softc *);
 static void	pfsync_pointers_init(void);
 static void	pfsync_pointers_uninit(void);
 static int	pfsync_init(void);
 static void	pfsync_uninit(void);
 
 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(pfsyncstats), pfsyncstats,
     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
     &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
 
 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
 static void	pfsync_clone_destroy(struct ifnet *);
 static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
 		    struct pf_state_peer *);
 static int	pfsyncoutput(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
 
 static int	pfsync_defer(struct pf_state *, struct mbuf *);
 static void	pfsync_undefer(struct pfsync_deferral *, int);
 static void	pfsync_undefer_state(struct pf_state *, int);
 static void	pfsync_defer_tmo(void *);
 
 static void	pfsync_request_update(u_int32_t, u_int64_t);
 static void	pfsync_update_state_req(struct pf_state *);
 
 static void	pfsync_drop(struct pfsync_softc *);
 static void	pfsync_sendout(int);
 static void	pfsync_send_plus(void *, size_t);
 
 static void	pfsync_bulk_start(void);
 static void	pfsync_bulk_status(u_int8_t);
 static void	pfsync_bulk_update(void *);
 static void	pfsync_bulk_fail(void *);
 
 #ifdef IPSEC
 static void	pfsync_update_net_tdb(struct pfsync_tdb *);
 #endif
 
 #define PFSYNC_MAX_BULKTRIES	12
 
 VNET_DEFINE(struct if_clone *, pfsync_cloner);
 #define	V_pfsync_cloner	VNET(pfsync_cloner)
 
 static int
 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
 {
 	struct pfsync_softc *sc;
 	struct ifnet *ifp;
 	int q;
 
 	if (unit != 0)
 		return (EINVAL);
 
 	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
 	sc->sc_flags |= PFSYNCF_OK;
 
 	for (q = 0; q < PFSYNC_S_COUNT; q++)
 		TAILQ_INIT(&sc->sc_qs[q]);
 
 	TAILQ_INIT(&sc->sc_upd_req_list);
 	TAILQ_INIT(&sc->sc_deferrals);
 
 	sc->sc_len = PFSYNC_MINPKT;
 	sc->sc_maxupdates = 128;
 
 	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
 	if (ifp == NULL) {
 		free(sc, M_PFSYNC);
 		return (ENOSPC);
 	}
 	if_initname(ifp, pfsyncname, unit);
 	ifp->if_softc = sc;
 	ifp->if_ioctl = pfsyncioctl;
 	ifp->if_output = pfsyncoutput;
 	ifp->if_type = IFT_PFSYNC;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	ifp->if_hdrlen = sizeof(struct pfsync_header);
 	ifp->if_mtu = ETHERMTU;
 	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
 	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
 	callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
 	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
 	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
 
 	if_attach(ifp);
 
 	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
 
 	V_pfsyncif = sc;
 
 	return (0);
 }
 
 static void
 pfsync_clone_destroy(struct ifnet *ifp)
 {
 	struct pfsync_softc *sc = ifp->if_softc;
 
 	/*
 	 * At this stage, everything should have already been
 	 * cleared by pfsync_uninit(), and we have only to
 	 * drain callouts.
 	 */
 	while (sc->sc_deferred > 0) {
 		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
 
 		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 		sc->sc_deferred--;
 		if (callout_stop(&pd->pd_tmo)) {
 			pf_release_state(pd->pd_st);
 			m_freem(pd->pd_m);
 			free(pd, M_PFSYNC);
 		} else {
 			pd->pd_refs++;
 			callout_drain(&pd->pd_tmo);
 			free(pd, M_PFSYNC);
 		}
 	}
 
 	callout_drain(&sc->sc_tmo);
 	callout_drain(&sc->sc_bulkfail_tmo);
 	callout_drain(&sc->sc_bulk_tmo);
 
 	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
 	bpfdetach(ifp);
 	if_detach(ifp);
 
 	pfsync_drop(sc);
 
 	if_free(ifp);
 	if (sc->sc_imo.imo_membership)
 		pfsync_multicast_cleanup(sc);
 	mtx_destroy(&sc->sc_mtx);
 	mtx_destroy(&sc->sc_bulk_mtx);
 	free(sc, M_PFSYNC);
 
 	V_pfsyncif = NULL;
 }
 
 static int
 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
     struct pf_state_peer *d)
 {
 	if (s->scrub.scrub_flag && d->scrub == NULL) {
 		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
 		if (d->scrub == NULL)
 			return (ENOMEM);
 	}
 
 	return (0);
 }
 
 
 static int
 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 #ifndef	__NO_STRICT_ALIGNMENT
 	struct pfsync_state_key key[2];
 #endif
 	struct pfsync_state_key *kw, *ks;
 	struct pf_state	*st = NULL;
 	struct pf_state_key *skw = NULL, *sks = NULL;
 	struct pf_rule *r = NULL;
 	struct pfi_kif	*kif;
 	int error;
 
 	PF_RULES_RASSERT();
 
 	if (sp->creatorid == 0) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("%s: invalid creator id: %08x\n", __func__,
 			    ntohl(sp->creatorid));
 		return (EINVAL);
 	}
 
 	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("%s: unknown interface: %s\n", __func__,
 			    sp->ifname);
 		if (flags & PFSYNC_SI_IOCTL)
 			return (EINVAL);
 		return (0);	/* skip this state */
 	}
 
 	/*
 	 * If the ruleset checksums match or the state is coming from the ioctl,
 	 * it's safe to associate the state with the rule of that number.
 	 */
 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
 		r = pf_main_ruleset.rules[
 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
 	else
 		r = &V_pf_default_rule;
 
 	if ((r->max_states &&
 	    counter_u64_fetch(r->states_cur) >= r->max_states))
 		goto cleanup;
 
 	/*
 	 * XXXGL: consider M_WAITOK in ioctl path after.
 	 */
 	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
 		goto cleanup;
 
 	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
 		goto cleanup;
 
 #ifndef	__NO_STRICT_ALIGNMENT
 	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
 	kw = &key[PF_SK_WIRE];
 	ks = &key[PF_SK_STACK];
 #else
 	kw = &sp->key[PF_SK_WIRE];
 	ks = &sp->key[PF_SK_STACK];
 #endif
 
 	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
 	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
 	    kw->port[0] != ks->port[0] ||
 	    kw->port[1] != ks->port[1]) {
 		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 		if (sks == NULL)
 			goto cleanup;
 	} else
 		sks = skw;
 
 	/* allocate memory for scrub info */
 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
 		goto cleanup;
 
 	/* Copy to state key(s). */
 	skw->addr[0] = kw->addr[0];
 	skw->addr[1] = kw->addr[1];
 	skw->port[0] = kw->port[0];
 	skw->port[1] = kw->port[1];
 	skw->proto = sp->proto;
 	skw->af = sp->af;
 	if (sks != skw) {
 		sks->addr[0] = ks->addr[0];
 		sks->addr[1] = ks->addr[1];
 		sks->port[0] = ks->port[0];
 		sks->port[1] = ks->port[1];
 		sks->proto = sp->proto;
 		sks->af = sp->af;
 	}
 
 	/* copy to state */
 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
 	st->creation = time_uptime - ntohl(sp->creation);
 	st->expire = time_uptime;
 	if (sp->expire) {
 		uint32_t timeout;
 
 		timeout = r->timeout[sp->timeout];
 		if (!timeout)
 			timeout = V_pf_default_rule.timeout[sp->timeout];
 
 		/* sp->expire may have been adaptively scaled by export. */
 		st->expire -= timeout - ntohl(sp->expire);
 	}
 
 	st->direction = sp->direction;
 	st->log = sp->log;
 	st->timeout = sp->timeout;
 	st->state_flags = sp->state_flags;
 
 	st->id = sp->id;
 	st->creatorid = sp->creatorid;
 	pf_state_peer_ntoh(&sp->src, &st->src);
 	pf_state_peer_ntoh(&sp->dst, &st->dst);
 
 	st->rule.ptr = r;
 	st->nat_rule.ptr = NULL;
 	st->anchor.ptr = NULL;
 	st->rt_kif = NULL;
 
 	st->pfsync_time = time_uptime;
 	st->sync_state = PFSYNC_S_NONE;
 
 	if (!(flags & PFSYNC_SI_IOCTL))
 		st->state_flags |= PFSTATE_NOSYNC;
 
 	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
 		goto cleanup_state;
 
 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
 	counter_u64_add(r->states_cur, 1);
 	counter_u64_add(r->states_tot, 1);
 
 	if (!(flags & PFSYNC_SI_IOCTL)) {
 		st->state_flags &= ~PFSTATE_NOSYNC;
 		if (st->state_flags & PFSTATE_ACK) {
 			pfsync_q_ins(st, PFSYNC_S_IACK);
 			pfsync_push(sc);
 		}
 	}
 	st->state_flags &= ~PFSTATE_ACK;
 	PF_STATE_UNLOCK(st);
 
 	return (0);
 
 cleanup:
 	error = ENOMEM;
 	if (skw == sks)
 		sks = NULL;
 	if (skw != NULL)
 		uma_zfree(V_pf_state_key_z, skw);
 	if (sks != NULL)
 		uma_zfree(V_pf_state_key_z, sks);
 
 cleanup_state:	/* pf_state_insert() frees the state keys. */
 	if (st) {
 		if (st->dst.scrub)
 			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
 		if (st->src.scrub)
 			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
 		uma_zfree(V_pf_state_z, st);
 	}
 	return (error);
 }
 
 static int
 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_pkt pkt;
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct pfsync_header *ph;
 	struct pfsync_subheader subh;
 
 	int offset, len;
 	int rv;
 	uint16_t count;
 
 	*mp = NULL;
 	V_pfsyncstats.pfsyncs_ipackets++;
 
 	/* Verify that we have a sync interface configured. */
 	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
 	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		goto done;
 
 	/* verify that the packet came in on the right interface */
 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
 		V_pfsyncstats.pfsyncs_badif++;
 		goto done;
 	}
 
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	/* verify that the IP TTL is 255. */
 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
 		V_pfsyncstats.pfsyncs_badttl++;
 		goto done;
 	}
 
 	offset = ip->ip_hl << 2;
 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
 		V_pfsyncstats.pfsyncs_hdrops++;
 		goto done;
 	}
 
 	if (offset + sizeof(*ph) > m->m_len) {
 		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
 			V_pfsyncstats.pfsyncs_hdrops++;
 			return (IPPROTO_DONE);
 		}
 		ip = mtod(m, struct ip *);
 	}
 	ph = (struct pfsync_header *)((char *)ip + offset);
 
 	/* verify the version */
 	if (ph->version != PFSYNC_VERSION) {
 		V_pfsyncstats.pfsyncs_badver++;
 		goto done;
 	}
 
 	len = ntohs(ph->len) + offset;
 	if (m->m_pkthdr.len < len) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		goto done;
 	}
 
 	/* Cheaper to grab this now than having to mess with mbufs later */
 	pkt.ip = ip;
 	pkt.src = ip->ip_src;
 	pkt.flags = 0;
 
 	/*
 	 * Trusting pf_chksum during packet processing, as well as seeking
 	 * in interface name tree, require holding PF_RULES_RLOCK().
 	 */
 	PF_RULES_RLOCK();
 	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
 		pkt.flags |= PFSYNC_SI_CKSUM;
 
 	offset += sizeof(*ph);
 	while (offset <= len - sizeof(subh)) {
 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
 		offset += sizeof(subh);
 
 		if (subh.action >= PFSYNC_ACT_MAX) {
 			V_pfsyncstats.pfsyncs_badact++;
 			PF_RULES_RUNLOCK();
 			goto done;
 		}
 
 		count = ntohs(subh.count);
 		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
 		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
 		if (rv == -1) {
 			PF_RULES_RUNLOCK();
 			return (IPPROTO_DONE);
 		}
 
 		offset += rv;
 	}
 	PF_RULES_RUNLOCK();
 
 done:
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 static int
 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_clr *clr;
 	struct mbuf *mp;
 	int len = sizeof(*clr) * count;
 	int i, offp;
 	u_int32_t creatorid;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	clr = (struct pfsync_clr *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		creatorid = clr[i].creatorid;
 
 		if (clr[i].ifname[0] != '\0' &&
 		    pfi_kif_find(clr[i].ifname) == NULL)
 			continue;
 
 		for (int i = 0; i <= pf_hashmask; i++) {
 			struct pf_idhash *ih = &V_pf_idhash[i];
 			struct pf_state *s;
 relock:
 			PF_HASHROW_LOCK(ih);
 			LIST_FOREACH(s, &ih->states, entry) {
 				if (s->creatorid == creatorid) {
 					s->state_flags |= PFSTATE_NOSYNC;
 					pf_unlink_state(s, PF_ENTER_LOCKED);
 					goto relock;
 				}
 			}
 			PF_HASHROW_UNLOCK(ih);
 		}
 	}
 
 	return (len);
 }
 
 static int
 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct mbuf *mp;
 	struct pfsync_state *sa, *sp;
 	int len = sizeof(*sp) * count;
 	int i, offp;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
 		/* Check for invalid values. */
 		if (sp->timeout >= PFTM_MAX ||
 		    sp->src.state > PF_TCPS_PROXY_DST ||
 		    sp->dst.state > PF_TCPS_PROXY_DST ||
 		    sp->direction > PF_OUT ||
 		    (sp->af != AF_INET && sp->af != AF_INET6)) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("%s: invalid value\n", __func__);
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
 			/* Drop out, but process the rest of the actions. */
 			break;
 	}
 
 	return (len);
 }
 
 static int
 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_ins_ack *ia, *iaa;
 	struct pf_state *st;
 
 	struct mbuf *mp;
 	int len = count * sizeof(*ia);
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		ia = &iaa[i];
 
 		st = pf_find_state_byid(ia->id, ia->creatorid);
 		if (st == NULL)
 			continue;
 
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(V_pfsyncif);
 			pfsync_undefer_state(st, 0);
 			PFSYNC_UNLOCK(V_pfsyncif);
 		}
 		PF_STATE_UNLOCK(st);
 	}
 	/*
 	 * XXX this is not yet implemented, but we know the size of the
 	 * message so we can skip it.
 	 */
 
 	return (count * sizeof(struct pfsync_ins_ack));
 }
 
 /*
  * Merge the TCP peer information received from a pfsync peer (src/dst, in
  * network byte order) into the local state st, which must be locked.
  *
  * For each direction the received peer is either rejected, because the
  * local copy looks newer, or copied into st via pf_state_peer_ntoh().
  *
  * Returns the number of directions (0, 1 or 2) that were rejected; a
  * non-zero value tells the caller that the local state is ahead of the
  * sender's.
  */
 static int
 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
     struct pfsync_state_peer *dst)
 {
 	int sync = 0;
 
 	PF_STATE_LOCK_ASSERT(st);
 
 	/*
 	 * The state should never go backwards except
 	 * for syn-proxy states.  Neither should the
 	 * sequence window slide backwards.
 	 */
 	if ((st->src.state > src->state &&
 	    (st->src.state < PF_TCPS_PROXY_SRC ||
 	    src->state >= PF_TCPS_PROXY_SRC)) ||
 
 	    (st->src.state == src->state &&
 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
 		sync++;
 	else
 		pf_state_peer_ntoh(src, &st->src);
 
 	/* Same test for the destination side, without the proxy exception. */
 	if ((st->dst.state > dst->state) ||
 
 	    (st->dst.state >= TCPS_SYN_SENT &&
 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
 		sync++;
 	else
 		pf_state_peer_ntoh(dst, &st->dst);
 
 	return (sync);
 }
 
 /*
  * Handle a run of full-state update messages (struct pfsync_state).
  *
  * Each message is validated, then either imported as a new state (when no
  * matching state exists locally) or merged into the existing one.  When the
  * local state turns out to be newer than what was received, the local copy
  * is queued for transmission and a push is requested.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_state *sa, *sp;
 	struct pf_state *st;
 	int sync;
 
 	struct mbuf *mp;
 	int len = count * sizeof(*sp);
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
 		/* check for invalid values */
 		if (sp->timeout >= PFTM_MAX ||
 		    sp->src.state > PF_TCPS_PROXY_DST ||
 		    sp->dst.state > PF_TCPS_PROXY_DST) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pfsync_input: PFSYNC_ACT_UPD: "
 				    "invalid value\n");
 			}
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		st = pf_find_state_byid(sp->id, sp->creatorid);
 		if (st == NULL) {
 			/* insert the update */
 			if (pfsync_state_import(sp, 0))
 				V_pfsyncstats.pfsyncs_badstate++;
 			continue;
 		}
 
 		/* A deferred packet for this state is dropped (drop == 1). */
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(sc);
 			pfsync_undefer_state(st, 1);
 			PFSYNC_UNLOCK(sc);
 		}
 
 		/* sync counts the directions in which our copy is newer. */
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
 		else {
 			sync = 0;
 
 			/*
 			 * Non-TCP protocol state machine always go
 			 * forwards
 			 */
 			if (st->src.state > sp->src.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&sp->src, &st->src);
 			if (st->dst.state > sp->dst.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&sp->dst, &st->dst);
 		}
 		/* At least one direction was accepted: refresh the expiry. */
 		if (sync < 2) {
 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
 			pf_state_peer_ntoh(&sp->dst, &st->dst);
 			st->expire = time_uptime;
 			st->timeout = sp->timeout;
 		}
 		st->pfsync_time = time_uptime;
 
 		if (sync) {
 			V_pfsyncstats.pfsyncs_stale++;
 
 			/*
 			 * The sender is behind: queue our copy.  The state
 			 * lock is released before the pfsync lock is taken.
 			 */
 			pfsync_update_state(st);
 			PF_STATE_UNLOCK(st);
 			PFSYNC_LOCK(sc);
 			pfsync_push(sc);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle a run of compressed update messages (struct pfsync_upd_c).
  *
  * Unlike pfsync_in_upd(), a message for an unknown state cannot be
  * imported; instead an update request for that state is queued.  For known
  * states the received peers are merged, and a stale sender causes the local
  * copy to be queued and a push to be requested.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_upd_c *ua, *up;
 	struct pf_state *st;
 	int len = count * sizeof(*up);
 	int sync;
 	struct mbuf *mp;
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		up = &ua[i];
 
 		/* check for invalid values */
 		if (up->timeout >= PFTM_MAX ||
 		    up->src.state > PF_TCPS_PROXY_DST ||
 		    up->dst.state > PF_TCPS_PROXY_DST) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pfsync_input: "
 				    "PFSYNC_ACT_UPD_C: "
 				    "invalid value\n");
 			}
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		st = pf_find_state_byid(up->id, up->creatorid);
 		if (st == NULL) {
 			/* We don't have this state. Ask for it. */
 			PFSYNC_LOCK(sc);
 			pfsync_request_update(up->creatorid, up->id);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 
 		/* A deferred packet for this state is dropped (drop == 1). */
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(sc);
 			pfsync_undefer_state(st, 1);
 			PFSYNC_UNLOCK(sc);
 		}
 
 		/* sync counts the directions in which our copy is newer. */
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
 		else {
 			sync = 0;
 
 			/*
 			 * Non-TCP protocol state machine always go
 			 * forwards
 			 */
 			if (st->src.state > up->src.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&up->src, &st->src);
 			if (st->dst.state > up->dst.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&up->dst, &st->dst);
 		}
 		/* At least one direction was accepted: refresh the expiry. */
 		if (sync < 2) {
 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
 			pf_state_peer_ntoh(&up->dst, &st->dst);
 			st->expire = time_uptime;
 			st->timeout = up->timeout;
 		}
 		st->pfsync_time = time_uptime;
 
 		if (sync) {
 			V_pfsyncstats.pfsyncs_stale++;
 
 			/*
 			 * The sender is behind: queue our copy.  The state
 			 * lock is released before the pfsync lock is taken.
 			 */
 			pfsync_update_state(st);
 			PF_STATE_UNLOCK(st);
 			PFSYNC_LOCK(sc);
 			pfsync_push(sc);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle a run of update-request messages.  A request whose id and
  * creatorid are both zero starts a bulk update; any other request queues
  * the named state for a full update unless it is flagged PFSTATE_NOSYNC.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_upd_req *reqs, *req;
 	struct pf_state *st;
 	struct mbuf *mp;
 	int len = count * sizeof(*req);
 	int n, offp;
 
 	if ((mp = m_pulldown(m, offset, len, &offp)) == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	reqs = (struct pfsync_upd_req *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		req = &reqs[n];
 
 		/* An all-zero request asks for the whole table. */
 		if (req->id == 0 && req->creatorid == 0) {
 			pfsync_bulk_start();
 			continue;
 		}
 
 		st = pf_find_state_byid(req->id, req->creatorid);
 		if (st == NULL) {
 			V_pfsyncstats.pfsyncs_badstate++;
 			continue;
 		}
 
 		if ((st->state_flags & PFSTATE_NOSYNC) == 0)
 			pfsync_update_state_req(st);
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle a run of delete messages carried as full struct pfsync_state
  * records.  Every state that can be found is flagged PFSTATE_NOSYNC and
  * unlinked; unknown states are counted in pfsyncs_badstate.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_state *msgs;
 	struct pf_state *st;
 	struct mbuf *mp;
 	int len = count * sizeof(struct pfsync_state);
 	int n, offp;
 
 	if ((mp = m_pulldown(m, offset, len, &offp)) == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	msgs = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		st = pf_find_state_byid(msgs[n].id, msgs[n].creatorid);
 		if (st != NULL) {
 			/* Flag first so the unlink is not synced back out. */
 			st->state_flags |= PFSTATE_NOSYNC;
 			pf_unlink_state(st, PF_ENTER_LOCKED);
 		} else
 			V_pfsyncstats.pfsyncs_badstate++;
 	}
 
 	return (len);
 }
 
 /*
  * Handle a run of compressed delete messages (struct pfsync_del_c).
  * Every state that can be found is flagged PFSTATE_NOSYNC and unlinked;
  * unknown states are counted in pfsyncs_badstate.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_del_c *msgs;
 	struct pf_state *st;
 	struct mbuf *mp;
 	int len = count * sizeof(struct pfsync_del_c);
 	int n, offp;
 
 	if ((mp = m_pulldown(m, offset, len, &offp)) == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	msgs = (struct pfsync_del_c *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		st = pf_find_state_byid(msgs[n].id, msgs[n].creatorid);
 		if (st != NULL) {
 			/* Flag first so the unlink is not synced back out. */
 			st->state_flags |= PFSTATE_NOSYNC;
 			pf_unlink_state(st, PF_ENTER_LOCKED);
 		} else
 			V_pfsyncstats.pfsyncs_badstate++;
 	}
 
 	return (len);
 }
 
 /*
  * Handle a bulk-update status message (struct pfsync_bus).
  *
  * The message is ignored unless a bulk update request is outstanding
  * (sc_ureq_sent != 0).  A START status re-arms the bulk-failure callout; an
  * END status with an acceptable timestamp completes the bulk update, stops
  * the failure callout and marks the interface PFSYNCF_OK.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the message.
  */
 static int
 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_bus *bus;
 	struct mbuf *mp;
 	int len = count * sizeof(*bus);
 	int offp;
 
 	PFSYNC_BLOCK(sc);
 
 	/* If we're not waiting for a bulk update, who cares. */
 	if (sc->sc_ureq_sent == 0) {
 		PFSYNC_BUNLOCK(sc);
 		return (len);
 	}
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		PFSYNC_BUNLOCK(sc);
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	/* Only the first record is examined, whatever count says. */
 	bus = (struct pfsync_bus *)(mp->m_data + offp);
 
 	switch (bus->status) {
 	case PFSYNC_BUS_START:
 		/*
 		 * Timeout: 4 seconds plus one tick per packet needed to
 		 * carry the configured state limit at the current MTU.
 		 */
 		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
 		    V_pf_limits[PF_LIMIT_STATES].limit /
 		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
 		    sizeof(struct pfsync_state)),
 		    pfsync_bulk_fail, sc);
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: received bulk update start\n");
 		break;
 
 	case PFSYNC_BUS_END:
 		if (time_uptime - ntohl(bus->endtime) >=
 		    sc->sc_ureq_sent) {
 			/* that's it, we're happy */
 			sc->sc_ureq_sent = 0;
 			sc->sc_bulk_tries = 0;
 			callout_stop(&sc->sc_bulkfail_tmo);
 			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
 				    "pfsync bulk done");
 			sc->sc_flags |= PFSYNCF_OK;
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("pfsync: received valid "
 				    "bulk update end\n");
 		} else {
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("pfsync: received invalid "
 				    "bulk update end: bad timestamp\n");
 		}
 		break;
 	}
 	PFSYNC_BUNLOCK(sc);
 
 	return (len);
 }
 
 /*
  * Handle a run of TDB update messages (struct pfsync_tdb).
  *
  * With IPSEC compiled in, each message is passed to
  * pfsync_update_net_tdb(); otherwise the messages are only skipped.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() cannot
  * provide the messages.
  */
 static int
 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	int len = count * sizeof(struct pfsync_tdb);
 
 #if defined(IPSEC)
 	struct pfsync_tdb *tp;
 	struct mbuf *mp;
 	int offp;
 	int i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++)
 		pfsync_update_net_tdb(&tp[i]);
 #endif
 
 	return (len);
 }
 
 #if defined(IPSEC)
 /*
  * Update an in-kernel tdb. Silently fail if no tdb is found.
  *
  * The replay counter and byte counter in pt are converted to host byte
  * order in place.  A message with a reserved SPI, an unsupported address
  * family, or counters lower than the tdb's current ones is rejected and
  * counted in pfsyncs_badstate.
  */
 static void
 pfsync_update_net_tdb(struct pfsync_tdb *pt)
 {
 	struct tdb		*tdb;
 
 	/* check for invalid values */
 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
 	    (pt->dst.sa.sa_family != AF_INET &&
 	    pt->dst.sa.sa_family != AF_INET6))
 		goto bad;
 
 	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
 	if (tdb) {
 		pt->rpl = ntohl(pt->rpl);
 		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
 
 		/* Neither replay nor byte counter should ever decrease. */
 		if (pt->rpl < tdb->tdb_rpl ||
 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
 			goto bad;
 		}
 
 		tdb->tdb_rpl = pt->rpl;
 		tdb->tdb_cur_bytes = pt->cur_bytes;
 	}
 	return;
 
 bad:
 	if (V_pf_status.debug >= PF_DEBUG_MISC)
 		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
 		    "invalid value\n");
 	V_pfsyncstats.pfsyncs_badstate++;
 	return;
 }
 #endif
 
 
 /*
  * Handle the end-of-frame action.  A mismatch between the current offset
  * and the packet length is counted in pfsyncs_badlen.  The mbuf chain is
  * always freed here and -1 is returned so that the caller stops parsing.
  */
 static int
 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	/* The EOF subheader is expected to be the last thing in the packet. */
 	if (m->m_pkthdr.len != offset) {
 		V_pfsyncstats.pfsyncs_badlen++;
 	}
 
 	/* Nothing left to parse: release the packet. */
 	m_freem(m);
 
 	return (-1);
 }
 
 /*
  * Handler for an action that is not acceptable: count it in
  * pfsyncs_badact, free the packet and return -1.
  */
 static int
 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	V_pfsyncstats.pfsyncs_badact++;
 
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * Interface output routine: every packet handed to it is freed and
  * success (0) is reported.  None of the other arguments are used.
  */
 static int
 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *rt)
 {
 	m_freem(m);
 	return (0);
 }
 
 /*
  * ioctl handler for the pfsync interface.
  *
  *   SIOCSIFFLAGS   - mirror IFF_UP into IFF_DRV_RUNNING and (un)install the
  *                    pfsync pointers accordingly.
  *   SIOCSIFMTU     - set the MTU; requires a sync interface and a value in
  *                    (PFSYNC_MINPKT, sync interface MTU].  Pending data is
  *                    flushed first when the MTU shrinks.
  *   SIOCGETPFSYNC  - copy the current configuration out to userland.
  *   SIOCSETPFSYNC  - privileged; install a new sync device, peer, maxupdates
  *                    and defer setting, then request a bulk update.
  *
  * Returns 0 on success, ENOTTY for unknown commands, or an errno value.
  * The pfsync lock is never held on return.
  */
 /* ARGSUSED */
 static int
 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct pfsync_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct pfsyncreq pfsyncr;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		PFSYNC_LOCK(sc);
 		if (ifp->if_flags & IFF_UP) {
 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
 			PFSYNC_UNLOCK(sc);
 			pfsync_pointers_init();
 		} else {
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			PFSYNC_UNLOCK(sc);
 			pfsync_pointers_uninit();
 		}
 		break;
 	case SIOCSIFMTU:
 		if (!sc->sc_sync_if ||
 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
 			return (EINVAL);
 		/* Flush what is queued before the packet budget shrinks. */
 		if (ifr->ifr_mtu < ifp->if_mtu) {
 			PFSYNC_LOCK(sc);
 			if (sc->sc_len > PFSYNC_MINPKT)
 				pfsync_sendout(1);
 			PFSYNC_UNLOCK(sc);
 		}
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 	case SIOCGETPFSYNC:
 		bzero(&pfsyncr, sizeof(pfsyncr));
 		PFSYNC_LOCK(sc);
 		if (sc->sc_sync_if) {
 			strlcpy(pfsyncr.pfsyncr_syncdev,
 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
 		}
 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
 		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
 		    (sc->sc_flags & PFSYNCF_DEFER));
 		PFSYNC_UNLOCK(sc);
 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
 
 	case SIOCSETPFSYNC:
 	    {
 		struct ip_moptions *imo = &sc->sc_imo;
 		struct ifnet *sifp;
 		struct ip *ip;
 		void *mship = NULL;
 
 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
 			return (error);
 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
 			return (error);
 
 		if (pfsyncr.pfsyncr_maxupdates > 255)
 			return (EINVAL);
 
 		if (pfsyncr.pfsyncr_syncdev[0] == 0)
 			sifp = NULL;
 		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
 			return (EINVAL);
 
 		/*
 		 * The membership array is allocated with M_WAITOK, so it has
 		 * to be obtained before the pfsync lock is taken.
 		 */
 		if (sifp != NULL && (
 		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
 		    pfsyncr.pfsyncr_syncpeer.s_addr ==
 		    htonl(INADDR_PFSYNC_GROUP)))
 			mship = malloc((sizeof(struct in_multi *) *
 			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
 
 		PFSYNC_LOCK(sc);
 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
 			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
 		else
 			sc->sc_sync_peer.s_addr =
 			    pfsyncr.pfsyncr_syncpeer.s_addr;
 
 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
 		if (pfsyncr.pfsyncr_defer) {
 			sc->sc_flags |= PFSYNCF_DEFER;
 			pfsync_defer_ptr = pfsync_defer;
 		} else {
 			sc->sc_flags &= ~PFSYNCF_DEFER;
 			pfsync_defer_ptr = NULL;
 		}
 
 		/* No sync device requested: detach from the current one. */
 		if (sifp == NULL) {
 			if (sc->sc_sync_if)
 				if_rele(sc->sc_sync_if);
 			sc->sc_sync_if = NULL;
 			if (imo->imo_membership)
 				pfsync_multicast_cleanup(sc);
 			PFSYNC_UNLOCK(sc);
 			break;
 		}
 
 		if (sc->sc_len > PFSYNC_MINPKT &&
 		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
 		    (sc->sc_sync_if != NULL &&
 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
 			pfsync_sendout(1);
 
 		if (imo->imo_membership)
 			pfsync_multicast_cleanup(sc);
 
 		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
 			error = pfsync_multicast_setup(sc, sifp, mship);
 			if (error) {
 				/*
 				 * Drop the pfsync lock before bailing out;
 				 * returning with it held would leak it.
 				 */
 				PFSYNC_UNLOCK(sc);
 				if_rele(sifp);
 				free(mship, M_PFSYNC);
 				return (error);
 			}
 		}
 		if (sc->sc_sync_if)
 			if_rele(sc->sc_sync_if);
 		sc->sc_sync_if = sifp;
 
 		/* Pre-build the IP header used for every outgoing packet. */
 		ip = &sc->sc_template;
 		bzero(ip, sizeof(*ip));
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		/* len and id are set later. */
 		ip->ip_off = htons(IP_DF);
 		ip->ip_ttl = PFSYNC_DFLTTL;
 		ip->ip_p = IPPROTO_PFSYNC;
 		ip->ip_src.s_addr = INADDR_ANY;
 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
 
 		/* Request a full state table update. */
 		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 			(*carp_demote_adj_p)(V_pfsync_carp_adj,
 			    "pfsync bulk start");
 		sc->sc_flags &= ~PFSYNCF_OK;
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: requesting bulk update\n");
 		pfsync_request_update(0, 0);
 		PFSYNC_UNLOCK(sc);
 		PFSYNC_BLOCK(sc);
 		sc->sc_ureq_sent = time_uptime;
 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
 		    sc);
 		PFSYNC_BUNLOCK(sc);
 
 		break;
 	    }
 	default:
 		return (ENOTTY);
 	}
 
 	return (0);
 }
 
 /*
  * Queue writer for full-state messages: exports st into the
  * struct pfsync_state located at buf.
  */
 static void
 pfsync_out_state(struct pf_state *st, void *buf)
 {
 
 	pfsync_state_export((struct pfsync_state *)buf, st);
 }
 
 /*
  * Queue writer for insert acknowledgements: stores the id and creatorid
  * of st into the struct pfsync_ins_ack located at buf.
  */
 static void
 pfsync_out_iack(struct pf_state *st, void *buf)
 {
 	struct pfsync_ins_ack *ack;
 
 	ack = (struct pfsync_ins_ack *)buf;
 	ack->creatorid = st->creatorid;
 	ack->id = st->id;
 }
 
 /*
  * Queue writer for compressed updates: zeroes the struct pfsync_upd_c at
  * buf and fills in the identity, timeout and both peers of st, the peers
  * being converted with pf_state_peer_hton().
  */
 static void
 pfsync_out_upd_c(struct pf_state *st, void *buf)
 {
 	struct pfsync_upd_c *msg;
 
 	msg = (struct pfsync_upd_c *)buf;
 	bzero(msg, sizeof(*msg));
 
 	/* Identity and timeout. */
 	msg->id = st->id;
 	msg->creatorid = st->creatorid;
 	msg->timeout = st->timeout;
 
 	/* Peer data for both directions. */
 	pf_state_peer_hton(&st->src, &msg->src);
 	pf_state_peer_hton(&st->dst, &msg->dst);
 }
 
 /*
  * Queue writer for compressed deletes: stores the id and creatorid of st
  * into the struct pfsync_del_c at buf and flags the state PFSTATE_NOSYNC.
  */
 static void
 pfsync_out_del(struct pf_state *st, void *buf)
 {
 	struct pfsync_del_c *del;
 
 	del = (struct pfsync_del_c *)buf;
 	del->creatorid = st->creatorid;
 	del->id = st->id;
 
 	st->state_flags |= PFSTATE_NOSYNC;
 }
 
 /*
  * Throw away everything that is pending for transmission: every queued
  * state is marked PFSYNC_S_NONE and released, every queued update request
  * is freed, the "plus" region is forgotten and the pending length is reset
  * to PFSYNC_MINPKT.
  */
 static void
 pfsync_drop(struct pfsync_softc *sc)
 {
 	struct pfsync_upd_req_item *req;
 	struct pf_state *st, *tmp;
 	int q;
 
 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
 		if (!TAILQ_EMPTY(&sc->sc_qs[q])) {
 			TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, tmp) {
 				KASSERT(st->sync_state == q,
 					("%s: st->sync_state == q",
 						__func__));
 				st->sync_state = PFSYNC_S_NONE;
 				pf_release_state(st);
 			}
 			/* Entries were not unlinked one by one; reset the head. */
 			TAILQ_INIT(&sc->sc_qs[q]);
 		}
 	}
 
 	for (req = TAILQ_FIRST(&sc->sc_upd_req_list); req != NULL;
 	    req = TAILQ_FIRST(&sc->sc_upd_req_list)) {
 		TAILQ_REMOVE(&sc->sc_upd_req_list, req, ur_entry);
 		free(req, M_PFSYNC);
 	}
 
 	sc->sc_plus = NULL;
 	sc->sc_len = PFSYNC_MINPKT;
 }
 
 /*
  * Build one pfsync packet from everything that is pending (state queues,
  * update requests and the optional "plus" region) and enqueue it on the
  * pfsync interface's send queue.
  *
  * The pfsync lock must be held.  When neither a bpf listener nor a sync
  * interface exists the pending data is simply dropped.  If schedswi is
  * non-zero the software interrupt that transmits the send queue is
  * scheduled.  On return (except after an mbuf allocation failure) sc_len
  * is back at PFSYNC_MINPKT.
  */
 static void
 pfsync_sendout(int schedswi)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct mbuf *m;
 	struct ip *ip;
 	struct pfsync_header *ph;
 	struct pfsync_subheader *subh;
 	struct pf_state *st;
 	struct pfsync_upd_req_item *ur;
 	int offset;
 	int q, count = 0;
 
 	KASSERT(sc != NULL, ("%s: null sc", __func__));
 	KASSERT(sc->sc_len > PFSYNC_MINPKT,
 	    ("%s: sc_len %zu", __func__, sc->sc_len));
 	PFSYNC_LOCK_ASSERT(sc);
 
 	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
 		pfsync_drop(sc);
 		return;
 	}
 
 	m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL) {
 		/* Pending data stays queued; only the failure is counted. */
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		V_pfsyncstats.pfsyncs_onomem++;
 		return;
 	}
 	m->m_data += max_linkhdr;
 	m->m_len = m->m_pkthdr.len = sc->sc_len;
 
 	/* build the ip header */
 	ip = (struct ip *)m->m_data;
 	bcopy(&sc->sc_template, ip, sizeof(*ip));
 	offset = sizeof(*ip);
 
 	ip->ip_len = htons(m->m_pkthdr.len);
-	ip->ip_id = htons(ip_randomid());
+	ip_fillid(ip);
 
 	/* build the pfsync header */
 	ph = (struct pfsync_header *)(m->m_data + offset);
 	bzero(ph, sizeof(*ph));
 	offset += sizeof(*ph);
 
 	ph->version = PFSYNC_VERSION;
 	ph->len = htons(sc->sc_len - sizeof(*ip));
 	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
 
 	/* walk the queues */
 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
 			continue;
 
 		/* Reserve the subheader; it is filled in once count is known. */
 		subh = (struct pfsync_subheader *)(m->m_data + offset);
 		offset += sizeof(*subh);
 
 		count = 0;
 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
 			KASSERT(st->sync_state == q,
 				("%s: st->sync_state == q",
 					__func__));
 			/*
 			 * XXXGL: some of write methods do unlocked reads
 			 * of state data :(
 			 */
 			pfsync_qs[q].write(st, m->m_data + offset);
 			offset += pfsync_qs[q].len;
 			st->sync_state = PFSYNC_S_NONE;
 			pf_release_state(st);
 			count++;
 		}
 		TAILQ_INIT(&sc->sc_qs[q]);
 
 		bzero(subh, sizeof(*subh));
 		subh->action = pfsync_qs[q].action;
 		subh->count = htons(count);
 		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
 	}
 
 	/* Append pending update requests, freeing each item as it is copied. */
 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
 		subh = (struct pfsync_subheader *)(m->m_data + offset);
 		offset += sizeof(*subh);
 
 		count = 0;
 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
 
 			bcopy(&ur->ur_msg, m->m_data + offset,
 			    sizeof(ur->ur_msg));
 			offset += sizeof(ur->ur_msg);
 			free(ur, M_PFSYNC);
 			count++;
 		}
 
 		bzero(subh, sizeof(*subh));
 		subh->action = PFSYNC_ACT_UPD_REQ;
 		subh->count = htons(count);
 		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
 	}
 
 	/* has someone built a custom region for us to add? */
 	if (sc->sc_plus != NULL) {
 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
 		offset += sc->sc_pluslen;
 
 		sc->sc_plus = NULL;
 	}
 
 	/* Terminate the packet with an EOF subheader. */
 	subh = (struct pfsync_subheader *)(m->m_data + offset);
 	offset += sizeof(*subh);
 
 	bzero(subh, sizeof(*subh));
 	subh->action = PFSYNC_ACT_EOF;
 	subh->count = htons(1);
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
 
 	/* we're done, let's put it on the wire */
 	if (ifp->if_bpf) {
 		/* bpf sees the packet without the IP header. */
 		m->m_data += sizeof(*ip);
 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
 		BPF_MTAP(ifp, m);
 		m->m_data -= sizeof(*ip);
 		m->m_len = m->m_pkthdr.len = sc->sc_len;
 	}
 
 	/* Only a bpf listener was interested; nothing goes on the wire. */
 	if (sc->sc_sync_if == NULL) {
 		sc->sc_len = PFSYNC_MINPKT;
 		m_freem(m);
 		return;
 	}
 
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 	sc->sc_len = PFSYNC_MINPKT;
 
 	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
 		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
 	else {
 		m_freem(m);
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
 	}
 	if (schedswi)
 		swi_sched(V_pfsync_swi_cookie, 0);
 }
 
 /*
  * Queue a newly created state for an insert message.
  *
  * States already flagged PFSTATE_NOSYNC are ignored.  States created by a
  * PFRULE_NOSYNC rule, or whose wire key protocol is IPPROTO_PFSYNC, are
  * flagged PFSTATE_NOSYNC and not queued.
  */
 static void
 pfsync_insert_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int nosync;
 
 	if ((st->state_flags & PFSTATE_NOSYNC) != 0)
 		return;
 
 	nosync = (st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC;
 	if (nosync) {
 		st->state_flags |= PFSTATE_NOSYNC;
 		return;
 	}
 
 	KASSERT(st->sync_state == PFSYNC_S_NONE,
 		("%s: st->sync_state %u", __func__, st->sync_state));
 
 	PFSYNC_LOCK(sc);
 	/* First item of a fresh packet: arm the one second flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT) {
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
 		    V_pfsyncif);
 	}
 	pfsync_q_ins(st, PFSYNC_S_INS);
 	PFSYNC_UNLOCK(sc);
 
 	st->sync_updates = 0;
 }
 
 static int
 pfsync_defer(struct pf_state *st, struct mbuf *m)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_deferral *pd;
 
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		return (0);
 
 	PFSYNC_LOCK(sc);
 
 	if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
 	    !(sc->sc_flags & PFSYNCF_DEFER)) {
 		PFSYNC_UNLOCK(sc);
 		return (0);
 	}
 
 	 if (sc->sc_deferred >= 128)
 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
 
 	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
 	if (pd == NULL)
 		return (0);
 	sc->sc_deferred++;
 
 	m->m_flags |= M_SKIP_FIREWALL;
 	st->state_flags |= PFSTATE_ACK;
 
 	pd->pd_sc = sc;
 	pd->pd_refs = 0;
 	pd->pd_st = st;
 	pf_ref_state(st);
 	pd->pd_m = m;
 
 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
 	callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
 
 	pfsync_push(sc);
 
 	return (1);
 }
 
 /*
  * Tear down deferral pd with the pfsync lock held: unlink it, clear
  * PFSTATE_ACK on its state, free it and release the state reference.
  * The held packet is freed when drop is non-zero; otherwise it is put on
  * the pfsync interface's send queue and a push is requested.
  */
 static void
 pfsync_undefer(struct pfsync_deferral *pd, int drop)
 {
 	struct pfsync_softc *sc = pd->pd_sc;
 	struct mbuf *m = pd->pd_m;
 	struct pf_state *st = pd->pd_st;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 	sc->sc_deferred--;
 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
 	/* m and st were copied out above, so pd may be freed now. */
 	free(pd, M_PFSYNC);
 	pf_release_state(st);
 
 	if (drop)
 		m_freem(m);
 	else {
 		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
 		pfsync_push(sc);
 	}
 }
 
 /*
  * Callout handler for a deferral that was never acknowledged.  Entered
  * with the pfsync lock held and returns with it released.  The deferral is
  * unlinked, the held packet is transmitted with ip_output() and the state
  * reference is released.
  */
 static void
 pfsync_defer_tmo(void *arg)
 {
 	struct pfsync_deferral *pd = arg;
 	struct pfsync_softc *sc = pd->pd_sc;
 	struct mbuf *m = pd->pd_m;
 	struct pf_state *st = pd->pd_st;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
 
 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 	sc->sc_deferred--;
 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
 	/* pd is only freed here when nothing else references it. */
 	if (pd->pd_refs == 0)
 		free(pd, M_PFSYNC);
 	PFSYNC_UNLOCK(sc);
 
 	/* Transmit outside the pfsync lock. */
 	ip_output(m, NULL, NULL, 0, NULL, NULL);
 
 	pf_release_state(st);
 
 	CURVNET_RESTORE();
 }
 
 static void
 pfsync_undefer_state(struct pf_state *st, int drop)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_deferral *pd;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
 		 if (pd->pd_st == st) {
 			if (callout_stop(&pd->pd_tmo))
 				pfsync_undefer(pd, drop);
 			return;
 		}
 	}
 
 	panic("%s: unable to find deferred state", __func__);
 }
 
 /*
  * Note that state st (locked by the caller) has changed and make sure a
  * compressed update for it is queued.
  *
  * A pending deferral is released first.  PFSTATE_NOSYNC states are removed
  * from whatever queue they are on.  A push is requested when a TCP state
  * has accumulated sc_maxupdates updates, or when the state was last synced
  * less than two seconds ago.
  */
 static void
 pfsync_update_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int sync = 0;
 
 	PF_STATE_LOCK_ASSERT(st);
 	PFSYNC_LOCK(sc);
 
 	if (st->state_flags & PFSTATE_ACK)
 		pfsync_undefer_state(st, 0);
 	if (st->state_flags & PFSTATE_NOSYNC) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 		PFSYNC_UNLOCK(sc);
 		return;
 	}
 
 	/* Nothing pending yet: arm the one second flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT)
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
 
 	switch (st->sync_state) {
 	case PFSYNC_S_UPD_C:
 	case PFSYNC_S_UPD:
 	case PFSYNC_S_INS:
 		/* we're already handling it */
 
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
 			st->sync_updates++;
 			if (st->sync_updates >= sc->sc_maxupdates)
 				sync = 1;
 		}
 		break;
 
 	case PFSYNC_S_IACK:
 		pfsync_q_del(st);
 		/* FALLTHROUGH to putting it on the compressed update list */
 	case PFSYNC_S_NONE:
 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
 		st->sync_updates = 0;
 		break;
 
 	default:
 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
 	}
 
 	if (sync || (time_uptime - st->pfsync_time) < 2)
 		pfsync_push(sc);
 
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Queue a request asking the peer to send an update for the state
  * identified by (creatorid, id).  The pfsync lock must be held.
  *
  * A request that is already queued is not queued twice, and an allocation
  * failure silently drops the request.  When the request does not fit into
  * the packet being built, that packet is sent out first.
  */
 static void
 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_upd_req_item *req;
 	size_t need;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	/*
 	 * Suppress duplicates.  Only the list for the packet currently
 	 * being assembled is searched, not packets already built.
 	 */
 	TAILQ_FOREACH(req, &sc->sc_upd_req_list, ur_entry) {
 		if (item_matches: 0, req->ur_msg.id == id &&
 		    req->ur_msg.creatorid == creatorid)
 			return;
 	}
 
 	req = malloc(sizeof(*req), M_PFSYNC, M_NOWAIT);
 	if (req == NULL)
 		return; /* XXX stats */
 
 	req->ur_msg.creatorid = creatorid;
 	req->ur_msg.id = id;
 
 	/* The first request of a packet also pays for the subheader. */
 	need = sizeof(struct pfsync_upd_req);
 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
 		need += sizeof(struct pfsync_subheader);
 
 	if (sc->sc_len + need > sc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 
 		/* Fresh packet: the subheader is needed for certain. */
 		need = sizeof(struct pfsync_subheader) +
 		    sizeof(struct pfsync_upd_req);
 	}
 
 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, req, ur_entry);
 	sc->sc_len += need;
 }
 
 /*
  * Queue a full update for state st (locked by the caller) in response to
  * an update request, and ask for a push.  PFSTATE_NOSYNC states are
  * removed from whatever queue they are on instead.
  */
 static void
 pfsync_update_state_req(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	PF_STATE_LOCK_ASSERT(st);
 	PFSYNC_LOCK(sc);
 
 	if (st->state_flags & PFSTATE_NOSYNC) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 		PFSYNC_UNLOCK(sc);
 		return;
 	}
 
 	switch (st->sync_state) {
 	case PFSYNC_S_UPD_C:
 	case PFSYNC_S_IACK:
 		pfsync_q_del(st);
 		/* FALLTHROUGH to putting it on the full update list */
 	case PFSYNC_S_NONE:
 		pfsync_q_ins(st, PFSYNC_S_UPD);
 		pfsync_push(sc);
 		break;
 
 	case PFSYNC_S_INS:
 	case PFSYNC_S_UPD:
 	case PFSYNC_S_DEL:
 		/* we're already handling it */
 		break;
 
 	default:
 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
 	}
 
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Note that state st is being deleted.  A pending deferral is dropped
  * together with its packet.  PFSTATE_NOSYNC states are only dequeued.
  * A state whose insert was never sent is simply dequeued; any other state
  * ends up on the delete queue.
  */
 static void
 pfsync_delete_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	PFSYNC_LOCK(sc);
 	if (st->state_flags & PFSTATE_ACK)
 		pfsync_undefer_state(st, 1);
 	if (st->state_flags & PFSTATE_NOSYNC) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 		PFSYNC_UNLOCK(sc);
 		return;
 	}
 
 	/* Nothing pending yet: arm the one second flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT)
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
 
 	switch (st->sync_state) {
 	case PFSYNC_S_INS:
 		/* We never got to tell the world so just forget about it. */
 		pfsync_q_del(st);
 		break;
 
 	case PFSYNC_S_UPD_C:
 	case PFSYNC_S_UPD:
 	case PFSYNC_S_IACK:
 		pfsync_q_del(st);
 		/* FALLTHROUGH to putting it on the del list */
 
 	case PFSYNC_S_NONE:
 		pfsync_q_ins(st, PFSYNC_S_DEL);
 		break;
 
 	default:
 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
 	}
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Send a "clear states" message for the given creator id and interface
  * name.  The message is built on the stack and handed to
  * pfsync_send_plus() under the pfsync lock.
  */
 static void
 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
 {
 	struct {
 		struct pfsync_subheader subh;
 		struct pfsync_clr clr;
 	} __packed msg;
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	bzero(&msg, sizeof(msg));
 
 	/* Subheader announcing a single clear record. */
 	msg.subh.action = PFSYNC_ACT_CLR;
 	msg.subh.count = htons(1);
 
 	/* The clear record itself. */
 	msg.clr.creatorid = creatorid;
 	strlcpy(msg.clr.ifname, ifname, sizeof(msg.clr.ifname));
 
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
 
 	PFSYNC_LOCK(sc);
 	pfsync_send_plus(&msg, sizeof(msg));
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Put state st on pending queue q and account for the space it will take
  * in the packet.  The pfsync lock must be held and st must not be on any
  * queue.  If the entry does not fit, the current packet is sent out first.
  * The queue holds its own reference on the state.
  */
 static void
 pfsync_q_ins(struct pf_state *st, int q)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	size_t need;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	KASSERT(st->sync_state == PFSYNC_S_NONE,
 		("%s: st->sync_state %u", __func__, st->sync_state));
 	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
 	    sc->sc_len));
 
 	/* The first entry of a queue also pays for the subheader. */
 	need = pfsync_qs[q].len;
 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
 		need += sizeof(struct pfsync_subheader);
 
 	if (sc->sc_len + need > sc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 
 		/* Fresh packet: the subheader is needed for certain. */
 		need = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
 	}
 
 	sc->sc_len += need;
 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
 	st->sync_state = q;
 	pf_ref_state(st);
 }
 
 /*
  * Take state st off the pending queue it is on, give back the space that
  * was accounted for it and drop the queue's reference.  The pfsync lock
  * must be held and st must be on a queue.
  */
 static void
 pfsync_q_del(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int queue;
 
 	PFSYNC_LOCK_ASSERT(sc);
 	KASSERT(st->sync_state != PFSYNC_S_NONE,
 		("%s: st->sync_state != PFSYNC_S_NONE", __func__));
 
 	/* Remember the queue; sync_state is reset below. */
 	queue = st->sync_state;
 
 	sc->sc_len -= pfsync_qs[queue].len;
 	TAILQ_REMOVE(&sc->sc_qs[queue], st, sync_list);
 	st->sync_state = PFSYNC_S_NONE;
 	pf_release_state(st);
 
 	/* The last entry is gone, and with it the queue's subheader. */
 	if (TAILQ_EMPTY(&sc->sc_qs[queue])) {
 		sc->sc_len -= sizeof(struct pfsync_subheader);
 	}
 }
 
 /*
  * Start answering a bulk update request: record when the request arrived,
  * rewind the walk position, announce PFSYNC_BUS_START and schedule
  * pfsync_bulk_update() one tick from now.
  */
 static void
 pfsync_bulk_start(void)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	if (V_pf_status.debug >= PF_DEBUG_MISC) {
 		printf("pfsync: received bulk update request\n");
 	}
 
 	PFSYNC_BLOCK(sc);
 
 	/* Rewind the walk to the beginning of the table. */
 	sc->sc_bulk_stateid = 0;
 	sc->sc_bulk_hashid = 0;
 	sc->sc_ureq_received = time_uptime;
 
 	pfsync_bulk_status(PFSYNC_BUS_START);
 	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
 
 	PFSYNC_BUNLOCK(sc);
 }
 
 /*
  * Callout handler that walks the state id hash and queues a full update
  * for every eligible state.  The walk stops once a packet has been filled;
  * the position is then saved in the softc and the callout re-armed so that
  * the next invocation resumes there.  When the whole table has been
  * walked, PFSYNC_BUS_END is announced.
  */
 static void
 pfsync_bulk_update(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 	struct pf_state *s;
 	int i, sent = 0;
 
 	PFSYNC_BLOCK_ASSERT(sc);
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	/*
 	 * Start with last state from previous invocation.
 	 * It may had gone, in this case start from the
 	 * hash slot.
 	 */
 	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
 
 	if (s != NULL)
 		i = PF_IDHASH(s);
 	else
 		i = sc->sc_bulk_hashid;
 
 	for (; i <= pf_hashmask; i++) {
 		struct pf_idhash *ih = &V_pf_idhash[i];
 
 		/*
 		 * When resuming from a found state its hash row is expected
 		 * to be locked already; otherwise lock the row here.
 		 */
 		if (s != NULL)
 			PF_HASHROW_ASSERT(ih);
 		else {
 			PF_HASHROW_LOCK(ih);
 			s = LIST_FIRST(&ih->states);
 		}
 
 		for (; s; s = LIST_NEXT(s, entry)) {
 
 			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
 			    sizeof(struct pfsync_state)) {
 				/* We've filled a packet. */
 				sc->sc_bulk_hashid = i;
 				sc->sc_bulk_stateid = s->id;
 				sc->sc_bulk_creatorid = s->creatorid;
 				PF_HASHROW_UNLOCK(ih);
 				callout_reset(&sc->sc_bulk_tmo, 1,
 				    pfsync_bulk_update, sc);
 				goto full;
 			}
 
 			/*
 			 * Only idle states with a valid timeout that have not
 			 * been synced since the request came in are sent.
 			 */
 			if (s->sync_state == PFSYNC_S_NONE &&
 			    s->timeout < PFTM_MAX &&
 			    s->pfsync_time <= sc->sc_ureq_received) {
 				pfsync_update_state_req(s);
 				sent++;
 			}
 		}
 		/* s is NULL here, so the next row is locked by the loop. */
 		PF_HASHROW_UNLOCK(ih);
 	}
 
 	/* We're done. */
 	pfsync_bulk_status(PFSYNC_BUS_END);
 
 full:
 	CURVNET_RESTORE();
 }
 
 /*
  * Send a bulk update status message carrying the given status, our host
  * id and the time elapsed since the bulk request was received.  The
  * message is built on the stack and handed to pfsync_send_plus() under the
  * pfsync lock.
  */
 static void
 pfsync_bulk_status(u_int8_t status)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct {
 		struct pfsync_subheader subh;
 		struct pfsync_bus bus;
 	} __packed msg;
 
 	bzero(&msg, sizeof(msg));
 
 	/* Subheader announcing a single status record. */
 	msg.subh.action = PFSYNC_ACT_BUS;
 	msg.subh.count = htons(1);
 
 	/* The status record itself. */
 	msg.bus.status = status;
 	msg.bus.creatorid = V_pf_status.hostid;
 	msg.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
 
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
 
 	PFSYNC_LOCK(sc);
 	pfsync_send_plus(&msg, sizeof(msg));
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Callout handler that fires when a requested bulk update did not
  * complete in time.  While fewer than PFSYNC_MAX_BULKTRIES attempts have
  * been made the request is repeated and the callout re-armed for five
  * seconds; after that the transfer is treated as if it had succeeded.
  */
 static void
 pfsync_bulk_fail(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	PFSYNC_BLOCK_ASSERT(sc);
 
 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
 		/* Try again */
 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
 		    pfsync_bulk_fail, V_pfsyncif);
 		PFSYNC_LOCK(sc);
 		pfsync_request_update(0, 0);
 		PFSYNC_UNLOCK(sc);
 	} else {
 		/* Pretend like the transfer was ok. */
 		sc->sc_ureq_sent = 0;
 		sc->sc_bulk_tries = 0;
 		PFSYNC_LOCK(sc);
 		/* Undo the carp demotion applied when the bulk was requested. */
 		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
 			    "pfsync bulk fail");
 		sc->sc_flags |= PFSYNCF_OK;
 		PFSYNC_UNLOCK(sc);
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: failed to receive bulk update\n");
 	}
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Attach a caller-built region of pluslen bytes to the packet being
  * assembled and send the packet out immediately.  The pfsync lock must be
  * held.  Only the pointer is stored, so the region has to stay valid until
  * this function returns.  If the region does not fit, the pending packet
  * is sent out first.
  */
 static void
 pfsync_send_plus(void *plus, size_t pluslen)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	/* Make room by flushing what is already pending. */
 	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 	}
 
 	sc->sc_plus = plus;
 	sc->sc_pluslen = pluslen;
 	sc->sc_len += sc->sc_pluslen;
 
 	pfsync_sendout(1);
 }
 
 /*
  * Callout handler for the flush timer (sc_tmo): requests a push of
  * whatever is pending, under the pfsync lock and in the interface's vnet.
  */
 static void
 pfsync_timeout(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 	PFSYNC_LOCK(sc);
 	pfsync_push(sc);
 	PFSYNC_UNLOCK(sc);
 	CURVNET_RESTORE();
 }
 
 /*
  * Mark the softc as needing a push (PFSYNCF_PUSH) and schedule the pfsync
  * software interrupt; pfsyncintr() tests the flag.  The pfsync lock must
  * be held.
  */
 static void
 pfsync_push(struct pfsync_softc *sc)
 {
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	sc->sc_flags |= PFSYNCF_PUSH;
 	swi_sched(V_pfsync_swi_cookie, 0);
 }
 
 static void
 pfsyncintr(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 	struct mbuf *m, *n;
 
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	PFSYNC_LOCK(sc);
 	if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
 		pfsync_sendout(0);
 		sc->sc_flags &= ~PFSYNCF_PUSH;
 	}
 	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
 	PFSYNC_UNLOCK(sc);
 
 	for (; m != NULL; m = n) {
 
 		n = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * We distinguish between a deferral packet and our
 		 * own pfsync packet based on M_SKIP_FIREWALL
 		 * flag. This is XXX.
 		 */
 		if (m->m_flags & M_SKIP_FIREWALL)
 			ip_output(m, NULL, NULL, 0, NULL, NULL);
 		else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
 		    NULL) == 0)
 			V_pfsyncstats.pfsyncs_opackets++;
 		else
 			V_pfsyncstats.pfsyncs_oerrors++;
 	}
 	CURVNET_RESTORE();
 }
 
 static int
 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
 {
 	struct ip_moptions *imo = &sc->sc_imo;
 	int error;
 
 	if (!(ifp->if_flags & IFF_MULTICAST))
 		return (EADDRNOTAVAIL);
 
 	imo->imo_membership = (struct in_multi **)mship;
 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	imo->imo_multicast_vif = -1;
 
 	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
 	    &imo->imo_membership[0])) != 0) {
 		imo->imo_membership = NULL;
 		return (error);
 	}
 	imo->imo_num_memberships++;
 	imo->imo_multicast_ifp = ifp;
 	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
 	imo->imo_multicast_loop = 0;
 
 	return (0);
 }
 
 /*
  * Undo pfsync_multicast_setup(): leave the group held in the first
  * membership slot, free the membership array (M_PFSYNC) and clear the
  * membership and multicast interface pointers in sc->sc_imo.
  */
 static void
 pfsync_multicast_cleanup(struct pfsync_softc *sc)
 {
 	struct ip_moptions *imo = &sc->sc_imo;
 
 	in_leavegroup(imo->imo_membership[0], NULL);
 	free(imo->imo_membership, M_PFSYNC);
 	imo->imo_membership = NULL;
 	imo->imo_multicast_ifp = NULL;
 }
 
 #ifdef INET
 /*
  * Protocol switch entry for IPPROTO_PFSYNC in the inet domain; it is
  * registered by pfsync_init().  Input goes to pfsync_input(), the output
  * and socket hooks are the raw IP ones.
  */
 extern  struct domain inetdomain;
 static struct protosw in_pfsync_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PFSYNC,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		pfsync_input,
 	.pr_output =		rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 #endif
 
 /*
  * Publish the pfsync entry points through the *_ptr hook variables.  The
  * assignments are made with the pf rules write lock held.
  */
 static void
 pfsync_pointers_init()
 {
 
 	PF_RULES_WLOCK();
 	pfsync_state_import_ptr = pfsync_state_import;
 	pfsync_insert_state_ptr = pfsync_insert_state;
 	pfsync_update_state_ptr = pfsync_update_state;
 	pfsync_delete_state_ptr = pfsync_delete_state;
 	pfsync_clear_states_ptr = pfsync_clear_states;
 	pfsync_defer_ptr = pfsync_defer;
 	PF_RULES_WUNLOCK();
 }
 
 /*
  * Counterpart of pfsync_pointers_init(): reset every pfsync hook variable
  * to NULL with the pf rules write lock held.
  */
 static void
 pfsync_pointers_uninit()
 {
 
 	PF_RULES_WLOCK();
 	pfsync_state_import_ptr = NULL;
 	pfsync_insert_state_ptr = NULL;
 	pfsync_update_state_ptr = NULL;
 	pfsync_delete_state_ptr = NULL;
 	pfsync_clear_states_ptr = NULL;
 	pfsync_defer_ptr = NULL;
 	PF_RULES_WUNLOCK();
 }
 
 /*
  * Module load work: for every vnet create the interface cloner and add the
  * software interrupt handler, then (with INET) register the pfsync
  * protocol, and finally publish the pfsync hook pointers.
  *
  * Returns 0 on success or the first error encountered; on error the
  * per-vnet handlers and cloners set up so far are torn down again.
  */
 static int
 pfsync_init()
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	int error = 0;
 
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		V_pfsync_cloner = if_clone_simple(pfsyncname,
 		    pfsync_clone_create, pfsync_clone_destroy, 1);
 		error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
 		    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
 		CURVNET_RESTORE();
 		if (error)
 			goto fail_locked;
 	}
 	VNET_LIST_RUNLOCK();
 #ifdef INET
 	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
 	if (error)
 		goto fail;
 	error = ipproto_register(IPPROTO_PFSYNC);
 	if (error) {
 		/* Roll back the protosw registration done just above. */
 		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
 		goto fail;
 	}
 #endif
 	pfsync_pointers_init();
 
 	return (0);
 
 fail:
 	/* The vnet list lock was dropped above; retake it for the unwind. */
 	VNET_LIST_RLOCK();
 fail_locked:
 	/*
 	 * NOTE(review): the cloner is only detached for vnets that have a
 	 * swi cookie.  If swi_add() fails for a vnet, that vnet's cloner
 	 * looks like it stays attached -- confirm.
 	 */
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		if (V_pfsync_swi_cookie) {
 			swi_remove(V_pfsync_swi_cookie);
 			if_clone_detach(V_pfsync_cloner);
 		}
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 
 	return (error);
 }
 
 /*
  * Module unload work, the reverse of pfsync_init(): withdraw the pfsync
  * hook pointers, unregister the pfsync protocol and, for every vnet,
  * detach the interface cloner and remove the software interrupt handler.
  */
 static void
 pfsync_uninit()
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	pfsync_pointers_uninit();
 
 	/*
 	 * The protocol is only registered when INET is configured (see
 	 * pfsync_init()), so only unregister it under the same condition.
 	 */
 #ifdef INET
 	ipproto_unregister(IPPROTO_PFSYNC);
 	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
 #endif
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		if_clone_detach(V_pfsync_cloner);
 		swi_remove(V_pfsync_swi_cookie);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 }
 
 /*
  * Module event handler.
  *
  * MOD_LOAD returns the result of pfsync_init(); MOD_UNLOAD runs
  * pfsync_uninit() and returns 0; MOD_QUIESCE is refused with EBUSY; any
  * other event type yields EINVAL.
  */
 static int
 pfsync_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		return (pfsync_init());
 	case MOD_UNLOAD:
 		pfsync_uninit();
 		return (0);
 	case MOD_QUIESCE:
 		/*
 		 * Module should not be unloaded due to race conditions.
 		 */
 		return (EBUSY);
 	default:
 		return (EINVAL);
 	}
 }
 
 /* Module descriptor: name, event handler, no extra argument. */
 static moduledata_t pfsync_mod = {
 	pfsyncname,
 	pfsync_modevent,
 	0
 };
 
 #define PFSYNC_MODVER 1
 
 /* Declare the module, its version and its dependency on the pf module. */
 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
 MODULE_VERSION(pfsync, PFSYNC_MODVER);
 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
Index: head/sys/netpfil/pf/pf_norm.c
===================================================================
--- head/sys/netpfil/pf/pf_norm.c	(revision 280970)
+++ head/sys/netpfil/pf/pf_norm.c	(revision 280971)
@@ -1,2293 +1,2293 @@
 /*-
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
  * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif /* INET6 */
 
 /*
  * One entry of a fragment queue (struct pf_fragment, fr_queue).  In a
  * buffering queue fe_m holds the fragment's mbuf.
  */
 struct pf_frent {
 	TAILQ_ENTRY(pf_frent)	fr_next;
 	struct mbuf	*fe_m;
 	uint16_t	fe_hdrlen;	/* ipv4 header length with ip options
 					   ipv6, extension, fragment header */
 	uint16_t	fe_extoff;	/* last extension header offset or 0 */
 	uint16_t	fe_len;		/* fragment length */
 	uint16_t	fe_off;		/* fragment offset */
 	uint16_t	fe_mff;		/* more fragment flag */
 };
 
 /*
  * Lookup key of a fragment queue.  It is the first member of
  * struct pf_fragment, which lets pf_find_fragment() cast a key pointer to
  * a struct pf_fragment pointer for the tree lookup.
  */
 struct pf_fragment_cmp {
 	struct pf_addr	frc_src;	/* source address */
 	struct pf_addr	frc_dst;	/* destination address */
 	uint32_t	frc_id;		/* ip_id (v4) or ip6f_ident (v6) */
 	sa_family_t	frc_af;		/* AF_INET or AF_INET6 */
 	uint8_t		frc_proto;	/* ip_p for v4, 0 for v6 */
 	uint8_t		frc_direction;	/* direction given by the caller */
 };
 
 /*
  * A fragment queue: every fragment entry seen so far for one key.  The
  * queue is linked both into a red-black tree (fr_entry) and into a tail
  * queue (frag_next).  PFFRAG_NOBUFFER selects the non-buffering cache
  * variant; BUFFER_FRAGMENTS() is true when that flag is clear.
  */
 struct pf_fragment {
 	struct pf_fragment_cmp	fr_key;
 /* Shorthands for the key members. */
 #define fr_src	fr_key.frc_src
 #define fr_dst	fr_key.frc_dst
 #define fr_id	fr_key.frc_id
 #define fr_af	fr_key.frc_af
 #define fr_proto	fr_key.frc_proto
 #define fr_direction	fr_key.frc_direction
 
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	uint8_t		fr_flags;	/* status flags */
 #define PFFRAG_SEENLAST		0x0001	/* Seen the last fragment for this */
 #define PFFRAG_NOBUFFER		0x0002	/* Non-buffering fragment cache */
 #define PFFRAG_DROP		0x0004	/* Drop all fragments */
 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
 	uint16_t	fr_max;		/* fragment data max */
 	uint32_t	fr_timeout;	/* time_uptime of last lookup */
 	uint16_t	fr_maxlen;	/* maximum length of single fragment */
 	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
 };
 
 /*
  * Payload of the PF_REASSEMBLED mbuf tag that pf_reassemble6() attaches to
  * a reassembled IPv6 packet.
  */
 struct pf_fragment_tag {
 	uint16_t	ft_hdrlen;	/* header length of reassembled pkt */
 	uint16_t	ft_extoff;	/* last extension header offset or 0 */
 	uint16_t	ft_maxlen;	/* maximum fragment payload length */
 	uint32_t	ft_id;		/* fragment id */
 };
 
 /*
  * Mutex held around fragment queue manipulation; the helpers below check
  * ownership with PF_FRAG_ASSERT().
  */
 static struct mtx pf_frag_mtx;
 #define PF_FRAG_LOCK()		mtx_lock(&pf_frag_mtx)
 #define PF_FRAG_UNLOCK()	mtx_unlock(&pf_frag_mtx)
 #define PF_FRAG_ASSERT()	mtx_assert(&pf_frag_mtx, MA_OWNED)
 
 VNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */
 
 /* Per-vnet zones for fragment entries and fragment queues. */
 static VNET_DEFINE(uma_zone_t, pf_frent_z);
 #define	V_pf_frent_z	VNET(pf_frent_z)
 static VNET_DEFINE(uma_zone_t, pf_frag_z);
 #define	V_pf_frag_z	VNET(pf_frag_z)
 
 /*
  * Per-vnet lists of fragment queues: pf_fragqueue holds the buffering
  * queues, pf_cachequeue the non-buffering ones.  Queues are inserted at
  * the head and purged from the tail.
  */
 TAILQ_HEAD(pf_fragqueue, pf_fragment);
 TAILQ_HEAD(pf_cachequeue, pf_fragment);
 static VNET_DEFINE(struct pf_fragqueue,	pf_fragqueue);
 #define	V_pf_fragqueue			VNET(pf_fragqueue)
 static VNET_DEFINE(struct pf_cachequeue,	pf_cachequeue);
 #define	V_pf_cachequeue			VNET(pf_cachequeue)
 /*
  * Per-vnet lookup trees keyed with pf_frag_compare(); one for buffering
  * queues and one for the non-buffering cache.
  */
 RB_HEAD(pf_frag_tree, pf_fragment);
 static VNET_DEFINE(struct pf_frag_tree,	pf_frag_tree);
 #define	V_pf_frag_tree			VNET(pf_frag_tree)
 static VNET_DEFINE(struct pf_frag_tree,	pf_cache_tree);
 #define	V_pf_cache_tree			VNET(pf_cache_tree)
 static int		 pf_frag_compare(struct pf_fragment *,
 			    struct pf_fragment *);
 static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 /* Forward declarations of the file-local helpers. */
 static void	pf_flush_fragments(void);
 static void	pf_free_fragment(struct pf_fragment *);
 static void	pf_remove_fragment(struct pf_fragment *);
 static int	pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
 		    struct tcphdr *, int, sa_family_t);
 static struct pf_frent *pf_create_fragment(u_short *);
 static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
 		    struct pf_frag_tree *tree);
 static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
 		    struct pf_frent *, u_short *);
 static int	pf_isfull_fragment(struct pf_fragment *);
 static struct mbuf *pf_join_fragment(struct pf_fragment *);
 /* IPv4-only helpers. */
 #ifdef INET
 static void	pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t);
 static int	pf_reassemble(struct mbuf **, struct ip *, int, u_short *);
 static struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
 		    struct pf_fragment **, int, int, int *);
 #endif	/* INET */
 /* IPv6-only helpers. */
 #ifdef INET6
 static int	pf_reassemble6(struct mbuf **, struct ip6_hdr *,
 		    struct ip6_frag *, uint16_t, uint16_t, int, u_short *);
 static void	pf_scrub_ip6(struct mbuf **, uint8_t);
 #endif	/* INET6 */
 
 /*
  * Debug output: when the pf debug level is at least PF_DEBUG_MISC, print
  * the calling function's name followed by the message.  'x' must be a
  * complete, parenthesized printf argument list, i.e. the macro is invoked
  * as DPFPRINTF(("fmt", args)).
  */
 #define	DPFPRINTF(x) do {				\
 	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
 		printf("%s: ", __func__);		\
 		printf x ;				\
 	}						\
 } while(0)
 
 #ifdef INET
 /*
  * Fill the fragment lookup key 'key' from the IPv4 header 'ip' and the
  * direction 'dir'.  The id is copied from ip_id as is, without byte order
  * conversion.
  */
 static void
 pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
 {
 
 	key->frc_af = AF_INET;
 	key->frc_direction = dir;
 	key->frc_proto = ip->ip_p;
 	key->frc_id = ip->ip_id;
 	key->frc_src.v4 = ip->ip_src;
 	key->frc_dst.v4 = ip->ip_dst;
 }
 #endif	/* INET */
 
 /*
  * Set up the normalization state: create the zones for fragment queues,
  * fragment entries and state scrub data, cap the fragment entry zone at
  * PFFRAG_FRENT_HIWAT (also recorded as the PF_LIMIT_FRAGS limit),
  * initialize the fragment mutex and the two fragment queue lists.
  */
 void
 pf_normalize_init(void)
 {
 
 	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
 	    sizeof(struct pf_state_scrub),  NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 	/* Bound the number of fragment entries and warn when it is hit. */
 	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
 	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
 	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
 	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");
 
 	mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF);
 
 	TAILQ_INIT(&V_pf_fragqueue);
 	TAILQ_INIT(&V_pf_cachequeue);
 }
 
 /*
  * Tear down what pf_normalize_init() created: destroy the three zones and
  * the fragment mutex.
  */
 void
 pf_normalize_cleanup(void)
 {
 
 	uma_zdestroy(V_pf_state_scrub_z);
 	uma_zdestroy(V_pf_frent_z);
 	uma_zdestroy(V_pf_frag_z);
 
 	mtx_destroy(&pf_frag_mtx);
 }
 
 /*
  * Ordering function for the fragment trees.  Compares, in this order, the
  * fragment id, protocol, address family, source address and destination
  * address.  Returns a negative value, zero or a positive value when 'a'
  * sorts before, equal to or after 'b'.
  */
 static int
 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 {
 	int	diff;
 
 	/*
 	 * fr_id is a 32 bit unsigned value.  Do not derive the result from
 	 * the difference: truncated to an int its sign does not give a
 	 * transitive ordering, which the red-black tree relies on.
 	 */
 	if (a->fr_id != b->fr_id)
 		return (a->fr_id < b->fr_id ? -1 : 1);
 	/* The remaining scalar members are narrow; a difference is safe. */
 	if ((diff = a->fr_proto - b->fr_proto) != 0)
 		return (diff);
 	if ((diff = a->fr_af - b->fr_af) != 0)
 		return (diff);
 	if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
 		return (diff);
 	if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
 		return (diff);
 	return (0);
 }
 
 /*
  * Free every fragment queue whose fr_timeout is not newer than
  * time_uptime minus the PFTM_FRAG timeout of the default rule.
  *
  * Both lists are walked from the tail.  Queues are (re)inserted at the
  * head when created or looked up, so the walk stops at the first queue
  * that has not expired yet.  Takes and releases the fragment lock.
  */
 void
 pf_purge_expired_fragments(void)
 {
 	struct pf_fragment	*frag;
 	u_int32_t		 expire = time_uptime -
 				    V_pf_default_rule.timeout[PFTM_FRAG];
 
 	PF_FRAG_LOCK();
 	/* Buffering (reassembly) queues. */
 	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
 		KASSERT((BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 	}
 
 	/* Non-buffering cache queues. */
 	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
 		KASSERT((!BUFFER_FRAGMENTS(frag)),
 		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
 		if (frag->fr_timeout > expire)
 			break;
 
 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 		pf_free_fragment(frag);
 		/* The freed queue must no longer be the list tail. */
 		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
 		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
 		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
 		    __FUNCTION__));
 	}
 	PF_FRAG_UNLOCK();
 }
 
 /*
  * Make room for new fragments by discarding old ones.
  *
  * The goal is 90% of the current fragment entry count.  Each round frees
  * the tail of the buffering list and the tail of the cache list, until
  * the entry count has dropped to the goal or both lists are empty.  The
  * fragment lock must be held.
  */
 static void
 pf_flush_fragments(void)
 {
 	struct pf_fragment	*victim;
 	int			 goal, freed;
 
 	PF_FRAG_ASSERT();
 
 	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
 	DPFPRINTF(("trying to free %d frag entriess\n", goal));
 	for (;;) {
 		if (goal >= uma_zone_get_cur(V_pf_frent_z))
 			break;
 
 		freed = 0;
 
 		victim = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
 		if (victim != NULL) {
 			pf_free_fragment(victim);
 			freed = 1;
 		}
 
 		victim = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
 		if (victim != NULL) {
 			pf_free_fragment(victim);
 			freed = 1;
 		}
 
 		/* Nothing left on either list. */
 		if (!freed)
 			break;
 	}
 }
 
 /* Frees the fragments and all associated entries */
 static void
 pf_free_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent		*frent;
 
 	PF_FRAG_ASSERT();
 
 	/* Free all fragments */
 	if (BUFFER_FRAGMENTS(frag)) {
 		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
 		    frent = TAILQ_FIRST(&frag->fr_queue)) {
 			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
 			m_freem(frent->fe_m);
 			uma_zfree(V_pf_frent_z, frent);
 		}
 	} else {
 		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
 		    frent = TAILQ_FIRST(&frag->fr_queue)) {
 			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
 			KASSERT((TAILQ_EMPTY(&frag->fr_queue) ||
 			    TAILQ_FIRST(&frag->fr_queue)->fe_off >
 			    frent->fe_len),
 			    ("! (TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >"
 			    " frent->fe_len): %s", __func__));
 
 			uma_zfree(V_pf_frent_z, frent);
 		}
 	}
 
 	pf_remove_fragment(frag);
 }
 
 /*
  * Look up the fragment queue matching 'key' in 'tree'.
  *
  * On a hit the queue's timeout is refreshed and the queue is moved to the
  * head of the list it belongs to.  Returns the queue or NULL when there is
  * none.  The fragment lock must be held.
  */
 static struct pf_fragment *
 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
 {
 	struct pf_fragment	*found;
 
 	PF_FRAG_ASSERT();
 
 	found = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
 	if (found == NULL)
 		return (NULL);
 
 	/* XXX Are we sure we want to update the timeout? */
 	found->fr_timeout = time_uptime;
 
 	/* Requeue at the head of the matching list. */
 	if (!BUFFER_FRAGMENTS(found)) {
 		TAILQ_REMOVE(&V_pf_cachequeue, found, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, found, frag_next);
 	} else {
 		TAILQ_REMOVE(&V_pf_fragqueue, found, frag_next);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, found, frag_next);
 	}
 
 	return (found);
 }
 
 /*
  * Unlink a fragment queue from its tree and from its list, then free it.
  * The entries of the queue are not touched here.  The fragment lock must
  * be held.
  */
 static void
 pf_remove_fragment(struct pf_fragment *frag)
 {
 
 	PF_FRAG_ASSERT();
 
 	if (!BUFFER_FRAGMENTS(frag)) {
 		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
 		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
 	} else {
 		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
 	}
 
 	uma_zfree(V_pf_frag_z, frag);
 }
 
 /*
  * Allocate a fragment entry without sleeping.  If the first attempt fails,
  * old fragments are flushed and the allocation is tried once more.
  *
  * Returns the entry, or NULL with *reason set to PFRES_MEMORY.  The
  * fragment lock must be held.
  */
 static struct pf_frent *
 pf_create_fragment(u_short *reason)
 {
 	struct pf_frent *ent;
 
 	PF_FRAG_ASSERT();
 
 	ent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 	if (ent != NULL)
 		return (ent);
 
 	/* Out of entries: free some old ones and retry once. */
 	pf_flush_fragments();
 	ent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 	if (ent == NULL) {
 		REASON_SET(reason, PFRES_MEMORY);
 		return (NULL);
 	}
 
 	return (ent);
 }
 
 /*
  * Insert the fragment entry 'frent' into the reassembly queue identified
  * by 'key', creating the queue if it does not exist yet.
  *
  * The entry is validated first: it must not be empty, must be 8 byte
  * aligned when more fragments follow, must not reach beyond IP_MAXPACKET
  * and must be consistent with what the queue already holds.  An overlap
  * with the preceding entry is trimmed from the front of 'frent';
  * following entries that overlap are trimmed or, when fully covered,
  * freed.
  *
  * Returns the fragment queue on success.  On failure 'frent' is given
  * back to its zone, *reason is set (PFRES_FRAG for a bad fragment,
  * PFRES_MEMORY when no queue could be allocated) and NULL is returned;
  * the mbuf in frent->fe_m is not freed here.  The fragment lock must be
  * held.
  */
 static struct pf_fragment *
 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		u_short *reason)
 {
 	struct pf_frent		*after, *next, *prev;
 	struct pf_fragment	*frag;
 	uint16_t		total;
 
 	PF_FRAG_ASSERT();
 
 	/* No empty fragments. */
 	if (frent->fe_len == 0) {
 		DPFPRINTF(("bad fragment: len 0"));
 		goto bad_fragment;
 	}
 
 	/* All fragments are 8 byte aligned. */
 	if (frent->fe_mff && (frent->fe_len & 0x7)) {
 		DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
 		goto bad_fragment;
 	}
 
 	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
 	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
 		DPFPRINTF(("bad fragment: max packet %d",
 		    frent->fe_off + frent->fe_len));
 		goto bad_fragment;
 	}
 
 	DPFPRINTF((key->frc_af == AF_INET ?
 	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
 	    key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
 
 	/* Fully buffer all of the fragments in this fragment queue. */
 	frag = pf_find_fragment(key, &V_pf_frag_tree);
 
 	/* Create a new reassembly queue for this packet. */
 	if (frag == NULL) {
 		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (frag == NULL) {
 			pf_flush_fragments();
 			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (frag == NULL) {
 				REASON_SET(reason, PFRES_MEMORY);
 				goto drop_fragment;
 			}
 		}
 
 		*(struct pf_fragment_cmp *)frag = *key;
 		/*
 		 * The allocation is not zeroed.  Clear the flags so that
 		 * BUFFER_FRAGMENTS() is true for this buffering queue.
 		 */
 		frag->fr_flags = 0;
 		/*
 		 * Use the same clock as pf_find_fragment() and the purge
 		 * code, which both work with time_uptime.
 		 */
 		frag->fr_timeout = time_uptime;
 		frag->fr_maxlen = frent->fe_len;
 		TAILQ_INIT(&frag->fr_queue);
 
 		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
 		/* We do not have a previous fragment. */
 		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 
 		return (frag);
 	}
 
 	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
 
 	/* Remember maximum fragment len for refragmentation. */
 	if (frent->fe_len > frag->fr_maxlen)
 		frag->fr_maxlen = frent->fe_len;
 
 	/* Maximum data we have seen already. */
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
 	/* Non terminal fragments must have more fragments flag. */
 	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
 		goto bad_fragment;
 
 	/* Check if we saw the last fragment already. */
 	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
 		if (frent->fe_off + frent->fe_len > total ||
 		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
 			goto bad_fragment;
 	} else {
 		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
 			goto bad_fragment;
 	}
 
 	/* Find a fragment after the current one. */
 	prev = NULL;
 	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
 		if (after->fe_off > frent->fe_off)
 			break;
 		prev = after;
 	}
 
 	KASSERT(prev != NULL || after != NULL,
 	    ("prev != NULL || after != NULL"));
 
 	/* Trim the part of the new fragment the predecessor already covers. */
 	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
 		uint16_t precut;
 
 		precut = prev->fe_off + prev->fe_len - frent->fe_off;
 		if (precut >= frent->fe_len)
 			goto bad_fragment;
 		DPFPRINTF(("overlap -%d", precut));
 		m_adj(frent->fe_m, precut);
 		frent->fe_off += precut;
 		frent->fe_len -= precut;
 	}
 
 	/* Trim or drop following fragments that the new one overlaps. */
 	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
 	    after = next) {
 		uint16_t aftercut;
 
 		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
 		DPFPRINTF(("adjust overlap %d", aftercut));
 		if (aftercut < after->fe_len) {
 			m_adj(after->fe_m, aftercut);
 			after->fe_off += aftercut;
 			after->fe_len -= aftercut;
 			break;
 		}
 
 		/* This fragment is completely overlapped, lose it. */
 		next = TAILQ_NEXT(after, fr_next);
 		m_freem(after->fe_m);
 		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
 		uma_zfree(V_pf_frent_z, after);
 	}
 
 	if (prev == NULL)
 		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 	else
 		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
 
 	return (frag);
 
 bad_fragment:
 	REASON_SET(reason, PFRES_FRAG);
 drop_fragment:
 	/* Only the entry is released; its mbuf is not freed here. */
 	uma_zfree(V_pf_frent_z, frent);
 	return (NULL);
 }
 
 static int
 pf_isfull_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent	*frent, *next;
 	uint16_t off, total;
 
 	/* Check if we are completely reassembled */
 	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
 		return (0);
 
 	/* Maximum data we have seen already */
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
 	/* Check if we have all the data */
 	off = 0;
 	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
 		next = TAILQ_NEXT(frent, fr_next);
 
 		off += frent->fe_len;
 		if (off < total && (next == NULL || next->fe_off != off)) {
 			DPFPRINTF(("missing fragment at %d, next %d, total %d",
 			    off, next == NULL ? -1 : next->fe_off, total));
 			return (0);
 		}
 	}
 	DPFPRINTF(("%d < %d?", off, total));
 	if (off < total)
 		return (0);
 	KASSERT(off == total, ("off == total"));
 
 	return (1);
 }
 
 /*
  * Concatenate the mbufs of all entries of a fragment queue into a single
  * chain and return it.  The first entry keeps its header; every following
  * entry has its header stripped before it is appended.  All entries and
  * the queue itself are freed.
  */
 static struct mbuf *
 pf_join_fragment(struct pf_fragment *frag)
 {
 	struct mbuf *m, *m2;
 	struct pf_frent	*frent, *next;
 
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	next = TAILQ_NEXT(frent, fr_next);
 
 	m = frent->fe_m;
 	/* Cut the first mbuf chain down to header plus fragment payload. */
 	m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
 	uma_zfree(V_pf_frent_z, frent);
 	for (frent = next; frent != NULL; frent = next) {
 		/* Fetch the successor first: frent is freed below. */
 		next = TAILQ_NEXT(frent, fr_next);
 
 		m2 = frent->fe_m;
 		/* Strip off ip header. */
 		m_adj(m2, frent->fe_hdrlen);
 		/* Strip off any trailing bytes. */
 		m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
 
 		uma_zfree(V_pf_frent_z, frent);
 		m_cat(m, m2);
 	}
 
 	/* Remove from fragment queue. */
 	pf_remove_fragment(frag);
 
 	return (m);
 }
 
 #ifdef INET
 /*
  * Queue the IPv4 fragment in *m0 for reassembly and, once the packet is
  * complete, replace *m0 with the reassembled packet.
  *
  * Returns PF_DROP when no fragment entry could be allocated, when the
  * fragment was rejected, or when the reassembled packet would exceed
  * IP_MAXPACKET.  Returns PF_PASS otherwise; *m0 is NULL in that case when
  * the packet is not complete yet, because the mbuf has been taken over by
  * the fragment queue.
  *
  * The fragment lock is not taken here although the helpers assert it, so
  * the caller has to hold it.
  */
 static int
 pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
 	struct pf_fragment_cmp	key;
 	uint16_t		total, hdrlen;
 
 	/* Get an entry for the fragment queue */
 	if ((frent = pf_create_fragment(reason)) == NULL)
 		return (PF_DROP);
 
 	/* Describe the fragment: header length, payload length, offset, MF. */
 	frent->fe_m = m;
 	frent->fe_hdrlen = ip->ip_hl << 2;
 	frent->fe_extoff = 0;
 	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
 	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
 
 	pf_ip2key(ip, dir, &key);
 
 	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
 		return (PF_DROP);
 
 	/* The mbuf is part of the fragment entry, no direct free or access */
 	m = *m0 = NULL;
 
 	if (!pf_isfull_fragment(frag))
 		return (PF_PASS);  /* drop because *m0 is NULL, no error */
 
 	/* We have all the data */
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	KASSERT(frent != NULL, ("frent != NULL"));
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 	hdrlen = frent->fe_hdrlen;
 
 	/* pf_join_fragment() frees the queue; frag is invalid afterwards. */
 	m = *m0 = pf_join_fragment(frag);
 	frag = NULL;
 
 	/* Recompute the packet header length from the joined chain. */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (m = *m0; m; m = m->m_next)
 			plen += m->m_len;
 		m = *m0;
 		m->m_pkthdr.len = plen;
 	}
 
 	ip = mtod(m, struct ip *);
 	ip->ip_len = htons(hdrlen + total);
 	/*
 	 * NOTE(review): ip_off is read through ntohs() above, but this mask
 	 * is applied without htons(); confirm that is intended on little
 	 * endian hosts.
 	 */
 	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
 
 	if (hdrlen + total > IP_MAXPACKET) {
 		DPFPRINTF(("drop: too big: %d", total));
 		ip->ip_len = 0;
 		REASON_SET(reason, PFRES_SHORT);
 		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
 		return (PF_DROP);
 	}
 
 	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
 	return (PF_PASS);
 }
 #endif	/* INET */
 
 #ifdef INET6
 /*
  * Queue the IPv6 fragment in *m0 for reassembly and, once the packet is
  * complete, replace *m0 with the reassembled packet.
  *
  * 'fraghdr' is the fragment header, 'hdrlen' the header length recorded
  * for the fragment and 'extoff' the offset of the last extension header
  * (or 0).  A completed packet has its fragment header deleted and gets a
  * PF_REASSEMBLED mbuf tag describing the original fragmentation.
  *
  * Returns PF_DROP on allocation failure, for a rejected fragment or for an
  * oversized result.  Returns PF_PASS otherwise; *m0 is NULL in that case
  * when the packet is not complete yet.  Unlike pf_reassemble() this
  * function takes and releases the fragment lock itself.
  */
 static int
 pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
     uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
 	struct pf_fragment_cmp	 key;
 	struct m_tag		*mtag;
 	struct pf_fragment_tag	*ftag;
 	int			 off;
 	uint32_t		 frag_id;
 	uint16_t		 total, maxlen;
 	uint8_t			 proto;
 
 	PF_FRAG_LOCK();
 
 	/* Get an entry for the fragment queue. */
 	if ((frent = pf_create_fragment(reason)) == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
 
 	frent->fe_m = m;
 	frent->fe_hdrlen = hdrlen;
 	frent->fe_extoff = extoff;
 	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
 	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
 	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
 
 	key.frc_src.v6 = ip6->ip6_src;
 	key.frc_dst.v6 = ip6->ip6_dst;
 	key.frc_af = AF_INET6;
 	/* Only the first fragment's protocol is relevant. */
 	key.frc_proto = 0;
 	key.frc_id = fraghdr->ip6f_ident;
 	key.frc_direction = dir;
 
 	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
 
 	/* The mbuf is part of the fragment entry, no direct free or access. */
 	m = *m0 = NULL;
 
 	if (!pf_isfull_fragment(frag)) {
 		PF_FRAG_UNLOCK();
 		return (PF_PASS);  /* Drop because *m0 is NULL, no error. */
 	}
 
 	/* We have all the data. */
 	/* Save what is needed from the queue before it is freed below. */
 	extoff = frent->fe_extoff;
 	maxlen = frag->fr_maxlen;
 	frag_id = frag->fr_id;
 	frent = TAILQ_FIRST(&frag->fr_queue);
 	KASSERT(frent != NULL, ("frent != NULL"));
 	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
 		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 	/* From here on hdrlen no longer includes the fragment header. */
 	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
 
 	m = *m0 = pf_join_fragment(frag);
 	frag = NULL;
 
 	PF_FRAG_UNLOCK();
 
 	/* Take protocol from first fragment header. */
 	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
 	KASSERT(m, ("%s: short mbuf chain", __func__));
 	proto = *(mtod(m, caddr_t) + off);
 	m = *m0;
 
 	/* Delete frag6 header */
 	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
 		goto fail;
 
 	/* Recompute the packet header length from the joined chain. */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (m = *m0; m; m = m->m_next)
 			plen += m->m_len;
 		m = *m0;
 		m->m_pkthdr.len = plen;
 	}
 
 	/* Record the original fragmentation in a tag on the packet. */
 	if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
 	    M_NOWAIT)) == NULL)
 		goto fail;
 	ftag = (struct pf_fragment_tag *)(mtag + 1);
 	ftag->ft_hdrlen = hdrlen;
 	ftag->ft_extoff = extoff;
 	ftag->ft_maxlen = maxlen;
 	ftag->ft_id = frag_id;
 	m_tag_prepend(m, mtag);
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
 	if (extoff) {
 		/* Write protocol into next field of last extension header. */
 		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
 		    &off);
 		KASSERT(m, ("%s: short mbuf chain", __func__));
 		*(mtod(m, char *) + off) = proto;
 		m = *m0;
 	} else
 		ip6->ip6_nxt = proto;
 
 	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
 		DPFPRINTF(("drop: too big: %d", total));
 		ip6->ip6_plen = 0;
 		REASON_SET(reason, PFRES_SHORT);
 		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
 		return (PF_DROP);
 	}
 
 	DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
 	return (PF_PASS);
 
 fail:
 	REASON_SET(reason, PFRES_MEMORY);
 	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
 	return (PF_DROP);
 }
 #endif	/* INET6 */
 
 #ifdef INET
 static struct mbuf *
 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
     int drop, int *nomem)
 {
 	struct mbuf		*m = *m0;
 	struct pf_frent		*frp, *fra, *cur = NULL;
 	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
 	u_int16_t		 off = ntohs(h->ip_off) << 3;
 	u_int16_t		 max = ip_len + off;
 	int			 hosed = 0;
 
 	PF_FRAG_ASSERT();
 	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
 	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 
 	/* Create a new range queue for this packet */
 	if (*frag == NULL) {
 		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 		if (*frag == NULL) {
 			pf_flush_fragments();
 			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
 			if (*frag == NULL)
 				goto no_mem;
 		}
 
 		/* Get an entry for the queue */
 		cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 		if (cur == NULL) {
 			uma_zfree(V_pf_frag_z, *frag);
 			*frag = NULL;
 			goto no_mem;
 		}
 
 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
 		(*frag)->fr_max = 0;
 		(*frag)->fr_src.v4 = h->ip_src;
 		(*frag)->fr_dst.v4 = h->ip_dst;
 		(*frag)->fr_id = h->ip_id;
 		(*frag)->fr_timeout = time_uptime;
 
 		cur->fe_off = off;
 		cur->fe_len = max; /* TODO: fe_len = max - off ? */
 		TAILQ_INIT(&(*frag)->fr_queue);
 		TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 
 		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
 
 		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
 
 		goto pass;
 	}
 
 	/*
 	 * Find a fragment after the current one:
 	 *  - off contains the real shifted offset.
 	 */
 	frp = NULL;
 	TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
 		if (fra->fe_off > off)
 			break;
 		frp = fra;
 	}
 
 	KASSERT((frp != NULL || fra != NULL),
 	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
 
 	if (frp != NULL) {
 		int	precut;
 
 		precut = frp->fe_len - off;
 		if (precut >= ip_len) {
 			/* Fragment is entirely a duplicate */
 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
 			goto drop_fragment;
 		}
 		if (precut == 0) {
 			/* They are adjacent.  Fixup cache entry */
 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
 			frp->fe_len = max;
 		} else if (precut > 0) {
 			/* The first part of this payload overlaps with a
 			 * fragment that has already been passed.
 			 * Need to trim off the first part of the payload.
 			 * But to do so easily, we need to create another
 			 * mbuf to throw the original header into.
 			 */
 
 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 			    h->ip_id, precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			off += precut;
 			max -= precut;
 			/* Update the previous frag to encompass this one */
 			frp->fe_len = max;
 
 			if (!drop) {
 				/* XXX Optimization opportunity
 				 * This is a very heavy way to trim the payload.
 				 * we could do it much faster by diddling mbuf
 				 * internals but that would be even less legible
 				 * than this mbuf magic.  For my next trick,
 				 * I'll pull a rabbit out of my laptop.
 				 */
 				*m0 = m_dup(m, M_NOWAIT);
 				if (*m0 == NULL)
 					goto no_mem;
 				/* From KAME Project : We have missed this! */
 				m_adj(*m0, (h->ip_hl << 2) -
 				    (*m0)->m_pkthdr.len);
 
 				KASSERT(((*m0)->m_next == NULL),
 				    ("(*m0)->m_next != NULL: %s",
 				    __FUNCTION__));
 				m_adj(m, precut + (h->ip_hl << 2));
 				m_cat(*m0, m);
 				m = *m0;
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 
 
 				h = mtod(m, struct ip *);
 
 				KASSERT(((int)m->m_len ==
 				    ntohs(h->ip_len) - precut),
 				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
 				    __FUNCTION__));
 				h->ip_off = htons(ntohs(h->ip_off) +
 				    (precut >> 3));
 				h->ip_len = htons(ntohs(h->ip_len) - precut);
 			} else {
 				hosed++;
 			}
 		} else {
 			/* There is a gap between fragments */
 
 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 			    h->ip_id, -precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fe_off = off;
 			cur->fe_len = max;
 			TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next);
 		}
 	}
 
 	if (fra != NULL) {
 		int	aftercut;
 		int	merge = 0;
 
 		aftercut = max - fra->fe_off;
 		if (aftercut == 0) {
 			/* Adjacent fragments */
 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 			    h->ip_id, off, max, fra->fe_off, fra->fe_len));
 			fra->fe_off = off;
 			merge = 1;
 		} else if (aftercut > 0) {
 			/* Need to chop off the tail of this fragment */
 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 			    h->ip_id, aftercut, off, max, fra->fe_off,
 			    fra->fe_len));
 			fra->fe_off = off;
 			max -= aftercut;
 
 			merge = 1;
 
 			if (!drop) {
 				m_adj(m, -aftercut);
 				if (m->m_flags & M_PKTHDR) {
 					int plen = 0;
 					struct mbuf *t;
 					for (t = m; t; t = t->m_next)
 						plen += t->m_len;
 					m->m_pkthdr.len = plen;
 				}
 				h = mtod(m, struct ip *);
 				KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
 				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
 				    __FUNCTION__));
 				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
 			} else {
 				hosed++;
 			}
 		} else if (frp == NULL) {
 			/* There is a gap between fragments */
 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
 			    h->ip_id, -aftercut, off, max, fra->fe_off,
 			    fra->fe_len));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
 			cur->fe_off = off;
 			cur->fe_len = max;
 			TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 		}
 
 
 		/* Need to glue together two separate fragment descriptors */
 		if (merge) {
 			if (cur && fra->fe_off <= cur->fe_len) {
 				/* Need to merge in a previous 'cur' */
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, cur->fe_off, cur->fe_len, off,
 				    max, fra->fe_off, fra->fe_len));
 				fra->fe_off = cur->fe_off;
 				TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next);
 				uma_zfree(V_pf_frent_z, cur);
 				cur = NULL;
 
 			} else if (frp && fra->fe_off <= frp->fe_len) {
 				/* Need to merge in a modified 'frp' */
 				KASSERT((cur == NULL), ("cur != NULL: %s",
 				    __FUNCTION__));
 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
 				    "%d-%d) %d-%d (%d-%d)\n",
 				    h->ip_id, frp->fe_off, frp->fe_len, off,
 				    max, fra->fe_off, fra->fe_len));
 				fra->fe_off = frp->fe_off;
 				TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next);
 				uma_zfree(V_pf_frent_z, frp);
 				frp = NULL;
 
 			}
 		}
 	}
 
 	if (hosed) {
 		/*
 		 * We must keep tracking the overall fragment even when
 		 * we're going to drop it anyway so that we know when to
 		 * free the overall descriptor.  Thus we drop the frag late.
 		 */
 		goto drop_fragment;
 	}
 
 
  pass:
 	/* Update maximum data size */
 	if ((*frag)->fr_max < max)
 		(*frag)->fr_max = max;
 
 	/* This is the last segment */
 	if (!mff)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	/* Check if we are completely reassembled */
 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
 	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 &&
 	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) {
 		/* Remove from fragment queue */
 		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
 		    (*frag)->fr_max));
 		pf_free_fragment(*frag);
 		*frag = NULL;
 	}
 
 	return (m);
 
  no_mem:
 	*nomem = 1;
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	m_freem(m);
 	return (NULL);
 
  drop_fragment:
 
 	/* Still need to pay attention to !IP_MF */
 	if (!mff && *frag != NULL)
 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
 
 	if (drop) {
 		/* This fragment has been deemed bad.  Don't reass */
 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
 			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
 			    h->ip_id));
 		(*frag)->fr_flags |= PFFRAG_DROP;
 	}
 
 	m_freem(m);
 	return (NULL);
 }
 #endif	/* INET */
 
 #ifdef INET6
 int
 pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag)
 {
 	struct mbuf		*m = *m0, *t;
 	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
 	struct pf_pdesc		 pd;
 	uint32_t		 frag_id;
 	uint16_t		 hdrlen, extoff, maxlen;
 	uint8_t			 proto;
 	int			 error, action;
 
 	hdrlen = ftag->ft_hdrlen;
 	extoff = ftag->ft_extoff;
 	maxlen = ftag->ft_maxlen;
 	frag_id = ftag->ft_id;
 	m_tag_delete(m, mtag);
 	mtag = NULL;
 	ftag = NULL;
 
 	if (extoff) {
 		int off;
 
 		/* Use protocol from next field of last extension header */
 		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
 		    &off);
 		KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
 		proto = *(mtod(m, caddr_t) + off);
 		*(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
 		m = *m0;
 	} else {
 		struct ip6_hdr *hdr;
 
 		hdr = mtod(m, struct ip6_hdr *);
 		proto = hdr->ip6_nxt;
 		hdr->ip6_nxt = IPPROTO_FRAGMENT;
 	}
 
 	/*
 	 * Maxlen may be less than 8 if there was only a single
 	 * fragment.  As it was fragmented before, add a fragment
 	 * header also for a single fragment.  If total or maxlen
 	 * is less than 8, ip6_fragment() will return EMSGSIZE and
 	 * we drop the packet.
 	 */
 	error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id);
 	m = (*m0)->m_nextpkt;
 	(*m0)->m_nextpkt = NULL;
 	if (error == 0) {
 		/* The first mbuf contains the unfragmented packet. */
 		m_freem(*m0);
 		*m0 = NULL;
 		action = PF_PASS;
 	} else {
 		/* Drop expects an mbuf to free. */
 		DPFPRINTF(("refragment error %d", error));
 		action = PF_DROP;
 	}
 	for (t = m; m; m = t) {
 		t = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		memset(&pd, 0, sizeof(pd));
 		pd.pf_mtag = pf_find_mtag(m);
 		if (error == 0)
 			ip6_forward(m, 0);
 		else
 			m_freem(m);
 	}
 
 	return (action);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Normalize an IPv4 packet according to the first matching scrub rule.
  *
  * m0      in/out packet; may be replaced by a reassembled or trimmed chain,
  *         or set to NULL when the packet was consumed.
  * dir     packet direction, compared against PF_IN / PF_OUT.
  * kif     interface the packet is seen on, used for rule matching/logging.
  * reason  receives PFRES_MEMORY, PFRES_NORM or PFRES_FRAG on a drop decided
  *         in this function.
  * pd      packet descriptor; pf_mtag and flags may be updated.
  *
  * Returns PF_PASS when no scrub rule applies, the rule is PF_NOSCRUB, or
  * the packet was normalized; PF_DROP otherwise.  Fragments are either fully
  * reassembled through pf_reassemble() or, for rules carrying
  * PFRULE_FRAGCROP/PFRULE_FRAGDROP, run through the non-buffering
  * pf_fragcache().
  */
 int
 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
     struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct pf_fragment	*frag = NULL;
 	struct pf_fragment_cmp	key;
 	struct ip		*h = mtod(m, struct ip *);
 	int			 mff = (ntohs(h->ip_off) & IP_MF);
 	int			 hlen = h->ip_hl << 2;
 	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
 	u_int16_t		 max;
 	int			 ip_len;
 	int			 tag = -1;
 	int			 verdict;
 
 	PF_RULES_RASSERT();
 
 	/* Find the first scrub rule matching this packet. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != h->ip_p)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (hlen < (int)sizeof(struct ip))
 		goto drop;
 
 	if (hlen > ntohs(h->ip_len))
 		goto drop;
 
 	/* Clear IP_DF if the rule uses the no-df option */
 	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* We will need other tests here */
 	if (!fragoff && !mff)
 		goto no_fragment;
 
 	/* We're dealing with a fragment now. Don't allow fragments
 	 * with IP_DF to enter the cache. If the flag was cleared by
 	 * no-df above, fine. Otherwise drop it.
 	 */
 	if (h->ip_off & htons(IP_DF)) {
 		DPFPRINTF(("IP_DF\n"));
 		goto bad;
 	}
 
 	/* Payload length of this fragment (IP header excluded). */
 	ip_len = ntohs(h->ip_len) - hlen;
 
 	/* All fragments are 8 byte aligned */
 	if (mff && (ip_len & 0x7)) {
 		DPFPRINTF(("mff and %d\n", ip_len));
 		goto bad;
 	}
 
 	/* Respect maximum length */
 	if (fragoff + ip_len > IP_MAXPACKET) {
 		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
 		goto bad;
 	}
 	max = fragoff + ip_len;
 
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
 
 		/* Fully buffer all of the fragments */
 		PF_FRAG_LOCK();
 
 		pf_ip2key(h, dir, &key);
 		frag = pf_find_fragment(&key, &V_pf_frag_tree);
 
 		/* Check if we saw the last fragment already */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max)
 			goto bad;
 
 		/* Might return a completely reassembled mbuf, or NULL */
 		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
 		verdict = pf_reassemble(m0, h, dir, reason);
 		PF_FRAG_UNLOCK();
 
 		if (verdict != PF_PASS)
 			return (PF_DROP);
 
 		m = *m0;
 		if (m == NULL)
 			return (PF_DROP);
 
 		/*
 		 * NOTE(review): frag is dereferenced here after
 		 * PF_FRAG_UNLOCK() and after pf_reassemble(), whose handling
 		 * of the fragment entry is not visible from this function --
 		 * confirm the entry cannot have been freed at this point.
 		 */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 
 		h = mtod(m, struct ip *);
 	} else {
 		/* non-buffering fragment cache (drops or masks overlaps) */
 		int	nomem = 0;
 
 		if (dir == PF_OUT && pd->pf_mtag &&
 		    pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
 			/*
 			 * Already passed the fragment cache in the
 			 * input direction.  If we continued, it would
 			 * appear to be a dup and would be dropped.
 			 */
 			goto fragment_pass;
 		}
 
 		PF_FRAG_LOCK();
 		pf_ip2key(h, dir, &key);
 		frag = pf_find_fragment(&key, &V_pf_cache_tree);
 
 		/* Check if we saw the last fragment already */
 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 		    max > frag->fr_max) {
 			if (r->rule_flag & PFRULE_FRAGDROP)
 				frag->fr_flags |= PFFRAG_DROP;
 			goto bad;
 		}
 
 		*m0 = m = pf_fragcache(m0, h, &frag, mff,
 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
 		PF_FRAG_UNLOCK();
 		if (m == NULL) {
 			if (nomem)
 				goto no_mem;
 			goto drop;
 		}
 
 		if (dir == PF_IN) {
 			/* Use mtag from copied and trimmed mbuf chain. */
 			pd->pf_mtag = pf_get_mtag(m);
 			if (pd->pf_mtag == NULL) {
 				m_freem(m);
 				/*
 				 * The chain is gone: clear the local pointer
 				 * as well, so that the logging call at no_mem
 				 * is not handed freed memory.  This matches
 				 * the pf_fragcache() failure path above, which
 				 * also reaches no_mem/drop with m == NULL.
 				 */
 				*m0 = m = NULL;
 				goto no_mem;
 			}
 			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
 		}
 
 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 			goto drop;
 		goto fragment_pass;
 	}
 
  no_fragment:
 	/* At this point, only IP_DF is allowed in ip_off */
 	if (h->ip_off & ~htons(IP_DF)) {
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* not missing a return here */
 
  fragment_pass:
 	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
 
 	/* Only the fully buffering path marks the packet as reassembled. */
 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
 		pd->flags |= PFDESC_IP_REAS;
 	return (PF_PASS);
 
  no_mem:
 	REASON_SET(reason, PFRES_MEMORY);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  bad:
 	DPFPRINTF(("dropping bad fragment\n"));
 
 	/*
 	 * Free associated fragments.  Every jump to this label that holds
 	 * the fragment lock also has frag != NULL, so the unlock is paired.
 	 */
 	if (frag != NULL) {
 		pf_free_fragment(frag);
 		PF_FRAG_UNLOCK();
 	}
 
 	REASON_SET(reason, PFRES_FRAG);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
 		    1);
 
 	return (PF_DROP);
 }
 #endif
 
 #ifdef INET6
 /*
  * Normalize an IPv6 packet according to the first matching scrub rule.
  *
  * The rule's packet/byte counters are updated, the extension header chain
  * is walked, and the payload length (or jumbo payload option) is checked
  * against the mbuf packet length.  A packet carrying a fragment header is
  * handed to pf_reassemble6(); any other packet is passed to pf_scrub_ip6().
  *
  * Returns PF_PASS when no scrub rule matches, the rule is PF_NOSCRUB, or
  * the packet is acceptable; PF_DROP otherwise.  Drops decided here set
  * *reason to PFRES_NORM or PFRES_SHORT.  After reassembly *m0 may have been
  * replaced, or set to NULL.
  */
 int
 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
     u_short *reason, struct pf_pdesc *pd)
 {
 	struct mbuf		*m = *m0;
 	struct pf_rule		*r;
 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
 	int			 extoff;
 	int			 off;
 	struct ip6_ext		 ext;
 	struct ip6_opt		 opt;
 	struct ip6_opt_jumbo	 jumbo;
 	struct ip6_frag		 frag;
 	u_int32_t		 jumbolen = 0, plen;
 	int			 optend;
 	int			 ooff;
 	u_int8_t		 proto;
 	int			 terminal;
 
 	PF_RULES_RASSERT();
 
 	/* Find the first scrub rule matching this packet. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != AF_INET6)
 			r = r->skip[PF_SKIP_AF].ptr;
 #if 0 /* header chain! */
 		else if (r->proto && r->proto != h->ip6_nxt)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 #endif
 		else if (PF_MISMATCHAW(&r->src.addr,
 		    (struct pf_addr *)&h->ip6_src, AF_INET6,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr,
 		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else
 			break;
 	}
 
 	if (r == NULL || r->action == PF_NOSCRUB)
 		return (PF_PASS);
 	else {
 		r->packets[dir == PF_OUT]++;
 		r->bytes[dir == PF_OUT] += pd->tot_len;
 	}
 
 	/* Check for illegal packets */
 	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
 		goto drop;
 
 	/*
 	 * Walk the extension header chain.  'off' is the offset of the header
 	 * currently examined, 'extoff' the offset of the last extension
 	 * header seen (0 if none), 'proto' the type of the header at 'off'.
 	 * The walk ends at a fragment header or at the first header type that
 	 * is not handled below.
 	 */
 	extoff = 0;
 	off = sizeof(struct ip6_hdr);
 	proto = h->ip6_nxt;
 	terminal = 0;
 	do {
 		switch (proto) {
 		case IPPROTO_FRAGMENT:
 			goto fragment;
 			break;
 		case IPPROTO_AH:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			extoff = off;
 			/* AH length is in 4-byte units, the others in 8-byte units. */
 			if (proto == IPPROTO_AH)
 				off += (ext.ip6e_len + 2) * 4;
 			else
 				off += (ext.ip6e_len + 1) * 8;
 			proto = ext.ip6e_nxt;
 			break;
 		case IPPROTO_HOPOPTS:
 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
 			    NULL, AF_INET6))
 				goto shortpkt;
 			extoff = off;
 			optend = off + (ext.ip6e_len + 1) * 8;
 			ooff = off + sizeof(ext);
 			/* Iterate over the options inside the hop-by-hop header. */
 			do {
 				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
 				    sizeof(opt.ip6o_type), NULL, NULL,
 				    AF_INET6))
 					goto shortpkt;
 				/* Pad1 is a single byte without a length field. */
 				if (opt.ip6o_type == IP6OPT_PAD1) {
 					ooff++;
 					continue;
 				}
 				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
 				    NULL, NULL, AF_INET6))
 					goto shortpkt;
 				/* The option must not extend past the header. */
 				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
 					goto drop;
 				switch (opt.ip6o_type) {
 				case IP6OPT_JUMBO:
 					/* Jumbo option requires a zero payload length. */
 					if (h->ip6_plen != 0)
 						goto drop;
 					if (!pf_pull_hdr(m, ooff, &jumbo,
 					    sizeof(jumbo), NULL, NULL,
 					    AF_INET6))
 						goto shortpkt;
 					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
 					    sizeof(jumbolen));
 					jumbolen = ntohl(jumbolen);
 					/* A jumbo length must exceed the regular maximum. */
 					if (jumbolen <= IPV6_MAXPACKET)
 						goto drop;
 					/* ... and must match the actual packet size. */
 					if (sizeof(struct ip6_hdr) + jumbolen !=
 					    m->m_pkthdr.len)
 						goto drop;
 					break;
 				default:
 					break;
 				}
 				ooff += sizeof(opt) + opt.ip6o_len;
 			} while (ooff < optend);
 
 			off = optend;
 			proto = ext.ip6e_nxt;
 			break;
 		default:
 			terminal = 1;
 			break;
 		}
 	} while (!terminal);
 
 	/* jumbo payload option must be present, or plen > 0 */
 	if (ntohs(h->ip6_plen) == 0)
 		plen = jumbolen;
 	else
 		plen = ntohs(h->ip6_plen);
 	if (plen == 0)
 		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
 	pf_scrub_ip6(&m, r->min_ttl);
 
 	return (PF_PASS);
 
  fragment:
 	/* Jumbo payload packets cannot be fragmented. */
 	plen = ntohs(h->ip6_plen);
 	if (plen == 0 || jumbolen)
 		goto drop;
 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
 		goto shortpkt;
 
 	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
 		goto shortpkt;
 
 	/* Offset now points to data portion. */
 	off += sizeof(frag);
 
 	/* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */
 	if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS)
 		return (PF_DROP);
 	m = *m0;
 	if (m == NULL)
 		return (PF_DROP);
 
 	/* Note: pf_scrub_ip6() is not applied on this path. */
 	pd->flags |= PFDESC_IP_REAS;
 	return (PF_PASS);
 
  shortpkt:
 	/* A header or the announced payload does not fit in the packet. */
 	REASON_SET(reason, PFRES_SHORT);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 
  drop:
 	/* The packet violates one of the normalization checks above. */
 	REASON_SET(reason, PFRES_NORM);
 	if (r != NULL && r->log)
 		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 #endif /* INET6 */
 
 /*
  * Stateless TCP header normalization driven by the scrub ruleset.
  *
  * dir    packet direction, compared against PF_OUT for the counters.
  * kif    interface used for rule matching and logging.
  * m      packet; the TCP header is written back at offset 'off' when it
  *        was modified.
  * ipoff  not referenced by this function.
  * off    offset of the TCP header inside m.
  * h      not referenced by this function.
  * pd     packet descriptor; supplies the address family, addresses and a
  *        pointer to the TCP header copy (pd->hdr.tcp).
  *
  * Segments with illegal flag combinations or a too small data offset are
  * dropped (PF_DROP, logged with PFRES_NORM when the rule asks for logging).
  * Otherwise FIN on a SYN, the reserved bits and a stray urgent pointer are
  * cleared, TCP options are processed when the rule sets max_mss, and
  * PF_PASS is returned.
  */
 int
 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
     int off, void *h, struct pf_pdesc *pd)
 {
 	struct pf_rule	*r, *rm = NULL;
 	struct tcphdr	*th = pd->hdr.tcp;
 	int		 rewrite = 0;
 	u_short		 reason;
 	u_int8_t	 flags;
 	sa_family_t	 af = pd->af;
 
 	PF_RULES_RASSERT();
 
 	/* Find the first scrub rule matching this segment. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != dir)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 			    r->src.port[0], r->src.port[1], th->th_sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 			    r->dst.port[0], r->dst.port[1], th->th_dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
 			    pf_osfp_fingerprint(pd, m, off, th),
 			    r->os_fingerprint))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			rm = r;
 			break;
 		}
 	}
 
 	if (rm == NULL || rm->action == PF_NOSCRUB)
 		return (PF_PASS);
 
 	/* From here on rm is the matched rule (identical to r). */
 	rm->packets[dir == PF_OUT]++;
 	rm->bytes[dir == PF_OUT] += pd->tot_len;
 
 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
 		pd->flags |= PFDESC_TCP_NORM;
 
 	flags = th->th_flags;
 	if (flags & TH_SYN) {
 		/* Illegal packet */
 		if (flags & TH_RST)
 			goto tcp_drop;
 
 		/* SYN+FIN: strip the FIN rather than dropping. */
 		if (flags & TH_FIN)
 			flags &= ~TH_FIN;
 	} else {
 		/* Illegal packet */
 		if (!(flags & (TH_ACK|TH_RST)))
 			goto tcp_drop;
 	}
 
 	if (!(flags & TH_ACK)) {
 		/* These flags are only valid if ACK is set */
 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
 			goto tcp_drop;
 	}
 
 	/* Check for illegal header length */
 	if (th->th_off < (sizeof(struct tcphdr) >> 2))
 		goto tcp_drop;
 
 	/* If flags changed, or reserved data set, then adjust */
 	if (flags != th->th_flags || th->th_x2 != 0) {
 		u_int16_t	ov, nv;
 
 		/*
 		 * Snapshot the 16-bit word located right after th_ack before
 		 * and after the change so the checksum can be fixed up
 		 * incrementally.
 		 */
 		ov = *(u_int16_t *)(&th->th_ack + 1);
 		th->th_flags = flags;
 		th->th_x2 = 0;
 		nv = *(u_int16_t *)(&th->th_ack + 1);
 
 		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
 		rewrite = 1;
 	}
 
 	/* Remove urgent pointer, if TH_URG is not set */
 	if (!(flags & TH_URG) && th->th_urp) {
 		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
 		th->th_urp = 0;
 		rewrite = 1;
 	}
 
 	/* Process options */
 	if (rm->max_mss && pf_normalize_tcpopt(rm, m, th, off, pd->af))
 		rewrite = 1;
 
 	/* copy back packet headers if we sanitized */
 	if (rewrite)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 
  tcp_drop:
 	REASON_SET(&reason, PFRES_NORM);
 	/*
 	 * Log with the packet's real address family; this function is used
 	 * for every family carried in pd->af, not only AF_INET.
 	 */
 	if (rm != NULL && rm->log)
 		PFLOG_PACKET(kif, m, af, dir, reason, rm, NULL, NULL, pd,
 		    1);
 	return (PF_DROP);
 }
 
 /*
  * Allocate and seed the per-peer TCP scrub state in src->scrub.
  *
  * The scrub record is zero-allocated and its TTL / hop limit is taken from
  * the IP header of m.  When the segment carries SYN and TCP options, the
  * options are scanned for a timestamp: if one is found, PFSS_TIMESTAMP is
  * set, a random timestamp modulator is chosen and the initial tsval/tsecr
  * values and the current uptime are recorded.
  *
  * Returns 1 when the allocation fails, 0 otherwise.  dst is not referenced.
  */
 int
 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
 {
 	u_int8_t hdr[60];
 	u_int8_t *opt;
 	u_int32_t tsval, tsecr;
 	int hlen, step;
 
 	KASSERT((src->scrub == NULL),
 	    ("pf_normalize_tcp_init: src->scrub != NULL"));
 
 	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
 	if (src->scrub == NULL)
 		return (1);
 
 	/* Remember the TTL (hop limit) seen on this first packet. */
 #ifdef INET
 	if (pd->af == AF_INET)
 		src->scrub->pfss_ttl = mtod(m, struct ip *)->ip_ttl;
 #endif /* INET */
 #ifdef INET6
 	if (pd->af == AF_INET6)
 		src->scrub->pfss_ttl = mtod(m, struct ip6_hdr *)->ip6_hlim;
 #endif /* INET6 */
 
 	/*
 	 * All normalizations below are only begun if we see the start of
 	 * the connections.  They must all set an enabled bit in pfss_flags
 	 */
 	if (!(th->th_flags & TH_SYN))
 		return (0);
 
 	/* Nothing more to do without options or without the full header. */
 	if (th->th_off <= (sizeof(struct tcphdr) >> 2) || !src->scrub ||
 	    !pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af))
 		return (0);
 
 	/* Diddle with TCP options */
 	for (opt = hdr + sizeof(struct tcphdr),
 	    hlen = (th->th_off << 2) - sizeof(struct tcphdr);
 	    hlen >= TCPOLEN_TIMESTAMP; ) {
 		/* Single byte options carry no length field. */
 		if (*opt == TCPOPT_EOL || *opt == TCPOPT_NOP) {
 			opt++;
 			hlen--;
 			continue;
 		}
 
 		if (*opt == TCPOPT_TIMESTAMP &&
 		    opt[1] >= TCPOLEN_TIMESTAMP) {
 			src->scrub->pfss_flags |= PFSS_TIMESTAMP;
 			src->scrub->pfss_ts_mod = htonl(arc4random());
 
 			/* note PFSS_PAWS not set yet */
 			memcpy(&tsval, &opt[2], sizeof(u_int32_t));
 			memcpy(&tsecr, &opt[6], sizeof(u_int32_t));
 			src->scrub->pfss_tsval0 = ntohl(tsval);
 			src->scrub->pfss_tsval = ntohl(tsval);
 			src->scrub->pfss_tsecr = ntohl(tsecr);
 			getmicrouptime(&src->scrub->pfss_last);
 		}
 
 		/* Advance by the option length, but by at least two bytes. */
 		step = MAX(opt[1], 2);
 		hlen -= step;
 		opt += step;
 	}
 
 	return (0);
 }
 
 void
 pf_normalize_tcp_cleanup(struct pf_state *state)
 {
 	if (state->src.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
 	if (state->dst.scrub)
 		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);
 
 	/* Someday... flush the TCP segment reassembly descriptors. */
 }
 
 int
 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
     u_short *reason, struct tcphdr *th, struct pf_state *state,
     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
 {
 	struct timeval uptime;
 	u_int32_t tsval, tsecr;
 	u_int tsval_from_last;
 	u_int8_t hdr[60];
 	u_int8_t *opt;
 	int copyback = 0;
 	int got_ts = 0;
 
 	KASSERT((src->scrub || dst->scrub),
 	    ("%s: src->scrub && dst->scrub!", __func__));
 
 	/*
 	 * Enforce the minimum TTL seen for this connection.  Negate a common
 	 * technique to evade an intrusion detection system and confuse
 	 * firewall state code.
 	 */
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET: {
 		if (src->scrub) {
 			struct ip *h = mtod(m, struct ip *);
 			if (h->ip_ttl > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip_ttl;
 			h->ip_ttl = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		if (src->scrub) {
 			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
 			if (h->ip6_hlim > src->scrub->pfss_ttl)
 				src->scrub->pfss_ttl = h->ip6_hlim;
 			h->ip6_hlim = src->scrub->pfss_ttl;
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
 		/* Diddle with TCP options */
 		int hlen;
 		opt = hdr + sizeof(struct tcphdr);
 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
 		while (hlen >= TCPOLEN_TIMESTAMP) {
 			switch (*opt) {
 			case TCPOPT_EOL:	/* FALLTHROUGH */
 			case TCPOPT_NOP:
 				opt++;
 				hlen--;
 				break;
 			case TCPOPT_TIMESTAMP:
 				/* Modulate the timestamps.  Can be used for
 				 * NAT detection, OS uptime determination or
 				 * reboot detection.
 				 */
 
 				if (got_ts) {
 					/* Huh?  Multiple timestamps!? */
 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
 						DPFPRINTF(("multiple TS??"));
 						pf_print_state(state);
 						printf("\n");
 					}
 					REASON_SET(reason, PFRES_TS);
 					return (PF_DROP);
 				}
 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
 					memcpy(&tsval, &opt[2],
 					    sizeof(u_int32_t));
 					if (tsval && src->scrub &&
 					    (src->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsval = ntohl(tsval);
 						pf_change_a(&opt[2],
 						    &th->th_sum,
 						    htonl(tsval +
 						    src->scrub->pfss_ts_mod),
 						    0);
 						copyback = 1;
 					}
 
 					/* Modulate TS reply iff valid (!0) */
 					memcpy(&tsecr, &opt[6],
 					    sizeof(u_int32_t));
 					if (tsecr && dst->scrub &&
 					    (dst->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsecr = ntohl(tsecr)
 						    - dst->scrub->pfss_ts_mod;
 						pf_change_a(&opt[6],
 						    &th->th_sum, htonl(tsecr),
 						    0);
 						copyback = 1;
 					}
 					got_ts = 1;
 				}
 				/* FALLTHROUGH */
 			default:
 				hlen -= MAX(opt[1], 2);
 				opt += MAX(opt[1], 2);
 				break;
 			}
 		}
 		if (copyback) {
 			/* Copyback the options, caller copys back header */
 			*writeback = 1;
 			m_copyback(m, off + sizeof(struct tcphdr),
 			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
 			    sizeof(struct tcphdr));
 		}
 	}
 
 
 	/*
 	 * Must invalidate PAWS checks on connections idle for too long.
 	 * The fastest allowed timestamp clock is 1ms.  That turns out to
 	 * be about 24 days before it wraps.  XXX Right now our lowerbound
 	 * TS echo check only works for the first 12 days of a connection
 	 * when the TS has exhausted half its 32bit space
 	 */
 #define TS_MAX_IDLE	(24*24*60*60)
 #define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
 
 	getmicrouptime(&uptime);
 	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
 	    time_uptime - state->creation > TS_MAX_CONN))  {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("src idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
 	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			DPFPRINTF(("dst idled out of PAWS\n"));
 			pf_print_state(state);
 			printf("\n");
 		}
 		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
 		    | PFSS_PAWS_IDLED;
 	}
 
 	if (got_ts && src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Validate that the timestamps are "in-window".
 		 * RFC1323 describes TCP Timestamp options that allow
 		 * measurement of RTT (round trip time) and PAWS
 		 * (protection against wrapped sequence numbers).  PAWS
 		 * gives us a set of rules for rejecting packets on
 		 * long fat pipes (packets that were somehow delayed
 		 * in transit longer than the time it took to send the
 		 * full TCP sequence space of 4Gb).  We can use these
 		 * rules and infer a few others that will let us treat
 		 * the 32bit timestamp and the 32bit echoed timestamp
 		 * as sequence numbers to prevent a blind attacker from
 		 * inserting packets into a connection.
 		 *
 		 * RFC1323 tells us:
 		 *  - The timestamp on this packet must be greater than
 		 *    or equal to the last value echoed by the other
 		 *    endpoint.  The RFC says those will be discarded
 		 *    since it is a dup that has already been acked.
 		 *    This gives us a lowerbound on the timestamp.
 		 *        timestamp >= other last echoed timestamp
 		 *  - The timestamp will be less than or equal to
 		 *    the last timestamp plus the time between the
 		 *    last packet and now.  The RFC defines the max
 		 *    clock rate as 1ms.  We will allow clocks to be
 		 *    up to 10% fast and will allow a total difference
 		 *    or 30 seconds due to a route change.  And this
 		 *    gives us an upperbound on the timestamp.
 		 *        timestamp <= last timestamp + max ticks
 		 *    We have to be careful here.  Windows will send an
 		 *    initial timestamp of zero and then initialize it
 		 *    to a random value after the 3whs; presumably to
 		 *    avoid a DoS by having to call an expensive RNG
 		 *    during a SYN flood.  Proof MS has at least one
 		 *    good security geek.
 		 *
 		 *  - The TCP timestamp option must also echo the other
 		 *    endpoints timestamp.  The timestamp echoed is the
 		 *    one carried on the earliest unacknowledged segment
 		 *    on the left edge of the sequence window.  The RFC
 		 *    states that the host will reject any echoed
 		 *    timestamps that were larger than any ever sent.
 		 *    This gives us an upperbound on the TS echo.
 		 *        tescr <= largest_tsval
 		 *  - The lowerbound on the TS echo is a little more
 		 *    tricky to determine.  The other endpoint's echoed
 		 *    values will not decrease.  But there may be
 		 *    network conditions that re-order packets and
 		 *    cause our view of them to decrease.  For now the
 		 *    only lowerbound we can safely determine is that
 		 *    the TS echo will never be less than the original
 		 *    TS.  XXX There is probably a better lowerbound.
 		 *    Remove TS_MAX_CONN with better lowerbound check.
 		 *        tescr >= other original TS
 		 *
 		 * It is also important to note that the fastest
 		 * timestamp clock of 1ms will wrap its 32bit space in
 		 * 24 days.  So we just disable TS checking after 24
 		 * days of idle time.  We actually must use a 12d
 		 * connection limit until we can come up with a better
 		 * lowerbound to the TS echo check.
 		 */
 		struct timeval delta_ts;
 		int ts_fudge;
 
 
 		/*
 		 * PFTM_TS_DIFF is how many seconds of leeway to allow
 		 * a host's timestamp.  This can happen if the previous
 		 * packet got delayed in transit for much longer than
 		 * this packet.
 		 */
 		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
 			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
 
 		/* Calculate max ticks since the last timestamp */
 #define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
 #define TS_MICROSECS	1000000		/* microseconds per second */
 		delta_ts = uptime;
 		timevalsub(&delta_ts, &src->scrub->pfss_last);
 		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
 		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
 
 		if ((src->state >= TCPS_ESTABLISHED &&
 		    dst->state >= TCPS_ESTABLISHED) &&
 		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
 		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
 		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
 		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
 			/* Bad RFC1323 implementation or an insertion attack.
 			 *
 			 * - Solaris 2.6 and 2.7 are known to send another ACK
 			 *   after the FIN,FIN|ACK,ACK closing that carries
 			 *   an old timestamp.
 			 */
 
 			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
 			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
 			    SEQ_GT(tsval, src->scrub->pfss_tsval +
 			    tsval_from_last) ? '1' : ' ',
 			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
 			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
 			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
 			    "idle: %jus %lums\n",
 			    tsval, tsecr, tsval_from_last,
 			    (uintmax_t)delta_ts.tv_sec,
 			    delta_ts.tv_usec / 1000));
 			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
 			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
 			    "\n", dst->scrub->pfss_tsval,
 			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 
 		/* XXX I'd really like to require tsecr but it's optional */
 
 	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
 	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
 	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
 	    src->scrub && dst->scrub &&
 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
 		/* Didn't send a timestamp.  Timestamps aren't really useful
 		 * when:
 		 *  - connection opening or closing (often not even sent).
 		 *    but we must not let an attacker put a FIN on a
 		 *    data packet to sneak it through our ESTABLISHED check.
 		 *  - on a TCP reset.  RFC suggests not even looking at TS.
 		 *  - on an empty ACK.  The TS will not be echoed so it will
 		 *    probably not help keep the RTT calculation in sync and
 		 *    there isn't as much danger when the sequence numbers
 		 *    got wrapped.  So some stacks don't include TS on empty
 		 *    ACKs :-(
 		 *
 		 * To minimize the disruption to mostly RFC1323 conformant
 		 * stacks, we will only require timestamps on data packets.
 		 *
 		 * And what do ya know, we cannot require timestamps on data
 		 * packets.  There appear to be devices that do legitimate
 		 * TCP connection hijacking.  There are HTTP devices that allow
 		 * a 3whs (with timestamps) and then buffer the HTTP request.
 		 * If the intermediate device has the HTTP response cache, it
 		 * will spoof the response but not bother timestamping its
 		 * packets.  So we can look for the presence of a timestamp in
 		 * the first data packet and if there, require it in all future
 		 * packets.
 		 */
 
 		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
 			/*
 			 * Hey!  Someone tried to sneak a packet in.  Or the
 			 * stack changed its RFC1323 behavior?!?!
 			 */
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				DPFPRINTF(("Did not receive expected RFC1323 "
 				    "timestamp\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 			REASON_SET(reason, PFRES_TS);
 			return (PF_DROP);
 		}
 	}
 
 
 	/*
 	 * We will note if a host sends his data packets with or without
 	 * timestamps.  And require all data packets to contain a timestamp
 	 * if the first does.  PAWS implicitly requires that all data packets be
 	 * timestamped.  But I think there are middle-man devices that hijack
 	 * TCP streams immediately after the 3whs and don't timestamp their
 	 * packets (seen in a WWW accelerator or cache).
 	 */
 	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
 	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
 		if (got_ts)
 			src->scrub->pfss_flags |= PFSS_DATA_TS;
 		else {
 			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
 			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
 			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
 				/* Don't warn if other host rejected RFC1323 */
 				DPFPRINTF(("Broken RFC1323 stack did not "
 				    "timestamp data packet. Disabled PAWS "
 				    "security.\n"));
 				pf_print_state(state);
 				pf_print_flags(th->th_flags);
 				printf("\n");
 			}
 		}
 	}
 
 
 	/*
 	 * Update PAWS values
 	 */
 	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
 	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
 		getmicrouptime(&src->scrub->pfss_last);
 		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
 		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 			src->scrub->pfss_tsval = tsval;
 
 		if (tsecr) {
 			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
 			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_tsecr = tsecr;
 
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
 			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
 			    src->scrub->pfss_tsval0 == 0)) {
 				/* tsval0 MUST be the lowest timestamp */
 				src->scrub->pfss_tsval0 = tsval;
 			}
 
 			/* Only fully initialized after a TS gets echoed */
 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
 				src->scrub->pfss_flags |= PFSS_PAWS;
 		}
 	}
 
 	/* I have a dream....  TCP segment reassembly.... */
 	return (0);
 }
 
 static int
 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
     int off, sa_family_t af)
 {
 	u_int16_t	*mss;
 	int		 thoff;
 	int		 opt, cnt, optlen = 0;
 	int		 rewrite = 0;
 	u_char		 opts[TCP_MAXOLEN];
 	u_char		*optp = opts;
 
 	thoff = th->th_off << 2;
 	cnt = thoff - sizeof(struct tcphdr);
 
 	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
 	    NULL, NULL, af))
 		return (rewrite);
 
 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
 		opt = optp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < 2)
 				break;
 			optlen = optp[1];
 			if (optlen < 2 || optlen > cnt)
 				break;
 		}
 		switch (opt) {
 		case TCPOPT_MAXSEG:
 			mss = (u_int16_t *)(optp + 2);
 			if ((ntohs(*mss)) > r->max_mss) {
 				th->th_sum = pf_cksum_fixup(th->th_sum,
 				    *mss, htons(r->max_mss), 0);
 				*mss = htons(r->max_mss);
 				rewrite = 1;
 			}
 			break;
 		default:
 			break;
 		}
 	}
 
 	if (rewrite)
 		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
 
 	return (rewrite);
 }
 
 #ifdef INET
 /*
  * Apply the IPv4 scrub options to the IP header at the front of *m0.
  *
  *	m0	packet; the header is edited in place
  *	flags	PFRULE_* bits; PFRULE_NODF, PFRULE_SET_TOS and
  *		PFRULE_RANDOMID are the ones examined here
  *	min_ttl	when non-zero, ip_ttl is raised to at least this value
  *	tos	value stored in ip_tos when PFRULE_SET_TOS is set
  *
  * Every header field that is changed is followed by an incremental
  * update of ip_sum through pf_cksum_fixup().
  */
 static void
 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
 {
 	struct mbuf		*m = *m0;
 	struct ip		*h = mtod(m, struct ip *);
 
 	/* Clear IP_DF if no-df was requested */
 	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 		/* Keep the old word so the checksum can be adjusted */
 		u_int16_t ip_off = h->ip_off;
 
 		h->ip_off &= htons(~IP_DF);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 	}
 
 	/* Enforce a minimum ttl, may cause endless packet loops */
 	if (min_ttl && h->ip_ttl < min_ttl) {
 		u_int16_t ip_ttl = h->ip_ttl;
 
 		h->ip_ttl = min_ttl;
 		/*
 		 * NOTE(review): old and new TTL are passed as bare byte
 		 * values; confirm that this matches the byte's position in
 		 * its 16-bit header word on big-endian hosts.
 		 */
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
 	}
 
 	/* Enforce tos */
 	if (flags & PFRULE_SET_TOS) {
 		u_int16_t	ov, nv;
 
 		/*
 		 * ov and nv are the first 16-bit word of the header before
 		 * and after ip_tos is overwritten.
 		 */
 		ov = *(u_int16_t *)h;
 		h->ip_tos = tos;
 		nv = *(u_int16_t *)h;
 
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
 	}
 
 	/*
 	 * random-id, but not for fragments: the id is only replaced when
 	 * ip_off has no bit set other than IP_DF.
 	 */
 	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
-		u_int16_t ip_id = h->ip_id;
+		uint16_t ip_id = h->ip_id;
 
-		h->ip_id = ip_randomid();
+		ip_fillid(h);
 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
 	}
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * IPv6 scrub: raise the hop limit of the header at the front of *m0 to
  * min_ttl when it is lower.  A min_ttl of zero disables the check.
  */
 static void
 pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
 {
 	struct mbuf		*pkt = *m0;
 	struct ip6_hdr		*ip6;
 
 	/* Nothing to enforce */
 	if (min_ttl == 0)
 		return;
 
 	ip6 = mtod(pkt, struct ip6_hdr *);
 
 	/* Enforcing a minimum hop limit may cause endless packet loops */
 	if (ip6->ip6_hlim < min_ttl)
 		ip6->ip6_hlim = min_ttl;
 }
 #endif