Index: projects/runtime-coverage-v2/MAINTAINERS =================================================================== --- projects/runtime-coverage-v2/MAINTAINERS (revision 346924) +++ projects/runtime-coverage-v2/MAINTAINERS (revision 346925) @@ -1,125 +1,132 @@ $FreeBSD$ Please note that the content of this file is strictly advisory. No locks listed here are valid. The only strict review requirements are granted by core. These are documented in head/LOCKS and enforced by svnadmin/conf/approvers. The source tree is a community effort. However, some folks go to the trouble of looking after particular areas of the tree. In return for their active caretaking of the code it is polite to coordinate changes with them. This is a list of people who have expressed an interest in part of the code or listed their active caretaking role so that other committers can easily find somebody who is familiar with it. The notes should specify if there is a 3rd party source tree involved or other things that should be kept in mind. However, this is not a 'big stick', it is an offer to help and a source of guidance. It does not override the communal nature of the tree. It is not a registry of 'turf' or private property. *** This list is prone to becoming stale quickly. The best way to find the recent maintainer of a sub-system is to check recent logs for that directory or sub-system. *** *** Maintainers are encouraged to visit: https://reviews.freebsd.org/herald and configure notifications for parts of the tree which they maintain. Notifications can automatically be sent when someone proposes a revision or makes a commit to the specified subtree. *** subsystem login notes ----------------------------- -atf freebsd-testing,jmmv,ngie Pre-commit review requested. ath(4) adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org +contrib/atf ngie,#test Pre-commit review requested. +contrib/capsicum-test ngie,#capsicum,#test Pre-commit review requested. contrib/compiler-rt dim Pre-commit review preferred. +contrib/googletest ngie,#test Pre-commit review requested. contrib/ipfilter cy Pre-commit review requested. contrib/libc++ dim Pre-commit review preferred. contrib/libcxxrt dim Pre-commit review preferred. contrib/libunwind dim,emaste,jhb Pre-commit review preferred. contrib/llvm dim Pre-commit review preferred. contrib/llvm/tools/lldb dim,emaste Pre-commit review preferred. -contrib/netbsd-tests freebsd-testing,ngie Pre-commit review requested. -contrib/pjdfstest freebsd-testing,asomers,ngie,pjd Pre-commit review requested. +contrib/netbsd-tests ngie,#test Pre-commit review requested. +contrib/pjdfstest asomers,ngie,pjd,#test Pre-commit review requested. *env(3) secteam Due to the problematic security history of this code, please have patches reviewed by secteam. etc/mail gshapiro Pre-commit review requested. Keep in sync with -STABLE. etc/sendmail gshapiro Pre-commit review requested. Keep in sync with -STABLE. fetch des Pre-commit review requested, email only. geli pjd Pre-commit review requested (both sys/geom/eli/ and sbin/geom/class/eli/). isci(4) jimharris Pre-commit review requested. iwm(4) adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org iwn(4) adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org kqueue jmg Pre-commit review requested. Documentation Required. libdpv dteske Pre-commit review requested. Keep in sync with dpv(1). libfetch des Pre-commit review requested, email only. libfigpar dteske Pre-commit review requested. 
libm freebsd-numerics Send email with patches to freebsd-numerics@ libpam des Pre-commit review requested, email only. linprocfs des Pre-commit review requested, email only. lpr gad Pre-commit review requested, particularly for lpd/recvjob.c and lpd/printjob.c. nanobsd imp Pre-commit phabricator review requested. net80211 adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org nfs freebsd-fs@FreeBSD.org, rmacklem is best for reviews. nvd(4) jimharris Pre-commit review requested. nvme(4) jimharris Pre-commit review requested. nvmecontrol(8) jimharris Pre-commit review requested. opencrypto jmg Pre-commit review requested. Documentation Required. openssh des Pre-commit review requested, email only. openssl benl,jkim Pre-commit review requested. otus(4) adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org pci bus imp,jhb Pre-commit review requested. pmcstudy(8) rrs Pre-commit review requested. procfs des Pre-commit review requested, email only. pseudofs des Pre-commit review requested, email only. release/release.sh gjb,re Pre-commit review and regression tests requested. sctp rrs,tuexen Pre-commit review requested (changes need to be backported to github). sendmail gshapiro Pre-commit review requested. sh(1) jilles Pre-commit review requested. This also applies to kill(1), printf(1) and test(1) which are compiled in as builtins. share/mk imp, bapt, bdrewery, emaste, sjg Make is hard. -share/mk/*.test.mk freebsd-testing,ngie (same list as share/mk too) Pre-commit review requested. +share/mk/*.test.mk imp,bapt,bdrewery, Pre-commit review requested. + emaste,ngie,sjg,#test stand/forth dteske Pre-commit review requested. stand/lua kevans Pre-commit review requested -sys/compat/linuxkpi hselasky If in doubt, ask. +sys/compat/linuxkpi hselasky If in doubt, ask. + zeising, johalun pre-commit review requested via + #x11 phabricator group. + (to avoid drm graphics drivers + impact) sys/contrib/ipfilter cy Pre-commit review requested. sys/dev/e1000 erj Pre-commit phabricator review requested. sys/dev/ixgbe erj Pre-commit phabricator review requested. sys/dev/ixl erj Pre-commit phabricator review requested. sys/dev/sound/usb hselasky If in doubt, ask. sys/dev/usb hselasky If in doubt, ask. sys/dev/xen royger Pre-commit review recommended. sys/netinet/ip_carp.c glebius Pre-commit review recommended. sys/netpfil/pf kp,glebius Pre-commit review recommended. sys/x86/xen royger Pre-commit review recommended. sys/xen royger Pre-commit review recommended. -tests freebsd-testing,ngie Pre-commit review requested. +tests ngie,#test Pre-commit review requested. tools/build imp Pre-commit review requested, especially to fix bootstrap issues. top(1) eadler Pre-commit review requested. usr.sbin/bsdconfig dteske Pre-commit phabricator review requested. usr.sbin/dpv dteske Pre-commit review requested. Keep in sync with libdpv. usr.sbin/pkg pkg@ Please coordinate behavior or flag changes with pkg team. usr.sbin/sysrc dteske Pre-commit phabricator review requested. Keep in sync with bsdconfig(8) sysrc.subr. vmm(4) tychon, jhb Pre-commit review requested via #bhyve phabricator group. libvmmapi tychon, jhb Pre-commit review requested via #bhyve phabricator group. usr.sbin/bhyve* tychon, jhb Pre-commit review requested via #bhyve phabricator group. autofs(5) trasz Pre-commit review recommended. iscsi(4) trasz Pre-commit review recommended. rctl(8) trasz Pre-commit review recommended. sys/dev/ofw nwhitehorn Pre-commit review recommended. 
sys/dev/drm* imp Pre-commit review requested in phabricator. Changes need to be mirrored in github repo. sys/dev/usb/wlan adrian Pre-commit review requested, send to freebsd-wireless@freebsd.org sys/arm/allwinner manu Pre-commit review requested sys/arm64/rockchip manu Pre-commit review requested Property changes on: projects/runtime-coverage-v2/MAINTAINERS ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/MAINTAINERS:r346493-346924 Index: projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.ipv4localsctp.ksh =================================================================== --- projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.ipv4localsctp.ksh (revision 346924) +++ projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.ipv4localsctp.ksh (revision 346925) @@ -1,137 +1,153 @@ #!/usr/bin/env ksh # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. # # # Test {ip,sctp}:::{send,receive} of IPv4 SCTP to local host. # # This may fail due to: # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. # 2. The lo0 interface missing or not up. # 3. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a SCTP association and checks that at least the # following packet counts were traced: # # 7 x ip:::send (4 during the setup, 3 during the teardown) # 7 x sctp:::send (4 during the setup, 3 during the teardown) # 7 x ip:::receive (4 during the setup, 3 during the teardown) # 7 x sctp:::receive (4 during the setup, 3 during the teardown) # The actual count tested is 7 each way, since we are tracing both # source and destination events. # if (( $# != 1 )); then print -u2 "expected one argument: <dtrace-path>" exit 2 fi dtrace=$1 local=127.0.0.1 DIR=/var/tmp/dtest.$$ sctpport=1024 bound=5000 -while [ $sctpport -lt $bound ]; do - ncat --sctp -z $local $sctpport > /dev/null || break - sctpport=$(($sctpport + 1)) -done -if [ $sctpport -eq $bound ]; then - echo "couldn't find an available SCTP port" - exit 1 -fi mkdir $DIR cd $DIR -# ncat will exit when the association is closed.
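The port scan deleted above and the client-based probe added below hunt for the same thing: a loopback port with no SCTP listener, signalled by a connection attempt that fails, so the test's own server can claim it. A minimal C sketch of that probe idea, assuming a kernel built with SCTP support; the function name and error handling are illustrative, not part of the test:

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <string.h>
    #include <unistd.h>

    /*
     * Return the first port in [lo, hi) that refuses an SCTP connection,
     * i.e. one a private server could claim, or -1 on failure -- the same
     * outcome the script reaches when $sctpport hits $bound.
     */
    static int
    find_free_sctp_port(int lo, int hi)
    {
            struct sockaddr_in sin;
            int port, s;

            for (port = lo; port < hi; port++) {
                    s = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
                    if (s < 0)
                            return (-1);    /* no SCTP support in this kernel */
                    memset(&sin, 0, sizeof(sin));
                    sin.sin_family = AF_INET;
                    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
                    sin.sin_port = htons(port);
                    if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) != 0) {
                            close(s);       /* refused: nothing listening here */
                            return (port);
                    }
                    close(s);               /* in use; try the next port */
            }
            return (-1);
    }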
-ncat --sctp --listen $local $sctpport & - -cat > test.pl <<-EOPERL +cat > client.pl <<-EOPERL use IO::Socket; my \$s = IO::Socket::INET->new( Type => SOCK_STREAM, Proto => "sctp", LocalAddr => "$local", PeerAddr => "$local", - PeerPort => $sctpport, + PeerPort => \$ARGV[0], Timeout => 3); - die "Could not connect to host $local port $sctpport \$@" unless \$s; + die "Could not connect to host $local port \$ARGV[0] \$@" unless \$s; close \$s; - sleep(2); + sleep(\$ARGV[1]); EOPERL -$dtrace -c 'perl test.pl' -qs /dev/stdin <<EODTRACE +while [ $sctpport -lt $bound ]; do + perl client.pl $sctpport 0 2>&- || break + sctpport=$(($sctpport + 1)) +done +if [ $sctpport -eq $bound ]; then + echo "couldn't find an available SCTP port" + exit 1 +fi + +cat > server.pl <<-EOPERL + use IO::Socket; + my \$l = IO::Socket::INET->new( + Type => SOCK_STREAM, + Proto => "sctp", + LocalAddr => "$local", + LocalPort => $sctpport, + Listen => 1, + Reuse => 1); + die "Could not listen on $local port $sctpport \$@" unless \$l; + my \$c = \$l->accept(); + close \$l; + while (<\$c>) {}; + close \$c; +EOPERL + +perl server.pl & + +$dtrace -c "perl client.pl $sctpport 2" -qs /dev/stdin <<EODTRACE ip:::send /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && args[4]->ipv4_protocol == IPPROTO_SCTP/ { ipsend++; } sctp:::send /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local"/ { sctpsend++; } ip:::receive /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && args[4]->ipv4_protocol == IPPROTO_SCTP/ { ipreceive++; } sctp:::receive /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local"/ { sctpreceive++; } END { printf("Minimum SCTP events seen\n\n"); - printf("ip:::send (%d) - %s\n", ipsend, ipsend >= 7 ? "yes" : "no"); - printf("ip:::receive (%d) - %s\n", ipreceive, ipreceive >= 7 ? "yes" : "no"); - printf("sctp:::send (%d) - %s\n", sctpsend, sctpsend >= 7 ? "yes" : "no"); - printf("sctp:::receive (%d) - %s\n", sctpreceive, sctpreceive >= 7 ? "yes" : "no"); + printf("ip:::send - %s\n", ipsend >= 7 ? "yes" : "no"); + printf("ip:::receive - %s\n", ipreceive >= 7 ? "yes" : "no"); + printf("sctp:::send - %s\n", sctpsend >= 7 ? "yes" : "no"); + printf("sctp:::receive - %s\n", sctpreceive >= 7 ? "yes" : "no"); } EODTRACE status=$? cd / /bin/rm -rf $DIR exit $status Index: projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh =================================================================== --- projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh (revision 346924) +++ projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh (revision 346925) @@ -1,159 +1,175 @@ #!/usr/bin/env ksh # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 2010, Oracle and/or its affiliates.
All rights reserved. # # # Test sctp:::state-change and sctp:::{send,receive} by connecting to # the local discard service. # A number of state transition events along with SCTP send and # receive events for the message should result. # # This may fail due to: # # 1. A change to the ip stack breaking expected probe behavior, # which is the reason we are testing. # 2. The lo0 interface missing or not up. # 3. An unlikely race causes the unlocked global send/receive # variables to be corrupted. # # This test performs a SCTP connection and checks that at least the # following packet counts were traced: # # 7 x ip:::send (4 during the setup, 3 during the teardown) # 7 x sctp:::send (4 during the setup, 3 during the teardown) # 7 x ip:::receive (4 during the setup, 3 during the teardown) # 7 x sctp:::receive (4 during the setup, 3 during the teardown) # # The actual count tested is 7 each way, since we are tracing both # source and destination events. # if (( $# != 1 )); then print -u2 "expected one argument: <dtrace-path>" exit 2 fi dtrace=$1 local=127.0.0.1 DIR=/var/tmp/dtest.$$ sctpport=1024 bound=5000 -while [ $sctpport -lt $bound ]; do - ncat --sctp -z $local $sctpport > /dev/null || break - sctpport=$(($sctpport + 1)) -done -if [ $sctpport -eq $bound ]; then - echo "couldn't find an available SCTP port" - exit 1 -fi mkdir $DIR cd $DIR -# ncat will exit when the association is closed. -ncat --sctp --listen $local $sctpport & - -cat > test.pl <<-EOPERL +cat > client.pl <<-EOPERL use IO::Socket; my \$s = IO::Socket::INET->new( Type => SOCK_STREAM, Proto => "sctp", LocalAddr => "$local", PeerAddr => "$local", - PeerPort => $sctpport, + PeerPort => \$ARGV[0], Timeout => 3); - die "Could not connect to host $local port $sctpport \$@" unless \$s; + die "Could not connect to host $local port \$ARGV[0] \$@" unless \$s; close \$s; - sleep(2); + sleep(\$ARGV[1]); EOPERL -$dtrace -c 'perl test.pl' -qs /dev/stdin <<EODTRACE +while [ $sctpport -lt $bound ]; do + perl client.pl $sctpport 0 2>&- || break + sctpport=$(($sctpport + 1)) +done +if [ $sctpport -eq $bound ]; then + echo "couldn't find an available SCTP port" + exit 1 +fi + +cat > server.pl <<-EOPERL + use IO::Socket; + my \$l = IO::Socket::INET->new( + Type => SOCK_STREAM, + Proto => "sctp", + LocalAddr => "$local", + LocalPort => $sctpport, + Listen => 1, + Reuse => 1); + die "Could not listen on $local port $sctpport \$@" unless \$l; + my \$c = \$l->accept(); + close \$l; + while (<\$c>) {}; + close \$c; +EOPERL + +perl server.pl & + +$dtrace -c "perl client.pl $sctpport 2" -qs /dev/stdin <<EODTRACE ip:::send /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && args[4]->ipv4_protocol == IPPROTO_SCTP/ { ipsend++; } sctp:::send /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && (args[4]->sctp_sport == $sctpport || args[4]->sctp_dport == $sctpport)/ { sctpsend++; } ip:::receive /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && args[4]->ipv4_protocol == IPPROTO_SCTP/ { ipreceive++; } sctp:::receive /args[2]->ip_saddr == "$local" && args[2]->ip_daddr == "$local" && (args[4]->sctp_sport == $sctpport || args[4]->sctp_dport == $sctpport)/ { sctpreceive++; } sctp:::state-change { state_event[args[3]->sctps_state]++; } END { printf("Minimum SCTP events seen\n\n"); printf("ip:::send - %s\n", ipsend >= 7 ? "yes" : "no"); printf("ip:::receive - %s\n", ipreceive >= 7 ? "yes" : "no"); printf("sctp:::send - %s\n", sctpsend >= 7 ? "yes" : "no"); printf("sctp:::receive - %s\n", sctpreceive >= 7 ? "yes" : "no"); printf("sctp:::state-change to cookie-wait - %s\n", state_event[SCTP_STATE_COOKIE_WAIT] >=1 ?
"yes" : "no"); printf("sctp:::state-change to cookie-echoed - %s\n", state_event[SCTP_STATE_COOKIE_ECHOED] >=1 ? "yes" : "no"); printf("sctp:::state-change to established - %s\n", state_event[SCTP_STATE_ESTABLISHED] >= 2 ? "yes" : "no"); printf("sctp:::state-change to shutdown-sent - %s\n", state_event[SCTP_STATE_SHUTDOWN_SENT] >= 1 ? "yes" : "no"); printf("sctp:::state-change to shutdown-received - %s\n", state_event[SCTP_STATE_SHUTDOWN_RECEIVED] >= 1 ? "yes" : "no"); printf("sctp:::state-change to shutdown-ack-sent - %s\n", state_event[SCTP_STATE_SHUTDOWN_ACK_SENT] >= 1 ? "yes" : "no"); } EODTRACE status=$? cd / /bin/rm -rf $DIR exit $status Index: projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh.out =================================================================== --- projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh.out (revision 346924) +++ projects/runtime-coverage-v2/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/ip/tst.localsctpstate.ksh.out (revision 346925) @@ -1,12 +1,13 @@ Minimum SCTP events seen ip:::send - yes ip:::receive - yes sctp:::send - yes sctp:::receive - yes sctp:::state-change to cookie-wait - yes sctp:::state-change to cookie-echoed - yes sctp:::state-change to established - yes sctp:::state-change to shutdown-sent - yes sctp:::state-change to shutdown-received - yes sctp:::state-change to shutdown-ack-sent - yes + Index: projects/runtime-coverage-v2/cddl/contrib/opensolaris =================================================================== --- projects/runtime-coverage-v2/cddl/contrib/opensolaris (revision 346924) +++ projects/runtime-coverage-v2/cddl/contrib/opensolaris (revision 346925) Property changes on: projects/runtime-coverage-v2/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r345824-346924 Index: projects/runtime-coverage-v2/cddl =================================================================== --- projects/runtime-coverage-v2/cddl (revision 346924) +++ projects/runtime-coverage-v2/cddl (revision 346925) Property changes on: projects/runtime-coverage-v2/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r346493-346924 Index: projects/runtime-coverage-v2/lib/libvgl/main.c =================================================================== --- projects/runtime-coverage-v2/lib/libvgl/main.c (revision 346924) +++ projects/runtime-coverage-v2/lib/libvgl/main.c (revision 346925) @@ -1,531 +1,531 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991-1997 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "vgl.h" #define min(x, y) (((x) < (y)) ? (x) : (y)) #define max(x, y) (((x) > (y)) ? (x) : (y)) VGLBitmap *VGLDisplay; VGLBitmap VGLVDisplay; video_info_t VGLModeInfo; video_adapter_info_t VGLAdpInfo; byte *VGLBuf; static int VGLMode; static int VGLOldMode; static size_t VGLBufSize; static byte *VGLMem = MAP_FAILED; static int VGLSwitchPending; static int VGLAbortPending; static int VGLOnDisplay; static unsigned int VGLCurWindow; static int VGLInitDone = 0; static video_info_t VGLOldModeInfo; static vid_info_t VGLOldVInfo; static int VGLOldVXsize; void VGLEnd() { struct vt_mode smode; int size[3]; if (!VGLInitDone) return; VGLInitDone = 0; signal(SIGUSR1, SIG_IGN); signal(SIGUSR2, SIG_IGN); VGLSwitchPending = 0; VGLAbortPending = 0; - VGLMousePointerHide(); + VGLMouseMode(VGL_MOUSEHIDE); if (VGLMem != MAP_FAILED) { VGLClear(VGLDisplay, 0); munmap(VGLMem, VGLAdpInfo.va_window_size); } ioctl(0, FBIO_SETLINEWIDTH, &VGLOldVXsize); if (VGLOldMode >= M_VESA_BASE) ioctl(0, _IO('V', VGLOldMode - M_VESA_BASE), 0); else ioctl(0, _IO('S', VGLOldMode), 0); if (VGLOldModeInfo.vi_flags & V_INFO_GRAPHICS) { size[0] = VGLOldVInfo.mv_csz; size[1] = VGLOldVInfo.mv_rsz; size[2] = VGLOldVInfo.font_size;; ioctl(0, KDRASTER, size); } if (VGLModeInfo.vi_mem_model != V_INFO_MM_DIRECT) ioctl(0, KDDISABIO, 0); ioctl(0, KDSETMODE, KD_TEXT); smode.mode = VT_AUTO; ioctl(0, VT_SETMODE, &smode); if (VGLBuf) free(VGLBuf); VGLBuf = NULL; free(VGLDisplay); VGLDisplay = NULL; VGLKeyboardEnd(); } static void VGLAbort(int arg) { sigset_t mask; VGLAbortPending = 1; signal(SIGINT, SIG_IGN); signal(SIGTERM, SIG_IGN); signal(SIGUSR2, SIG_IGN); if (arg == SIGBUS || arg == SIGSEGV) { signal(arg, SIG_DFL); sigemptyset(&mask); sigaddset(&mask, arg); sigprocmask(SIG_UNBLOCK, &mask, NULL); VGLEnd(); kill(getpid(), arg); } } static void VGLSwitch(int arg __unused) { if (!VGLOnDisplay) VGLOnDisplay = 1; else VGLOnDisplay = 0; VGLSwitchPending = 1; signal(SIGUSR1, VGLSwitch); } int VGLInit(int mode) { struct vt_mode smode; int adptype, depth; if (VGLInitDone) return -1; signal(SIGUSR1, VGLSwitch); signal(SIGINT, VGLAbort); signal(SIGTERM, VGLAbort); signal(SIGSEGV, VGLAbort); signal(SIGBUS, VGLAbort); signal(SIGUSR2, SIG_IGN); VGLOnDisplay = 1; VGLSwitchPending = 0; VGLAbortPending = 0; if (ioctl(0, CONS_GET, &VGLOldMode) || ioctl(0, CONS_CURRENT, &adptype)) return -1; if (IOCGROUP(mode) == 'V') /* XXX: this is ugly */ VGLModeInfo.vi_mode = (mode & 0x0ff) + M_VESA_BASE; else VGLModeInfo.vi_mode = mode & 0x0ff; if (ioctl(0, CONS_MODEINFO, &VGLModeInfo)) /* 
FBIO_MODEINFO */ return -1; /* Save info for old mode to restore font size if old mode is graphics. */ VGLOldModeInfo.vi_mode = VGLOldMode; if (ioctl(0, CONS_MODEINFO, &VGLOldModeInfo)) return -1; VGLOldVInfo.size = sizeof(VGLOldVInfo); if (ioctl(0, CONS_GETINFO, &VGLOldVInfo)) return -1; VGLDisplay = (VGLBitmap *)malloc(sizeof(VGLBitmap)); if (VGLDisplay == NULL) return -2; if (VGLModeInfo.vi_mem_model != V_INFO_MM_DIRECT && ioctl(0, KDENABIO, 0)) { free(VGLDisplay); return -3; } VGLInitDone = 1; /* * vi_mem_model specifies the memory model of the current video mode * in -CURRENT. */ switch (VGLModeInfo.vi_mem_model) { case V_INFO_MM_PLANAR: /* we can handle EGA/VGA planner modes only */ if (VGLModeInfo.vi_depth != 4 || VGLModeInfo.vi_planes != 4 || (adptype != KD_EGA && adptype != KD_VGA)) { VGLEnd(); return -4; } VGLDisplay->Type = VIDBUF4; VGLDisplay->PixelBytes = 1; break; case V_INFO_MM_PACKED: /* we can do only 256 color packed modes */ if (VGLModeInfo.vi_depth != 8) { VGLEnd(); return -4; } VGLDisplay->Type = VIDBUF8; VGLDisplay->PixelBytes = 1; break; case V_INFO_MM_VGAX: VGLDisplay->Type = VIDBUF8X; VGLDisplay->PixelBytes = 1; break; case V_INFO_MM_DIRECT: VGLDisplay->PixelBytes = VGLModeInfo.vi_pixel_size; switch (VGLDisplay->PixelBytes) { case 2: VGLDisplay->Type = VIDBUF16; break; case 3: VGLDisplay->Type = VIDBUF24; break; case 4: VGLDisplay->Type = VIDBUF32; break; default: VGLEnd(); return -4; } break; default: VGLEnd(); return -4; } ioctl(0, VT_WAITACTIVE, 0); ioctl(0, KDSETMODE, KD_GRAPHICS); if (ioctl(0, mode, 0)) { VGLEnd(); return -5; } if (ioctl(0, CONS_ADPINFO, &VGLAdpInfo)) { /* FBIO_ADPINFO */ VGLEnd(); return -6; } /* * Calculate the shadow screen buffer size. In -CURRENT, va_buffer_size * always holds the entire frame buffer size, wheather it's in the linear * mode or windowed mode. * VGLBufSize = VGLAdpInfo.va_buffer_size; * In -STABLE, va_buffer_size holds the frame buffer size, only if * the linear frame buffer mode is supported. Otherwise the field is zero. * We shall calculate the minimal size in this case: * VGLAdpInfo.va_line_width*VGLModeInfo.vi_height*VGLModeInfo.vi_planes * or * VGLAdpInfo.va_window_size*VGLModeInfo.vi_planes; * Use whichever is larger. */ if (VGLAdpInfo.va_buffer_size != 0) VGLBufSize = VGLAdpInfo.va_buffer_size; else VGLBufSize = max(VGLAdpInfo.va_line_width*VGLModeInfo.vi_height, VGLAdpInfo.va_window_size)*VGLModeInfo.vi_planes; /* * The above is for old -CURRENT. Current -CURRENT since r203535 and/or * r248799 restricts va_buffer_size to the displayed size in VESA modes to * avoid wasting kva for mapping unused parts of the frame buffer. But all * parts were usable here. Applying the same restriction to user mappings * makes our virtualization useless and breaks our panning, but large frame * buffers are also difficult for us to manage (clearing and switching may * be too slow, and malloc() may fail). Restrict ourselves similarly to * get the same efficiency and bugs for all kernels. 
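The sizing comment above folds three kernel generations into one rule; restated with made-up numbers (an 800-byte line width, 480 lines, one plane, a 64KB banked window, and a -STABLE kernel that reports va_buffer_size == 0), the fallback picks max(800*480, 65536)*1 = 384000 bytes. A sketch of just that arithmetic, with names shortened from the VGLAdpInfo/VGLModeInfo fields:

    #include <stddef.h>

    /* Shadow-buffer sizing fallback described above; sample values only. */
    static size_t
    shadow_size(size_t buffer_size, size_t line_width, size_t height,
        size_t window_size, size_t planes)
    {
            size_t size;

            if (buffer_size != 0)           /* -CURRENT: kernel reports it */
                    return (buffer_size);
            size = line_width * height;     /* 800 * 480 = 384000 ... */
            if (size < window_size)         /* ... dwarfs the 64KB window */
                    size = window_size;
            return (size * planes);         /* 384000 bytes for this mode */
    }

For VESA modes the code below then clamps the result to va_line_width * vi_height * vi_planes regardless, matching kernels since r203535/r248799.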
*/ if (VGLModeInfo.vi_mode >= M_VESA_BASE) VGLBufSize = VGLAdpInfo.va_line_width*VGLModeInfo.vi_height* VGLModeInfo.vi_planes; VGLBuf = malloc(VGLBufSize); if (VGLBuf == NULL) { VGLEnd(); return -7; } #ifdef LIBVGL_DEBUG fprintf(stderr, "VGLBufSize:0x%x\n", VGLBufSize); #endif /* see if we are in the windowed buffer mode or in the linear buffer mode */ if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) { switch (VGLDisplay->Type) { case VIDBUF4: VGLDisplay->Type = VIDBUF4S; break; case VIDBUF8: VGLDisplay->Type = VIDBUF8S; break; case VIDBUF16: VGLDisplay->Type = VIDBUF16S; break; case VIDBUF24: VGLDisplay->Type = VIDBUF24S; break; case VIDBUF32: VGLDisplay->Type = VIDBUF32S; break; default: VGLEnd(); return -8; } } VGLMode = mode; VGLCurWindow = 0; VGLDisplay->Xsize = VGLModeInfo.vi_width; VGLDisplay->Ysize = VGLModeInfo.vi_height; depth = VGLModeInfo.vi_depth; if (depth == 15) depth = 16; VGLOldVXsize = VGLDisplay->VXsize = VGLAdpInfo.va_line_width *8/(depth/VGLModeInfo.vi_planes); VGLDisplay->VYsize = VGLBufSize/VGLModeInfo.vi_planes/VGLAdpInfo.va_line_width; VGLDisplay->Xorigin = 0; VGLDisplay->Yorigin = 0; VGLMem = (byte*)mmap(0, VGLAdpInfo.va_window_size, PROT_READ|PROT_WRITE, MAP_FILE | MAP_SHARED, 0, 0); if (VGLMem == MAP_FAILED) { VGLEnd(); return -7; } VGLDisplay->Bitmap = VGLMem; VGLVDisplay = *VGLDisplay; VGLVDisplay.Type = MEMBUF; if (VGLModeInfo.vi_depth < 8) VGLVDisplay.Bitmap = malloc(2 * VGLBufSize); else VGLVDisplay.Bitmap = VGLBuf; VGLSavePalette(); #ifdef LIBVGL_DEBUG fprintf(stderr, "va_line_width:%d\n", VGLAdpInfo.va_line_width); fprintf(stderr, "VGLXsize:%d, Ysize:%d, VXsize:%d, VYsize:%d\n", VGLDisplay->Xsize, VGLDisplay->Ysize, VGLDisplay->VXsize, VGLDisplay->VYsize); #endif smode.mode = VT_PROCESS; smode.waitv = 0; smode.relsig = SIGUSR1; smode.acqsig = SIGUSR1; smode.frsig = SIGINT; if (ioctl(0, VT_SETMODE, &smode)) { VGLEnd(); return -9; } VGLTextSetFontFile((byte*)0); VGLClear(VGLDisplay, 0); return 0; } void VGLCheckSwitch() { if (VGLAbortPending) { VGLEnd(); exit(0); } while (VGLSwitchPending) { VGLSwitchPending = 0; if (VGLOnDisplay) { if (VGLModeInfo.vi_mem_model != V_INFO_MM_DIRECT) ioctl(0, KDENABIO, 0); ioctl(0, KDSETMODE, KD_GRAPHICS); ioctl(0, VGLMode, 0); VGLCurWindow = 0; VGLMem = (byte*)mmap(0, VGLAdpInfo.va_window_size, PROT_READ|PROT_WRITE, MAP_FILE | MAP_SHARED, 0, 0); /* XXX: what if mmap() has failed! 
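If it has, VGLMem is left equal to MAP_FAILED and the bitmap copies below write through an invalid pointer; VGLInit() checks the same mmap() call and backs out with -7, which suggests the conservative fix here.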
*/ VGLDisplay->Type = VIDBUF8; /* XXX */ switch (VGLModeInfo.vi_mem_model) { case V_INFO_MM_PLANAR: if (VGLModeInfo.vi_depth == 4 && VGLModeInfo.vi_planes == 4) { if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) VGLDisplay->Type = VIDBUF4S; else VGLDisplay->Type = VIDBUF4; } else { /* shouldn't be happening */ } break; case V_INFO_MM_PACKED: if (VGLModeInfo.vi_depth == 8) { if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) VGLDisplay->Type = VIDBUF8S; else VGLDisplay->Type = VIDBUF8; } break; case V_INFO_MM_VGAX: VGLDisplay->Type = VIDBUF8X; break; case V_INFO_MM_DIRECT: switch (VGLModeInfo.vi_pixel_size) { case 2: if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) VGLDisplay->Type = VIDBUF16S; else VGLDisplay->Type = VIDBUF16; break; case 3: if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) VGLDisplay->Type = VIDBUF24S; else VGLDisplay->Type = VIDBUF24; break; case 4: if (VGLBufSize/VGLModeInfo.vi_planes > VGLAdpInfo.va_window_size) VGLDisplay->Type = VIDBUF32S; else VGLDisplay->Type = VIDBUF32; break; default: /* shouldn't be happening */ break; } default: /* shouldn't be happening */ break; } VGLDisplay->Bitmap = VGLMem; VGLDisplay->Xsize = VGLModeInfo.vi_width; VGLDisplay->Ysize = VGLModeInfo.vi_height; VGLSetVScreenSize(VGLDisplay, VGLDisplay->VXsize, VGLDisplay->VYsize); VGLRestoreBlank(); VGLRestoreBorder(); VGLMouseRestore(); VGLPanScreen(VGLDisplay, VGLDisplay->Xorigin, VGLDisplay->Yorigin); VGLBitmapCopy(&VGLVDisplay, 0, 0, VGLDisplay, 0, 0, VGLDisplay->VXsize, VGLDisplay->VYsize); VGLRestorePalette(); ioctl(0, VT_RELDISP, VT_ACKACQ); } else { VGLMem = MAP_FAILED; munmap(VGLDisplay->Bitmap, VGLAdpInfo.va_window_size); ioctl(0, VGLOldMode, 0); ioctl(0, KDSETMODE, KD_TEXT); if (VGLModeInfo.vi_mem_model != V_INFO_MM_DIRECT) ioctl(0, KDDISABIO, 0); ioctl(0, VT_RELDISP, VT_TRUE); VGLDisplay->Bitmap = VGLBuf; VGLDisplay->Type = MEMBUF; VGLDisplay->Xsize = VGLDisplay->VXsize; VGLDisplay->Ysize = VGLDisplay->VYsize; while (!VGLOnDisplay) pause(); } } } int VGLSetSegment(unsigned int offset) { if (offset/VGLAdpInfo.va_window_size != VGLCurWindow) { ioctl(0, CONS_SETWINORG, offset); /* FBIO_SETWINORG */ VGLCurWindow = offset/VGLAdpInfo.va_window_size; } return (offset%VGLAdpInfo.va_window_size); } int VGLSetVScreenSize(VGLBitmap *object, int VXsize, int VYsize) { int depth; if (VXsize < object->Xsize || VYsize < object->Ysize) return -1; if (object->Type == MEMBUF) return -1; if (ioctl(0, FBIO_SETLINEWIDTH, &VXsize)) return -1; ioctl(0, CONS_ADPINFO, &VGLAdpInfo); /* FBIO_ADPINFO */ depth = VGLModeInfo.vi_depth; if (depth == 15) depth = 16; object->VXsize = VGLAdpInfo.va_line_width *8/(depth/VGLModeInfo.vi_planes); object->VYsize = VGLBufSize/VGLModeInfo.vi_planes/VGLAdpInfo.va_line_width; if (VYsize < object->VYsize) object->VYsize = VYsize; #ifdef LIBVGL_DEBUG fprintf(stderr, "new size: VGLXsize:%d, Ysize:%d, VXsize:%d, VYsize:%d\n", object->Xsize, object->Ysize, object->VXsize, object->VYsize); #endif return 0; } int VGLPanScreen(VGLBitmap *object, int x, int y) { video_display_start_t origin; if (x < 0 || x + object->Xsize > object->VXsize || y < 0 || y + object->Ysize > object->VYsize) return -1; if (object->Type == MEMBUF) return 0; origin.x = x; origin.y = y; if (ioctl(0, FBIO_SETDISPSTART, &origin)) return -1; object->Xorigin = x; object->Yorigin = y; #ifdef LIBVGL_DEBUG fprintf(stderr, "new origin: (%d, %d)\n", x, y); #endif return 0; } Index: projects/runtime-coverage-v2/lib/libvgl/mouse.c 
=================================================================== --- projects/runtime-coverage-v2/lib/libvgl/mouse.c (revision 346924) +++ projects/runtime-coverage-v2/lib/libvgl/mouse.c (revision 346925) @@ -1,341 +1,426 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991-1997 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "vgl.h" +static void VGLMouseAction(int dummy); + #define BORDER 0xff /* default border -- light white in rgb 3:3:2 */ #define INTERIOR 0xa0 /* default interior -- red in rgb 3:3:2 */ +#define LARGE_MOUSE_IMG_XSIZE 19 +#define LARGE_MOUSE_IMG_YSIZE 32 +#define SMALL_MOUSE_IMG_XSIZE 10 +#define SMALL_MOUSE_IMG_YSIZE 16 #define X 0xff /* any nonzero in And mask means part of cursor */ #define B BORDER #define I INTERIOR -static byte StdAndMask[MOUSE_IMG_SIZE*MOUSE_IMG_SIZE] = { - X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - X,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0, - X,X,X,X,0,0,0,0,0,0,0,0,0,0,0,0, - X,X,X,X,X,0,0,0,0,0,0,0,0,0,0,0, - X,X,X,X,X,X,0,0,0,0,0,0,0,0,0,0, - X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0, - X,X,X,X,X,X,X,X,0,0,0,0,0,0,0,0, - X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0, - X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0, - X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0, - X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0, - X,X,X,0,X,X,X,X,0,0,0,0,0,0,0,0, - X,X,0,0,X,X,X,X,0,0,0,0,0,0,0,0, - 0,0,0,0,0,X,X,X,X,0,0,0,0,0,0,0, - 0,0,0,0,0,X,X,X,X,0,0,0,0,0,0,0, - 0,0,0,0,0,0,X,X,0,0,0,0,0,0,0,0, +static byte LargeAndMask[] = { + X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,0,0,0, + 
X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,0, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0, + X,X,X,X,X,X,X,X,X,X,X,X,0,0,0,0,0,0,0, + X,X,X,X,X,X,0,X,X,X,X,X,X,0,0,0,0,0,0, + X,X,X,X,X,0,0,X,X,X,X,X,X,0,0,0,0,0,0, + X,X,X,X,0,0,0,0,X,X,X,X,X,X,0,0,0,0,0, + X,X,X,0,0,0,0,0,X,X,X,X,X,X,0,0,0,0,0, + X,X,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0,0,0, + 0,0,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0,0, + 0,0,0,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0, + 0,0,0,0,0,0,0,0,0,0,0,X,X,X,X,X,X,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,X,X,X,X,0,0,0, }; -static byte StdOrMask[MOUSE_IMG_SIZE*MOUSE_IMG_SIZE] = { - B,B,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - B,I,B,0,0,0,0,0,0,0,0,0,0,0,0,0, - B,I,I,B,0,0,0,0,0,0,0,0,0,0,0,0, - B,I,I,I,B,0,0,0,0,0,0,0,0,0,0,0, - B,I,I,I,I,B,0,0,0,0,0,0,0,0,0,0, - B,I,I,I,I,I,B,0,0,0,0,0,0,0,0,0, - B,I,I,I,I,I,I,B,0,0,0,0,0,0,0,0, - B,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0, - B,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0, - B,I,I,I,I,I,B,B,B,B,0,0,0,0,0,0, - B,I,I,B,I,I,B,0,0,0,0,0,0,0,0,0, - B,I,B,0,B,I,I,B,0,0,0,0,0,0,0,0, - B,B,0,0,B,I,I,B,0,0,0,0,0,0,0,0, - 0,0,0,0,0,B,I,I,B,0,0,0,0,0,0,0, - 0,0,0,0,0,B,I,I,B,0,0,0,0,0,0,0, - 0,0,0,0,0,0,B,B,0,0,0,0,0,0,0,0, +static byte LargeOrMask[] = { + B,B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,B,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,B,0,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,B,0,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,B,0,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,B,0,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,I,B,0,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,I,I,B,0,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,B,0,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,B,0, + B,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,I,B, + B,I,I,I,I,I,I,I,I,I,I,B,B,B,B,B,B,B,B, + B,I,I,I,I,I,I,I,I,I,I,B,0,0,0,0,0,0,0, + B,I,I,I,I,I,B,I,I,I,I,B,0,0,0,0,0,0,0, + B,I,I,I,I,B,0,B,I,I,I,I,B,0,0,0,0,0,0, + B,I,I,I,B,0,0,B,I,I,I,I,B,0,0,0,0,0,0, + B,I,I,B,0,0,0,0,B,I,I,I,I,B,0,0,0,0,0, + B,I,B,0,0,0,0,0,B,I,I,I,I,B,0,0,0,0,0, + B,B,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0,0,0, + 0,0,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0,0, + 0,0,0,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0, + 0,0,0,0,0,0,0,0,0,0,0,B,I,I,I,I,B,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,B,B,B,B,0,0,0, }; +static byte SmallAndMask[] = { + X,X,0,0,0,0,0,0,0,0, + X,X,X,0,0,0,0,0,0,0, + X,X,X,X,0,0,0,0,0,0, + X,X,X,X,X,0,0,0,0,0, + X,X,X,X,X,X,0,0,0,0, + X,X,X,X,X,X,X,0,0,0, + X,X,X,X,X,X,X,X,0,0, + X,X,X,X,X,X,X,X,X,0, + X,X,X,X,X,X,X,X,X,X, + X,X,X,X,X,X,X,X,X,X, + X,X,X,X,X,X,X,0,0,0, + X,X,X,0,X,X,X,X,0,0, + X,X,0,0,X,X,X,X,0,0, + 0,0,0,0,0,X,X,X,X,0, + 0,0,0,0,0,X,X,X,X,0, + 0,0,0,0,0,0,X,X,0,0, +}; +static byte SmallOrMask[] = { + B,B,0,0,0,0,0,0,0,0, + B,I,B,0,0,0,0,0,0,0, + B,I,I,B,0,0,0,0,0,0, + B,I,I,I,B,0,0,0,0,0, + B,I,I,I,I,B,0,0,0,0, + B,I,I,I,I,I,B,0,0,0, + B,I,I,I,I,I,I,B,0,0, + B,I,I,I,I,I,I,I,B,0, + B,I,I,I,I,I,I,I,I,B, + B,I,I,I,I,I,B,B,B,B, + B,I,I,B,I,I,B,0,0,0, + B,I,B,0,B,I,I,B,0,0, + B,B,0,0,B,I,I,B,0,0, + 0,0,0,0,0,B,I,I,B,0, + 0,0,0,0,0,B,I,I,B,0, + 0,0,0,0,0,0,B,B,0,0, +}; #undef X #undef B #undef I -static VGLBitmap VGLMouseStdAndMask = - 
VGLBITMAP_INITIALIZER(MEMBUF, MOUSE_IMG_SIZE, MOUSE_IMG_SIZE, StdAndMask); -static VGLBitmap VGLMouseStdOrMask = - VGLBITMAP_INITIALIZER(MEMBUF, MOUSE_IMG_SIZE, MOUSE_IMG_SIZE, StdOrMask); +static VGLBitmap VGLMouseLargeAndMask = + VGLBITMAP_INITIALIZER(MEMBUF, LARGE_MOUSE_IMG_XSIZE, LARGE_MOUSE_IMG_YSIZE, + LargeAndMask); +static VGLBitmap VGLMouseLargeOrMask = + VGLBITMAP_INITIALIZER(MEMBUF, LARGE_MOUSE_IMG_XSIZE, LARGE_MOUSE_IMG_YSIZE, + LargeOrMask); +static VGLBitmap VGLMouseSmallAndMask = + VGLBITMAP_INITIALIZER(MEMBUF, SMALL_MOUSE_IMG_XSIZE, SMALL_MOUSE_IMG_YSIZE, + SmallAndMask); +static VGLBitmap VGLMouseSmallOrMask = + VGLBITMAP_INITIALIZER(MEMBUF, SMALL_MOUSE_IMG_XSIZE, SMALL_MOUSE_IMG_YSIZE, + SmallOrMask); static VGLBitmap *VGLMouseAndMask, *VGLMouseOrMask; -static int VGLMouseVisible = 0; static int VGLMouseShown = VGL_MOUSEHIDE; static int VGLMouseXpos = 0; static int VGLMouseYpos = 0; static int VGLMouseButtons = 0; static volatile sig_atomic_t VGLMintpending; static volatile sig_atomic_t VGLMsuppressint; #define INTOFF() (VGLMsuppressint++) #define INTON() do { \ if (--VGLMsuppressint == 0 && VGLMintpending) \ VGLMouseAction(0); \ } while (0) -void -VGLMousePointerShow() +int +__VGLMouseMode(int mode) { - if (!VGLMouseVisible) { - INTOFF(); - VGLMouseVisible = 1; - __VGLBitmapCopy(&VGLVDisplay, VGLMouseXpos, VGLMouseYpos, VGLDisplay, - VGLMouseXpos, VGLMouseYpos, MOUSE_IMG_SIZE, -MOUSE_IMG_SIZE); - INTON(); - } -} + int oldmode; -void -VGLMousePointerHide() -{ - if (VGLMouseVisible) { - INTOFF(); - VGLMouseVisible = 0; - __VGLBitmapCopy(&VGLVDisplay, VGLMouseXpos, VGLMouseYpos, VGLDisplay, - VGLMouseXpos, VGLMouseYpos, MOUSE_IMG_SIZE, MOUSE_IMG_SIZE); - INTON(); - } -} - -void -VGLMouseMode(int mode) -{ + INTOFF(); + oldmode = VGLMouseShown; if (mode == VGL_MOUSESHOW) { if (VGLMouseShown == VGL_MOUSEHIDE) { - VGLMousePointerShow(); VGLMouseShown = VGL_MOUSESHOW; + __VGLBitmapCopy(&VGLVDisplay, VGLMouseXpos, VGLMouseYpos, + VGLDisplay, VGLMouseXpos, VGLMouseYpos, + VGLMouseAndMask->VXsize, -VGLMouseAndMask->VYsize); } } else { if (VGLMouseShown == VGL_MOUSESHOW) { - VGLMousePointerHide(); VGLMouseShown = VGL_MOUSEHIDE; + __VGLBitmapCopy(&VGLVDisplay, VGLMouseXpos, VGLMouseYpos, + VGLDisplay, VGLMouseXpos, VGLMouseYpos, + VGLMouseAndMask->VXsize, VGLMouseAndMask->VYsize); } } + INTON(); + return oldmode; } void +VGLMouseMode(int mode) +{ + __VGLMouseMode(mode); +} + +static void VGLMouseAction(int dummy) { struct mouse_info mouseinfo; + int mousemode; if (VGLMsuppressint) { VGLMintpending = 1; return; } again: INTOFF(); VGLMintpending = 0; mouseinfo.operation = MOUSE_GETINFO; ioctl(0, CONS_MOUSECTL, &mouseinfo); - if (VGLMouseShown == VGL_MOUSESHOW) - VGLMousePointerHide(); - VGLMouseXpos = mouseinfo.u.data.x; - VGLMouseYpos = mouseinfo.u.data.y; + if (VGLMouseXpos != mouseinfo.u.data.x || + VGLMouseYpos != mouseinfo.u.data.y) { + mousemode = __VGLMouseMode(VGL_MOUSEHIDE); + VGLMouseXpos = mouseinfo.u.data.x; + VGLMouseYpos = mouseinfo.u.data.y; + __VGLMouseMode(mousemode); + } VGLMouseButtons = mouseinfo.u.data.buttons; - if (VGLMouseShown == VGL_MOUSESHOW) - VGLMousePointerShow(); /* * Loop to handle any new (suppressed) signals. This is INTON() without * recursion. !SA_RESTART prevents recursion in signal handling. So the * maximum recursion is 2 levels. 
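INTOFF()/INTON() above and the goto in VGLMouseAction() form a small deferred-signal pattern: a handler that fires inside a critical section only records the fact, and the section replays it on the way out. The same shape with the mouse specifics stripped away (the names merely mirror VGLMsuppressint/VGLMintpending, and the body is a placeholder):

    #include <signal.h>

    static volatile sig_atomic_t suppress;  /* cf. VGLMsuppressint */
    static volatile sig_atomic_t pending;   /* cf. VGLMintpending */

    static void
    action(int sig)
    {
            (void)sig;
            if (suppress) {         /* critical section active: defer */
                    pending = 1;
                    return;
            }
            do {
                    suppress = 1;   /* INTOFF(): no nested handling */
                    pending = 0;
                    /* ... sample mouse state, repaint the pointer ... */
                    suppress = 0;   /* INTON() minus the recursive call */
            } while (pending);      /* replay anything that arrived */
    }

The do/while replaces the recursion a nested call would cause, which is why the comment above can bound re-entry at two levels.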
*/ VGLMsuppressint = 0; if (VGLMintpending) goto again; } void VGLMouseSetImage(VGLBitmap *AndMask, VGLBitmap *OrMask) { - if (VGLMouseShown == VGL_MOUSESHOW) - VGLMousePointerHide(); + int mousemode; + mousemode = __VGLMouseMode(VGL_MOUSEHIDE); + VGLMouseAndMask = AndMask; if (VGLMouseOrMask != NULL) { free(VGLMouseOrMask->Bitmap); free(VGLMouseOrMask); } VGLMouseOrMask = VGLBitmapCreate(MEMBUF, OrMask->VXsize, OrMask->VYsize, 0); VGLBitmapAllocateBits(VGLMouseOrMask); VGLBitmapCvt(OrMask, VGLMouseOrMask); - if (VGLMouseShown == VGL_MOUSESHOW) - VGLMousePointerShow(); + __VGLMouseMode(mousemode); } void VGLMouseSetStdImage() { - VGLMouseSetImage(&VGLMouseStdAndMask, &VGLMouseStdOrMask); + if (VGLDisplay->VXsize > 800) + VGLMouseSetImage(&VGLMouseLargeAndMask, &VGLMouseLargeOrMask); + else + VGLMouseSetImage(&VGLMouseSmallAndMask, &VGLMouseSmallOrMask); } int VGLMouseInit(int mode) { struct mouse_info mouseinfo; + VGLBitmap *ormask; int andmask, border, error, i, interior; switch (VGLModeInfo.vi_mem_model) { case V_INFO_MM_PACKED: case V_INFO_MM_PLANAR: andmask = 0x0f; border = 0x0f; interior = 0x04; break; case V_INFO_MM_VGAX: andmask = 0x3f; border = 0x3f; interior = 0x24; break; default: andmask = 0xff; border = BORDER; interior = INTERIOR; break; } if (VGLModeInfo.vi_mode == M_BG640x480) border = 0; /* XXX (palette makes 0x04 look like 0x0f) */ if (getenv("VGLMOUSEBORDERCOLOR") != NULL) border = strtoul(getenv("VGLMOUSEBORDERCOLOR"), NULL, 0); if (getenv("VGLMOUSEINTERIORCOLOR") != NULL) interior = strtoul(getenv("VGLMOUSEINTERIORCOLOR"), NULL, 0); - for (i = 0; i < MOUSE_IMG_SIZE*MOUSE_IMG_SIZE; i++) - VGLMouseStdOrMask.Bitmap[i] = VGLMouseStdOrMask.Bitmap[i] == BORDER ? - border : VGLMouseStdOrMask.Bitmap[i] == INTERIOR ? interior : 0; + ormask = &VGLMouseLargeOrMask; + for (i = 0; i < ormask->VXsize * ormask->VYsize; i++) + ormask->Bitmap[i] = ormask->Bitmap[i] == BORDER ? border : + ormask->Bitmap[i] == INTERIOR ? interior : 0; + ormask = &VGLMouseSmallOrMask; + for (i = 0; i < ormask->VXsize * ormask->VYsize; i++) + ormask->Bitmap[i] = ormask->Bitmap[i] == BORDER ? border : + ormask->Bitmap[i] == INTERIOR ? 
interior : 0; VGLMouseSetStdImage(); mouseinfo.operation = MOUSE_MODE; mouseinfo.u.mode.signal = SIGUSR2; if ((error = ioctl(0, CONS_MOUSECTL, &mouseinfo))) return error; signal(SIGUSR2, VGLMouseAction); mouseinfo.operation = MOUSE_GETINFO; ioctl(0, CONS_MOUSECTL, &mouseinfo); VGLMouseXpos = mouseinfo.u.data.x; VGLMouseYpos = mouseinfo.u.data.y; VGLMouseButtons = mouseinfo.u.data.buttons; VGLMouseMode(mode); return 0; } void VGLMouseRestore(void) { struct mouse_info mouseinfo; INTOFF(); mouseinfo.operation = MOUSE_GETINFO; if (ioctl(0, CONS_MOUSECTL, &mouseinfo) == 0) { mouseinfo.operation = MOUSE_MOVEABS; mouseinfo.u.data.x = VGLMouseXpos; mouseinfo.u.data.y = VGLMouseYpos; ioctl(0, CONS_MOUSECTL, &mouseinfo); } INTON(); } int VGLMouseStatus(int *x, int *y, char *buttons) { INTOFF(); *x = VGLMouseXpos; *y = VGLMouseYpos; *buttons = VGLMouseButtons; INTON(); return VGLMouseShown; } void VGLMouseFreeze(void) { INTOFF(); } int VGLMouseFreezeXY(int x, int y) { INTOFF(); if (VGLMouseShown != VGL_MOUSESHOW) return 0; - if (x >= VGLMouseXpos && x < VGLMouseXpos + MOUSE_IMG_SIZE && - y >= VGLMouseYpos && y < VGLMouseYpos + MOUSE_IMG_SIZE && - VGLMouseAndMask->Bitmap[(y-VGLMouseYpos)*MOUSE_IMG_SIZE+(x-VGLMouseXpos)]) + if (x >= VGLMouseXpos && x < VGLMouseXpos + VGLMouseAndMask->VXsize && + y >= VGLMouseYpos && y < VGLMouseYpos + VGLMouseAndMask->VYsize && + VGLMouseAndMask->Bitmap[(y-VGLMouseYpos)*VGLMouseAndMask->VXsize+ + (x-VGLMouseXpos)]) return 1; return 0; } int VGLMouseOverlap(int x, int y, int width, int hight) { int overlap; if (VGLMouseShown != VGL_MOUSESHOW) return 0; if (x > VGLMouseXpos) - overlap = (VGLMouseXpos + MOUSE_IMG_SIZE) - x; + overlap = (VGLMouseXpos + VGLMouseAndMask->VXsize) - x; else overlap = (x + width) - VGLMouseXpos; if (overlap <= 0) return 0; if (y > VGLMouseYpos) - overlap = (VGLMouseYpos + MOUSE_IMG_SIZE) - y; + overlap = (VGLMouseYpos + VGLMouseAndMask->VYsize) - y; else overlap = (y + hight) - VGLMouseYpos; return overlap > 0; } void VGLMouseMerge(int x, int y, int width, byte *line) { int pos, x1, xend, xstart; xstart = x; if (xstart < VGLMouseXpos) xstart = VGLMouseXpos; xend = x + width; - if (xend > VGLMouseXpos + MOUSE_IMG_SIZE) - xend = VGLMouseXpos + MOUSE_IMG_SIZE; + if (xend > VGLMouseXpos + VGLMouseAndMask->VXsize) + xend = VGLMouseXpos + VGLMouseAndMask->VXsize; for (x1 = xstart; x1 < xend; x1++) { - pos = (y - VGLMouseYpos) * MOUSE_IMG_SIZE + x1 - VGLMouseXpos; + pos = (y - VGLMouseYpos) * VGLMouseAndMask->VXsize + x1 - VGLMouseXpos; if (VGLMouseAndMask->Bitmap[pos]) bcopy(&VGLMouseOrMask->Bitmap[pos * VGLDisplay->PixelBytes], &line[(x1 - x) * VGLDisplay->PixelBytes], VGLDisplay->PixelBytes); } } void VGLMouseUnFreeze() { INTON(); } Index: projects/runtime-coverage-v2/lib/libvgl/simple.c =================================================================== --- projects/runtime-coverage-v2/lib/libvgl/simple.c (revision 346924) +++ projects/runtime-coverage-v2/lib/libvgl/simple.c (revision 346925) @@ -1,690 +1,688 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991-1997 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "vgl.h" static int VGLBlank; static byte VGLBorderColor; static byte VGLSavePaletteRed[256]; static byte VGLSavePaletteGreen[256]; static byte VGLSavePaletteBlue[256]; #define ABS(a) (((a)<0) ? -(a) : (a)) #define SGN(a) (((a)<0) ? -1 : 1) #define min(x, y) (((x) < (y)) ? (x) : (y)) #define max(x, y) (((x) > (y)) ? (x) : (y)) void VGLSetXY(VGLBitmap *object, int x, int y, u_long color) { int offset, soffset, undermouse; VGLCheckSwitch(); if (x>=0 && x<object->VXsize && y>=0 && y<object->VYsize) { if (object == VGLDisplay) { undermouse = VGLMouseFreezeXY(x, y); VGLSetXY(&VGLVDisplay, x, y, color); } else if (object->Type != MEMBUF) return; /* invalid */ else undermouse = 0; if (!undermouse) { offset = (y * object->VXsize + x) * object->PixelBytes; switch (object->Type) { case VIDBUF8S: case VIDBUF16S: case VIDBUF32S: offset = VGLSetSegment(offset); /* FALLTHROUGH */ case MEMBUF: case VIDBUF8: case VIDBUF16: case VIDBUF24: case VIDBUF32: color = htole32(color); switch (object->PixelBytes) { case 1: memcpy(&object->Bitmap[offset], &color, 1); break; case 2: memcpy(&object->Bitmap[offset], &color, 2); break; case 3: memcpy(&object->Bitmap[offset], &color, 3); break; case 4: memcpy(&object->Bitmap[offset], &color, 4); break; } break; case VIDBUF24S: soffset = VGLSetSegment(offset); color = htole32(color); switch (VGLAdpInfo.va_window_size - soffset) { case 1: memcpy(&object->Bitmap[soffset], &color, 1); soffset = VGLSetSegment(offset + 1); memcpy(&object->Bitmap[soffset], (byte *)&color + 1, 2); break; case 2: memcpy(&object->Bitmap[soffset], &color, 2); soffset = VGLSetSegment(offset + 2); memcpy(&object->Bitmap[soffset], (byte *)&color + 2, 1); break; default: memcpy(&object->Bitmap[soffset], &color, 3); break; } break; case VIDBUF8X: outb(0x3c4, 0x02); outb(0x3c5, 0x01 << (x&0x3)); object->Bitmap[(unsigned)(VGLAdpInfo.va_line_width*y)+(x/4)] = ((byte)color); break; case VIDBUF4S: offset = VGLSetSegment(y*VGLAdpInfo.va_line_width + x/8); goto set_planar; case VIDBUF4: offset = y*VGLAdpInfo.va_line_width + x/8; set_planar: outb(0x3c4, 0x02); outb(0x3c5, 0x0f); outb(0x3ce, 0x00); outb(0x3cf, (byte)color & 0x0f); /* set/reset */ outb(0x3ce, 0x01); outb(0x3cf, 0x0f); /* set/reset enable */ outb(0x3ce, 0x08); outb(0x3cf, 0x80 >> (x%8)); /* bit mask */ object->Bitmap[offset] |= (byte)color; } } if (object == VGLDisplay) VGLMouseUnFreeze(); } } u_long VGLGetXY(VGLBitmap *object,
int x, int y) { u_long color; int offset; VGLCheckSwitch(); if (x<0 || x>=object->VXsize || y<0 || y>=object->VYsize) return 0; if (object == VGLDisplay) object = &VGLVDisplay; else if (object->Type != MEMBUF) return 0; /* invalid */ offset = (y * object->VXsize + x) * object->PixelBytes; switch (object->PixelBytes) { case 1: memcpy(&color, &object->Bitmap[offset], 1); return le32toh(color) & 0xff; case 2: memcpy(&color, &object->Bitmap[offset], 2); return le32toh(color) & 0xffff; case 3: memcpy(&color, &object->Bitmap[offset], 3); return le32toh(color) & 0xffffff; case 4: memcpy(&color, &object->Bitmap[offset], 4); return le32toh(color); } return 0; /* invalid */ } /* * Symmetric Double Step Line Algorithm by Brian Wyvill from * "Graphics Gems", Academic Press, 1990. */ #define SL_SWAP(a,b) {a^=b; b^=a; a^=b;} #define SL_ABSOLUTE(i,j,k) ( (i-j)*(k = ( (i-j)<0 ? -1 : 1))) void plot(VGLBitmap * object, int x, int y, int flag, u_long color) { /* non-zero flag indicates the pixels need swapping back. */ if (flag) VGLSetXY(object, y, x, color); else VGLSetXY(object, x, y, color); } void VGLLine(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color) { int dx, dy, incr1, incr2, D, x, y, xend, c, pixels_left; int sign_x, sign_y, step, reverse, i; dx = SL_ABSOLUTE(x2, x1, sign_x); dy = SL_ABSOLUTE(y2, y1, sign_y); /* decide increment sign by the slope sign */ if (sign_x == sign_y) step = 1; else step = -1; if (dy > dx) { /* chooses axis of greatest movement (make dx) */ SL_SWAP(x1, y1); SL_SWAP(x2, y2); SL_SWAP(dx, dy); reverse = 1; } else reverse = 0; /* note error check for dx==0 should be included here */ if (x1 > x2) { /* start from the smaller coordinate */ x = x2; y = y2; /* x1 = x1; y1 = y1; */ } else { x = x1; y = y1; x1 = x2; y1 = y2; } /* Note dx=n implies 0 - n or (dx+1) pixels to be set */ /* Go round loop dx/4 times then plot last 0,1,2 or 3 pixels */ /* In fact (dx-1)/4 as 2 pixels are already plotted */ xend = (dx - 1) / 4; pixels_left = (dx - 1) % 4; /* number of pixels left over at the * end */ plot(object, x, y, reverse, color); if (pixels_left < 0) return; /* plot only one pixel for zero length * vectors */ plot(object, x1, y1, reverse, color); /* plot first two points */ incr2 = 4 * dy - 2 * dx; if (incr2 < 0) { /* slope less than 1/2 */ c = 2 * dy; incr1 = 2 * c; D = incr1 - dx; for (i = 0; i < xend; i++) { /* plotting loop */ ++x; --x1; if (D < 0) { /* pattern 1 forwards */ plot(object, x, y, reverse, color); plot(object, ++x, y, reverse, color); /* pattern 1 backwards */ plot(object, x1, y1, reverse, color); plot(object, --x1, y1, reverse, color); D += incr1; } else { if (D < c) { /* pattern 2 forwards */ plot(object, x, y, reverse, color); plot(object, ++x, y += step, reverse, color); /* pattern 2 backwards */ plot(object, x1, y1, reverse, color); plot(object, --x1, y1 -= step, reverse, color); } else { /* pattern 3 forwards */ plot(object, x, y += step, reverse, color); plot(object, ++x, y, reverse, color); /* pattern 3 backwards */ plot(object, x1, y1 -= step, reverse, color); plot(object, --x1, y1, reverse, color); } D += incr2; } } /* end for */ /* plot last pattern */ if (pixels_left) { if (D < 0) { plot(object, ++x, y, reverse, color); /* pattern 1 */ if (pixels_left > 1) plot(object, ++x, y, reverse, color); if (pixels_left > 2) plot(object, --x1, y1, reverse, color); } else { if (D < c) { plot(object, ++x, y, reverse, color); /* pattern 2 */ if (pixels_left > 1) plot(object, ++x, y += step, reverse, color); if (pixels_left > 2) plot(object, --x1, y1, 
reverse, color); } else { /* pattern 3 */ plot(object, ++x, y += step, reverse, color); if (pixels_left > 1) plot(object, ++x, y, reverse, color); if (pixels_left > 2) plot(object, --x1, y1 -= step, reverse, color); } } } /* end if pixels_left */ } /* end slope < 1/2 */ else { /* slope greater than 1/2 */ c = 2 * (dy - dx); incr1 = 2 * c; D = incr1 + dx; for (i = 0; i < xend; i++) { ++x; --x1; if (D > 0) { /* pattern 4 forwards */ plot(object, x, y += step, reverse, color); plot(object, ++x, y += step, reverse, color); /* pattern 4 backwards */ plot(object, x1, y1 -= step, reverse, color); plot(object, --x1, y1 -= step, reverse, color); D += incr1; } else { if (D < c) { /* pattern 2 forwards */ plot(object, x, y, reverse, color); plot(object, ++x, y += step, reverse, color); /* pattern 2 backwards */ plot(object, x1, y1, reverse, color); plot(object, --x1, y1 -= step, reverse, color); } else { /* pattern 3 forwards */ plot(object, x, y += step, reverse, color); plot(object, ++x, y, reverse, color); /* pattern 3 backwards */ plot(object, x1, y1 -= step, reverse, color); plot(object, --x1, y1, reverse, color); } D += incr2; } } /* end for */ /* plot last pattern */ if (pixels_left) { if (D > 0) { plot(object, ++x, y += step, reverse, color); /* pattern 4 */ if (pixels_left > 1) plot(object, ++x, y += step, reverse, color); if (pixels_left > 2) plot(object, --x1, y1 -= step, reverse, color); } else { if (D < c) { plot(object, ++x, y, reverse, color); /* pattern 2 */ if (pixels_left > 1) plot(object, ++x, y += step, reverse, color); if (pixels_left > 2) plot(object, --x1, y1, reverse, color); } else { /* pattern 3 */ plot(object, ++x, y += step, reverse, color); if (pixels_left > 1) plot(object, ++x, y, reverse, color); if (pixels_left > 2) { if (D > c) /* step 3 */ plot(object, --x1, y1 -= step, reverse, color); else /* step 2 */ plot(object, --x1, y1, reverse, color); } } } } } } void VGLBox(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color) { VGLLine(object, x1, y1, x2, y1, color); VGLLine(object, x2, y1, x2, y2, color); VGLLine(object, x2, y2, x1, y2, color); VGLLine(object, x1, y2, x1, y1, color); } void VGLFilledBox(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color) { int y; for (y=y1; y<=y2; y++) VGLLine(object, x1, y, x2, y, color); } static inline void set4pixels(VGLBitmap *object, int x, int y, int xc, int yc, u_long color) { if (x!=0) { VGLSetXY(object, xc+x, yc+y, color); VGLSetXY(object, xc-x, yc+y, color); if (y!=0) { VGLSetXY(object, xc+x, yc-y, color); VGLSetXY(object, xc-x, yc-y, color); } } else { VGLSetXY(object, xc, yc+y, color); if (y!=0) VGLSetXY(object, xc, yc-y, color); } } void VGLEllipse(VGLBitmap *object, int xc, int yc, int a, int b, u_long color) { int x = 0, y = b, asq = a*a, asq2 = a*a*2, bsq = b*b; int bsq2 = b*b*2, d = bsq-asq*b+asq/4, dx = 0, dy = asq2*b; while (dx<dy) { set4pixels(object, x, y, xc, yc, color); if (d>0) { y--; dy-=asq2; d-=dy; } x++; dx+=bsq2; d+=bsq+dx; } d+=(3*(asq-bsq)/2-(dx+dy))/2; while (y>=0) { set4pixels(object, x, y, xc, yc, color); if (d<0) { x++; dx+=bsq2; d+=dx; } y--; dy-=asq2; d+=asq-dy; } } static inline void set2lines(VGLBitmap *object, int x, int y, int xc, int yc, u_long color) { if (x!=0) { VGLLine(object, xc+x, yc+y, xc-x, yc+y, color); if (y!=0) VGLLine(object, xc+x, yc-y, xc-x, yc-y, color); } else { VGLLine(object, xc, yc+y, xc, yc-y, color); } } void VGLFilledEllipse(VGLBitmap *object, int xc, int yc, int a, int b, u_long color) { int x = 0, y = b, asq = a*a, asq2 = a*a*2, bsq = b*b; int bsq2 = b*b*2, d = bsq-asq*b+asq/4, dx = 0, dy = asq2*b;
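/*
 * Midpoint ellipse scan, as in VGLEllipse above: d tracks the decision
 * value b^2*x^2 + a^2*y^2 - a^2*b^2 evaluated between candidate pixels
 * (seeded as bsq - asq*b + asq/4), dx and dy carry the running
 * derivatives 2*b^2*x and 2*a^2*y, and the first loop below ends once
 * the tangent slope passes -1, i.e. when dx >= dy.
 */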
  while (dx<dy) {
    set2lines(object, x, y, xc, yc, color);
    if (d>0) {
      y--; dy-=asq2; d-=dy;
    }
    x++; dx+=bsq2; d+=bsq+dx;
  }
  d+=(3*(asq-bsq)/2-(dx+dy))/2;
  while (y>=0) {
    set2lines(object, x, y, xc, yc, color);
    if (d<0) {
      x++; dx+=bsq2; d+=dx;
    }
    y--; dy-=asq2; d+=asq-dy;
  }
}

void
VGLClear(VGLBitmap *object, u_long color)
{
  VGLBitmap src;
-  int i, len, mouseoverlap, offset;
+  int i, len, mousemode, offset;

  VGLCheckSwitch();
  if (object == VGLDisplay) {
    VGLMouseFreeze();
-    mouseoverlap = VGLMouseOverlap(0, 0, object->VXsize, object->VYsize);
-    if (mouseoverlap)
-      VGLMousePointerHide();
    VGLClear(&VGLVDisplay, color);
  } else if (object->Type != MEMBUF)
    return;		/* invalid */

  switch (object->Type) {
  case MEMBUF:
  case VIDBUF8:
  case VIDBUF8S:
  case VIDBUF16:
  case VIDBUF16S:
  case VIDBUF24:
  case VIDBUF24S:
  case VIDBUF32:
  case VIDBUF32S:
    src.Type = MEMBUF;
    src.Xsize = object->Xsize;
    src.VXsize = object->VXsize;
    src.Ysize = 1;
    src.VYsize = 1;
    src.Xorigin = 0;
    src.Yorigin = 0;
    src.Bitmap = alloca(object->VXsize * object->PixelBytes);
    src.PixelBytes = object->PixelBytes;
    color = htole32(color);
    for (i = 0; i < object->VXsize; i++)
      bcopy(&color, src.Bitmap + i * object->PixelBytes, object->PixelBytes);
    for (i = 0; i < object->VYsize; i++)
-      __VGLBitmapCopy(&src, 0, 0, object, 0, i, object->VXsize, 1);
+      __VGLBitmapCopy(&src, 0, 0, object, 0, i, object->VXsize, -1);
    break;

  case VIDBUF8X:
+    mousemode = __VGLMouseMode(VGL_MOUSEHIDE);
    /* XXX works only for Xsize % 4 = 0 */
    outb(0x3c6, 0xff);
    outb(0x3c4, 0x02); outb(0x3c5, 0x0f);
    memset(object->Bitmap, (byte)color,
	   VGLAdpInfo.va_line_width*object->VYsize);
+    __VGLMouseMode(mousemode);
    break;

  case VIDBUF4:
  case VIDBUF4S:
+    mousemode = __VGLMouseMode(VGL_MOUSEHIDE);
    /* XXX works only for Xsize % 8 = 0 */
    outb(0x3c4, 0x02); outb(0x3c5, 0x0f);
    outb(0x3ce, 0x05); outb(0x3cf, 0x02);	/* mode 2 */
    outb(0x3ce, 0x01); outb(0x3cf, 0x00);	/* set/reset enable */
    outb(0x3ce, 0x08); outb(0x3cf, 0xff);	/* bit mask */
    for (offset = 0; offset < VGLAdpInfo.va_line_width*object->VYsize; ) {
      VGLSetSegment(offset);
      len = min(object->VXsize*object->VYsize - offset,
		VGLAdpInfo.va_window_size);
      memset(object->Bitmap, (byte)color, len);
      offset += len;
    }
    outb(0x3ce, 0x05); outb(0x3cf, 0x00);
+    __VGLMouseMode(mousemode);
    break;
  }
-  if (object == VGLDisplay) {
-    if (mouseoverlap)
-      VGLMousePointerShow();
+  if (object == VGLDisplay)
    VGLMouseUnFreeze();
-  }
}

static inline u_long
VGLrgbToNative(uint16_t r, uint16_t g, uint16_t b)
{
  int nr, ng, nb;

  nr = VGLModeInfo.vi_pixel_fsizes[2];
  ng = VGLModeInfo.vi_pixel_fsizes[1];
  nb = VGLModeInfo.vi_pixel_fsizes[0];
  return (r >> (16 - nr) << (ng + nb)) | (g >> (16 - ng) << nb) |
	 (b >> (16 - nb) << 0);
}

u_long
VGLrgb332ToNative(byte c)
{
  uint16_t r, g, b;

  /* 3:3:2 to 16:16:16 */
  r = ((c & 0xe0) >> 5) * 0xffff / 7;
  g = ((c & 0x1c) >> 2) * 0xffff / 7;
  b = ((c & 0x03) >> 0) * 0xffff / 3;
  return VGLrgbToNative(r, g, b);
}

void
VGLRestorePalette()
{
  int i;

  if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT)
    return;
  outb(0x3C6, 0xFF);
  inb(0x3DA);
  outb(0x3C8, 0x00);
  for (i=0; i<256; i++) {
    outb(0x3C9, VGLSavePaletteRed[i]);
    inb(0x84);
    outb(0x3C9, VGLSavePaletteGreen[i]);
    inb(0x84);
    outb(0x3C9, VGLSavePaletteBlue[i]);
    inb(0x84);
  }
  inb(0x3DA);
  outb(0x3C0, 0x20);
}

void
VGLSavePalette()
{
  int i;

  if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT)
    return;
  outb(0x3C6, 0xFF);
  inb(0x3DA);
  outb(0x3C7, 0x00);
  for (i=0; i<256; i++) {
    VGLSavePaletteRed[i] = inb(0x3C9);
    inb(0x84);
    VGLSavePaletteGreen[i] = inb(0x3C9);
    inb(0x84);
    VGLSavePaletteBlue[i] = inb(0x3C9);
    inb(0x84);
  }
  inb(0x3DA);
  outb(0x3C0, 0x20);
}

void
VGLSetPalette(byte
*red, byte *green, byte *blue) { int i; if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT) return; for (i=0; i<256; i++) { VGLSavePaletteRed[i] = red[i]; VGLSavePaletteGreen[i] = green[i]; VGLSavePaletteBlue[i] = blue[i]; } VGLCheckSwitch(); outb(0x3C6, 0xFF); inb(0x3DA); outb(0x3C8, 0x00); for (i=0; i<256; i++) { outb(0x3C9, VGLSavePaletteRed[i]); inb(0x84); outb(0x3C9, VGLSavePaletteGreen[i]); inb(0x84); outb(0x3C9, VGLSavePaletteBlue[i]); inb(0x84); } inb(0x3DA); outb(0x3C0, 0x20); } void VGLSetPaletteIndex(byte color, byte red, byte green, byte blue) { if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT) return; VGLSavePaletteRed[color] = red; VGLSavePaletteGreen[color] = green; VGLSavePaletteBlue[color] = blue; VGLCheckSwitch(); outb(0x3C6, 0xFF); inb(0x3DA); outb(0x3C8, color); outb(0x3C9, red); outb(0x3C9, green); outb(0x3C9, blue); inb(0x3DA); outb(0x3C0, 0x20); } void VGLRestoreBorder(void) { VGLSetBorder(VGLBorderColor); } void VGLSetBorder(byte color) { if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT && ioctl(0, KDENABIO, 0)) return; VGLCheckSwitch(); inb(0x3DA); outb(0x3C0,0x11); outb(0x3C0, color); inb(0x3DA); outb(0x3C0, 0x20); VGLBorderColor = color; if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT) ioctl(0, KDDISABIO, 0); } void VGLRestoreBlank(void) { VGLBlankDisplay(VGLBlank); } void VGLBlankDisplay(int blank) { byte val; if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT && ioctl(0, KDENABIO, 0)) return; VGLCheckSwitch(); outb(0x3C4, 0x01); val = inb(0x3C5); outb(0x3C4, 0x01); outb(0x3C5, ((blank) ? (val |= 0x20) : (val &= 0xDF))); VGLBlank = blank; if (VGLModeInfo.vi_mem_model == V_INFO_MM_DIRECT) ioctl(0, KDDISABIO, 0); } Index: projects/runtime-coverage-v2/lib/libvgl/vgl.h =================================================================== --- projects/runtime-coverage-v2/lib/libvgl/vgl.h (revision 346924) +++ projects/runtime-coverage-v2/lib/libvgl/vgl.h (revision 346925) @@ -1,165 +1,163 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991-1997 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _VGL_H_ #define _VGL_H_ #include #include #include #include typedef unsigned char byte; typedef struct { byte Type; int Xsize, Ysize; int VXsize, VYsize; int Xorigin, Yorigin; byte *Bitmap; int PixelBytes; } VGLBitmap; #define VGLBITMAP_INITIALIZER(t, x, y, bits) \ { (t), (x), (y), (x), (y), 0, 0, (bits), -1 } /* * Defined Type's */ #define MEMBUF 0 #define VIDBUF4 1 #define VIDBUF8 2 #define VIDBUF8X 3 #define VIDBUF8S 4 #define VIDBUF4S 5 #define VIDBUF16 6 /* Direct Color linear buffer */ #define VIDBUF24 7 /* Direct Color linear buffer */ #define VIDBUF32 8 /* Direct Color linear buffer */ #define VIDBUF16S 9 /* Direct Color segmented buffer */ #define VIDBUF24S 10 /* Direct Color segmented buffer */ #define VIDBUF32S 11 /* Direct Color segmented buffer */ #define NOBUF 255 typedef struct VGLText { byte Width, Height; byte *BitmapArray; } VGLText; typedef struct VGLObject { int Id; int Type; int Status; int Xpos, Ypos; int Xhot, Yhot; VGLBitmap *Image; VGLBitmap *Mask; int (*CallBackFunction)(); } VGLObject; #define MOUSE_IMG_SIZE 16 #define VGL_MOUSEHIDE 0 #define VGL_MOUSESHOW 1 #define VGL_MOUSEFREEZE 0 #define VGL_MOUSEUNFREEZE 1 #define VGL_DIR_RIGHT 0 #define VGL_DIR_UP 1 #define VGL_DIR_LEFT 2 #define VGL_DIR_DOWN 3 #define VGL_RAWKEYS 1 #define VGL_CODEKEYS 2 #define VGL_XLATEKEYS 3 extern video_adapter_info_t VGLAdpInfo; extern video_info_t VGLModeInfo; extern VGLBitmap *VGLDisplay; extern VGLBitmap VGLVDisplay; extern byte *VGLBuf; /* * Prototypes */ /* bitmap.c */ int __VGLBitmapCopy(VGLBitmap *src, int srcx, int srcy, VGLBitmap *dst, int dstx, int dsty, int width, int hight); int VGLBitmapCopy(VGLBitmap *src, int srcx, int srcy, VGLBitmap *dst, int dstx, int dsty, int width, int hight); VGLBitmap *VGLBitmapCreate(int type, int xsize, int ysize, byte *bits); void VGLBitmapDestroy(VGLBitmap *object); int VGLBitmapAllocateBits(VGLBitmap *object); void VGLBitmapCvt(VGLBitmap *src, VGLBitmap *dst); /* keyboard.c */ int VGLKeyboardInit(int mode); void VGLKeyboardEnd(void); int VGLKeyboardGetCh(void); /* main.c */ void VGLEnd(void); int VGLInit(int mode); void VGLCheckSwitch(void); int VGLSetVScreenSize(VGLBitmap *object, int VXsize, int VYsize); int VGLPanScreen(VGLBitmap *object, int x, int y); int VGLSetSegment(unsigned int offset); /* mouse.c */ -void VGLMousePointerShow(void); -void VGLMousePointerHide(void); +int __VGLMouseMode(int mode); void VGLMouseMode(int mode); -void VGLMouseAction(int dummy); void VGLMouseSetImage(VGLBitmap *AndMask, VGLBitmap *OrMask); void VGLMouseSetStdImage(void); int VGLMouseInit(int mode); void VGLMouseRestore(void); int VGLMouseStatus(int *x, int *y, char *buttons); void VGLMouseFreeze(void); int VGLMouseFreezeXY(int x, int y); void VGLMouseMerge(int x, int y, int width, byte *line); int VGLMouseOverlap(int x, int y, int width, int hight); void VGLMouseUnFreeze(void); /* simple.c */ void VGLSetXY(VGLBitmap *object, int x, int y, u_long color); u_long VGLGetXY(VGLBitmap *object, int x, int y); void VGLLine(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color); void VGLBox(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color); void VGLFilledBox(VGLBitmap *object, int x1, int y1, int x2, int y2, u_long color); void VGLEllipse(VGLBitmap *object, int xc, int yc, int a, int b, u_long color); void VGLFilledEllipse(VGLBitmap *object, int xc, int yc, int a, int b, u_long color); void VGLClear(VGLBitmap *object, u_long color); u_long VGLrgb332ToNative(byte c); void VGLRestoreBlank(void); void 
VGLRestoreBorder(void); void VGLRestorePalette(void); void VGLSavePalette(void); void VGLSetPalette(byte *red, byte *green, byte *blue); void VGLSetPaletteIndex(byte color, byte red, byte green, byte blue); void VGLSetBorder(byte color); void VGLBlankDisplay(int blank); /* text.c */ int VGLTextSetFontFile(char *filename); void VGLBitmapPutChar(VGLBitmap *Object, int x, int y, byte ch, u_long fgcol, u_long bgcol, int fill, int dir); void VGLBitmapString(VGLBitmap *Object, int x, int y, char *str, u_long fgcol, u_long bgcol, int fill, int dir); #endif /* !_VGL_H_ */ Index: projects/runtime-coverage-v2/sbin/ipfw/ipfw2.c =================================================================== --- projects/runtime-coverage-v2/sbin/ipfw/ipfw2.c (revision 346924) +++ projects/runtime-coverage-v2/sbin/ipfw/ipfw2.c (revision 346925) @@ -1,5583 +1,5587 @@ /*- * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * * Idea and grammar partially left from: * Copyright (c) 1993 Daniel Boulet * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * NEW command line interface for IP firewall facility * * $FreeBSD$ */ #include #include #include #include #include #include "ipfw2.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* ctime */ #include /* _long_to_time */ #include #include #include /* offsetof */ #include #include /* only IFNAMSIZ */ #include #include /* only n_short, n_long */ #include #include #include #include #include struct cmdline_opts co; /* global options */ struct format_opts { int bcwidth; int pcwidth; int show_counters; int show_time; /* show timestamp */ uint32_t set_mask; /* enabled sets mask */ uint32_t flags; /* request flags */ uint32_t first; /* first rule to request */ uint32_t last; /* last rule to request */ uint32_t dcnt; /* number of dynamic states */ ipfw_obj_ctlv *tstate; /* table state data */ }; int resvd_set_number = RESVD_SET; int ipfw_socket = -1; #define CHECK_LENGTH(v, len) do { \ if ((v) < (len)) \ errx(EX_DATAERR, "Rule too long"); \ } while (0) /* * Check if we have enough space in cmd buffer. Note that since * first 8? u32 words are reserved by reserved header, full cmd * buffer can't be used, so we need to protect from buffer overrun * only. At the beginning, cblen is less than actual buffer size by * size of ipfw_insn_u32 instruction + 1 u32 work. This eliminates need * for checking small instructions fitting in given range. * We also (ab)use the fact that ipfw_insn is always the first field * for any custom instruction. 
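 *
 * Illustrative sketch (added by the editor, not part of the original
 * comment): once the parser has consumed all but the reserved slack,
 * a guard such as
 *	CHECK_LENGTH(cblen, F_LEN((ipfw_insn *)cmd));
 * (i.e. CHECK_CMDLEN) aborts with "Rule too long" instead of letting
 * the next instruction overrun the cmd buffer.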
*/ #define CHECK_CMDLEN CHECK_LENGTH(cblen, F_LEN((ipfw_insn *)cmd)) #define GET_UINT_ARG(arg, min, max, tok, s_x) do { \ if (!av[0]) \ errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \ if (_substrcmp(*av, "tablearg") == 0) { \ arg = IP_FW_TARG; \ break; \ } \ \ { \ long _xval; \ char *end; \ \ _xval = strtol(*av, &end, 10); \ \ if (!isdigit(**av) || *end != '\0' || (_xval == 0 && errno == EINVAL)) \ errx(EX_DATAERR, "%s: invalid argument: %s", \ match_value(s_x, tok), *av); \ \ if (errno == ERANGE || _xval < min || _xval > max) \ errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \ match_value(s_x, tok), min, max, *av); \ \ if (_xval == IP_FW_TARG) \ errx(EX_DATAERR, "%s: illegal argument value: %s", \ match_value(s_x, tok), *av); \ arg = _xval; \ } \ } while (0) static struct _s_x f_tcpflags[] = { { "syn", TH_SYN }, { "fin", TH_FIN }, { "ack", TH_ACK }, { "psh", TH_PUSH }, { "rst", TH_RST }, { "urg", TH_URG }, { "tcp flag", 0 }, { NULL, 0 } }; static struct _s_x f_tcpopts[] = { { "mss", IP_FW_TCPOPT_MSS }, { "maxseg", IP_FW_TCPOPT_MSS }, { "window", IP_FW_TCPOPT_WINDOW }, { "sack", IP_FW_TCPOPT_SACK }, { "ts", IP_FW_TCPOPT_TS }, { "timestamp", IP_FW_TCPOPT_TS }, { "cc", IP_FW_TCPOPT_CC }, { "tcp option", 0 }, { NULL, 0 } }; /* * IP options span the range 0 to 255 so we need to remap them * (though in fact only the low 5 bits are significant). */ static struct _s_x f_ipopts[] = { { "ssrr", IP_FW_IPOPT_SSRR}, { "lsrr", IP_FW_IPOPT_LSRR}, { "rr", IP_FW_IPOPT_RR}, { "ts", IP_FW_IPOPT_TS}, { "ip option", 0 }, { NULL, 0 } }; static struct _s_x f_iptos[] = { { "lowdelay", IPTOS_LOWDELAY}, { "throughput", IPTOS_THROUGHPUT}, { "reliability", IPTOS_RELIABILITY}, { "mincost", IPTOS_MINCOST}, { "congestion", IPTOS_ECN_CE}, { "ecntransport", IPTOS_ECN_ECT0}, { "ip tos option", 0}, { NULL, 0 } }; struct _s_x f_ipdscp[] = { { "af11", IPTOS_DSCP_AF11 >> 2 }, /* 001010 */ { "af12", IPTOS_DSCP_AF12 >> 2 }, /* 001100 */ { "af13", IPTOS_DSCP_AF13 >> 2 }, /* 001110 */ { "af21", IPTOS_DSCP_AF21 >> 2 }, /* 010010 */ { "af22", IPTOS_DSCP_AF22 >> 2 }, /* 010100 */ { "af23", IPTOS_DSCP_AF23 >> 2 }, /* 010110 */ { "af31", IPTOS_DSCP_AF31 >> 2 }, /* 011010 */ { "af32", IPTOS_DSCP_AF32 >> 2 }, /* 011100 */ { "af33", IPTOS_DSCP_AF33 >> 2 }, /* 011110 */ { "af41", IPTOS_DSCP_AF41 >> 2 }, /* 100010 */ { "af42", IPTOS_DSCP_AF42 >> 2 }, /* 100100 */ { "af43", IPTOS_DSCP_AF43 >> 2 }, /* 100110 */ { "be", IPTOS_DSCP_CS0 >> 2 }, /* 000000 */ { "ef", IPTOS_DSCP_EF >> 2 }, /* 101110 */ { "cs0", IPTOS_DSCP_CS0 >> 2 }, /* 000000 */ { "cs1", IPTOS_DSCP_CS1 >> 2 }, /* 001000 */ { "cs2", IPTOS_DSCP_CS2 >> 2 }, /* 010000 */ { "cs3", IPTOS_DSCP_CS3 >> 2 }, /* 011000 */ { "cs4", IPTOS_DSCP_CS4 >> 2 }, /* 100000 */ { "cs5", IPTOS_DSCP_CS5 >> 2 }, /* 101000 */ { "cs6", IPTOS_DSCP_CS6 >> 2 }, /* 110000 */ { "cs7", IPTOS_DSCP_CS7 >> 2 }, /* 100000 */ { NULL, 0 } }; static struct _s_x limit_masks[] = { {"all", DYN_SRC_ADDR|DYN_SRC_PORT|DYN_DST_ADDR|DYN_DST_PORT}, {"src-addr", DYN_SRC_ADDR}, {"src-port", DYN_SRC_PORT}, {"dst-addr", DYN_DST_ADDR}, {"dst-port", DYN_DST_PORT}, {NULL, 0} }; /* * we use IPPROTO_ETHERTYPE as a fake protocol id to call the print routines * This is only used in this code. */ #define IPPROTO_ETHERTYPE 0x1000 static struct _s_x ether_types[] = { /* * Note, we cannot use "-:&/" in the names because they are field * separators in the type specifications. Also, we use s = NULL as * end-delimiter, because a type of 0 can be legal. 
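 *
 * Example (editor's sketch, not part of the original comment): a
 * mac-type list such as "ip,arp,0x8100" is split at the commas and each
 * field is resolved through this table, so "ip" yields 0x0800 and
 * "arp" 0x0806, while numeric fields are parsed as-is.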
*/ { "ip", 0x0800 }, { "ipv4", 0x0800 }, { "ipv6", 0x86dd }, { "arp", 0x0806 }, { "rarp", 0x8035 }, { "vlan", 0x8100 }, { "loop", 0x9000 }, { "trail", 0x1000 }, { "at", 0x809b }, { "atalk", 0x809b }, { "aarp", 0x80f3 }, { "pppoe_disc", 0x8863 }, { "pppoe_sess", 0x8864 }, { "ipx_8022", 0x00E0 }, { "ipx_8023", 0x0000 }, { "ipx_ii", 0x8137 }, { "ipx_snap", 0x8137 }, { "ipx", 0x8137 }, { "ns", 0x0600 }, { NULL, 0 } }; static struct _s_x rule_eactions[] = { { "nat64clat", TOK_NAT64CLAT }, { "nat64lsn", TOK_NAT64LSN }, { "nat64stl", TOK_NAT64STL }, { "nptv6", TOK_NPTV6 }, { "tcp-setmss", TOK_TCPSETMSS }, { NULL, 0 } /* terminator */ }; static struct _s_x rule_actions[] = { { "abort6", TOK_ABORT6 }, { "abort", TOK_ABORT }, { "accept", TOK_ACCEPT }, { "pass", TOK_ACCEPT }, { "allow", TOK_ACCEPT }, { "permit", TOK_ACCEPT }, { "count", TOK_COUNT }, { "pipe", TOK_PIPE }, { "queue", TOK_QUEUE }, { "divert", TOK_DIVERT }, { "tee", TOK_TEE }, { "netgraph", TOK_NETGRAPH }, { "ngtee", TOK_NGTEE }, { "fwd", TOK_FORWARD }, { "forward", TOK_FORWARD }, { "skipto", TOK_SKIPTO }, { "deny", TOK_DENY }, { "drop", TOK_DENY }, { "reject", TOK_REJECT }, { "reset6", TOK_RESET6 }, { "reset", TOK_RESET }, { "unreach6", TOK_UNREACH6 }, { "unreach", TOK_UNREACH }, { "check-state", TOK_CHECKSTATE }, { "//", TOK_COMMENT }, { "nat", TOK_NAT }, { "reass", TOK_REASS }, { "setfib", TOK_SETFIB }, { "setdscp", TOK_SETDSCP }, { "call", TOK_CALL }, { "return", TOK_RETURN }, { "eaction", TOK_EACTION }, { "tcp-setmss", TOK_TCPSETMSS }, { NULL, 0 } /* terminator */ }; static struct _s_x rule_action_params[] = { { "altq", TOK_ALTQ }, { "log", TOK_LOG }, { "tag", TOK_TAG }, { "untag", TOK_UNTAG }, { NULL, 0 } /* terminator */ }; /* * The 'lookup' instruction accepts one of the following arguments. * -1 is a terminator for the list. * Arguments are passed as v[1] in O_DST_LOOKUP options. 
*/ static int lookup_key[] = { TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT, TOK_UID, TOK_JAIL, TOK_DSCP, -1 }; static struct _s_x rule_options[] = { { "tagged", TOK_TAGGED }, { "uid", TOK_UID }, { "gid", TOK_GID }, { "jail", TOK_JAIL }, { "in", TOK_IN }, { "limit", TOK_LIMIT }, { "set-limit", TOK_SETLIMIT }, { "keep-state", TOK_KEEPSTATE }, { "record-state", TOK_RECORDSTATE }, { "bridged", TOK_LAYER2 }, { "layer2", TOK_LAYER2 }, { "out", TOK_OUT }, { "diverted", TOK_DIVERTED }, { "diverted-loopback", TOK_DIVERTEDLOOPBACK }, { "diverted-output", TOK_DIVERTEDOUTPUT }, { "xmit", TOK_XMIT }, { "recv", TOK_RECV }, { "via", TOK_VIA }, { "fragment", TOK_FRAG }, { "frag", TOK_FRAG }, { "fib", TOK_FIB }, { "ipoptions", TOK_IPOPTS }, { "ipopts", TOK_IPOPTS }, { "iplen", TOK_IPLEN }, { "ipid", TOK_IPID }, { "ipprecedence", TOK_IPPRECEDENCE }, { "dscp", TOK_DSCP }, { "iptos", TOK_IPTOS }, { "ipttl", TOK_IPTTL }, { "ipversion", TOK_IPVER }, { "ipver", TOK_IPVER }, { "estab", TOK_ESTAB }, { "established", TOK_ESTAB }, { "setup", TOK_SETUP }, { "sockarg", TOK_SOCKARG }, { "tcpdatalen", TOK_TCPDATALEN }, { "tcpflags", TOK_TCPFLAGS }, { "tcpflgs", TOK_TCPFLAGS }, { "tcpoptions", TOK_TCPOPTS }, { "tcpopts", TOK_TCPOPTS }, { "tcpseq", TOK_TCPSEQ }, { "tcpack", TOK_TCPACK }, { "tcpwin", TOK_TCPWIN }, { "icmptype", TOK_ICMPTYPES }, { "icmptypes", TOK_ICMPTYPES }, { "dst-ip", TOK_DSTIP }, { "src-ip", TOK_SRCIP }, { "dst-port", TOK_DSTPORT }, { "src-port", TOK_SRCPORT }, { "proto", TOK_PROTO }, { "MAC", TOK_MAC }, { "mac", TOK_MAC }, { "mac-type", TOK_MACTYPE }, { "verrevpath", TOK_VERREVPATH }, { "versrcreach", TOK_VERSRCREACH }, { "antispoof", TOK_ANTISPOOF }, { "ipsec", TOK_IPSEC }, { "icmp6type", TOK_ICMP6TYPES }, { "icmp6types", TOK_ICMP6TYPES }, { "ext6hdr", TOK_EXT6HDR}, { "flow-id", TOK_FLOWID}, { "ipv6", TOK_IPV6}, { "ip6", TOK_IPV6}, { "ipv4", TOK_IPV4}, { "ip4", TOK_IPV4}, { "dst-ipv6", TOK_DSTIP6}, { "dst-ip6", TOK_DSTIP6}, { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, { "lookup", TOK_LOOKUP}, { "flow", TOK_FLOW}, { "defer-action", TOK_SKIPACTION }, { "defer-immediate-action", TOK_SKIPACTION }, { "//", TOK_COMMENT }, { "not", TOK_NOT }, /* pseudo option */ { "!", /* escape ? */ TOK_NOT }, /* pseudo option */ { "or", TOK_OR }, /* pseudo option */ { "|", /* escape */ TOK_OR }, /* pseudo option */ { "{", TOK_STARTBRACE }, /* pseudo option */ { "(", TOK_STARTBRACE }, /* pseudo option */ { "}", TOK_ENDBRACE }, /* pseudo option */ { ")", TOK_ENDBRACE }, /* pseudo option */ { NULL, 0 } /* terminator */ }; void bprint_uint_arg(struct buf_pr *bp, const char *str, uint32_t arg); static int ipfw_get_config(struct cmdline_opts *co, struct format_opts *fo, ipfw_cfg_lheader **pcfg, size_t *psize); static int ipfw_show_config(struct cmdline_opts *co, struct format_opts *fo, ipfw_cfg_lheader *cfg, size_t sz, int ac, char **av); static void ipfw_list_tifaces(void); struct tidx; static uint16_t pack_object(struct tidx *tstate, char *name, int otype); static uint16_t pack_table(struct tidx *tstate, char *name); static char *table_search_ctlv(ipfw_obj_ctlv *ctlv, uint16_t idx); static void object_sort_ctlv(ipfw_obj_ctlv *ctlv); static char *object_search_ctlv(ipfw_obj_ctlv *ctlv, uint16_t idx, uint16_t type); /* * Simple string buffer API. * Used to simplify buffer passing between function and for * transparent overrun handling. */ /* * Allocates new buffer of given size @sz. * * Returns 0 on success. 
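 *
 * Typical usage of the buffer API (editor's sketch, not part of the
 * original comment):
 *
 *	struct buf_pr bp;
 *
 *	if (bp_alloc(&bp, 4096) == 0) {
 *		bprintf(&bp, "%05u ", 100u);
 *		printf("%s\n", bp.buf);
 *		bp_free(&bp);
 *	}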
*/ int bp_alloc(struct buf_pr *b, size_t size) { memset(b, 0, sizeof(struct buf_pr)); if ((b->buf = calloc(1, size)) == NULL) return (ENOMEM); b->ptr = b->buf; b->size = size; b->avail = b->size; return (0); } void bp_free(struct buf_pr *b) { free(b->buf); } /* * Flushes buffer so new writer start from beginning. */ void bp_flush(struct buf_pr *b) { b->ptr = b->buf; b->avail = b->size; b->buf[0] = '\0'; } /* * Print message specified by @format and args. * Automatically manage buffer space and transparently handle * buffer overruns. * * Returns number of bytes that should have been printed. */ int bprintf(struct buf_pr *b, char *format, ...) { va_list args; int i; va_start(args, format); i = vsnprintf(b->ptr, b->avail, format, args); va_end(args); if (i > b->avail || i < 0) { /* Overflow or print error */ b->avail = 0; } else { b->ptr += i; b->avail -= i; } b->needed += i; return (i); } /* * Special values printer for tablearg-aware opcodes. */ void bprint_uint_arg(struct buf_pr *bp, const char *str, uint32_t arg) { if (str != NULL) bprintf(bp, "%s", str); if (arg == IP_FW_TARG) bprintf(bp, "tablearg"); else bprintf(bp, "%u", arg); } /* * Helper routine to print a possibly unaligned uint64_t on * various platform. If width > 0, print the value with * the desired width, followed by a space; * otherwise, return the required width. */ int pr_u64(struct buf_pr *b, uint64_t *pd, int width) { #ifdef TCC #define U64_FMT "I64" #else #define U64_FMT "llu" #endif uint64_t u; unsigned long long d; bcopy (pd, &u, sizeof(u)); d = u; return (width > 0) ? bprintf(b, "%*" U64_FMT " ", width, d) : snprintf(NULL, 0, "%" U64_FMT, d) ; #undef U64_FMT } void * safe_calloc(size_t number, size_t size) { void *ret = calloc(number, size); if (ret == NULL) err(EX_OSERR, "calloc"); return ret; } void * safe_realloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); if (ret == NULL) err(EX_OSERR, "realloc"); return ret; } /* * Compare things like interface or table names. */ int stringnum_cmp(const char *a, const char *b) { int la, lb; la = strlen(a); lb = strlen(b); if (la > lb) return (1); else if (la < lb) return (-01); return (strcmp(a, b)); } /* * conditionally runs the command. * Selected options or negative -> getsockopt */ int do_cmd(int optname, void *optval, uintptr_t optlen) { int i; if (co.test_only) return 0; if (ipfw_socket == -1) ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (ipfw_socket < 0) err(EX_UNAVAILABLE, "socket"); if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET || optname == IP_FW_ADD || optname == IP_FW3 || optname == IP_FW_NAT_GET_CONFIG || optname < 0 || optname == IP_FW_NAT_GET_LOG) { if (optname < 0) optname = -optname; i = getsockopt(ipfw_socket, IPPROTO_IP, optname, optval, (socklen_t *)optlen); } else { i = setsockopt(ipfw_socket, IPPROTO_IP, optname, optval, optlen); } return i; } /* * do_set3 - pass ipfw control cmd to kernel * @optname: option name * @optval: pointer to option data * @optlen: option length * * Assumes op3 header is already embedded. * Calls setsockopt() with IP_FW3 as kernel-visible opcode. * Returns 0 on success or errno otherwise. 
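 *
 * Typical call pattern (editor's sketch, not part of the original
 * comment; the opcode name is illustrative):
 *
 *	char buf[sizeof(ip_fw3_opheader) + sizeof(ipfw_range_tlv)] = { 0 };
 *	ip_fw3_opheader *op3 = (ip_fw3_opheader *)buf;
 *
 *	(fill the TLV that follows the header, then:)
 *	if (do_set3(IP_FW_XDEL, op3, sizeof(buf)) != 0)
 *		warn("IP_FW_XDEL");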
*/ int do_set3(int optname, ip_fw3_opheader *op3, size_t optlen) { if (co.test_only) return (0); if (ipfw_socket == -1) ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (ipfw_socket < 0) err(EX_UNAVAILABLE, "socket"); op3->opcode = optname; return (setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, optlen)); } /* * do_get3 - pass ipfw control cmd to kernel * @optname: option name * @optval: pointer to option data * @optlen: pointer to option length * * Assumes op3 header is already embedded. * Calls getsockopt() with IP_FW3 as kernel-visible opcode. * Returns 0 on success or errno otherwise. */ int do_get3(int optname, ip_fw3_opheader *op3, size_t *optlen) { int error; socklen_t len; if (co.test_only) return (0); if (ipfw_socket == -1) ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (ipfw_socket < 0) err(EX_UNAVAILABLE, "socket"); op3->opcode = optname; len = *optlen; error = getsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, &len); *optlen = len; return (error); } /** * match_token takes a table and a string, returns the value associated * with the string (-1 in case of failure). */ int match_token(struct _s_x *table, const char *string) { struct _s_x *pt; uint i = strlen(string); for (pt = table ; i && pt->s != NULL ; pt++) if (strlen(pt->s) == i && !bcmp(string, pt->s, i)) return pt->x; return (-1); } /** * match_token_relaxed takes a table and a string, returns the value associated * with the string for the best match. * * Returns: * value from @table for matched records * -1 for non-matched records * -2 if more than one records match @string. */ int match_token_relaxed(struct _s_x *table, const char *string) { struct _s_x *pt, *m; int i, c; i = strlen(string); c = 0; for (pt = table ; i != 0 && pt->s != NULL ; pt++) { if (strncmp(pt->s, string, i) != 0) continue; m = pt; c++; } if (c == 1) return (m->x); return (c > 0 ? -2: -1); } int get_token(struct _s_x *table, const char *string, const char *errbase) { int tcmd; if ((tcmd = match_token_relaxed(table, string)) < 0) errx(EX_USAGE, "%s %s %s", (tcmd == 0) ? "invalid" : "ambiguous", errbase, string); return (tcmd); } /** * match_value takes a table and a value, returns the string associated * with the value (NULL in case of failure). */ char const * match_value(struct _s_x *p, int value) { for (; p->s != NULL; p++) if (p->x == value) return p->s; return NULL; } size_t concat_tokens(char *buf, size_t bufsize, struct _s_x *table, char *delimiter) { struct _s_x *pt; int l; size_t sz; for (sz = 0, pt = table ; pt->s != NULL; pt++) { l = snprintf(buf + sz, bufsize - sz, "%s%s", (sz == 0) ? "" : delimiter, pt->s); sz += l; bufsize += l; if (sz > bufsize) return (bufsize); } return (sz); } /* * helper function to process a set of flags and set bits in the * appropriate masks. 
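 *
 * Example (editor's sketch, not part of the original comment): parsing
 * "syn,!ack" against f_tcpflags leaves TH_SYN in *set and TH_ACK in
 * *clear; an unknown flag name makes the function return -1 with *e
 * pointing at the offending token.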
*/ int fill_flags(struct _s_x *flags, char *p, char **e, uint32_t *set, uint32_t *clear) { char *q; /* points to the separator */ int val; uint32_t *which; /* mask we are working on */ while (p && *p) { if (*p == '!') { p++; which = clear; } else which = set; q = strchr(p, ','); if (q) *q++ = '\0'; val = match_token(flags, p); if (val <= 0) { if (e != NULL) *e = p; return (-1); } *which |= (uint32_t)val; p = q; } return (0); } void print_flags_buffer(char *buf, size_t sz, struct _s_x *list, uint32_t set) { char const *comma = ""; int i, l; for (i = 0; list[i].x != 0; i++) { if ((set & list[i].x) == 0) continue; set &= ~list[i].x; l = snprintf(buf, sz, "%s%s", comma, list[i].s); if (l >= sz) return; comma = ","; buf += l; sz -=l; } } /* * _substrcmp takes two strings and returns 1 if they do not match, * and 0 if they match exactly or the first string is a sub-string * of the second. A warning is printed to stderr in the case that the * first string is a sub-string of the second. * * This function will be removed in the future through the usual * deprecation process. */ int _substrcmp(const char *str1, const char* str2) { if (strncmp(str1, str2, strlen(str1)) != 0) return 1; if (strlen(str1) != strlen(str2)) warnx("DEPRECATED: '%s' matched '%s' as a sub-string", str1, str2); return 0; } /* * _substrcmp2 takes three strings and returns 1 if the first two do not match, * and 0 if they match exactly or the second string is a sub-string * of the first. A warning is printed to stderr in the case that the * first string does not match the third. * * This function exists to warn about the bizarre construction * strncmp(str, "by", 2) which is used to allow people to use a shortcut * for "bytes". The problem is that in addition to accepting "by", * "byt", "byte", and "bytes", it also excepts "by_rabid_dogs" and any * other string beginning with "by". * * This function will be removed in the future through the usual * deprecation process. */ int _substrcmp2(const char *str1, const char* str2, const char* str3) { if (strncmp(str1, str2, strlen(str2)) != 0) return 1; if (strcmp(str1, str3) != 0) warnx("DEPRECATED: '%s' matched '%s'", str1, str3); return 0; } /* * prints one port, symbolic or numeric */ static void print_port(struct buf_pr *bp, int proto, uint16_t port) { if (proto == IPPROTO_ETHERTYPE) { char const *s; if (co.do_resolv && (s = match_value(ether_types, port)) ) bprintf(bp, "%s", s); else bprintf(bp, "0x%04x", port); } else { struct servent *se = NULL; if (co.do_resolv) { struct protoent *pe = getprotobynumber(proto); se = getservbyport(htons(port), pe ? pe->p_name : NULL); } if (se) bprintf(bp, "%s", se->s_name); else bprintf(bp, "%d", port); } } static struct _s_x _port_name[] = { {"dst-port", O_IP_DSTPORT}, {"src-port", O_IP_SRCPORT}, {"ipid", O_IPID}, {"iplen", O_IPLEN}, {"ipttl", O_IPTTL}, {"mac-type", O_MAC_TYPE}, {"tcpdatalen", O_TCPDATALEN}, {"tcpwin", O_TCPWIN}, {"tagged", O_TAGGED}, {NULL, 0} }; /* * Print the values in a list 16-bit items of the types above. * XXX todo: add support for mask. 
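 *
 * Example (editor's sketch, not part of the original comment): an
 * O_IP_DSTPORT instruction carrying the pairs {80,80} and {1000,1010}
 * prints as " dst-port 80,1000-1010" -- equal pair members collapse to
 * a single port, unequal ones become a range.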
*/ static void print_newports(struct buf_pr *bp, ipfw_insn_u16 *cmd, int proto, int opcode) { uint16_t *p = cmd->ports; int i; char const *sep; if (opcode != 0) { sep = match_value(_port_name, opcode); if (sep == NULL) sep = "???"; bprintf(bp, " %s", sep); } sep = " "; for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { bprintf(bp, "%s", sep); print_port(bp, proto, p[0]); if (p[0] != p[1]) { bprintf(bp, "-"); print_port(bp, proto, p[1]); } sep = ","; } } /* * Like strtol, but also translates service names into port numbers * for some protocols. * In particular: * proto == -1 disables the protocol check; * proto == IPPROTO_ETHERTYPE looks up an internal table * proto == matches the values there. * Returns *end == s in case the parameter is not found. */ static int strtoport(char *s, char **end, int base, int proto) { char *p, *buf; char *s1; int i; *end = s; /* default - not found */ if (*s == '\0') return 0; /* not found */ if (isdigit(*s)) return strtol(s, end, base); /* * find separator. '\\' escapes the next char. */ for (s1 = s; *s1 && (isalnum(*s1) || *s1 == '\\' || *s1 == '_' || *s1 == '.') ; s1++) if (*s1 == '\\' && s1[1] != '\0') s1++; buf = safe_calloc(s1 - s + 1, 1); /* * copy into a buffer skipping backslashes */ for (p = s, i = 0; p != s1 ; p++) if (*p != '\\') buf[i++] = *p; buf[i++] = '\0'; if (proto == IPPROTO_ETHERTYPE) { i = match_token(ether_types, buf); free(buf); if (i != -1) { /* found */ *end = s1; return i; } } else { struct protoent *pe = NULL; struct servent *se; if (proto != 0) pe = getprotobynumber(proto); setservent(1); se = getservbyname(buf, pe ? pe->p_name : NULL); free(buf); if (se != NULL) { *end = s1; return ntohs(se->s_port); } } return 0; /* not found */ } /* * Fill the body of the command with the list of port ranges. */ static int fill_newports(ipfw_insn_u16 *cmd, char *av, int proto, int cblen) { uint16_t a, b, *p = cmd->ports; int i = 0; char *s = av; while (*s) { a = strtoport(av, &s, 0, proto); if (s == av) /* empty or invalid argument */ return (0); CHECK_LENGTH(cblen, i + 2); switch (*s) { case '-': /* a range */ av = s + 1; b = strtoport(av, &s, 0, proto); /* Reject expressions like '1-abc' or '1-2-3'. */ if (s == av || (*s != ',' && *s != '\0')) return (0); p[0] = a; p[1] = b; break; case ',': /* comma separated list */ case '\0': p[0] = p[1] = a; break; default: warnx("port list: invalid separator <%c> in <%s>", *s, av); return (0); } i++; p += 2; av = s + 1; } if (i > 0) { if (i + 1 > F_LEN_MASK) errx(EX_DATAERR, "too many ports/ranges\n"); cmd->o.len |= i + 1; /* leave F_NOT and F_OR untouched */ } return (i); } /* * Fill the body of the command with the list of DiffServ codepoints. 
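 *
 * Example (editor's sketch, not part of the original comment): for the
 * list "af11,cs4" the codepoints are 10 and 32, so fill_dscp() sets
 * *low = 1 << 10 and *high = 1 << (32 - 32): codes below 32 land in
 * the low 32-bit word, the rest in the high word.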
 */
static void
fill_dscp(ipfw_insn *cmd, char *av, int cblen)
{
	uint32_t *low, *high;
	char *s = av, *a;
	int code;

	cmd->opcode = O_DSCP;
	cmd->len |= F_INSN_SIZE(ipfw_insn_u32) + 1;
	CHECK_CMDLEN;

	low = (uint32_t *)(cmd + 1);
	high = low + 1;
	*low = 0;
	*high = 0;

	while (s != NULL) {
		a = strchr(s, ',');
		if (a != NULL)
			*a++ = '\0';

		if (isalpha(*s)) {
			if ((code = match_token(f_ipdscp, s)) == -1)
				errx(EX_DATAERR, "Unknown DSCP code");
		} else {
			code = strtoul(s, NULL, 10);
			if (code < 0 || code > 63)
				errx(EX_DATAERR, "Invalid DSCP value");
		}

		if (code >= 32)
			*high |= 1 << (code - 32);
		else
			*low |= 1 << code;

		s = a;
	}
}

static struct _s_x icmpcodes[] = {
      { "net",			ICMP_UNREACH_NET },
      { "host",			ICMP_UNREACH_HOST },
      { "protocol",		ICMP_UNREACH_PROTOCOL },
      { "port",			ICMP_UNREACH_PORT },
      { "needfrag",		ICMP_UNREACH_NEEDFRAG },
      { "srcfail",		ICMP_UNREACH_SRCFAIL },
      { "net-unknown",		ICMP_UNREACH_NET_UNKNOWN },
      { "host-unknown",		ICMP_UNREACH_HOST_UNKNOWN },
      { "isolated",		ICMP_UNREACH_ISOLATED },
      { "net-prohib",		ICMP_UNREACH_NET_PROHIB },
      { "host-prohib",		ICMP_UNREACH_HOST_PROHIB },
      { "tosnet",		ICMP_UNREACH_TOSNET },
      { "toshost",		ICMP_UNREACH_TOSHOST },
      { "filter-prohib",	ICMP_UNREACH_FILTER_PROHIB },
      { "host-precedence",	ICMP_UNREACH_HOST_PRECEDENCE },
      { "precedence-cutoff",	ICMP_UNREACH_PRECEDENCE_CUTOFF },
      { NULL, 0 }
};

static void
fill_reject_code(u_short *codep, char *str)
{
	int val;
	char *s;

	val = strtoul(str, &s, 0);
	if (s == str || *s != '\0' || val >= 0x100)
		val = match_token(icmpcodes, str);
	if (val < 0)
		errx(EX_DATAERR, "unknown ICMP unreachable code ``%s''", str);
	*codep = val;
	return;
}

static void
print_reject_code(struct buf_pr *bp, uint16_t code)
{
	char const *s;

	if ((s = match_value(icmpcodes, code)) != NULL)
		bprintf(bp, "unreach %s", s);
	else
		bprintf(bp, "unreach %u", code);
}

/*
 * Returns the number of bits set (from left) in a contiguous bitmask,
 * or -1 if the mask is not contiguous.
 * XXX this needs a proper fix.
 * This effectively works on masks in big-endian (network) format.
 * when compiled on little endian architectures.
 *
 * First bit is bit 7 of the first byte -- note, for MAC addresses,
 * the first bit on the wire is bit 0 of the first byte.
 * len is the max length in bits.
 */
int
contigmask(uint8_t *p, int len)
{
	int i, n;

	for (i=0; i<len ; i++)
		if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
			break;
	for (n=i+1; n < len; n++)
		if ( (p[n/8] & (1 << (7 - (n%8)))) != 0)
			return -1; /* mask not contiguous */
	return i;
}

/*
 * print flags set/clear in the two bitmasks passed as parameters.
 * There is a specialized check for f_tcpflags.
 */
static void
print_flags(struct buf_pr *bp, char const *name, ipfw_insn *cmd,
    struct _s_x *list)
{
	char const *comma = "";
	int i;
	uint8_t set = cmd->arg1 & 0xff;
	uint8_t clear = (cmd->arg1 >> 8) & 0xff;

	if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) {
		bprintf(bp, " setup");
		return;
	}

	bprintf(bp, " %s ", name);
	for (i=0; list[i].x != 0; i++) {
		if (set & list[i].x) {
			set &= ~list[i].x;
			bprintf(bp, "%s%s", comma, list[i].s);
			comma = ",";
		}
		if (clear & list[i].x) {
			clear &= ~list[i].x;
			bprintf(bp, "%s!%s", comma, list[i].s);
			comma = ",";
		}
	}
}

/*
 * Print the ip address contained in a command.
*/ static void print_ip(struct buf_pr *bp, const struct format_opts *fo, ipfw_insn_ip *cmd) { struct hostent *he = NULL; struct in_addr *ia; uint32_t len = F_LEN((ipfw_insn *)cmd); uint32_t *a = ((ipfw_insn_u32 *)cmd)->d; char *t; bprintf(bp, " "); if (cmd->o.opcode == O_IP_DST_LOOKUP && len > F_INSN_SIZE(ipfw_insn_u32)) { uint32_t d = a[1]; const char *arg = ""; if (d < sizeof(lookup_key)/sizeof(lookup_key[0])) arg = match_value(rule_options, lookup_key[d]); t = table_search_ctlv(fo->tstate, ((ipfw_insn *)cmd)->arg1); bprintf(bp, "lookup %s %s", arg, t); return; } if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { bprintf(bp, "me"); return; } if (cmd->o.opcode == O_IP_SRC_LOOKUP || cmd->o.opcode == O_IP_DST_LOOKUP) { t = table_search_ctlv(fo->tstate, ((ipfw_insn *)cmd)->arg1); bprintf(bp, "table(%s", t); if (len == F_INSN_SIZE(ipfw_insn_u32)) bprintf(bp, ",%u", *a); bprintf(bp, ")"); return; } if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { uint32_t x, *map = (uint32_t *)&(cmd->mask); int i, j; char comma = '{'; x = cmd->o.arg1 - 1; x = htonl( ~x ); cmd->addr.s_addr = htonl(cmd->addr.s_addr); bprintf(bp, "%s/%d", inet_ntoa(cmd->addr), contigmask((uint8_t *)&x, 32)); x = cmd->addr.s_addr = htonl(cmd->addr.s_addr); x &= 0xff; /* base */ /* * Print bits and ranges. * Locate first bit set (i), then locate first bit unset (j). * If we have 3+ consecutive bits set, then print them as a * range, otherwise only print the initial bit and rescan. */ for (i=0; i < cmd->o.arg1; i++) if (map[i/32] & (1<<(i & 31))) { for (j=i+1; j < cmd->o.arg1; j++) if (!(map[ j/32] & (1<<(j & 31)))) break; bprintf(bp, "%c%d", comma, i+x); if (j>i+2) { /* range has at least 3 elements */ bprintf(bp, "-%d", j-1+x); i = j-1; } comma = ','; } bprintf(bp, "}"); return; } /* * len == 2 indicates a single IP, whereas lists of 1 or more * addr/mask pairs have len = (2n+1). We convert len to n so we * use that to count the number of entries. */ for (len = len / 2; len > 0; len--, a += 2) { int mb = /* mask length */ (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) ? 
32 : contigmask((uint8_t *)&(a[1]), 32); if (mb == 32 && co.do_resolv) he = gethostbyaddr((char *)&(a[0]), sizeof(in_addr_t), AF_INET); if (he != NULL) /* resolved to name */ bprintf(bp, "%s", he->h_name); else if (mb == 0) /* any */ bprintf(bp, "any"); else { /* numeric IP followed by some kind of mask */ ia = (struct in_addr *)&a[0]; bprintf(bp, "%s", inet_ntoa(*ia)); if (mb < 0) { ia = (struct in_addr *)&a[1]; bprintf(bp, ":%s", inet_ntoa(*ia)); } else if (mb < 32) bprintf(bp, "/%d", mb); } if (len > 1) bprintf(bp, ","); } } /* * prints a MAC address/mask pair */ static void format_mac(struct buf_pr *bp, uint8_t *addr, uint8_t *mask) { int l = contigmask(mask, 48); if (l == 0) bprintf(bp, " any"); else { bprintf(bp, " %02x:%02x:%02x:%02x:%02x:%02x", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); if (l == -1) bprintf(bp, "&%02x:%02x:%02x:%02x:%02x:%02x", mask[0], mask[1], mask[2], mask[3], mask[4], mask[5]); else if (l < 48) bprintf(bp, "/%d", l); } } static void print_mac(struct buf_pr *bp, ipfw_insn_mac *mac) { bprintf(bp, " MAC"); format_mac(bp, mac->addr, mac->mask); format_mac(bp, mac->addr + 6, mac->mask + 6); } static void fill_icmptypes(ipfw_insn_u32 *cmd, char *av) { uint8_t type; cmd->d[0] = 0; while (*av) { if (*av == ',') av++; type = strtoul(av, &av, 0); if (*av != ',' && *av != '\0') errx(EX_DATAERR, "invalid ICMP type"); if (type > 31) errx(EX_DATAERR, "ICMP type out of range"); cmd->d[0] |= 1 << type; } cmd->o.opcode = O_ICMPTYPE; cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); } static void print_icmptypes(struct buf_pr *bp, ipfw_insn_u32 *cmd) { int i; char sep= ' '; bprintf(bp, " icmptypes"); for (i = 0; i < 32; i++) { if ( (cmd->d[0] & (1 << (i))) == 0) continue; bprintf(bp, "%c%d", sep, i); sep = ','; } } static void print_dscp(struct buf_pr *bp, ipfw_insn_u32 *cmd) { int i = 0; uint32_t *v; char sep= ' '; const char *code; bprintf(bp, " dscp"); v = cmd->d; while (i < 64) { if (*v & (1 << i)) { if ((code = match_value(f_ipdscp, i)) != NULL) bprintf(bp, "%c%s", sep, code); else bprintf(bp, "%c%d", sep, i); sep = ','; } if ((++i % 32) == 0) v++; } } #define insntod(cmd, type) ((ipfw_insn_ ## type *)(cmd)) struct show_state { struct ip_fw_rule *rule; const ipfw_insn *eaction; uint8_t *printed; int flags; #define HAVE_PROTO 0x0001 #define HAVE_SRCIP 0x0002 #define HAVE_DSTIP 0x0004 #define HAVE_PROBE_STATE 0x0008 int proto; int or_block; }; static int init_show_state(struct show_state *state, struct ip_fw_rule *rule) { state->printed = calloc(rule->cmd_len, sizeof(uint8_t)); if (state->printed == NULL) return (ENOMEM); state->rule = rule; state->eaction = NULL; state->flags = 0; state->proto = 0; state->or_block = 0; return (0); } static void free_show_state(struct show_state *state) { free(state->printed); } static uint8_t is_printed_opcode(struct show_state *state, const ipfw_insn *cmd) { return (state->printed[cmd - state->rule->cmd]); } static void mark_printed(struct show_state *state, const ipfw_insn *cmd) { state->printed[cmd - state->rule->cmd] = 1; } static void print_limit_mask(struct buf_pr *bp, const ipfw_insn_limit *limit) { struct _s_x *p = limit_masks; char const *comma = " "; uint8_t x; for (x = limit->limit_mask; p->x != 0; p++) { if ((x & p->x) == p->x) { x &= ~p->x; bprintf(bp, "%s%s", comma, p->s); comma = ","; } } bprint_uint_arg(bp, " ", limit->conn_limit); } static int print_instruction(struct buf_pr *bp, const struct format_opts *fo, struct show_state *state, ipfw_insn *cmd) { struct protoent *pe; struct passwd *pwd; struct group *grp; const 
char *s; double d; if (is_printed_opcode(state, cmd)) return (0); if ((cmd->len & F_OR) != 0 && state->or_block == 0) bprintf(bp, " {"); if (cmd->opcode != O_IN && (cmd->len & F_NOT) != 0) bprintf(bp, " not"); switch (cmd->opcode) { case O_PROB: d = 1.0 * insntod(cmd, u32)->d[0] / 0x7fffffff; bprintf(bp, "prob %f ", d); break; case O_PROBE_STATE: /* no need to print anything here */ state->flags |= HAVE_PROBE_STATE; break; case O_IP_SRC: case O_IP_SRC_LOOKUP: case O_IP_SRC_MASK: case O_IP_SRC_ME: case O_IP_SRC_SET: if (state->flags & HAVE_SRCIP) bprintf(bp, " src-ip"); print_ip(bp, fo, insntod(cmd, ip)); break; case O_IP_DST: case O_IP_DST_LOOKUP: case O_IP_DST_MASK: case O_IP_DST_ME: case O_IP_DST_SET: if (state->flags & HAVE_DSTIP) bprintf(bp, " dst-ip"); print_ip(bp, fo, insntod(cmd, ip)); break; case O_IP6_SRC: case O_IP6_SRC_MASK: case O_IP6_SRC_ME: if (state->flags & HAVE_SRCIP) bprintf(bp, " src-ip6"); print_ip6(bp, insntod(cmd, ip6)); break; case O_IP6_DST: case O_IP6_DST_MASK: case O_IP6_DST_ME: if (state->flags & HAVE_DSTIP) bprintf(bp, " dst-ip6"); print_ip6(bp, insntod(cmd, ip6)); break; case O_FLOW6ID: print_flow6id(bp, insntod(cmd, u32)); break; case O_IP_DSTPORT: case O_IP_SRCPORT: print_newports(bp, insntod(cmd, u16), state->proto, (state->flags & (HAVE_SRCIP | HAVE_DSTIP)) == (HAVE_SRCIP | HAVE_DSTIP) ? cmd->opcode: 0); break; case O_PROTO: pe = getprotobynumber(cmd->arg1); if (state->flags & HAVE_PROTO) bprintf(bp, " proto"); if (pe != NULL) bprintf(bp, " %s", pe->p_name); else bprintf(bp, " %u", cmd->arg1); state->proto = cmd->arg1; break; case O_MACADDR2: print_mac(bp, insntod(cmd, mac)); break; case O_MAC_TYPE: print_newports(bp, insntod(cmd, u16), IPPROTO_ETHERTYPE, cmd->opcode); break; case O_FRAG: bprintf(bp, " frag"); break; case O_FIB: bprintf(bp, " fib %u", cmd->arg1); break; case O_SOCKARG: bprintf(bp, " sockarg"); break; case O_IN: bprintf(bp, cmd->len & F_NOT ? 
" out" : " in"); break; case O_DIVERTED: switch (cmd->arg1) { case 3: bprintf(bp, " diverted"); break; case 2: bprintf(bp, " diverted-output"); break; case 1: bprintf(bp, " diverted-loopback"); break; default: bprintf(bp, " diverted-?<%u>", cmd->arg1); break; } break; case O_LAYER2: bprintf(bp, " layer2"); break; case O_XMIT: case O_RECV: case O_VIA: if (cmd->opcode == O_XMIT) s = "xmit"; else if (cmd->opcode == O_RECV) s = "recv"; else /* if (cmd->opcode == O_VIA) */ s = "via"; switch (insntod(cmd, if)->name[0]) { case '\0': bprintf(bp, " %s %s", s, inet_ntoa(insntod(cmd, if)->p.ip)); break; case '\1': bprintf(bp, " %s table(%s)", s, table_search_ctlv(fo->tstate, insntod(cmd, if)->p.kidx)); break; default: bprintf(bp, " %s %s", s, insntod(cmd, if)->name); } break; case O_IP_FLOW_LOOKUP: s = table_search_ctlv(fo->tstate, cmd->arg1); bprintf(bp, " flow table(%s", s); if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) bprintf(bp, ",%u", insntod(cmd, u32)->d[0]); bprintf(bp, ")"); break; case O_IPID: case O_IPTTL: case O_IPLEN: case O_TCPDATALEN: case O_TCPWIN: if (F_LEN(cmd) == 1) { switch (cmd->opcode) { case O_IPID: s = "ipid"; break; case O_IPTTL: s = "ipttl"; break; case O_IPLEN: s = "iplen"; break; case O_TCPDATALEN: s = "tcpdatalen"; break; case O_TCPWIN: s = "tcpwin"; break; } bprintf(bp, " %s %u", s, cmd->arg1); } else print_newports(bp, insntod(cmd, u16), 0, cmd->opcode); break; case O_IPVER: bprintf(bp, " ipver %u", cmd->arg1); break; case O_IPPRECEDENCE: bprintf(bp, " ipprecedence %u", cmd->arg1 >> 5); break; case O_DSCP: print_dscp(bp, insntod(cmd, u32)); break; case O_IPOPT: print_flags(bp, "ipoptions", cmd, f_ipopts); break; case O_IPTOS: print_flags(bp, "iptos", cmd, f_iptos); break; case O_ICMPTYPE: print_icmptypes(bp, insntod(cmd, u32)); break; case O_ESTAB: bprintf(bp, " established"); break; case O_TCPFLAGS: print_flags(bp, "tcpflags", cmd, f_tcpflags); break; case O_TCPOPTS: print_flags(bp, "tcpoptions", cmd, f_tcpopts); break; case O_TCPACK: bprintf(bp, " tcpack %d", ntohl(insntod(cmd, u32)->d[0])); break; case O_TCPSEQ: bprintf(bp, " tcpseq %d", ntohl(insntod(cmd, u32)->d[0])); break; case O_UID: pwd = getpwuid(insntod(cmd, u32)->d[0]); if (pwd != NULL) bprintf(bp, " uid %s", pwd->pw_name); else bprintf(bp, " uid %u", insntod(cmd, u32)->d[0]); break; case O_GID: grp = getgrgid(insntod(cmd, u32)->d[0]); if (grp != NULL) bprintf(bp, " gid %s", grp->gr_name); else bprintf(bp, " gid %u", insntod(cmd, u32)->d[0]); break; case O_JAIL: bprintf(bp, " jail %d", insntod(cmd, u32)->d[0]); break; case O_VERREVPATH: bprintf(bp, " verrevpath"); break; case O_VERSRCREACH: bprintf(bp, " versrcreach"); break; case O_ANTISPOOF: bprintf(bp, " antispoof"); break; case O_IPSEC: bprintf(bp, " ipsec"); break; case O_NOP: bprintf(bp, " // %s", (char *)(cmd + 1)); break; case O_KEEP_STATE: if (state->flags & HAVE_PROBE_STATE) bprintf(bp, " keep-state"); else bprintf(bp, " record-state"); bprintf(bp, " :%s", object_search_ctlv(fo->tstate, cmd->arg1, IPFW_TLV_STATE_NAME)); break; case O_LIMIT: if (state->flags & HAVE_PROBE_STATE) bprintf(bp, " limit"); else bprintf(bp, " set-limit"); print_limit_mask(bp, insntod(cmd, limit)); bprintf(bp, " :%s", object_search_ctlv(fo->tstate, cmd->arg1, IPFW_TLV_STATE_NAME)); break; case O_IP6: + if (state->flags & HAVE_PROTO) + bprintf(bp, " proto"); bprintf(bp, " ip6"); break; case O_IP4: + if (state->flags & HAVE_PROTO) + bprintf(bp, " proto"); bprintf(bp, " ip4"); break; case O_ICMP6TYPE: print_icmp6types(bp, insntod(cmd, u32)); break; case O_EXT_HDR: 
print_ext6hdr(bp, cmd); break; case O_TAGGED: if (F_LEN(cmd) == 1) bprint_uint_arg(bp, " tagged ", cmd->arg1); else print_newports(bp, insntod(cmd, u16), 0, O_TAGGED); break; case O_SKIP_ACTION: bprintf(bp, " defer-immediate-action"); break; default: bprintf(bp, " [opcode %d len %d]", cmd->opcode, cmd->len); } if (cmd->len & F_OR) { bprintf(bp, " or"); state->or_block = 1; } else if (state->or_block != 0) { bprintf(bp, " }"); state->or_block = 0; } mark_printed(state, cmd); return (1); } static ipfw_insn * print_opcode(struct buf_pr *bp, struct format_opts *fo, struct show_state *state, int opcode) { ipfw_insn *cmd; int l; for (l = state->rule->act_ofs, cmd = state->rule->cmd; l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { /* We use zero opcode to print the rest of options */ if (opcode >= 0 && cmd->opcode != opcode) continue; /* * Skip O_NOP, when we printing the rest * of options, it will be handled separately. */ if (cmd->opcode == O_NOP && opcode != O_NOP) continue; if (!print_instruction(bp, fo, state, cmd)) continue; return (cmd); } return (NULL); } static void print_fwd(struct buf_pr *bp, const ipfw_insn *cmd) { char buf[INET6_ADDRSTRLEN + IF_NAMESIZE + 2]; ipfw_insn_sa6 *sa6; ipfw_insn_sa *sa; uint16_t port; if (cmd->opcode == O_FORWARD_IP) { sa = insntod(cmd, sa); port = sa->sa.sin_port; if (sa->sa.sin_addr.s_addr == INADDR_ANY) bprintf(bp, "fwd tablearg"); else bprintf(bp, "fwd %s", inet_ntoa(sa->sa.sin_addr)); } else { sa6 = insntod(cmd, sa6); port = sa6->sa.sin6_port; bprintf(bp, "fwd "); if (getnameinfo((const struct sockaddr *)&sa6->sa, sizeof(struct sockaddr_in6), buf, sizeof(buf), NULL, 0, NI_NUMERICHOST) == 0) bprintf(bp, "%s", buf); } if (port != 0) bprintf(bp, ",%u", port); } static int print_action_instruction(struct buf_pr *bp, const struct format_opts *fo, struct show_state *state, const ipfw_insn *cmd) { const char *s; if (is_printed_opcode(state, cmd)) return (0); switch (cmd->opcode) { case O_CHECK_STATE: bprintf(bp, "check-state"); if (cmd->arg1 != 0) s = object_search_ctlv(fo->tstate, cmd->arg1, IPFW_TLV_STATE_NAME); else s = NULL; bprintf(bp, " :%s", s ? s: "any"); break; case O_ACCEPT: bprintf(bp, "allow"); break; case O_COUNT: bprintf(bp, "count"); break; case O_DENY: bprintf(bp, "deny"); break; case O_REJECT: if (cmd->arg1 == ICMP_REJECT_RST) bprintf(bp, "reset"); else if (cmd->arg1 == ICMP_REJECT_ABORT) bprintf(bp, "abort"); else if (cmd->arg1 == ICMP_UNREACH_HOST) bprintf(bp, "reject"); else print_reject_code(bp, cmd->arg1); break; case O_UNREACH6: if (cmd->arg1 == ICMP6_UNREACH_RST) bprintf(bp, "reset6"); else if (cmd->arg1 == ICMP6_UNREACH_ABORT) bprintf(bp, "abort6"); else print_unreach6_code(bp, cmd->arg1); break; case O_SKIPTO: bprint_uint_arg(bp, "skipto ", cmd->arg1); break; case O_PIPE: bprint_uint_arg(bp, "pipe ", cmd->arg1); break; case O_QUEUE: bprint_uint_arg(bp, "queue ", cmd->arg1); break; case O_DIVERT: bprint_uint_arg(bp, "divert ", cmd->arg1); break; case O_TEE: bprint_uint_arg(bp, "tee ", cmd->arg1); break; case O_NETGRAPH: bprint_uint_arg(bp, "netgraph ", cmd->arg1); break; case O_NGTEE: bprint_uint_arg(bp, "ngtee ", cmd->arg1); break; case O_FORWARD_IP: case O_FORWARD_IP6: print_fwd(bp, cmd); break; case O_LOG: if (insntod(cmd, log)->max_log > 0) bprintf(bp, " log logamount %d", insntod(cmd, log)->max_log); else bprintf(bp, " log"); break; case O_ALTQ: #ifndef NO_ALTQ print_altq_cmd(bp, insntod(cmd, altq)); #endif break; case O_TAG: bprint_uint_arg(bp, cmd->len & F_NOT ? 
" untag ": " tag ", cmd->arg1); break; case O_NAT: if (cmd->arg1 != IP_FW_NAT44_GLOBAL) bprint_uint_arg(bp, "nat ", cmd->arg1); else bprintf(bp, "nat global"); break; case O_SETFIB: if (cmd->arg1 == IP_FW_TARG) bprint_uint_arg(bp, "setfib ", cmd->arg1); else bprintf(bp, "setfib %u", cmd->arg1 & 0x7FFF); break; case O_EXTERNAL_ACTION: /* * The external action can consists of two following * each other opcodes - O_EXTERNAL_ACTION and * O_EXTERNAL_INSTANCE. The first contains the ID of * name of external action. The second contains the ID * of name of external action instance. * NOTE: in case when external action has no named * instances support, the second opcode isn't needed. */ state->eaction = cmd; s = object_search_ctlv(fo->tstate, cmd->arg1, IPFW_TLV_EACTION); if (match_token(rule_eactions, s) != -1) bprintf(bp, "%s", s); else bprintf(bp, "eaction %s", s); break; case O_EXTERNAL_INSTANCE: if (state->eaction == NULL) break; /* * XXX: we need to teach ipfw(9) to rewrite opcodes * in the user buffer on rule addition. When we add * the rule, we specify zero TLV type for * O_EXTERNAL_INSTANCE object. To show correct * rule after `ipfw add` we need to search instance * name with zero type. But when we do `ipfw show` * we calculate TLV type using IPFW_TLV_EACTION_NAME() * macro. */ s = object_search_ctlv(fo->tstate, cmd->arg1, 0); if (s == NULL) s = object_search_ctlv(fo->tstate, cmd->arg1, IPFW_TLV_EACTION_NAME( state->eaction->arg1)); bprintf(bp, " %s", s); break; case O_EXTERNAL_DATA: if (state->eaction == NULL) break; /* * Currently we support data formatting only for * external data with datalen u16. For unknown data * print its size in bytes. */ if (cmd->len == F_INSN_SIZE(ipfw_insn)) bprintf(bp, " %u", cmd->arg1); else bprintf(bp, " %ubytes", cmd->len * sizeof(uint32_t)); break; case O_SETDSCP: if (cmd->arg1 == IP_FW_TARG) { bprintf(bp, "setdscp tablearg"); break; } s = match_value(f_ipdscp, cmd->arg1 & 0x3F); if (s != NULL) bprintf(bp, "setdscp %s", s); else bprintf(bp, "setdscp %u", cmd->arg1 & 0x3F); break; case O_REASS: bprintf(bp, "reass"); break; case O_CALLRETURN: if (cmd->len & F_NOT) bprintf(bp, "return"); else bprint_uint_arg(bp, "call ", cmd->arg1); break; default: bprintf(bp, "** unrecognized action %d len %d ", cmd->opcode, cmd->len); } mark_printed(state, cmd); return (1); } static ipfw_insn * print_action(struct buf_pr *bp, struct format_opts *fo, struct show_state *state, uint8_t opcode) { ipfw_insn *cmd; int l; for (l = state->rule->cmd_len - state->rule->act_ofs, cmd = ACTION_PTR(state->rule); l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { if (cmd->opcode != opcode) continue; if (!print_action_instruction(bp, fo, state, cmd)) continue; return (cmd); } return (NULL); } static void print_proto(struct buf_pr *bp, struct format_opts *fo, struct show_state *state) { ipfw_insn *cmd; int l, proto, ip4, ip6; /* Count all O_PROTO, O_IP4, O_IP6 instructions. */ proto = ip4 = ip6 = 0; for (l = state->rule->act_ofs, cmd = state->rule->cmd; l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { switch (cmd->opcode) { case O_PROTO: proto++; break; case O_IP4: ip4 = 1; if (cmd->len & F_OR) ip4++; break; case O_IP6: ip6 = 1; if (cmd->len & F_OR) ip6++; break; default: continue; } } if (proto == 0 && ip4 == 0 && ip6 == 0) { state->proto = IPPROTO_IP; state->flags |= HAVE_PROTO; bprintf(bp, " ip"); return; } /* To handle the case { ip4 or ip6 }, print opcode with F_OR first */ cmd = NULL; if (ip4 || ip6) cmd = print_opcode(bp, fo, state, ip4 > ip6 ? 
O_IP4: O_IP6); if (cmd != NULL && (cmd->len & F_OR)) cmd = print_opcode(bp, fo, state, ip4 > ip6 ? O_IP6: O_IP4); if (cmd == NULL || (cmd->len & F_OR)) for (l = proto; l > 0; l--) { cmd = print_opcode(bp, fo, state, O_PROTO); if (cmd == NULL || (cmd->len & F_OR) == 0) break; } /* Initialize proto, it is used by print_newports() */ state->flags |= HAVE_PROTO; if (state->proto == 0 && ip6 != 0) state->proto = IPPROTO_IPV6; } static int match_opcode(int opcode, const int opcodes[], size_t nops) { int i; for (i = 0; i < nops; i++) if (opcode == opcodes[i]) return (1); return (0); } static void print_address(struct buf_pr *bp, struct format_opts *fo, struct show_state *state, const int opcodes[], size_t nops, int portop, int flag) { ipfw_insn *cmd; int count, l, portcnt, pf; count = portcnt = 0; for (l = state->rule->act_ofs, cmd = state->rule->cmd; l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { if (match_opcode(cmd->opcode, opcodes, nops)) count++; else if (cmd->opcode == portop) portcnt++; } if (count == 0) bprintf(bp, " any"); for (l = state->rule->act_ofs, cmd = state->rule->cmd; l > 0 && count > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { if (!match_opcode(cmd->opcode, opcodes, nops)) continue; print_instruction(bp, fo, state, cmd); if ((cmd->len & F_OR) == 0) break; count--; } /* * If several O_IP_?PORT opcodes specified, leave them to the * options section. */ if (portcnt == 1) { for (l = state->rule->act_ofs, cmd = state->rule->cmd, pf = 0; l > 0; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { if (cmd->opcode != portop) { pf = (cmd->len & F_OR); continue; } /* Print opcode iff it is not in OR block. */ if (pf == 0 && (cmd->len & F_OR) == 0) print_instruction(bp, fo, state, cmd); break; } } state->flags |= flag; } static const int action_opcodes[] = { O_CHECK_STATE, O_ACCEPT, O_COUNT, O_DENY, O_REJECT, O_UNREACH6, O_SKIPTO, O_PIPE, O_QUEUE, O_DIVERT, O_TEE, O_NETGRAPH, O_NGTEE, O_FORWARD_IP, O_FORWARD_IP6, O_NAT, O_SETFIB, O_SETDSCP, O_REASS, O_CALLRETURN, /* keep the following opcodes at the end of the list */ O_EXTERNAL_ACTION, O_EXTERNAL_INSTANCE, O_EXTERNAL_DATA }; static const int modifier_opcodes[] = { O_LOG, O_ALTQ, O_TAG }; static const int src_opcodes[] = { O_IP_SRC, O_IP_SRC_LOOKUP, O_IP_SRC_MASK, O_IP_SRC_ME, O_IP_SRC_SET, O_IP6_SRC, O_IP6_SRC_MASK, O_IP6_SRC_ME }; static const int dst_opcodes[] = { O_IP_DST, O_IP_DST_LOOKUP, O_IP_DST_MASK, O_IP_DST_ME, O_IP_DST_SET, O_IP6_DST, O_IP6_DST_MASK, O_IP6_DST_ME }; static void show_static_rule(struct cmdline_opts *co, struct format_opts *fo, struct buf_pr *bp, struct ip_fw_rule *rule, struct ip_fw_bcounter *cntr) { struct show_state state; ipfw_insn *cmd; static int twidth = 0; int i; /* Print # DISABLED or skip the rule */ if ((fo->set_mask & (1 << rule->set)) == 0) { /* disabled mask */ if (!co->show_sets) return; else bprintf(bp, "# DISABLED "); } if (init_show_state(&state, rule) != 0) { warn("init_show_state() failed"); return; } bprintf(bp, "%05u ", rule->rulenum); /* Print counters if enabled */ if (fo->pcwidth > 0 || fo->bcwidth > 0) { pr_u64(bp, &cntr->pcnt, fo->pcwidth); pr_u64(bp, &cntr->bcnt, fo->bcwidth); } /* Print timestamp */ if (co->do_time == TIMESTAMP_NUMERIC) bprintf(bp, "%10u ", cntr->timestamp); else if (co->do_time == TIMESTAMP_STRING) { char timestr[30]; time_t t = (time_t)0; if (twidth == 0) { strcpy(timestr, ctime(&t)); *strchr(timestr, '\n') = '\0'; twidth = strlen(timestr); } if (cntr->timestamp > 0) { t = _long_to_time(cntr->timestamp); strcpy(timestr, ctime(&t)); *strchr(timestr, '\n') = '\0'; bprintf(bp, "%s ", 
timestr); } else { bprintf(bp, "%*s", twidth, " "); } } /* Print set number */ if (co->show_sets) bprintf(bp, "set %d ", rule->set); /* Print the optional "match probability" */ cmd = print_opcode(bp, fo, &state, O_PROB); /* Print rule action */ for (i = 0; i < nitems(action_opcodes); i++) { cmd = print_action(bp, fo, &state, action_opcodes[i]); if (cmd == NULL) continue; /* Handle special cases */ switch (cmd->opcode) { case O_CHECK_STATE: goto end; case O_EXTERNAL_ACTION: case O_EXTERNAL_INSTANCE: /* External action can have several instructions */ continue; } break; } /* Print rule modifiers */ for (i = 0; i < nitems(modifier_opcodes); i++) print_action(bp, fo, &state, modifier_opcodes[i]); /* * Print rule body */ if (co->comment_only != 0) goto end; if (rule->flags & IPFW_RULE_JUSTOPTS) { state.flags |= HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP; goto justopts; } print_proto(bp, fo, &state); /* Print source */ bprintf(bp, " from"); print_address(bp, fo, &state, src_opcodes, nitems(src_opcodes), O_IP_SRCPORT, HAVE_SRCIP); /* Print destination */ bprintf(bp, " to"); print_address(bp, fo, &state, dst_opcodes, nitems(dst_opcodes), O_IP_DSTPORT, HAVE_DSTIP); justopts: /* Print the rest of options */ while (print_opcode(bp, fo, &state, -1)) ; end: /* Print comment at the end */ cmd = print_opcode(bp, fo, &state, O_NOP); if (co->comment_only != 0 && cmd == NULL) bprintf(bp, " // ..."); bprintf(bp, "\n"); free_show_state(&state); } static void show_dyn_state(struct cmdline_opts *co, struct format_opts *fo, struct buf_pr *bp, ipfw_dyn_rule *d) { struct protoent *pe; struct in_addr a; uint16_t rulenum; char buf[INET6_ADDRSTRLEN]; if (d->expire == 0 && d->dyn_type != O_LIMIT_PARENT) return; bcopy(&d->rule, &rulenum, sizeof(rulenum)); bprintf(bp, "%05d", rulenum); if (fo->pcwidth > 0 || fo->bcwidth > 0) { bprintf(bp, " "); pr_u64(bp, &d->pcnt, fo->pcwidth); pr_u64(bp, &d->bcnt, fo->bcwidth); bprintf(bp, "(%ds)", d->expire); } switch (d->dyn_type) { case O_LIMIT_PARENT: bprintf(bp, " PARENT %d", d->count); break; case O_LIMIT: bprintf(bp, " LIMIT"); break; case O_KEEP_STATE: /* bidir, no mask */ bprintf(bp, " STATE"); break; } if ((pe = getprotobynumber(d->id.proto)) != NULL) bprintf(bp, " %s", pe->p_name); else bprintf(bp, " proto %u", d->id.proto); if (d->id.addr_type == 4) { a.s_addr = htonl(d->id.src_ip); bprintf(bp, " %s %d", inet_ntoa(a), d->id.src_port); a.s_addr = htonl(d->id.dst_ip); bprintf(bp, " <-> %s %d", inet_ntoa(a), d->id.dst_port); } else if (d->id.addr_type == 6) { bprintf(bp, " %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf, sizeof(buf)), d->id.src_port); bprintf(bp, " <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, buf, sizeof(buf)), d->id.dst_port); } else bprintf(bp, " UNKNOWN <-> UNKNOWN"); if (d->kidx != 0) bprintf(bp, " :%s", object_search_ctlv(fo->tstate, d->kidx, IPFW_TLV_STATE_NAME)); #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) if (co->verbose) { bprintf(bp, " state 0x%08x%s", d->state, d->state ? 
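/*
 * As the BOTH_SYN/BOTH_FIN macros above encode, the dynamic state keeps
 * TCP flags seen in the forward direction in the low byte and flags seen
 * in the reverse direction shifted left by 8.  For example, a connection
 * that has seen a SYN both ways carries TH_SYN | (TH_SYN << 8), so the
 * BOTH_SYN test below matches and the separate F_SYN/R_SYN tags are
 * suppressed.
 */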
" ": ","); if (d->state & IPFW_DYN_ORPHANED) bprintf(bp, "ORPHANED,"); if ((d->state & BOTH_SYN) == BOTH_SYN) bprintf(bp, "BOTH_SYN,"); else { if (d->state & TH_SYN) bprintf(bp, "F_SYN,"); if (d->state & (TH_SYN << 8)) bprintf(bp, "R_SYN,"); } if ((d->state & BOTH_FIN) == BOTH_FIN) bprintf(bp, "BOTH_FIN,"); else { if (d->state & TH_FIN) bprintf(bp, "F_FIN,"); if (d->state & (TH_FIN << 8)) bprintf(bp, "R_FIN,"); } bprintf(bp, " f_ack 0x%x, r_ack 0x%x", d->ack_fwd, d->ack_rev); } } static int do_range_cmd(int cmd, ipfw_range_tlv *rt) { ipfw_range_header rh; size_t sz; memset(&rh, 0, sizeof(rh)); memcpy(&rh.range, rt, sizeof(*rt)); rh.range.head.length = sizeof(*rt); rh.range.head.type = IPFW_TLV_RANGE; sz = sizeof(rh); if (do_get3(cmd, &rh.opheader, &sz) != 0) return (-1); /* Save number of matched objects */ rt->new_set = rh.range.new_set; return (0); } /* * This one handles all set-related commands * ipfw set { show | enable | disable } * ipfw set swap X Y * ipfw set move X to Y * ipfw set move rule X to Y */ void ipfw_sets_handler(char *av[]) { ipfw_range_tlv rt; char *msg; size_t size; uint32_t masks[2]; int i; uint16_t rulenum; uint8_t cmd; av++; memset(&rt, 0, sizeof(rt)); if (av[0] == NULL) errx(EX_USAGE, "set needs command"); if (_substrcmp(*av, "show") == 0) { struct format_opts fo; ipfw_cfg_lheader *cfg; memset(&fo, 0, sizeof(fo)); if (ipfw_get_config(&co, &fo, &cfg, &size) != 0) err(EX_OSERR, "requesting config failed"); for (i = 0, msg = "disable"; i < RESVD_SET; i++) if ((cfg->set_mask & (1<set_mask != (uint32_t)-1) ? " enable" : "enable"; for (i = 0; i < RESVD_SET; i++) if ((cfg->set_mask & (1< RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[0]); if (!isdigit(*(av[1])) || rt.new_set > RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[1]); i = do_range_cmd(IP_FW_SET_SWAP, &rt); } else if (_substrcmp(*av, "move") == 0) { av++; if (av[0] && _substrcmp(*av, "rule") == 0) { rt.flags = IPFW_RCFLAG_RANGE; /* move rules to new set */ cmd = IP_FW_XMOVE; av++; } else cmd = IP_FW_SET_MOVE; /* Move set to new one */ if (av[0] == NULL || av[1] == NULL || av[2] == NULL || av[3] != NULL || _substrcmp(av[1], "to") != 0) errx(EX_USAGE, "syntax: set move [rule] X to Y\n"); rulenum = atoi(av[0]); rt.new_set = atoi(av[2]); if (cmd == IP_FW_XMOVE) { rt.start_rule = rulenum; rt.end_rule = rulenum; } else rt.set = rulenum; rt.new_set = atoi(av[2]); if (!isdigit(*(av[0])) || (cmd == 3 && rt.set > RESVD_SET) || (cmd == 2 && rt.start_rule == IPFW_DEFAULT_RULE) ) errx(EX_DATAERR, "invalid source number %s\n", av[0]); if (!isdigit(*(av[2])) || rt.new_set > RESVD_SET) errx(EX_DATAERR, "invalid dest. set %s\n", av[1]); i = do_range_cmd(cmd, &rt); if (i < 0) err(EX_OSERR, "failed to move %s", cmd == IP_FW_SET_MOVE ? "set": "rule"); } else if (_substrcmp(*av, "disable") == 0 || _substrcmp(*av, "enable") == 0 ) { int which = _substrcmp(*av, "enable") == 0 ? 
1 : 0; av++; masks[0] = masks[1] = 0; while (av[0]) { if (isdigit(**av)) { i = atoi(*av); if (i < 0 || i > RESVD_SET) errx(EX_DATAERR, "invalid set number %d\n", i); masks[which] |= (1<<i); } else if (_substrcmp(*av, "disable") == 0) which = 0; else if (_substrcmp(*av, "enable") == 0) which = 1; else errx(EX_DATAERR, "invalid set command %s\n", *av); av++; } if ((masks[0] & masks[1]) != 0) errx(EX_DATAERR, "cannot enable and disable the same set\n"); rt.set = masks[0]; rt.new_set = masks[1]; i = do_range_cmd(IP_FW_SET_ENABLE, &rt); if (i) warn("set enable/disable: setsockopt(IP_FW_SET_ENABLE)"); } else errx(EX_USAGE, "invalid set command %s\n", *av); } typedef void state_cb(struct cmdline_opts *co, struct format_opts *fo, void *arg, void *state); static void prepare_format_dyn(struct cmdline_opts *co, struct format_opts *fo, void *arg, void *_state) { ipfw_dyn_rule *d; int width; uint8_t set; d = (ipfw_dyn_rule *)_state; /* Count _ALL_ states */ fo->dcnt++; if (fo->show_counters == 0) return; if (co->use_set) { /* skip states from another set */ bcopy((char *)&d->rule + sizeof(uint16_t), &set, sizeof(uint8_t)); if (set != co->use_set - 1) return; } width = pr_u64(NULL, &d->pcnt, 0); if (width > fo->pcwidth) fo->pcwidth = width; width = pr_u64(NULL, &d->bcnt, 0); if (width > fo->bcwidth) fo->bcwidth = width; } static int foreach_state(struct cmdline_opts *co, struct format_opts *fo, caddr_t base, size_t sz, state_cb dyn_bc, void *dyn_arg) { int ttype; state_cb *fptr; void *farg; ipfw_obj_tlv *tlv; ipfw_obj_ctlv *ctlv; fptr = NULL; ttype = 0; while (sz > 0) { ctlv = (ipfw_obj_ctlv *)base; switch (ctlv->head.type) { case IPFW_TLV_DYNSTATE_LIST: base += sizeof(*ctlv); sz -= sizeof(*ctlv); ttype = IPFW_TLV_DYN_ENT; fptr = dyn_bc; farg = dyn_arg; break; default: return (sz); } while (sz > 0) { tlv = (ipfw_obj_tlv *)base; if (tlv->type != ttype) break; fptr(co, fo, farg, tlv + 1); sz -= tlv->length; base += tlv->length; } } return (sz); } static void prepare_format_opts(struct cmdline_opts *co, struct format_opts *fo, ipfw_obj_tlv *rtlv, int rcnt, caddr_t dynbase, size_t dynsz) { int bcwidth, pcwidth, width; int n; struct ip_fw_bcounter *cntr; struct ip_fw_rule *r; bcwidth = 0; pcwidth = 0; if (fo->show_counters != 0) { for (n = 0; n < rcnt; n++, rtlv = (ipfw_obj_tlv *)((caddr_t)rtlv + rtlv->length)) { cntr = (struct ip_fw_bcounter *)(rtlv + 1); r = (struct ip_fw_rule *)((caddr_t)cntr + cntr->size); /* skip rules from another set */ if (co->use_set && r->set != co->use_set - 1) continue; /* packet counter */ width = pr_u64(NULL, &cntr->pcnt, 0); if (width > pcwidth) pcwidth = width; /* byte counter */ width = pr_u64(NULL, &cntr->bcnt, 0); if (width > bcwidth) bcwidth = width; } } fo->bcwidth = bcwidth; fo->pcwidth = pcwidth; fo->dcnt = 0; if (co->do_dynamic && dynsz > 0) foreach_state(co, fo, dynbase, dynsz, prepare_format_dyn, NULL); } static int list_static_range(struct cmdline_opts *co, struct format_opts *fo, struct buf_pr *bp, ipfw_obj_tlv *rtlv, int rcnt) { int n, seen; struct ip_fw_rule *r; struct ip_fw_bcounter *cntr; int c = 0; for (n = seen = 0; n < rcnt; n++, rtlv = (ipfw_obj_tlv *)((caddr_t)rtlv + rtlv->length)) { if ((fo->show_counters | fo->show_time) != 0) { cntr = (struct ip_fw_bcounter *)(rtlv + 1); r = (struct ip_fw_rule *)((caddr_t)cntr + cntr->size); } else { cntr = NULL; r = (struct ip_fw_rule *)(rtlv + 1); } if (r->rulenum > fo->last) break; if (co->use_set && r->set != co->use_set - 1) continue; if (r->rulenum >= fo->first && r->rulenum <= fo->last) { show_static_rule(co, fo, bp, r, cntr); printf("%s", bp->buf); c += rtlv->length; bp_flush(bp); seen++; } } return (seen); } static void list_dyn_state(struct cmdline_opts *co, struct format_opts *fo, void *_arg, void *_state) { uint16_t rulenum; uint8_t set; ipfw_dyn_rule *d; struct buf_pr *bp; d = (ipfw_dyn_rule *)_state; bp = (struct buf_pr *)_arg; bcopy(&d->rule, &rulenum, sizeof(rulenum)); if (rulenum > fo->last) return; if (co->use_set) { bcopy((char *)&d->rule + sizeof(uint16_t), &set, sizeof(uint8_t)); if (set != co->use_set - 1) return; } if (rulenum >= fo->first) { show_dyn_state(co, fo, bp, d); printf("%s\n", bp->buf); bp_flush(bp); } } static int list_dyn_range(struct cmdline_opts *co, struct format_opts *fo, struct buf_pr *bp, caddr_t base, size_t sz) { sz = foreach_state(co, 
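/*
 * Layout of the state buffer consumed by foreach_state() above, as its
 * parsing implies: one container TLV followed by per-entry TLVs, each
 * sized by its own length field:
 *
 *	ipfw_obj_ctlv	head.type == IPFW_TLV_DYNSTATE_LIST
 *	  ipfw_obj_tlv	type == IPFW_TLV_DYN_ENT, length covering the
 *			ipfw_dyn_rule payload that follows it
 *	  ...repeated until the remaining size is exhausted...
 *
 * The callback (prepare_format_dyn() or list_dyn_state()) receives a
 * pointer just past each entry header, i.e. the ipfw_dyn_rule itself.
 */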
fo, base, sz, list_dyn_state, bp); return (sz); } void ipfw_list(int ac, char *av[], int show_counters) { ipfw_cfg_lheader *cfg; struct format_opts sfo; size_t sz; int error; int lac; char **lav; uint32_t rnum; char *endptr; if (co.test_only) { fprintf(stderr, "Testing only, list disabled\n"); return; } if (co.do_pipe) { dummynet_list(ac, av, show_counters); return; } ac--; av++; memset(&sfo, 0, sizeof(sfo)); /* Determine rule range to request */ if (ac > 0) { for (lac = ac, lav = av; lac != 0; lac--) { rnum = strtoul(*lav++, &endptr, 10); if (sfo.first == 0 || rnum < sfo.first) sfo.first = rnum; if (*endptr == '-') rnum = strtoul(endptr + 1, &endptr, 10); if (sfo.last == 0 || rnum > sfo.last) sfo.last = rnum; } } /* get configuration from kernel */ cfg = NULL; sfo.show_counters = show_counters; sfo.show_time = co.do_time; if (co.do_dynamic != 2) sfo.flags |= IPFW_CFG_GET_STATIC; if (co.do_dynamic != 0) sfo.flags |= IPFW_CFG_GET_STATES; if ((sfo.show_counters | sfo.show_time) != 0) sfo.flags |= IPFW_CFG_GET_COUNTERS; if (ipfw_get_config(&co, &sfo, &cfg, &sz) != 0) err(EX_OSERR, "retrieving config failed"); error = ipfw_show_config(&co, &sfo, cfg, sz, ac, av); free(cfg); if (error != EX_OK) exit(error); } static int ipfw_show_config(struct cmdline_opts *co, struct format_opts *fo, ipfw_cfg_lheader *cfg, size_t sz, int ac, char *av[]) { caddr_t dynbase; size_t dynsz; int rcnt; int exitval = EX_OK; int lac; char **lav; char *endptr; size_t readsz; struct buf_pr bp; ipfw_obj_ctlv *ctlv, *tstate; ipfw_obj_tlv *rbase; /* * Handle tablenames TLV first, if any */ tstate = NULL; rbase = NULL; dynbase = NULL; dynsz = 0; readsz = sizeof(*cfg); rcnt = 0; fo->set_mask = cfg->set_mask; ctlv = (ipfw_obj_ctlv *)(cfg + 1); if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { object_sort_ctlv(ctlv); fo->tstate = ctlv; readsz += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } if (cfg->flags & IPFW_CFG_GET_STATIC) { /* We've requested static rules */ if (ctlv->head.type == IPFW_TLV_RULE_LIST) { rbase = (ipfw_obj_tlv *)(ctlv + 1); rcnt = ctlv->count; readsz += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } } if ((cfg->flags & IPFW_CFG_GET_STATES) && (readsz != sz)) { /* We may have some dynamic states */ dynsz = sz - readsz; /* Skip empty header */ if (dynsz != sizeof(ipfw_obj_ctlv)) dynbase = (caddr_t)ctlv; else dynsz = 0; } prepare_format_opts(co, fo, rbase, rcnt, dynbase, dynsz); bp_alloc(&bp, 4096); /* if no rule numbers were specified, list all rules */ if (ac == 0) { fo->first = 0; fo->last = IPFW_DEFAULT_RULE; if (cfg->flags & IPFW_CFG_GET_STATIC) list_static_range(co, fo, &bp, rbase, rcnt); if (co->do_dynamic && dynsz > 0) { printf("## Dynamic rules (%d %zu):\n", fo->dcnt, dynsz); list_dyn_range(co, fo, &bp, dynbase, dynsz); } bp_free(&bp); return (EX_OK); } /* display specific rules requested on command line */ for (lac = ac, lav = av; lac != 0; lac--) { /* convert command line rule # */ fo->last = fo->first = strtoul(*lav++, &endptr, 10); if (*endptr == '-') fo->last = strtoul(endptr + 1, &endptr, 10); if (*endptr) { exitval = EX_USAGE; warnx("invalid rule number: %s", *(lav - 1)); continue; } if ((cfg->flags & IPFW_CFG_GET_STATIC) == 0) continue; if (list_static_range(co, fo, &bp, rbase, rcnt) == 0) { /* give precedence to other error(s) */ if (exitval == EX_OK) exitval = EX_UNAVAILABLE; if (fo->first == fo->last) warnx("rule %u does not exist", fo->first); else warnx("no rules in range %u-%u", fo->first, fo->last); } } if 
(co->do_dynamic && dynsz > 0) { printf("## Dynamic rules:\n"); for (lac = ac, lav = av; lac != 0; lac--) { fo->last = fo->first = strtoul(*lav++, &endptr, 10); if (*endptr == '-') fo->last = strtoul(endptr+1, &endptr, 10); if (*endptr) /* already warned */ continue; list_dyn_range(co, fo, &bp, dynbase, dynsz); } } bp_free(&bp); return (exitval); } /* * Retrieves current ipfw configuration of given type * and stores its pointer to @pcfg. * * Caller is responsible for freeing @pcfg. * * Returns 0 on success. */ static int ipfw_get_config(struct cmdline_opts *co, struct format_opts *fo, ipfw_cfg_lheader **pcfg, size_t *psize) { ipfw_cfg_lheader *cfg; size_t sz; int i; if (co->test_only != 0) { fprintf(stderr, "Testing only, list disabled\n"); return (0); } /* Start with some data size */ sz = 4096; cfg = NULL; for (i = 0; i < 16; i++) { if (cfg != NULL) free(cfg); if ((cfg = calloc(1, sz)) == NULL) return (ENOMEM); cfg->flags = fo->flags; cfg->start_rule = fo->first; cfg->end_rule = fo->last; if (do_get3(IP_FW_XGET, &cfg->opheader, &sz) != 0) { if (errno != ENOMEM) { free(cfg); return (errno); } /* Buffer size is not enough. Try to increase */ sz = sz * 2; if (sz < cfg->size) sz = cfg->size; continue; } *pcfg = cfg; *psize = sz; return (0); } free(cfg); return (ENOMEM); } static int lookup_host (char *host, struct in_addr *ipaddr) { struct hostent *he; if (!inet_aton(host, ipaddr)) { if ((he = gethostbyname(host)) == NULL) return(-1); *ipaddr = *(struct in_addr *)he->h_addr_list[0]; } return(0); } struct tidx { ipfw_obj_ntlv *idx; uint32_t count; uint32_t size; uint16_t counter; uint8_t set; }; int ipfw_check_object_name(const char *name) { int c, i, l; /* * Check that name is null-terminated and contains * valid symbols only. Valid mask is: * [a-zA-Z0-9\-_\.]{1,63} */ l = strlen(name); if (l == 0 || l >= 64) return (EINVAL); for (i = 0; i < l; i++) { c = name[i]; if (isalpha(c) || isdigit(c) || c == '_' || c == '-' || c == '.') continue; return (EINVAL); } return (0); } static char *default_state_name = "default"; static int state_check_name(const char *name) { if (ipfw_check_object_name(name) != 0) return (EINVAL); if (strcmp(name, "any") == 0) return (EINVAL); return (0); } static int eaction_check_name(const char *name) { if (ipfw_check_object_name(name) != 0) return (EINVAL); /* Restrict some 'special' names */ if (match_token(rule_actions, name) != -1 && match_token(rule_action_params, name) != -1) return (EINVAL); return (0); } static uint16_t pack_object(struct tidx *tstate, char *name, int otype) { int i; ipfw_obj_ntlv *ntlv; for (i = 0; i < tstate->count; i++) { if (strcmp(tstate->idx[i].name, name) != 0) continue; if (tstate->idx[i].set != tstate->set) continue; if (tstate->idx[i].head.type != otype) continue; return (tstate->idx[i].idx); } if (tstate->count + 1 > tstate->size) { tstate->size += 4; tstate->idx = realloc(tstate->idx, tstate->size * sizeof(ipfw_obj_ntlv)); if (tstate->idx == NULL) return (0); } ntlv = &tstate->idx[i]; memset(ntlv, 0, sizeof(ipfw_obj_ntlv)); strlcpy(ntlv->name, name, sizeof(ntlv->name)); ntlv->head.type = otype; ntlv->head.length = sizeof(ipfw_obj_ntlv); ntlv->set = tstate->set; ntlv->idx = ++tstate->counter; tstate->count++; return (ntlv->idx); } static uint16_t pack_table(struct tidx *tstate, char *name) { if (table_check_name(name) != 0) return (0); return (pack_object(tstate, name, IPFW_TLV_TBL_NAME)); } void fill_table(struct _ipfw_insn *cmd, char *av, uint8_t opcode, struct tidx *tstate) { uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; uint16_t uidx; 
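/*
 * pack_object() above hands out 1-based indices in first-use order and
 * returns the existing index when the same (name, set, type) tuple shows
 * up again.  A small illustration, with hypothetical table names, kept
 * out of the build:
 */
#if 0
	struct tidx ts = { NULL, 0, 0, 0, 0 };

	pack_table(&ts, "hosts");	/* returns 1: first new object */
	pack_table(&ts, "nets");	/* returns 2: second new object */
	pack_table(&ts, "hosts");	/* returns 1 again: deduplicated */
#endif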
char *p; if ((p = strchr(av + 6, ')')) == NULL) errx(EX_DATAERR, "forgotten parenthesis: '%s'", av); *p = '\0'; p = strchr(av + 6, ','); if (p) *p++ = '\0'; if ((uidx = pack_table(tstate, av + 6)) == 0) errx(EX_DATAERR, "Invalid table name: %s", av + 6); cmd->opcode = opcode; cmd->arg1 = uidx; if (p) { cmd->len |= F_INSN_SIZE(ipfw_insn_u32); d[0] = strtoul(p, NULL, 0); } else cmd->len |= F_INSN_SIZE(ipfw_insn); } /* * fills the addr and mask fields in the instruction as appropriate from av. * Update length as appropriate. * The following formats are allowed: * me returns O_IP_*_ME * 1.2.3.4 single IP address * 1.2.3.4:5.6.7.8 address:mask * 1.2.3.4/24 address/mask * 1.2.3.4/26{1,6,5,4,23} set of addresses in a subnet * We can have multiple comma-separated address/mask entries. */ static void fill_ip(ipfw_insn_ip *cmd, char *av, int cblen, struct tidx *tstate) { int len = 0; uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; cmd->o.len &= ~F_LEN_MASK; /* zero len */ if (_substrcmp(av, "any") == 0) return; if (_substrcmp(av, "me") == 0) { cmd->o.len |= F_INSN_SIZE(ipfw_insn); return; } if (strncmp(av, "table(", 6) == 0) { fill_table(&cmd->o, av, O_IP_DST_LOOKUP, tstate); return; } while (av) { /* * After the address we can have '/' or ':' indicating a mask, * ',' indicating another address follows, '{' indicating a * set of addresses of unspecified size. */ char *t = NULL, *p = strpbrk(av, "/:,{"); int masklen; char md, nd = '\0'; CHECK_LENGTH(cblen, F_INSN_SIZE(ipfw_insn) + 2 + len); if (p) { md = *p; *p++ = '\0'; if ((t = strpbrk(p, ",{")) != NULL) { nd = *t; *t = '\0'; } } else md = '\0'; if (lookup_host(av, (struct in_addr *)&d[0]) != 0) errx(EX_NOHOST, "hostname ``%s'' unknown", av); switch (md) { case ':': if (!inet_aton(p, (struct in_addr *)&d[1])) errx(EX_DATAERR, "bad netmask ``%s''", p); break; case '/': masklen = atoi(p); if (masklen == 0) d[1] = htonl(0U); /* mask */ else if (masklen > 32) errx(EX_DATAERR, "bad width ``%s''", p); else d[1] = htonl(~0U << (32 - masklen)); break; case '{': /* no mask, assume /24 and put back the '{' */ d[1] = htonl(~0U << (32 - 24)); *(--p) = md; break; case ',': /* single address plus continuation */ *(--p) = md; /* FALLTHROUGH */ case 0: /* initialization value */ default: d[1] = htonl(~0U); /* force /32 */ break; } d[0] &= d[1]; /* mask base address with mask */ if (t) *t = nd; /* find next separator */ if (p) p = strpbrk(p, ",{"); if (p && *p == '{') { /* * We have a set of addresses. They are stored as follows: * arg1 is the set size (powers of 2, 2..256) * addr is the base address IN HOST FORMAT * mask.. is an array of arg1 bits (rounded up to * the next multiple of 32) with bits set * for each host in the map. */ uint32_t *map = (uint32_t *)&cmd->mask; int low, high; int i = contigmask((uint8_t *)&(d[1]), 32); if (len > 0) errx(EX_DATAERR, "address set cannot be in a list"); if (i < 24 || i > 31) errx(EX_DATAERR, "invalid set with mask %d\n", i); cmd->o.arg1 = 1<<(32-i); /* map length */ d[0] = ntohl(d[0]); /* base addr in host format */ cmd->o.opcode = O_IP_DST_SET; /* default */ cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + (cmd->o.arg1+31)/32; for (i = 0; i < (cmd->o.arg1+31)/32 ; i++) map[i] = 0; /* clear map */ av = p + 1; low = d[0] & 0xff; high = low + cmd->o.arg1 - 1; /* * Here, i stores the previous value when we specify a range * of addresses within a mask, e.g. 45-63. i = -1 means we * have no previous value. 
*/ i = -1; /* previous value in a range */ while (isdigit(*av)) { char *s; int a = strtol(av, &s, 0); if (s == av) { /* no parameter */ if (*av != '}') errx(EX_DATAERR, "set not closed\n"); if (i != -1) errx(EX_DATAERR, "incomplete range %d-", i); break; } if (a < low || a > high) errx(EX_DATAERR, "addr %d out of range [%d-%d]\n", a, low, high); a -= low; if (i == -1) /* no previous in range */ i = a; else { /* check that range is valid */ if (i > a) errx(EX_DATAERR, "invalid range %d-%d", i+low, a+low); if (*s == '-') errx(EX_DATAERR, "double '-' in range"); } for (; i <= a; i++) map[i/32] |= 1<<(i & 31); i = -1; if (*s == '-') i = a; else if (*s == '}') break; av = s+1; } return; } av = p; if (av) /* then *av must be a ',' */ av++; /* Check this entry */ if (d[1] == 0) { /* "any", specified as x.x.x.x/0 */ /* * 'any' turns the entire list into a NOP. * 'not any' never matches, so it is removed from the * list unless it is the only item, in which case we * report an error. */ if (cmd->o.len & F_NOT) { /* "not any" never matches */ if (av == NULL && len == 0) /* only this entry */ errx(EX_DATAERR, "not any never matches"); } /* else do nothing and skip this entry */ return; } /* A single IP can be stored in an optimized format */ if (d[1] == (uint32_t)~0 && av == NULL && len == 0) { cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); return; } len += 2; /* two words... */ d += 2; } /* end while */ if (len + 1 > F_LEN_MASK) errx(EX_DATAERR, "address list too long"); cmd->o.len |= len+1; } /* n2mask sets n bits of the mask */ void n2mask(struct in6_addr *mask, int n) { static int minimask[9] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; u_char *p; memset(mask, 0, sizeof(struct in6_addr)); p = (u_char *) mask; for (; n > 0; p++, n -= 8) { if (n >= 8) *p = 0xff; else *p = minimask[n]; } return; } static void fill_flags_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, struct _s_x *flags, char *p) { char *e; uint32_t set = 0, clear = 0; if (fill_flags(flags, p, &e, &set, &clear) != 0) errx(EX_DATAERR, "invalid flag %s", e); cmd->opcode = opcode; cmd->len = (cmd->len & (F_NOT | F_OR)) | 1; cmd->arg1 = (set & 0xff) | ( (clear & 0xff) << 8); } void ipfw_delete(char *av[]) { ipfw_range_tlv rt; char *sep; int i, j; int exitval = EX_OK; int do_set = 0; av++; NEED1("missing rule specification"); if ( *av && _substrcmp(*av, "set") == 0) { /* Do not allow using the following syntax: * ipfw set N delete set M */ if (co.use_set) errx(EX_DATAERR, "invalid syntax"); do_set = 1; /* delete set */ av++; } /* Rule number */ while (*av && isdigit(**av)) { i = strtol(*av, &sep, 10); j = i; if (*sep== '-') j = strtol(sep + 1, NULL, 10); av++; if (co.do_nat) { exitval = do_cmd(IP_FW_NAT_DEL, &i, sizeof i); if (exitval) { exitval = EX_UNAVAILABLE; if (co.do_quiet) continue; warn("nat %u not available", i); } } else if (co.do_pipe) { exitval = ipfw_delete_pipe(co.do_pipe, i); } else { memset(&rt, 0, sizeof(rt)); if (do_set != 0) { rt.set = i & 31; rt.flags = IPFW_RCFLAG_SET; } else { rt.start_rule = i & 0xffff; rt.end_rule = j & 0xffff; if (rt.start_rule == 0 && rt.end_rule == 0) rt.flags |= IPFW_RCFLAG_ALL; else rt.flags |= IPFW_RCFLAG_RANGE; if (co.use_set != 0) { rt.set = co.use_set - 1; rt.flags |= IPFW_RCFLAG_SET; } } if (co.do_dynamic == 2) rt.flags |= IPFW_RCFLAG_DYNAMIC; i = do_range_cmd(IP_FW_XDEL, &rt); if (i != 0) { exitval = EX_UNAVAILABLE; if (co.do_quiet) continue; warn("rule %u: setsockopt(IP_FW_XDEL)", rt.start_rule); } else if (rt.new_set == 0 && do_set == 0 && co.do_dynamic != 2) { exitval = 
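/*
 * Worked example of the address-set encoding handled by fill_ip() above
 * (addresses are hypothetical): "1.2.3.0/24{5,7,9-11}" stores
 * arg1 = 256 (the map spans 2^(32-24) hosts), the base address 1.2.3.0
 * in host byte order, and a 256-bit map with bits 5, 7, 9, 10 and 11
 * set -- bit N standing for host base+N inside the subnet.
 */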
EX_UNAVAILABLE; if (co.do_quiet) continue; if (rt.start_rule != rt.end_rule) warnx("no rules in %u-%u range", rt.start_rule, rt.end_rule); else warnx("rule %u not found", rt.start_rule); } } } if (exitval != EX_OK && co.do_force == 0) exit(exitval); } /* * fill the interface structure. We do not check the name as we can * create interfaces dynamically, so checking them at insert time * makes relatively little sense. * Interface names containing '*', '?', or '[' are assumed to be shell * patterns which match interfaces. */ static void fill_iface(ipfw_insn_if *cmd, char *arg, int cblen, struct tidx *tstate) { char *p; uint16_t uidx; cmd->name[0] = '\0'; cmd->o.len |= F_INSN_SIZE(ipfw_insn_if); CHECK_CMDLEN; /* Parse the interface or address */ if (strcmp(arg, "any") == 0) cmd->o.len = 0; /* effectively ignore this command */ else if (strncmp(arg, "table(", 6) == 0) { if ((p = strchr(arg + 6, ')')) == NULL) errx(EX_DATAERR, "forgotten parenthesis: '%s'", arg); *p = '\0'; p = strchr(arg + 6, ','); if (p) *p++ = '\0'; if ((uidx = pack_table(tstate, arg + 6)) == 0) errx(EX_DATAERR, "Invalid table name: %s", arg + 6); cmd->name[0] = '\1'; /* Special value indicating table */ cmd->p.kidx = uidx; } else if (!isdigit(*arg)) { strlcpy(cmd->name, arg, sizeof(cmd->name)); cmd->p.glob = strpbrk(arg, "*?[") != NULL ? 1 : 0; } else if (!inet_aton(arg, &cmd->p.ip)) errx(EX_DATAERR, "bad ip address ``%s''", arg); } static void get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask) { int i; size_t l; char *ap, *ptr, *optr; struct ether_addr *mac; const char *macset = "0123456789abcdefABCDEF:"; if (strcmp(p, "any") == 0) { for (i = 0; i < ETHER_ADDR_LEN; i++) addr[i] = mask[i] = 0; return; } optr = ptr = strdup(p); if ((ap = strsep(&ptr, "&/")) != NULL && *ap != 0) { l = strlen(ap); if (strspn(ap, macset) != l || (mac = ether_aton(ap)) == NULL) errx(EX_DATAERR, "Incorrect MAC address"); bcopy(mac, addr, ETHER_ADDR_LEN); } else errx(EX_DATAERR, "Incorrect MAC address"); if (ptr != NULL) { /* do we have a mask? */ if (p[ptr - optr - 1] == '/') { /* mask len */ long ml = strtol(ptr, &ap, 10); if (*ap != 0 || ml > ETHER_ADDR_LEN * 8 || ml < 0) errx(EX_DATAERR, "Incorrect mask length"); for (i = 0; ml > 0 && i < ETHER_ADDR_LEN; ml -= 8, i++) mask[i] = (ml >= 8) ? 0xff: (~0) << (8 - ml); } else { /* mask */ l = strlen(ptr); if (strspn(ptr, macset) != l || (mac = ether_aton(ptr)) == NULL) errx(EX_DATAERR, "Incorrect mask"); bcopy(mac, mask, ETHER_ADDR_LEN); } } else { /* default mask: ff:ff:ff:ff:ff:ff */ for (i = 0; i < ETHER_ADDR_LEN; i++) mask[i] = 0xff; } for (i = 0; i < ETHER_ADDR_LEN; i++) addr[i] &= mask[i]; free(optr); } /* * helper function, updates the pointer to cmd with the length * of the current command, and also cleans up the first word of * the new command in case it has been clobbered before. */ static ipfw_insn * next_cmd(ipfw_insn *cmd, int *len) { *len -= F_LEN(cmd); CHECK_LENGTH(*len, 0); cmd += F_LEN(cmd); bzero(cmd, sizeof(*cmd)); return cmd; } /* * Takes arguments and copies them into a comment */ static void fill_comment(ipfw_insn *cmd, char **av, int cblen) { int i, l; char *p = (char *)(cmd + 1); cmd->opcode = O_NOP; cmd->len = (cmd->len & (F_NOT | F_OR)); /* Compute length of comment string. 
*/ for (i = 0, l = 0; av[i] != NULL; i++) l += strlen(av[i]) + 1; if (l == 0) return; if (l > 84) errx(EX_DATAERR, "comment too long (max 80 chars)"); l = 1 + (l+3)/4; cmd->len = (cmd->len & (F_NOT | F_OR)) | l; CHECK_CMDLEN; for (i = 0; av[i] != NULL; i++) { strcpy(p, av[i]); p += strlen(av[i]); *p++ = ' '; } *(--p) = '\0'; } /* * A function to fill simple commands of size 1. * Existing flags are preserved. */ static void fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, uint16_t arg) { cmd->opcode = opcode; cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | 1; cmd->arg1 = arg; } /* * Fetch and add the MAC address and type, with masks. This generates one or * two microinstructions, and returns the pointer to the last one. */ static ipfw_insn * add_mac(ipfw_insn *cmd, char *av[], int cblen) { ipfw_insn_mac *mac; if ( ( av[0] == NULL ) || ( av[1] == NULL ) ) errx(EX_DATAERR, "MAC dst src"); cmd->opcode = O_MACADDR2; cmd->len = (cmd->len & (F_NOT | F_OR)) | F_INSN_SIZE(ipfw_insn_mac); CHECK_CMDLEN; mac = (ipfw_insn_mac *)cmd; get_mac_addr_mask(av[0], mac->addr, mac->mask); /* dst */ get_mac_addr_mask(av[1], &(mac->addr[ETHER_ADDR_LEN]), &(mac->mask[ETHER_ADDR_LEN])); /* src */ return cmd; } static ipfw_insn * add_mactype(ipfw_insn *cmd, char *av, int cblen) { if (!av) errx(EX_DATAERR, "missing MAC type"); if (strcmp(av, "any") != 0) { /* we have a non-null type */ fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE, cblen); cmd->opcode = O_MAC_TYPE; return cmd; } else return NULL; } static ipfw_insn * add_proto0(ipfw_insn *cmd, char *av, u_char *protop) { struct protoent *pe; char *ep; int proto; proto = strtol(av, &ep, 10); if (*ep != '\0' || proto <= 0) { if ((pe = getprotobyname(av)) == NULL) return NULL; proto = pe->p_proto; } fill_cmd(cmd, O_PROTO, 0, proto); *protop = proto; return cmd; } static ipfw_insn * add_proto(ipfw_insn *cmd, char *av, u_char *protop) { u_char proto = IPPROTO_IP; if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) ; /* do not set O_IP4 nor O_IP6 */ else if (strcmp(av, "ip4") == 0) /* explicit "just IPv4" rule */ fill_cmd(cmd, O_IP4, 0, 0); else if (strcmp(av, "ip6") == 0) { /* explicit "just IPv6" rule */ proto = IPPROTO_IPV6; fill_cmd(cmd, O_IP6, 0, 0); } else return add_proto0(cmd, av, protop); *protop = proto; return cmd; } static ipfw_insn * add_proto_compat(ipfw_insn *cmd, char *av, u_char *protop) { u_char proto = IPPROTO_IP; if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) ; /* do not set O_IP4 nor O_IP6 */ else if (strcmp(av, "ipv4") == 0 || strcmp(av, "ip4") == 0) /* explicit "just IPv4" rule */ fill_cmd(cmd, O_IP4, 0, 0); else if (strcmp(av, "ipv6") == 0 || strcmp(av, "ip6") == 0) { /* explicit "just IPv6" rule */ proto = IPPROTO_IPV6; fill_cmd(cmd, O_IP6, 0, 0); } else return add_proto0(cmd, av, protop); *protop = proto; return cmd; } static ipfw_insn * add_srcip(ipfw_insn *cmd, char *av, int cblen, struct tidx *tstate) { fill_ip((ipfw_insn_ip *)cmd, av, cblen, tstate); if (cmd->opcode == O_IP_DST_SET) /* set */ cmd->opcode = O_IP_SRC_SET; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ cmd->opcode = O_IP_SRC_LOOKUP; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_SRC_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ cmd->opcode = O_IP_SRC; else /* addr/mask */ cmd->opcode = O_IP_SRC_MASK; return cmd; } static ipfw_insn * add_dstip(ipfw_insn *cmd, char *av, int cblen, struct tidx *tstate) { fill_ip((ipfw_insn_ip *)cmd, av, cblen, tstate); if (cmd->opcode == O_IP_DST_SET) 
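/*
 * Summary of the opcode rewrite done by add_srcip() above and mirrored
 * just below in add_dstip(): fill_ip() always emits "destination"
 * opcodes, which are then retyped based on the instruction length:
 *
 *	fill_ip() result		src variant	dst variant
 *	O_IP_DST_SET (address set)	O_IP_SRC_SET	unchanged
 *	O_IP_DST_LOOKUP (table)		O_IP_SRC_LOOKUP	unchanged
 *	len == ipfw_insn ("me")		O_IP_SRC_ME	O_IP_DST_ME
 *	len == ipfw_insn_u32 (one IP)	O_IP_SRC	O_IP_DST
 *	longer (addr/mask list)		O_IP_SRC_MASK	O_IP_DST_MASK
 */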
/* set */ ; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ ; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_DST_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ cmd->opcode = O_IP_DST; else /* addr/mask */ cmd->opcode = O_IP_DST_MASK; return cmd; } static struct _s_x f_reserved_keywords[] = { { "altq", TOK_OR }, { "//", TOK_OR }, { "diverted", TOK_OR }, { "dst-port", TOK_OR }, { "src-port", TOK_OR }, { "established", TOK_OR }, { "keep-state", TOK_OR }, { "frag", TOK_OR }, { "icmptypes", TOK_OR }, { "in", TOK_OR }, { "out", TOK_OR }, { "ip6", TOK_OR }, { "any", TOK_OR }, { "to", TOK_OR }, { "via", TOK_OR }, { "{", TOK_OR }, { NULL, 0 } /* terminator */ }; static ipfw_insn * add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode, int cblen) { if (match_token(f_reserved_keywords, av) != -1) return (NULL); if (fill_newports((ipfw_insn_u16 *)cmd, av, proto, cblen)) { /* XXX todo: check that we have a protocol with ports */ cmd->opcode = opcode; return cmd; } return NULL; } static ipfw_insn * add_src(ipfw_insn *cmd, char *av, u_char proto, int cblen, struct tidx *tstate) { struct in6_addr a; char *host, *ch, buf[INET6_ADDRSTRLEN]; ipfw_insn *ret = NULL; int len; /* Copy first address in set if needed */ if ((ch = strpbrk(av, "/,")) != NULL) { len = ch - av; strlcpy(buf, av, sizeof(buf)); if (len < sizeof(buf)) buf[len] = '\0'; host = buf; } else host = av; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || inet_pton(AF_INET6, host, &a) == 1) ret = add_srcip6(cmd, av, cblen, tstate); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || inet_pton(AF_INET6, host, &a) != 1)) ret = add_srcip(cmd, av, cblen, tstate); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; return ret; } static ipfw_insn * add_dst(ipfw_insn *cmd, char *av, u_char proto, int cblen, struct tidx *tstate) { struct in6_addr a; char *host, *ch, buf[INET6_ADDRSTRLEN]; ipfw_insn *ret = NULL; int len; /* Copy first address in set if needed */ if ((ch = strpbrk(av, "/,")) != NULL) { len = ch - av; strlcpy(buf, av, sizeof(buf)); if (len < sizeof(buf)) buf[len] = '\0'; host = buf; } else host = av; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || inet_pton(AF_INET6, host, &a) == 1) ret = add_dstip6(cmd, av, cblen, tstate); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || inet_pton(AF_INET6, host, &a) != 1)) ret = add_dstip(cmd, av, cblen, tstate); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; return ret; } /* * Parse arguments and assemble the microinstructions which make up a rule. * Rules are added into the 'rulebuf' and then copied in the correct order * into the actual rule. * * The syntax for a rule starts with the action, followed by * optional action parameters, and the various match patterns. * In the assembled microcode, the first opcode must be an O_PROBE_STATE * (generated if the rule includes a keep-state option), then the * various match patterns, log/altq actions, and the actual action. * */ void compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) { /* * rules are added into the 'rulebuf' and then copied in * the correct order into the actual rule. * Some things that need to go out of order (prob, action etc.) * go into actbuf[]. 
*/ static uint32_t actbuf[255], cmdbuf[255]; int rblen, ablen, cblen; ipfw_insn *src, *dst, *cmd, *action, *prev=NULL; ipfw_insn *first_cmd; /* first match pattern */ struct ip_fw_rule *rule; /* * various flags used to record that we entered some fields. */ ipfw_insn *have_state = NULL; /* any state-related option */ int have_rstate = 0; ipfw_insn *have_log = NULL, *have_altq = NULL, *have_tag = NULL; ipfw_insn *have_skipcmd = NULL; size_t len; int i; int open_par = 0; /* open parenthesis ( */ /* proto is here because it is used to fetch ports */ u_char proto = IPPROTO_IP; /* default protocol */ double match_prob = 1; /* match probability, default is always match */ bzero(actbuf, sizeof(actbuf)); /* actions go here */ bzero(cmdbuf, sizeof(cmdbuf)); bzero(rbuf, *rbufsize); rule = (struct ip_fw_rule *)rbuf; cmd = (ipfw_insn *)cmdbuf; action = (ipfw_insn *)actbuf; rblen = *rbufsize / sizeof(uint32_t); rblen -= sizeof(struct ip_fw_rule) / sizeof(uint32_t); ablen = sizeof(actbuf) / sizeof(actbuf[0]); cblen = sizeof(cmdbuf) / sizeof(cmdbuf[0]); cblen -= F_INSN_SIZE(ipfw_insn_u32) + 1; #define CHECK_RBUFLEN(len) { CHECK_LENGTH(rblen, len); rblen -= len; } #define CHECK_ACTLEN CHECK_LENGTH(ablen, action->len) av++; /* [rule N] -- Rule number optional */ if (av[0] && isdigit(**av)) { rule->rulenum = atoi(*av); av++; } /* [set N] -- set number (0..RESVD_SET), optional */ if (av[0] && av[1] && _substrcmp(*av, "set") == 0) { int set = strtoul(av[1], NULL, 10); if (set < 0 || set > RESVD_SET) errx(EX_DATAERR, "illegal set %s", av[1]); rule->set = set; tstate->set = set; av += 2; } /* [prob D] -- match probability, optional */ if (av[0] && av[1] && _substrcmp(*av, "prob") == 0) { match_prob = strtod(av[1], NULL); if (match_prob <= 0 || match_prob > 1) errx(EX_DATAERR, "illegal match prob. 
%s", av[1]); av += 2; } /* action -- mandatory */ NEED1("missing action"); i = match_token(rule_actions, *av); av++; action->len = 1; /* default */ CHECK_ACTLEN; switch(i) { case TOK_CHECKSTATE: have_state = action; action->opcode = O_CHECK_STATE; if (*av == NULL || match_token(rule_options, *av) == TOK_COMMENT) { action->arg1 = pack_object(tstate, default_state_name, IPFW_TLV_STATE_NAME); break; } if (*av[0] == ':') { if (strcmp(*av + 1, "any") == 0) action->arg1 = 0; else if (state_check_name(*av + 1) == 0) action->arg1 = pack_object(tstate, *av + 1, IPFW_TLV_STATE_NAME); else errx(EX_DATAERR, "Invalid state name %s", *av); av++; break; } errx(EX_DATAERR, "Invalid state name %s", *av); break; case TOK_ABORT: action->opcode = O_REJECT; action->arg1 = ICMP_REJECT_ABORT; break; case TOK_ABORT6: action->opcode = O_UNREACH6; action->arg1 = ICMP6_UNREACH_ABORT; break; case TOK_ACCEPT: action->opcode = O_ACCEPT; break; case TOK_DENY: action->opcode = O_DENY; action->arg1 = 0; break; case TOK_REJECT: action->opcode = O_REJECT; action->arg1 = ICMP_UNREACH_HOST; break; case TOK_RESET: action->opcode = O_REJECT; action->arg1 = ICMP_REJECT_RST; break; case TOK_RESET6: action->opcode = O_UNREACH6; action->arg1 = ICMP6_UNREACH_RST; break; case TOK_UNREACH: action->opcode = O_REJECT; NEED1("missing reject code"); fill_reject_code(&action->arg1, *av); av++; break; case TOK_UNREACH6: action->opcode = O_UNREACH6; NEED1("missing unreach code"); fill_unreach6_code(&action->arg1, *av); av++; break; case TOK_COUNT: action->opcode = O_COUNT; break; case TOK_NAT: action->opcode = O_NAT; action->len = F_INSN_SIZE(ipfw_insn_nat); CHECK_ACTLEN; if (*av != NULL && _substrcmp(*av, "global") == 0) { action->arg1 = IP_FW_NAT44_GLOBAL; av++; break; } else goto chkarg; case TOK_QUEUE: action->opcode = O_QUEUE; goto chkarg; case TOK_PIPE: action->opcode = O_PIPE; goto chkarg; case TOK_SKIPTO: action->opcode = O_SKIPTO; goto chkarg; case TOK_NETGRAPH: action->opcode = O_NETGRAPH; goto chkarg; case TOK_NGTEE: action->opcode = O_NGTEE; goto chkarg; case TOK_DIVERT: action->opcode = O_DIVERT; goto chkarg; case TOK_TEE: action->opcode = O_TEE; goto chkarg; case TOK_CALL: action->opcode = O_CALLRETURN; chkarg: if (!av[0]) errx(EX_USAGE, "missing argument for %s", *(av - 1)); if (isdigit(**av)) { action->arg1 = strtoul(*av, NULL, 10); if (action->arg1 <= 0 || action->arg1 >= IP_FW_TABLEARG) errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); } else if (_substrcmp(*av, "tablearg") == 0) { action->arg1 = IP_FW_TARG; } else if (i == TOK_DIVERT || i == TOK_TEE) { struct servent *s; setservent(1); s = getservbyname(av[0], "divert"); if (s != NULL) action->arg1 = ntohs(s->s_port); else errx(EX_DATAERR, "illegal divert/tee port"); } else errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); av++; break; case TOK_FORWARD: { /* * Locate the address-port separator (':' or ','). * Could be one of the following: * hostname:port * IPv4 a.b.c.d,port * IPv4 a.b.c.d:port * IPv6 w:x:y::z,port * The ':' can only be used with hostname and IPv4 address. * XXX-BZ Should we also support [w:x:y::z]:port? */ struct sockaddr_storage result; struct addrinfo *res; char *s, *end; int family; u_short port_number; NEED1("missing forward address[:port]"); /* * locate the address-port separator (':' or ',') */ s = strchr(*av, ','); if (s == NULL) { /* Distinguish between IPv4:port and IPv6 cases. 
*/ s = strchr(*av, ':'); if (s && strchr(s+1, ':')) s = NULL; /* no port */ } port_number = 0; if (s != NULL) { /* Terminate host portion and set s to start of port. */ *(s++) = '\0'; i = strtoport(s, &end, 0 /* base */, 0 /* proto */); if (s == end) errx(EX_DATAERR, "illegal forwarding port ``%s''", s); port_number = (u_short)i; } if (_substrcmp(*av, "tablearg") == 0) { family = PF_INET; ((struct sockaddr_in*)&result)->sin_addr.s_addr = INADDR_ANY; } else { /* * Resolve the host name or address to a family and a * network representation of the address. */ if (getaddrinfo(*av, NULL, NULL, &res)) errx(EX_DATAERR, NULL); /* Just use the first host in the answer. */ family = res->ai_family; memcpy(&result, res->ai_addr, res->ai_addrlen); freeaddrinfo(res); } if (family == PF_INET) { ipfw_insn_sa *p = (ipfw_insn_sa *)action; action->opcode = O_FORWARD_IP; action->len = F_INSN_SIZE(ipfw_insn_sa); CHECK_ACTLEN; /* * In the kernel we assume AF_INET and use only * sin_port and sin_addr. Remember to set sin_len as * the routing code seems to use it too. */ p->sa.sin_len = sizeof(struct sockaddr_in); p->sa.sin_family = AF_INET; p->sa.sin_port = port_number; p->sa.sin_addr.s_addr = ((struct sockaddr_in *)&result)->sin_addr.s_addr; } else if (family == PF_INET6) { ipfw_insn_sa6 *p = (ipfw_insn_sa6 *)action; action->opcode = O_FORWARD_IP6; action->len = F_INSN_SIZE(ipfw_insn_sa6); CHECK_ACTLEN; p->sa.sin6_len = sizeof(struct sockaddr_in6); p->sa.sin6_family = AF_INET6; p->sa.sin6_port = port_number; p->sa.sin6_flowinfo = 0; p->sa.sin6_scope_id = ((struct sockaddr_in6 *)&result)->sin6_scope_id; bcopy(&((struct sockaddr_in6*)&result)->sin6_addr, &p->sa.sin6_addr, sizeof(p->sa.sin6_addr)); } else { errx(EX_DATAERR, "Invalid address family in forward action"); } av++; break; } case TOK_COMMENT: /* pretend it is a 'count' rule followed by the comment */ action->opcode = O_COUNT; av--; /* go back... 
*/ break; case TOK_SETFIB: { int numfibs; size_t intsize = sizeof(int); action->opcode = O_SETFIB; NEED1("missing fib number"); if (_substrcmp(*av, "tablearg") == 0) { action->arg1 = IP_FW_TARG; } else { action->arg1 = strtoul(*av, NULL, 10); if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) errx(EX_DATAERR, "fibs not supported.\n"); if (action->arg1 >= numfibs) /* Temporary */ errx(EX_DATAERR, "fib too large.\n"); /* Add high-order bit to fib to make room for tablearg */ action->arg1 |= 0x8000; } av++; break; } case TOK_SETDSCP: { int code; action->opcode = O_SETDSCP; NEED1("missing DSCP code"); if (_substrcmp(*av, "tablearg") == 0) { action->arg1 = IP_FW_TARG; } else { if (isalpha(*av[0])) { if ((code = match_token(f_ipdscp, *av)) == -1) errx(EX_DATAERR, "Unknown DSCP code"); action->arg1 = code; } else action->arg1 = strtoul(*av, NULL, 10); /* * Add high-order bit to DSCP to make room * for tablearg */ action->arg1 |= 0x8000; } av++; break; } case TOK_REASS: action->opcode = O_REASS; break; case TOK_RETURN: fill_cmd(action, O_CALLRETURN, F_NOT, 0); break; case TOK_TCPSETMSS: { u_long mss; uint16_t idx; idx = pack_object(tstate, "tcp-setmss", IPFW_TLV_EACTION); if (idx == 0) errx(EX_DATAERR, "pack_object failed"); fill_cmd(action, O_EXTERNAL_ACTION, 0, idx); NEED1("Missing MSS value"); action = next_cmd(action, &ablen); action->len = 1; CHECK_ACTLEN; mss = strtoul(*av, NULL, 10); if (mss == 0 || mss > UINT16_MAX) errx(EX_USAGE, "invalid MSS value %s", *av); fill_cmd(action, O_EXTERNAL_DATA, 0, (uint16_t)mss); av++; break; } default: av--; if (match_token(rule_eactions, *av) == -1) errx(EX_DATAERR, "invalid action %s\n", *av); /* * External actions support. * XXX: we support only syntax with instance name. * For known external actions (from rule_eactions list) * we can handle syntax directly. But with `eaction' * keyword we can use only `eaction <name> <instance>' * syntax. */ case TOK_EACTION: { uint16_t idx; NEED1("Missing eaction name"); if (eaction_check_name(*av) != 0) errx(EX_DATAERR, "Invalid eaction name %s", *av); idx = pack_object(tstate, *av, IPFW_TLV_EACTION); if (idx == 0) errx(EX_DATAERR, "pack_object failed"); fill_cmd(action, O_EXTERNAL_ACTION, 0, idx); av++; NEED1("Missing eaction instance name"); action = next_cmd(action, &ablen); action->len = 1; CHECK_ACTLEN; if (eaction_check_name(*av) != 0) errx(EX_DATAERR, "Invalid eaction instance name %s", *av); /* * The external action instance object has a TLV type * derived from the external action name object index. * Since we currently don't know this index, use zero * as the TLV type. */ idx = pack_object(tstate, *av, 0); if (idx == 0) errx(EX_DATAERR, "pack_object failed"); fill_cmd(action, O_EXTERNAL_INSTANCE, 0, idx); av++; } } action = next_cmd(action, &ablen); /* * [altq queuename] -- altq tag, optional * [log [logamount N]] -- log, optional * * If they exist, they go first in the cmdbuf, but they are * then skipped in the copy stage and moved to the end of the buffer. 
*/ while (av[0] != NULL && (i = match_token(rule_action_params, *av)) != -1) { av++; switch (i) { case TOK_LOG: { ipfw_insn_log *c = (ipfw_insn_log *)cmd; int l; if (have_log) errx(EX_DATAERR, "log cannot be specified more than once"); have_log = (ipfw_insn *)c; cmd->len = F_INSN_SIZE(ipfw_insn_log); CHECK_CMDLEN; cmd->opcode = O_LOG; if (av[0] && _substrcmp(*av, "logamount") == 0) { av++; NEED1("logamount requires argument"); l = atoi(*av); if (l < 0) errx(EX_DATAERR, "logamount must be positive"); c->max_log = l; av++; } else { len = sizeof(c->max_log); if (sysctlbyname("net.inet.ip.fw.verbose_limit", &c->max_log, &len, NULL, 0) == -1) { if (co.test_only) { c->max_log = 0; break; } errx(1, "sysctlbyname(\"%s\")", "net.inet.ip.fw.verbose_limit"); } } } break; #ifndef NO_ALTQ case TOK_ALTQ: { ipfw_insn_altq *a = (ipfw_insn_altq *)cmd; NEED1("missing altq queue name"); if (have_altq) errx(EX_DATAERR, "altq cannot be specified more than once"); have_altq = (ipfw_insn *)a; cmd->len = F_INSN_SIZE(ipfw_insn_altq); CHECK_CMDLEN; cmd->opcode = O_ALTQ; a->qid = altq_name_to_qid(*av); av++; } break; #endif case TOK_TAG: case TOK_UNTAG: { uint16_t tag; if (have_tag) errx(EX_USAGE, "tag and untag cannot be " "specified more than once"); GET_UINT_ARG(tag, IPFW_ARG_MIN, IPFW_ARG_MAX, i, rule_action_params); have_tag = cmd; fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 0: F_NOT, tag); av++; break; } default: abort(); } cmd = next_cmd(cmd, &cblen); } if (have_state) { /* must be a check-state, we are done */ if (*av != NULL && match_token(rule_options, *av) == TOK_COMMENT) { /* check-state has a comment */ av++; fill_comment(cmd, av, cblen); cmd = next_cmd(cmd, &cblen); av[0] = NULL; } goto done; } #define OR_START(target) \ if (av[0] && (*av[0] == '(' || *av[0] == '{')) { \ if (open_par) \ errx(EX_USAGE, "nested \"(\" not allowed\n"); \ prev = NULL; \ open_par = 1; \ if ( (av[0])[1] == '\0') { \ av++; \ } else \ (*av)++; \ } \ target: \ #define CLOSE_PAR \ if (open_par) { \ if (av[0] && ( \ strcmp(*av, ")") == 0 || \ strcmp(*av, "}") == 0)) { \ prev = NULL; \ open_par = 0; \ av++; \ } else \ errx(EX_USAGE, "missing \")\"\n"); \ } #define NOT_BLOCK \ if (av[0] && _substrcmp(*av, "not") == 0) { \ if (cmd->len & F_NOT) \ errx(EX_USAGE, "double \"not\" not allowed\n"); \ cmd->len |= F_NOT; \ av++; \ } #define OR_BLOCK(target) \ if (av[0] && _substrcmp(*av, "or") == 0) { \ if (prev == NULL || open_par == 0) \ errx(EX_DATAERR, "invalid OR block"); \ prev->len |= F_OR; \ av++; \ goto target; \ } \ CLOSE_PAR; first_cmd = cmd; #if 0 /* * MAC addresses, optional. * If we have this, we skip the part "proto from src to dst" * and jump straight to the option parsing. 
*/ NOT_BLOCK; NEED1("missing protocol"); if (_substrcmp(*av, "MAC") == 0 || _substrcmp(*av, "mac") == 0) { av++; /* the "MAC" keyword */ add_mac(cmd, av); /* exits in case of errors */ cmd = next_cmd(cmd); av += 2; /* dst-mac and src-mac */ NOT_BLOCK; NEED1("missing mac type"); if (add_mactype(cmd, av[0])) cmd = next_cmd(cmd); av++; /* any or mac-type */ goto read_options; } #endif /* * protocol, mandatory */ OR_START(get_proto); NOT_BLOCK; NEED1("missing protocol"); if (add_proto_compat(cmd, *av, &proto)) { av++; if (F_LEN(cmd) != 0) { prev = cmd; cmd = next_cmd(cmd, &cblen); } } else if (first_cmd != cmd) { errx(EX_DATAERR, "invalid protocol ``%s''", *av); } else { rule->flags |= IPFW_RULE_JUSTOPTS; goto read_options; } OR_BLOCK(get_proto); /* * "from", mandatory */ if ((av[0] == NULL) || _substrcmp(*av, "from") != 0) errx(EX_USAGE, "missing ``from''"); av++; /* * source IP, mandatory */ OR_START(source_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing source address"); if (add_src(cmd, *av, proto, cblen, tstate)) { av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd, &cblen); } } else errx(EX_USAGE, "bad source address %s", *av); OR_BLOCK(source_ip); /* * source ports, optional */ NOT_BLOCK; /* optional "not" */ if ( av[0] != NULL ) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT, cblen)) { av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd, &cblen); } } /* * "to", mandatory */ if ( (av[0] == NULL) || _substrcmp(*av, "to") != 0 ) errx(EX_USAGE, "missing ``to''"); av++; /* * destination, mandatory */ OR_START(dest_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing dst address"); if (add_dst(cmd, *av, proto, cblen, tstate)) { av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd, &cblen); } } else errx( EX_USAGE, "bad destination address %s", *av); OR_BLOCK(dest_ip); /* * dest. 
ports, optional */ NOT_BLOCK; /* optional "not" */ if (av[0]) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT, cblen)) { av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd, &cblen); } } read_options: prev = NULL; while ( av[0] != NULL ) { char *s; ipfw_insn_u32 *cmd32; /* alias for cmd */ s = *av; cmd32 = (ipfw_insn_u32 *)cmd; if (*s == '!') { /* alternate syntax for NOT */ if (cmd->len & F_NOT) errx(EX_USAGE, "double \"not\" not allowed\n"); cmd->len = F_NOT; s++; } i = match_token(rule_options, s); av++; switch(i) { case TOK_NOT: if (cmd->len & F_NOT) errx(EX_USAGE, "double \"not\" not allowed\n"); cmd->len = F_NOT; break; case TOK_OR: if (open_par == 0 || prev == NULL) errx(EX_USAGE, "invalid \"or\" block\n"); prev->len |= F_OR; break; case TOK_STARTBRACE: if (open_par) errx(EX_USAGE, "+nested \"(\" not allowed\n"); open_par = 1; break; case TOK_ENDBRACE: if (!open_par) errx(EX_USAGE, "+missing \")\"\n"); open_par = 0; prev = NULL; break; case TOK_IN: fill_cmd(cmd, O_IN, 0, 0); break; case TOK_OUT: cmd->len ^= F_NOT; /* toggle F_NOT */ fill_cmd(cmd, O_IN, 0, 0); break; case TOK_DIVERTED: fill_cmd(cmd, O_DIVERTED, 0, 3); break; case TOK_DIVERTEDLOOPBACK: fill_cmd(cmd, O_DIVERTED, 0, 1); break; case TOK_DIVERTEDOUTPUT: fill_cmd(cmd, O_DIVERTED, 0, 2); break; case TOK_FRAG: fill_cmd(cmd, O_FRAG, 0, 0); break; case TOK_LAYER2: fill_cmd(cmd, O_LAYER2, 0, 0); break; case TOK_XMIT: case TOK_RECV: case TOK_VIA: NEED1("recv, xmit, via require interface name" " or address"); fill_iface((ipfw_insn_if *)cmd, av[0], cblen, tstate); av++; if (F_LEN(cmd) == 0) /* not a valid address */ break; if (i == TOK_XMIT) cmd->opcode = O_XMIT; else if (i == TOK_RECV) cmd->opcode = O_RECV; else if (i == TOK_VIA) cmd->opcode = O_VIA; break; case TOK_ICMPTYPES: NEED1("icmptypes requires list of types"); fill_icmptypes((ipfw_insn_u32 *)cmd, *av); av++; break; case TOK_ICMP6TYPES: NEED1("icmptypes requires list of types"); fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av, cblen); av++; break; case TOK_IPTTL: NEED1("ipttl requires TTL"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPTTL, cblen)) errx(EX_DATAERR, "invalid ipttl %s", *av); } else fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_IPID: NEED1("ipid requires id"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPID, cblen)) errx(EX_DATAERR, "invalid ipid %s", *av); } else fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_IPLEN: NEED1("iplen requires length"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_IPLEN, cblen)) errx(EX_DATAERR, "invalid ip len %s", *av); } else fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_IPVER: NEED1("ipver requires version"); fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_IPPRECEDENCE: NEED1("ipprecedence requires value"); fill_cmd(cmd, O_IPPRECEDENCE, 0, (strtoul(*av, NULL, 0) & 7) << 5); av++; break; case TOK_DSCP: NEED1("missing DSCP code"); fill_dscp(cmd, *av, cblen); av++; break; case TOK_IPOPTS: NEED1("missing argument for ipoptions"); fill_flags_cmd(cmd, O_IPOPT, f_ipopts, *av); av++; break; case TOK_IPTOS: NEED1("missing argument for iptos"); fill_flags_cmd(cmd, O_IPTOS, f_iptos, *av); av++; break; case TOK_UID: NEED1("uid requires argument"); { char *end; uid_t uid; struct passwd *pwd; cmd->opcode = O_UID; uid = strtoul(*av, &end, 0); pwd = (*end == '\0') ? 
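/*
 * The ternary below picks the lookup: an argument that parsed fully as a
 * number ("uid 1001") is resolved with getpwuid(), anything else is
 * treated as a login name ("uid operator") and resolved with getpwnam().
 * Either way the rule stores the numeric pw_uid, so it keeps matching if
 * the account is later renamed but not if it is renumbered.
 */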
getpwuid(uid) : getpwnam(*av); if (pwd == NULL) errx(EX_DATAERR, "uid \"%s\" nonexistent", *av); cmd32->d[0] = pwd->pw_uid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); av++; } break; case TOK_GID: NEED1("gid requires argument"); { char *end; gid_t gid; struct group *grp; cmd->opcode = O_GID; gid = strtoul(*av, &end, 0); grp = (*end == '\0') ? getgrgid(gid) : getgrnam(*av); if (grp == NULL) errx(EX_DATAERR, "gid \"%s\" nonexistent", *av); cmd32->d[0] = grp->gr_gid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); av++; } break; case TOK_JAIL: NEED1("jail requires argument"); { int jid; cmd->opcode = O_JAIL; jid = jail_getid(*av); if (jid < 0) errx(EX_DATAERR, "%s", jail_errmsg); cmd32->d[0] = (uint32_t)jid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); av++; } break; case TOK_ESTAB: fill_cmd(cmd, O_ESTAB, 0, 0); break; case TOK_SETUP: fill_cmd(cmd, O_TCPFLAGS, 0, (TH_SYN) | ( (TH_ACK) & 0xff) <<8 ); break; case TOK_TCPDATALEN: NEED1("tcpdatalen requires length"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TCPDATALEN, cblen)) errx(EX_DATAERR, "invalid tcpdata len %s", *av); } else fill_cmd(cmd, O_TCPDATALEN, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); fill_flags_cmd(cmd, O_TCPOPTS, f_tcpopts, *av); av++; break; case TOK_TCPSEQ: case TOK_TCPACK: NEED1("tcpseq/tcpack requires argument"); cmd->len = F_INSN_SIZE(ipfw_insn_u32); cmd->opcode = (i == TOK_TCPSEQ) ? O_TCPSEQ : O_TCPACK; cmd32->d[0] = htonl(strtoul(*av, NULL, 0)); av++; break; case TOK_TCPWIN: NEED1("tcpwin requires length"); if (strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TCPWIN, cblen)) errx(EX_DATAERR, "invalid tcpwin len %s", *av); } else fill_cmd(cmd, O_TCPWIN, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_TCPFLAGS: NEED1("missing argument for tcpflags"); cmd->opcode = O_TCPFLAGS; fill_flags_cmd(cmd, O_TCPFLAGS, f_tcpflags, *av); av++; break; case TOK_KEEPSTATE: case TOK_RECORDSTATE: { uint16_t uidx; if (open_par) errx(EX_USAGE, "keep-state or record-state cannot be part " "of an or block"); if (have_state) errx(EX_USAGE, "only one of keep-state, record-state, " " limit and set-limit is allowed"); if (*av != NULL && *av[0] == ':') { if (state_check_name(*av + 1) != 0) errx(EX_DATAERR, "Invalid state name %s", *av); uidx = pack_object(tstate, *av + 1, IPFW_TLV_STATE_NAME); av++; } else uidx = pack_object(tstate, default_state_name, IPFW_TLV_STATE_NAME); have_state = cmd; have_rstate = i == TOK_RECORDSTATE; fill_cmd(cmd, O_KEEP_STATE, 0, uidx); break; } case TOK_LIMIT: case TOK_SETLIMIT: { ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; int val; if (open_par) errx(EX_USAGE, "limit or set-limit cannot be part of an or block"); if (have_state) errx(EX_USAGE, "only one of keep-state, record-state, " " limit and set-limit is allowed"); have_state = cmd; have_rstate = i == TOK_SETLIMIT; cmd->len = F_INSN_SIZE(ipfw_insn_limit); CHECK_CMDLEN; cmd->opcode = O_LIMIT; c->limit_mask = c->conn_limit = 0; while ( av[0] != NULL ) { if ((val = match_token(limit_masks, *av)) <= 0) break; c->limit_mask |= val; av++; } if (c->limit_mask == 0) errx(EX_USAGE, "limit: missing limit mask"); GET_UINT_ARG(c->conn_limit, IPFW_ARG_MIN, IPFW_ARG_MAX, TOK_LIMIT, rule_options); av++; if (*av != NULL && *av[0] == ':') { if (state_check_name(*av + 1) != 0) errx(EX_DATAERR, "Invalid state name %s", *av); cmd->arg1 = pack_object(tstate, *av + 1, IPFW_TLV_STATE_NAME); av++; } else cmd->arg1 = pack_object(tstate, default_state_name, IPFW_TLV_STATE_NAME); break; } case TOK_PROTO: NEED1("missing protocol"); if 
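/*
 * Example of the O_LIMIT encoding assembled above (numbers are
 * arbitrary): "limit src-addr dst-port 10" ORs the limit_masks values
 * for src-addr and dst-port into c->limit_mask, stores 10 in
 * c->conn_limit, and points arg1 at the state name -- allowing at most
 * ten states per distinct (source address, destination port) pair.
 */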
(add_proto(cmd, *av, &proto)) { av++; } else errx(EX_DATAERR, "invalid protocol ``%s''", *av); break; case TOK_SRCIP: NEED1("missing source IP"); if (add_srcip(cmd, *av, cblen, tstate)) { av++; } break; case TOK_DSTIP: NEED1("missing destination IP"); if (add_dstip(cmd, *av, cblen, tstate)) { av++; } break; case TOK_SRCIP6: NEED1("missing source IP6"); if (add_srcip6(cmd, *av, cblen, tstate)) { av++; } break; case TOK_DSTIP6: NEED1("missing destination IP6"); if (add_dstip6(cmd, *av, cblen, tstate)) { av++; } break; case TOK_SRCPORT: NEED1("missing source port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT, cblen)) { av++; } else errx(EX_DATAERR, "invalid source port %s", *av); break; case TOK_DSTPORT: NEED1("missing destination port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT, cblen)) { av++; } else errx(EX_DATAERR, "invalid destination port %s", *av); break; case TOK_MAC: if (add_mac(cmd, av, cblen)) av += 2; break; case TOK_MACTYPE: NEED1("missing mac type"); if (!add_mactype(cmd, *av, cblen)) errx(EX_DATAERR, "invalid mac type %s", *av); av++; break; case TOK_VERREVPATH: fill_cmd(cmd, O_VERREVPATH, 0, 0); break; case TOK_VERSRCREACH: fill_cmd(cmd, O_VERSRCREACH, 0, 0); break; case TOK_ANTISPOOF: fill_cmd(cmd, O_ANTISPOOF, 0, 0); break; case TOK_IPSEC: fill_cmd(cmd, O_IPSEC, 0, 0); break; case TOK_IPV6: fill_cmd(cmd, O_IP6, 0, 0); break; case TOK_IPV4: fill_cmd(cmd, O_IP4, 0, 0); break; case TOK_EXT6HDR: fill_ext6hdr( cmd, *av ); av++; break; case TOK_FLOWID: if (proto != IPPROTO_IPV6 ) errx( EX_USAGE, "flow-id filter is active " "only for ipv6 protocol\n"); fill_flow6( (ipfw_insn_u32 *) cmd, *av, cblen); av++; break; case TOK_COMMENT: fill_comment(cmd, av, cblen); av[0]=NULL; break; case TOK_TAGGED: if (av[0] && strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TAGGED, cblen)) errx(EX_DATAERR, "tagged: invalid tag" " list: %s", *av); } else { uint16_t tag; GET_UINT_ARG(tag, IPFW_ARG_MIN, IPFW_ARG_MAX, TOK_TAGGED, rule_options); fill_cmd(cmd, O_TAGGED, 0, tag); } av++; break; case TOK_FIB: NEED1("fib requires fib number"); fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0)); av++; break; case TOK_SOCKARG: fill_cmd(cmd, O_SOCKARG, 0, 0); break; case TOK_LOOKUP: { ipfw_insn_u32 *c = (ipfw_insn_u32 *)cmd; int j; if (!av[0] || !av[1]) errx(EX_USAGE, "format: lookup argument tablenum"); cmd->opcode = O_IP_DST_LOOKUP; cmd->len |= F_INSN_SIZE(ipfw_insn) + 2; i = match_token(rule_options, *av); for (j = 0; lookup_key[j] >= 0 ; j++) { if (i == lookup_key[j]) break; } if (lookup_key[j] <= 0) errx(EX_USAGE, "format: cannot lookup on %s", *av); __PAST_END(c->d, 1) = j; // i converted to option av++; if ((j = pack_table(tstate, *av)) == 0) errx(EX_DATAERR, "Invalid table name: %s", *av); cmd->arg1 = j; av++; } break; case TOK_FLOW: NEED1("missing table name"); if (strncmp(*av, "table(", 6) != 0) errx(EX_DATAERR, "enclose table name into \"table()\""); fill_table(cmd, *av, O_IP_FLOW_LOOKUP, tstate); av++; break; case TOK_SKIPACTION: if (have_skipcmd) errx(EX_USAGE, "only one defer-action " "is allowed"); have_skipcmd = cmd; fill_cmd(cmd, O_SKIP_ACTION, 0, 0); break; default: errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); } if (F_LEN(cmd) > 0) { /* prepare to advance */ prev = cmd; cmd = next_cmd(cmd, &cblen); } } done: if (!have_state && have_skipcmd) warnx("Rule contains \"defer-immediate-action\" " "and doesn't contain any state-related options."); /* * Now copy stuff into the rule. 
* If we have a keep-state option, the first instruction * must be a PROBE_STATE (which is generated here). * If we have a LOG option, it was stored as the first command, * and now must be moved to the top of the action part. */ dst = (ipfw_insn *)rule->cmd; /* * First thing to write into the command stream is the match probability. */ if (match_prob != 1) { /* 1 means always match */ dst->opcode = O_PROB; dst->len = 2; *((int32_t *)(dst+1)) = (int32_t)(match_prob * 0x7fffffff); dst += dst->len; } /* * generate O_PROBE_STATE if necessary */ if (have_state && have_state->opcode != O_CHECK_STATE && !have_rstate) { fill_cmd(dst, O_PROBE_STATE, 0, have_state->arg1); dst = next_cmd(dst, &rblen); } /* * copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ, O_TAG, * O_SKIP_ACTION */ for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) { i = F_LEN(src); CHECK_RBUFLEN(i); switch (src->opcode) { case O_LOG: case O_KEEP_STATE: case O_LIMIT: case O_ALTQ: case O_TAG: case O_SKIP_ACTION: break; default: bcopy(src, dst, i * sizeof(uint32_t)); dst += i; } } /* * put back the have_state command as last opcode */ if (have_state && have_state->opcode != O_CHECK_STATE) { i = F_LEN(have_state); CHECK_RBUFLEN(i); bcopy(have_state, dst, i * sizeof(uint32_t)); dst += i; } /* * put back the have_skipcmd command as very last opcode */ if (have_skipcmd) { i = F_LEN(have_skipcmd); CHECK_RBUFLEN(i); bcopy(have_skipcmd, dst, i * sizeof(uint32_t)); dst += i; } /* * start action section */ rule->act_ofs = dst - rule->cmd; /* put back O_LOG, O_ALTQ, O_TAG if necessary */ if (have_log) { i = F_LEN(have_log); CHECK_RBUFLEN(i); bcopy(have_log, dst, i * sizeof(uint32_t)); dst += i; } if (have_altq) { i = F_LEN(have_altq); CHECK_RBUFLEN(i); bcopy(have_altq, dst, i * sizeof(uint32_t)); dst += i; } if (have_tag) { i = F_LEN(have_tag); CHECK_RBUFLEN(i); bcopy(have_tag, dst, i * sizeof(uint32_t)); dst += i; } /* * copy all other actions */ for (src = (ipfw_insn *)actbuf; src != action; src += i) { i = F_LEN(src); CHECK_RBUFLEN(i); bcopy(src, dst, i * sizeof(uint32_t)); dst += i; } rule->cmd_len = (uint32_t *)dst - (uint32_t *)(rule->cmd); *rbufsize = (char *)dst - (char *)rule; } static int compare_ntlv(const void *_a, const void *_b) { ipfw_obj_ntlv *a, *b; a = (ipfw_obj_ntlv *)_a; b = (ipfw_obj_ntlv *)_b; if (a->set < b->set) return (-1); else if (a->set > b->set) return (1); if (a->idx < b->idx) return (-1); else if (a->idx > b->idx) return (1); if (a->head.type < b->head.type) return (-1); else if (a->head.type > b->head.type) return (1); return (0); } /* * Provide kernel with sorted list of referenced objects */ static void object_sort_ctlv(ipfw_obj_ctlv *ctlv) { qsort(ctlv + 1, ctlv->count, ctlv->objsize, compare_ntlv); } struct object_kt { uint16_t uidx; uint16_t type; }; static int compare_object_kntlv(const void *k, const void *v) { ipfw_obj_ntlv *ntlv; struct object_kt key; key = *((struct object_kt *)k); ntlv = (ipfw_obj_ntlv *)v; if (key.uidx < ntlv->idx) return (-1); else if (key.uidx > ntlv->idx) return (1); if (key.type < ntlv->head.type) return (-1); else if (key.type > ntlv->head.type) return (1); return (0); } /* * Finds object name in @ctlv by @idx and @type. * Uses the following facts: * 1) All TLVs are the same size * 2) Kernel implementation provides already sorted list. * * Returns table name or NULL. 
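* Lookup cost is O(log n): a bsearch(3) over the fixed-size, pre-sorted ntlv array that follows the ctlv header.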
*/ static char * object_search_ctlv(ipfw_obj_ctlv *ctlv, uint16_t idx, uint16_t type) { ipfw_obj_ntlv *ntlv; struct object_kt key; key.uidx = idx; key.type = type; ntlv = bsearch(&key, (ctlv + 1), ctlv->count, ctlv->objsize, compare_object_kntlv); if (ntlv != NULL) return (ntlv->name); return (NULL); } static char * table_search_ctlv(ipfw_obj_ctlv *ctlv, uint16_t idx) { return (object_search_ctlv(ctlv, idx, IPFW_TLV_TBL_NAME)); } /* * Adds one or more rules to ipfw chain. * Data layout: * Request: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) [ ip_fw_rule ip_fw_insn ] x N ] (*2) (*3) * ] * Reply: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) [ ip_fw_rule ip_fw_insn ] x N ] * ] * * Rules in reply are modified to store their actual ruleset number. * * (*1) TLVs inside IPFW_TLV_TBL_LIST need to be sorted ascending * according to their idx field and there must be no duplicates. * (*2) Numbered rules inside IPFW_TLV_RULE_LIST need to be sorted ascending. * (*3) Each ip_fw structure needs to be aligned to a u64 boundary. */ void ipfw_add(char *av[]) { uint32_t rulebuf[1024]; int rbufsize, default_off, tlen, rlen; size_t sz; struct tidx ts; struct ip_fw_rule *rule; caddr_t tbuf; ip_fw3_opheader *op3; ipfw_obj_ctlv *ctlv, *tstate; rbufsize = sizeof(rulebuf); memset(rulebuf, 0, rbufsize); memset(&ts, 0, sizeof(ts)); /* Optimize case with no tables */ default_off = sizeof(ipfw_obj_ctlv) + sizeof(ip_fw3_opheader); op3 = (ip_fw3_opheader *)rulebuf; ctlv = (ipfw_obj_ctlv *)(op3 + 1); rule = (struct ip_fw_rule *)(ctlv + 1); rbufsize -= default_off; compile_rule(av, (uint32_t *)rule, &rbufsize, &ts); /* Align rule size to u64 boundary */ rlen = roundup2(rbufsize, sizeof(uint64_t)); tbuf = NULL; sz = 0; tstate = NULL; if (ts.count != 0) { /* Some tables. We have to alloc more data */ tlen = ts.count * sizeof(ipfw_obj_ntlv); sz = default_off + sizeof(ipfw_obj_ctlv) + tlen + rlen; if ((tbuf = calloc(1, sz)) == NULL) err(EX_UNAVAILABLE, "calloc() failed for IP_FW_ADD"); op3 = (ip_fw3_opheader *)tbuf; /* Tables first */ ctlv = (ipfw_obj_ctlv *)(op3 + 1); ctlv->head.type = IPFW_TLV_TBLNAME_LIST; ctlv->head.length = sizeof(ipfw_obj_ctlv) + tlen; ctlv->count = ts.count; ctlv->objsize = sizeof(ipfw_obj_ntlv); memcpy(ctlv + 1, ts.idx, tlen); object_sort_ctlv(ctlv); tstate = ctlv; /* Rule next */ ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); ctlv->head.type = IPFW_TLV_RULE_LIST; ctlv->head.length = sizeof(ipfw_obj_ctlv) + rlen; ctlv->count = 1; memcpy(ctlv + 1, rule, rbufsize); } else { /* Simply add header */ sz = rlen + default_off; memset(ctlv, 0, sizeof(*ctlv)); ctlv->head.type = IPFW_TLV_RULE_LIST; ctlv->head.length = sizeof(ipfw_obj_ctlv) + rlen; ctlv->count = 1; } if (do_get3(IP_FW_XADD, op3, &sz) != 0) err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_XADD"); if (!co.do_quiet) { struct format_opts sfo; struct buf_pr bp; memset(&sfo, 0, sizeof(sfo)); sfo.tstate = tstate; sfo.set_mask = (uint32_t)(-1); bp_alloc(&bp, 4096); show_static_rule(&co, &sfo, &bp, rule, NULL); printf("%s", bp.buf); bp_free(&bp); } if (tbuf != NULL) free(tbuf); if (ts.idx != NULL) free(ts.idx); } /* * Clear the counters or the log counters. * optname has the following values: * 0 (zero both counters and logging) * 1 (zero logging only) */ void ipfw_zero(int ac, char *av[], int optname) { ipfw_range_tlv rt; char const *errstr; char const *name = optname ?
"RESETLOG" : "ZERO"; uint32_t arg; int failed = EX_OK; optname = optname ? IP_FW_XRESETLOG : IP_FW_XZERO; av++; ac--; if (ac == 0) { /* clear all entries */ memset(&rt, 0, sizeof(rt)); rt.flags = IPFW_RCFLAG_ALL; if (do_range_cmd(optname, &rt) < 0) err(EX_UNAVAILABLE, "setsockopt(IP_FW_X%s)", name); if (!co.do_quiet) printf("%s.\n", optname == IP_FW_XZERO ? "Accounting cleared":"Logging counts reset"); return; } while (ac) { /* Rule number */ if (isdigit(**av)) { arg = strtonum(*av, 0, 0xffff, &errstr); if (errstr) errx(EX_DATAERR, "invalid rule number %s\n", *av); memset(&rt, 0, sizeof(rt)); rt.start_rule = arg; rt.end_rule = arg; rt.flags |= IPFW_RCFLAG_RANGE; if (co.use_set != 0) { rt.set = co.use_set - 1; rt.flags |= IPFW_RCFLAG_SET; } if (do_range_cmd(optname, &rt) != 0) { warn("rule %u: setsockopt(IP_FW_X%s)", arg, name); failed = EX_UNAVAILABLE; } else if (rt.new_set == 0) { printf("Entry %d not found\n", arg); failed = EX_UNAVAILABLE; } else if (!co.do_quiet) printf("Entry %d %s.\n", arg, optname == IP_FW_XZERO ? "cleared" : "logging count reset"); } else { errx(EX_USAGE, "invalid rule number ``%s''", *av); } av++; ac--; } if (failed != EX_OK) exit(failed); } void ipfw_flush(int force) { ipfw_range_tlv rt; if (!force && !co.do_quiet) { /* need to ask user */ int c; printf("Are you sure? [yn] "); fflush(stdout); do { c = toupper(getc(stdin)); while (c != '\n' && getc(stdin) != '\n') if (feof(stdin)) return; /* and do not flush */ } while (c != 'Y' && c != 'N'); printf("\n"); if (c == 'N') /* user said no */ return; } if (co.do_pipe) { dummynet_flush(); return; } /* `ipfw set N flush` - is the same that `ipfw delete set N` */ memset(&rt, 0, sizeof(rt)); if (co.use_set != 0) { rt.set = co.use_set - 1; rt.flags = IPFW_RCFLAG_SET; } else rt.flags = IPFW_RCFLAG_ALL; if (do_range_cmd(IP_FW_XDEL, &rt) != 0) err(EX_UNAVAILABLE, "setsockopt(IP_FW_XDEL)"); if (!co.do_quiet) printf("Flushed all %s.\n", co.do_pipe ? 
"pipes" : "rules"); } static struct _s_x intcmds[] = { { "talist", TOK_TALIST }, { "iflist", TOK_IFLIST }, { "olist", TOK_OLIST }, { "vlist", TOK_VLIST }, { NULL, 0 } }; static struct _s_x otypes[] = { { "EACTION", IPFW_TLV_EACTION }, { "DYNSTATE", IPFW_TLV_STATE_NAME }, { NULL, 0 } }; static const char* lookup_eaction_name(ipfw_obj_ntlv *ntlv, int cnt, uint16_t type) { const char *name; int i; name = NULL; for (i = 0; i < cnt; i++) { if (ntlv[i].head.type != IPFW_TLV_EACTION) continue; if (IPFW_TLV_EACTION_NAME(ntlv[i].idx) != type) continue; name = ntlv[i].name; break; } return (name); } static void ipfw_list_objects(int ac, char *av[]) { ipfw_obj_lheader req, *olh; ipfw_obj_ntlv *ntlv; const char *name; size_t sz; int i; memset(&req, 0, sizeof(req)); sz = sizeof(req); if (do_get3(IP_FW_DUMP_SRVOBJECTS, &req.opheader, &sz) != 0) if (errno != ENOMEM) return; sz = req.size; if ((olh = calloc(1, sz)) == NULL) return; olh->size = sz; if (do_get3(IP_FW_DUMP_SRVOBJECTS, &olh->opheader, &sz) != 0) { free(olh); return; } if (olh->count > 0) printf("Objects list:\n"); else printf("There are no objects\n"); ntlv = (ipfw_obj_ntlv *)(olh + 1); for (i = 0; i < olh->count; i++) { name = match_value(otypes, ntlv->head.type); if (name == NULL) name = lookup_eaction_name( (ipfw_obj_ntlv *)(olh + 1), olh->count, ntlv->head.type); if (name == NULL) printf(" kidx: %4d\ttype: %10d\tname: %s\n", ntlv->idx, ntlv->head.type, ntlv->name); else printf(" kidx: %4d\ttype: %10s\tname: %s\n", ntlv->idx, name, ntlv->name); ntlv++; } free(olh); } void ipfw_internal_handler(int ac, char *av[]) { int tcmd; ac--; av++; NEED1("internal cmd required"); if ((tcmd = match_token(intcmds, *av)) == -1) errx(EX_USAGE, "invalid internal sub-cmd: %s", *av); switch (tcmd) { case TOK_IFLIST: ipfw_list_tifaces(); break; case TOK_TALIST: ipfw_list_ta(ac, av); break; case TOK_OLIST: ipfw_list_objects(ac, av); break; case TOK_VLIST: ipfw_list_values(ac, av); break; } } static int ipfw_get_tracked_ifaces(ipfw_obj_lheader **polh) { ipfw_obj_lheader req, *olh; size_t sz; memset(&req, 0, sizeof(req)); sz = sizeof(req); if (do_get3(IP_FW_XIFLIST, &req.opheader, &sz) != 0) { if (errno != ENOMEM) return (errno); } sz = req.size; if ((olh = calloc(1, sz)) == NULL) return (ENOMEM); olh->size = sz; if (do_get3(IP_FW_XIFLIST, &olh->opheader, &sz) != 0) { free(olh); return (errno); } *polh = olh; return (0); } static int ifinfo_cmp(const void *a, const void *b) { ipfw_iface_info *ia, *ib; ia = (ipfw_iface_info *)a; ib = (ipfw_iface_info *)b; return (stringnum_cmp(ia->ifname, ib->ifname)); } /* * Retrieves table list from kernel, * optionally sorts it and calls requested function for each table. * Returns 0 on success. 
*/ static void ipfw_list_tifaces() { ipfw_obj_lheader *olh; ipfw_iface_info *info; int i, error; if ((error = ipfw_get_tracked_ifaces(&olh)) != 0) err(EX_OSERR, "Unable to request ipfw tracked interface list"); qsort(olh + 1, olh->count, olh->objsize, ifinfo_cmp); info = (ipfw_iface_info *)(olh + 1); for (i = 0; i < olh->count; i++) { if (info->flags & IPFW_IFFLAG_RESOLVED) printf("%s ifindex: %d refcount: %u changes: %u\n", info->ifname, info->ifindex, info->refcnt, info->gencnt); else printf("%s ifindex: unresolved refcount: %u changes: %u\n", info->ifname, info->refcnt, info->gencnt); info = (ipfw_iface_info *)((caddr_t)info + olh->objsize); } free(olh); } Index: projects/runtime-coverage-v2/share/man/man4/Makefile =================================================================== --- projects/runtime-coverage-v2/share/man/man4/Makefile (revision 346924) +++ projects/runtime-coverage-v2/share/man/man4/Makefile (revision 346925) @@ -1,1009 +1,1012 @@ # @(#)Makefile 8.1 (Berkeley) 6/18/93 # $FreeBSD$ .include PACKAGE=runtime-manuals MAN= aac.4 \ aacraid.4 \ acpi.4 \ ${_acpi_asus.4} \ ${_acpi_asus_wmi.4} \ ${_acpi_dock.4} \ ${_acpi_fujitsu.4} \ ${_acpi_hp.4} \ ${_acpi_ibm.4} \ ${_acpi_panasonic.4} \ ${_acpi_rapidstart.4} \ ${_acpi_sony.4} \ acpi_thermal.4 \ ${_acpi_toshiba.4} \ acpi_video.4 \ ${_acpi_wmi.4} \ ada.4 \ adm6996fc.4 \ ae.4 \ ${_aesni.4} \ age.4 \ agp.4 \ ahc.4 \ ahci.4 \ ahd.4 \ ${_aibs.4} \ aio.4 \ alc.4 \ ale.4 \ alpm.4 \ altera_atse.4 \ altera_avgen.4 \ altera_jtag_uart.4 \ altera_sdcard.4 \ altq.4 \ amdpm.4 \ ${_amdsbwd.4} \ ${_amdsmb.4} \ ${_amdsmn.4} \ ${_amdtemp.4} \ ${_bxe.4} \ amr.4 \ an.4 \ ${_aout.4} \ ${_apic.4} \ arcmsr.4 \ ${_asmc.4} \ at45d.4 \ ata.4 \ ath.4 \ ath_ahb.4 \ ath_hal.4 \ ath_pci.4 \ atkbd.4 \ atkbdc.4 \ atp.4 \ ${_atf_test_case.4} \ ${_atrtc.4} \ ${_attimer.4} \ audit.4 \ auditpipe.4 \ aue.4 \ axe.4 \ axge.4 \ bce.4 \ bcma.4 \ bfe.4 \ bge.4 \ ${_bhyve.4} \ bhnd.4 \ bhnd_chipc.4 \ bhnd_pmu.4 \ bhndb.4 \ bhndb_pci.4 \ bktr.4 \ blackhole.4 \ bnxt.4 \ bpf.4 \ bridge.4 \ bt.4 \ bwi.4 \ bwn.4 \ ${_bytgpio.4} \ ${_chvgpio.4} \ capsicum.4 \ cardbus.4 \ carp.4 \ cas.4 \ cc_cdg.4 \ cc_chd.4 \ cc_cubic.4 \ cc_dctcp.4 \ cc_hd.4 \ cc_htcp.4 \ cc_newreno.4 \ cc_vegas.4 \ ${_ccd.4} \ ccr.4 \ cd.4 \ cdce.4 \ cfi.4 \ cfumass.4 \ ch.4 \ chromebook_platform.4 \ ciss.4 \ cloudabi.4 \ cmx.4 \ ${_coretemp.4} \ ${_cpuctl.4} \ cpufreq.4 \ crypto.4 \ ctl.4 \ cue.4 \ cxgb.4 \ cxgbe.4 \ cxgbev.4 \ cy.4 \ cyapa.4 \ da.4 \ dc.4 \ dcons.4 \ dcons_crom.4 \ ddb.4 \ de.4 \ devctl.4 \ disc.4 \ divert.4 \ ${_dpms.4} \ ds1307.4 \ ds3231.4 \ ${_dtrace_provs} \ dummynet.4 \ ed.4 \ edsc.4 \ ehci.4 \ em.4 \ ena.4 \ enc.4 \ epair.4 \ esp.4 \ est.4 \ et.4 \ etherswitch.4 \ eventtimers.4 \ exca.4 \ e6060sw.4 \ fd.4 \ fdc.4 \ fdt.4 \ fdt_pinctrl.4 \ fdtbus.4 \ ffclock.4 \ filemon.4 \ firewire.4 \ full.4 \ fwe.4 \ fwip.4 \ fwohci.4 \ fxp.4 \ gbde.4 \ gdb.4 \ gem.4 \ geom.4 \ geom_fox.4 \ geom_linux_lvm.4 \ geom_map.4 \ geom_uzip.4 \ gif.4 \ gpio.4 \ gpioiic.4 \ gpioled.4 \ gre.4 \ h_ertt.4 \ hifn.4 \ hme.4 \ hpet.4 \ ${_hpt27xx.4} \ ${_hptiop.4} \ ${_hptmv.4} \ ${_hptnr.4} \ ${_hptrr.4} \ ${_hv_kvp.4} \ ${_hv_netvsc.4} \ ${_hv_storvsc.4} \ ${_hv_utils.4} \ ${_hv_vmbus.4} \ ${_hv_vss.4} \ hwpmc.4 \ iavf.4 \ ichsmb.4 \ ${_ichwd.4} \ icmp.4 \ icmp6.4 \ ida.4 \ if_ipsec.4 \ iflib.4 \ ifmib.4 \ ig4.4 \ igmp.4 \ iic.4 \ iicbb.4 \ iicbus.4 \ iicsmb.4 \ iir.4 \ ${_imcsmb.4} \ inet.4 \ inet6.4 \ intpm.4 \ intro.4 \ ${_io.4} \ ${_ioat.4} \ ip.4 \ ip6.4 \ ipfirewall.4 \ ipheth.4 \ ${_ipmi.4} \ ips.4 \ 
ipsec.4 \ ipw.4 \ ipwfw.4 \ isci.4 \ isl.4 \ ismt.4 \ isp.4 \ ispfw.4 \ iwi.4 \ iwifw.4 \ iwm.4 \ iwmfw.4 \ iwn.4 \ iwnfw.4 \ ixgbe.4 \ ixl.4 \ jedec_dimm.4 \ jme.4 \ kbdmux.4 \ keyboard.4 \ kld.4 \ ksyms.4 \ ksz8995ma.4 \ ktr.4 \ kue.4 \ lagg.4 \ le.4 \ led.4 \ lge.4 \ ${_linux.4} \ liquidio.4 \ lm75.4 \ lo.4 \ lp.4 \ lpbb.4 \ lpt.4 \ mac.4 \ mac_biba.4 \ mac_bsdextended.4 \ mac_ifoff.4 \ mac_lomac.4 \ mac_mls.4 \ mac_none.4 \ mac_ntpd.4 \ mac_partition.4 \ mac_portacl.4 \ mac_seeotheruids.4 \ mac_stub.4 \ mac_test.4 \ malo.4 \ md.4 \ mdio.4 \ me.4 \ mem.4 \ meteor.4 \ mfi.4 \ miibus.4 \ mk48txx.4 \ mld.4 \ mlx.4 \ mlx4en.4 \ mlx5en.4 \ mly.4 \ mmc.4 \ mmcsd.4 \ mn.4 \ mod_cc.4 \ mos.4 \ mouse.4 \ mpr.4 \ mps.4 \ mpt.4 \ mrsas.4 \ msk.4 \ mtio.4 \ multicast.4 \ muge.4 \ mvs.4 \ mwl.4 \ mwlfw.4 \ mx25l.4 \ mxge.4 \ my.4 \ nand.4 \ nandsim.4 \ ${_ndis.4} \ net80211.4 \ netdump.4 \ netfpga10g_nf10bmac.4 \ netgraph.4 \ netintro.4 \ netmap.4 \ ${_nfe.4} \ ${_nfsmb.4} \ ng_async.4 \ ngatmbase.4 \ ng_atmllc.4 \ ng_bpf.4 \ ng_bridge.4 \ ng_bt3c.4 \ ng_btsocket.4 \ ng_car.4 \ ng_ccatm.4 \ ng_checksum.4 \ ng_cisco.4 \ ng_deflate.4 \ ng_device.4 \ nge.4 \ ng_echo.4 \ ng_eiface.4 \ ng_etf.4 \ ng_ether.4 \ ng_ether_echo.4 \ ng_frame_relay.4 \ ng_gif.4 \ ng_gif_demux.4 \ ng_h4.4 \ ng_hci.4 \ ng_hole.4 \ ng_hub.4 \ ng_iface.4 \ ng_ipfw.4 \ ng_ip_input.4 \ ng_ksocket.4 \ ng_l2cap.4 \ ng_l2tp.4 \ ng_lmi.4 \ ng_mppc.4 \ ng_nat.4 \ ng_netflow.4 \ ng_one2many.4 \ ng_patch.4 \ ng_ppp.4 \ ng_pppoe.4 \ ng_pptpgre.4 \ ng_pred1.4 \ ng_rfc1490.4 \ ng_socket.4 \ ng_source.4 \ ng_split.4 \ ng_sppp.4 \ ng_sscfu.4 \ ng_sscop.4 \ ng_tag.4 \ ng_tcpmss.4 \ ng_tee.4 \ ng_tty.4 \ ng_ubt.4 \ ng_UI.4 \ ng_uni.4 \ ng_vjc.4 \ ng_vlan.4 \ nmdm.4 \ ${_ntb.4} \ ${_ntb_hw_intel.4} \ ${_ntb_hw_plx.4} \ ${_ntb_transport.4} \ ${_nda.4} \ ${_if_ntb.4} \ null.4 \ numa.4 \ ${_nvd.4} \ ${_nvme.4} \ ${_nvram.4} \ ${_nvram2env.4} \ oce.4 \ ocs_fc.4\ ohci.4 \ orm.4 \ ow.4 \ ow_temp.4 \ owc.4 \ ${_padlock.4} \ pass.4 \ pccard.4 \ pccbb.4 \ pcf.4 \ pci.4 \ pcib.4 \ pcic.4 \ pcm.4 \ pcn.4 \ ${_pf.4} \ ${_pflog.4} \ ${_pfsync.4} \ pim.4 \ pms.4 \ polling.4 \ ppbus.4 \ ppc.4 \ ppi.4 \ procdesc.4 \ proto.4 \ psm.4 \ pst.4 \ pt.4 \ pts.4 \ pty.4 \ puc.4 \ ${_qlxge.4} \ ${_qlxgb.4} \ ${_qlxgbe.4} \ ${_qlnxe.4} \ ral.4 \ random.4 \ rc.4 \ rctl.4 \ re.4 \ rgephy.4 \ rights.4 \ rl.4 \ rndtest.4 \ route.4 \ rp.4 \ rtwn.4 \ rtwnfw.4 \ rtwn_pci.4 \ rue.4 \ sa.4 \ safe.4 \ sbp.4 \ sbp_targ.4 \ scc.4 \ sched_4bsd.4 \ sched_ule.4 \ screen.4 \ scsi.4 \ sctp.4 \ sdhci.4 \ sem.4 \ send.4 \ ses.4 \ sf.4 \ ${_sfxge.4} \ sge.4 \ siba.4 \ siftr.4 \ siis.4 \ simplebus.4 \ sio.4 \ sis.4 \ sk.4 \ ${_smartpqi.4} \ smb.4 \ smbus.4 \ smp.4 \ smsc.4 \ sn.4 \ snd_ad1816.4 \ snd_als4000.4 \ snd_atiixp.4 \ snd_cmi.4 \ snd_cs4281.4 \ snd_csa.4 \ snd_ds1.4 \ snd_emu10k1.4 \ snd_emu10kx.4 \ snd_envy24.4 \ snd_envy24ht.4 \ snd_es137x.4 \ snd_ess.4 \ snd_fm801.4 \ snd_gusc.4 \ snd_hda.4 \ snd_hdspe.4 \ snd_ich.4 \ snd_maestro3.4 \ snd_maestro.4 \ snd_mss.4 \ snd_neomagic.4 \ snd_sbc.4 \ snd_solo.4 \ snd_spicds.4 \ snd_t4dwave.4 \ snd_uaudio.4 \ snd_via8233.4 \ snd_via82c686.4 \ snd_vibes.4 \ snp.4 \ spigen.4 \ ${_spkr.4} \ splash.4 \ sppp.4 \ ste.4 \ stf.4 \ stge.4 \ sym.4 \ syncache.4 \ syncer.4 \ syscons.4 \ sysmouse.4 \ tap.4 \ targ.4 \ tcp.4 \ tdfx.4 \ terasic_mtl.4 \ termios.4 \ textdump.4 \ ti.4 \ timecounters.4 \ tl.4 \ ${_tpm.4} \ trm.4 \ tty.4 \ tun.4 \ twa.4 \ twe.4 \ tws.4 \ tx.4 \ txp.4 \ udp.4 \ udplite.4 \ ure.4 \ vale.4 \ vga.4 \ vge.4 \ viapm.4 \ ${_viawd.4} \ 
${_virtio.4} \ ${_virtio_balloon.4} \ ${_virtio_blk.4} \ ${_virtio_console.4} \ ${_virtio_random.4} \ ${_virtio_scsi.4} \ ${_vmci.4} \ vkbd.4 \ vlan.4 \ vxlan.4 \ ${_vmm.4} \ ${_vmx.4} \ vpo.4 \ vr.4 \ vt.4 \ vte.4 \ ${_vtnet.4} \ watchdog.4 \ wb.4 \ ${_wbwd.4} \ wi.4 \ witness.4 \ wlan.4 \ wlan_acl.4 \ wlan_amrr.4 \ wlan_ccmp.4 \ wlan_tkip.4 \ wlan_wep.4 \ wlan_xauth.4 \ wmt.4 \ ${_wpi.4} \ wsp.4 \ xe.4 \ ${_xen.4} \ xhci.4 \ xl.4 \ ${_xnb.4} \ xpt.4 \ zero.4 MLINKS= ae.4 if_ae.4 MLINKS+=age.4 if_age.4 MLINKS+=agp.4 agpgart.4 MLINKS+=alc.4 if_alc.4 MLINKS+=ale.4 if_ale.4 MLINKS+=altera_atse.4 atse.4 MLINKS+=altera_sdcard.4 altera_sdcardc.4 MLINKS+=altq.4 ALTQ.4 MLINKS+=ath.4 if_ath.4 MLINKS+=ath_pci.4 if_ath_pci.4 MLINKS+=an.4 if_an.4 MLINKS+=aue.4 if_aue.4 MLINKS+=axe.4 if_axe.4 MLINKS+=bce.4 if_bce.4 MLINKS+=bfe.4 if_bfe.4 MLINKS+=bge.4 if_bge.4 MLINKS+=bktr.4 brooktree.4 MLINKS+=bnxt.4 if_bnxt.4 MLINKS+=bridge.4 if_bridge.4 MLINKS+=bwi.4 if_bwi.4 MLINKS+=bwn.4 if_bwn.4 MLINKS+=${_bxe.4} ${_if_bxe.4} MLINKS+=cas.4 if_cas.4 MLINKS+=cdce.4 if_cdce.4 MLINKS+=cfi.4 cfid.4 MLINKS+=cloudabi.4 cloudabi32.4 \ cloudabi.4 cloudabi64.4 MLINKS+=crypto.4 cryptodev.4 MLINKS+=cue.4 if_cue.4 MLINKS+=cxgb.4 if_cxgb.4 MLINKS+=cxgbe.4 if_cxgbe.4 \ cxgbe.4 vcxgbe.4 \ cxgbe.4 if_vcxgbe.4 \ cxgbe.4 cxl.4 \ cxgbe.4 if_cxl.4 \ cxgbe.4 vcxl.4 \ cxgbe.4 if_vcxl.4 \ cxgbe.4 cc.4 \ cxgbe.4 if_cc.4 \ cxgbe.4 vcc.4 \ cxgbe.4 if_vcc.4 MLINKS+=cxgbev.4 if_cxgbev.4 \ cxgbev.4 cxlv.4 \ cxgbev.4 if_cxlv.4 \ cxgbev.4 ccv.4 \ cxgbev.4 if_ccv.4 MLINKS+=dc.4 if_dc.4 MLINKS+=de.4 if_de.4 MLINKS+=disc.4 if_disc.4 MLINKS+=ed.4 if_ed.4 MLINKS+=edsc.4 if_edsc.4 MLINKS+=em.4 if_em.4 MLINKS+=enc.4 if_enc.4 MLINKS+=epair.4 if_epair.4 MLINKS+=et.4 if_et.4 MLINKS+=fd.4 stderr.4 \ fd.4 stdin.4 \ fd.4 stdout.4 MLINKS+=fdt.4 FDT.4 MLINKS+=firewire.4 ieee1394.4 MLINKS+=fwe.4 if_fwe.4 MLINKS+=fwip.4 if_fwip.4 MLINKS+=fxp.4 if_fxp.4 MLINKS+=gem.4 if_gem.4 MLINKS+=geom.4 GEOM.4 MLINKS+=gif.4 if_gif.4 MLINKS+=gpio.4 gpiobus.4 MLINKS+=gre.4 if_gre.4 MLINKS+=hme.4 if_hme.4 MLINKS+=hpet.4 acpi_hpet.4 MLINKS+=${_hptrr.4} ${_rr232x.4} MLINKS+=${_attimer.4} ${_i8254.4} MLINKS+=ip.4 rawip.4 MLINKS+=ipfirewall.4 ipaccounting.4 \ ipfirewall.4 ipacct.4 \ ipfirewall.4 ipfw.4 MLINKS+=ipheth.4 if_ipheth.4 MLINKS+=ipw.4 if_ipw.4 MLINKS+=iwi.4 if_iwi.4 MLINKS+=iwm.4 if_iwm.4 MLINKS+=iwn.4 if_iwn.4 MLINKS+=ixgbe.4 ix.4 MLINKS+=ixgbe.4 if_ix.4 MLINKS+=ixgbe.4 if_ixgbe.4 MLINKS+=ixl.4 if_ixl.4 MLINKS+=iavf.4 if_iavf.4 MLINKS+=jme.4 if_jme.4 MLINKS+=kue.4 if_kue.4 MLINKS+=lagg.4 trunk.4 MLINKS+=lagg.4 if_lagg.4 MLINKS+=le.4 if_le.4 MLINKS+=lge.4 if_lge.4 MLINKS+=lo.4 loop.4 MLINKS+=lp.4 plip.4 MLINKS+=malo.4 if_malo.4 MLINKS+=md.4 vn.4 MLINKS+=mem.4 kmem.4 MLINKS+=mfi.4 mfi_linux.4 \ mfi.4 mfip.4 MLINKS+=mlx5en.4 mce.4 MLINKS+=mn.4 if_mn.4 MLINKS+=mos.4 if_mos.4 MLINKS+=msk.4 if_msk.4 MLINKS+=mwl.4 if_mwl.4 MLINKS+=mxge.4 if_mxge.4 MLINKS+=my.4 if_my.4 MLINKS+=${_ndis.4} ${_if_ndis.4} MLINKS+=netfpga10g_nf10bmac.4 if_nf10bmac.4 MLINKS+=netintro.4 net.4 \ netintro.4 networking.4 MLINKS+=${_nfe.4} ${_if_nfe.4} MLINKS+=nge.4 if_nge.4 MLINKS+=ow.4 onewire.4 MLINKS+=pccbb.4 cbb.4 MLINKS+=pcm.4 snd.4 \ pcm.4 sound.4 MLINKS+=pcn.4 if_pcn.4 MLINKS+=pms.4 pmspcv.4 MLINKS+=ral.4 if_ral.4 MLINKS+=re.4 if_re.4 MLINKS+=rl.4 if_rl.4 MLINKS+=rtwn_pci.4 if_rtwn_pci.4 MLINKS+=rue.4 if_rue.4 MLINKS+=scsi.4 CAM.4 \ scsi.4 cam.4 \ scsi.4 scbus.4 \ scsi.4 SCSI.4 MLINKS+=sf.4 if_sf.4 MLINKS+=sge.4 if_sge.4 MLINKS+=sis.4 if_sis.4 MLINKS+=sk.4 if_sk.4 MLINKS+=smp.4 SMP.4 
MLINKS+=smsc.4 if_smsc.4 MLINKS+=sn.4 if_sn.4 MLINKS+=snd_envy24.4 snd_ak452x.4 MLINKS+=snd_sbc.4 snd_sb16.4 \ snd_sbc.4 snd_sb8.4 MLINKS+=${_spkr.4} ${_speaker.4} MLINKS+=splash.4 screensaver.4 MLINKS+=ste.4 if_ste.4 MLINKS+=stf.4 if_stf.4 MLINKS+=stge.4 if_stge.4 MLINKS+=syncache.4 syncookies.4 MLINKS+=syscons.4 sc.4 MLINKS+=tap.4 if_tap.4 MLINKS+=tdfx.4 tdfx_linux.4 MLINKS+=ti.4 if_ti.4 MLINKS+=tl.4 if_tl.4 MLINKS+=tun.4 if_tun.4 MLINKS+=tx.4 if_tx.4 MLINKS+=txp.4 if_txp.4 MLINKS+=ure.4 if_ure.4 MLINKS+=vge.4 if_vge.4 MLINKS+=vlan.4 if_vlan.4 MLINKS+=vxlan.4 if_vxlan.4 MLINKS+=${_vmx.4} ${_if_vmx.4} MLINKS+=vpo.4 imm.4 MLINKS+=vr.4 if_vr.4 MLINKS+=vte.4 if_vte.4 MLINKS+=${_vtnet.4} ${_if_vtnet.4} MLINKS+=watchdog.4 SW_WATCHDOG.4 MLINKS+=wb.4 if_wb.4 MLINKS+=wi.4 if_wi.4 MLINKS+=${_wpi.4} ${_if_wpi.4} MLINKS+=xe.4 if_xe.4 MLINKS+=xl.4 if_xl.4 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" _acpi_asus.4= acpi_asus.4 _acpi_asus_wmi.4= acpi_asus_wmi.4 _acpi_dock.4= acpi_dock.4 _acpi_fujitsu.4=acpi_fujitsu.4 _acpi_hp.4= acpi_hp.4 _acpi_ibm.4= acpi_ibm.4 _acpi_panasonic.4=acpi_panasonic.4 _acpi_rapidstart.4=acpi_rapidstart.4 _acpi_sony.4= acpi_sony.4 _acpi_toshiba.4=acpi_toshiba.4 _acpi_wmi.4= acpi_wmi.4 _aesni.4= aesni.4 _aout.4= aout.4 _apic.4= apic.4 _atrtc.4= atrtc.4 _attimer.4= attimer.4 _aibs.4= aibs.4 _amdsbwd.4= amdsbwd.4 _amdsmb.4= amdsmb.4 _amdsmn.4= amdsmn.4 _amdtemp.4= amdtemp.4 _asmc.4= asmc.4 _bxe.4= bxe.4 _bytgpio.4= bytgpio.4 _chvgpio.4= chvgpio.4 _coretemp.4= coretemp.4 _cpuctl.4= cpuctl.4 _dpms.4= dpms.4 _hpt27xx.4= hpt27xx.4 _hptiop.4= hptiop.4 _hptmv.4= hptmv.4 _hptnr.4= hptnr.4 _hptrr.4= hptrr.4 _hv_kvp.4= hv_kvp.4 _hv_netvsc.4= hv_netvsc.4 _hv_storvsc.4= hv_storvsc.4 _hv_utils.4= hv_utils.4 _hv_vmbus.4= hv_vmbus.4 _hv_vss.4= hv_vss.4 _i8254.4= i8254.4 _ichwd.4= ichwd.4 _if_bxe.4= if_bxe.4 _if_ndis.4= if_ndis.4 _if_nfe.4= if_nfe.4 _if_urtw.4= if_urtw.4 _if_vmx.4= if_vmx.4 _if_vtnet.4= if_vtnet.4 _if_wpi.4= if_wpi.4 _imcsmb.4= imcsmb.4 _ipmi.4= ipmi.4 _io.4= io.4 _linux.4= linux.4 _nda.4= nda.4 _ndis.4= ndis.4 _nfe.4= nfe.4 _nfsmb.4= nfsmb.4 _nvd.4= nvd.4 _nvme.4= nvme.4 _nvram.4= nvram.4 _virtio.4= virtio.4 _virtio_balloon.4=virtio_balloon.4 _virtio_blk.4= virtio_blk.4 _virtio_console.4=virtio_console.4 _virtio_random.4= virtio_random.4 _virtio_scsi.4= virtio_scsi.4 _vmx.4= vmx.4 _vtnet.4= vtnet.4 _padlock.4= padlock.4 _rr232x.4= rr232x.4 _speaker.4= speaker.4 _spkr.4= spkr.4 _tpm.4= tpm.4 _urtw.4= urtw.4 _viawd.4= viawd.4 _vmci.4= vmci.4 _wbwd.4= wbwd.4 _wpi.4= wpi.4 _xen.4= xen.4 _xnb.4= xnb.4 .endif .if ${MACHINE_CPUARCH} == "amd64" _if_ntb.4= if_ntb.4 _ioat.4= ioat.4 _ntb.4= ntb.4 _ntb_hw_intel.4= ntb_hw_intel.4 _ntb_hw_plx.4= ntb_hw_plx.4 _ntb_transport.4=ntb_transport.4 _qlxge.4= qlxge.4 _qlxgb.4= qlxgb.4 _qlxgbe.4= qlxgbe.4 _qlnxe.4= qlnxe.4 _sfxge.4= sfxge.4 _smartpqi.4= smartpqi.4 MLINKS+=qlxge.4 if_qlxge.4 MLINKS+=qlxgb.4 if_qlxgb.4 MLINKS+=qlxgbe.4 if_qlxgbe.4 MLINKS+=qlnxe.4 if_qlnxe.4 MLINKS+=sfxge.4 if_sfxge.4 .if ${MK_BHYVE} != "no" _bhyve.4= bhyve.4 _vmm.4= vmm.4 .endif .endif .if ${MACHINE_CPUARCH} == "mips" _nvram2env.4= nvram2env.4 .endif .if ${MACHINE_CPUARCH} == "powerpc" _nvd.4= nvd.4 _nvme.4= nvme.4 .endif .if empty(MAN_ARCH) __arches= ${MACHINE} ${MACHINE_ARCH} ${MACHINE_CPUARCH} .elif ${MAN_ARCH} == "all" __arches= ${:!/bin/sh -c "/bin/ls -d ${.CURDIR}/man4.*"!:E} .else __arches= ${MAN_ARCH} .endif .for __arch in ${__arches:O:u} .if exists(${.CURDIR}/man4.${__arch}) SUBDIR+= man4.${__arch} .endif .endfor .if ${MK_BLUETOOTH} != 
"no" MAN+= ng_bluetooth.4 .endif .if ${MK_CCD} != "no" _ccd.4= ccd.4 .endif .if ${MK_CDDL} != "no" -_dtrace_provs= dtrace_io.4 \ +_dtrace_provs= dtrace_audit.4 \ + dtrace_io.4 \ dtrace_ip.4 \ dtrace_lockstat.4 \ dtrace_proc.4 \ dtrace_sched.4 \ dtrace_sctp.4 \ dtrace_tcp.4 \ dtrace_udp.4 \ dtrace_udplite.4 + +MLINKS+= dtrace_audit.4 dtaudit.4 .endif .if ${MK_EFI} != "no" MAN+= efidev.4 MLINKS+= efidev.4 efirtc.4 .endif .if ${MK_ISCSI} != "no" MAN+= cfiscsi.4 MAN+= iscsi.4 MAN+= iscsi_initiator.4 MAN+= iser.4 .endif .if ${MK_OFED} != "no" MAN+= mlx4ib.4 MAN+= mlx5ib.4 .endif .if ${MK_MLX5TOOL} != "no" MAN+= mlx5io.4 .endif .if ${MK_TESTS} != "no" ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF}/doc _atf_test_case.4= atf-test-case.4 .endif .if ${MK_PF} != "no" _pf.4= pf.4 _pflog.4= pflog.4 _pfsync.4= pfsync.4 .endif .if ${MK_USB} != "no" MAN+= \ otus.4 \ otusfw.4 \ rsu.4 \ rsufw.4 \ rtwn_usb.4 \ rum.4 \ run.4 \ runfw.4 \ u3g.4 \ uark.4 \ uart.4 \ uath.4 \ ubsa.4 \ ubsec.4 \ ubser.4 \ ubtbcmfw.4 \ uchcom.4 \ ucom.4 \ ucycom.4 \ udav.4 \ udbp.4 \ udl.4 \ uep.4 \ ufm.4 \ ufoma.4 \ uftdi.4 \ ugen.4 \ ugold.4 \ uhci.4 \ uhid.4 \ uhso.4 \ uipaq.4 \ ukbd.4 \ uled.4 \ ulpt.4 \ umass.4 \ umcs.4 \ umct.4 \ umodem.4 \ umoscom.4 \ ums.4 \ unix.4 \ upgt.4 \ uplcom.4 \ ural.4 \ urio.4 \ urndis.4 \ ${_urtw.4} \ usb.4 \ usb_quirk.4 \ usb_template.4 \ usfs.4 \ uslcom.4 \ uvisor.4 \ uvscom.4 \ zyd.4 MLINKS+=otus.4 if_otus.4 MLINKS+=rsu.4 if_rsu.4 MLINKS+=rtwn_usb.4 if_rtwn_usb.4 MLINKS+=rum.4 if_rum.4 MLINKS+=run.4 if_run.4 MLINKS+=u3g.4 u3gstub.4 MLINKS+=uath.4 if_uath.4 MLINKS+=udav.4 if_udav.4 MLINKS+=upgt.4 if_upgt.4 MLINKS+=ural.4 if_ural.4 MLINKS+=urndis.4 if_urndis.4 MLINKS+=${_urtw.4} ${_if_urtw.4} MLINKS+=zyd.4 if_zyd.4 .endif .include Index: projects/runtime-coverage-v2/share/man/man4/audit.4 =================================================================== --- projects/runtime-coverage-v2/share/man/man4/audit.4 (revision 346924) +++ projects/runtime-coverage-v2/share/man/man4/audit.4 (revision 346925) @@ -1,148 +1,160 @@ -.\" Copyright (c) 2006 Robert N. M. Watson +.\" Copyright (c) 2006, 2019 Robert N. M. Watson .\" All rights reserved. .\" +.\" This software was developed in part by BAE Systems, the University of +.\" Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL +.\" contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent +.\" Computing (TC) research program. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 31, 2009 +.Dd April 28, 2019 .Dt AUDIT 4 .Os .Sh NAME .Nm audit .Nd Security Event Audit .Sh SYNOPSIS .Cd "options AUDIT" .Sh DESCRIPTION Security Event Audit is a facility to provide fine-grained, configurable logging of security-relevant events, and is intended to meet the requirements of the Common Criteria (CC) Controlled Access Protection Profile (CAPP) evaluation. The .Fx .Nm facility implements the de facto industry standard BSM API, file formats, and command line interface, first found in the Solaris operating system. Information on the user space implementation can be found in .Xr libbsm 3 . .Pp Audit support is enabled at boot, if present in the kernel, using an .Xr rc.conf 5 flag. The audit daemon, .Xr auditd 8 , is responsible for configuring the kernel to perform .Nm , pushing configuration data from the various audit configuration files into the kernel. .Ss Audit Special Device The kernel .Nm facility provides a special device, .Pa /dev/audit , which is used by .Xr auditd 8 to monitor for .Nm events, such as requests to cycle the log, low disk space conditions, and requests to terminate auditing. This device is not intended for use by applications. .Ss Audit Pipe Special Devices Audit pipe special devices, discussed in .Xr auditpipe 4 , provide a configurable live tracking mechanism to allow applications to tee the audit trail, as well as to configure custom preselection parameters to track users and events in a fine-grained manner. +.Ss DTrace Audit Provider +The DTrace Audit Provider, +.Xr dtaudit 4 , +allows D scripts to enable capture of in-kernel audit records for kernel audit +event types, and then process their contents during audit commit or BSM +generation. .Sh SEE ALSO .Xr auditreduce 1 , .Xr praudit 1 , .Xr audit 2 , .Xr auditctl 2 , .Xr auditon 2 , .Xr getaudit 2 , .Xr getauid 2 , .Xr poll 2 , .Xr select 2 , .Xr setaudit 2 , .Xr setauid 2 , .Xr libbsm 3 , .Xr auditpipe 4 , +.Xr dtaudit 4 , .Xr audit.log 5 , .Xr audit_class 5 , .Xr audit_control 5 , .Xr audit_event 5 , .Xr audit_user 5 , .Xr audit_warn 5 , .Xr rc.conf 5 , .Xr audit 8 , .Xr auditd 8 , .Xr auditdistd 8 .Sh HISTORY The .Tn OpenBSM implementation was created by McAfee Research, the security division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. It was subsequently adopted by the TrustedBSD Project as the foundation for the OpenBSM distribution. .Pp Support for kernel .Nm first appeared in .Fx 6.2 . .Sh AUTHORS .An -nosplit This software was created by McAfee Research, the security research division of McAfee, Inc., under contract to Apple Computer Inc. Additional authors include .An Wayne Salamon , .An Robert Watson , and SPARTA Inc. .Pp The Basic Security Module (BSM) interface to audit records and audit event stream format were defined by Sun Microsystems. .Pp This manual page was written by .An Robert Watson Aq Mt rwatson@FreeBSD.org .
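The audit pipe facility described above is consumed with ordinary read(2) calls plus the AUDITPIPE_* ioctls detailed in auditpipe(4) later in this change. A minimal reader might look like the following sketch; it assumes the installed FreeBSD header <security/audit/audit_ioctl.h> and the u_int argument type of AUDITPIPE_GET_MAXAUDITDATA, with error handling trimmed to err(3):

    #include <sys/types.h>
    #include <sys/ioctl.h>

    #include <security/audit/audit_ioctl.h>

    #include <err.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int
    main(void)
    {
            u_int maxrec;
            ssize_t len;
            char *buf;
            int fd;

            /* Each open of /dev/auditpipe gets an independent record queue. */
            if ((fd = open("/dev/auditpipe", O_RDONLY)) < 0)
                    err(1, "/dev/auditpipe");

            /* Size the read buffer from the maximum audit record size. */
            if (ioctl(fd, AUDITPIPE_GET_MAXAUDITDATA, &maxrec) < 0)
                    err(1, "AUDITPIPE_GET_MAXAUDITDATA");
            if ((buf = malloc(maxrec)) == NULL)
                    err(1, "malloc");

            /* Each read(2) returns at most one complete BSM record. */
            for (;;) {
                    if ((len = read(fd, buf, maxrec)) < 0)
                            err(1, "read");
                    printf("received %zd-byte BSM record\n", len);
            }
    }

A record read this way is raw BSM, suitable for handing to the libbsm(3) parsing routines or to praudit(1).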
.Sh BUGS The .Fx kernel does not fully validate that audit records submitted by user applications are syntactically valid BSM; as submission of records is limited to privileged processes, this is not a critical bug. .Pp Instrumentation of auditable events in the kernel is not complete, as some system calls do not generate audit records, or generate audit records with incomplete argument information. .Pp Mandatory Access Control (MAC) labels, as provided by the .Xr mac 4 facility, are not audited as part of records involving MAC decisions. .Pp Currently the .Nm syscalls are not supported for jailed processes. However, if a process has .Nm session state associated with it, audit records will still be produced and a zonename token containing the jail's ID or name will be present in the audit records. Index: projects/runtime-coverage-v2/share/man/man4/auditpipe.4 =================================================================== --- projects/runtime-coverage-v2/share/man/man4/auditpipe.4 (revision 346924) +++ projects/runtime-coverage-v2/share/man/man4/auditpipe.4 (revision 346925) @@ -1,256 +1,257 @@ .\" Copyright (c) 2006 Robert N. M. Watson .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 30, 2018 +.Dd April 28, 2019 .Dt AUDITPIPE 4 .Os .Sh NAME .Nm auditpipe .Nd "pseudo-device for live audit event tracking" .Sh SYNOPSIS .Cd "options AUDIT" .Sh DESCRIPTION While audit trail files generated with .Xr audit 4 and maintained by .Xr auditd 8 provide a reliable long-term store for audit log information, current log files are owned by the audit daemon until terminated making them somewhat unwieldy for live monitoring applications such as host-based intrusion detection. For example, the log may be cycled and new records written to a new file without notice to applications that may be accessing the file. .Pp The audit facility provides an audit pipe facility for applications requiring direct access to live BSM audit data for the purposes of real-time monitoring. Audit pipes are available via a clonable special device, .Pa /dev/auditpipe , subject to the permissions on the device node, and provide a .Qq tee of the audit event stream. 
As the device is clonable, more than one instance of the device may be opened at a time; each device instance will provide independent access to all records. .Pp The audit pipe device provides discrete BSM audit records; if the read buffer passed by the application is too small to hold the next record in the sequence, it will be dropped. Unlike audit data written to the audit trail, the reliability of record delivery is not guaranteed. In particular, when an audit pipe queue fills, records will be dropped. Audit pipe devices are blocking by default, but support non-blocking I/O, asynchronous I/O using .Dv SIGIO , and polled operation via .Xr select 2 and .Xr poll 2 . .Pp Applications may choose to track the global audit trail, or configure local preselection parameters independent of the global audit trail parameters. .Ss Audit Pipe Queue Ioctls The following ioctls retrieve and set various audit pipe record queue properties: .Bl -tag -width ".Dv AUDITPIPE_GET_MAXAUDITDATA" .It Dv AUDITPIPE_GET_QLEN Query the current number of records available for reading on the pipe. .It Dv AUDITPIPE_GET_QLIMIT Retrieve the current maximum number of records that may be queued for reading on the pipe. .It Dv AUDITPIPE_SET_QLIMIT Set the current maximum number of records that may be queued for reading on the pipe. The new limit must fall between the queue limit minimum and queue limit maximum queryable using the following two ioctls. .It Dv AUDITPIPE_GET_QLIMIT_MIN Query the lowest possible maximum number of records that may be queued for reading on the pipe. .It Dv AUDITPIPE_GET_QLIMIT_MAX Query the highest possible maximum number of records that may be queued for reading on the pipe. .It Dv AUDITPIPE_FLUSH Flush all outstanding records on the audit pipe; useful after setting initial preselection properties to delete records queued during the configuration process which may not match the interests of the user process. .It Dv AUDITPIPE_GET_MAXAUDITDATA Query the maximum size of an audit record, which is a useful minimum size for a user space buffer intended to hold audit records read from the audit pipe. .El .Ss Audit Pipe Preselection Mode Ioctls By default, the audit pipe facility configures pipes to present records matched by the system-wide audit trail, configured by .Xr auditd 8 . However, the preselection mechanism for audit pipes can be configured using alternative criteria, including pipe-local flags and naflags settings, as well as auid-specific selection masks. This allows applications to track events not captured in the global audit trail, as well as limit records presented to those of specific interest to the application. .Pp The following ioctls configure the preselection mode on an audit pipe: .Bl -tag -width ".Dv AUDITPIPE_GET_PRESELECT_MODE" .It Dv AUDITPIPE_GET_PRESELECT_MODE Return the current preselect mode on the audit pipe. The ioctl argument should be of type .Vt int . .It Dv AUDITPIPE_SET_PRESELECT_MODE Set the current preselection mode on the audit pipe. The ioctl argument should be of type .Vt int . .El .Pp Possible preselection mode values are: .Bl -tag -width ".Dv AUDITPIPE_PRESELECT_MODE_TRAIL" .It Dv AUDITPIPE_PRESELECT_MODE_TRAIL Use the global audit trail preselection parameters to select records for the audit pipe. .It Dv AUDITPIPE_PRESELECT_MODE_LOCAL Use local audit pipe preselection; this model is similar to the global audit trail configuration model, consisting of global flags and naflags parameters, as well as a set of per-auid masks. 
These parameters are configured using further ioctls. .El .Pp After changing the audit pipe preselection mode, records selected under earlier preselection configuration may still be in the audit pipe queue. The application may flush the current record queue after changing the configuration to remove possibly undesired records. .Ss Audit Pipe Local Preselection Mode Ioctls The following ioctls configure the preselection parameters used when an audit pipe is configured for the .Dv AUDITPIPE_PRESELECT_MODE_LOCAL preselection mode. .Bl -tag -width ".Dv AUDITPIPE_GET_PRESELECT_NAFLAGS" .It Dv AUDITPIPE_GET_PRESELECT_FLAGS Retrieve the current default preselection flags for attributable events on the pipe. These flags correspond to the .Va flags field in .Xr audit_control 5 . The ioctl argument should be of type .Vt au_mask_t . .It Dv AUDITPIPE_SET_PRESELECT_FLAGS Set the current default preselection flags for attributable events on the pipe. These flags correspond to the .Va flags field in .Xr audit_control 5 . The ioctl argument should be of type .Vt au_mask_t . .It Dv AUDITPIPE_GET_PRESELECT_NAFLAGS Retrieve the current default preselection flags for non-attributable events on the pipe. These flags correspond to the .Va naflags field in .Xr audit_control 5 . The ioctl argument should be of type .Vt au_mask_t . .It Dv AUDITPIPE_SET_PRESELECT_NAFLAGS Set the current default preselection flags for non-attributable events on the pipe. These flags correspond to the .Va naflags field in .Xr audit_control 5 . The ioctl argument should be of type .Vt au_mask_t . .It Dv AUDITPIPE_GET_PRESELECT_AUID Query the current preselection masks for a specific auid on the pipe. The ioctl argument should be of type .Vt "struct auditpipe_ioctl_preselect" . The auid to query is specified via the .Va ap_auid field of type .Vt au_id_t ; the mask will be returned via .Va ap_mask of type .Vt au_mask_t . .It Dv AUDITPIPE_SET_PRESELECT_AUID Set the current preselection masks for a specific auid on the pipe. Arguments are identical to .Dv AUDITPIPE_GET_PRESELECT_AUID , except that the caller should properly initialize the .Va ap_mask field to hold the desired preselection mask. .It Dv AUDITPIPE_DELETE_PRESELECT_AUID Delete the current preselection mask for a specific auid on the pipe. Once called, events associated with the specified auid will use the default flags mask. The ioctl argument should be of type .Vt au_id_t . .It Dv AUDITPIPE_FLUSH_PRESELECT_AUID Delete all auid specific preselection specifications. .El .Sh EXAMPLES The .Xr praudit 1 utility may be directly executed on .Pa /dev/auditpipe to review the default audit trail. .Sh SEE ALSO .Xr poll 2 , .Xr select 2 , .Xr audit 4 , +.Xr dtaudit 4 , .Xr audit_control 5 , .Xr audit 8 , .Xr auditd 8 .Sh HISTORY The OpenBSM implementation was created by McAfee Research, the security division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. It was subsequently adopted by the TrustedBSD Project as the foundation for the OpenBSM distribution. .Pp Support for kernel audit first appeared in .Fx 6.2 . .Sh AUTHORS The audit pipe facility was designed and implemented by .An Robert Watson Aq Mt rwatson@FreeBSD.org . .Pp The Basic Security Module (BSM) interface to audit records and audit event stream format were defined by Sun Microsystems. .Sh BUGS See the .Xr audit 4 manual page for information on audit-related bugs and limitations. .Pp The configurable preselection mechanism mirrors the selection model present for the global audit trail. 
It might be desirable to provide a more flexible selection model. .Pp The per-pipe audit event queue is fifo, with drops occurring if either the user thread provides insufficient space for the record on the queue head, or on enqueue if there is insufficient room. It might be desirable to support partial reads of records, which would be more compatible with buffered I/O as implemented in system libraries, and to allow applications to select which records are dropped, possibly in the style of preselection. Index: projects/runtime-coverage-v2/share/man/man4/dtrace_audit.4 =================================================================== --- projects/runtime-coverage-v2/share/man/man4/dtrace_audit.4 (nonexistent) +++ projects/runtime-coverage-v2/share/man/man4/dtrace_audit.4 (revision 346925) @@ -0,0 +1,178 @@ +.\"- +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2019 Robert N. M. Watson +.\" +.\" This software was developed by BAE Systems, the University of Cambridge +.\" Computer Laboratory, and Memorial University under DARPA/AFRL contract +.\" FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing +.\" (TC) research program. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 28, 2019 +.Dt DTRACE_AUDIT 4 +.Os +.Sh NAME +.Nm dtrace_audit +.Nd A DTrace provider for tracing +.Xr audit 4 +events +.Sh SYNOPSIS +.Pp +.Fn audit:event:aue_*:commit "char *eventname" "struct audit_record *ar" +.Fn audit:event:aue_*:bsm "char *eventname" "struct audit_record *ar" "const void *" "size_t" +.Pp +To compile this module into the kernel, place the following in your kernel +configuration file: +.Pp +.Bd -literal -offset indent +.Cd "options DTAUDIT" +.Ed +.Pp +Alternatively, to load the module at boot time, place the following line in +.Xr loader.conf 5 : +.Bd -literal -offset indent +dtaudit_load="YES" +.Ed +.Sh DESCRIPTION The DTrace +.Nm dtaudit +provider allows users to trace events in the kernel security auditing +subsystem, +.Xr audit 4 . +.Xr audit 4 +provides detailed logging of a configurable set of security-relevant system +calls, including key arguments (such as file paths) and return values that are +copied race-free as the system call proceeds.
+The +.Nm dtaudit +provider allows DTrace scripts to selectively enable in-kernel audit-record +capture for system calls, and then access those records in either the +in-kernel format or BSM format (\c +.Xr audit.log 5 ) +when the system call completes. +While the in-kernel audit record data structure is subject to change as the +kernel changes over time, it is a much more friendly interface for use in D +scripts than either of those available via the DTrace system-call provider or +the BSM trail itself. +.Ss Configuration +The +.Nm dtaudit +provider relies on +.Xr audit 4 +being compiled into the kernel. +.Nm dtaudit +probes become available only once there is an event-to-name mapping installed +in the kernel, normally done by +.Xr auditd 8 +during the boot process, if audit is enabled in +.Xr rc.conf 5 : +.Bd -literal -offset indent +auditd_enable="YES" +.Ed +.Pp +If +.Nm dtaudit +probes are required earlier in boot -- for example, in single-user mode -- or +without enabling +.Xr audit 4 , +they can be preloaded in the boot loader by adding this line to +.Xr loader.conf 5 . +.Bd -literal -offset indent +audit_event_load="YES" +.Ed +.Ss Probes +The +.Fn audit:event:aue_*:commit +probes fire synchronously during system-call return, giving access to two +arguments: a +.Vt char * +audit event name, and +the +.Vt struct audit_record * +in-kernel audit record. +Because the probe fires in system-call return, the user thread has not yet +regained control, and additional information from the thread and process +remains available for capture by the script. +.Pp +The +.Fn audit:event:aue_*:bsm +probes fire asynchronously from system-call return, following BSM conversion +and just prior to being written to disk, giving access to four arguments: a +.Vt char * +audit event name, the +.Vt struct audit_record * +in-kernel audit record, a +.Vt const void * +pointer to the converted BSM record, and a +.Vt size_t +for the length of the BSM record. +.Sh IMPLEMENTATION NOTES +When a set of +.Nm dtaudit +probes is registered, corresponding in-kernel audit records will be captured +and their probes will fire regardless of whether the +.Xr audit 4 +subsystem itself would have captured the record for the purposes of writing it +to the audit trail, or for delivery to a +.Xr auditpipe 4 . +In-kernel audit records allocated only because of enabled +.Xr dtaudit 4 +probes will not be unnecessarily written to the audit trail or enabled pipes. +.Sh SEE ALSO +.Xr dtrace 1 , +.Xr audit 4 , +.Xr audit.log 5 , +.Xr loader.conf 5 , +.Xr rc.conf 5 , +.Xr auditd 8 +.Sh HISTORY +The +.Nm dtaudit +provider first appeared in +.Fx 12.0 . +.Sh AUTHORS +This software and this manual page were developed by BAE Systems, the +University of Cambridge Computer Laboratory, and Memorial University under +DARPA/AFRL contract +.Pq FA8650-15-C-7558 +.Pq Do CADETS Dc , +as part of the DARPA Transparent Computing (TC) research program. +The +.Nm dtaudit +provider and this manual page were written by +.An Robert Watson Aq Mt rwatson@FreeBSD.org . +.Sh BUGS +Because +.Xr audit 4 +maintains its primary event-to-name mapping database in userspace, that +database must be loaded into the kernel before +.Nm dtaudit +probes become available. +.Pp +.Nm dtaudit +is only able to provide access to system-call audit events, not the full +scope of userspace events, such as those relating to login, password change, +and so on.
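For reference, the commit probes above can be exercised from a dtrace(1) one-liner; this sketch relies only on the first (event-name) argument documented in the SYNOPSIS, and the output format is purely illustrative:

    # dtrace -n 'audit:event:aue_*:commit { printf("%s by pid %d\n", stringof(arg0), pid); }'

Because the commit probe fires before the user thread regains control, built-in variables such as pid and execname still describe the audited process.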
Property changes on: projects/runtime-coverage-v2/share/man/man4/dtrace_audit.4 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/runtime-coverage-v2/stand/efi/loader/main.c =================================================================== --- projects/runtime-coverage-v2/stand/efi/loader/main.c (revision 346924) +++ projects/runtime-coverage-v2/stand/efi/loader/main.c (revision 346925) @@ -1,1421 +1,1555 @@ /*- * Copyright (c) 2008-2010 Rui Paulo * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Copyright (c) 2016-2019 Netflix, Inc. written by M. Warner Losh * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "efizfs.h" #include "loader_efi.h" struct arch_switch archsw; /* MI/MD interface boundary */ EFI_GUID acpi = ACPI_TABLE_GUID; EFI_GUID acpi20 = ACPI_20_TABLE_GUID; EFI_GUID devid = DEVICE_PATH_PROTOCOL; EFI_GUID imgid = LOADED_IMAGE_PROTOCOL; EFI_GUID mps = MPS_TABLE_GUID; EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL; EFI_GUID smbios = SMBIOS_TABLE_GUID; EFI_GUID smbios3 = SMBIOS3_TABLE_GUID; EFI_GUID dxe = DXE_SERVICES_TABLE_GUID; EFI_GUID hoblist = HOB_LIST_TABLE_GUID; EFI_GUID lzmadecomp = LZMA_DECOMPRESSION_GUID; EFI_GUID mpcore = ARM_MP_CORE_INFO_TABLE_GUID; EFI_GUID esrt = ESRT_TABLE_GUID; EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; EFI_GUID inputid = SIMPLE_TEXT_INPUT_PROTOCOL; /* * Number of seconds to wait for a keystroke before exiting with failure * in the event no currdev is found. -2 means always break, -1 means * never break, 0 means poll once and then reboot, > 0 means wait for * that many seconds. "fail_timeout" can be set in the environment as * well. */ static int fail_timeout = 5; /* * Current boot variable */ UINT16 boot_current; /* * Image that we booted from. 
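* Obtained from the loader's own image handle via the LOADED_IMAGE_PROTOCOL.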
*/ EFI_LOADED_IMAGE *boot_img; static bool has_keyboard(void) { EFI_STATUS status; EFI_DEVICE_PATH *path; EFI_HANDLE *hin, *hin_end, *walker; UINTN sz; bool retval = false; /* * Find all the handles that support the SIMPLE_TEXT_INPUT_PROTOCOL and * do the typical dance to get the right sized buffer. */ sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, 0); if (status == EFI_BUFFER_TOO_SMALL) { hin = (EFI_HANDLE *)malloc(sz); status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return retval; /* * Look at each of the handles. If it supports the device path protocol, * use it to get the device path for this handle. Then see if that * device path matches either the USB device path for keyboards or the * legacy device path for keyboards. */ hin_end = &hin[sz / sizeof(*hin)]; for (walker = hin; walker < hin_end; walker++) { status = BS->HandleProtocol(*walker, &devid, (VOID **)&path); if (EFI_ERROR(status)) continue; while (!IsDevicePathEnd(path)) { /* * Check for the ACPI keyboard node. All PNP3xx nodes * are keyboards of different flavors. Note: It is * unclear if there's always a keyboard node when * there's a keyboard controller, or if there's only one * when a keyboard is detected at boot. */ if (DevicePathType(path) == ACPI_DEVICE_PATH && (DevicePathSubType(path) == ACPI_DP || DevicePathSubType(path) == ACPI_EXTENDED_DP)) { ACPI_HID_DEVICE_PATH *acpi; acpi = (ACPI_HID_DEVICE_PATH *)(void *)path; if ((EISA_ID_TO_NUM(acpi->HID) & 0xff00) == 0x300 && (acpi->HID & 0xffff) == PNP_EISA_ID_CONST) { retval = true; goto out; } /* * Check for USB keyboard node, if present. Unlike a * PS/2 keyboard, these definitely only appear when * connected to the system. */ } else if (DevicePathType(path) == MESSAGING_DEVICE_PATH && DevicePathSubType(path) == MSG_USB_CLASS_DP) { USB_CLASS_DEVICE_PATH *usb; usb = (USB_CLASS_DEVICE_PATH *)(void *)path; if (usb->DeviceClass == 3 && /* HID */ usb->DeviceSubClass == 1 && /* Boot devices */ usb->DeviceProtocol == 1) { /* Boot keyboards */ retval = true; goto out; } } path = NextDevicePathNode(path); } } out: free(hin); return retval; } static void set_currdev(const char *devname) { env_setenv("currdev", EV_VOLATILE, devname, efi_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, devname, env_noset, env_nounset); } static void set_currdev_devdesc(struct devdesc *currdev) { const char *devname; devname = efi_fmtdev(currdev); printf("Setting currdev to %s\n", devname); set_currdev(devname); } static void set_currdev_devsw(struct devsw *dev, int unit) { struct devdesc currdev; currdev.d_dev = dev; currdev.d_unit = unit; set_currdev_devdesc(&currdev); } static void set_currdev_pdinfo(pdinfo_t *dp) { /* * Disks are special: they have partitions. If the parent * pointer is non-null, we're a partition not a full disk * and we need to adjust currdev appropriately.
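* A whole disk maps to a plain name such as disk0:, while a partition also carries slice/partition fields, e.g. disk0p2: for the second GPT partition.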
*/ if (dp->pd_devsw->dv_type == DEVT_DISK) { struct disk_devdesc currdev; currdev.dd.d_dev = dp->pd_devsw; if (dp->pd_parent == NULL) { currdev.dd.d_unit = dp->pd_unit; currdev.d_slice = D_SLICENONE; currdev.d_partition = D_PARTNONE; } else { currdev.dd.d_unit = dp->pd_parent->pd_unit; currdev.d_slice = dp->pd_unit; currdev.d_partition = D_PARTISGPT; /* XXX Assumes GPT */ } set_currdev_devdesc((struct devdesc *)&currdev); } else { set_currdev_devsw(dp->pd_devsw, dp->pd_unit); } } static bool sanity_check_currdev(void) { struct stat st; return (stat("/boot/defaults/loader.conf", &st) == 0 || stat("/boot/kernel/kernel", &st) == 0); } #ifdef EFI_ZFS_BOOT static bool probe_zfs_currdev(uint64_t guid) { char *devname; struct zfs_devdesc currdev; currdev.dd.d_dev = &zfs_dev; currdev.dd.d_unit = 0; currdev.pool_guid = guid; currdev.root_guid = 0; set_currdev_devdesc((struct devdesc *)&currdev); devname = efi_fmtdev(&currdev); init_zfs_bootenv(devname); return (sanity_check_currdev()); } #endif static bool try_as_currdev(pdinfo_t *hd, pdinfo_t *pp) { uint64_t guid; #ifdef EFI_ZFS_BOOT /* * If there's a zpool on this device, try it as a ZFS * filesystem, which has somewhat different setup than all * other types of fs due to imperfect loader integration. * This all stems from ZFS being both a device (zpool) and * a filesystem, plus the boot env feature. */ if (efizfs_get_guid_by_handle(pp->pd_handle, &guid)) return (probe_zfs_currdev(guid)); #endif /* * All other filesystems just need the pdinfo * initialized in the standard way. */ set_currdev_pdinfo(pp); return (sanity_check_currdev()); } /* * Sometimes we get filenames that are all upper case * and/or have backslashes in them. Filter all this out * if it looks like we need to do so. */ static void fix_dosisms(char *p) { while (*p) { if (isupper(*p)) *p = tolower(*p); else if (*p == '\\') *p = '/'; p++; } } #define SIZE(dp, edp) (size_t)((intptr_t)(void *)edp - (intptr_t)(void *)dp) enum { BOOT_INFO_OK = 0, BAD_CHOICE = 1, NOT_SPECIFIC = 2 }; static int match_boot_info(char *boot_info, size_t bisz) { uint32_t attr; uint16_t fplen; size_t len; char *walker, *ep; EFI_DEVICE_PATH *dp, *edp, *first_dp, *last_dp; pdinfo_t *pp; CHAR16 *descr; char *kernel = NULL; FILEPATH_DEVICE_PATH *fp; struct stat st; CHAR16 *text; /* * FreeBSD encodes its boot loading path into the boot loader * BootXXXX variable. We look for the last one in the path * and use that to load the kernel. However, if we only find * one DEVICE_PATH, then there's nothing specific and we should * fall back. * * In an ideal world, we'd look at the image handle we were * passed, match up with the loader we are and then return the * next one in the path. This would be most flexible and cover * many chain booting scenarios where you need to use this * boot loader to get to the next boot loader. However, that * doesn't work. We rarely have the path to the image booted * (just the device) so we can't count on that. So, we do the * next best thing, we look through the device path(s) passed * in the BootXXXX variable. If there's only one, we return * NOT_SPECIFIC. Otherwise, we look at the last one and try to * load that. If we can, we return BOOT_INFO_OK. Otherwise we * return BAD_CHOICE for the caller to sort out.
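* In short: NOT_SPECIFIC tells the caller to fall through to the other currdev heuristics, BOOT_INFO_OK means currdev (and possibly the kernel variable) were set from the BootXXXX path, and BAD_CHOICE means the path was specific but unusable, so the boot should fail.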
*/ if (bisz < sizeof(attr) + sizeof(fplen) + sizeof(CHAR16)) return NOT_SPECIFIC; walker = boot_info; ep = walker + bisz; memcpy(&attr, walker, sizeof(attr)); walker += sizeof(attr); memcpy(&fplen, walker, sizeof(fplen)); walker += sizeof(fplen); descr = (CHAR16 *)(intptr_t)walker; len = ucs2len(descr); walker += (len + 1) * sizeof(CHAR16); last_dp = first_dp = dp = (EFI_DEVICE_PATH *)walker; edp = (EFI_DEVICE_PATH *)(walker + fplen); if ((char *)edp > ep) return NOT_SPECIFIC; while (dp < edp && SIZE(dp, edp) > sizeof(EFI_DEVICE_PATH)) { text = efi_devpath_name(dp); if (text != NULL) { printf(" BootInfo Path: %S\n", text); efi_free_devpath_name(text); } last_dp = dp; dp = (EFI_DEVICE_PATH *)((char *)dp + efi_devpath_length(dp)); } /* * If there's only one item in the list, then nothing was * specified. Likewise if the last path doesn't have a media * path in it: those show up as various VenHw() nodes * which are basically opaque to us. Don't count those * as something specific. */ if (last_dp == first_dp) { printf("Ignoring Boot%04x: Only one DP found\n", boot_current); return NOT_SPECIFIC; } if (efi_devpath_to_media_path(last_dp) == NULL) { printf("Ignoring Boot%04x: No Media Path\n", boot_current); return NOT_SPECIFIC; } /* * OK. At this point we either have a good path or a bad one. * Let's check. */ pp = efiblk_get_pdinfo_by_device_path(last_dp); if (pp == NULL) { printf("Ignoring Boot%04x: Device Path not found\n", boot_current); return BAD_CHOICE; } set_currdev_pdinfo(pp); if (!sanity_check_currdev()) { printf("Ignoring Boot%04x: sanity check failed\n", boot_current); return BAD_CHOICE; } /* * OK. We've found a device that matches; next we need to check the last * component of the path. If it's a file, then we set the default kernel * to that. Otherwise, just use this as the default root. * * Reminder: we're running very early, before we've parsed the defaults * file, so we may need to have a hack override. */ dp = efi_devpath_last_node(last_dp); if (DevicePathType(dp) != MEDIA_DEVICE_PATH || DevicePathSubType(dp) != MEDIA_FILEPATH_DP) { printf("Using Boot%04x for root partition\n", boot_current); return (BOOT_INFO_OK); /* use currdir, default kernel */ } fp = (FILEPATH_DEVICE_PATH *)dp; ucs2_to_utf8(fp->PathName, &kernel); if (kernel == NULL) { printf("Not using Boot%04x: can't decode kernel\n", boot_current); return (BAD_CHOICE); } if (*kernel == '\\' || isupper(*kernel)) fix_dosisms(kernel); if (stat(kernel, &st) != 0) { printf("Not using Boot%04x: can't find %s\n", boot_current, kernel); free(kernel); return (BAD_CHOICE); } setenv("kernel", kernel, 1); text = efi_devpath_name(last_dp); if (text) { printf("Using Boot%04x %S + %s\n", boot_current, text, kernel); efi_free_devpath_name(text); } free(kernel); /* freed only after the printfs above are done with it */ return (BOOT_INFO_OK); } /* * Look at the passed-in boot_info, if any. If we find it then we need * to see if we can find ourselves in the boot chain. If we can, and * there's another specified thing to boot next, assume that the file * is loaded from / and use that for the root filesystem. If we can't * find the specified thing, we must fail the boot. If we're last on * the list, then we fall back to looking for the first available / * candidate (ZFS, if there's a bootable zpool, otherwise a UFS * partition that has either /boot/defaults/loader.conf on it or * /boot/kernel/kernel (the default kernel) that we can use. * * We always fail if we can't find the right thing.
However, as * a concession to buggy UEFI implementations, like u-boot, if * we have determined that the host is violating the UEFI boot * manager protocol, we'll signal the rest of the program that * a drop to the OK boot loader prompt is possible. */ static int find_currdev(bool do_bootmgr, bool is_last, char *boot_info, size_t boot_info_sz) { pdinfo_t *dp, *pp; EFI_DEVICE_PATH *devpath, *copy; EFI_HANDLE h; CHAR16 *text; struct devsw *dev; int unit; uint64_t extra; int rv; char *rootdev; /* * First choice: if rootdev is already set, use that, even if * it's wrong. */ rootdev = getenv("rootdev"); if (rootdev != NULL) { - printf("Setting currdev to configured rootdev %s\n", rootdev); + printf(" Setting currdev to configured rootdev %s\n", + rootdev); set_currdev(rootdev); return (0); } /* - * Second choice: If we can find out image boot_info, and there's + * Second choice: If uefi_rootdev is set, translate that UEFI device + * path to the loader's internal name and use that. + */ + do { + rootdev = getenv("uefi_rootdev"); + if (rootdev == NULL) + break; + devpath = efi_name_to_devpath(rootdev); + if (devpath == NULL) + break; + dp = efiblk_get_pdinfo_by_device_path(devpath); + efi_devpath_free(devpath); + if (dp == NULL) + break; + printf(" Setting currdev to UEFI path %s\n", + rootdev); + set_currdev_pdinfo(dp); + return (0); + } while (0); + + /* + * Third choice: If we can find our image's boot_info, and there's + * a follow-on boot image in that boot_info, use that. In this + * case root will be the partition specified in that image and + * we'll load the kernel specified by the file path. Should there + * not be a filepath, we use the default. This filepath overrides + * loader.conf. */ if (do_bootmgr) { rv = match_boot_info(boot_info, boot_info_sz); switch (rv) { case BOOT_INFO_OK: /* We found it */ return (0); case BAD_CHOICE: /* specified file not found -> error */ /* XXX do we want to have an escape hatch for last in boot order? */ return (ENOENT); } /* Nothing specified, try normal match */ } #ifdef EFI_ZFS_BOOT /* * Did efi_zfs_probe() detect the boot pool? If so, use the zpool * it found, if it's sane. ZFS is the only thing that looks for * disks and pools to boot. This may change in the future, however, * if we allow specifying which pool to boot from via UEFI variables * rather than the bootenv stuff that FreeBSD uses today. */ if (pool_guid != 0) { printf("Trying ZFS pool\n"); if (probe_zfs_currdev(pool_guid)) return (0); } #endif /* EFI_ZFS_BOOT */ /* * Try to find the block device by its handle based on the * image we're booting. If we can't find a sane partition, * search all the other partitions of the disk. We do not * search other disks because it's a violation of the UEFI * boot protocol to do so. We fail and let UEFI go on to * the next candidate. */ dp = efiblk_get_pdinfo_by_handle(boot_img->DeviceHandle); if (dp != NULL) { text = efi_devpath_name(dp->pd_devpath); if (text != NULL) { printf("Trying ESP: %S\n", text); efi_free_devpath_name(text); } set_currdev_pdinfo(dp); if (sanity_check_currdev()) return (0); if (dp->pd_parent != NULL) { pdinfo_t *espdp = dp; dp = dp->pd_parent; STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { /* Already tried the ESP */ if (espdp == pp) continue; /* * Roll up the ZFS special case * for those partitions that have * zpools on them.
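* (try_as_currdev() probes each such partition for a zpool GUID first and only falls back to the plain filesystem sanity checks when none is found.)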
*/ text = efi_devpath_name(pp->pd_devpath); if (text != NULL) { printf("Trying: %S\n", text); efi_free_devpath_name(text); } if (try_as_currdev(dp, pp)) return (0); } } } /* * Try the device handle from our loaded image first. If that * fails, use the device path from the loaded image and see if * any of the nodes in that path match one of the enumerated * handles. Currently, this handle list is only for netboot. */ if (efi_handle_lookup(boot_img->DeviceHandle, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } copy = NULL; devpath = efi_lookup_image_devpath(IH); while (devpath != NULL) { h = efi_devpath_handle(devpath); if (h == NULL) break; free(copy); copy = NULL; if (efi_handle_lookup(h, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } devpath = efi_lookup_devpath(h); if (devpath != NULL) { copy = efi_devpath_trim(devpath); devpath = copy; } } free(copy); return (ENOENT); } static bool interactive_interrupt(const char *msg) { time_t now, then, last; last = 0; now = then = getsecs(); printf("%s\n", msg); if (fail_timeout == -2) /* Always break to OK */ return (true); if (fail_timeout == -1) /* Never break to OK */ return (false); do { if (last != now) { printf("press any key to interrupt reboot in %d seconds\r", fail_timeout - (int)(now - then)); last = now; } /* XXX no pause or timeout wait for char */ if (ischar()) return (true); now = getsecs(); } while (now - then < fail_timeout); return (false); } static int parse_args(int argc, CHAR16 *argv[]) { int i, j, howto; bool vargood; char var[128]; /* * Parse the args to set the console settings, etc. * boot1.efi passes these in if it can read /boot.config or /boot/config, * or iPXE may be set up to pass these in. Or the optional argument in the * boot environment was used to pass these arguments in (in which case * neither /boot.config nor /boot/config are consulted). * * Loop through the args, and for each one that contains an '=' that is * not the first character, add it to the environment. This allows * loader and kernel env vars to be passed on the command line. Convert * args from UCS-2 to ASCII (16 to 8 bit) as they are copied (though this * method is flawed for non-ASCII characters). */ howto = 0; for (i = 1; i < argc; i++) { cpy16to8(argv[i], var, sizeof(var)); howto |= boot_parse_arg(var); } return (howto); } static void setenv_int(const char *key, int val) { char buf[20]; snprintf(buf, sizeof(buf), "%d", val); setenv(key, buf, 1); } /* * Parse ConOut (the list of active consoles) and see if we can find a * serial port and/or a video port. It would be nice to also walk the * ACPI namespace to map the UID for the serial port to a port. The * latter is especially hard.
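* For example (an illustrative rendering of the variable), a ConOut entry of Acpi(PNP0501,0x0)/Uart(115200,8,N,1) marks a serial console (efi_8250_uid=0, efi_com_speed=115200), while an entry ending in an AcpiAdr() node marks a video console.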
*/ static int parse_uefi_con_out(void) { int how, rv; int vid_seen = 0, com_seen = 0, seen = 0; size_t sz; char buf[4096], *ep; EFI_DEVICE_PATH *node; ACPI_HID_DEVICE_PATH *acpi; UART_DEVICE_PATH *uart; bool pci_pending; how = 0; sz = sizeof(buf); rv = efi_global_getenv("ConOut", buf, &sz); if (rv != EFI_SUCCESS) goto out; ep = buf + sz; node = (EFI_DEVICE_PATH *)buf; while ((char *)node < ep) { pci_pending = false; if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_DP) { /* Check for Serial node */ acpi = (void *)node; if (EISA_ID_TO_NUM(acpi->HID) == 0x501) { setenv_int("efi_8250_uid", acpi->UID); com_seen = ++seen; } } else if (DevicePathType(node) == MESSAGING_DEVICE_PATH && DevicePathSubType(node) == MSG_UART_DP) { uart = (void *)node; setenv_int("efi_com_speed", uart->BaudRate); } else if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_ADR_DP) { /* Check for AcpiAdr() Node for video */ vid_seen = ++seen; } else if (DevicePathType(node) == HARDWARE_DEVICE_PATH && DevicePathSubType(node) == HW_PCI_DP) { /* * Note: VMware Fusion has a funky console device * PciRoot(0x0)/Pci(0xf,0x0) * which we can only detect at the end since we also * have to cope with: * PciRoot(0x0)/Pci(0x1f,0x0)/Serial(0x1) * so only match it if it's last. */ pci_pending = true; } node = NextDevicePathNode(node); /* Skip the end node */ } if (pci_pending && vid_seen == 0) vid_seen = ++seen; /* * Truth table for RB_MULTIPLE | RB_SERIAL * Value Result * 0 Use only video console * RB_SERIAL Use only serial console * RB_MULTIPLE Use both video and serial console * (but video is primary so gets rc messages) * both Use both video and serial console * (but serial is primary so gets rc messages) * * Try to honor this as best we can. If only one of serial / video * found, then use that. Otherwise, use the first one we found. * This also implies if we found nothing, default to video. */ how = 0; if (vid_seen && com_seen) { how |= RB_MULTIPLE; if (com_seen < vid_seen) how |= RB_SERIAL; } else if (com_seen) how |= RB_SERIAL; out: return (how); } +void +parse_loader_efi_config(EFI_HANDLE h, const char *env_fn) +{ + pdinfo_t *dp; + struct stat st; + int fd = -1; + char *env = NULL; + + dp = efiblk_get_pdinfo_by_handle(h); + if (dp == NULL) + return; + set_currdev_pdinfo(dp); + if (stat(env_fn, &st) != 0) + return; + fd = open(env_fn, O_RDONLY); + if (fd == -1) + return; + env = malloc(st.st_size + 1); + if (env == NULL) + goto out; + if (read(fd, env, st.st_size) != st.st_size) + goto out; + env[st.st_size] = '\0'; + boot_parse_cmdline(env); +out: + free(env); + close(fd); +} + +static void +read_loader_env(const char *name, char *def_fn, bool once) +{ + UINTN len; + char *fn, *freeme = NULL; + + len = 0; + fn = def_fn; + if (efi_freebsd_getenv(name, NULL, &len) == EFI_BUFFER_TOO_SMALL) { + freeme = fn = malloc(len + 1); + if (fn != NULL) { + if (efi_freebsd_getenv(name, fn, &len) != EFI_SUCCESS) { + free(fn); + fn = NULL; + printf( + "Can't fetch FreeBSD::%s we know is there\n", name); + } else { + /* + * If tagged as 'once', delete the env variable so we + * only use it once. + */ + if (once) + efi_freebsd_delenv(name); + /* + * We malloced 1 more than len above, then redid the call, + * so now we have room at the end of the string to NUL terminate + * it here, even though the typical idiom would have '- 1' here to + * avoid overflow. len should be the same on return both times.
+ */ + fn[len] = '\0'; + } + } else { + printf( + "Can't allocate %d bytes to fetch FreeBSD::%s env var\n", + len, name); + } + } + if (fn) { + printf(" Reading loader env vars from %s\n", fn); + parse_loader_efi_config(boot_img->DeviceHandle, fn); + } +} + + + EFI_STATUS main(int argc, CHAR16 *argv[]) { EFI_GUID *guid; int howto, i, uhowto; UINTN k; bool has_kbd, is_last; char *s; EFI_DEVICE_PATH *imgpath; CHAR16 *text; EFI_STATUS rv; size_t sz, bosz = 0, bisz = 0; UINT16 boot_order[100]; char boot_info[4096]; char buf[32]; bool uefi_boot_mgr; archsw.arch_autoload = efi_autoload; archsw.arch_getdev = efi_getdev; archsw.arch_copyin = efi_copyin; archsw.arch_copyout = efi_copyout; archsw.arch_readin = efi_readin; archsw.arch_zfs_probe = efi_zfs_probe; /* Get our loaded image protocol interface structure. */ BS->HandleProtocol(IH, &imgid, (VOID**)&boot_img); /* * Chicken-and-egg problem; we want to have console output early, but * some console attributes may depend on reading from e.g. the boot * device, which we can't do yet. We can use printf() etc. once this is * done. So, we set it to the efi console, then call console init. This * gets us printf early, but also primes the pump for all future console * changes to take effect, regardless of where they come from. */ setenv("console", "efi", 1); cons_probe(); /* Init the time source */ efi_time_init(); /* * Initialise the block cache. Set the upper limit. */ bcache_init(32768, 512); /* * Scan the BLOCK IO MEDIA handles then * march through the device switch probing for things. */ i = efipart_inithandles(); if (i != 0 && i != ENOENT) { printf("efipart_inithandles failed with ERRNO %d, expect " "failures\n", i); } for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); /* * Detect console settings two different ways: one via the command * args (e.g. -h) or via the UEFI ConOut variable. */ has_kbd = has_keyboard(); howto = parse_args(argc, argv); if (!has_kbd && (howto & RB_PROBE)) howto |= RB_SERIAL | RB_MULTIPLE; howto &= ~RB_PROBE; uhowto = parse_uefi_con_out(); /* + * Scan the BLOCK IO MEDIA handles then + * march through the device switch probing for things. + */ + i = efipart_inithandles(); + if (i != 0 && i != ENOENT) { + printf("efipart_inithandles failed with ERRNO %d, expect " + "failures\n", i); + } + + for (i = 0; devsw[i] != NULL; i++) + if (devsw[i]->dv_init != NULL) + (devsw[i]->dv_init)(); + + /* + * Read additional environment variables from the boot device's + * "LoaderEnv" file. Any boot loader environment variable may be set + * there; these are subtly different from loader.conf variables. Only + * the 'simple' ones may be set, so things like foo_load="YES" won't work + * for two reasons. First, the parser is simplistic and doesn't grok + * quotes. Second, because the variables that cause an action to happen + * are parsed by the Lua, 4th or whatever code that's not yet + * loaded. This is relative to the root directory when loader.efi is + * loaded off the UFS root drive (when chain booted), or from the ESP + * when directly loaded by the BIOS. + * + * We also read in NextLoaderEnv if it was specified. This allows next boot + * functionality to be implemented and to override anything in LoaderEnv. + */ + read_loader_env("LoaderEnv", "/efi/freebsd/loader.env", false); + read_loader_env("NextLoaderEnv", NULL, true); + + /* * We now have two notions of console. howto should be viewed as * overrides. If console is already set, don't set it again.
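* For example (hypothetical): booting with -h puts RB_SERIAL in howto; if ConOut listed only a video device, the last branch below switches the console variable to "comconsole".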
*/ #define VIDEO_ONLY 0 #define SERIAL_ONLY RB_SERIAL #define VID_SER_BOTH RB_MULTIPLE #define SER_VID_BOTH (RB_SERIAL | RB_MULTIPLE) #define CON_MASK (RB_SERIAL | RB_MULTIPLE) if (strcmp(getenv("console"), "efi") == 0) { if ((howto & CON_MASK) == 0) { /* No override, uhowto is controlling and efi cons is perfect */ howto = howto | (uhowto & CON_MASK); } else if ((howto & CON_MASK) == (uhowto & CON_MASK)) { /* override matches what UEFI told us, efi console is perfect */ } else if ((uhowto & (CON_MASK)) != 0) { /* * We detected a serial console on ConOut. All possible * overrides include serial. We can't really override what efi * gives us, so we use it knowing it's the best choice. */ /* Do nothing */ } else { /* * We detected some kind of serial in the override, but ConOut * has no serial, so we have to sort out which case it really is. */ switch (howto & CON_MASK) { case SERIAL_ONLY: setenv("console", "comconsole", 1); break; case VID_SER_BOTH: setenv("console", "efi comconsole", 1); break; case SER_VID_BOTH: setenv("console", "comconsole efi", 1); break; /* case VIDEO_ONLY can't happen -- it's the first if above */ } } } /* * howto now encodes how we want to export the flags to the kernel, so * set the env based on it. */ boot_howto_to_env(howto); if (efi_copy_init()) { printf("failed to allocate staging area\n"); return (EFI_BUFFER_TOO_SMALL); } if ((s = getenv("fail_timeout")) != NULL) fail_timeout = strtol(s, NULL, 10); printf("%s\n", bootprog_info); printf(" Command line arguments:"); for (i = 0; i < argc; i++) printf(" %S", argv[i]); printf("\n"); printf(" EFI version: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff); printf(" EFI Firmware: %S (rev %d.%02d)\n", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff); printf(" Console: %s (%#x)\n", getenv("console"), howto); /* Determine the devpath of our image so we can prefer it. */ text = efi_devpath_name(boot_img->FilePath); if (text != NULL) { printf(" Load Path: %S\n", text); efi_setenv_freebsd_wcs("LoaderPath", text); efi_free_devpath_name(text); } rv = BS->HandleProtocol(boot_img->DeviceHandle, &devid, (void **)&imgpath); if (rv == EFI_SUCCESS) { text = efi_devpath_name(imgpath); if (text != NULL) { printf(" Load Device: %S\n", text); efi_setenv_freebsd_wcs("LoaderDev", text); efi_free_devpath_name(text); } } - uefi_boot_mgr = true; - boot_current = 0; - sz = sizeof(boot_current); - rv = efi_global_getenv("BootCurrent", &boot_current, &sz); - if (rv == EFI_SUCCESS) - printf(" BootCurrent: %04x\n", boot_current); - else { - boot_current = 0xffff; + if (getenv("uefi_ignore_boot_mgr") != NULL) { + printf(" Ignoring UEFI boot manager\n"); uefi_boot_mgr = false; - } + } else { + uefi_boot_mgr = true; + boot_current = 0; + sz = sizeof(boot_current); + rv = efi_global_getenv("BootCurrent", &boot_current, &sz); + if (rv == EFI_SUCCESS) + printf(" BootCurrent: %04x\n", boot_current); + else { + boot_current = 0xffff; + uefi_boot_mgr = false; + } - sz = sizeof(boot_order); - rv = efi_global_getenv("BootOrder", &boot_order, &sz); - if (rv == EFI_SUCCESS) { - printf(" BootOrder:"); - for (i = 0; i < sz / sizeof(boot_order[0]); i++) - printf(" %04x%s", boot_order[i], - boot_order[i] == boot_current ? "[*]" : ""); - printf("\n"); - is_last = boot_order[(sz / sizeof(boot_order[0])) - 1] == boot_current; - bosz = sz; - } else if (uefi_boot_mgr) { - /* - * u-boot doesn't set BootOrder, but otherwise participates in the - * boot manager protocol. So we fake it here and don't consider it - * a failure.
- */ - bosz = sizeof(boot_order[0]); - boot_order[0] = boot_current; - is_last = true; + sz = sizeof(boot_order); + rv = efi_global_getenv("BootOrder", &boot_order, &sz); + if (rv == EFI_SUCCESS) { + printf(" BootOrder:"); + for (i = 0; i < sz / sizeof(boot_order[0]); i++) + printf(" %04x%s", boot_order[i], + boot_order[i] == boot_current ? "[*]" : ""); + printf("\n"); + is_last = boot_order[(sz / sizeof(boot_order[0])) - 1] == boot_current; + bosz = sz; + } else if (uefi_boot_mgr) { + /* + * u-boot doesn't set BootOrder, but otherwise participates in the + * boot manager protocol. So we fake it here and don't consider it + * a failure. + */ + bosz = sizeof(boot_order[0]); + boot_order[0] = boot_current; + is_last = true; + } } /* * Next, find the boot info structure the UEFI boot manager is * supposed to set up. We need this so we can walk through it to * find where we are in the booting process and what to try to * boot next. */ if (uefi_boot_mgr) { snprintf(buf, sizeof(buf), "Boot%04X", boot_current); sz = sizeof(boot_info); rv = efi_global_getenv(buf, &boot_info, &sz); if (rv == EFI_SUCCESS) bisz = sz; else uefi_boot_mgr = false; } /* * Disable the watchdog timer. By default the boot manager sets * the timer to 5 minutes before invoking a boot option. If we * want to return to the boot manager, we have to disable the * watchdog timer, and since we're an interactive program, we don't * want to wait until the user types "quit". The timer may have * fired by then. We don't care if this fails. It does not prevent * normal functioning in any way... */ BS->SetWatchdogTimer(0, 0, 0, NULL); /* * Initialize the trusted/forbidden certificates from UEFI. * They will be later used to verify the manifest(s), * which should contain hashes of verified files. * This needs to be initialized before any configuration files * are loaded. */ #ifdef EFI_SECUREBOOT ve_efi_init(); #endif /* * Try to find a good currdev based on the image that was booted. * It might be desirable here to have a short pause to allow falling * through to the boot loader instead of returning instantly to follow * the boot protocol and also allow an escape hatch for users wishing * to try something different.
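* Note that the interactive escape hatch below is only offered when a UEFI boot manager was actually driving the boot; see the uefi_boot_mgr test that follows.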
*/ if (find_currdev(uefi_boot_mgr, is_last, boot_info, bisz) != 0) - if (!interactive_interrupt("Failed to find bootable partition")) + if (uefi_boot_mgr && + !interactive_interrupt("Failed to find bootable partition")) return (EFI_NOT_FOUND); efi_init_environment(); #if !defined(__arm__) for (k = 0; k < ST->NumberOfTableEntries; k++) { guid = &ST->ConfigurationTable[k].VendorGuid; if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) { char buf[40]; snprintf(buf, sizeof(buf), "%p", ST->ConfigurationTable[k].VendorTable); setenv("hint.smbios.0.mem", buf, 1); smbios_detect(ST->ConfigurationTable[k].VendorTable); break; } } #endif interact(); /* doesn't return */ return (EFI_SUCCESS); /* keep compiler happy */ } COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff); static int command_poweroff(int argc __unused, char *argv[] __unused) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { exit(0); return (CMD_OK); } COMMAND_SET(memmap, "memmap", "print memory map", command_memmap); static int command_memmap(int argc __unused, char *argv[] __unused) { UINTN sz; EFI_MEMORY_DESCRIPTOR *map, *p; UINTN key, dsz; UINT32 dver; EFI_STATUS status; int i, ndesc; char line[80]; sz = 0; status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver); if (status != EFI_BUFFER_TOO_SMALL) { printf("Can't determine memory map size\n"); return (CMD_ERROR); } map = malloc(sz); status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); if (EFI_ERROR(status)) { printf("Can't read memory map\n"); return (CMD_ERROR); } ndesc = sz / dsz; snprintf(line, sizeof(line), "%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); pager_open(); if (pager_output(line)) { pager_close(); return (CMD_OK); } for (i = 0, p = map; i < ndesc; i++, p = NextMemoryDescriptor(p, dsz)) { snprintf(line, sizeof(line), "%23s %012jx %012jx %08jx ", efi_memory_type(p->Type), (uintmax_t)p->PhysicalStart, (uintmax_t)p->VirtualStart, (uintmax_t)p->NumberOfPages); if (pager_output(line)) break; if (p->Attribute & EFI_MEMORY_UC) printf("UC "); if (p->Attribute & EFI_MEMORY_WC) printf("WC "); if (p->Attribute & EFI_MEMORY_WT) printf("WT "); if (p->Attribute & EFI_MEMORY_WB) printf("WB "); if (p->Attribute & EFI_MEMORY_UCE) printf("UCE "); if (p->Attribute & EFI_MEMORY_WP) printf("WP "); if (p->Attribute & EFI_MEMORY_RP) printf("RP "); if (p->Attribute & EFI_MEMORY_XP) printf("XP "); if (p->Attribute & EFI_MEMORY_NV) printf("NV "); if (p->Attribute & EFI_MEMORY_MORE_RELIABLE) printf("MR "); if (p->Attribute & EFI_MEMORY_RO) printf("RO "); if (pager_output("\n")) break; } pager_close(); return (CMD_OK); } COMMAND_SET(configuration, "configuration", "print configuration tables", command_configuration); static int command_configuration(int argc, char *argv[]) { UINTN i; char *name; printf("NumberOfTableEntries=%lu\n", (unsigned long)ST->NumberOfTableEntries); for (i = 0; i < ST->NumberOfTableEntries; i++) { EFI_GUID *guid; printf(" "); guid = 
&ST->ConfigurationTable[i].VendorGuid; if (efi_guid_to_name(guid, &name) == true) { printf(name); free(name); } else { printf("Error while translating UUID to name"); } printf(" at %p\n", ST->ConfigurationTable[i].VendorTable); } return (CMD_OK); } COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode); static int command_mode(int argc, char *argv[]) { UINTN cols, rows; unsigned int mode; int i; char *cp; char rowenv[8]; EFI_STATUS status; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; extern void HO(void); conout = ST->ConOut; if (argc > 1) { mode = strtol(argv[1], &cp, 0); if (cp[0] != '\0') { printf("Invalid mode\n"); return (CMD_ERROR); } status = conout->QueryMode(conout, mode, &cols, &rows); if (EFI_ERROR(status)) { printf("invalid mode %d\n", mode); return (CMD_ERROR); } status = conout->SetMode(conout, mode); if (EFI_ERROR(status)) { printf("couldn't set mode %d\n", mode); return (CMD_ERROR); } sprintf(rowenv, "%u", (unsigned)rows); setenv("LINES", rowenv, 1); HO(); /* set cursor */ return (CMD_OK); } printf("Current mode: %d\n", conout->Mode->Mode); for (i = 0; i <= conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) continue; printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols, (unsigned)rows); } if (i != 0) printf("Select a mode with the command \"mode <mode number>\"\n"); return (CMD_OK); } COMMAND_SET(lsefi, "lsefi", "list EFI handles", command_lsefi); static int command_lsefi(int argc __unused, char *argv[] __unused) { char *name; EFI_HANDLE *buffer = NULL; EFI_HANDLE handle; UINTN bufsz = 0, i, j; EFI_STATUS status; int ret = 0; status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (status != EFI_BUFFER_TOO_SMALL) { snprintf(command_errbuf, sizeof (command_errbuf), "unexpected error: %lld", (long long)status); return (CMD_ERROR); } if ((buffer = malloc(bufsz)) == NULL) { sprintf(command_errbuf, "out of memory"); return (CMD_ERROR); } status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (EFI_ERROR(status)) { free(buffer); snprintf(command_errbuf, sizeof (command_errbuf), "LocateHandle() error: %lld", (long long)status); return (CMD_ERROR); } pager_open(); for (i = 0; i < (bufsz / sizeof (EFI_HANDLE)); i++) { UINTN nproto = 0; EFI_GUID **protocols = NULL; handle = buffer[i]; printf("Handle %p", handle); if (pager_output("\n")) break; /* device path */ status = BS->ProtocolsPerHandle(handle, &protocols, &nproto); if (EFI_ERROR(status)) { snprintf(command_errbuf, sizeof (command_errbuf), "ProtocolsPerHandle() error: %lld", (long long)status); continue; } for (j = 0; j < nproto; j++) { if (efi_guid_to_name(protocols[j], &name) == true) { printf(" %s", name); free(name); } else { printf("Error while translating UUID to name"); } if ((ret = pager_output("\n")) != 0) break; } BS->FreePool(protocols); if (ret != 0) break; } pager_close(); free(buffer); return (CMD_OK); } #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int argc, char *argv[]); /* * Since the proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern contradicts the COMMAND_SET() macro * (which uses a static pointer), we define a wrapper function which * calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif /* * Chain load another efi loader.
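* Typical usage from the loader prompt (illustrative): "chain disk0p1:/efi/boot/bootx64.efi"; any extra arguments are packed into the new image's LoadOptions.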
*/ static int command_chain(int argc, char *argv[]) { EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; EFI_HANDLE loaderhandle; EFI_LOADED_IMAGE *loaded_image; EFI_STATUS status; struct stat st; struct devdesc *dev; char *name, *path; void *buf; int fd; if (argc < 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } name = argv[1]; if ((fd = open(name, O_RDONLY)) < 0) { command_errmsg = "no such file"; return (CMD_ERROR); } if (fstat(fd, &st) < 0) { command_errmsg = "stat failed"; close(fd); return (CMD_ERROR); } status = BS->AllocatePool(EfiLoaderCode, (UINTN)st.st_size, &buf); if (status != EFI_SUCCESS) { command_errmsg = "failed to allocate buffer"; close(fd); return (CMD_ERROR); } if (read(fd, buf, st.st_size) != st.st_size) { command_errmsg = "error while reading the file"; (void)BS->FreePool(buf); close(fd); return (CMD_ERROR); } close(fd); status = BS->LoadImage(FALSE, IH, NULL, buf, st.st_size, &loaderhandle); (void)BS->FreePool(buf); if (status != EFI_SUCCESS) { command_errmsg = "LoadImage failed"; return (CMD_ERROR); } status = BS->HandleProtocol(loaderhandle, &LoadedImageGUID, (void **)&loaded_image); if (argc > 2) { int i, len = 0; CHAR16 *argp; for (i = 2; i < argc; i++) len += strlen(argv[i]) + 1; len *= sizeof (*argp); loaded_image->LoadOptions = argp = malloc (len); loaded_image->LoadOptionsSize = len; for (i = 2; i < argc; i++) { char *ptr = argv[i]; while (*ptr) *(argp++) = *(ptr++); *(argp++) = ' '; } *(--argp) = 0; /* replace the trailing space with a NUL */ } if (efi_getdev((void **)&dev, name, (const char **)&path) == 0) { #ifdef EFI_ZFS_BOOT struct zfs_devdesc *z_dev; #endif struct disk_devdesc *d_dev; pdinfo_t *hd, *pd; switch (dev->d_dev->dv_type) { #ifdef EFI_ZFS_BOOT case DEVT_ZFS: z_dev = (struct zfs_devdesc *)dev; loaded_image->DeviceHandle = efizfs_get_handle_by_guid(z_dev->pool_guid); break; #endif case DEVT_NET: loaded_image->DeviceHandle = efi_find_handle(dev->d_dev, dev->d_unit); break; default: hd = efiblk_get_pdinfo(dev); if (STAILQ_EMPTY(&hd->pd_part)) { loaded_image->DeviceHandle = hd->pd_handle; break; } d_dev = (struct disk_devdesc *)dev; STAILQ_FOREACH(pd, &hd->pd_part, pd_link) { /* * d_partition should be 255 */ if (pd->pd_unit == (uint32_t)d_dev->d_slice) { loaded_image->DeviceHandle = pd->pd_handle; break; } } break; } } dev_cleanup(); status = BS->StartImage(loaderhandle, NULL, NULL); if (status != EFI_SUCCESS) { command_errmsg = "StartImage failed"; free(loaded_image->LoadOptions); loaded_image->LoadOptions = NULL; status = BS->UnloadImage(loaderhandle); return (CMD_ERROR); } return (CMD_ERROR); /* not reached */ } COMMAND_SET(chain, "chain", "chain load file", command_chain); Index: projects/runtime-coverage-v2/sys/conf/files.powerpc =================================================================== --- projects/runtime-coverage-v2/sys/conf/files.powerpc (revision 346924) +++ projects/runtime-coverage-v2/sys/conf/files.powerpc (revision 346925) @@ -1,278 +1,278 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored.
# # font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # # There is only an asm version on ppc64. cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs powerpc | dtrace powerpc | zfs powerpcspe | dtrace powerpcspe compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/atomic/powerpc64/opensolaris_atomic.S optional zfs powerpc64 | dtrace powerpc64 compile-with "${ZFS_S}" cddl/dev/dtrace/powerpc/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/powerpc/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/powerpc/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/bm/if_bm.c optional bm powermac dev/adb/adb_bus.c optional adb dev/adb/adb_kbd.c optional adb dev/adb/adb_mouse.c optional adb dev/adb/adb_hb_if.m optional adb dev/adb/adb_if.m optional adb dev/adb/adb_buttons.c optional adb dev/agp/agp_apple.c optional agp powermac dev/fb/fb.c optional sc dev/hwpmc/hwpmc_e500.c optional hwpmc dev/hwpmc/hwpmc_mpc7xxx.c optional hwpmc dev/hwpmc/hwpmc_powerpc.c optional hwpmc dev/hwpmc/hwpmc_ppc970.c optional hwpmc dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/adm1030.c optional powermac windtunnel | adm1030 powermac dev/iicbus/adt746x.c optional adt746x powermac dev/iicbus/ds1631.c optional ds1631 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac dev/iicbus/ofw_iicbus.c optional iicbus aim dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_opal.c optional powernv ipmi dev/nand/nfc_fsl.c optional nand mpc85xx dev/nand/nfc_rb.c optional nand mpc85xx # Most ofw stuff below is brought in by conf/files for options FDT, but # we always want it, even on non-FDT platforms. 
dev/fdt/simplebus.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard dev/ofw/ofw_bus_if.m standard dev/ofw/ofw_cpu.c standard dev/ofw/ofw_if.m standard dev/ofw/ofw_bus_subr.c standard dev/ofw/ofw_console.c optional aim dev/ofw/ofw_disk.c optional ofwd aim dev/ofw/ofwbus.c standard dev/ofw/ofwpci.c optional pci dev/ofw/ofw_standard.c optional aim powerpc dev/ofw/ofw_subr.c standard dev/powermac_nvram/powermac_nvram.c optional powermac_nvram powermac dev/quicc/quicc_bfe_fdt.c optional quicc mpc85xx dev/random/darn.c optional powerpc64 random dev/scc/scc_bfe_macio.c optional scc powermac dev/sdhci/fsl_sdhci.c optional mpc85xx sdhci dev/sec/sec.c optional sec mpc85xx dev/sound/macio/aoa.c optional snd_davbus | snd_ai2s powermac dev/sound/macio/davbus.c optional snd_davbus powermac dev/sound/macio/i2s.c optional snd_ai2s powermac dev/sound/macio/onyx.c optional snd_ai2s iicbus powermac dev/sound/macio/snapper.c optional snd_ai2s iicbus powermac dev/sound/macio/tumbler.c optional snd_ai2s iicbus powermac dev/syscons/scgfbrndr.c optional sc dev/tsec/if_tsec.c optional tsec dev/tsec/if_tsec_fdt.c optional tsec dev/uart/uart_cpu_powerpc.c optional uart dev/usb/controller/ehci_fsl.c optional ehci mpc85xx dev/vt/hw/ofwfb/ofwfb.c optional vt aim kern/kern_clocksource.c standard kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr kern/subr_sfbuf.c standard libkern/ashldi3.c optional powerpc | powerpcspe libkern/ashrdi3.c optional powerpc | powerpcspe libkern/bcmp.c standard libkern/bcopy.c standard libkern/cmpdi2.c optional powerpc | powerpcspe libkern/divdi3.c optional powerpc | powerpcspe libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c optional powerpc | powerpcspe libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c optional powerpc | powerpcspe libkern/qdivrem.c optional powerpc | powerpcspe libkern/ucmpdi2.c optional powerpc | powerpcspe libkern/udivdi3.c optional powerpc | powerpcspe libkern/umoddi3.c optional powerpc | powerpcspe powerpc/aim/locore.S optional aim no-obj powerpc/aim/aim_machdep.c optional aim powerpc/aim/mmu_oea.c optional aim powerpc powerpc/aim/mmu_oea64.c optional aim powerpc/aim/moea64_if.m optional aim powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim powerpc/aim/slb.c optional aim powerpc64 powerpc/booke/locore.S optional booke no-obj powerpc/booke/booke_machdep.c optional booke powerpc/booke/machdep_e500.c optional booke_e500 powerpc/booke/mp_cpudep.c optional booke smp powerpc/booke/platform_bare.c optional booke powerpc/booke/pmap.c optional booke powerpc/booke/spe.c optional powerpcspe powerpc/cpufreq/dfs.c optional cpufreq powerpc/cpufreq/mpc85xx_jog.c optional cpufreq mpc85xx powerpc/cpufreq/pcr.c optional cpufreq aim powerpc/cpufreq/pmcr.c optional cpufreq aim powerpc64 powerpc/cpufreq/pmufreq.c optional cpufreq aim pmu powerpc/fpu/fpu_add.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_compare.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_div.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_emu.c optional fpu_emu powerpc/fpu/fpu_explode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_implode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_mul.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_sqrt.c optional fpu_emu powerpc/fpu/fpu_subr.c optional fpu_emu | powerpcspe powerpc/mambo/mambocall.S optional mambo powerpc/mambo/mambo.c optional mambo powerpc/mambo/mambo_console.c optional 
mambo powerpc/mambo/mambo_disk.c optional mambo powerpc/mikrotik/platform_rb.c optional mikrotik powerpc/mikrotik/rb_led.c optional mikrotik powerpc/mpc85xx/atpic.c optional mpc85xx isa powerpc/mpc85xx/ds1553_bus_fdt.c optional ds1553 powerpc/mpc85xx/ds1553_core.c optional ds1553 powerpc/mpc85xx/fsl_diu.c optional mpc85xx diu powerpc/mpc85xx/fsl_espi.c optional mpc85xx spibus powerpc/mpc85xx/fsl_sata.c optional mpc85xx ata powerpc/mpc85xx/i2c.c optional iicbus powerpc/mpc85xx/isa.c optional mpc85xx isa powerpc/mpc85xx/lbc.c optional mpc85xx powerpc/mpc85xx/mpc85xx.c optional mpc85xx powerpc/mpc85xx/mpc85xx_cache.c optional mpc85xx powerpc/mpc85xx/mpc85xx_gpio.c optional mpc85xx gpio powerpc/mpc85xx/platform_mpc85xx.c optional mpc85xx powerpc/mpc85xx/pci_mpc85xx.c optional pci mpc85xx powerpc/mpc85xx/pci_mpc85xx_pcib.c optional pci mpc85xx powerpc/mpc85xx/qoriq_gpio.c optional mpc85xx gpio powerpc/ofw/ofw_machdep.c standard powerpc/ofw/ofw_pcibus.c optional pci powerpc/ofw/ofw_pcib_pci.c optional pci powerpc/ofw/ofw_real.c optional aim powerpc/ofw/ofw_syscons.c optional sc aim powerpc/ofw/ofwcall32.S optional aim powerpc powerpc/ofw/ofwcall64.S optional aim powerpc64 powerpc/ofw/openpic_ofw.c standard powerpc/ofw/rtas.c optional aim powerpc/ofw/ofw_initrd.c optional md_root_mem powerpc64 powerpc/powermac/ata_kauai.c optional powermac ata | powermac atamacio powerpc/powermac/ata_macio.c optional powermac ata | powermac atamacio powerpc/powermac/ata_dbdma.c optional powermac ata | powermac atamacio powerpc/powermac/atibl.c optional powermac atibl powerpc/powermac/cuda.c optional powermac cuda powerpc/powermac/cpcht.c optional powermac pci powerpc/powermac/dbdma.c optional powermac pci powerpc/powermac/fcu.c optional powermac fcu powerpc/powermac/grackle.c optional powermac pci powerpc/powermac/hrowpic.c optional powermac pci powerpc/powermac/kiic.c optional powermac kiic powerpc/powermac/macgpio.c optional powermac pci powerpc/powermac/macio.c optional powermac pci powerpc/powermac/nvbl.c optional powermac nvbl powerpc/powermac/platform_powermac.c optional powermac powerpc/powermac/powermac_thermal.c optional powermac powerpc/powermac/pswitch.c optional powermac pswitch powerpc/powermac/pmu.c optional powermac pmu powerpc/powermac/smu.c optional powermac smu powerpc/powermac/smusat.c optional powermac smu powerpc/powermac/uninorth.c optional powermac powerpc/powermac/uninorthpci.c optional powermac pci powerpc/powermac/vcoregpio.c optional powermac powerpc/powernv/opal.c optional powernv powerpc/powernv/opal_async.c optional powernv powerpc/powernv/opal_console.c optional powernv powerpc/powernv/opal_dev.c optional powernv -powerpc/powernv/opal_flash.c optional powernv +powerpc/powernv/opal_flash.c optional powernv opalflash powerpc/powernv/opal_hmi.c optional powernv powerpc/powernv/opal_i2c.c optional iicbus fdt powernv powerpc/powernv/opal_i2cm.c optional iicbus fdt powernv powerpc/powernv/opal_pci.c optional powernv pci powerpc/powernv/opal_sensor.c optional powernv powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard powerpc/powerpc/busdma_machdep.c standard powerpc/powerpc/clock.c standard powerpc/powerpc/copyinout.c standard powerpc/powerpc/copystr.c standard 
powerpc/powerpc/cpu.c standard powerpc/powerpc/cpu_subr64.S optional powerpc64 powerpc/powerpc/db_disasm.c optional ddb powerpc/powerpc/db_hwwatch.c optional ddb powerpc/powerpc/db_interface.c optional ddb powerpc/powerpc/db_trace.c optional ddb powerpc/powerpc/dump_machdep.c standard powerpc/powerpc/elf32_machdep.c optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/elf64_machdep.c optional powerpc64 powerpc/powerpc/exec_machdep.c standard powerpc/powerpc/fpu.c standard powerpc/powerpc/gdb_machdep.c optional gdb powerpc/powerpc/in_cksum.c optional inet | inet6 powerpc/powerpc/interrupt.c standard powerpc/powerpc/intr_machdep.c standard powerpc/powerpc/iommu_if.m standard powerpc/powerpc/machdep.c standard powerpc/powerpc/mem.c optional mem powerpc/powerpc/mmu_if.m standard powerpc/powerpc/mp_machdep.c optional smp powerpc/powerpc/nexus.c standard powerpc/powerpc/openpic.c standard powerpc/powerpc/pic_if.m standard powerpc/powerpc/pmap_dispatch.c standard powerpc/powerpc/platform.c standard powerpc/powerpc/platform_if.m standard powerpc/powerpc/ptrace_machdep.c standard powerpc/powerpc/sc_machdep.c optional sc powerpc/powerpc/setjmp.S standard powerpc/powerpc/sigcode32.S optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/sigcode64.S optional powerpc64 powerpc/powerpc/swtch32.S optional powerpc | powerpcspe powerpc/powerpc/swtch64.S optional powerpc64 powerpc/powerpc/stack_machdep.c optional ddb | stack powerpc/powerpc/syncicache.c standard powerpc/powerpc/sys_machdep.c standard powerpc/powerpc/trap.c standard powerpc/powerpc/uio_machdep.c standard powerpc/powerpc/uma_machdep.c standard powerpc/powerpc/vm_machdep.c standard powerpc/ps3/ehci_ps3.c optional ps3 ehci powerpc/ps3/ohci_ps3.c optional ps3 ohci powerpc/ps3/if_glc.c optional ps3 glc powerpc/ps3/mmu_ps3.c optional ps3 powerpc/ps3/platform_ps3.c optional ps3 powerpc/ps3/ps3bus.c optional ps3 powerpc/ps3/ps3cdrom.c optional ps3 scbus powerpc/ps3/ps3disk.c optional ps3 powerpc/ps3/ps3pic.c optional ps3 powerpc/ps3/ps3_syscons.c optional ps3 vt powerpc/ps3/ps3-hvcall.S optional ps3 powerpc/pseries/phyp-hvcall.S optional pseries powerpc64 powerpc/pseries/mmu_phyp.c optional pseries powerpc64 powerpc/pseries/phyp_console.c optional pseries powerpc64 uart powerpc/pseries/phyp_llan.c optional llan powerpc/pseries/phyp_vscsi.c optional pseries powerpc64 scbus powerpc/pseries/platform_chrp.c optional pseries powerpc/pseries/plpar_iommu.c optional pseries powerpc64 powerpc/pseries/plpar_pcibus.c optional pseries powerpc64 pci powerpc/pseries/rtas_dev.c optional pseries powerpc/pseries/rtas_pci.c optional pseries pci powerpc/pseries/vdevice.c optional pseries powerpc64 powerpc/pseries/xics.c optional pseries powerpc64 powerpc/psim/iobus.c optional psim powerpc/psim/ata_iobus.c optional ata psim powerpc/psim/openpic_iobus.c optional psim powerpc/psim/uart_iobus.c optional uart psim Index: projects/runtime-coverage-v2/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c =================================================================== --- projects/runtime-coverage-v2/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 346924) +++ projects/runtime-coverage-v2/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 346925) @@ -1,1447 +1,1447 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ #if !defined(lint) static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$Id$"; #endif #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_inet6.h" #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_random_ip_id.h" #endif #include #include #include #include #include # include # include #include #include # include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #endif # include # include # include #include # include # include #include # include # include #include #include #include #include #include #include #include #include #include #include #include #include #include "netinet/ip_compat.h" #ifdef USE_INET6 # include #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #ifdef IPFILTER_SCAN #include "netinet/ip_scan.h" #endif #include "netinet/ip_pool.h" # include #include #ifdef CSUM_DATA_VALID #include #endif extern int ip_optcopy __P((struct ip *, struct ip *)); # ifdef IPFILTER_M_IPFILTER MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures"); # endif static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); VNET_DEFINE(ipf_main_softc_t, ipfmain) = { .ipf_running = -2, }; #define V_ipfmain VNET(ipfmain) # include # include static eventhandler_tag ipf_arrivetag, ipf_departtag; #if 0 /* * Disable the "cloner" event handler; we are getting interface * events before the firewall is fully initialized and also with no vnet * information, thus leading to uninitialised memory accesses. * In addition, it is unclear why we need it in the first place. * If it turns out to be needed, we'll need a dedicated event handler * for it to deal with the ifc and the correct vnet. */ static eventhandler_tag ipf_clonetag; #endif static void ipf_ifevent(void *arg, struct ifnet *ifp); static void ipf_ifevent(arg, ifp) void *arg; struct ifnet *ifp; { CURVNET_SET(ifp->if_vnet); if (V_ipfmain.ipf_running > 0) ipf_sync(&V_ipfmain, NULL); CURVNET_RESTORE(); } static pfil_return_t ipf_check_wrapper(struct mbuf **mp, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { struct ip *ip = mtod(*mp, struct ip *); pfil_return_t rv; CURVNET_SET(ifp->if_vnet); rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); return (rv == 0 ? PFIL_PASS : PFIL_DROPPED); } #ifdef USE_INET6 static pfil_return_t ipf_check_wrapper6(struct mbuf **mp, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { pfil_return_t rv; CURVNET_SET(ifp->if_vnet); rv = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), sizeof(struct ip6_hdr), ifp, !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); return (rv == 0 ?
PFIL_PASS : PFIL_DROPPED); } # endif #if defined(IPFILTER_LKM) int ipf_identify(s) char *s; { if (strcmp(s, "ipl") == 0) return 1; return 0; } #endif /* IPFILTER_LKM */ static void ipf_timer_func(arg) void *arg; { ipf_main_softc_t *softc = arg; SPL_INT(s); SPL_NET(s); READ_ENTER(&softc->ipf_global); if (softc->ipf_running > 0) ipf_slowtimer(softc); if (softc->ipf_running == -1 || softc->ipf_running == 1) { #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); } RWLOCK_EXIT(&softc->ipf_global); SPL_X(s); } int ipfattach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif SPL_NET(s); if (softc->ipf_running > 0) { SPL_X(s); return EBUSY; } if (ipf_init_all(softc) < 0) { SPL_X(s); return EIO; } bzero((char *)V_ipfmain.ipf_selwait, sizeof(V_ipfmain.ipf_selwait)); softc->ipf_running = 1; if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); return 0; } /* * Disable the filter by removing the hooks from the IP input/output * stream. */ int ipfdetach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif if (softc->ipf_control_forwarding & 2) V_ipforwarding = 0; SPL_NET(s); #if 0 if (softc->ipf_slow_ch.callout != NULL) untimeout(ipf_timer_func, softc, softc->ipf_slow_ch); bzero(&softc->ipf_slow, sizeof(softc->ipf_slow)); #endif callout_drain(&softc->ipf_slow_ch); ipf_fini_all(softc); softc->ipf_running = -2; SPL_X(s); return 0; } /* * Filter ioctl interface. */ int ipfioctl(dev, cmd, data, mode, p) struct thread *p; # define p_cred td_ucred # define p_uid td_ucred->cr_ruid struct cdev *dev; ioctlcmd_t cmd; caddr_t data; int mode; { int error = 0, unit = 0; SPL_INT(s); CURVNET_SET(TD_TO_VNET(p)); #if (BSD >= 199306) if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) { V_ipfmain.ipf_interror = 130001; CURVNET_RESTORE(); return EPERM; } #endif unit = GET_MINOR(dev); if ((IPL_LOGMAX < unit) || (unit < 0)) { V_ipfmain.ipf_interror = 130002; CURVNET_RESTORE(); return ENXIO; } if (V_ipfmain.ipf_running <= 0) { if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) { V_ipfmain.ipf_interror = 130003; CURVNET_RESTORE(); return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF && cmd != SIOCIPFINTERROR) { V_ipfmain.ipf_interror = 130004; CURVNET_RESTORE(); return EIO; } } SPL_NET(s); error = ipf_ioctlswitch(&V_ipfmain, unit, data, cmd, mode, p->p_uid, p); CURVNET_RESTORE(); if (error != -1) { SPL_X(s); return error; } SPL_X(s); return error; } /* * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that * requires a large amount of setting up and isn't any more efficient. */ int ipf_send_reset(fin) fr_info_t *fin; { struct tcphdr *tcp, *tcp2; int tlen = 0, hlen; struct mbuf *m; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip; tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return -1; /* feedback loop */ if (ipf_checkl4sum(fin) == -1) return -1; tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); #ifdef USE_INET6 hlen = (fin->fin_v == 6) ? 
sizeof(ip6_t) : sizeof(ip_t); #else hlen = sizeof(ip_t); #endif #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; if (sizeof(*tcp2) + hlen > MLEN) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } } m->m_len = sizeof(*tcp2) + hlen; #if (BSD >= 199103) m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = (struct ifnet *)0; #endif ip = mtod(m, struct ip *); bzero((char *)ip, hlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; #endif tcp2 = (struct tcphdr *)((char *)ip + hlen); tcp2->th_sport = tcp->th_dport; tcp2->th_dport = tcp->th_sport; if (tcp->th_flags & TH_ACK) { tcp2->th_seq = tcp->th_ack; tcp2->th_flags = TH_RST; tcp2->th_ack = 0; } else { tcp2->th_seq = 0; tcp2->th_ack = ntohl(tcp->th_seq); tcp2->th_ack += tlen; tcp2->th_ack = htonl(tcp2->th_ack); tcp2->th_flags = TH_RST|TH_ACK; } TCP_X2_A(tcp2, 0); TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2); tcp2->th_win = tcp->th_win; tcp2->th_sum = 0; tcp2->th_urp = 0; #ifdef USE_INET6 if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_hlim = 0; ip6->ip6_src = fin->fin_dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*ip6), sizeof(*tcp2)); return ipf_send_ip(fin, m); } #endif ip->ip_p = IPPROTO_TCP; ip->ip_len = htons(sizeof(struct tcphdr)); ip->ip_src.s_addr = fin->fin_daddr; ip->ip_dst.s_addr = fin->fin_saddr; tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); ip->ip_len = htons(hlen + sizeof(*tcp2)); return ipf_send_ip(fin, m); } /* * ip_len must be in network byte order when called. */ static int ipf_send_ip(fin, m) fr_info_t *fin; mb_t *m; { fr_info_t fnew; ip_t *ip, *oip; int hlen; ip = mtod(m, ip_t *); bzero((char *)&fnew, sizeof(fnew)); fnew.fin_main_soft = fin->fin_main_soft; IP_V_A(ip, fin->fin_v); switch (fin->fin_v) { case 4 : oip = fin->fin_ip; hlen = sizeof(*oip); fnew.fin_v = 4; fnew.fin_p = ip->ip_p; fnew.fin_plen = ntohs(ip->ip_len); IP_HL_A(ip, sizeof(*oip) >> 2); ip->ip_tos = oip->ip_tos; ip->ip_id = fin->fin_ip->ip_id; ip->ip_off = htons(V_path_mtu_discovery ? 
IP_DF : 0); ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; break; #ifdef USE_INET6 case 6 : { ip6_t *ip6 = (ip6_t *)ip; ip6->ip6_vfc = 0x60; ip6->ip6_hlim = IPDEFTTL; hlen = sizeof(*ip6); fnew.fin_p = ip6->ip6_nxt; fnew.fin_v = 6; fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; break; } #endif default : return EINVAL; } #ifdef IPSEC m->m_pkthdr.rcvif = NULL; #endif fnew.fin_ifp = fin->fin_ifp; fnew.fin_flx = FI_NOCKSUM; fnew.fin_m = m; fnew.fin_ip = ip; fnew.fin_mp = &m; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; (void) ipf_makefrip(hlen, ip, &fnew); return ipf_fastroute(m, &m, &fnew, NULL); } int ipf_send_icmp_err(type, fin, dst) int type; fr_info_t *fin; int dst; { int err, hlen, xtra, iclen, ohlen, avail, code; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip, *ip2; if ((type < 0) || (type >= ICMP_MAXTYPE)) return -1; code = fin->fin_icode; #ifdef USE_INET6 /* See NetBSD ip_fil_netbsd.c r1.4: */ if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return -1; #endif if (ipf_checkl4sum(fin) == -1) return -1; #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; avail = MHLEN; xtra = 0; hlen = 0; ohlen = 0; dst4.s_addr = 0; ifp = fin->fin_ifp; if (fin->fin_v == 4) { if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT)) switch (ntohs(fin->fin_data[0]) >> 8) { case ICMP_ECHO : case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : break; default : FREE_MB_T(m); return 0; } if (dst == 0) { if (ipf_ifpaddr(&V_ipfmain, 4, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } dst4 = dst6.in4; } else dst4.s_addr = fin->fin_daddr; hlen = sizeof(ip_t); ohlen = fin->fin_hlen; iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; if (fin->fin_hlen < fin->fin_plen) xtra = MIN(fin->fin_dlen, 8); else xtra = 0; } #ifdef USE_INET6 else if (fin->fin_v == 6) { hlen = sizeof(ip6_t); ohlen = sizeof(ip6_t); iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; type = icmptoicmp6types[type]; if (type == ICMP6_DST_UNREACH) code = icmptoicmp6unreach[code]; if (iclen + max_linkhdr + fin->fin_plen > avail) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } avail = MCLBYTES; } xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr); xtra = MIN(xtra, IPV6_MMTU - iclen); if (dst == 0) { if (ipf_ifpaddr(&V_ipfmain, 6, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } } else dst6 = fin->fin_dst6; } #endif else { FREE_MB_T(m); return -1; } avail -= (max_linkhdr + iclen); if (avail < 0) { FREE_MB_T(m); return -1; } if (xtra > avail) xtra = avail; iclen += xtra; m->m_data += max_linkhdr; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = iclen; m->m_len = iclen; ip = mtod(m, ip_t *); icmp = (struct icmp *)((char *)ip + hlen); ip2 = (ip_t *)&icmp->icmp_ip; icmp->icmp_type = type; icmp->icmp_code = fin->fin_icode; icmp->icmp_cksum = 0; #ifdef icmp_nextmtu if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) { if (fin->fin_mtu != 0) { icmp->icmp_nextmtu = htons(fin->fin_mtu); } else if (ifp != NULL) { icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp)); } else { /* make up a number... 
*/ icmp->icmp_nextmtu = htons(fin->fin_plen - 20); } } #endif bcopy((char *)fin->fin_ip, (char *)ip2, ohlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(iclen - hlen); ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 0; ip6->ip6_src = dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), iclen - hlen); } else #endif { ip->ip_p = IPPROTO_ICMP; ip->ip_src.s_addr = dst4.s_addr; ip->ip_dst.s_addr = fin->fin_saddr; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = ipf_cksum((u_short *)icmp, sizeof(*icmp) + 8); ip->ip_len = htons(iclen); ip->ip_p = IPPROTO_ICMP; } err = ipf_send_ip(fin, m); return err; } /* * m0 - pointer to mbuf where the IP packet starts * mpp - pointer to the mbuf pointer that is the start of the mbuf chain */ int ipf_fastroute(m0, mpp, fin, fdp) mb_t *m0, **mpp; fr_info_t *fin; frdest_t *fdp; { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; struct sockaddr_in dst; struct nhop4_extended nh4; int has_nhop = 0; u_long fibnum = 0; u_short ip_off; frdest_t node; frentry_t *fr; #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: * * If the mbuf we're about to send is not writable (because of * a cluster reference, for example) we'll need to make a copy * of it since this routine modifies the contents. * * If you have non-crappy network hardware that can transmit data * from the mbuf, rather than making a copy, this is gonna be a * problem. */ if (M_WRITABLE(m) == 0) { m0 = m_dup(m, M_NOWAIT); if (m0 != NULL) { FREE_MB_T(m); m = m0; *mpp = m; } else { error = ENOBUFS; FREE_MB_T(m); goto done; } } #endif #ifdef USE_INET6 if (fin->fin_v == 6) { /* * currently "to <if>" and "to <if>:ip#" are not supported * for IPv6 */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif hlen = fin->fin_hlen; ip = mtod(m0, struct ip *); ifp = NULL; /* * Route packet. */ bzero(&dst, sizeof (dst)); dst.sin_family = AF_INET; dst.sin_addr = ip->ip_dst; dst.sin_len = sizeof(dst); fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && (fdp->fd_type == FRD_DSTLIST)) { if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0) fdp = &node; } if (fdp != NULL) ifp = fdp->fd_ptr; else ifp = fin->fin_ifp; if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) { error = -2; goto bad; } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) dst.sin_addr = fdp->fd_ip; fibnum = M_GETFIB(m0); if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } has_nhop = 1; if (ifp == NULL) ifp = nh4.nh_ifp; if (nh4.nh_flags & NHF_GATEWAY) dst.sin_addr = nh4.nh_addr; /* * Input packets which are being "fastrouted" won't go back through * output filtering and so would miss their chance to get NAT'd and * counted. Duplicated packets aren't considered to be part of the * normal packet stream, so do not NAT them or pass them through * stateful checking, etc.
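* Below, fin_out is flipped to 1 and fin_ifp is swapped to the outbound interface for the duration of the accounting, state and NAT calls, then both are restored, so those subsystems see the packet exactly once, as an outbound packet.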
*/ if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) { sifp = fin->fin_ifp; fin->fin_ifp = ifp; fin->fin_out = 1; (void) ipf_acctpkt(fin, NULL); fin->fin_fr = NULL; if (!fr || !(fr->fr_flags & FR_RETMASK)) { u_32_t pass; (void) ipf_state_check(fin, &pass); } switch (ipf_nat_checkout(fin, NULL)) { case 0 : break; case 1 : ip->ip_sum = 0; break; case -1 : error = -1; goto bad; break; } fin->fin_ifp = sifp; fin->fin_out = 0; } else ip->ip_sum = 0; /* * If small enough for interface, can just send directly. */ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { error = EMSGSIZE; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_act; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) { m = m0; error = ENOBUFS; goto bad; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); bcopy((char *)ip, (char *)mhip, sizeof(*ip)); if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); IP_HL_A(mhip, mhlen >> 2); } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ntohs(ip->ip_len)) len = ntohs(ip->ip_len) - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); *mnext = m; m->m_next = m_copym(m0, off, len, M_NOWAIT); if (m->m_next == 0) { error = ENOBUFS; /* ??? */ goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); mnext = &m->m_act; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). 
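* Worked example: with if_mtu = 1500 and hlen = 20 the per-fragment payload is len = (1500 - 20) & ~7 = 1480 bytes, so a 4000-byte packet leaves as payloads of 1480, 1480 and 1020 bytes with ip_off values of 0, 185 and 370 (in 8-byte units; IP_MF set on all but the last fragment).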
*/ m_adj(m0, hlen + firstlen - ip->ip_len); ip->ip_len = htons((u_short)(hlen + firstlen)); ip->ip_off = htons((u_short)IP_MF); ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_act; m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); else FREE_MB_T(m); } } done: if (!error) V_ipfmain.ipf_frouteok[0]++; else V_ipfmain.ipf_frouteok[1]++; if (has_nhop) fib4_free_nh_ext(fibnum, &nh4); return 0; bad: if (error == EMSGSIZE) { sifp = fin->fin_ifp; code = fin->fin_icode; fin->fin_icode = ICMP_UNREACH_NEEDFRAG; fin->fin_ifp = ifp; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1); fin->fin_ifp = sifp; fin->fin_icode = code; } FREE_MB_T(m); goto done; } int ipf_verifysrc(fin) fr_info_t *fin; { struct nhop4_basic nh4; if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0) return (0); return (fin->fin_ifp == nh4.nh_ifp); } /* * return the first IP Address associated with an interface */ int ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) ipf_main_softc_t *softc; int v, atype; void *ifptr; i6addr_t *inp, *inpmask; { #ifdef USE_INET6 struct in6_addr *inp6 = NULL; #endif struct sockaddr *sock, *mask; struct sockaddr_in *sin; struct ifaddr *ifa; struct ifnet *ifp; if ((ifptr == NULL) || (ifptr == (void *)-1)) return -1; sin = NULL; ifp = ifptr; if (v == 4) inp->in4.s_addr = 0; #ifdef USE_INET6 else if (v == 6) bzero((char *)inp, sizeof(*inp)); #endif ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); sock = ifa->ifa_addr; while (sock != NULL && ifa != NULL) { sin = (struct sockaddr_in *)sock; if ((v == 4) && (sin->sin_family == AF_INET)) break; #ifdef USE_INET6 if ((v == 6) && (sin->sin_family == AF_INET6)) { inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(inp6) && !IN6_IS_ADDR_LOOPBACK(inp6)) break; } #endif ifa = CK_STAILQ_NEXT(ifa, ifa_link); if (ifa != NULL) sock = ifa->ifa_addr; } if (ifa == NULL || sin == NULL) return -1; mask = ifa->ifa_netmask; if (atype == FRI_BROADCAST) sock = ifa->ifa_broadaddr; else if (atype == FRI_PEERADDR) sock = ifa->ifa_dstaddr; if (sock == NULL) return -1; #ifdef USE_INET6 if (v == 6) { return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock, (struct sockaddr_in6 *)mask, inp, inpmask); } #endif return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock, (struct sockaddr_in *)mask, &inp->in4, &inpmask->in4); } u_32_t ipf_newisn(fin) fr_info_t *fin; { u_32_t newiss; newiss = arc4random(); return newiss; } INLINE int ipf_checkv4sum(fin) fr_info_t *fin; { #ifdef CSUM_DATA_VALID int manual = 0; u_short sum; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 
0 : -1; m = fin->fin_m; if (m == NULL) { manual = 1; goto skipauto; } ip = fin->fin_ip; if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) == CSUM_IP_CHECKED) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_csum_ip_checked, fr_info_t *, fin, u_int, m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)); return -1; } if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { /* Depending on the driver, UDP may have zero checksum */ if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) == 0) { udphdr_t *udp = fin->fin_dp; if (udp->uh_sum == 0) { /* * we're good no matter what the hardware * checksum flags and csum_data say (handling * of csum_data for zero UDP checksum is not * consistent across all drivers) */ fin->fin_cksum = 1; return 0; } } if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) sum = m->m_pkthdr.csum_data; else sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + fin->fin_dlen + fin->fin_p)); sum ^= 0xffff; if (sum != 0) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_sum, fr_info_t *, fin, u_int, sum); } else { fin->fin_cksum = FI_CK_SUMOK; return 0; } } else { if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) { fin->fin_cksum = FI_CK_L4FULL; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_TCP || m->m_pkthdr.csum_flags == CSUM_UDP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_IP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else { manual = 1; } } skipauto: if (manual != 0) { if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_manual, fr_info_t *, fin, u_int, manual); return -1; } } #else if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } #endif return 0; } #ifdef USE_INET6 INLINE int ipf_checkv6sum(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_NOCKSUM) != 0) { DT(ipf_checkv6sum_fi_nocksum); return 0; } if ((fin->fin_flx & FI_SHORT) != 0) { DT(ipf_checkv6sum_fi_short); return 1; } if (fin->fin_cksum != FI_CK_NEEDED) { DT(ipf_checkv6sum_fi_ck_needed); return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; } if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv6sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } return 0; } #endif /* USE_INET6 */ size_t mbufchainlen(m0) struct mbuf *m0; - { +{ size_t len; if ((m0->m_flags & M_PKTHDR) != 0) { len = m0->m_pkthdr.len; } else { struct mbuf *m; for (m = m0, len = 0; m != NULL; m = m->m_next) len += m->m_len; } return len; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pullup */ /* Returns: NULL == pullup failed, else pointer to protocol header */ /* Parameters: xmin(I)- pointer to buffer where data packet starts */ /* fin(I) - pointer to packet information */ /* len(I) - number of bytes to pullup */ /* */ /* Attempt to move at least len bytes (from the start of the buffer) into a */ /* single buffer for ease of access. Operating system native functions are */ /* used to manage buffers - if necessary. If the entire packet ends up in */ /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */ /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ /* and ONLY if the pullup succeeds. */ /* */ /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */ /* of buffers that starts at *fin->fin_mp. 
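A failed pullup is destructive: the entire chain is freed and both *fin->fin_mp and fin->fin_m are set to NULL, so on a NULL return the caller must not touch the mbuf again.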
*/ /* ------------------------------------------------------------------------ */ void * ipf_pullup(xmin, fin, len) mb_t *xmin; fr_info_t *fin; int len; { int dpoff, ipoff; mb_t *m = xmin; char *ip; if (m == NULL) return NULL; ip = (char *)fin->fin_ip; if ((fin->fin_flx & FI_COALESCE) != 0) return ip; ipoff = fin->fin_ipoff; if (fin->fin_dp != NULL) dpoff = (char *)fin->fin_dp - (char *)ip; else dpoff = 0; if (M_LEN(m) < len) { mb_t *n = *fin->fin_mp; /* * Assume that M_PKTHDR is set and just work with what is left * rather than check.. * Should not make any real difference, anyway. */ if (m != n) { /* * Record the mbuf that points to the mbuf that we're * about to go to work on so that we can update the * m_next appropriately later. */ for (; n->m_next != m; n = n->m_next) ; } else { n = NULL; } #ifdef MHLEN if (len > MHLEN) #else if (len > MLEN) #endif { #ifdef HAVE_M_PULLDOWN if (m_pulldown(m, 0, len, NULL) == NULL) m = NULL; #else FREE_MB_T(*fin->fin_mp); m = NULL; n = NULL; #endif } else { m = m_pullup(m, len); } if (n != NULL) n->m_next = m; if (m == NULL) { /* * When n is non-NULL, it indicates that m pointed to * a sub-chain (tail) of the mbuf and that the head * of this chain has not yet been free'd. */ if (n != NULL) { FREE_MB_T(*fin->fin_mp); } *fin->fin_mp = NULL; fin->fin_m = NULL; return NULL; } if (n == NULL) *fin->fin_mp = m; while (M_LEN(m) == 0) { m = m->m_next; } fin->fin_m = m; ip = MTOD(m, char *) + ipoff; fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; if (fin->fin_fraghdr != NULL) fin->fin_fraghdr = (char *)ip + ((char *)fin->fin_fraghdr - (char *)fin->fin_ip); } if (len == fin->fin_plen) fin->fin_flx |= FI_COALESCE; return ip; } int ipf_inject(fin, m) fr_info_t *fin; mb_t *m; { int error = 0; if (fin->fin_out == 0) { netisr_dispatch(NETISR_IP, m); } else { fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len); fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); } return error; } VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet_hook); VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet6_hook); #define V_ipf_inet_hook VNET(ipf_inet_hook) #define V_ipf_inet6_hook VNET(ipf_inet6_hook) int ipf_pfil_unhook(void) { pfil_remove_hook(V_ipf_inet_hook); #ifdef USE_INET6 pfil_remove_hook(V_ipf_inet6_hook); #endif return (0); } int ipf_pfil_hook(void) { struct pfil_hook_args pha; struct pfil_link_args pla; int error, error6; pha.pa_version = PFIL_VERSION; pha.pa_flags = PFIL_IN | PFIL_OUT; pha.pa_modname = "ipfilter"; pha.pa_rulname = "default-ip4"; pha.pa_func = ipf_check_wrapper; pha.pa_ruleset = NULL; pha.pa_type = PFIL_TYPE_IP4; V_ipf_inet_hook = pfil_add_hook(&pha); #ifdef USE_INET6 pha.pa_rulname = "default-ip6"; pha.pa_func = ipf_check_wrapper6; pha.pa_type = PFIL_TYPE_IP6; V_ipf_inet6_hook = pfil_add_hook(&pha); #endif pla.pa_version = PFIL_VERSION; pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_pfil_head; pla.pa_hook = V_ipf_inet_hook; error = pfil_link(&pla); error6 = 0; #ifdef USE_INET6 pla.pa_head = V_inet6_pfil_head; pla.pa_hook = V_ipf_inet6_hook; error6 = pfil_link(&pla); #endif if (error || error6) error = ENODEV; else error = 0; return (error); } void ipf_event_reg(void) { ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \ ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \ ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); #if 0 ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event, 
ipf_ifevent, \ NULL, EVENTHANDLER_PRI_ANY); #endif } void ipf_event_dereg(void) { if (ipf_arrivetag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag); } if (ipf_departtag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag); } #if 0 if (ipf_clonetag != NULL) { EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag); } #endif } u_32_t ipf_random() { return arc4random(); } u_int ipf_pcksum(fin, hlen, sum) fr_info_t *fin; int hlen; u_int sum; { struct mbuf *m; u_int sum2; int off; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; m->m_data += hlen; m->m_len -= hlen; sum2 = in_cksum(fin->fin_m, fin->fin_plen - off); m->m_len += hlen; m->m_data -= hlen; /* * Both sum and sum2 are partial sums, so combine them together. */ sum += ~sum2 & 0xffff; while (sum > 0xffff) sum = (sum & 0xffff) + (sum >> 16); sum2 = ~sum & 0xffff; return sum2; } Index: projects/runtime-coverage-v2/sys/contrib/ipfilter =================================================================== --- projects/runtime-coverage-v2/sys/contrib/ipfilter (revision 346924) +++ projects/runtime-coverage-v2/sys/contrib/ipfilter (revision 346925) Property changes on: projects/runtime-coverage-v2/sys/contrib/ipfilter ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter:r346493-346924 Index: projects/runtime-coverage-v2/sys/dev/altera/atse/if_atse.c =================================================================== --- projects/runtime-coverage-v2/sys/dev/altera/atse/if_atse.c (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/altera/atse/if_atse.c (revision 346925) @@ -1,1603 +1,1608 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2013 Bjoern A. Zeeb * Copyright (c) 2014 Robert N. M. Watson * Copyright (c) 2016-2017 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-11-C-0249) * ("MRC2"), as part of the DARPA MRC research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Altera Triple-Speed Ethernet MegaCore, Function User Guide * UG-01008-3.0, Software Version: 12.0, June 2012. 
* Available at the time of writing at: * http://www.altera.com/literature/ug/ug_ethernet.pdf * * We are using an Marvell E1111 (Alaska) PHY on the DE4. See mii/e1000phy.c. */ /* * XXX-BZ NOTES: * - ifOutBroadcastPkts are only counted if both ether dst and src are all-1s; * seems an IP core bug, they count ether broadcasts as multicast. Is this * still the case? * - figure out why the TX FIFO fill status and intr did not work as expected. * - test 100Mbit/s and 10Mbit/s * - blacklist the one special factory programmed ethernet address (for now * hardcoded, later from loader?) * - resolve all XXX, left as reminders to shake out details later * - Jumbo frame support */ #include __FBSDID("$FreeBSD$"); #include "opt_device_polling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RX_QUEUE_SIZE 4096 #define TX_QUEUE_SIZE 4096 #define NUM_RX_MBUF 512 #define BUFRING_SIZE 8192 #include /* XXX once we'd do parallel attach, we need a global lock for this. */ #define ATSE_ETHERNET_OPTION_BITS_UNDEF 0 #define ATSE_ETHERNET_OPTION_BITS_READ 1 static int atse_ethernet_option_bits_flag = ATSE_ETHERNET_OPTION_BITS_UNDEF; static uint8_t atse_ethernet_option_bits[ALTERA_ETHERNET_OPTION_BITS_LEN]; /* * Softc and critical resource locking. */ #define ATSE_LOCK(_sc) mtx_lock(&(_sc)->atse_mtx) #define ATSE_UNLOCK(_sc) mtx_unlock(&(_sc)->atse_mtx) #define ATSE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->atse_mtx, MA_OWNED) #define ATSE_DEBUG #undef ATSE_DEBUG #ifdef ATSE_DEBUG #define DPRINTF(format, ...) printf(format, __VA_ARGS__) #else #define DPRINTF(format, ...) #endif /* * Register space access macros. */ static inline void csr_write_4(struct atse_softc *sc, uint32_t reg, uint32_t val4, const char *f, const int l) { val4 = htole32(val4); DPRINTF("[%s:%d] CSR W %s 0x%08x (0x%08x) = 0x%08x\n", f, l, "atse_mem_res", reg, reg * 4, val4); bus_write_4(sc->atse_mem_res, reg * 4, val4); } static inline uint32_t csr_read_4(struct atse_softc *sc, uint32_t reg, const char *f, const int l) { uint32_t val4; val4 = le32toh(bus_read_4(sc->atse_mem_res, reg * 4)); DPRINTF("[%s:%d] CSR R %s 0x%08x (0x%08x) = 0x%08x\n", f, l, "atse_mem_res", reg, reg * 4, val4); return (val4); } /* * See page 5-2 that it's all dword offsets and the MS 16 bits need to be zero * on write and ignored on read. 
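* Concretely: 16-bit register N of the block based at 'bmcr' lives at byte offset (bmcr + N) * 4 and only the low 16 bits of each 32-bit word carry data; with a hypothetical bmcr of 0x80, register 2 would be accessed at byte offset (0x80 + 2) * 4 = 0x208.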
*/ static inline void pxx_write_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, uint16_t val, const char *f, const int l, const char *s) { uint32_t val4; val4 = htole32(val & 0x0000ffff); DPRINTF("[%s:%d] %s W %s 0x%08x (0x%08jx) = 0x%08x\n", f, l, s, "atse_mem_res", reg, (bmcr + reg) * 4, val4); bus_write_4(sc->atse_mem_res, (bmcr + reg) * 4, val4); } static inline uint16_t pxx_read_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, const char *f, const int l, const char *s) { uint32_t val4; uint16_t val; val4 = bus_read_4(sc->atse_mem_res, (bmcr + reg) * 4); val = le32toh(val4) & 0x0000ffff; DPRINTF("[%s:%d] %s R %s 0x%08x (0x%08jx) = 0x%04x\n", f, l, s, "atse_mem_res", reg, (bmcr + reg) * 4, val); return (val); } #define CSR_WRITE_4(sc, reg, val) \ csr_write_4((sc), (reg), (val), __func__, __LINE__) #define CSR_READ_4(sc, reg) \ csr_read_4((sc), (reg), __func__, __LINE__) #define PCS_WRITE_2(sc, reg, val) \ pxx_write_2((sc), sc->atse_bmcr0, (reg), (val), __func__, __LINE__, \ "PCS") #define PCS_READ_2(sc, reg) \ pxx_read_2((sc), sc->atse_bmcr0, (reg), __func__, __LINE__, "PCS") #define PHY_WRITE_2(sc, reg, val) \ pxx_write_2((sc), sc->atse_bmcr1, (reg), (val), __func__, __LINE__, \ "PHY") #define PHY_READ_2(sc, reg) \ pxx_read_2((sc), sc->atse_bmcr1, (reg), __func__, __LINE__, "PHY") static void atse_tick(void *); static int atse_detach(device_t); devclass_t atse_devclass; static int atse_rx_enqueue(struct atse_softc *sc, uint32_t n) { struct mbuf *m; int i; for (i = 0; i < n; i++) { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { device_printf(sc->dev, "%s: Can't alloc rx mbuf\n", __func__); return (-1); } m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; xdma_enqueue_mbuf(sc->xchan_rx, &m, 0, 4, 4, XDMA_DEV_TO_MEM); } return (0); } static int atse_xdma_tx_intr(void *arg, xdma_transfer_status_t *status) { xdma_transfer_status_t st; struct atse_softc *sc; struct ifnet *ifp; struct mbuf *m; int err; sc = arg; ATSE_LOCK(sc); ifp = sc->atse_ifp; for (;;) { err = xdma_dequeue_mbuf(sc->xchan_tx, &m, &st); if (err != 0) { break; } if (st.error != 0) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } m_freem(m); sc->txcount--; } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ATSE_UNLOCK(sc); return (0); } static int atse_xdma_rx_intr(void *arg, xdma_transfer_status_t *status) { xdma_transfer_status_t st; struct atse_softc *sc; struct ifnet *ifp; struct mbuf *m; int err; uint32_t cnt_processed; sc = arg; ATSE_LOCK(sc); ifp = sc->atse_ifp; cnt_processed = 0; for (;;) { err = xdma_dequeue_mbuf(sc->xchan_rx, &m, &st); if (err != 0) { break; } cnt_processed++; if (st.error != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); continue; } m->m_pkthdr.len = m->m_len = st.transferred; m->m_pkthdr.rcvif = ifp; m_adj(m, ETHER_ALIGN); ATSE_UNLOCK(sc); (*ifp->if_input)(ifp, m); ATSE_LOCK(sc); } atse_rx_enqueue(sc, cnt_processed); ATSE_UNLOCK(sc); return (0); } static int atse_transmit_locked(struct ifnet *ifp) { struct atse_softc *sc; struct mbuf *m; struct buf_ring *br; int error; int enq; sc = ifp->if_softc; br = sc->br; enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { error = xdma_enqueue_mbuf(sc->xchan_tx, &m, 0, 4, 4, XDMA_MEM_TO_DEV); if (error != 0) { /* No space in request queue available yet. */ drbr_putback(ifp, br, m); break; } drbr_advance(ifp, br); sc->txcount++; enq++; /* If anyone is interested give them a copy. 
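* (the frame is tapped once it is committed to the DMA request queue but before the queue is submitted below, so BPF captures see every frame that will actually be handed to the hardware)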
*/ ETHER_BPF_MTAP(ifp, m); } if (enq > 0) xdma_queue_submit(sc->xchan_tx); return (0); } static int atse_transmit(struct ifnet *ifp, struct mbuf *m) { struct atse_softc *sc; struct buf_ring *br; int error; sc = ifp->if_softc; br = sc->br; ATSE_LOCK(sc); mtx_lock(&sc->br_mtx); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { error = drbr_enqueue(ifp, sc->br, m); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { error = drbr_enqueue(ifp, sc->br, m); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } error = drbr_enqueue(ifp, br, m); if (error) { mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } error = atse_transmit_locked(ifp); mtx_unlock(&sc->br_mtx); ATSE_UNLOCK(sc); return (error); } static void atse_qflush(struct ifnet *ifp) { struct atse_softc *sc; sc = ifp->if_softc; printf("%s\n", __func__); } static int atse_stop_locked(struct atse_softc *sc) { uint32_t mask, val4; struct ifnet *ifp; int i; ATSE_LOCK_ASSERT(sc); callout_stop(&sc->atse_tick); ifp = sc->atse_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Disable MAC transmit and receive datapath. */ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 &= ~mask; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == 0) { break; } DELAY(10); } if ((val4 & mask) != 0) { device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n"); /* Punt. */ } sc->atse_flags &= ~ATSE_FLAGS_LINK; return (0); } static uint8_t atse_mchash(struct atse_softc *sc __unused, const uint8_t *addr) { uint8_t x, y; int i, j; x = 0; for (i = 0; i < ETHER_ADDR_LEN; i++) { y = addr[i] & 0x01; for (j = 1; j < 8; j++) y ^= (addr[i] >> j) & 0x01; x |= (y << i); } return (x); } static int atse_rxfilter_locked(struct atse_softc *sc) { struct ifmultiaddr *ifma; struct ifnet *ifp; uint32_t val4; int i; /* XXX-BZ can we find out if we have the MHASH synthesized? */ val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* For simplicity always hash full 48 bits of addresses. */ if ((val4 & BASE_CFG_COMMAND_CONFIG_MHASH_SEL) != 0) val4 &= ~BASE_CFG_COMMAND_CONFIG_MHASH_SEL; ifp = sc->atse_ifp; if (ifp->if_flags & IFF_PROMISC) { val4 |= BASE_CFG_COMMAND_CONFIG_PROMIS_EN; } else { val4 &= ~BASE_CFG_COMMAND_CONFIG_PROMIS_EN; } CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); if (ifp->if_flags & IFF_ALLMULTI) { /* Accept all multicast addresses. */ for (i = 0; i <= MHASH_LEN; i++) CSR_WRITE_4(sc, MHASH_START + i, 0x1); } else { /* * Can hold MHASH_LEN entries. * XXX-BZ bitstring.h would be more general. */ uint64_t h; h = 0; /* * Re-build and re-program hash table. First build the * bit-field "yes" or "no" for each slot per address, then * do all the programming afterwards. */ if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) { continue; } h |= (1 << atse_mchash(sc, LLADDR((struct sockaddr_dl *)ifma->ifma_addr))); } if_maddr_runlock(ifp); for (i = 0; i <= MHASH_LEN; i++) { CSR_WRITE_4(sc, MHASH_START + i, (h & (1 << i)) ? 
0x01 : 0x00); } } return (0); } static int atse_ethernet_option_bits_read_fdt(device_t dev) { struct resource *res; device_t fdev; int i, rid; if (atse_ethernet_option_bits_flag & ATSE_ETHERNET_OPTION_BITS_READ) { return (0); } fdev = device_find_child(device_get_parent(dev), "cfi", 0); if (fdev == NULL) { return (ENOENT); } rid = 0; res = bus_alloc_resource_any(fdev, SYS_RES_MEMORY, &rid, RF_ACTIVE | RF_SHAREABLE); if (res == NULL) { return (ENXIO); } for (i = 0; i < ALTERA_ETHERNET_OPTION_BITS_LEN; i++) { atse_ethernet_option_bits[i] = bus_read_1(res, ALTERA_ETHERNET_OPTION_BITS_OFF + i); } bus_release_resource(fdev, SYS_RES_MEMORY, rid, res); atse_ethernet_option_bits_flag |= ATSE_ETHERNET_OPTION_BITS_READ; return (0); } static int atse_ethernet_option_bits_read(device_t dev) { int error; error = atse_ethernet_option_bits_read_fdt(dev); if (error == 0) return (0); device_printf(dev, "Cannot read Ethernet addresses from flash.\n"); return (error); } static int atse_get_eth_address(struct atse_softc *sc) { unsigned long hostid; uint32_t val4; int unit; /* * Make sure to only ever do this once. Otherwise a reset would * possibly change our ethernet address, which is not good at all. */ if (sc->atse_eth_addr[0] != 0x00 || sc->atse_eth_addr[1] != 0x00 || sc->atse_eth_addr[2] != 0x00) { return (0); } if ((atse_ethernet_option_bits_flag & ATSE_ETHERNET_OPTION_BITS_READ) == 0) { goto get_random; } val4 = atse_ethernet_option_bits[0] << 24; val4 |= atse_ethernet_option_bits[1] << 16; val4 |= atse_ethernet_option_bits[2] << 8; val4 |= atse_ethernet_option_bits[3]; /* They chose "safe". */ if (val4 != le32toh(0x00005afe)) { device_printf(sc->atse_dev, "Magic '5afe' is not safe: 0x%08x. " "Falling back to random numbers for hardware address.\n", val4); goto get_random; } sc->atse_eth_addr[0] = atse_ethernet_option_bits[4]; sc->atse_eth_addr[1] = atse_ethernet_option_bits[5]; sc->atse_eth_addr[2] = atse_ethernet_option_bits[6]; sc->atse_eth_addr[3] = atse_ethernet_option_bits[7]; sc->atse_eth_addr[4] = atse_ethernet_option_bits[8]; sc->atse_eth_addr[5] = atse_ethernet_option_bits[9]; /* Handle factory default ethernet address: 00:07:ed:ff:ed:15 */ if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x07 && sc->atse_eth_addr[2] == 0xed && sc->atse_eth_addr[3] == 0xff && sc->atse_eth_addr[4] == 0xed && sc->atse_eth_addr[5] == 0x15) { device_printf(sc->atse_dev, "Factory programmed Ethernet " "hardware address blacklisted. Falling back to random " "address to avoid collisions.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x00 && sc->atse_eth_addr[2] == 0x00 && sc->atse_eth_addr[3] == 0x00 && sc->atse_eth_addr[4] == 0x00 && sc->atse_eth_addr[5] == 0x00) { device_printf(sc->atse_dev, "All-zeros Ethernet hardware " "address blacklisted. Falling back to random address.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } if (ETHER_IS_MULTICAST(sc->atse_eth_addr)) { device_printf(sc->atse_dev, "Multicast Ethernet hardware " "address blacklisted. Falling back to random address.\n"); device_printf(sc->atse_dev, "Please re-program your flash.\n"); goto get_random; } /* * If we find an Altera-prefixed address ending in 0x0, adjust it * by the device unit. Otherwise, if this is not the first * Ethernet, fall back to random.
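* The scheme assumed here: flash carries a base address of the form 00:07:ed:xx:xx:x0 (possibly with the locally administered bit set in the first octet) and each atse unit ORs its unit number into the low nibble of the last octet, which caps the scheme at 16 MACs.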
*/ unit = device_get_unit(sc->atse_dev); if (unit == 0x00) { return (0); } if (unit > 0x0f) { device_printf(sc->atse_dev, "We do not support Ethernet " "addresses for more than 16 MACs. Falling back to " "random hardware address.\n"); goto get_random; } if ((sc->atse_eth_addr[0] & ~0x2) != 0 || sc->atse_eth_addr[1] != 0x07 || sc->atse_eth_addr[2] != 0xed || (sc->atse_eth_addr[5] & 0x0f) != 0x0) { device_printf(sc->atse_dev, "Ethernet address not meeting our " "multi-MAC standards. Falling back to random hardware " "address.\n"); goto get_random; } sc->atse_eth_addr[5] |= (unit & 0x0f); return (0); get_random: /* * Fall back to the random-address code we also use in bridge(4). */ getcredhostid(curthread->td_ucred, &hostid); if (hostid == 0) { arc4rand(sc->atse_eth_addr, ETHER_ADDR_LEN, 1); sc->atse_eth_addr[0] &= ~1;/* clear multicast bit */ sc->atse_eth_addr[0] |= 2; /* set the LAA bit */ } else { sc->atse_eth_addr[0] = 0x2; sc->atse_eth_addr[1] = (hostid >> 24) & 0xff; sc->atse_eth_addr[2] = (hostid >> 16) & 0xff; sc->atse_eth_addr[3] = (hostid >> 8 ) & 0xff; sc->atse_eth_addr[4] = hostid & 0xff; sc->atse_eth_addr[5] = sc->atse_unit & 0xff; } return (0); } static int atse_set_eth_address(struct atse_softc *sc, int n) { uint32_t v0, v1; v0 = (sc->atse_eth_addr[3] << 24) | (sc->atse_eth_addr[2] << 16) | (sc->atse_eth_addr[1] << 8) | sc->atse_eth_addr[0]; v1 = (sc->atse_eth_addr[5] << 8) | sc->atse_eth_addr[4]; if (n & ATSE_ETH_ADDR_DEF) { CSR_WRITE_4(sc, BASE_CFG_MAC_0, v0); CSR_WRITE_4(sc, BASE_CFG_MAC_1, v1); } if (n & ATSE_ETH_ADDR_SUPP1) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_1, v1); } if (n & ATSE_ETH_ADDR_SUPP2) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_1, v1); } if (n & ATSE_ETH_ADDR_SUPP3) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_1, v1); } if (n & ATSE_ETH_ADDR_SUPP4) { CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_0, v0); CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_1, v1); } return (0); } static int atse_reset(struct atse_softc *sc) { uint32_t val4, mask; uint16_t val; int i; /* 1. External PHY Initialization using MDIO. */ /* * We select the right MDIO space in atse_attach() and let MII do * anything else. */ /* 2. PCS Configuration Register Initialization. */ /* a. Set auto negotiation link timer to 1.6ms for SGMII. */ PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_0, 0x0D40); PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_1, 0x0003); /* b. Configure SGMII. */ val = PCS_EXT_IF_MODE_SGMII_ENA|PCS_EXT_IF_MODE_USE_SGMII_AN; PCS_WRITE_2(sc, PCS_EXT_IF_MODE, val); /* c. Enable auto negotiation. */ /* Ignore Bits 6,8,13; should be set,set,unset. */ val = PCS_READ_2(sc, PCS_CONTROL); val &= ~(PCS_CONTROL_ISOLATE|PCS_CONTROL_POWERDOWN); val &= ~PCS_CONTROL_LOOPBACK; /* Make this a -link1 option? */ val |= PCS_CONTROL_AUTO_NEGOTIATION_ENABLE; PCS_WRITE_2(sc, PCS_CONTROL, val); /* d. PCS reset. */ val = PCS_READ_2(sc, PCS_CONTROL); val |= PCS_CONTROL_RESET; PCS_WRITE_2(sc, PCS_CONTROL, val); /* Wait for reset bit to clear; i=100 is excessive. */ for (i = 0; i < 100; i++) { val = PCS_READ_2(sc, PCS_CONTROL); if ((val & PCS_CONTROL_RESET) == 0) { break; } DELAY(10); } if ((val & PCS_CONTROL_RESET) != 0) { device_printf(sc->atse_dev, "PCS reset timed out.\n"); return (ENXIO); } /* 3. MAC Configuration Register Initialization. */ /* a. Disable MAC transmit and receive datapath.
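* The datapath has to be quiesced before the FIFO thresholds and MAC options below are rewritten; the polling loop that follows gives the core up to roughly 1ms (100 iterations of DELAY(10)) to drain before giving up.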
*/ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 &= ~mask; /* Samples in the manual do have the SW_RESET bit set here, why? */ CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == 0) { break; } DELAY(10); } if ((val4 & mask) != 0) { device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n"); return (ENXIO); } /* b. MAC FIFO configuration. */ CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_EMPTY, FIFO_DEPTH_TX - 16); CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_FULL, 3); CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_EMPTY, 8); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_EMPTY, FIFO_DEPTH_RX - 16); CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_FULL, 8); CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_EMPTY, 8); #if 0 CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 16); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 16); #else /* For store-and-forward mode, set this threshold to 0. */ CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 0); CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 0); #endif /* c. MAC address configuration. */ /* Also initialize supplementary addresses to our primary one. */ /* XXX-BZ FreeBSD really needs to grow an API for using these. */ atse_get_eth_address(sc); atse_set_eth_address(sc, ATSE_ETH_ADDR_ALL); /* d. MAC function configuration. */ CSR_WRITE_4(sc, BASE_CFG_FRM_LENGTH, 1518); /* Default. */ CSR_WRITE_4(sc, BASE_CFG_TX_IPG_LENGTH, 12); CSR_WRITE_4(sc, BASE_CFG_PAUSE_QUANT, 0xFFFF); val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* * If 1000BASE-X/SGMII PCS is initialized, set the ETH_SPEED (bit 3) * and ENA_10 (bit 25) in command_config register to 0. If half duplex * is reported in the PHY/PCS status register, set the HD_ENA (bit 10) * to 1 in command_config register. * BZ: We shoot for 1000 instead. */ #if 0 val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED; #else val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; #endif val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; #if 0 /* * We do not want to set this, otherwise, we could not even send * random raw ethernet frames for various other research. By default * FreeBSD will use the right ether source address. */ val4 |= BASE_CFG_COMMAND_CONFIG_TX_ADDR_INS; #endif val4 |= BASE_CFG_COMMAND_CONFIG_PAD_EN; val4 &= ~BASE_CFG_COMMAND_CONFIG_CRC_FWD; #if 0 val4 |= BASE_CFG_COMMAND_CONFIG_CNTL_FRM_ENA; #endif #if 1 val4 |= BASE_CFG_COMMAND_CONFIG_RX_ERR_DISC; #endif val4 &= ~BASE_CFG_COMMAND_CONFIG_LOOP_ENA; /* link0? */ CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* * Make sure we do not enable 32bit alignment; FreeBSD cannot * cope with the additional padding (though we should!?). * Also make sure we get the CRC appended. */ val4 = CSR_READ_4(sc, TX_CMD_STAT); val4 &= ~(TX_CMD_STAT_OMIT_CRC|TX_CMD_STAT_TX_SHIFT16); CSR_WRITE_4(sc, TX_CMD_STAT, val4); val4 = CSR_READ_4(sc, RX_CMD_STAT); val4 &= ~RX_CMD_STAT_RX_SHIFT16; val4 |= RX_CMD_STAT_RX_SHIFT16; CSR_WRITE_4(sc, RX_CMD_STAT, val4); /* e. Reset MAC. */ val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 |= BASE_CFG_COMMAND_CONFIG_SW_RESET; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be cleared; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) == 0) { break; } DELAY(10); } if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) != 0) { device_printf(sc->atse_dev, "MAC reset timed out.\n"); return (ENXIO); } /* f.
Enable MAC transmit and receive datapath. */ mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA; val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); val4 |= mask; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); /* Wait for bits to be set; i=100 is excessive. */ for (i = 0; i < 100; i++) { val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); if ((val4 & mask) == mask) { break; } DELAY(10); } if ((val4 & mask) != mask) { device_printf(sc->atse_dev, "Enabling MAC TX/RX timed out.\n"); return (ENXIO); } return (0); } static void atse_init_locked(struct atse_softc *sc) { struct ifnet *ifp; struct mii_data *mii; uint8_t *eaddr; ATSE_LOCK_ASSERT(sc); ifp = sc->atse_ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { return; } /* * Must update the ether address if changed. Given we do not handle * it in atse_ioctl() but it's in the general framework, just always * do it here before atse_reset(). */ eaddr = IF_LLADDR(sc->atse_ifp); bcopy(eaddr, &sc->atse_eth_addr, ETHER_ADDR_LEN); /* Make things grind to a halt, clean up, ... */ atse_stop_locked(sc); atse_reset(sc); /* ... and fire up the engine again. */ atse_rxfilter_locked(sc); sc->atse_flags &= ATSE_FLAGS_LINK; /* Preserve. */ mii = device_get_softc(sc->atse_miibus); sc->atse_flags &= ~ATSE_FLAGS_LINK; mii_mediachg(mii); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->atse_tick, hz, atse_tick, sc); } static void atse_init(void *xsc) { struct atse_softc *sc; /* * XXXRW: There is some argument that we should immediately do RX * processing after enabling interrupts, or one may not fire if there * are buffered packets. */ sc = (struct atse_softc *)xsc; ATSE_LOCK(sc); atse_init_locked(sc); ATSE_UNLOCK(sc); } static int atse_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct atse_softc *sc; struct ifreq *ifr; int error, mask; error = 0; sc = ifp->if_softc; ifr = (struct ifreq *)data; switch (command) { case SIOCSIFFLAGS: ATSE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->atse_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) atse_rxfilter_locked(sc); else atse_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) atse_stop_locked(sc); sc->atse_if_flags = ifp->if_flags; ATSE_UNLOCK(sc); break; case SIOCSIFCAP: ATSE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; ATSE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ATSE_LOCK(sc); atse_rxfilter_locked(sc); ATSE_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: { struct mii_data *mii; struct ifreq *ifr; mii = device_get_softc(sc->atse_miibus); ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void atse_tick(void *xsc) { struct atse_softc *sc; struct mii_data *mii; struct ifnet *ifp; sc = (struct atse_softc *)xsc; ATSE_LOCK_ASSERT(sc); ifp = sc->atse_ifp; mii = device_get_softc(sc->atse_miibus); mii_tick(mii); if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { atse_miibus_statchg(sc->atse_dev); } callout_reset(&sc->atse_tick, hz, atse_tick, sc); } /* * Set media options.
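* Each PHY is reset first so that autonegotiation restarts from a known state; mii_mediachg() then kicks off negotiation and the outcome is reported back through atse_miibus_statchg().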
*/ static int atse_ifmedia_upd(struct ifnet *ifp) { struct atse_softc *sc; struct mii_data *mii; struct mii_softc *miisc; int error; sc = ifp->if_softc; ATSE_LOCK(sc); mii = device_get_softc(sc->atse_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) { PHY_RESET(miisc); } error = mii_mediachg(mii); ATSE_UNLOCK(sc); return (error); } /* * Report current media status. */ static void atse_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct atse_softc *sc; struct mii_data *mii; sc = ifp->if_softc; ATSE_LOCK(sc); mii = device_get_softc(sc->atse_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ATSE_UNLOCK(sc); } static struct atse_mac_stats_regs { const char *name; const char *descr; /* Mostly copied from Altera datasheet. */ } atse_mac_stats_regs[] = { [0x1a] = { "aFramesTransmittedOK", "The number of frames that are successfully transmitted including " "the pause frames." }, { "aFramesReceivedOK", "The number of frames that are successfully received including the " "pause frames." }, { "aFrameCheckSequenceErrors", "The number of receive frames with CRC error." }, { "aAlignmentErrors", "The number of receive frames with alignment error." }, { "aOctetsTransmittedOK", "The lower 32 bits of the number of data and padding octets that " "are successfully transmitted." }, { "aOctetsReceivedOK", "The lower 32 bits of the number of data and padding octets that " "are successfully received." }, { "aTxPAUSEMACCtrlFrames", "The number of pause frames transmitted." }, { "aRxPAUSEMACCtrlFrames", "The number of pause frames received." }, { "ifInErrors", "The number of errored frames received." }, { "ifOutErrors", "The number of transmit frames with either a FIFO overflow error, " "a FIFO underflow error, or an error defined by the user " "application." }, { "ifInUcastPkts", "The number of valid unicast frames received." }, { "ifInMulticastPkts", "The number of valid multicast frames received. The count does " "not include pause frames." }, { "ifInBroadcastPkts", "The number of valid broadcast frames received." }, { "ifOutDiscards", "This statistics counter is not in use. The MAC function does not " "discard frames that are written to the FIFO buffer by the user " "application." }, { "ifOutUcastPkts", "The number of valid unicast frames transmitted." }, { "ifOutMulticastPkts", "The number of valid multicast frames transmitted, excluding pause " "frames." }, { "ifOutBroadcastPkts", "The number of valid broadcast frames transmitted." }, { "etherStatsDropEvents", "The number of frames that are dropped due to MAC internal errors " "when FIFO buffer overflow persists." }, { "etherStatsOctets", "The lower 32 bits of the total number of octets received. This " "count includes both good and errored frames." }, { "etherStatsPkts", "The total number of good and errored frames received." }, { "etherStatsUndersizePkts", "The number of frames received with length less than 64 bytes. " "This count does not include errored frames." }, { "etherStatsOversizePkts", "The number of frames received that are longer than the value " "configured in the frm_length register. This count does not " "include errored frames." }, { "etherStatsPkts64Octets", "The number of 64-byte frames received. This count includes good " "and errored frames." }, { "etherStatsPkts65to127Octets", "The number of received good and errored frames between the length " "of 65 and 127 bytes."
}, { "etherStatsPkts128to255Octets", "The number of received good and errored frames between the length " "of 128 and 255 bytes." }, { "etherStatsPkts256to511Octets", "The number of received good and errored frames between the length " "of 256 and 511 bytes." }, { "etherStatsPkts512to1023Octets", "The number of received good and errored frames between the length " "of 512 and 1023 bytes." }, { "etherStatsPkts1024to1518Octets", "The number of received good and errored frames between the length " "of 1024 and 1518 bytes." }, { "etherStatsPkts1519toXOctets", "The number of received good and errored frames between the length " "of 1519 and the maximum frame length configured in the frm_length " "register." }, { "etherStatsJabbers", "Too long frames with CRC error." }, { "etherStatsFragments", "Too short frames with CRC error." }, /* 0x39 unused, 0x3a/b non-stats. */ [0x3c] = /* Extended Statistics Counters */ { "msb_aOctetsTransmittedOK", "Upper 32 bits of the number of data and padding octets that are " "successfully transmitted." }, { "msb_aOctetsReceivedOK", "Upper 32 bits of the number of data and padding octets that are " "successfully received." }, { "msb_etherStatsOctets", "Upper 32 bits of the total number of octets received. This count " "includes both good and errored frames." } }; static int sysctl_atse_mac_stats_proc(SYSCTL_HANDLER_ARGS) { struct atse_softc *sc; int error, offset, s; sc = arg1; offset = arg2; s = CSR_READ_4(sc, offset); error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) { return (error); } return (0); } static struct atse_rx_err_stats_regs { const char *name; const char *descr; } atse_rx_err_stats_regs[] = { #define ATSE_RX_ERR_FIFO_THRES_EOP 0 /* FIFO threshold reached, on EOP. */ #define ATSE_RX_ERR_ELEN 1 /* Frame/payload length not valid. */ #define ATSE_RX_ERR_CRC32 2 /* CRC-32 error. */ #define ATSE_RX_ERR_FIFO_THRES_TRUNC 3 /* FIFO thresh., truncated frame. */ #define ATSE_RX_ERR_4 4 /* ? */ #define ATSE_RX_ERR_5 5 /* / */ { "rx_err_fifo_thres_eop", "FIFO threshold reached, reported on EOP." }, { "rx_err_fifo_elen", "Frame or payload length not valid." }, { "rx_err_fifo_crc32", "CRC-32 error." }, { "rx_err_fifo_thres_trunc", "FIFO threshold reached, truncated frame" }, { "rx_err_4", "?" }, { "rx_err_5", "?" }, }; static int sysctl_atse_rx_err_stats_proc(SYSCTL_HANDLER_ARGS) { struct atse_softc *sc; int error, offset, s; sc = arg1; offset = arg2; s = sc->atse_rx_err[offset]; error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) { return (error); } return (0); } static void atse_sysctl_stats_attach(device_t dev) { struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; struct atse_softc *sc; int i; sc = device_get_softc(dev); sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); /* MAC statistics. */ for (i = 0; i < nitems(atse_mac_stats_regs); i++) { if (atse_mac_stats_regs[i].name == NULL || atse_mac_stats_regs[i].descr == NULL) { continue; } SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, atse_mac_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD, sc, i, sysctl_atse_mac_stats_proc, "IU", atse_mac_stats_regs[i].descr); } /* rx_err[]. */ for (i = 0; i < ATSE_RX_ERR_MAX; i++) { if (atse_rx_err_stats_regs[i].name == NULL || atse_rx_err_stats_regs[i].descr == NULL) { continue; } SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, atse_rx_err_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD, sc, i, sysctl_atse_rx_err_stats_proc, "IU", atse_rx_err_stats_regs[i].descr); } } /* * Generic device handling routines. 
*/ int atse_attach(device_t dev) { struct atse_softc *sc; struct ifnet *ifp; uint32_t caps; int error; sc = device_get_softc(dev); sc->dev = dev; /* Get xDMA controller */ sc->xdma_tx = xdma_ofw_get(sc->dev, "tx"); if (sc->xdma_tx == NULL) { device_printf(dev, "Can't find DMA controller.\n"); return (ENXIO); } /* * Only final (EOP) write can be less than "symbols per beat" value * so we have to defrag mbuf chain. * Chapter 15. On-Chip FIFO Memory Core. * Embedded Peripherals IP User Guide. */ - caps = XCHAN_CAP_BUSDMA_NOSEG; + caps = XCHAN_CAP_NOSEG; /* Alloc xDMA virtual channel. */ sc->xchan_tx = xdma_channel_alloc(sc->xdma_tx, caps); if (sc->xchan_tx == NULL) { device_printf(dev, "Can't alloc virtual DMA channel.\n"); return (ENXIO); } /* Setup interrupt handler. */ error = xdma_setup_intr(sc->xchan_tx, atse_xdma_tx_intr, sc, &sc->ih_tx); if (error) { device_printf(sc->dev, "Can't setup xDMA interrupt handler.\n"); return (ENXIO); } xdma_prep_sg(sc->xchan_tx, TX_QUEUE_SIZE, /* xchan requests queue size */ MCLBYTES, /* maxsegsize */ 8, /* maxnsegs */ 16, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR); /* Get RX xDMA controller */ sc->xdma_rx = xdma_ofw_get(sc->dev, "rx"); if (sc->xdma_rx == NULL) { device_printf(dev, "Can't find DMA controller.\n"); return (ENXIO); } /* Alloc xDMA virtual channel. */ sc->xchan_rx = xdma_channel_alloc(sc->xdma_rx, caps); if (sc->xchan_rx == NULL) { device_printf(dev, "Can't alloc virtual DMA channel.\n"); return (ENXIO); } /* Setup interrupt handler. */ error = xdma_setup_intr(sc->xchan_rx, atse_xdma_rx_intr, sc, &sc->ih_rx); if (error) { device_printf(sc->dev, "Can't setup xDMA interrupt handler.\n"); return (ENXIO); } xdma_prep_sg(sc->xchan_rx, RX_QUEUE_SIZE, /* xchan requests queue size */ MCLBYTES, /* maxsegsize */ 1, /* maxnsegs */ 16, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR); mtx_init(&sc->br_mtx, "buf ring mtx", NULL, MTX_DEF); sc->br = buf_ring_alloc(BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &sc->br_mtx); if (sc->br == NULL) { return (ENOMEM); } atse_ethernet_option_bits_read(dev); mtx_init(&sc->atse_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->atse_tick, &sc->atse_mtx, 0); /* * We are only doing single-PHY with this driver currently. The * defaults would be right so that BASE_CFG_MDIO_ADDR0 points to the * 1st PHY address (0) apart from the fact that BMCR0 is always * the PCS mapping, so we always use BMCR1. See Table 5-1 0xA0-0xBF. */ #if 0 /* Always PCS. */ sc->atse_bmcr0 = MDIO_0_START; CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR0, 0x00); #endif /* Always use matching PHY for atse[0..]. */ sc->atse_phy_addr = device_get_unit(dev); sc->atse_bmcr1 = MDIO_1_START; CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR1, sc->atse_phy_addr); /* Reset the adapter. */ atse_reset(sc); /* Setup interface. */ ifp = sc->atse_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "if_alloc() failed\n"); error = ENOSPC; goto err; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = atse_ioctl; ifp->if_transmit = atse_transmit; ifp->if_qflush = atse_qflush; ifp->if_init = atse_init; IFQ_SET_MAXLEN(&ifp->if_snd, ATSE_TX_LIST_CNT - 1); ifp->if_snd.ifq_drv_maxlen = ATSE_TX_LIST_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* MII setup. 
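* mii_attach() probes the MII bus, but the miibus read/write methods below only answer for the PHY at atse_phy_addr (the device unit), which is reached through the MDIO space selected into BASE_CFG_MDIO_ADDR1 above.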
*/ error = mii_attach(dev, &sc->atse_miibus, ifp, atse_ifmedia_upd, atse_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHY failed: %d\n", error); goto err; } /* Call the media-independent attach routine. */ ether_ifattach(ifp, sc->atse_eth_addr); /* Tell the upper layer(s) about vlan mtu support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; err: if (error != 0) { atse_detach(dev); } if (error == 0) { atse_sysctl_stats_attach(dev); } atse_rx_enqueue(sc, NUM_RX_MBUF); xdma_queue_submit(sc->xchan_rx); return (error); } static int atse_detach(device_t dev) { struct atse_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->atse_mtx), ("%s: mutex not initialized", device_get_nameunit(dev))); ifp = sc->atse_ifp; /* Only cleanup if attach succeeded. */ if (device_is_attached(dev)) { ATSE_LOCK(sc); atse_stop_locked(sc); ATSE_UNLOCK(sc); callout_drain(&sc->atse_tick); ether_ifdetach(ifp); } if (sc->atse_miibus != NULL) { device_delete_child(dev, sc->atse_miibus); } if (ifp != NULL) { if_free(ifp); } mtx_destroy(&sc->atse_mtx); + + xdma_channel_free(sc->xchan_tx); + xdma_channel_free(sc->xchan_rx); + xdma_put(sc->xdma_tx); + xdma_put(sc->xdma_rx); return (0); } /* Shared between nexus and fdt implementation. */ void atse_detach_resources(device_t dev) { struct atse_softc *sc; sc = device_get_softc(dev); if (sc->atse_mem_res != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, sc->atse_mem_rid, sc->atse_mem_res); sc->atse_mem_res = NULL; } } int atse_detach_dev(device_t dev) { int error; error = atse_detach(dev); if (error) { /* We are basically in an undefined state now. */ device_printf(dev, "atse_detach() failed: %d\n", error); return (error); } atse_detach_resources(dev); return (0); } int atse_miibus_readreg(device_t dev, int phy, int reg) { struct atse_softc *sc; int val; sc = device_get_softc(dev); /* * We currently do not support re-mapping of MDIO space on-the-fly * but de-facto hard-code the phy#. */ if (phy != sc->atse_phy_addr) { return (0); } val = PHY_READ_2(sc, reg); return (val); } int atse_miibus_writereg(device_t dev, int phy, int reg, int data) { struct atse_softc *sc; sc = device_get_softc(dev); /* * We currently do not support re-mapping of MDIO space on-the-fly * but de-facto hard-code the phy#. */ if (phy != sc->atse_phy_addr) { return (0); } PHY_WRITE_2(sc, reg, data); return (0); } void atse_miibus_statchg(device_t dev) { struct atse_softc *sc; struct mii_data *mii; struct ifnet *ifp; uint32_t val4; sc = device_get_softc(dev); ATSE_LOCK_ASSERT(sc); mii = device_get_softc(sc->atse_miibus); ifp = sc->atse_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG); /* Assume no link.
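* The negotiated media maps onto two command_config bits below: ENA_10 set and ETH_SPEED clear for 10Mbit/s, both clear for 100Mbit/s, and ETH_SPEED set with ENA_10 clear for 1000Mbit/s; half duplex additionally sets HD_ENA.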
*/ sc->atse_flags &= ~ATSE_FLAGS_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: val4 |= BASE_CFG_COMMAND_CONFIG_ENA_10; val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; case IFM_100_TX: val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; case IFM_1000_T: val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10; val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED; sc->atse_flags |= ATSE_FLAGS_LINK; break; default: break; } } if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) { /* Need to stop the MAC? */ return; } if (IFM_OPTIONS(mii->mii_media_active & IFM_FDX) != 0) { val4 &= ~BASE_CFG_COMMAND_CONFIG_HD_ENA; } else { val4 |= BASE_CFG_COMMAND_CONFIG_HD_ENA; } /* flow control? */ /* Make sure the MAC is activated. */ val4 |= BASE_CFG_COMMAND_CONFIG_TX_ENA; val4 |= BASE_CFG_COMMAND_CONFIG_RX_ENA; CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4); } MODULE_DEPEND(atse, ether, 1, 1, 1); MODULE_DEPEND(atse, miibus, 1, 1, 1); Index: projects/runtime-coverage-v2/sys/dev/altera/softdma/softdma.c =================================================================== --- projects/runtime-coverage-v2/sys/dev/altera/softdma/softdma.c (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/altera/softdma/softdma.c (revision 346925) @@ -1,864 +1,888 @@ /*- * Copyright (c) 2017-2018 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* This is driver for SoftDMA device built using Altera FIFO component. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include #include "xdma_if.h" #define SOFTDMA_DEBUG #undef SOFTDMA_DEBUG #ifdef SOFTDMA_DEBUG #define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) #else #define dprintf(fmt, ...) 
#endif #define AVALON_FIFO_TX_BASIC_OPTS_DEPTH 16 #define SOFTDMA_NCHANNELS 1 #define CONTROL_GEN_SOP (1 << 0) #define CONTROL_GEN_EOP (1 << 1) #define CONTROL_OWN (1 << 31) #define SOFTDMA_RX_EVENTS \ (A_ONCHIP_FIFO_MEM_CORE_INTR_FULL | \ A_ONCHIP_FIFO_MEM_CORE_INTR_OVERFLOW | \ A_ONCHIP_FIFO_MEM_CORE_INTR_UNDERFLOW) #define SOFTDMA_TX_EVENTS \ (A_ONCHIP_FIFO_MEM_CORE_INTR_EMPTY | \ A_ONCHIP_FIFO_MEM_CORE_INTR_OVERFLOW | \ A_ONCHIP_FIFO_MEM_CORE_INTR_UNDERFLOW) struct softdma_channel { struct softdma_softc *sc; struct mtx mtx; xdma_channel_t *xchan; struct proc *p; int used; int index; int run; uint32_t idx_tail; uint32_t idx_head; struct softdma_desc *descs; uint32_t descs_num; uint32_t descs_used_count; }; struct softdma_desc { uint64_t src_addr; uint64_t dst_addr; uint32_t len; uint32_t access_width; uint32_t count; uint16_t src_incr; uint16_t dst_incr; uint32_t direction; struct softdma_desc *next; uint32_t transfered; uint32_t status; uint32_t reserved; uint32_t control; }; struct softdma_softc { device_t dev; struct resource *res[3]; bus_space_tag_t bst; bus_space_handle_t bsh; bus_space_tag_t bst_c; bus_space_handle_t bsh_c; void *ih; struct softdma_channel channels[SOFTDMA_NCHANNELS]; }; static struct resource_spec softdma_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* fifo */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* core */ { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static int softdma_probe(device_t dev); static int softdma_attach(device_t dev); static int softdma_detach(device_t dev); static inline uint32_t softdma_next_desc(struct softdma_channel *chan, uint32_t curidx) { return ((curidx + 1) % chan->descs_num); } static void softdma_mem_write(struct softdma_softc *sc, uint32_t reg, uint32_t val) { bus_write_4(sc->res[0], reg, htole32(val)); } static uint32_t softdma_mem_read(struct softdma_softc *sc, uint32_t reg) { uint32_t val; val = bus_read_4(sc->res[0], reg); return (le32toh(val)); } static void softdma_memc_write(struct softdma_softc *sc, uint32_t reg, uint32_t val) { bus_write_4(sc->res[1], reg, htole32(val)); } static uint32_t softdma_memc_read(struct softdma_softc *sc, uint32_t reg) { uint32_t val; val = bus_read_4(sc->res[1], reg); return (le32toh(val)); } static uint32_t softdma_fill_level(struct softdma_softc *sc) { uint32_t val; val = softdma_memc_read(sc, A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_FILL_LEVEL); return (val); } +static uint32_t +fifo_fill_level_wait(struct softdma_softc *sc) +{ + uint32_t val; + + do + val = softdma_fill_level(sc); + while (val == AVALON_FIFO_TX_BASIC_OPTS_DEPTH); + + return (val); +} + static void softdma_intr(void *arg) { struct softdma_channel *chan; struct softdma_softc *sc; int reg; int err; sc = arg; chan = &sc->channels[0]; reg = softdma_memc_read(sc, A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_EVENT); if (reg & (A_ONCHIP_FIFO_MEM_CORE_EVENT_OVERFLOW | A_ONCHIP_FIFO_MEM_CORE_EVENT_UNDERFLOW)) { /* Errors */ err = (((reg & A_ONCHIP_FIFO_MEM_CORE_ERROR_MASK) >> \ A_ONCHIP_FIFO_MEM_CORE_ERROR_SHIFT) & 0xff); } if (reg != 0) { softdma_memc_write(sc, A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_EVENT, reg); chan->run = 1; wakeup(chan); } } static int softdma_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "altr,softdma")) return (ENXIO); device_set_desc(dev, "SoftDMA"); return (BUS_PROBE_DEFAULT); } static int softdma_attach(device_t dev) { struct softdma_softc *sc; phandle_t xref, node; int err; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, softdma_spec, sc->res)) { 
device_printf(dev, "could not allocate resources for device\n"); return (ENXIO); } /* FIFO memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); /* FIFO control memory interface */ sc->bst_c = rman_get_bustag(sc->res[1]); sc->bsh_c = rman_get_bushandle(sc->res[1]); /* Setup interrupt handler */ err = bus_setup_intr(dev, sc->res[2], INTR_TYPE_MISC | INTR_MPSAFE, NULL, softdma_intr, sc, &sc->ih); if (err) { device_printf(dev, "Unable to alloc interrupt resource.\n"); return (ENXIO); } node = ofw_bus_get_node(dev); xref = OF_xref_from_node(node); OF_device_register_xref(xref, dev); return (0); } static int softdma_detach(device_t dev) { struct softdma_softc *sc; sc = device_get_softc(dev); return (0); } static int softdma_process_tx(struct softdma_channel *chan, struct softdma_desc *desc) { struct softdma_softc *sc; - uint32_t src_offs, dst_offs; + uint64_t addr; + uint64_t buf; + uint32_t word; + uint32_t missing; uint32_t reg; - uint32_t fill_level; - uint32_t leftm; - uint32_t tmp; - uint32_t val; - uint32_t c; + int got_bits; + int len; sc = chan->sc; - fill_level = softdma_fill_level(sc); - while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH) - fill_level = softdma_fill_level(sc); + fifo_fill_level_wait(sc); /* Set start of packet. */ - if (desc->control & CONTROL_GEN_SOP) { - reg = 0; - reg |= A_ONCHIP_FIFO_MEM_CORE_SOP; - softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg); - } + if (desc->control & CONTROL_GEN_SOP) + softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, + A_ONCHIP_FIFO_MEM_CORE_SOP); - src_offs = dst_offs = 0; - c = 0; - while ((desc->len - c) >= 4) { - val = *(uint32_t *)(desc->src_addr + src_offs); - bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, val); - if (desc->src_incr) - src_offs += 4; - if (desc->dst_incr) - dst_offs += 4; - fill_level += 1; + got_bits = 0; + buf = 0; - while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH) { - fill_level = softdma_fill_level(sc); - } - c += 4; + addr = desc->src_addr; + len = desc->len; + + if (addr & 1) { + buf = (buf << 8) | *(uint8_t *)addr; + got_bits += 8; + addr += 1; + len -= 1; } - val = 0; - leftm = (desc->len - c); + if (len >= 2 && addr & 2) { + buf = (buf << 16) | *(uint16_t *)addr; + got_bits += 16; + addr += 2; + len -= 2; + } - switch (leftm) { - case 1: - val = *(uint8_t *)(desc->src_addr + src_offs); - val <<= 24; - src_offs += 1; - break; - case 2: - case 3: - val = *(uint16_t *)(desc->src_addr + src_offs); - val <<= 16; - src_offs += 2; + while (len >= 4) { + buf = (buf << 32) | (uint64_t)*(uint32_t *)addr; + addr += 4; + len -= 4; + word = (uint32_t)((buf >> got_bits) & 0xffffffff); - if (leftm == 3) { - tmp = *(uint8_t *)(desc->src_addr + src_offs); - val |= (tmp << 8); - src_offs += 1; - } - break; - case 0: - default: - break; + fifo_fill_level_wait(sc); + if (len == 0 && got_bits == 0 && + (desc->control & CONTROL_GEN_EOP) != 0) + softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, + A_ONCHIP_FIFO_MEM_CORE_EOP); + bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word); } - /* Set end of packet. */ - reg = 0; - if (desc->control & CONTROL_GEN_EOP) - reg |= A_ONCHIP_FIFO_MEM_CORE_EOP; - reg |= ((4 - leftm) << A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT); - softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg); + if (len & 2) { + buf = (buf << 16) | *(uint16_t *)addr; + got_bits += 16; + addr += 2; + len -= 2; + } - /* Ensure there is a FIFO entry available. 
*/ - fill_level = softdma_fill_level(sc); - while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH) - fill_level = softdma_fill_level(sc); + if (len & 1) { + buf = (buf << 8) | *(uint8_t *)addr; + got_bits += 8; + addr += 1; + len -= 1; + } - /* Final write */ - bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, val); + if (got_bits >= 32) { + got_bits -= 32; + word = (uint32_t)((buf >> got_bits) & 0xffffffff); - return (dst_offs); + fifo_fill_level_wait(sc); + if (len == 0 && got_bits == 0 && + (desc->control & CONTROL_GEN_EOP) != 0) + softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, + A_ONCHIP_FIFO_MEM_CORE_EOP); + bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word); + } + + if (got_bits) { + missing = 32 - got_bits; + got_bits /= 8; + + fifo_fill_level_wait(sc); + reg = A_ONCHIP_FIFO_MEM_CORE_EOP | + ((4 - got_bits) << A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT); + softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg); + word = (uint32_t)((buf << missing) & 0xffffffff); + bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word); + } + + return (desc->len); } static int softdma_process_rx(struct softdma_channel *chan, struct softdma_desc *desc) { uint32_t src_offs, dst_offs; struct softdma_softc *sc; uint32_t fill_level; uint32_t empty; uint32_t meta; uint32_t data; int sop_rcvd; int timeout; size_t len; int error; sc = chan->sc; empty = 0; src_offs = dst_offs = 0; error = 0; fill_level = softdma_fill_level(sc); if (fill_level == 0) { /* Nothing to receive. */ return (0); } len = desc->len; sop_rcvd = 0; while (fill_level) { empty = 0; data = bus_read_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA); meta = softdma_mem_read(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA); if (meta & A_ONCHIP_FIFO_MEM_CORE_ERROR_MASK) { error = 1; break; } if ((meta & A_ONCHIP_FIFO_MEM_CORE_CHANNEL_MASK) != 0) { error = 1; break; } if (meta & A_ONCHIP_FIFO_MEM_CORE_SOP) { sop_rcvd = 1; } if (meta & A_ONCHIP_FIFO_MEM_CORE_EOP) { empty = (meta & A_ONCHIP_FIFO_MEM_CORE_EMPTY_MASK) >> A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT; } if (sop_rcvd == 0) { error = 1; break; } if (empty == 0) { *(uint32_t *)(desc->dst_addr + dst_offs) = data; dst_offs += 4; } else if (empty == 1) { *(uint16_t *)(desc->dst_addr + dst_offs) = ((data >> 16) & 0xffff); dst_offs += 2; *(uint8_t *)(desc->dst_addr + dst_offs) = ((data >> 8) & 0xff); dst_offs += 1; } else { panic("empty %d\n", empty); } if (meta & A_ONCHIP_FIFO_MEM_CORE_EOP) break; fill_level = softdma_fill_level(sc); timeout = 100; while (fill_level == 0 && timeout--) fill_level = softdma_fill_level(sc); if (timeout < 0) { /* No EOP received. Broken packet. */ error = 1; break; } } if (error) { return (-1); } return (dst_offs); } static uint32_t softdma_process_descriptors(struct softdma_channel *chan, xdma_transfer_status_t *status) { struct xdma_channel *xchan; struct softdma_desc *desc; struct softdma_softc *sc; xdma_transfer_status_t st; int ret; sc = chan->sc; xchan = chan->xchan; desc = &chan->descs[chan->idx_tail]; while (desc != NULL) { if ((desc->control & CONTROL_OWN) == 0) { break; } if (desc->direction == XDMA_MEM_TO_DEV) { ret = softdma_process_tx(chan, desc); } else { ret = softdma_process_rx(chan, desc); if (ret == 0) { /* No new data available. */ break; } } /* Descriptor processed.
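The rewritten softdma_process_tx() above replaces the old aligned word copy with a shift accumulator: bytes are folded into a 64-bit buffer and 32-bit words are drained whenever enough bits are pending, which is what lets arbitrarily aligned buffers feed the 32-bit FIFO. A standalone sketch of the same idea, byte-at-a-time for clarity (the driver widens to 16- and 32-bit loads), with put_word() as a made-up stand-in for the FIFO data-register write:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the FIFO data-register write (bus_write_4 in the driver). */
static void
put_word(uint32_t word)
{
	printf("%08x\n", word);
}

/*
 * Feed an arbitrarily aligned byte buffer to a 32-bit-wide sink.  Bytes
 * are shifted into a 64-bit accumulator; a word is drained every time
 * 32 bits are pending, and a short tail is left-justified, as the
 * driver does for the final EOP word.
 */
static void
tx_accumulate(const uint8_t *buf, size_t len)
{
	uint64_t acc = 0;
	int pending = 0;	/* bits accumulated but not yet emitted */

	for (size_t i = 0; i < len; i++) {
		acc = (acc << 8) | buf[i];
		pending += 8;
		if (pending == 32) {	/* never exceeds 32 with byte steps */
			put_word((uint32_t)acc);
			acc = 0;
			pending = 0;
		}
	}
	if (pending > 0)	/* left-justify the tail, zero-padded */
		put_word((uint32_t)(acc << (32 - pending)));
}

int
main(void)
{
	uint8_t pkt[] = { 1, 2, 3, 4, 5, 6, 7 };

	tx_accumulate(pkt + 1, sizeof(pkt) - 1);	/* unaligned start */
	return (0);
}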
*/ desc->control = 0; if (ret >= 0) { st.error = 0; st.transferred = ret; } else { st.error = ret; st.transferred = 0; } xchan_seg_done(xchan, &st); atomic_subtract_int(&chan->descs_used_count, 1); if (ret >= 0) { status->transferred += ret; } else { status->error = 1; break; } chan->idx_tail = softdma_next_desc(chan, chan->idx_tail); /* Process next descriptor, if any. */ desc = desc->next; } return (0); } static void softdma_worker(void *arg) { xdma_transfer_status_t status; struct softdma_channel *chan; struct softdma_softc *sc; chan = arg; sc = chan->sc; while (1) { mtx_lock(&chan->mtx); do { mtx_sleep(chan, &chan->mtx, 0, "softdma_wait", hz / 2); } while (chan->run == 0); status.error = 0; status.transferred = 0; softdma_process_descriptors(chan, &status); /* Finish operation */ chan->run = 0; xdma_callback(chan->xchan, &status); mtx_unlock(&chan->mtx); } } static int softdma_proc_create(struct softdma_channel *chan) { struct softdma_softc *sc; sc = chan->sc; if (chan->p != NULL) { /* Already created */ return (0); } mtx_init(&chan->mtx, "SoftDMA", NULL, MTX_DEF); if (kproc_create(softdma_worker, (void *)chan, &chan->p, 0, 0, "softdma_worker") != 0) { device_printf(sc->dev, "%s: Failed to create worker thread.\n", __func__); return (-1); } return (0); } static int softdma_channel_alloc(device_t dev, struct xdma_channel *xchan) { struct softdma_channel *chan; struct softdma_softc *sc; int i; sc = device_get_softc(dev); for (i = 0; i < SOFTDMA_NCHANNELS; i++) { chan = &sc->channels[i]; if (chan->used == 0) { chan->xchan = xchan; xchan->chan = (void *)chan; + xchan->caps |= XCHAN_CAP_NOBUFS; + xchan->caps |= XCHAN_CAP_NOSEG; chan->index = i; chan->idx_head = 0; chan->idx_tail = 0; chan->descs_used_count = 0; chan->descs_num = 1024; chan->sc = sc; if (softdma_proc_create(chan) != 0) { return (-1); } chan->used = 1; return (0); } } return (-1); } static int softdma_channel_free(device_t dev, struct xdma_channel *xchan) { struct softdma_channel *chan; struct softdma_softc *sc; sc = device_get_softc(dev); chan = (struct softdma_channel *)xchan->chan; if (chan->descs != NULL) { free(chan->descs, M_DEVBUF); } chan->used = 0; return (0); } static int softdma_desc_alloc(struct xdma_channel *xchan) { struct softdma_channel *chan; uint32_t nsegments; chan = (struct softdma_channel *)xchan->chan; nsegments = chan->descs_num; chan->descs = malloc(nsegments * sizeof(struct softdma_desc), M_DEVBUF, (M_WAITOK | M_ZERO)); return (0); } static int softdma_channel_prep_sg(device_t dev, struct xdma_channel *xchan) { struct softdma_channel *chan; struct softdma_desc *desc; struct softdma_softc *sc; int ret; int i; sc = device_get_softc(dev); chan = (struct softdma_channel *)xchan->chan; ret = softdma_desc_alloc(xchan); if (ret != 0) { device_printf(sc->dev, "%s: Can't allocate descriptors.\n", __func__); return (-1); } for (i = 0; i < chan->descs_num; i++) { desc = &chan->descs[i]; if (i == (chan->descs_num - 1)) { desc->next = &chan->descs[0]; } else { desc->next = &chan->descs[i+1]; } } return (0); } static int softdma_channel_capacity(device_t dev, xdma_channel_t *xchan, uint32_t *capacity) { struct softdma_channel *chan; uint32_t c; chan = (struct softdma_channel *)xchan->chan; /* At least one descriptor must be left empty. 
*/ c = (chan->descs_num - chan->descs_used_count - 1); *capacity = c; return (0); } static int softdma_channel_submit_sg(device_t dev, struct xdma_channel *xchan, struct xdma_sglist *sg, uint32_t sg_n) { struct softdma_channel *chan; struct softdma_desc *desc; struct softdma_softc *sc; uint32_t enqueued; uint32_t saved_dir; uint32_t tmp; uint32_t len; int i; sc = device_get_softc(dev); chan = (struct softdma_channel *)xchan->chan; enqueued = 0; for (i = 0; i < sg_n; i++) { len = (uint32_t)sg[i].len; desc = &chan->descs[chan->idx_head]; desc->src_addr = sg[i].src_addr; desc->dst_addr = sg[i].dst_addr; if (sg[i].direction == XDMA_MEM_TO_DEV) { desc->src_incr = 1; desc->dst_incr = 0; } else { desc->src_incr = 0; desc->dst_incr = 1; } desc->direction = sg[i].direction; saved_dir = sg[i].direction; desc->len = len; desc->transfered = 0; desc->status = 0; desc->reserved = 0; desc->control = 0; if (sg[i].first == 1) desc->control |= CONTROL_GEN_SOP; if (sg[i].last == 1) desc->control |= CONTROL_GEN_EOP; tmp = chan->idx_head; chan->idx_head = softdma_next_desc(chan, chan->idx_head); atomic_add_int(&chan->descs_used_count, 1); desc->control |= CONTROL_OWN; enqueued += 1; } if (enqueued == 0) return (0); if (saved_dir == XDMA_MEM_TO_DEV) { chan->run = 1; wakeup(chan); } else softdma_memc_write(sc, A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_INT_ENABLE, SOFTDMA_RX_EVENTS); return (0); } static int softdma_channel_request(device_t dev, struct xdma_channel *xchan, struct xdma_request *req) { struct softdma_channel *chan; struct softdma_desc *desc; struct softdma_softc *sc; int ret; sc = device_get_softc(dev); chan = (struct softdma_channel *)xchan->chan; ret = softdma_desc_alloc(xchan); if (ret != 0) { device_printf(sc->dev, "%s: Can't allocate descriptors.\n", __func__); return (-1); } desc = &chan->descs[0]; desc->src_addr = req->src_addr; desc->dst_addr = req->dst_addr; desc->len = req->block_len; desc->src_incr = 1; desc->dst_incr = 1; desc->next = NULL; return (0); } static int softdma_channel_control(device_t dev, xdma_channel_t *xchan, int cmd) { struct softdma_channel *chan; struct softdma_softc *sc; sc = device_get_softc(dev); chan = (struct softdma_channel *)xchan->chan; switch (cmd) { case XDMA_CMD_BEGIN: case XDMA_CMD_TERMINATE: case XDMA_CMD_PAUSE: /* TODO: implement me */ return (-1); } return (0); } #ifdef FDT static int softdma_ofw_md_data(device_t dev, pcell_t *cells, int ncells, void **ptr) { return (0); } #endif static device_method_t softdma_methods[] = { /* Device interface */ DEVMETHOD(device_probe, softdma_probe), DEVMETHOD(device_attach, softdma_attach), DEVMETHOD(device_detach, softdma_detach), /* xDMA Interface */ DEVMETHOD(xdma_channel_alloc, softdma_channel_alloc), DEVMETHOD(xdma_channel_free, softdma_channel_free), DEVMETHOD(xdma_channel_request, softdma_channel_request), DEVMETHOD(xdma_channel_control, softdma_channel_control), /* xDMA SG Interface */ DEVMETHOD(xdma_channel_prep_sg, softdma_channel_prep_sg), DEVMETHOD(xdma_channel_submit_sg, softdma_channel_submit_sg), DEVMETHOD(xdma_channel_capacity, softdma_channel_capacity), #ifdef FDT DEVMETHOD(xdma_ofw_md_data, softdma_ofw_md_data), #endif DEVMETHOD_END }; static driver_t softdma_driver = { "softdma", softdma_methods, sizeof(struct softdma_softc), }; static devclass_t softdma_devclass; EARLY_DRIVER_MODULE(softdma, simplebus, softdma_driver, softdma_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE); Index: projects/runtime-coverage-v2/sys/dev/isp/isp_pci.c 
=================================================================== --- projects/runtime-coverage-v2/sys/dev/isp/isp_pci.c (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/isp/isp_pci.c (revision 346925) @@ -1,2001 +1,2010 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2018 Alexander Motin * Copyright (c) 1997-2008 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PCI specific probe and attach routines for Qlogic ISP SCSI adapters. * FreeBSD Version. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __sparc64__ #include #include #endif #include static uint32_t isp_pci_rd_reg(ispsoftc_t *, int); static void isp_pci_wr_reg(ispsoftc_t *, int, uint32_t); static uint32_t isp_pci_rd_reg_1080(ispsoftc_t *, int); static void isp_pci_wr_reg_1080(ispsoftc_t *, int, uint32_t); static uint32_t isp_pci_rd_reg_2400(ispsoftc_t *, int); static void isp_pci_wr_reg_2400(ispsoftc_t *, int, uint32_t); static uint32_t isp_pci_rd_reg_2600(ispsoftc_t *, int); static void isp_pci_wr_reg_2600(ispsoftc_t *, int, uint32_t); static void isp_pci_run_isr(ispsoftc_t *); static void isp_pci_run_isr_2300(ispsoftc_t *); static void isp_pci_run_isr_2400(ispsoftc_t *); static int isp_pci_mbxdma(ispsoftc_t *); static void isp_pci_mbxdmafree(ispsoftc_t *); static int isp_pci_dmasetup(ispsoftc_t *, XS_T *, void *); static int isp_pci_irqsetup(ispsoftc_t *); static void isp_pci_dumpregs(ispsoftc_t *, const char *); static struct ispmdvec mdvec = { isp_pci_run_isr, isp_pci_rd_reg, isp_pci_wr_reg, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs, NULL, BIU_BURST_ENABLE|BIU_PCI_CONF1_FIFO_64 }; static struct ispmdvec mdvec_1080 = { isp_pci_run_isr, isp_pci_rd_reg_1080, isp_pci_wr_reg_1080, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs, NULL, BIU_BURST_ENABLE|BIU_PCI_CONF1_FIFO_64 }; static struct ispmdvec mdvec_12160 = { isp_pci_run_isr, isp_pci_rd_reg_1080, isp_pci_wr_reg_1080, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs, NULL, BIU_BURST_ENABLE|BIU_PCI_CONF1_FIFO_64 }; static struct ispmdvec 
mdvec_2100 = { isp_pci_run_isr, isp_pci_rd_reg, isp_pci_wr_reg, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs }; static struct ispmdvec mdvec_2200 = { isp_pci_run_isr, isp_pci_rd_reg, isp_pci_wr_reg, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs }; static struct ispmdvec mdvec_2300 = { isp_pci_run_isr_2300, isp_pci_rd_reg, isp_pci_wr_reg, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, isp_pci_dumpregs }; static struct ispmdvec mdvec_2400 = { isp_pci_run_isr_2400, isp_pci_rd_reg_2400, isp_pci_wr_reg_2400, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, NULL }; static struct ispmdvec mdvec_2500 = { isp_pci_run_isr_2400, isp_pci_rd_reg_2400, isp_pci_wr_reg_2400, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, NULL }; static struct ispmdvec mdvec_2600 = { isp_pci_run_isr_2400, isp_pci_rd_reg_2600, isp_pci_wr_reg_2600, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, NULL }; static struct ispmdvec mdvec_2700 = { isp_pci_run_isr_2400, isp_pci_rd_reg_2600, isp_pci_wr_reg_2600, isp_pci_mbxdma, isp_pci_dmasetup, isp_common_dmateardown, isp_pci_irqsetup, NULL }; #ifndef PCIM_CMD_INVEN #define PCIM_CMD_INVEN 0x10 #endif #ifndef PCIM_CMD_BUSMASTEREN #define PCIM_CMD_BUSMASTEREN 0x0004 #endif #ifndef PCIM_CMD_PERRESPEN #define PCIM_CMD_PERRESPEN 0x0040 #endif #ifndef PCIM_CMD_SEREN #define PCIM_CMD_SEREN 0x0100 #endif #ifndef PCIM_CMD_INTX_DISABLE #define PCIM_CMD_INTX_DISABLE 0x0400 #endif #ifndef PCIR_COMMAND #define PCIR_COMMAND 0x04 #endif #ifndef PCIR_CACHELNSZ #define PCIR_CACHELNSZ 0x0c #endif #ifndef PCIR_LATTIMER #define PCIR_LATTIMER 0x0d #endif #ifndef PCIR_ROMADDR #define PCIR_ROMADDR 0x30 #endif #define PCI_VENDOR_QLOGIC 0x1077 #define PCI_PRODUCT_QLOGIC_ISP1020 0x1020 #define PCI_PRODUCT_QLOGIC_ISP1080 0x1080 #define PCI_PRODUCT_QLOGIC_ISP10160 0x1016 #define PCI_PRODUCT_QLOGIC_ISP12160 0x1216 #define PCI_PRODUCT_QLOGIC_ISP1240 0x1240 #define PCI_PRODUCT_QLOGIC_ISP1280 0x1280 #define PCI_PRODUCT_QLOGIC_ISP2100 0x2100 #define PCI_PRODUCT_QLOGIC_ISP2200 0x2200 #define PCI_PRODUCT_QLOGIC_ISP2300 0x2300 #define PCI_PRODUCT_QLOGIC_ISP2312 0x2312 #define PCI_PRODUCT_QLOGIC_ISP2322 0x2322 #define PCI_PRODUCT_QLOGIC_ISP2422 0x2422 #define PCI_PRODUCT_QLOGIC_ISP2432 0x2432 #define PCI_PRODUCT_QLOGIC_ISP2532 0x2532 #define PCI_PRODUCT_QLOGIC_ISP5432 0x5432 #define PCI_PRODUCT_QLOGIC_ISP6312 0x6312 #define PCI_PRODUCT_QLOGIC_ISP6322 0x6322 #define PCI_PRODUCT_QLOGIC_ISP2031 0x2031 #define PCI_PRODUCT_QLOGIC_ISP8031 0x8031 #define PCI_PRODUCT_QLOGIC_ISP2684 0x2171 #define PCI_PRODUCT_QLOGIC_ISP2692 0x2b61 #define PCI_PRODUCT_QLOGIC_ISP2714 0x2071 #define PCI_PRODUCT_QLOGIC_ISP2722 0x2261 #define PCI_QLOGIC_ISP1020 \ ((PCI_PRODUCT_QLOGIC_ISP1020 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP1080 \ ((PCI_PRODUCT_QLOGIC_ISP1080 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP10160 \ ((PCI_PRODUCT_QLOGIC_ISP10160 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP12160 \ ((PCI_PRODUCT_QLOGIC_ISP12160 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP1240 \ ((PCI_PRODUCT_QLOGIC_ISP1240 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP1280 \ ((PCI_PRODUCT_QLOGIC_ISP1280 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2100 \ ((PCI_PRODUCT_QLOGIC_ISP2100 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2200 \ ((PCI_PRODUCT_QLOGIC_ISP2200 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2300 \ 
((PCI_PRODUCT_QLOGIC_ISP2300 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2312 \ ((PCI_PRODUCT_QLOGIC_ISP2312 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2322 \ ((PCI_PRODUCT_QLOGIC_ISP2322 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2422 \ ((PCI_PRODUCT_QLOGIC_ISP2422 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2432 \ ((PCI_PRODUCT_QLOGIC_ISP2432 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2532 \ ((PCI_PRODUCT_QLOGIC_ISP2532 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP5432 \ ((PCI_PRODUCT_QLOGIC_ISP5432 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP6312 \ ((PCI_PRODUCT_QLOGIC_ISP6312 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP6322 \ ((PCI_PRODUCT_QLOGIC_ISP6322 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2031 \ ((PCI_PRODUCT_QLOGIC_ISP2031 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP8031 \ ((PCI_PRODUCT_QLOGIC_ISP8031 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2684 \ ((PCI_PRODUCT_QLOGIC_ISP2684 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2692 \ ((PCI_PRODUCT_QLOGIC_ISP2692 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2714 \ ((PCI_PRODUCT_QLOGIC_ISP2714 << 16) | PCI_VENDOR_QLOGIC) #define PCI_QLOGIC_ISP2722 \ ((PCI_PRODUCT_QLOGIC_ISP2722 << 16) | PCI_VENDOR_QLOGIC) /* * Odd case for some AMI raid cards... We need to *not* attach to this. */ #define AMI_RAID_SUBVENDOR_ID 0x101e #define PCI_DFLT_LTNCY 0x40 #define PCI_DFLT_LNSZ 0x10 static int isp_pci_probe (device_t); static int isp_pci_attach (device_t); static int isp_pci_detach (device_t); #define ISP_PCD(isp) ((struct isp_pcisoftc *)isp)->pci_dev struct isp_pcisoftc { ispsoftc_t pci_isp; device_t pci_dev; struct resource * regs; struct resource * regs1; struct resource * regs2; struct { int iqd; struct resource * irq; void * ih; } irq[ISP_MAX_IRQS]; int rtp; int rgd; int rtp1; int rgd1; int rtp2; int rgd2; int16_t pci_poff[_NREG_BLKS]; bus_dma_tag_t dmat; int msicount; }; static device_method_t isp_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, isp_pci_probe), DEVMETHOD(device_attach, isp_pci_attach), DEVMETHOD(device_detach, isp_pci_detach), { 0, 0 } }; static driver_t isp_pci_driver = { "isp", isp_pci_methods, sizeof (struct isp_pcisoftc) }; static devclass_t isp_devclass; DRIVER_MODULE(isp, pci, isp_pci_driver, isp_devclass, 0, 0); MODULE_DEPEND(isp, cam, 1, 1, 1); MODULE_DEPEND(isp, firmware, 1, 1, 1); static int isp_nvports = 0; static int isp_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP1020: device_set_desc(dev, "Qlogic ISP 1020/1040 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP1080: device_set_desc(dev, "Qlogic ISP 1080 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP1240: device_set_desc(dev, "Qlogic ISP 1240 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP1280: device_set_desc(dev, "Qlogic ISP 1280 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP10160: device_set_desc(dev, "Qlogic ISP 10160 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP12160: if (pci_get_subvendor(dev) == AMI_RAID_SUBVENDOR_ID) { return (ENXIO); } device_set_desc(dev, "Qlogic ISP 12160 PCI SCSI Adapter"); break; case PCI_QLOGIC_ISP2100: device_set_desc(dev, "Qlogic ISP 2100 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2200: device_set_desc(dev, "Qlogic ISP 2200 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2300: device_set_desc(dev, "Qlogic ISP 2300 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2312: device_set_desc(dev, "Qlogic ISP 2312 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2322: 
device_set_desc(dev, "Qlogic ISP 2322 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2422: device_set_desc(dev, "Qlogic ISP 2422 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2432: device_set_desc(dev, "Qlogic ISP 2432 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2532: device_set_desc(dev, "Qlogic ISP 2532 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP5432: device_set_desc(dev, "Qlogic ISP 5432 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP6312: device_set_desc(dev, "Qlogic ISP 6312 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP6322: device_set_desc(dev, "Qlogic ISP 6322 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP2031: device_set_desc(dev, "Qlogic ISP 2031 PCI FC-AL Adapter"); break; case PCI_QLOGIC_ISP8031: device_set_desc(dev, "Qlogic ISP 8031 PCI FCoE Adapter"); break; case PCI_QLOGIC_ISP2684: device_set_desc(dev, "Qlogic ISP 2684 PCI FC Adapter"); break; case PCI_QLOGIC_ISP2692: device_set_desc(dev, "Qlogic ISP 2692 PCI FC Adapter"); break; case PCI_QLOGIC_ISP2714: device_set_desc(dev, "Qlogic ISP 2714 PCI FC Adapter"); break; case PCI_QLOGIC_ISP2722: device_set_desc(dev, "Qlogic ISP 2722 PCI FC Adapter"); break; default: return (ENXIO); } if (isp_announced == 0 && bootverbose) { printf("Qlogic ISP Driver, FreeBSD Version %d.%d, " "Core Version %d.%d\n", ISP_PLATFORM_VERSION_MAJOR, ISP_PLATFORM_VERSION_MINOR, ISP_CORE_VERSION_MAJOR, ISP_CORE_VERSION_MINOR); isp_announced++; } /* * XXXX: Here is where we might load the f/w module * XXXX: (or increase a reference count to it). */ return (BUS_PROBE_DEFAULT); } static void isp_get_generic_options(device_t dev, ispsoftc_t *isp) { int tval; tval = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "fwload_disable", &tval) == 0 && tval != 0) { isp->isp_confopts |= ISP_CFG_NORELOAD; } tval = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "ignore_nvram", &tval) == 0 && tval != 0) { isp->isp_confopts |= ISP_CFG_NONVRAM; } tval = 0; (void) resource_int_value(device_get_name(dev), device_get_unit(dev), "debug", &tval); if (tval) { isp->isp_dblev = tval; } else { isp->isp_dblev = ISP_LOGWARN|ISP_LOGERR; } if (bootverbose) { isp->isp_dblev |= ISP_LOGCONFIG|ISP_LOGINFO; } tval = -1; (void) resource_int_value(device_get_name(dev), device_get_unit(dev), "vports", &tval); if (tval > 0 && tval <= 254) { isp_nvports = tval; } tval = 7; (void) resource_int_value(device_get_name(dev), device_get_unit(dev), "quickboot_time", &tval); isp_quickboot_time = tval; } static void isp_get_specific_options(device_t dev, int chan, ispsoftc_t *isp) { const char *sptr; int tval = 0; char prefix[12], name[16]; if (chan == 0) prefix[0] = 0; else snprintf(prefix, sizeof(prefix), "chan%d.", chan); snprintf(name, sizeof(name), "%siid", prefix); if (resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval)) { if (IS_FC(isp)) { ISP_FC_PC(isp, chan)->default_id = 109 - chan; } else { #ifdef __sparc64__ ISP_SPI_PC(isp, chan)->iid = OF_getscsinitid(dev); #else ISP_SPI_PC(isp, chan)->iid = 7; #endif } } else { if (IS_FC(isp)) { ISP_FC_PC(isp, chan)->default_id = tval - chan; } else { ISP_SPI_PC(isp, chan)->iid = tval; } isp->isp_confopts |= ISP_CFG_OWNLOOPID; } if (IS_SCSI(isp)) return; tval = -1; snprintf(name, sizeof(name), "%srole", prefix); if (resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval) == 0) { switch (tval) { case ISP_ROLE_NONE: case ISP_ROLE_INITIATOR: case ISP_ROLE_TARGET: case ISP_ROLE_BOTH: device_printf(dev, "Chan %d setting role to 0x%x\n", chan, tval); break; default: 
tval = -1; break; } } if (tval == -1) { tval = ISP_DEFAULT_ROLES; } ISP_FC_PC(isp, chan)->def_role = tval; tval = 0; snprintf(name, sizeof(name), "%sfullduplex", prefix); if (resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval) == 0 && tval != 0) { isp->isp_confopts |= ISP_CFG_FULL_DUPLEX; } sptr = NULL; snprintf(name, sizeof(name), "%stopology", prefix); if (resource_string_value(device_get_name(dev), device_get_unit(dev), name, (const char **) &sptr) == 0 && sptr != NULL) { if (strcmp(sptr, "lport") == 0) { isp->isp_confopts |= ISP_CFG_LPORT; } else if (strcmp(sptr, "nport") == 0) { isp->isp_confopts |= ISP_CFG_NPORT; } else if (strcmp(sptr, "lport-only") == 0) { isp->isp_confopts |= ISP_CFG_LPORT_ONLY; } else if (strcmp(sptr, "nport-only") == 0) { isp->isp_confopts |= ISP_CFG_NPORT_ONLY; } } #ifdef ISP_FCTAPE_OFF isp->isp_confopts |= ISP_CFG_NOFCTAPE; #else isp->isp_confopts |= ISP_CFG_FCTAPE; #endif tval = 0; snprintf(name, sizeof(name), "%snofctape", prefix); (void) resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval); if (tval) { isp->isp_confopts &= ~ISP_CFG_FCTAPE; isp->isp_confopts |= ISP_CFG_NOFCTAPE; } tval = 0; snprintf(name, sizeof(name), "%sfctape", prefix); (void) resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval); if (tval) { isp->isp_confopts &= ~ISP_CFG_NOFCTAPE; isp->isp_confopts |= ISP_CFG_FCTAPE; } /* * Because the resource_*_value functions can neither return * 64-bit integer values, nor can they be directly coerced * to interpret the right-hand side of the assignment as * you want them to interpret it, we have to force WWN * hint replacement to specify WWN strings with a leading * 'w' (e.g. w50000000aaaa0001). Sigh. */ sptr = NULL; snprintf(name, sizeof(name), "%sportwwn", prefix); tval = resource_string_value(device_get_name(dev), device_get_unit(dev), name, (const char **) &sptr); if (tval == 0 && sptr != NULL && *sptr++ == 'w') { char *eptr = NULL; ISP_FC_PC(isp, chan)->def_wwpn = strtouq(sptr, &eptr, 16); if (eptr < sptr + 16 || ISP_FC_PC(isp, chan)->def_wwpn == -1) { device_printf(dev, "mangled portwwn hint '%s'\n", sptr); ISP_FC_PC(isp, chan)->def_wwpn = 0; } } sptr = NULL; snprintf(name, sizeof(name), "%snodewwn", prefix); tval = resource_string_value(device_get_name(dev), device_get_unit(dev), name, (const char **) &sptr); if (tval == 0 && sptr != NULL && *sptr++ == 'w') { char *eptr = NULL; ISP_FC_PC(isp, chan)->def_wwnn = strtouq(sptr, &eptr, 16); if (eptr < sptr + 16 || ISP_FC_PC(isp, chan)->def_wwnn == 0) { device_printf(dev, "mangled nodewwn hint '%s'\n", sptr); ISP_FC_PC(isp, chan)->def_wwnn = 0; } } tval = -1; snprintf(name, sizeof(name), "%sloop_down_limit", prefix); (void) resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval); if (tval >= 0 && tval < 0xffff) { ISP_FC_PC(isp, chan)->loop_down_limit = tval; } else { ISP_FC_PC(isp, chan)->loop_down_limit = isp_loop_down_limit; } tval = -1; snprintf(name, sizeof(name), "%sgone_device_time", prefix); (void) resource_int_value(device_get_name(dev), device_get_unit(dev), name, &tval); if (tval >= 0 && tval < 0xffff) { ISP_FC_PC(isp, chan)->gone_device_time = tval; } else { ISP_FC_PC(isp, chan)->gone_device_time = isp_gone_device_time; } } static int isp_pci_attach(device_t dev) { struct isp_pcisoftc *pcs = device_get_softc(dev); ispsoftc_t *isp = &pcs->pci_isp; int i; uint32_t data, cmd, linesz, did; size_t psize, xsize; char fwname[32]; pcs->pci_dev = dev; isp->isp_dev = dev; isp->isp_nchan = 1;
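isp_get_generic_options() and isp_get_specific_options() above lean on one idiom throughout: resource_int_value(9) returns 0 only when a hint exists, so the code preloads a sentinel or default into tval and only trusts it afterwards. A minimal sketch of that idiom; the hint name "example_limit", the example_get_limit() helper, and the fallback value are invented for illustration:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>

/*
 * Fetch a per-device integer hint (e.g. hint.isp.0.example_limit),
 * falling back to a default when it is not set.  resource_int_value(9)
 * returns 0 only when the hint exists, so tval keeps its preloaded
 * sentinel otherwise.
 */
static int
example_get_limit(device_t dev)
{
	int tval;

	tval = -1;
	(void) resource_int_value(device_get_name(dev),
	    device_get_unit(dev), "example_limit", &tval);
	if (tval < 0)
		tval = 16;	/* invented default for this sketch */
	return (tval);
}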
mtx_init(&isp->isp_lock, "isp", NULL, MTX_DEF); /* * Get Generic Options */ isp_nvports = 0; isp_get_generic_options(dev, isp); linesz = PCI_DFLT_LNSZ; pcs->regs = pcs->regs2 = NULL; pcs->rgd = pcs->rtp = 0; pcs->pci_dev = dev; pcs->pci_poff[BIU_BLOCK >> _BLK_REG_SHFT] = BIU_REGS_OFF; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS_OFF; pcs->pci_poff[SXP_BLOCK >> _BLK_REG_SHFT] = PCI_SXP_REGS_OFF; pcs->pci_poff[RISC_BLOCK >> _BLK_REG_SHFT] = PCI_RISC_REGS_OFF; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = DMA_REGS_OFF; switch (pci_get_devid(dev)) { case PCI_QLOGIC_ISP1020: did = 0x1040; isp->isp_mdvec = &mdvec; isp->isp_type = ISP_HA_SCSI_UNKNOWN; break; case PCI_QLOGIC_ISP1080: did = 0x1080; isp->isp_mdvec = &mdvec_1080; isp->isp_type = ISP_HA_SCSI_1080; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = ISP1080_DMA_REGS_OFF; break; case PCI_QLOGIC_ISP1240: did = 0x1080; isp->isp_mdvec = &mdvec_1080; isp->isp_type = ISP_HA_SCSI_1240; isp->isp_nchan = 2; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = ISP1080_DMA_REGS_OFF; break; case PCI_QLOGIC_ISP1280: did = 0x1080; isp->isp_mdvec = &mdvec_1080; isp->isp_type = ISP_HA_SCSI_1280; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = ISP1080_DMA_REGS_OFF; break; case PCI_QLOGIC_ISP10160: did = 0x12160; isp->isp_mdvec = &mdvec_12160; isp->isp_type = ISP_HA_SCSI_10160; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = ISP1080_DMA_REGS_OFF; break; case PCI_QLOGIC_ISP12160: did = 0x12160; isp->isp_nchan = 2; isp->isp_mdvec = &mdvec_12160; isp->isp_type = ISP_HA_SCSI_12160; pcs->pci_poff[DMA_BLOCK >> _BLK_REG_SHFT] = ISP1080_DMA_REGS_OFF; break; case PCI_QLOGIC_ISP2100: did = 0x2100; isp->isp_mdvec = &mdvec_2100; isp->isp_type = ISP_HA_FC_2100; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2100_OFF; if (pci_get_revid(dev) < 3) { /* * XXX: Need to get the actual revision * XXX: number of the 2100 FB. At any rate, * XXX: lower cache line size for early revision * XXX: boards.
*/ linesz = 1; } break; case PCI_QLOGIC_ISP2200: did = 0x2200; isp->isp_mdvec = &mdvec_2200; isp->isp_type = ISP_HA_FC_2200; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2100_OFF; break; case PCI_QLOGIC_ISP2300: did = 0x2300; isp->isp_mdvec = &mdvec_2300; isp->isp_type = ISP_HA_FC_2300; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2300_OFF; break; case PCI_QLOGIC_ISP2312: case PCI_QLOGIC_ISP6312: did = 0x2300; isp->isp_mdvec = &mdvec_2300; isp->isp_type = ISP_HA_FC_2312; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2300_OFF; break; case PCI_QLOGIC_ISP2322: case PCI_QLOGIC_ISP6322: did = 0x2322; isp->isp_mdvec = &mdvec_2300; isp->isp_type = ISP_HA_FC_2322; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2300_OFF; break; case PCI_QLOGIC_ISP2422: case PCI_QLOGIC_ISP2432: did = 0x2400; isp->isp_nchan += isp_nvports; isp->isp_mdvec = &mdvec_2400; isp->isp_type = ISP_HA_FC_2400; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2400_OFF; break; case PCI_QLOGIC_ISP2532: did = 0x2500; isp->isp_nchan += isp_nvports; isp->isp_mdvec = &mdvec_2500; isp->isp_type = ISP_HA_FC_2500; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2400_OFF; break; case PCI_QLOGIC_ISP5432: did = 0x2500; isp->isp_mdvec = &mdvec_2500; isp->isp_type = ISP_HA_FC_2500; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2400_OFF; break; case PCI_QLOGIC_ISP2031: case PCI_QLOGIC_ISP8031: did = 0x2600; isp->isp_nchan += isp_nvports; isp->isp_mdvec = &mdvec_2600; isp->isp_type = ISP_HA_FC_2600; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2400_OFF; break; case PCI_QLOGIC_ISP2684: case PCI_QLOGIC_ISP2692: case PCI_QLOGIC_ISP2714: case PCI_QLOGIC_ISP2722: did = 0x2700; isp->isp_nchan += isp_nvports; isp->isp_mdvec = &mdvec_2700; isp->isp_type = ISP_HA_FC_2700; pcs->pci_poff[MBOX_BLOCK >> _BLK_REG_SHFT] = PCI_MBOX_REGS2400_OFF; break; default: device_printf(dev, "unknown device type\n"); goto bad; break; } isp->isp_revision = pci_get_revid(dev); if (IS_26XX(isp)) { pcs->rtp = SYS_RES_MEMORY; pcs->rgd = PCIR_BAR(0); pcs->regs = bus_alloc_resource_any(dev, pcs->rtp, &pcs->rgd, RF_ACTIVE); pcs->rtp1 = SYS_RES_MEMORY; pcs->rgd1 = PCIR_BAR(2); pcs->regs1 = bus_alloc_resource_any(dev, pcs->rtp1, &pcs->rgd1, RF_ACTIVE); pcs->rtp2 = SYS_RES_MEMORY; pcs->rgd2 = PCIR_BAR(4); pcs->regs2 = bus_alloc_resource_any(dev, pcs->rtp2, &pcs->rgd2, RF_ACTIVE); } else { pcs->rtp = SYS_RES_MEMORY; pcs->rgd = PCIR_BAR(1); pcs->regs = bus_alloc_resource_any(dev, pcs->rtp, &pcs->rgd, RF_ACTIVE); if (pcs->regs == NULL) { pcs->rtp = SYS_RES_IOPORT; pcs->rgd = PCIR_BAR(0); pcs->regs = bus_alloc_resource_any(dev, pcs->rtp, &pcs->rgd, RF_ACTIVE); } } if (pcs->regs == NULL) { device_printf(dev, "Unable to map any ports\n"); goto bad; } if (bootverbose) { device_printf(dev, "Using %s space register mapping\n", (pcs->rtp == SYS_RES_IOPORT)? 
"I/O" : "Memory"); } isp->isp_regs = pcs->regs; isp->isp_regs2 = pcs->regs2; if (IS_FC(isp)) { psize = sizeof (fcparam); xsize = sizeof (struct isp_fc); } else { psize = sizeof (sdparam); xsize = sizeof (struct isp_spi); } psize *= isp->isp_nchan; xsize *= isp->isp_nchan; isp->isp_param = malloc(psize, M_DEVBUF, M_NOWAIT | M_ZERO); if (isp->isp_param == NULL) { device_printf(dev, "cannot allocate parameter data\n"); goto bad; } isp->isp_osinfo.pc.ptr = malloc(xsize, M_DEVBUF, M_NOWAIT | M_ZERO); if (isp->isp_osinfo.pc.ptr == NULL) { device_printf(dev, "cannot allocate parameter data\n"); goto bad; } /* * Now that we know who we are (roughly) get/set specific options */ for (i = 0; i < isp->isp_nchan; i++) { isp_get_specific_options(dev, i, isp); } isp->isp_osinfo.fw = NULL; if (isp->isp_osinfo.fw == NULL) { snprintf(fwname, sizeof (fwname), "isp_%04x", did); isp->isp_osinfo.fw = firmware_get(fwname); } if (isp->isp_osinfo.fw != NULL) { isp_prt(isp, ISP_LOGCONFIG, "loaded firmware %s", fwname); isp->isp_mdvec->dv_ispfw = isp->isp_osinfo.fw->data; } /* * Make sure that SERR, PERR, WRITE INVALIDATE and BUSMASTER are set. */ cmd = pci_read_config(dev, PCIR_COMMAND, 2); cmd |= PCIM_CMD_SEREN | PCIM_CMD_PERRESPEN | PCIM_CMD_BUSMASTEREN | PCIM_CMD_INVEN; if (IS_2300(isp)) { /* per QLogic errata */ cmd &= ~PCIM_CMD_INVEN; } if (IS_2322(isp) || pci_get_devid(dev) == PCI_QLOGIC_ISP6312) { cmd &= ~PCIM_CMD_INTX_DISABLE; } if (IS_24XX(isp)) { cmd &= ~PCIM_CMD_INTX_DISABLE; } pci_write_config(dev, PCIR_COMMAND, cmd, 2); /* * Make sure the Cache Line Size register is set sensibly. */ data = pci_read_config(dev, PCIR_CACHELNSZ, 1); if (data == 0 || (linesz != PCI_DFLT_LNSZ && data != linesz)) { isp_prt(isp, ISP_LOGDEBUG0, "set PCI line size to %d from %d", linesz, data); data = linesz; pci_write_config(dev, PCIR_CACHELNSZ, data, 1); } /* * Make sure the Latency Timer is sane. */ data = pci_read_config(dev, PCIR_LATTIMER, 1); if (data < PCI_DFLT_LTNCY) { data = PCI_DFLT_LTNCY; isp_prt(isp, ISP_LOGDEBUG0, "set PCI latency to %d", data); pci_write_config(dev, PCIR_LATTIMER, data, 1); } /* * Make sure we've disabled the ROM. */ data = pci_read_config(dev, PCIR_ROMADDR, 4); data &= ~1; pci_write_config(dev, PCIR_ROMADDR, data, 4); /* * Last minute checks... */ if (IS_23XX(isp) || IS_24XX(isp)) { isp->isp_port = pci_get_function(dev); } /* * Make sure we're in reset state. */ ISP_LOCK(isp); if (isp_reinit(isp, 1) != 0) { ISP_UNLOCK(isp); goto bad; } ISP_UNLOCK(isp); if (isp_attach(isp)) { ISP_LOCK(isp); isp_shutdown(isp); ISP_UNLOCK(isp); goto bad; } return (0); bad: + if (isp->isp_osinfo.fw == NULL && !IS_26XX(isp)) { + /* + * Failure to attach at boot time might have been caused + * by a missing ispfw(4). Except for for 16Gb adapters, + * there's no loadable firmware for them. 
+ */ isp_prt(isp, ISP_LOGWARN, "See the ispfw(4) man page on " "how to load known good firmware at boot time"); } for (i = 0; i < isp->isp_nirq; i++) { (void) bus_teardown_intr(dev, pcs->irq[i].irq, pcs->irq[i].ih); (void) bus_release_resource(dev, SYS_RES_IRQ, pcs->irq[i].iqd, pcs->irq[i].irq); } if (pcs->msicount) { pci_release_msi(dev); } if (pcs->regs) (void) bus_release_resource(dev, pcs->rtp, pcs->rgd, pcs->regs); if (pcs->regs1) (void) bus_release_resource(dev, pcs->rtp1, pcs->rgd1, pcs->regs1); if (pcs->regs2) (void) bus_release_resource(dev, pcs->rtp2, pcs->rgd2, pcs->regs2); if (pcs->pci_isp.isp_param) { free(pcs->pci_isp.isp_param, M_DEVBUF); pcs->pci_isp.isp_param = NULL; } if (pcs->pci_isp.isp_osinfo.pc.ptr) { free(pcs->pci_isp.isp_osinfo.pc.ptr, M_DEVBUF); pcs->pci_isp.isp_osinfo.pc.ptr = NULL; } mtx_destroy(&isp->isp_lock); return (ENXIO); } static int isp_pci_detach(device_t dev) { struct isp_pcisoftc *pcs = device_get_softc(dev); ispsoftc_t *isp = &pcs->pci_isp; int i, status; status = isp_detach(isp); if (status) return (status); ISP_LOCK(isp); isp_shutdown(isp); ISP_UNLOCK(isp); for (i = 0; i < isp->isp_nirq; i++) { (void) bus_teardown_intr(dev, pcs->irq[i].irq, pcs->irq[i].ih); (void) bus_release_resource(dev, SYS_RES_IRQ, pcs->irq[i].iqd, pcs->irq[i].irq); } if (pcs->msicount) pci_release_msi(dev); (void) bus_release_resource(dev, pcs->rtp, pcs->rgd, pcs->regs); if (pcs->regs1) (void) bus_release_resource(dev, pcs->rtp1, pcs->rgd1, pcs->regs1); if (pcs->regs2) (void) bus_release_resource(dev, pcs->rtp2, pcs->rgd2, pcs->regs2); isp_pci_mbxdmafree(isp); if (pcs->pci_isp.isp_param) { free(pcs->pci_isp.isp_param, M_DEVBUF); pcs->pci_isp.isp_param = NULL; } if (pcs->pci_isp.isp_osinfo.pc.ptr) { free(pcs->pci_isp.isp_osinfo.pc.ptr, M_DEVBUF); pcs->pci_isp.isp_osinfo.pc.ptr = NULL; } mtx_destroy(&isp->isp_lock); return (0); } #define IspVirt2Off(a, x) \ (((struct isp_pcisoftc *)a)->pci_poff[((x) & _BLK_REG_MASK) >> \ _BLK_REG_SHFT] + ((x) & 0xfff)) #define BXR2(isp, off) bus_read_2((isp)->isp_regs, (off)) #define BXW2(isp, off, v) bus_write_2((isp)->isp_regs, (off), (v)) #define BXR4(isp, off) bus_read_4((isp)->isp_regs, (off)) #define BXW4(isp, off, v) bus_write_4((isp)->isp_regs, (off), (v)) #define B2R4(isp, off) bus_read_4((isp)->isp_regs2, (off)) #define B2W4(isp, off, v) bus_write_4((isp)->isp_regs2, (off), (v)) static ISP_INLINE uint16_t isp_pci_rd_debounced(ispsoftc_t *isp, int off) { uint16_t val, prev; val = BXR2(isp, IspVirt2Off(isp, off)); do { prev = val; val = BXR2(isp, IspVirt2Off(isp, off)); } while (val != prev); return (val); } static void isp_pci_run_isr(ispsoftc_t *isp) { uint16_t isr, sema, info; if (IS_2100(isp)) { isr = isp_pci_rd_debounced(isp, BIU_ISR); sema = isp_pci_rd_debounced(isp, BIU_SEMA); } else { isr = BXR2(isp, IspVirt2Off(isp, BIU_ISR)); sema = BXR2(isp, IspVirt2Off(isp, BIU_SEMA)); } isp_prt(isp, ISP_LOGDEBUG3, "ISR 0x%x SEMA 0x%x", isr, sema); isr &= INT_PENDING_MASK(isp); sema &= BIU_SEMA_LOCK; if (isr == 0 && sema == 0) return; if (sema != 0) { if (IS_2100(isp)) info = isp_pci_rd_debounced(isp, OUTMAILBOX0); else info = BXR2(isp, IspVirt2Off(isp, OUTMAILBOX0)); if (info & MBOX_COMMAND_COMPLETE) isp_intr_mbox(isp, info); else isp_intr_async(isp, info); if (!IS_FC(isp) && isp->isp_state == ISP_RUNSTATE) isp_intr_respq(isp); } else isp_intr_respq(isp); ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); if (sema) ISP_WRITE(isp, BIU_SEMA, 0); } static void isp_pci_run_isr_2300(ispsoftc_t *isp) { uint32_t hccr, r2hisr; uint16_t isr, info;
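isp_pci_rd_debounced() above keeps re-reading a 16-bit register until two consecutive samples agree, filtering out values caught mid-update. Here is the same loop in isolation, with reg_read() as a hypothetical stand-in for the BXR2() accessor:

#include <stdint.h>

extern uint16_t reg_read(int off);	/* stand-in for BXR2() */

/*
 * Debounced read: keep sampling until two back-to-back reads return
 * the same value, then trust it.  The first read primes the loop;
 * 'prev' always holds the previous sample.
 */
static uint16_t
read_debounced(int off)
{
	uint16_t val, prev;

	val = reg_read(off);
	do {
		prev = val;
		val = reg_read(off);
	} while (val != prev);
	return (val);
}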
if ((BXR2(isp, IspVirt2Off(isp, BIU_ISR)) & BIU2100_ISR_RISC_INT) == 0) return; r2hisr = BXR4(isp, IspVirt2Off(isp, BIU_R2HSTSLO)); isp_prt(isp, ISP_LOGDEBUG3, "RISC2HOST ISR 0x%x", r2hisr); if ((r2hisr & BIU_R2HST_INTR) == 0) return; isr = r2hisr & BIU_R2HST_ISTAT_MASK; info = r2hisr >> 16; switch (isr) { case ISPR2HST_ROM_MBX_OK: case ISPR2HST_ROM_MBX_FAIL: case ISPR2HST_MBX_OK: case ISPR2HST_MBX_FAIL: isp_intr_mbox(isp, info); break; case ISPR2HST_ASYNC_EVENT: isp_intr_async(isp, info); break; case ISPR2HST_RIO_16: isp_intr_async(isp, ASYNC_RIO16_1); break; case ISPR2HST_FPOST: isp_intr_async(isp, ASYNC_CMD_CMPLT); break; case ISPR2HST_FPOST_CTIO: isp_intr_async(isp, ASYNC_CTIO_DONE); break; case ISPR2HST_RSPQ_UPDATE: isp_intr_respq(isp); break; default: hccr = ISP_READ(isp, HCCR); if (hccr & HCCR_PAUSE) { ISP_WRITE(isp, HCCR, HCCR_RESET); isp_prt(isp, ISP_LOGERR, "RISC paused at interrupt (%x->%x)", hccr, ISP_READ(isp, HCCR)); ISP_WRITE(isp, BIU_ICR, 0); } else { isp_prt(isp, ISP_LOGERR, "unknown interrupt 0x%x\n", r2hisr); } } ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU_SEMA, 0); } static void isp_pci_run_isr_2400(ispsoftc_t *isp) { uint32_t r2hisr; uint16_t isr, info; r2hisr = BXR4(isp, IspVirt2Off(isp, BIU2400_R2HSTSLO)); isp_prt(isp, ISP_LOGDEBUG3, "RISC2HOST ISR 0x%x", r2hisr); if ((r2hisr & BIU_R2HST_INTR) == 0) return; isr = r2hisr & BIU_R2HST_ISTAT_MASK; info = (r2hisr >> 16); switch (isr) { case ISPR2HST_ROM_MBX_OK: case ISPR2HST_ROM_MBX_FAIL: case ISPR2HST_MBX_OK: case ISPR2HST_MBX_FAIL: isp_intr_mbox(isp, info); break; case ISPR2HST_ASYNC_EVENT: isp_intr_async(isp, info); break; case ISPR2HST_RSPQ_UPDATE: isp_intr_respq(isp); break; case ISPR2HST_RSPQ_UPDATE2: #ifdef ISP_TARGET_MODE case ISPR2HST_ATIO_RSPQ_UPDATE: #endif isp_intr_respq(isp); /* FALLTHROUGH */ #ifdef ISP_TARGET_MODE case ISPR2HST_ATIO_UPDATE: case ISPR2HST_ATIO_UPDATE2: isp_intr_atioq(isp); #endif break; default: isp_prt(isp, ISP_LOGERR, "unknown interrupt 0x%x\n", r2hisr); } ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } static uint32_t isp_pci_rd_reg(ispsoftc_t *isp, int regoff) { uint16_t rv; int oldconf = 0; if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { /* * We will assume that someone has paused the RISC processor. */ oldconf = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oldconf | BIU_PCI_CONF1_SXP); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } rv = BXR2(isp, IspVirt2Off(isp, regoff)); if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oldconf); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } return (rv); } static void isp_pci_wr_reg(ispsoftc_t *isp, int regoff, uint32_t val) { int oldconf = 0; if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { /* * We will assume that someone has paused the RISC processor. */ oldconf = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oldconf | BIU_PCI_CONF1_SXP); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } BXW2(isp, IspVirt2Off(isp, regoff), val); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, regoff), 2, -1); if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oldconf); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } } static uint32_t isp_pci_rd_reg_1080(ispsoftc_t *isp, int regoff) { uint32_t rv, oc = 0; if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { uint32_t tc; /* * We will assume that someone has paused the RISC processor. 
*/ oc = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); tc = oc & ~BIU_PCI1080_CONF1_DMA; if (regoff & SXP_BANK1_SELECT) tc |= BIU_PCI1080_CONF1_SXP1; else tc |= BIU_PCI1080_CONF1_SXP0; BXW2(isp, IspVirt2Off(isp, BIU_CONF1), tc); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } else if ((regoff & _BLK_REG_MASK) == DMA_BLOCK) { oc = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oc | BIU_PCI1080_CONF1_DMA); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } rv = BXR2(isp, IspVirt2Off(isp, regoff)); if (oc) { BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oc); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } return (rv); } static void isp_pci_wr_reg_1080(ispsoftc_t *isp, int regoff, uint32_t val) { int oc = 0; if ((regoff & _BLK_REG_MASK) == SXP_BLOCK) { uint32_t tc; /* * We will assume that someone has paused the RISC processor. */ oc = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); tc = oc & ~BIU_PCI1080_CONF1_DMA; if (regoff & SXP_BANK1_SELECT) tc |= BIU_PCI1080_CONF1_SXP1; else tc |= BIU_PCI1080_CONF1_SXP0; BXW2(isp, IspVirt2Off(isp, BIU_CONF1), tc); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } else if ((regoff & _BLK_REG_MASK) == DMA_BLOCK) { oc = BXR2(isp, IspVirt2Off(isp, BIU_CONF1)); BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oc | BIU_PCI1080_CONF1_DMA); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } BXW2(isp, IspVirt2Off(isp, regoff), val); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, regoff), 2, -1); if (oc) { BXW2(isp, IspVirt2Off(isp, BIU_CONF1), oc); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, BIU_CONF1), 2, -1); } } static uint32_t isp_pci_rd_reg_2400(ispsoftc_t *isp, int regoff) { uint32_t rv; int block = regoff & _BLK_REG_MASK; switch (block) { case BIU_BLOCK: break; case MBOX_BLOCK: return (BXR2(isp, IspVirt2Off(isp, regoff))); case SXP_BLOCK: isp_prt(isp, ISP_LOGERR, "SXP_BLOCK read at 0x%x", regoff); return (0xffffffff); case RISC_BLOCK: isp_prt(isp, ISP_LOGERR, "RISC_BLOCK read at 0x%x", regoff); return (0xffffffff); case DMA_BLOCK: isp_prt(isp, ISP_LOGERR, "DMA_BLOCK read at 0x%x", regoff); return (0xffffffff); default: isp_prt(isp, ISP_LOGERR, "unknown block read at 0x%x", regoff); return (0xffffffff); } switch (regoff) { case BIU2400_FLASH_ADDR: case BIU2400_FLASH_DATA: case BIU2400_ICR: case BIU2400_ISR: case BIU2400_CSR: case BIU2400_REQINP: case BIU2400_REQOUTP: case BIU2400_RSPINP: case BIU2400_RSPOUTP: case BIU2400_PRI_REQINP: case BIU2400_PRI_REQOUTP: case BIU2400_ATIO_RSPINP: case BIU2400_ATIO_RSPOUTP: case BIU2400_HCCR: case BIU2400_GPIOD: case BIU2400_GPIOE: case BIU2400_HSEMA: rv = BXR4(isp, IspVirt2Off(isp, regoff)); break; case BIU2400_R2HSTSLO: rv = BXR4(isp, IspVirt2Off(isp, regoff)); break; case BIU2400_R2HSTSHI: rv = BXR4(isp, IspVirt2Off(isp, regoff)) >> 16; break; default: isp_prt(isp, ISP_LOGERR, "unknown register read at 0x%x", regoff); rv = 0xffffffff; break; } return (rv); } static void isp_pci_wr_reg_2400(ispsoftc_t *isp, int regoff, uint32_t val) { int block = regoff & _BLK_REG_MASK; switch (block) { case BIU_BLOCK: break; case MBOX_BLOCK: BXW2(isp, IspVirt2Off(isp, regoff), val); MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, regoff), 2, -1); return; case SXP_BLOCK: isp_prt(isp, ISP_LOGERR, "SXP_BLOCK write at 0x%x", regoff); return; case RISC_BLOCK: isp_prt(isp, ISP_LOGERR, "RISC_BLOCK write at 0x%x", regoff); return; case DMA_BLOCK: isp_prt(isp, ISP_LOGERR, "DMA_BLOCK write at 0x%x", regoff); return; default: isp_prt(isp, ISP_LOGERR, 
"unknown block write at 0x%x", regoff); break; } switch (regoff) { case BIU2400_FLASH_ADDR: case BIU2400_FLASH_DATA: case BIU2400_ICR: case BIU2400_ISR: case BIU2400_CSR: case BIU2400_REQINP: case BIU2400_REQOUTP: case BIU2400_RSPINP: case BIU2400_RSPOUTP: case BIU2400_PRI_REQINP: case BIU2400_PRI_REQOUTP: case BIU2400_ATIO_RSPINP: case BIU2400_ATIO_RSPOUTP: case BIU2400_HCCR: case BIU2400_GPIOD: case BIU2400_GPIOE: case BIU2400_HSEMA: BXW4(isp, IspVirt2Off(isp, regoff), val); #ifdef MEMORYBARRIERW if (regoff == BIU2400_REQINP || regoff == BIU2400_RSPOUTP || regoff == BIU2400_PRI_REQINP || regoff == BIU2400_ATIO_RSPOUTP) MEMORYBARRIERW(isp, SYNC_REG, IspVirt2Off(isp, regoff), 4, -1) else #endif MEMORYBARRIER(isp, SYNC_REG, IspVirt2Off(isp, regoff), 4, -1); break; default: isp_prt(isp, ISP_LOGERR, "unknown register write at 0x%x", regoff); break; } } static uint32_t isp_pci_rd_reg_2600(ispsoftc_t *isp, int regoff) { uint32_t rv; switch (regoff) { case BIU2400_PRI_REQINP: case BIU2400_PRI_REQOUTP: isp_prt(isp, ISP_LOGERR, "unknown register read at 0x%x", regoff); rv = 0xffffffff; break; case BIU2400_REQINP: rv = B2R4(isp, 0x00); break; case BIU2400_REQOUTP: rv = B2R4(isp, 0x04); break; case BIU2400_RSPINP: rv = B2R4(isp, 0x08); break; case BIU2400_RSPOUTP: rv = B2R4(isp, 0x0c); break; case BIU2400_ATIO_RSPINP: rv = B2R4(isp, 0x10); break; case BIU2400_ATIO_RSPOUTP: rv = B2R4(isp, 0x14); break; default: rv = isp_pci_rd_reg_2400(isp, regoff); break; } return (rv); } static void isp_pci_wr_reg_2600(ispsoftc_t *isp, int regoff, uint32_t val) { int off; switch (regoff) { case BIU2400_PRI_REQINP: case BIU2400_PRI_REQOUTP: isp_prt(isp, ISP_LOGERR, "unknown register write at 0x%x", regoff); return; case BIU2400_REQINP: off = 0x00; break; case BIU2400_REQOUTP: off = 0x04; break; case BIU2400_RSPINP: off = 0x08; break; case BIU2400_RSPOUTP: off = 0x0c; break; case BIU2400_ATIO_RSPINP: off = 0x10; break; case BIU2400_ATIO_RSPOUTP: off = 0x14; break; default: isp_pci_wr_reg_2400(isp, regoff, val); return; } B2W4(isp, off, val); } struct imush { bus_addr_t maddr; int error; }; static void imc(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct imush *imushp = (struct imush *) arg; if (!(imushp->error = error)) imushp->maddr = segs[0].ds_addr; } static int isp_pci_mbxdma(ispsoftc_t *isp) { caddr_t base; uint32_t len, nsegs; int i, error, cmap = 0; bus_size_t slim; /* segment size */ bus_addr_t llim; /* low limit of unavailable dma */ bus_addr_t hlim; /* high limit of unavailable dma */ struct imush im; isp_ecmd_t *ecmd; /* Already been here? If so, leave... */ if (isp->isp_xflist != NULL) return (0); if (isp->isp_rquest != NULL && isp->isp_maxcmds == 0) return (0); ISP_UNLOCK(isp); if (isp->isp_rquest != NULL) goto gotmaxcmds; hlim = BUS_SPACE_MAXADDR; if (IS_ULTRA2(isp) || IS_FC(isp) || IS_1240(isp)) { if (sizeof (bus_size_t) > 4) slim = (bus_size_t) (1ULL << 32); else slim = (bus_size_t) (1UL << 31); llim = BUS_SPACE_MAXADDR; } else { slim = (1UL << 24); llim = BUS_SPACE_MAXADDR_32BIT; } if (sizeof (bus_size_t) > 4) nsegs = ISP_NSEG64_MAX; else nsegs = ISP_NSEG_MAX; if (bus_dma_tag_create(bus_get_dma_tag(ISP_PCD(isp)), 1, slim, llim, hlim, NULL, NULL, BUS_SPACE_MAXSIZE, nsegs, slim, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.dmat)) { ISP_LOCK(isp); isp_prt(isp, ISP_LOGERR, "could not create master dma tag"); return (1); } /* * Allocate and map the request queue and a region for external * DMA addressable command/status structures (22XX and later). 
*/ len = ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)); if (isp->isp_type >= ISP_HA_FC_2200) len += (N_XCMDS * XCMD_SIZE); if (bus_dma_tag_create(isp->isp_osinfo.dmat, QENTRY_LEN, slim, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.reqdmat)) { isp_prt(isp, ISP_LOGERR, "cannot create request DMA tag"); goto bad; } if (bus_dmamem_alloc(isp->isp_osinfo.reqdmat, (void **)&base, BUS_DMA_COHERENT, &isp->isp_osinfo.reqmap) != 0) { isp_prt(isp, ISP_LOGERR, "cannot allocate request DMA memory"); bus_dma_tag_destroy(isp->isp_osinfo.reqdmat); goto bad; } isp->isp_rquest = base; im.error = 0; if (bus_dmamap_load(isp->isp_osinfo.reqdmat, isp->isp_osinfo.reqmap, base, len, imc, &im, 0) || im.error) { isp_prt(isp, ISP_LOGERR, "error loading request DMA map %d", im.error); goto bad; } isp_prt(isp, ISP_LOGDEBUG0, "request area @ 0x%jx/0x%jx", (uintmax_t)im.maddr, (uintmax_t)len); isp->isp_rquest_dma = im.maddr; base += ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)); im.maddr += ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)); if (isp->isp_type >= ISP_HA_FC_2200) { isp->isp_osinfo.ecmd_dma = im.maddr; isp->isp_osinfo.ecmd_free = (isp_ecmd_t *)base; isp->isp_osinfo.ecmd_base = isp->isp_osinfo.ecmd_free; for (ecmd = isp->isp_osinfo.ecmd_free; ecmd < &isp->isp_osinfo.ecmd_free[N_XCMDS]; ecmd++) { if (ecmd == &isp->isp_osinfo.ecmd_free[N_XCMDS - 1]) ecmd->next = NULL; else ecmd->next = ecmd + 1; } } /* * Allocate and map the result queue. */ len = ISP_QUEUE_SIZE(RESULT_QUEUE_LEN(isp)); if (bus_dma_tag_create(isp->isp_osinfo.dmat, QENTRY_LEN, slim, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.respdmat)) { isp_prt(isp, ISP_LOGERR, "cannot create response DMA tag"); goto bad; } if (bus_dmamem_alloc(isp->isp_osinfo.respdmat, (void **)&base, BUS_DMA_COHERENT, &isp->isp_osinfo.respmap) != 0) { isp_prt(isp, ISP_LOGERR, "cannot allocate response DMA memory"); bus_dma_tag_destroy(isp->isp_osinfo.respdmat); goto bad; } isp->isp_result = base; im.error = 0; if (bus_dmamap_load(isp->isp_osinfo.respdmat, isp->isp_osinfo.respmap, base, len, imc, &im, 0) || im.error) { isp_prt(isp, ISP_LOGERR, "error loading response DMA map %d", im.error); goto bad; } isp_prt(isp, ISP_LOGDEBUG0, "response area @ 0x%jx/0x%jx", (uintmax_t)im.maddr, (uintmax_t)len); isp->isp_result_dma = im.maddr; #ifdef ISP_TARGET_MODE /* * Allocate and map ATIO queue on 24xx with target mode. 
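Every queue in isp_pci_mbxdma() goes through the same three busdma steps: create a tag describing alignment and addressing constraints, allocate coherent memory against it, and load the map with a callback that records the returned bus address (the struct imush/imc() pair above). A stripped-down sketch of that sequence, with invented names (struct dma_area, dma_cb(), dma_area_alloc()) and the error unwinding elided:

#include <sys/param.h>
#include <sys/systm.h>
#include <machine/bus.h>

struct dma_area {
	bus_dma_tag_t	tag;
	bus_dmamap_t	map;
	void		*vaddr;		/* KVA of the allocation */
	bus_addr_t	paddr;		/* bus address captured by dma_cb() */
};

/* Load callback: a single-segment load reports its bus address here. */
static void
dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct dma_area *da = arg;

	if (error == 0 && nseg == 1)
		da->paddr = segs[0].ds_addr;
}

static int
dma_area_alloc(bus_dma_tag_t parent, struct dma_area *da, bus_size_t len)
{
	/* One contiguous, 64-byte-aligned, 32-bit-addressable segment. */
	if (bus_dma_tag_create(parent, 64, 0, BUS_SPACE_MAXADDR_32BIT,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL,
	    &da->tag) != 0)
		return (-1);
	if (bus_dmamem_alloc(da->tag, &da->vaddr, BUS_DMA_COHERENT,
	    &da->map) != 0)
		return (-1);
	if (bus_dmamap_load(da->tag, da->map, da->vaddr, len, dma_cb,
	    da, BUS_DMA_NOWAIT) != 0)
		return (-1);
	return (0);
}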
*/ if (IS_24XX(isp)) { len = ISP_QUEUE_SIZE(RESULT_QUEUE_LEN(isp)); if (bus_dma_tag_create(isp->isp_osinfo.dmat, QENTRY_LEN, slim, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.atiodmat)) { isp_prt(isp, ISP_LOGERR, "cannot create ATIO DMA tag"); goto bad; } if (bus_dmamem_alloc(isp->isp_osinfo.atiodmat, (void **)&base, BUS_DMA_COHERENT, &isp->isp_osinfo.atiomap) != 0) { isp_prt(isp, ISP_LOGERR, "cannot allocate ATIO DMA memory"); bus_dma_tag_destroy(isp->isp_osinfo.atiodmat); goto bad; } isp->isp_atioq = base; im.error = 0; if (bus_dmamap_load(isp->isp_osinfo.atiodmat, isp->isp_osinfo.atiomap, base, len, imc, &im, 0) || im.error) { isp_prt(isp, ISP_LOGERR, "error loading ATIO DMA map %d", im.error); goto bad; } isp_prt(isp, ISP_LOGDEBUG0, "ATIO area @ 0x%jx/0x%jx", (uintmax_t)im.maddr, (uintmax_t)len); isp->isp_atioq_dma = im.maddr; } #endif if (IS_FC(isp)) { if (bus_dma_tag_create(isp->isp_osinfo.dmat, 64, slim, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 2*QENTRY_LEN, 1, 2*QENTRY_LEN, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.iocbdmat)) { goto bad; } if (bus_dmamem_alloc(isp->isp_osinfo.iocbdmat, (void **)&base, BUS_DMA_COHERENT, &isp->isp_osinfo.iocbmap) != 0) goto bad; isp->isp_iocb = base; im.error = 0; if (bus_dmamap_load(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, base, 2*QENTRY_LEN, imc, &im, 0) || im.error) goto bad; isp->isp_iocb_dma = im.maddr; if (bus_dma_tag_create(isp->isp_osinfo.dmat, 64, slim, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, ISP_FC_SCRLEN, 1, ISP_FC_SCRLEN, 0, busdma_lock_mutex, &isp->isp_lock, &isp->isp_osinfo.scdmat)) goto bad; for (cmap = 0; cmap < isp->isp_nchan; cmap++) { struct isp_fc *fc = ISP_FC_PC(isp, cmap); if (bus_dmamem_alloc(isp->isp_osinfo.scdmat, (void **)&base, BUS_DMA_COHERENT, &fc->scmap) != 0) goto bad; FCPARAM(isp, cmap)->isp_scratch = base; im.error = 0; if (bus_dmamap_load(isp->isp_osinfo.scdmat, fc->scmap, base, ISP_FC_SCRLEN, imc, &im, 0) || im.error) { bus_dmamem_free(isp->isp_osinfo.scdmat, base, fc->scmap); FCPARAM(isp, cmap)->isp_scratch = NULL; goto bad; } FCPARAM(isp, cmap)->isp_scdma = im.maddr; if (!IS_2100(isp)) { for (i = 0; i < INITIAL_NEXUS_COUNT; i++) { struct isp_nexus *n = malloc(sizeof (struct isp_nexus), M_DEVBUF, M_NOWAIT | M_ZERO); if (n == NULL) { while (fc->nexus_free_list) { n = fc->nexus_free_list; fc->nexus_free_list = n->next; free(n, M_DEVBUF); } goto bad; } n->next = fc->nexus_free_list; fc->nexus_free_list = n; } } } } if (isp->isp_maxcmds == 0) { ISP_LOCK(isp); return (0); } gotmaxcmds: len = isp->isp_maxcmds * sizeof (struct isp_pcmd); isp->isp_osinfo.pcmd_pool = (struct isp_pcmd *) malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < isp->isp_maxcmds; i++) { struct isp_pcmd *pcmd = &isp->isp_osinfo.pcmd_pool[i]; error = bus_dmamap_create(isp->isp_osinfo.dmat, 0, &pcmd->dmap); if (error) { isp_prt(isp, ISP_LOGERR, "error %d creating per-cmd DMA maps", error); while (--i >= 0) { bus_dmamap_destroy(isp->isp_osinfo.dmat, isp->isp_osinfo.pcmd_pool[i].dmap); } goto bad; } callout_init_mtx(&pcmd->wdog, &isp->isp_lock, 0); if (i == isp->isp_maxcmds-1) pcmd->next = NULL; else pcmd->next = &isp->isp_osinfo.pcmd_pool[i+1]; } isp->isp_osinfo.pcmd_free = &isp->isp_osinfo.pcmd_pool[0]; len = sizeof (isp_hdl_t) * isp->isp_maxcmds; isp->isp_xflist = (isp_hdl_t *) malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); for (len = 0; len < isp->isp_maxcmds - 1; len++) isp->isp_xflist[len].cmd = &isp->isp_xflist[len+1]; 
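	/*
	 * The zeroed handle array is threaded into a singly linked free
	 * list through the 'cmd' pointers (the last link stays NULL from
	 * M_ZERO), and isp_xffree below points at the head.  Roughly,
	 * for an array list[] of N zeroed entries:
	 *
	 *	for (i = 0; i < N - 1; i++)
	 *		list[i].cmd = &list[i + 1];
	 *	free_head = &list[0];
	 */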
isp->isp_xffree = isp->isp_xflist; ISP_LOCK(isp); return (0); bad: isp_pci_mbxdmafree(isp); ISP_LOCK(isp); return (1); } static void isp_pci_mbxdmafree(ispsoftc_t *isp) { int i; if (isp->isp_xflist != NULL) { free(isp->isp_xflist, M_DEVBUF); isp->isp_xflist = NULL; } if (isp->isp_osinfo.pcmd_pool != NULL) { for (i = 0; i < isp->isp_maxcmds; i++) { bus_dmamap_destroy(isp->isp_osinfo.dmat, isp->isp_osinfo.pcmd_pool[i].dmap); } free(isp->isp_osinfo.pcmd_pool, M_DEVBUF); isp->isp_osinfo.pcmd_pool = NULL; } if (IS_FC(isp)) { for (i = 0; i < isp->isp_nchan; i++) { struct isp_fc *fc = ISP_FC_PC(isp, i); if (FCPARAM(isp, i)->isp_scdma != 0) { bus_dmamap_unload(isp->isp_osinfo.scdmat, fc->scmap); FCPARAM(isp, i)->isp_scdma = 0; } if (FCPARAM(isp, i)->isp_scratch != NULL) { bus_dmamem_free(isp->isp_osinfo.scdmat, FCPARAM(isp, i)->isp_scratch, fc->scmap); FCPARAM(isp, i)->isp_scratch = NULL; } while (fc->nexus_free_list) { struct isp_nexus *n = fc->nexus_free_list; fc->nexus_free_list = n->next; free(n, M_DEVBUF); } } if (isp->isp_iocb_dma != 0) { bus_dma_tag_destroy(isp->isp_osinfo.scdmat); bus_dmamap_unload(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap); isp->isp_iocb_dma = 0; } if (isp->isp_iocb != NULL) { bus_dmamem_free(isp->isp_osinfo.iocbdmat, isp->isp_iocb, isp->isp_osinfo.iocbmap); bus_dma_tag_destroy(isp->isp_osinfo.iocbdmat); } } #ifdef ISP_TARGET_MODE if (IS_24XX(isp)) { if (isp->isp_atioq_dma != 0) { bus_dmamap_unload(isp->isp_osinfo.atiodmat, isp->isp_osinfo.atiomap); isp->isp_atioq_dma = 0; } if (isp->isp_atioq != NULL) { bus_dmamem_free(isp->isp_osinfo.atiodmat, isp->isp_atioq, isp->isp_osinfo.atiomap); bus_dma_tag_destroy(isp->isp_osinfo.atiodmat); isp->isp_atioq = NULL; } } #endif if (isp->isp_result_dma != 0) { bus_dmamap_unload(isp->isp_osinfo.respdmat, isp->isp_osinfo.respmap); isp->isp_result_dma = 0; } if (isp->isp_result != NULL) { bus_dmamem_free(isp->isp_osinfo.respdmat, isp->isp_result, isp->isp_osinfo.respmap); bus_dma_tag_destroy(isp->isp_osinfo.respdmat); isp->isp_result = NULL; } if (isp->isp_rquest_dma != 0) { bus_dmamap_unload(isp->isp_osinfo.reqdmat, isp->isp_osinfo.reqmap); isp->isp_rquest_dma = 0; } if (isp->isp_rquest != NULL) { bus_dmamem_free(isp->isp_osinfo.reqdmat, isp->isp_rquest, isp->isp_osinfo.reqmap); bus_dma_tag_destroy(isp->isp_osinfo.reqdmat); isp->isp_rquest = NULL; } } typedef struct { ispsoftc_t *isp; void *cmd_token; void *rq; /* original request */ int error; } mush_t; #define MUSHERR_NOQENTRIES -2 static void dma2(void *arg, bus_dma_segment_t *dm_segs, int nseg, int error) { mush_t *mp = (mush_t *) arg; ispsoftc_t *isp= mp->isp; struct ccb_scsiio *csio = mp->cmd_token; isp_ddir_t ddir; int sdir; if (error) { mp->error = error; return; } if (nseg == 0) { ddir = ISP_NOXFR; } else { if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { ddir = ISP_FROM_DEVICE; } else { ddir = ISP_TO_DEVICE; } if ((csio->ccb_h.func_code == XPT_CONT_TARGET_IO) ^ ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)) { sdir = BUS_DMASYNC_PREREAD; } else { sdir = BUS_DMASYNC_PREWRITE; } bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, sdir); } error = isp_send_cmd(isp, mp->rq, dm_segs, nseg, XS_XFRLEN(csio), ddir, (ispds64_t *)csio->req_map); switch (error) { case CMD_EAGAIN: mp->error = MUSHERR_NOQENTRIES; break; case CMD_QUEUED: break; default: mp->error = EIO; break; } } static int isp_pci_dmasetup(ispsoftc_t *isp, struct ccb_scsiio *csio, void *ff) { mush_t mush, *mp; int error; mp = &mush; mp->isp = isp; mp->cmd_token = csio; mp->rq = ff; mp->error = 
0; error = bus_dmamap_load_ccb(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, (union ccb *)csio, dma2, mp, 0); if (error == EINPROGRESS) { bus_dmamap_unload(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap); mp->error = EINVAL; isp_prt(isp, ISP_LOGERR, "deferred dma allocation not supported"); } else if (error && mp->error == 0) { #ifdef DIAGNOSTIC isp_prt(isp, ISP_LOGERR, "error %d in dma mapping code", error); #endif mp->error = error; } if (mp->error) { int retval = CMD_COMPLETE; if (mp->error == MUSHERR_NOQENTRIES) { retval = CMD_EAGAIN; } else if (mp->error == EFBIG) { csio->ccb_h.status = CAM_REQ_TOO_BIG; } else if (mp->error == EINVAL) { csio->ccb_h.status = CAM_REQ_INVALID; } else { csio->ccb_h.status = CAM_UNREC_HBA_ERROR; } return (retval); } return (CMD_QUEUED); } static int isp_pci_irqsetup(ispsoftc_t *isp) { device_t dev = isp->isp_osinfo.dev; struct isp_pcisoftc *pcs = device_get_softc(dev); driver_intr_t *f; int i, max_irq; /* Allocate IRQs only once. */ if (isp->isp_nirq > 0) return (0); ISP_UNLOCK(isp); if (ISP_CAP_MSIX(isp)) { max_irq = IS_26XX(isp) ? 3 : (IS_25XX(isp) ? 2 : 0); resource_int_value(device_get_name(dev), device_get_unit(dev), "msix", &max_irq); max_irq = imin(ISP_MAX_IRQS, max_irq); pcs->msicount = imin(pci_msix_count(dev), max_irq); if (pcs->msicount > 0 && pci_alloc_msix(dev, &pcs->msicount) != 0) pcs->msicount = 0; } if (pcs->msicount == 0) { max_irq = 1; resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &max_irq); max_irq = imin(1, max_irq); pcs->msicount = imin(pci_msi_count(dev), max_irq); if (pcs->msicount > 0 && pci_alloc_msi(dev, &pcs->msicount) != 0) pcs->msicount = 0; } for (i = 0; i < MAX(1, pcs->msicount); i++) { pcs->irq[i].iqd = i + (pcs->msicount > 0); pcs->irq[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &pcs->irq[i].iqd, RF_ACTIVE | RF_SHAREABLE); if (pcs->irq[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); break; } if (i == 0) f = isp_platform_intr; else if (i == 1) f = isp_platform_intr_resp; else f = isp_platform_intr_atio; if (bus_setup_intr(dev, pcs->irq[i].irq, ISP_IFLAGS, NULL, f, isp, &pcs->irq[i].ih)) { device_printf(dev, "could not setup interrupt\n"); (void) bus_release_resource(dev, SYS_RES_IRQ, pcs->irq[i].iqd, pcs->irq[i].irq); break; } if (pcs->msicount > 1) { bus_describe_intr(dev, pcs->irq[i].irq, pcs->irq[i].ih, "%d", i); } isp->isp_nirq = i + 1; } ISP_LOCK(isp); return (isp->isp_nirq == 0); } static void isp_pci_dumpregs(ispsoftc_t *isp, const char *msg) { struct isp_pcisoftc *pcs = (struct isp_pcisoftc *)isp; if (msg) printf("%s: %s\n", device_get_nameunit(isp->isp_dev), msg); else printf("%s:\n", device_get_nameunit(isp->isp_dev)); if (IS_SCSI(isp)) printf(" biu_conf1=%x", ISP_READ(isp, BIU_CONF1)); else printf(" biu_csr=%x", ISP_READ(isp, BIU2100_CSR)); printf(" biu_icr=%x biu_isr=%x biu_sema=%x ", ISP_READ(isp, BIU_ICR), ISP_READ(isp, BIU_ISR), ISP_READ(isp, BIU_SEMA)); printf("risc_hccr=%x\n", ISP_READ(isp, HCCR)); if (IS_SCSI(isp)) { ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); printf(" cdma_conf=%x cdma_sts=%x cdma_fifostat=%x\n", ISP_READ(isp, CDMA_CONF), ISP_READ(isp, CDMA_STATUS), ISP_READ(isp, CDMA_FIFO_STS)); printf(" ddma_conf=%x ddma_sts=%x ddma_fifostat=%x\n", ISP_READ(isp, DDMA_CONF), ISP_READ(isp, DDMA_STATUS), ISP_READ(isp, DDMA_FIFO_STS)); printf(" sxp_int=%x sxp_gross=%x sxp(scsi_ctrl)=%x\n", ISP_READ(isp, SXP_INTERRUPT), ISP_READ(isp, SXP_GROSS_ERR), ISP_READ(isp, SXP_PINS_CTRL)); ISP_WRITE(isp, HCCR, HCCR_CMD_RELEASE); } printf(" mbox regs: %x %x %x %x %x\n", 
ISP_READ(isp, OUTMAILBOX0), ISP_READ(isp, OUTMAILBOX1), ISP_READ(isp, OUTMAILBOX2), ISP_READ(isp, OUTMAILBOX3), ISP_READ(isp, OUTMAILBOX4)); printf(" PCI Status Command/Status=%x\n", pci_read_config(pcs->pci_dev, PCIR_COMMAND, 1)); } Index: projects/runtime-coverage-v2/sys/dev/xdma/xdma.h =================================================================== --- projects/runtime-coverage-v2/sys/dev/xdma/xdma.h (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/xdma/xdma.h (revision 346925) @@ -1,264 +1,264 @@ /*- * Copyright (c) 2016-2018 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_XDMA_XDMA_H_ #define _DEV_XDMA_XDMA_H_ #include enum xdma_direction { XDMA_MEM_TO_MEM, XDMA_MEM_TO_DEV, XDMA_DEV_TO_MEM, XDMA_DEV_TO_DEV, }; enum xdma_operation_type { XDMA_MEMCPY, XDMA_CYCLIC, XDMA_FIFO, XDMA_SG, }; enum xdma_request_type { XR_TYPE_PHYS, XR_TYPE_VIRT, XR_TYPE_MBUF, XR_TYPE_BIO, }; enum xdma_command { XDMA_CMD_BEGIN, XDMA_CMD_PAUSE, XDMA_CMD_TERMINATE, }; struct xdma_transfer_status { uint32_t transferred; int error; }; typedef struct xdma_transfer_status xdma_transfer_status_t; struct xdma_controller { device_t dev; /* DMA consumer device_t. */ device_t dma_dev; /* A real DMA device_t. */ void *data; /* OFW MD part. */ /* List of virtual channels allocated. 
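 *
 * Channels are added here by xdma_channel_alloc() and linked through
 * their xchan_next entries.  An illustrative consumer sketch (the
 * "dma" property name is made up for the example):
 *
 *	xdma_controller_t *xdma = xdma_ofw_get(dev, "dma");
 *	xdma_channel_t *xchan = xdma_channel_alloc(xdma, XCHAN_CAP_BUSDMA);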
*/ TAILQ_HEAD(xdma_channel_list, xdma_channel) channels; }; typedef struct xdma_controller xdma_controller_t; struct xchan_buf { bus_dmamap_t map; uint32_t nsegs; uint32_t nsegs_left; - void *cbuf; }; struct xdma_request { struct mbuf *m; struct bio *bp; enum xdma_operation_type operation; enum xdma_request_type req_type; enum xdma_direction direction; bus_addr_t src_addr; bus_addr_t dst_addr; uint8_t src_width; uint8_t dst_width; bus_size_t block_num; bus_size_t block_len; xdma_transfer_status_t status; void *user; TAILQ_ENTRY(xdma_request) xr_next; struct xchan_buf buf; }; struct xdma_sglist { bus_addr_t src_addr; bus_addr_t dst_addr; size_t len; uint8_t src_width; uint8_t dst_width; enum xdma_direction direction; bool first; bool last; }; struct xdma_channel { xdma_controller_t *xdma; uint32_t flags; #define XCHAN_BUFS_ALLOCATED (1 << 0) #define XCHAN_SGLIST_ALLOCATED (1 << 1) #define XCHAN_CONFIGURED (1 << 2) #define XCHAN_TYPE_CYCLIC (1 << 3) #define XCHAN_TYPE_MEMCPY (1 << 4) #define XCHAN_TYPE_FIFO (1 << 5) #define XCHAN_TYPE_SG (1 << 6) uint32_t caps; #define XCHAN_CAP_BUSDMA (1 << 0) -#define XCHAN_CAP_BUSDMA_NOSEG (1 << 1) +#define XCHAN_CAP_NOSEG (1 << 1) +#define XCHAN_CAP_NOBUFS (1 << 2) /* A real hardware driver channel. */ void *chan; /* Interrupt handlers. */ TAILQ_HEAD(, xdma_intr_handler) ie_handlers; TAILQ_ENTRY(xdma_channel) xchan_next; struct sx sx_lock; struct sx sx_qin_lock; struct sx sx_qout_lock; struct sx sx_bank_lock; struct sx sx_proc_lock; /* Request queue. */ bus_dma_tag_t dma_tag_bufs; struct xdma_request *xr_mem; uint32_t xr_num; /* Bus dma tag options. */ bus_size_t maxsegsize; bus_size_t maxnsegs; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; struct xdma_sglist *sg; TAILQ_HEAD(, xdma_request) bank; TAILQ_HEAD(, xdma_request) queue_in; TAILQ_HEAD(, xdma_request) queue_out; TAILQ_HEAD(, xdma_request) processing; }; typedef struct xdma_channel xdma_channel_t; struct xdma_intr_handler { int (*cb)(void *cb_user, xdma_transfer_status_t *status); void *cb_user; TAILQ_ENTRY(xdma_intr_handler) ih_next; }; static MALLOC_DEFINE(M_XDMA, "xdma", "xDMA framework"); #define XCHAN_LOCK(xchan) sx_xlock(&(xchan)->sx_lock) #define XCHAN_UNLOCK(xchan) sx_xunlock(&(xchan)->sx_lock) #define XCHAN_ASSERT_LOCKED(xchan) \ sx_assert(&(xchan)->sx_lock, SX_XLOCKED) #define QUEUE_IN_LOCK(xchan) sx_xlock(&(xchan)->sx_qin_lock) #define QUEUE_IN_UNLOCK(xchan) sx_xunlock(&(xchan)->sx_qin_lock) #define QUEUE_IN_ASSERT_LOCKED(xchan) \ sx_assert(&(xchan)->sx_qin_lock, SX_XLOCKED) #define QUEUE_OUT_LOCK(xchan) sx_xlock(&(xchan)->sx_qout_lock) #define QUEUE_OUT_UNLOCK(xchan) sx_xunlock(&(xchan)->sx_qout_lock) #define QUEUE_OUT_ASSERT_LOCKED(xchan) \ sx_assert(&(xchan)->sx_qout_lock, SX_XLOCKED) #define QUEUE_BANK_LOCK(xchan) sx_xlock(&(xchan)->sx_bank_lock) #define QUEUE_BANK_UNLOCK(xchan) sx_xunlock(&(xchan)->sx_bank_lock) #define QUEUE_BANK_ASSERT_LOCKED(xchan) \ sx_assert(&(xchan)->sx_bank_lock, SX_XLOCKED) #define QUEUE_PROC_LOCK(xchan) sx_xlock(&(xchan)->sx_proc_lock) #define QUEUE_PROC_UNLOCK(xchan) sx_xunlock(&(xchan)->sx_proc_lock) #define QUEUE_PROC_ASSERT_LOCKED(xchan) \ sx_assert(&(xchan)->sx_proc_lock, SX_XLOCKED) #define XDMA_SGLIST_MAXLEN 2048 #define XDMA_MAX_SEG 128 /* xDMA controller ops */ xdma_controller_t *xdma_ofw_get(device_t dev, const char *prop); int xdma_put(xdma_controller_t *xdma); /* xDMA channel ops */ xdma_channel_t * xdma_channel_alloc(xdma_controller_t *, uint32_t caps); int xdma_channel_free(xdma_channel_t *); int 
xdma_request(xdma_channel_t *xchan, struct xdma_request *r); /* SG interface */ int xdma_prep_sg(xdma_channel_t *, uint32_t, bus_size_t, bus_size_t, bus_size_t, bus_addr_t, bus_addr_t, bus_addr_t); void xdma_channel_free_sg(xdma_channel_t *xchan); int xdma_queue_submit_sg(xdma_channel_t *xchan); void xchan_seg_done(xdma_channel_t *xchan, xdma_transfer_status_t *); /* Queue operations */ int xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **m, xdma_transfer_status_t *); int xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **m, uintptr_t addr, uint8_t, uint8_t, enum xdma_direction dir); int xdma_dequeue_bio(xdma_channel_t *xchan, struct bio **bp, xdma_transfer_status_t *status); int xdma_enqueue_bio(xdma_channel_t *xchan, struct bio **bp, bus_addr_t addr, uint8_t, uint8_t, enum xdma_direction dir); int xdma_dequeue(xdma_channel_t *xchan, void **user, xdma_transfer_status_t *status); int xdma_enqueue(xdma_channel_t *xchan, uintptr_t src, uintptr_t dst, uint8_t, uint8_t, bus_size_t, enum xdma_direction dir, void *); int xdma_queue_submit(xdma_channel_t *xchan); /* Mbuf operations */ uint32_t xdma_mbuf_defrag(xdma_channel_t *xchan, struct xdma_request *xr); uint32_t xdma_mbuf_chain_count(struct mbuf *m0); /* Channel Control */ int xdma_control(xdma_channel_t *xchan, enum xdma_command cmd); /* Interrupt callback */ int xdma_setup_intr(xdma_channel_t *xchan, int (*cb)(void *, xdma_transfer_status_t *), void *arg, void **); int xdma_teardown_intr(xdma_channel_t *xchan, struct xdma_intr_handler *ih); int xdma_teardown_all_intr(xdma_channel_t *xchan); void xdma_callback(struct xdma_channel *xchan, xdma_transfer_status_t *status); /* Sglist */ int xchan_sglist_alloc(xdma_channel_t *xchan); void xchan_sglist_free(xdma_channel_t *xchan); int xdma_sglist_add(struct xdma_sglist *sg, struct bus_dma_segment *seg, uint32_t nsegs, struct xdma_request *xr); /* Requests bank */ void xchan_bank_init(xdma_channel_t *xchan); int xchan_bank_free(xdma_channel_t *xchan); struct xdma_request * xchan_bank_get(xdma_channel_t *xchan); int xchan_bank_put(xdma_channel_t *xchan, struct xdma_request *xr); #endif /* !_DEV_XDMA_XDMA_H_ */ Index: projects/runtime-coverage-v2/sys/dev/xdma/xdma_mbuf.c =================================================================== --- projects/runtime-coverage-v2/sys/dev/xdma/xdma_mbuf.c (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/xdma/xdma_mbuf.c (revision 346925) @@ -1,154 +1,150 @@ /*- * Copyright (c) 2017-2018 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include int xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **mp, xdma_transfer_status_t *status) { struct xdma_request *xr; struct xdma_request *xr_tmp; QUEUE_OUT_LOCK(xchan); TAILQ_FOREACH_SAFE(xr, &xchan->queue_out, xr_next, xr_tmp) { TAILQ_REMOVE(&xchan->queue_out, xr, xr_next); break; } QUEUE_OUT_UNLOCK(xchan); if (xr == NULL) return (-1); *mp = xr->m; status->error = xr->status.error; status->transferred = xr->status.transferred; xchan_bank_put(xchan, xr); return (0); } int xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **mp, uintptr_t addr, uint8_t src_width, uint8_t dst_width, enum xdma_direction dir) { struct xdma_request *xr; xdma_controller_t *xdma; xdma = xchan->xdma; xr = xchan_bank_get(xchan); if (xr == NULL) return (-1); /* No space is available yet. */ xr->direction = dir; xr->m = *mp; xr->req_type = XR_TYPE_MBUF; if (dir == XDMA_MEM_TO_DEV) { xr->dst_addr = addr; xr->src_addr = 0; } else { xr->src_addr = addr; xr->dst_addr = 0; } xr->src_width = src_width; xr->dst_width = dst_width; QUEUE_IN_LOCK(xchan); TAILQ_INSERT_TAIL(&xchan->queue_in, xr, xr_next); QUEUE_IN_UNLOCK(xchan); return (0); } uint32_t xdma_mbuf_chain_count(struct mbuf *m0) { struct mbuf *m; uint32_t c; c = 0; for (m = m0; m != NULL; m = m->m_next) c++; return (c); } uint32_t xdma_mbuf_defrag(xdma_channel_t *xchan, struct xdma_request *xr) { xdma_controller_t *xdma; struct mbuf *m; uint32_t c; xdma = xchan->xdma; c = xdma_mbuf_chain_count(xr->m); if (c == 1) return (c); /* Nothing to do. */ - if (xchan->caps & XCHAN_CAP_BUSDMA) { - if ((xchan->caps & XCHAN_CAP_BUSDMA_NOSEG) || \ - (c > xchan->maxnsegs)) { - if ((m = m_defrag(xr->m, M_NOWAIT)) == NULL) { - device_printf(xdma->dma_dev, - "%s: Can't defrag mbuf\n", - __func__); - return (c); - } - xr->m = m; - c = 1; - } + if ((m = m_defrag(xr->m, M_NOWAIT)) == NULL) { + device_printf(xdma->dma_dev, + "%s: Can't defrag mbuf\n", + __func__); + return (c); } + + xr->m = m; + c = 1; return (c); } Index: projects/runtime-coverage-v2/sys/dev/xdma/xdma_sg.c =================================================================== --- projects/runtime-coverage-v2/sys/dev/xdma/xdma_sg.c (revision 346924) +++ projects/runtime-coverage-v2/sys/dev/xdma/xdma_sg.c (revision 346925) @@ -1,594 +1,586 @@ /*- * Copyright (c) 2018 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include struct seg_load_request { struct bus_dma_segment *seg; uint32_t nsegs; uint32_t error; }; static int _xchan_bufs_alloc(xdma_channel_t *xchan) { xdma_controller_t *xdma; struct xdma_request *xr; int i; xdma = xchan->xdma; for (i = 0; i < xchan->xr_num; i++) { xr = &xchan->xr_mem[i]; - xr->buf.cbuf = contigmalloc(xchan->maxsegsize, - M_XDMA, 0, 0, ~0, PAGE_SIZE, 0); - if (xr->buf.cbuf == NULL) { - device_printf(xdma->dev, - "%s: Can't allocate contiguous kernel" - " physical memory\n", __func__); - return (-1); - } + /* TODO: bounce buffer */ } return (0); } static int _xchan_bufs_alloc_busdma(xdma_channel_t *xchan) { xdma_controller_t *xdma; struct xdma_request *xr; int err; int i; xdma = xchan->xdma; /* Create bus_dma tag */ err = bus_dma_tag_create( bus_get_dma_tag(xdma->dev), /* Parent tag. */ xchan->alignment, /* alignment */ xchan->boundary, /* boundary */ xchan->lowaddr, /* lowaddr */ xchan->highaddr, /* highaddr */ NULL, NULL, /* filter, filterarg */ xchan->maxsegsize * xchan->maxnsegs, /* maxsize */ xchan->maxnsegs, /* nsegments */ xchan->maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &xchan->dma_tag_bufs); if (err != 0) { device_printf(xdma->dev, "%s: Can't create bus_dma tag.\n", __func__); return (-1); } for (i = 0; i < xchan->xr_num; i++) { xr = &xchan->xr_mem[i]; err = bus_dmamap_create(xchan->dma_tag_bufs, 0, &xr->buf.map); if (err != 0) { device_printf(xdma->dev, "%s: Can't create buf DMA map.\n", __func__); /* Cleanup. 
*/ bus_dma_tag_destroy(xchan->dma_tag_bufs); return (-1); } } return (0); } static int xchan_bufs_alloc(xdma_channel_t *xchan) { xdma_controller_t *xdma; int ret; xdma = xchan->xdma; if (xdma == NULL) { device_printf(xdma->dev, "%s: Channel was not allocated properly.\n", __func__); return (-1); } if (xchan->caps & XCHAN_CAP_BUSDMA) ret = _xchan_bufs_alloc_busdma(xchan); else ret = _xchan_bufs_alloc(xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't allocate bufs.\n", __func__); return (-1); } xchan->flags |= XCHAN_BUFS_ALLOCATED; return (0); } static int xchan_bufs_free(xdma_channel_t *xchan) { struct xdma_request *xr; struct xchan_buf *b; int i; if ((xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) return (-1); if (xchan->caps & XCHAN_CAP_BUSDMA) { for (i = 0; i < xchan->xr_num; i++) { xr = &xchan->xr_mem[i]; b = &xr->buf; bus_dmamap_destroy(xchan->dma_tag_bufs, b->map); } bus_dma_tag_destroy(xchan->dma_tag_bufs); } else { for (i = 0; i < xchan->xr_num; i++) { xr = &xchan->xr_mem[i]; - contigfree(xr->buf.cbuf, xchan->maxsegsize, M_XDMA); + /* TODO: bounce buffer */ } } xchan->flags &= ~XCHAN_BUFS_ALLOCATED; return (0); } void xdma_channel_free_sg(xdma_channel_t *xchan) { xchan_bufs_free(xchan); xchan_sglist_free(xchan); xchan_bank_free(xchan); } /* * Prepare xchan for a scatter-gather transfer. * xr_num - xdma requests queue size, * maxsegsize - maximum allowed scatter-gather list element size in bytes */ int xdma_prep_sg(xdma_channel_t *xchan, uint32_t xr_num, bus_size_t maxsegsize, bus_size_t maxnsegs, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr) { xdma_controller_t *xdma; int ret; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); if (xchan->flags & XCHAN_CONFIGURED) { device_printf(xdma->dev, "%s: Channel is already configured.\n", __func__); return (-1); } xchan->xr_num = xr_num; xchan->maxsegsize = maxsegsize; xchan->maxnsegs = maxnsegs; xchan->alignment = alignment; xchan->boundary = boundary; xchan->lowaddr = lowaddr; xchan->highaddr = highaddr; if (xchan->maxnsegs > XDMA_MAX_SEG) { device_printf(xdma->dev, "%s: maxnsegs is too big\n", __func__); return (-1); } xchan_bank_init(xchan); /* Allocate sglist. */ ret = xchan_sglist_alloc(xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't allocate sglist.\n", __func__); return (-1); } - /* Allocate bufs. */ - ret = xchan_bufs_alloc(xchan); - if (ret != 0) { - device_printf(xdma->dev, - "%s: Can't allocate bufs.\n", __func__); + /* Allocate buffers if required. 
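+ *
+ * XCHAN_CAP_NOBUFS is requested by the consumer when the channel is
+ * allocated, e.g. (illustrative):
+ *
+ *	xchan = xdma_channel_alloc(xdma, XCHAN_CAP_NOBUFS);
+ *
+ * in which case no per-request buffers or DMA maps are created here.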
*/ + if ((xchan->caps & XCHAN_CAP_NOBUFS) == 0) { + ret = xchan_bufs_alloc(xchan); + if (ret != 0) { + device_printf(xdma->dev, + "%s: Can't allocate bufs.\n", __func__); - /* Cleanup */ - xchan_sglist_free(xchan); - xchan_bank_free(xchan); + /* Cleanup */ + xchan_sglist_free(xchan); + xchan_bank_free(xchan); - return (-1); + return (-1); + } } xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_SG); XCHAN_LOCK(xchan); ret = XDMA_CHANNEL_PREP_SG(xdma->dma_dev, xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't prepare SG transfer.\n", __func__); XCHAN_UNLOCK(xchan); return (-1); } XCHAN_UNLOCK(xchan); return (0); } void xchan_seg_done(xdma_channel_t *xchan, struct xdma_transfer_status *st) { struct xdma_request *xr; xdma_controller_t *xdma; struct xchan_buf *b; xdma = xchan->xdma; xr = TAILQ_FIRST(&xchan->processing); if (xr == NULL) panic("request not found\n"); b = &xr->buf; atomic_subtract_int(&b->nsegs_left, 1); if (b->nsegs_left == 0) { if (xchan->caps & XCHAN_CAP_BUSDMA) { if (xr->direction == XDMA_MEM_TO_DEV) bus_dmamap_sync(xchan->dma_tag_bufs, b->map, BUS_DMASYNC_POSTWRITE); else bus_dmamap_sync(xchan->dma_tag_bufs, b->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(xchan->dma_tag_bufs, b->map); } xr->status.error = st->error; xr->status.transferred = st->transferred; QUEUE_PROC_LOCK(xchan); TAILQ_REMOVE(&xchan->processing, xr, xr_next); QUEUE_PROC_UNLOCK(xchan); QUEUE_OUT_LOCK(xchan); TAILQ_INSERT_TAIL(&xchan->queue_out, xr, xr_next); QUEUE_OUT_UNLOCK(xchan); } } static void xdma_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct seg_load_request *slr; struct bus_dma_segment *seg; int i; slr = arg; seg = slr->seg; if (error != 0) { slr->error = error; return; } slr->nsegs = nsegs; for (i = 0; i < nsegs; i++) { seg[i].ds_addr = segs[i].ds_addr; seg[i].ds_len = segs[i].ds_len; } } static int _xdma_load_data_busdma(xdma_channel_t *xchan, struct xdma_request *xr, struct bus_dma_segment *seg) { xdma_controller_t *xdma; struct seg_load_request slr; uint32_t nsegs; void *addr; int error; xdma = xchan->xdma; error = 0; nsegs = 0; switch (xr->req_type) { case XR_TYPE_MBUF: error = bus_dmamap_load_mbuf_sg(xchan->dma_tag_bufs, xr->buf.map, xr->m, seg, &nsegs, BUS_DMA_NOWAIT); break; case XR_TYPE_BIO: slr.nsegs = 0; slr.error = 0; slr.seg = seg; error = bus_dmamap_load_bio(xchan->dma_tag_bufs, xr->buf.map, xr->bp, xdma_dmamap_cb, &slr, BUS_DMA_NOWAIT); if (slr.error != 0) { device_printf(xdma->dma_dev, "%s: bus_dmamap_load failed, err %d\n", __func__, slr.error); return (0); } nsegs = slr.nsegs; break; case XR_TYPE_VIRT: switch (xr->direction) { case XDMA_MEM_TO_DEV: addr = (void *)xr->src_addr; break; case XDMA_DEV_TO_MEM: addr = (void *)xr->dst_addr; break; default: device_printf(xdma->dma_dev, "%s: Direction is not supported\n", __func__); return (0); } slr.nsegs = 0; slr.error = 0; slr.seg = seg; error = bus_dmamap_load(xchan->dma_tag_bufs, xr->buf.map, addr, (xr->block_len * xr->block_num), xdma_dmamap_cb, &slr, BUS_DMA_NOWAIT); if (slr.error != 0) { device_printf(xdma->dma_dev, "%s: bus_dmamap_load failed, err %d\n", __func__, slr.error); return (0); } nsegs = slr.nsegs; break; default: break; } if (error != 0) { if (error == ENOMEM) { /* * Out of memory. Try again later. * TODO: count errors. 
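 *
 * A zero return from this path propagates through xdma_load_data()
 * to xdma_process(), which stops scanning and leaves the request on
 * queue_in, so it is retried on the next queue submit.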
			 */
		} else
			device_printf(xdma->dma_dev,
			    "%s: bus_dmamap_load failed with err %d\n",
			    __func__, error);
		return (0);
	}

	if (xr->direction == XDMA_MEM_TO_DEV)
		bus_dmamap_sync(xchan->dma_tag_bufs, xr->buf.map,
		    BUS_DMASYNC_PREWRITE);
	else
		bus_dmamap_sync(xchan->dma_tag_bufs, xr->buf.map,
		    BUS_DMASYNC_PREREAD);

	return (nsegs);
}

static int
_xdma_load_data(xdma_channel_t *xchan, struct xdma_request *xr,
    struct bus_dma_segment *seg)
{
	xdma_controller_t *xdma;
	struct mbuf *m;
	uint32_t nsegs;

	xdma = xchan->xdma;

	m = xr->m;

	nsegs = 1;

	switch (xr->req_type) {
	case XR_TYPE_MBUF:
-		if (xr->direction == XDMA_MEM_TO_DEV) {
-			m_copydata(m, 0, m->m_pkthdr.len, xr->buf.cbuf);
-			seg[0].ds_addr = (bus_addr_t)xr->buf.cbuf;
-			seg[0].ds_len = m->m_pkthdr.len;
-		} else {
-			seg[0].ds_addr = mtod(m, bus_addr_t);
-			seg[0].ds_len = m->m_pkthdr.len;
-		}
+		seg[0].ds_addr = mtod(m, bus_addr_t);
+		seg[0].ds_len = m->m_pkthdr.len;
		break;
	case XR_TYPE_BIO:
	case XR_TYPE_VIRT:
	default:
		panic("implement me\n");
	}

	return (nsegs);
}

static int
xdma_load_data(xdma_channel_t *xchan,
    struct xdma_request *xr, struct bus_dma_segment *seg)
{
	xdma_controller_t *xdma;
	int error;
	int nsegs;

	xdma = xchan->xdma;

	error = 0;
	nsegs = 0;

	if (xchan->caps & XCHAN_CAP_BUSDMA)
		nsegs = _xdma_load_data_busdma(xchan, xr, seg);
	else
		nsegs = _xdma_load_data(xchan, xr, seg);
	if (nsegs == 0)
		return (0); /* Try again later. */

	xr->buf.nsegs = nsegs;
	xr->buf.nsegs_left = nsegs;

	return (nsegs);
}

static int
xdma_process(xdma_channel_t *xchan,
    struct xdma_sglist *sg)
{
	struct bus_dma_segment seg[XDMA_MAX_SEG];
	struct xdma_request *xr;
	struct xdma_request *xr_tmp;
	xdma_controller_t *xdma;
	uint32_t capacity;
	uint32_t n;
	uint32_t c;
	int nsegs;
	int ret;

	XCHAN_ASSERT_LOCKED(xchan);

	xdma = xchan->xdma;

	n = 0;

	ret = XDMA_CHANNEL_CAPACITY(xdma->dma_dev, xchan, &capacity);
	if (ret != 0) {
		device_printf(xdma->dev,
		    "%s: Can't get DMA controller capacity.\n", __func__);
		return (-1);
	}

	TAILQ_FOREACH_SAFE(xr, &xchan->queue_in, xr_next, xr_tmp) {
		switch (xr->req_type) {
		case XR_TYPE_MBUF:
-			c = xdma_mbuf_defrag(xchan, xr);
+			/* Count the chain first: 'c' feeds the test below. */
+			c = xdma_mbuf_chain_count(xr->m);
+			if ((xchan->caps & XCHAN_CAP_NOSEG) ||
+			    (c > xchan->maxnsegs))
+				c = xdma_mbuf_defrag(xchan, xr);
			break;
		case XR_TYPE_BIO:
		case XR_TYPE_VIRT:
		default:
			c = 1;
		}

		if (capacity <= (c + n)) {
			/*
			 * No space yet available for the entire
			 * request in the DMA engine.
			 */
			break;
		}

		if ((c + n + xchan->maxnsegs) >= XDMA_SGLIST_MAXLEN) {
			/* Sglist is full. */
			break;
		}

		nsegs = xdma_load_data(xchan, xr, seg);
		if (nsegs == 0)
			break;

		xdma_sglist_add(&sg[n], seg, nsegs, xr);
		n += nsegs;

		QUEUE_IN_LOCK(xchan);
		TAILQ_REMOVE(&xchan->queue_in, xr, xr_next);
		QUEUE_IN_UNLOCK(xchan);

		QUEUE_PROC_LOCK(xchan);
		TAILQ_INSERT_TAIL(&xchan->processing, xr, xr_next);
		QUEUE_PROC_UNLOCK(xchan);
	}

	return (n);
}

int
xdma_queue_submit_sg(xdma_channel_t *xchan)
{
	struct xdma_sglist *sg;
	xdma_controller_t *xdma;
	uint32_t sg_n;
	int ret;

	xdma = xchan->xdma;
	KASSERT(xdma != NULL, ("xdma is NULL"));

	XCHAN_ASSERT_LOCKED(xchan);

	sg = xchan->sg;

-	if ((xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) {
+	if ((xchan->caps & XCHAN_CAP_NOBUFS) == 0 &&
+	    (xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) {
		device_printf(xdma->dev,
		    "%s: Can't submit a transfer: no bufs\n",
		    __func__);
		return (-1);
	}

	sg_n = xdma_process(xchan, sg);
	if (sg_n == 0)
		return (0); /* Nothing to submit */

	/* Now submit sglist to DMA engine driver.
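 *
 * XDMA_CHANNEL_SUBMIT_SG is the method the hardware driver provides.
 * A typical consumer path might look like the following sketch (the
 * bus-width arguments of 4 are illustrative):
 *
 *	xdma_enqueue_mbuf(xchan, &m, paddr, 4, 4, XDMA_MEM_TO_DEV);
 *	xdma_queue_submit(xchan);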
*/ ret = XDMA_CHANNEL_SUBMIT_SG(xdma->dma_dev, xchan, sg, sg_n); if (ret != 0) { device_printf(xdma->dev, "%s: Can't submit an sglist.\n", __func__); return (-1); } return (0); } Index: projects/runtime-coverage-v2/sys/kern/vfs_bio.c =================================================================== --- projects/runtime-coverage-v2/sys/kern/vfs_bio.c (revision 346924) +++ projects/runtime-coverage-v2/sys/kern/vfs_bio.c (revision 346925) @@ -1,5505 +1,5499 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Poul-Henning Kamp * Copyright (c) 1994,1997 John S. Dyson * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. * * see man buf(9) for more info. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer"); struct bio_ops bioops; /* I/O operation notification */ struct buf_ops buf_ops_bio = { .bop_name = "buf_ops_bio", .bop_write = bufwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; struct bufqueue { struct mtx_padalign bq_lock; TAILQ_HEAD(, buf) bq_queue; uint8_t bq_index; uint16_t bq_subqueue; int bq_len; } __aligned(CACHE_LINE_SIZE); #define BQ_LOCKPTR(bq) (&(bq)->bq_lock) #define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq))) #define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq))) #define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED) struct bufdomain { struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */ struct bufqueue bd_dirtyq; struct bufqueue *bd_cleanq; struct mtx_padalign bd_run_lock; /* Constants */ long bd_maxbufspace; long bd_hibufspace; long bd_lobufspace; long bd_bufspacethresh; int bd_hifreebuffers; int bd_lofreebuffers; int bd_hidirtybuffers; int bd_lodirtybuffers; int bd_dirtybufthresh; int bd_lim; /* atomics */ int bd_wanted; int __aligned(CACHE_LINE_SIZE) bd_numdirtybuffers; int __aligned(CACHE_LINE_SIZE) bd_running; long __aligned(CACHE_LINE_SIZE) bd_bufspace; int __aligned(CACHE_LINE_SIZE) bd_freebuffers; } __aligned(CACHE_LINE_SIZE); #define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock) #define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd))) #define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd))) #define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED) #define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock) #define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd))) #define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd))) #define BD_DOMAIN(bd) (bd - bdomain) static struct buf *buf; /* buffer header pool */ extern struct buf *swbuf; /* Swap buffer header pool. 
*/ caddr_t unmapped_buf; /* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ struct proc *bufdaemonproc; static int inmem(struct vnode *vp, daddr_t blkno); static void vm_hold_free_pages(struct buf *bp, int newbsize); static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m); static void vfs_clean_pages_dirty_buf(struct buf *bp); static void vfs_setdirty_locked_object(struct buf *bp); static void vfs_vmio_invalidate(struct buf *bp); static void vfs_vmio_truncate(struct buf *bp, int npages); static void vfs_vmio_extend(struct buf *bp, int npages, int size); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int, void (*)(struct buf *)); static int buf_flush(struct vnode *vp, struct bufdomain *, int); static int flushbufqueues(struct vnode *, struct bufdomain *, int, int); static void buf_daemon(void); static __inline void bd_wakeup(void); static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); static void bufkva_reclaim(vmem_t *, int); static void bufkva_free(struct buf *); static int buf_import(void *, void **, int, int, int); static void buf_release(void *, void **, int); static void maxbcachebuf_adjust(void); static inline struct bufdomain *bufdomain(struct buf *); static void bq_remove(struct bufqueue *bq, struct buf *bp); static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock); static int buf_recycle(struct bufdomain *, bool kva); static void bq_init(struct bufqueue *bq, int qindex, int cpu, const char *lockname); static void bd_init(struct bufdomain *bd); static int bd_flushall(struct bufdomain *bd); static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS); static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS); static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); int vmiodirenable = TRUE; SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, "Use the VM system for directory writes"); long runningbufspace; SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, "Amount of presently outstanding async buffer io"); SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_bufspace, "L", "Physical memory used for buffers"); static counter_u64_t bufkvaspace; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, "Kernel virtual memory used for buffers"); static long maxbufspace; SYSCTL_PROC(_vfs, OID_AUTO, maxbufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &maxbufspace, __offsetof(struct bufdomain, bd_maxbufspace), sysctl_bufdomain_long, "L", "Maximum allowed value of bufspace (including metadata)"); static long bufmallocspace; SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, "Amount of malloced memory for buffers"); static long maxbufmallocspace; SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, "Maximum amount of malloced memory for buffers"); static long lobufspace; SYSCTL_PROC(_vfs, OID_AUTO, lobufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lobufspace, __offsetof(struct bufdomain, bd_lobufspace), sysctl_bufdomain_long, "L", "Minimum amount of buffers we want to have"); long hibufspace; SYSCTL_PROC(_vfs, OID_AUTO, hibufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hibufspace, __offsetof(struct bufdomain, 
bd_hibufspace), sysctl_bufdomain_long, "L", "Maximum allowed value of bufspace (excluding metadata)"); long bufspacethresh; SYSCTL_PROC(_vfs, OID_AUTO, bufspacethresh, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &bufspacethresh, __offsetof(struct bufdomain, bd_bufspacethresh), sysctl_bufdomain_long, "L", "Bufspace consumed before waking the daemon to free some"); static counter_u64_t buffreekvacnt; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, "Number of times we have freed the KVA space from some buffer"); static counter_u64_t bufdefragcnt; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, "Number of times we have had to repeat buffer allocation to defragment"); static long lorunningspace; SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &lorunningspace, 0, sysctl_runningspace, "L", "Minimum preferred space used for in-progress I/O"); static long hirunningspace; SYSCTL_PROC(_vfs, OID_AUTO, hirunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &hirunningspace, 0, sysctl_runningspace, "L", "Maximum amount of space to use for in-progress I/O"); int dirtybufferflushes; SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes, 0, "Number of bdwrite to bawrite conversions to limit dirty buffers"); int bdwriteskip; SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip, 0, "Number of buffers supplied to bdwrite with snapshot deadlock risk"); int altbufferflushes; SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, 0, "Number of fsync flushes to limit dirty buffers"); static int recursiveflushes; SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, 0, "Number of flushes skipped due to being recursive"); static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs, OID_AUTO, numdirtybuffers, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_numdirtybuffers, "I", "Number of buffers that are dirty (has unwritten changes) at the moment"); static int lodirtybuffers; SYSCTL_PROC(_vfs, OID_AUTO, lodirtybuffers, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &lodirtybuffers, __offsetof(struct bufdomain, bd_lodirtybuffers), sysctl_bufdomain_int, "I", "How many buffers we want to have free before bufdaemon can sleep"); static int hidirtybuffers; SYSCTL_PROC(_vfs, OID_AUTO, hidirtybuffers, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &hidirtybuffers, __offsetof(struct bufdomain, bd_hidirtybuffers), sysctl_bufdomain_int, "I", "When the number of dirty buffers is considered severe"); int dirtybufthresh; SYSCTL_PROC(_vfs, OID_AUTO, dirtybufthresh, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &dirtybufthresh, __offsetof(struct bufdomain, bd_dirtybufthresh), sysctl_bufdomain_int, "I", "Number of bdwrite to bawrite conversions to clear dirty buffers"); static int numfreebuffers; SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, "Number of free buffers"); static int lofreebuffers; SYSCTL_PROC(_vfs, OID_AUTO, lofreebuffers, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &lofreebuffers, __offsetof(struct bufdomain, bd_lofreebuffers), sysctl_bufdomain_int, "I", "Target number of free buffers"); static int hifreebuffers; SYSCTL_PROC(_vfs, OID_AUTO, hifreebuffers, CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &hifreebuffers, __offsetof(struct bufdomain, bd_hifreebuffers), sysctl_bufdomain_int, "I", "Threshold for clean buffer recycling"); static counter_u64_t getnewbufcalls; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD, 
&getnewbufcalls, "Number of calls to getnewbuf"); static counter_u64_t getnewbufrestarts; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RD, &getnewbufrestarts, "Number of times getnewbuf has had to restart a buffer acquisition"); static counter_u64_t mappingrestarts; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RD, &mappingrestarts, "Number of times getblk has had to restart a buffer mapping for " "unmapped buffer"); static counter_u64_t numbufallocfails; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, numbufallocfails, CTLFLAG_RW, &numbufallocfails, "Number of times buffer allocations failed"); static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); static counter_u64_t notbufdflushes; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, ¬bufdflushes, "Number of dirty buffer flushes done by the bufdaemon helpers"); static long barrierwrites; SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, "Number of barrier writes"); SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, &unmapped_buf_allowed, 0, "Permit the use of the unmapped i/o"); int maxbcachebuf = MAXBCACHEBUF; SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0, "Maximum size of a buffer cache block"); /* * This lock synchronizes access to bd_request. */ static struct mtx_padalign __exclusive_cache_line bdlock; /* * This lock protects the runningbufreq and synchronizes runningbufwakeup and * waitrunningbufspace(). */ static struct mtx_padalign __exclusive_cache_line rbreqlock; /* * Lock that protects bdirtywait. */ static struct mtx_padalign __exclusive_cache_line bdirtylock; /* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it * is idling. */ static int bd_request; /* * Request for the buf daemon to write more buffers than is indicated by * lodirtybuf. This may be necessary to push out excess dependencies or * defragment the address space where a simple count of the number of dirty * buffers is insufficient to characterize the demand for flushing them. */ static int bd_speedupreq; /* * Synchronization (sleep/wakeup) variable for active buffer space requests. * Set when wait starts, cleared prior to wakeup(). * Used in runningbufwakeup() and waitrunningbufspace(). */ static int runningbufreq; /* * Synchronization for bwillwrite() waiters. */ static int bdirtywait; /* * Definitions for the buffer free lists. */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_EMPTY 1 /* empty buffer headers */ #define QUEUE_DIRTY 2 /* B_DELWRI buffers */ #define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */ #define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */ /* Maximum number of buffer domains. */ #define BUF_DOMAINS 8 struct bufdomainset bdlodirty; /* Domains > lodirty */ struct bufdomainset bdhidirty; /* Domains > hidirty */ /* Configured number of clean queues. */ static int __read_mostly buf_domains; BITSET_DEFINE(bufdomainset, BUF_DOMAINS); struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS]; struct bufqueue __exclusive_cache_line bqempty; /* * per-cpu empty buffer cache. */ uma_zone_t buf_zone; /* * Single global constant for BUF_WMESG, to avoid getting multiple references. * buf_wmesg is referred from macros. 
*/ const char *buf_wmesg = BUF_WMESG; static int sysctl_runningspace(SYSCTL_HANDLER_ARGS) { long value; int error; value = *(long *)arg1; error = sysctl_handle_long(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&rbreqlock); if (arg1 == &hirunningspace) { if (value < lorunningspace) error = EINVAL; else hirunningspace = value; } else { KASSERT(arg1 == &lorunningspace, ("%s: unknown arg1", __func__)); if (value > hirunningspace) error = EINVAL; else lorunningspace = value; } mtx_unlock(&rbreqlock); return (error); } static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS) { int error; int value; int i; value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); *(int *)arg1 = value; for (i = 0; i < buf_domains; i++) *(int *)(uintptr_t)(((uintptr_t)&bdomain[i]) + arg2) = value / buf_domains; return (error); } static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS) { long value; int error; int i; value = *(long *)arg1; error = sysctl_handle_long(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); *(long *)arg1 = value; for (i = 0; i < buf_domains; i++) *(long *)(uintptr_t)(((uintptr_t)&bdomain[i]) + arg2) = value / buf_domains; return (error); } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) static int sysctl_bufspace(SYSCTL_HANDLER_ARGS) { long lvalue; int ivalue; int i; lvalue = 0; for (i = 0; i < buf_domains; i++) lvalue += bdomain[i].bd_bufspace; if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long)) return (sysctl_handle_long(oidp, &lvalue, 0, req)); if (lvalue > INT_MAX) /* On overflow, still write out a long to trigger ENOMEM. */ return (sysctl_handle_long(oidp, &lvalue, 0, req)); ivalue = lvalue; return (sysctl_handle_int(oidp, &ivalue, 0, req)); } #else static int sysctl_bufspace(SYSCTL_HANDLER_ARGS) { long lvalue; int i; lvalue = 0; for (i = 0; i < buf_domains; i++) lvalue += bdomain[i].bd_bufspace; return (sysctl_handle_long(oidp, &lvalue, 0, req)); } #endif static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS) { int value; int i; value = 0; for (i = 0; i < buf_domains; i++) value += bdomain[i].bd_numdirtybuffers; return (sysctl_handle_int(oidp, &value, 0, req)); } /* * bdirtywakeup: * * Wakeup any bwillwrite() waiters. */ static void bdirtywakeup(void) { mtx_lock(&bdirtylock); if (bdirtywait) { bdirtywait = 0; wakeup(&bdirtywait); } mtx_unlock(&bdirtylock); } /* * bd_clear: * * Clear a domain from the appropriate bitsets when dirtybuffers * is decremented. */ static void bd_clear(struct bufdomain *bd) { mtx_lock(&bdirtylock); if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers) BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty); if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers) BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty); mtx_unlock(&bdirtylock); } /* * bd_set: * * Set a domain in the appropriate bitsets when dirtybuffers * is incremented. */ static void bd_set(struct bufdomain *bd) { mtx_lock(&bdirtylock); if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers) BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty); if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers) BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty); mtx_unlock(&bdirtylock); } /* * bdirtysub: * * Decrement the numdirtybuffers count by one and wakeup any * threads blocked in bwillwrite(). 
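 *
 *	The wakeup fires when the pre-decrement count equals the midpoint
 *	(bd_lodirtybuffers + bd_hidirtybuffers) / 2; e.g. with thresholds
 *	of 100 and 200, sleepers are woken as the count drops through 150.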
*/ static void bdirtysub(struct buf *bp) { struct bufdomain *bd; int num; bd = bufdomain(bp); num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1); if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2) bdirtywakeup(); if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers) bd_clear(bd); } /* * bdirtyadd: * * Increment the numdirtybuffers count by one and wakeup the buf * daemon if needed. */ static void bdirtyadd(struct buf *bp) { struct bufdomain *bd; int num; /* * Only do the wakeup once as we cross the boundary. The * buf daemon will keep running until the condition clears. */ bd = bufdomain(bp); num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1); if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2) bd_wakeup(); if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers) bd_set(bd); } /* * bufspace_daemon_wakeup: * * Wakeup the daemons responsible for freeing clean bufs. */ static void bufspace_daemon_wakeup(struct bufdomain *bd) { /* * avoid the lock if the daemon is running. */ if (atomic_fetchadd_int(&bd->bd_running, 1) == 0) { BD_RUN_LOCK(bd); atomic_store_int(&bd->bd_running, 1); wakeup(&bd->bd_running); BD_RUN_UNLOCK(bd); } } /* * bufspace_daemon_wait: * * Sleep until the domain falls below a limit or one second passes. */ static void bufspace_daemon_wait(struct bufdomain *bd) { /* * Re-check our limits and sleep. bd_running must be * cleared prior to checking the limits to avoid missed * wakeups. The waker will adjust one of bufspace or * freebuffers prior to checking bd_running. */ BD_RUN_LOCK(bd); atomic_store_int(&bd->bd_running, 0); if (bd->bd_bufspace < bd->bd_bufspacethresh && bd->bd_freebuffers > bd->bd_lofreebuffers) { msleep(&bd->bd_running, BD_RUN_LOCKPTR(bd), PRIBIO|PDROP, "-", hz); } else { /* Avoid spurious wakeups while running. */ atomic_store_int(&bd->bd_running, 1); BD_RUN_UNLOCK(bd); } } /* * bufspace_adjust: * * Adjust the reported bufspace for a KVA managed buffer, possibly * waking any waiters. */ static void bufspace_adjust(struct buf *bp, int bufsize) { struct bufdomain *bd; long space; int diff; KASSERT((bp->b_flags & B_MALLOC) == 0, ("bufspace_adjust: malloc buf %p", bp)); bd = bufdomain(bp); diff = bufsize - bp->b_bufsize; if (diff < 0) { atomic_subtract_long(&bd->bd_bufspace, -diff); } else if (diff > 0) { space = atomic_fetchadd_long(&bd->bd_bufspace, diff); /* Wake up the daemon on the transition. */ if (space < bd->bd_bufspacethresh && space + diff >= bd->bd_bufspacethresh) bufspace_daemon_wakeup(bd); } bp->b_bufsize = bufsize; } /* * bufspace_reserve: * * Reserve bufspace before calling allocbuf(). metadata has a * different space limit than data. */ static int bufspace_reserve(struct bufdomain *bd, int size, bool metadata) { long limit, new; long space; if (metadata) limit = bd->bd_maxbufspace; else limit = bd->bd_hibufspace; space = atomic_fetchadd_long(&bd->bd_bufspace, size); new = space + size; if (new > limit) { atomic_subtract_long(&bd->bd_bufspace, size); return (ENOSPC); } /* Wake up the daemon on the transition. */ if (space < bd->bd_bufspacethresh && new >= bd->bd_bufspacethresh) bufspace_daemon_wakeup(bd); return (0); } /* * bufspace_release: * * Release reserved bufspace after bufspace_adjust() has consumed it. */ static void bufspace_release(struct bufdomain *bd, int size) { atomic_subtract_long(&bd->bd_bufspace, size); } /* * bufspace_wait: * * Wait for bufspace, acting as the buf daemon if a locked vnode is * supplied. bd_wanted must be set prior to polling for space. 
The * operation must be re-tried on return. */ static void bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags, int slpflag, int slptimeo) { struct thread *td; int error, fl, norunbuf; if ((gbflags & GB_NOWAIT_BD) != 0) return; td = curthread; BD_LOCK(bd); while (bd->bd_wanted) { if (vp != NULL && vp->v_type != VCHR && (td->td_pflags & TDP_BUFNEED) == 0) { BD_UNLOCK(bd); /* * getblk() is called with a vnode locked, and * some majority of the dirty buffers may as * well belong to the vnode. Flushing the * buffers there would make progress that * cannot be achieved by the buf_daemon, which * cannot lock the vnode. */ norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | (td->td_pflags & TDP_NORUNNINGBUF); /* * Play bufdaemon. The getnewbuf() function * may be called while the thread owns lock * for another dirty buffer for the same * vnode, which makes it impossible to use * VOP_FSYNC() there, due to the buffer lock * recursion. */ td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; fl = buf_flush(vp, bd, flushbufqtarget); td->td_pflags &= norunbuf; BD_LOCK(bd); if (fl != 0) continue; if (bd->bd_wanted == 0) break; } error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd), (PRIBIO + 4) | slpflag, "newbuf", slptimeo); if (error != 0) break; } BD_UNLOCK(bd); } /* * bufspace_daemon: * * buffer space management daemon. Tries to maintain some marginal * amount of free buffer space so that requesting processes neither * block nor work to reclaim buffers. */ static void bufspace_daemon(void *arg) { struct bufdomain *bd; EVENTHANDLER_REGISTER(shutdown_pre_sync, kthread_shutdown, curthread, SHUTDOWN_PRI_LAST + 100); bd = arg; for (;;) { kthread_suspend_check(); /* * Free buffers from the clean queue until we meet our * targets. * * Theory of operation: The buffer cache is most efficient * when some free buffer headers and space are always * available to getnewbuf(). This daemon attempts to prevent * the excessive blocking and synchronization associated * with shortfall. It goes through three phases according to * demand: * * 1) The daemon wakes up voluntarily once per second * during idle periods when the counters are below * the wakeup thresholds (bufspacethresh, lofreebuffers). * * 2) The daemon wakes up as we cross the thresholds * ahead of any potential blocking. This may bounce * slightly according to the rate of consumption and * release. * * 3) The daemon and consumers are starved for working * clean buffers. This is the 'bufspace' sleep below * which will inefficiently trade bufs with bqrelse * until we return to condition 2. */ while (bd->bd_bufspace > bd->bd_lobufspace || bd->bd_freebuffers < bd->bd_hifreebuffers) { if (buf_recycle(bd, false) != 0) { if (bd_flushall(bd)) continue; /* * Speedup dirty if we've run out of clean * buffers. This is possible in particular * because softdep may hold many bufs locked * pending writes to other bufs which are * marked for delayed write, exhausting * clean space until they are written. */ bd_speedup(); BD_LOCK(bd); if (bd->bd_wanted) { msleep(&bd->bd_wanted, BD_LOCKPTR(bd), PRIBIO|PDROP, "bufspace", hz/10); } else BD_UNLOCK(bd); } maybe_yield(); } bufspace_daemon_wait(bd); } } /* * bufmallocadjust: * * Adjust the reported bufspace for a malloc managed buffer, possibly * waking any waiters.
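 *
 * Unlike bufspace_adjust() above, which charges the per-domain
 * bd_bufspace, malloc-backed buffers are accounted against the single
 * global bufmallocspace counter; e.g. shrinking a malloc buffer by one
 * page simply subtracts PAGE_SIZE from bufmallocspace below.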
*/ static void bufmallocadjust(struct buf *bp, int bufsize) { int diff; KASSERT((bp->b_flags & B_MALLOC) != 0, ("bufmallocadjust: non-malloc buf %p", bp)); diff = bufsize - bp->b_bufsize; if (diff < 0) atomic_subtract_long(&bufmallocspace, -diff); else atomic_add_long(&bufmallocspace, diff); bp->b_bufsize = bufsize; } /* * runningwakeup: * * Wake up processes that are waiting on asynchronous writes to fall * below lorunningspace. */ static void runningwakeup(void) { mtx_lock(&rbreqlock); if (runningbufreq) { runningbufreq = 0; wakeup(&runningbufreq); } mtx_unlock(&rbreqlock); } /* * runningbufwakeup: * * Decrement the outstanding write count accordingly. */ void runningbufwakeup(struct buf *bp) { long space, bspace; bspace = bp->b_runningbufspace; if (bspace == 0) return; space = atomic_fetchadd_long(&runningbufspace, -bspace); KASSERT(space >= bspace, ("runningbufspace underflow %ld %ld", space, bspace)); bp->b_runningbufspace = 0; /* * Only acquire the lock and wakeup on the transition from exceeding * the threshold to falling below it. */ if (space < lorunningspace) return; if (space - bspace > lorunningspace) return; runningwakeup(); } /* * waitrunningbufspace() * * runningbufspace is a measure of the amount of I/O currently * running. This routine is used in async-write situations to * prevent creating huge backups of pending writes to a device. * Only asynchronous writes are governed by this function. * * This does NOT turn an async write into a sync write. It waits * for earlier writes to complete and generally returns before the * caller's write has reached the device. */ void waitrunningbufspace(void) { mtx_lock(&rbreqlock); while (runningbufspace > hirunningspace) { runningbufreq = 1; msleep(&runningbufreq, &rbreqlock, PVM, "wdrain", 0); } mtx_unlock(&rbreqlock); } /* * vfs_buf_test_cache: * * Called when a buffer is extended. This function clears the B_CACHE * bit if the newly extended portion of the buffer does not contain * valid data. */ static __inline void vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { VM_OBJECT_ASSERT_LOCKED(m->object); if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) bp->b_flags &= ~B_CACHE; } } /* Wake up the buffer daemon if necessary */ static void bd_wakeup(void) { mtx_lock(&bdlock); if (bd_request == 0) { bd_request = 1; wakeup(&bd_request); } mtx_unlock(&bdlock); } /* * Adjust the maxbcachebuf tunable. */ static void maxbcachebuf_adjust(void) { int i; /* * maxbcachebuf must be a power of 2 >= MAXBSIZE. */ i = 2; while (i * 2 <= maxbcachebuf) i *= 2; maxbcachebuf = i; if (maxbcachebuf < MAXBSIZE) maxbcachebuf = MAXBSIZE; if (maxbcachebuf > MAXPHYS) maxbcachebuf = MAXPHYS; if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF) printf("maxbcachebuf=%d\n", maxbcachebuf); } /* * bd_speedup - speedup the buffer cache flushing code */ void bd_speedup(void) { int needwake; mtx_lock(&bdlock); needwake = 0; if (bd_speedupreq == 0 || bd_request == 0) needwake = 1; bd_speedupreq = 1; bd_request = 1; if (needwake) wakeup(&bd_request); mtx_unlock(&bdlock); } #ifdef __i386__ #define TRANSIENT_DENOM 5 #else #define TRANSIENT_DENOM 10 #endif /* * Calculate buffer cache scaling values and reserve space for buffer * headers. This is called during low level kernel initialization and * may be called more than once. We CANNOT write to the memory area * being reserved at this time.
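 *
 * Worked example (illustrative; assumes the common BKVASIZE of 16 KiB,
 * giving factor = 4 * 16384 / 1024 = 64): a machine with physmem_est =
 * 65536 (64 MB of RAM, expressed in KB) auto-tunes
 *
 *	nbuf = 50 + (65536 - 4096) / 64 = 50 + 960 = 1010
 *
 * i.e. roughly one buffer per 64 KB of RAM, which at 16 KiB per buffer
 * matches the 1/4-of-RAM target described below.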
*/ caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) { int tuned_nbuf; long maxbuf, maxbuf_sz, buf_sz, biotmap_sz; /* * physmem_est is in pages. Convert it to kilobytes (assumes * PAGE_SIZE is >= 1K) */ physmem_est = physmem_est * (PAGE_SIZE / 1024); maxbcachebuf_adjust(); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/10 of our ram over 64MB. When auto-sizing * the buffer cache we limit the eventual kva reservation to * maxbcache bytes. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / 1024; nbuf = 50; if (physmem_est > 4096) nbuf += min((physmem_est - 4096) / factor, 65536 / factor); if (physmem_est > 65536) nbuf += min((physmem_est - 65536) * 2 / (factor * 5), 32 * 1024 * 1024 / (factor * 5)); if (maxbcache && nbuf > maxbcache / BKVASIZE) nbuf = maxbcache / BKVASIZE; tuned_nbuf = 1; } else tuned_nbuf = 0; /* XXX Avoid unsigned long overflows later on with maxbufspace. */ maxbuf = (LONG_MAX / 3) / BKVASIZE; if (nbuf > maxbuf) { if (!tuned_nbuf) printf("Warning: nbufs lowered from %d to %ld\n", nbuf, maxbuf); nbuf = maxbuf; } /* * Ideal allocation size for the transient bio submap is 10% * of the maximal space buffer map. This roughly corresponds * to the amount of the buffer mapped for typical UFS load. * * Clip the buffer map to reserve space for the transient * BIOs, if its extent is bigger than 90% (80% on i386) of the * maximum buffer map extent on the platform. * * Falling back to maxbuf when maxbcache is unset allows us * to avoid trimming the buffer KVA on architectures * with ample KVA space. */ if (bio_transient_maxcnt == 0 && unmapped_buf_allowed) { maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE; buf_sz = (long)nbuf * BKVASIZE; if (buf_sz < maxbuf_sz / TRANSIENT_DENOM * (TRANSIENT_DENOM - 1)) { /* * There is more KVA than memory. Do not * adjust buffer map size, and assign the rest * of maxbuf to transient map. */ biotmap_sz = maxbuf_sz - buf_sz; } else { /* * Buffer map spans all KVA we could afford on * this platform. Give 10% (20% on i386) of * the buffer map to the transient bio map. */ biotmap_sz = buf_sz / TRANSIENT_DENOM; buf_sz -= biotmap_sz; } if (biotmap_sz / INT_MAX > MAXPHYS) bio_transient_maxcnt = INT_MAX; else bio_transient_maxcnt = biotmap_sz / MAXPHYS; /* * Artificially limit to 1024 simultaneous in-flight I/Os * using the transient mapping. */ if (bio_transient_maxcnt > 1024) bio_transient_maxcnt = 1024; if (tuned_nbuf) nbuf = buf_sz / BKVASIZE; } if (nswbuf == 0) { nswbuf = min(nbuf / 4, 256); if (nswbuf < NSWBUF_MIN) nswbuf = NSWBUF_MIN; } /* * Reserve space for the buffer cache buffers */ buf = (void *)v; v = (caddr_t)(buf + nbuf); return(v); } /* Initialize the buffer subsystem. Called before use of any buffers.
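 *
 * The space limits computed below form a ladder; continuing the
 * illustrative nbuf = 1010, BKVASIZE = 16 KiB example from above:
 *
 *	maxbufspace    = 1010 * 16384 bytes (~15.8 MiB, metadata ceiling)
 *	hibufspace     = max(3/4 * maxbufspace,
 *			     maxbufspace - 10 * maxbcachebuf)
 *	lobufspace     = 95% of hibufspace
 *	bufspacethresh = midpoint of lobufspace and hibufspace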
*/ void bufinit(void) { struct buf *bp; int i; KASSERT(maxbcachebuf >= MAXBSIZE, ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf, MAXBSIZE)); bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock"); mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_NONE; bp->b_domain = -1; bp->b_subqueue = mp_maxid + 1; bp->b_xflags = 0; bp->b_data = bp->b_kvabase = unmapped_buf; LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); bq_insert(&bqempty, bp, false); } /* * maxbufspace is the absolute maximum amount of buffer space we are * allowed to reserve in KVM and in real terms. The absolute maximum * is nominally used by metadata. hibufspace is the nominal maximum * used by most other requests. The differential is required to * ensure that metadata deadlocks don't occur. * * maxbufspace is based on BKVASIZE. Allocating buffers larger than * this may result in KVM fragmentation which is not handled optimally * by the system. XXX This is less true with vmem. We could use * PAGE_SIZE. */ maxbufspace = (long)nbuf * BKVASIZE; hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10); lobufspace = (hibufspace / 20) * 19; /* 95% */ bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2; /* * Note: The 16 MiB upper limit for hirunningspace was chosen * arbitrarily and may need further tuning. It corresponds to * 128 outstanding write IO requests (if IO size is 128 KiB), * which fits with many RAID controllers' tagged queuing limits. * The lower 1 MiB limit is the historical upper limit for * hirunningspace. */ hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf), 16 * 1024 * 1024), 1024 * 1024); lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf); /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on * average (small) directories. */ maxbufmallocspace = hibufspace / 20; /* * Reduce the chance of a deadlock occurring by limiting the number * of delayed-write dirty buffers we allow to stack up. */ hidirtybuffers = nbuf / 4 + 20; dirtybufthresh = hidirtybuffers * 9 / 10; /* * To support extreme low-memory systems, make sure hidirtybuffers * cannot eat up all available buffer space. This occurs when our * minimum cannot be met. We try to size hidirtybuffers to 3/4 our * buffer space assuming BKVASIZE'd buffers. */ while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { hidirtybuffers >>= 1; } lodirtybuffers = hidirtybuffers / 2; /* * lofreebuffers should be sufficient to avoid stalling waiting on * buf headers under heavy utilization. The bufs in per-cpu caches * are counted as free but will be unavailable to threads executing * on other cpus. * * hifreebuffers is the free target for the bufspace daemon. This * should be set appropriately to limit work per-iteration. */ lofreebuffers = MIN((nbuf / 25) + (20 * mp_ncpus), 128 * mp_ncpus); hifreebuffers = (3 * lofreebuffers) / 2; numfreebuffers = nbuf; /* Set up the kva and free list allocators.
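 *
 * For scale (illustrative only): with a maxbufspace of ~1 GB the
 * howmany() below yields four clean-queue domains (capped at
 * BUF_DOMAINS), and every limit computed above is divided evenly
 * among them.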
*/ vmem_set_reclaim(buffer_arena, bufkva_reclaim); buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf), NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0); /* * Size the clean queue according to the amount of buffer space. * One queue per 256 MB up to the max. More queues give better * concurrency but less accurate LRU. */ buf_domains = MIN(howmany(maxbufspace, 256*1024*1024), BUF_DOMAINS); for (i = 0 ; i < buf_domains; i++) { struct bufdomain *bd; bd = &bdomain[i]; bd_init(bd); bd->bd_freebuffers = nbuf / buf_domains; bd->bd_hifreebuffers = hifreebuffers / buf_domains; bd->bd_lofreebuffers = lofreebuffers / buf_domains; bd->bd_bufspace = 0; bd->bd_maxbufspace = maxbufspace / buf_domains; bd->bd_hibufspace = hibufspace / buf_domains; bd->bd_lobufspace = lobufspace / buf_domains; bd->bd_bufspacethresh = bufspacethresh / buf_domains; bd->bd_numdirtybuffers = 0; bd->bd_hidirtybuffers = hidirtybuffers / buf_domains; bd->bd_lodirtybuffers = lodirtybuffers / buf_domains; bd->bd_dirtybufthresh = dirtybufthresh / buf_domains; /* Don't allow more than 2% of bufs in the per-cpu caches. */ bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus; } getnewbufcalls = counter_u64_alloc(M_WAITOK); getnewbufrestarts = counter_u64_alloc(M_WAITOK); mappingrestarts = counter_u64_alloc(M_WAITOK); numbufallocfails = counter_u64_alloc(M_WAITOK); notbufdflushes = counter_u64_alloc(M_WAITOK); buffreekvacnt = counter_u64_alloc(M_WAITOK); bufdefragcnt = counter_u64_alloc(M_WAITOK); bufkvaspace = counter_u64_alloc(M_WAITOK); } #ifdef INVARIANTS static inline void vfs_buf_check_mapped(struct buf *bp) { KASSERT(bp->b_kvabase != unmapped_buf, ("mapped buf: b_kvabase was not updated %p", bp)); KASSERT(bp->b_data != unmapped_buf, ("mapped buf: b_data was not updated %p", bp)); KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf + MAXPHYS, ("b_data + b_offset unmapped %p", bp)); } static inline void vfs_buf_check_unmapped(struct buf *bp) { KASSERT(bp->b_data == unmapped_buf, ("unmapped buf: corrupted b_data %p", bp)); } #define BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp) #define BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp) #else #define BUF_CHECK_MAPPED(bp) do {} while (0) #define BUF_CHECK_UNMAPPED(bp) do {} while (0) #endif static int isbufbusy(struct buf *bp) { if (((bp->b_flags & B_INVAL) == 0 && BUF_ISLOCKED(bp)) || ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)) return (1); return (0); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void bufshutdown(int show_busybufs) { static int first_buf_printf = 1; struct buf *bp; int iter, nbusy, pbusy; #ifndef PREEMPTION int subiter; #endif /* * Sync filesystems for shutdown */ wdog_kern_pat(WD_LASTVAL); sys_sync(curthread, NULL); /* * With soft updates, some buffers that are * written will be remarked as dirty until other * buffers are written. */ for (iter = pbusy = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) if (isbufbusy(bp)) nbusy++; if (nbusy == 0) { if (first_buf_printf) printf("All buffers synced."); break; } if (first_buf_printf) { printf("Syncing disks, buffers remaining... "); first_buf_printf = 0; } printf("%d ", nbusy); if (nbusy < pbusy) iter = 0; pbusy = nbusy; wdog_kern_pat(WD_LASTVAL); sys_sync(curthread, NULL); #ifdef PREEMPTION /* * Spin for a while to allow interrupt threads to run. */ DELAY(50000 * iter); #else /* * Context switch several times to allow interrupt * threads to run.
*/ for (subiter = 0; subiter < 50 * iter; subiter++) { thread_lock(curthread); mi_switch(SW_VOL, NULL); thread_unlock(curthread); DELAY(1000); } #endif } printf("\n"); /* * Count only busy local buffers to prevent forcing * a fsck if we're just a client of a wedged NFS server */ nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if (isbufbusy(bp)) { #if 0 /* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */ if (bp->b_dev == NULL) { TAILQ_REMOVE(&mountlist, bp->b_vp->v_mount, mnt_list); continue; } #endif nbusy++; if (show_busybufs > 0) { printf( "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:", nbusy, bp, bp->b_vp, bp->b_flags, (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno); BUF_LOCKPRINTINFO(bp); if (show_busybufs > 1) vn_printf(bp->b_vp, "vnode content: "); } } } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("Giving up on %d buffers\n", nbusy); DELAY(5000000); /* 5 seconds */ } else { if (!first_buf_printf) printf("Final sync complete\n"); /* * Unmount filesystems */ if (panicstr == NULL) vfs_unmountall(); } swapoff_all(); DELAY(100000); /* wait for console output to finish */ } static void bpmap_qenter(struct buf *bp) { BUF_CHECK_MAPPED(bp); /* * bp->b_data is relative to bp->b_offset, but * bp->b_offset may be offset into the first page. */ bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | (vm_offset_t)(bp->b_offset & PAGE_MASK)); } static inline struct bufdomain * bufdomain(struct buf *bp) { return (&bdomain[bp->b_domain]); } static struct bufqueue * bufqueue(struct buf *bp) { switch (bp->b_qindex) { case QUEUE_NONE: /* FALLTHROUGH */ case QUEUE_SENTINEL: return (NULL); case QUEUE_EMPTY: return (&bqempty); case QUEUE_DIRTY: return (&bufdomain(bp)->bd_dirtyq); case QUEUE_CLEAN: return (&bufdomain(bp)->bd_subq[bp->b_subqueue]); default: break; } panic("bufqueue(%p): Unhandled type %d\n", bp, bp->b_qindex); } /* * Return the locked bufqueue that bp is a member of. */ static struct bufqueue * bufqueue_acquire(struct buf *bp) { struct bufqueue *bq, *nbq; /* * bp can be pushed from a per-cpu queue to the * cleanq while we're waiting on the lock. Retry * if the queues don't match. */ bq = bufqueue(bp); BQ_LOCK(bq); for (;;) { nbq = bufqueue(bp); if (bq == nbq) break; BQ_UNLOCK(bq); BQ_LOCK(nbq); bq = nbq; } return (bq); } /* * binsfree: * * Insert the buffer into the appropriate free list. Requires a * locked buffer on entry and buffer is unlocked before return. */ static void binsfree(struct buf *bp, int qindex) { struct bufdomain *bd; struct bufqueue *bq; KASSERT(qindex == QUEUE_CLEAN || qindex == QUEUE_DIRTY, ("binsfree: Invalid qindex %d", qindex)); BUF_ASSERT_XLOCKED(bp); /* * Handle delayed bremfree() processing. */ if (bp->b_flags & B_REMFREE) { if (bp->b_qindex == qindex) { bp->b_flags |= B_REUSE; bp->b_flags &= ~B_REMFREE; BUF_UNLOCK(bp); return; } bq = bufqueue_acquire(bp); bq_remove(bq, bp); BQ_UNLOCK(bq); } bd = bufdomain(bp); if (qindex == QUEUE_CLEAN) { if (bd->bd_lim != 0) bq = &bd->bd_subq[PCPU_GET(cpuid)]; else bq = bd->bd_cleanq; } else bq = &bd->bd_dirtyq; bq_insert(bq, bp, true); } /* * buf_free: * * Free a buffer to the buf zone once it no longer has valid contents. 
*/ static void buf_free(struct buf *bp) { if (bp->b_flags & B_REMFREE) bremfreef(bp); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 1"); if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } if (!LIST_EMPTY(&bp->b_dep)) buf_deallocate(bp); bufkva_free(bp); atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1); BUF_UNLOCK(bp); uma_zfree(buf_zone, bp); } /* * buf_import: * * Import bufs into the uma cache from the buf list. The system still * expects a static array of bufs and much of the synchronization * around bufs assumes type stable storage. As a result, UMA is used * only as a per-cpu cache of bufs still maintained on a global list. */ static int buf_import(void *arg, void **store, int cnt, int domain, int flags) { struct buf *bp; int i; BQ_LOCK(&bqempty); for (i = 0; i < cnt; i++) { bp = TAILQ_FIRST(&bqempty.bq_queue); if (bp == NULL) break; bq_remove(&bqempty, bp); store[i] = bp; } BQ_UNLOCK(&bqempty); return (i); } /* * buf_release: * * Release bufs from the uma cache back to the buffer queues. */ static void buf_release(void *arg, void **store, int cnt) { struct bufqueue *bq; struct buf *bp; int i; bq = &bqempty; BQ_LOCK(bq); for (i = 0; i < cnt; i++) { bp = store[i]; /* Inline bq_insert() to batch locking. */ TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist); bp->b_flags &= ~(B_AGE | B_REUSE); bq->bq_len++; bp->b_qindex = bq->bq_index; } BQ_UNLOCK(bq); } /* * buf_alloc: * * Allocate an empty buffer header. */ static struct buf * buf_alloc(struct bufdomain *bd) { struct buf *bp; int freebufs; /* * We can only run out of bufs in the buf zone if the average buf * is less than BKVASIZE. In this case the actual wait/block will * come from buf_recycle() failing to flush one of these small bufs. */ bp = NULL; freebufs = atomic_fetchadd_int(&bd->bd_freebuffers, -1); if (freebufs > 0) bp = uma_zalloc(buf_zone, M_NOWAIT); if (bp == NULL) { atomic_add_int(&bd->bd_freebuffers, 1); bufspace_daemon_wakeup(bd); counter_u64_add(numbufallocfails, 1); return (NULL); } /* * Wake-up the bufspace daemon on transition below threshold. */ if (freebufs == bd->bd_lofreebuffers) bufspace_daemon_wakeup(bd); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) panic("getnewbuf_empty: Locked buf %p on free queue.", bp); KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p.", bp, bp->b_vp)); KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0, ("invalid buffer %p flags %#x", bp, bp->b_flags)); KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags)); KASSERT(bp->b_npages == 0, ("bp: %p still has %d vm pages\n", bp, bp->b_npages)); KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp)); KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp)); bp->b_domain = BD_DOMAIN(bd); bp->b_flags = 0; bp->b_ioflags = 0; bp->b_xflags = 0; bp->b_vflags = 0; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_offset = NOOFFSET; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_bufobj = NULL; bp->b_data = bp->b_kvabase = unmapped_buf; bp->b_fsprivate1 = NULL; bp->b_fsprivate2 = NULL; bp->b_fsprivate3 = NULL; LIST_INIT(&bp->b_dep); return (bp); } /* * buf_recycle: * * Free a buffer from the given bufqueue. kva controls whether the * freed buf must own some kva resources. This is used for * defragmenting.
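 *
 * Note on the scan below: buffers marked B_REUSE get a second chance;
 * the first pass merely rotates them to the queue tail and clears the
 * flag, so only buffers that were not re-referenced since the last
 * pass are actually reclaimed. This approximates LRU without
 * requeueing on every access.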
*/ static int buf_recycle(struct bufdomain *bd, bool kva) { struct bufqueue *bq; struct buf *bp, *nbp; if (kva) counter_u64_add(bufdefragcnt, 1); nbp = NULL; bq = bd->bd_cleanq; BQ_LOCK(bq); KASSERT(BQ_LOCKPTR(bq) == BD_LOCKPTR(bd), ("buf_recycle: Locks don't match")); nbp = TAILQ_FIRST(&bq->bq_queue); /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ while ((bp = nbp) != NULL) { /* * Calculate next bp (we can only use it if we do not * release the bqlock). */ nbp = TAILQ_NEXT(bp, b_freelist); /* * If we are defragging then we need a buffer with * some kva to reclaim. */ if (kva && bp->b_kvasize == 0) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) continue; /* * Implement a second chance algorithm for frequently * accessed buffers. */ if ((bp->b_flags & B_REUSE) != 0) { TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist); TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist); bp->b_flags &= ~B_REUSE; BUF_UNLOCK(bp); continue; } /* * Skip buffers with background writes in progress. */ if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { BUF_UNLOCK(bp); continue; } KASSERT(bp->b_qindex == QUEUE_CLEAN, ("buf_recycle: inconsistent queue %d bp %p", bp->b_qindex, bp)); KASSERT(bp->b_domain == BD_DOMAIN(bd), ("getnewbuf: queue domain %d doesn't match request %d", bp->b_domain, (int)BD_DOMAIN(bd))); /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. */ bq_remove(bq, bp); BQ_UNLOCK(bq); /* * Requeue the background write buffer with error and * restart the scan. */ if ((bp->b_vflags & BV_BKGRDERR) != 0) { bqrelse(bp); BQ_LOCK(bq); nbp = TAILQ_FIRST(&bq->bq_queue); continue; } bp->b_flags |= B_INVAL; brelse(bp); return (0); } bd->bd_wanted = 1; BQ_UNLOCK(bq); return (ENOBUFS); } /* * bremfree: * * Mark the buffer for removal from the appropriate free list. * */ void bremfree(struct buf *bp) { CTR3(KTR_BUF, "bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT((bp->b_flags & B_REMFREE) == 0, ("bremfree: buffer %p already marked for delayed removal.", bp)); KASSERT(bp->b_qindex != QUEUE_NONE, ("bremfree: buffer %p not on a queue.", bp)); BUF_ASSERT_XLOCKED(bp); bp->b_flags |= B_REMFREE; } /* * bremfreef: * * Force an immediate removal from a free list. Used only in nfs when * it abuses the b_freelist pointer. */ void bremfreef(struct buf *bp) { struct bufqueue *bq; bq = bufqueue_acquire(bp); bq_remove(bq, bp); BQ_UNLOCK(bq); } static void bq_init(struct bufqueue *bq, int qindex, int subqueue, const char *lockname) { mtx_init(&bq->bq_lock, lockname, NULL, MTX_DEF); TAILQ_INIT(&bq->bq_queue); bq->bq_len = 0; bq->bq_index = qindex; bq->bq_subqueue = subqueue; } static void bd_init(struct bufdomain *bd) { int i; bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1]; bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock"); bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock"); for (i = 0; i <= mp_maxid; i++) bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i, "bufq clean subqueue lock"); mtx_init(&bd->bd_run_lock, "bufspace daemon run lock", NULL, MTX_DEF); } /* * bq_remove: * * Removes a buffer from the free list, must be called with the * correct qlock held. 
*/ static void bq_remove(struct bufqueue *bq, struct buf *bp) { CTR3(KTR_BUF, "bq_remove(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_qindex != QUEUE_NONE, ("bq_remove: buffer %p not on a queue.", bp)); KASSERT(bufqueue(bp) == bq, ("bq_remove: Remove buffer %p from wrong queue.", bp)); BQ_ASSERT_LOCKED(bq); if (bp->b_qindex != QUEUE_EMPTY) { BUF_ASSERT_XLOCKED(bp); } KASSERT(bq->bq_len >= 1, ("queue %d underflow", bp->b_qindex)); TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist); bq->bq_len--; bp->b_qindex = QUEUE_NONE; bp->b_flags &= ~(B_REMFREE | B_REUSE); } static void bd_flush(struct bufdomain *bd, struct bufqueue *bq) { struct buf *bp; BQ_ASSERT_LOCKED(bq); if (bq != bd->bd_cleanq) { BD_LOCK(bd); while ((bp = TAILQ_FIRST(&bq->bq_queue)) != NULL) { TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist); TAILQ_INSERT_TAIL(&bd->bd_cleanq->bq_queue, bp, b_freelist); bp->b_subqueue = bd->bd_cleanq->bq_subqueue; } bd->bd_cleanq->bq_len += bq->bq_len; bq->bq_len = 0; } if (bd->bd_wanted) { bd->bd_wanted = 0; wakeup(&bd->bd_wanted); } if (bq != bd->bd_cleanq) BD_UNLOCK(bd); } static int bd_flushall(struct bufdomain *bd) { struct bufqueue *bq; int flushed; int i; if (bd->bd_lim == 0) return (0); flushed = 0; for (i = 0; i <= mp_maxid; i++) { bq = &bd->bd_subq[i]; if (bq->bq_len == 0) continue; BQ_LOCK(bq); bd_flush(bd, bq); BQ_UNLOCK(bq); flushed++; } return (flushed); } static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock) { struct bufdomain *bd; if (bp->b_qindex != QUEUE_NONE) panic("bq_insert: free buffer %p onto another queue?", bp); bd = bufdomain(bp); if (bp->b_flags & B_AGE) { /* Place this buf directly on the real queue. */ if (bq->bq_index == QUEUE_CLEAN) bq = bd->bd_cleanq; BQ_LOCK(bq); TAILQ_INSERT_HEAD(&bq->bq_queue, bp, b_freelist); } else { BQ_LOCK(bq); TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist); } bp->b_flags &= ~(B_AGE | B_REUSE); bq->bq_len++; bp->b_qindex = bq->bq_index; bp->b_subqueue = bq->bq_subqueue; /* * Unlock before we notify so that we don't wakeup a waiter that * fails a trylock on the buf and sleeps again. */ if (unlock) BUF_UNLOCK(bp); if (bp->b_qindex == QUEUE_CLEAN) { /* * Flush the per-cpu queue and notify any waiters. */ if (bd->bd_wanted || (bq != bd->bd_cleanq && bq->bq_len >= bd->bd_lim)) bd_flush(bd, bq); } BQ_UNLOCK(bq); } /* * bufkva_free: * * Free the kva allocation for a buffer. * */ static void bufkva_free(struct buf *bp) { #ifdef INVARIANTS if (bp->b_kvasize == 0) { KASSERT(bp->b_kvabase == unmapped_buf && bp->b_data == unmapped_buf, ("Leaked KVA space on %p", bp)); } else if (buf_mapped(bp)) BUF_CHECK_MAPPED(bp); else BUF_CHECK_UNMAPPED(bp); #endif if (bp->b_kvasize == 0) return; vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize); counter_u64_add(bufkvaspace, -bp->b_kvasize); counter_u64_add(buffreekvacnt, 1); bp->b_data = bp->b_kvabase = unmapped_buf; bp->b_kvasize = 0; } /* * bufkva_alloc: * * Allocate the buffer KVA and set b_kvasize and b_kvabase. */ static int bufkva_alloc(struct buf *bp, int maxsize, int gbflags) { vm_offset_t addr; int error; KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0, ("Invalid gbflags 0x%x in %s", gbflags, __func__)); bufkva_free(bp); addr = 0; error = vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr); if (error != 0) { /* * Buffer map is too fragmented. Request the caller * to defragment the map. 
*/ return (error); } bp->b_kvabase = (caddr_t)addr; bp->b_kvasize = maxsize; counter_u64_add(bufkvaspace, bp->b_kvasize); if ((gbflags & GB_UNMAPPED) != 0) { bp->b_data = unmapped_buf; BUF_CHECK_UNMAPPED(bp); } else { bp->b_data = bp->b_kvabase; BUF_CHECK_MAPPED(bp); } return (0); } /* * bufkva_reclaim: * * Reclaim buffer kva by freeing buffers holding kva. This is a vmem * callback that fires to avoid returning failure. */ static void bufkva_reclaim(vmem_t *vmem, int flags) { bool done; int q; int i; done = false; for (i = 0; i < 5; i++) { for (q = 0; q < buf_domains; q++) if (buf_recycle(&bdomain[q], true) != 0) done = true; if (done) break; } return; } /* * Attempt to initiate asynchronous I/O on read-ahead blocks. We must * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set, * the buffer is valid and we do not have to do anything. */ static void breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, int cnt, struct ucred * cred, int flags, void (*ckhashfunc)(struct buf *)) { struct buf *rabp; + struct thread *td; int i; + td = curthread; + for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0, 0); if ((rabp->b_flags & B_CACHE) != 0) { brelse(rabp); continue; } - if (!TD_IS_IDLETHREAD(curthread)) { #ifdef RACCT - if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, rabp, 0); - PROC_UNLOCK(curproc); - } -#endif /* RACCT */ - curthread->td_ru.ru_inblock++; + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, rabp, 0); + PROC_UNLOCK(curproc); } +#endif /* RACCT */ + td->td_ru.ru_inblock++; rabp->b_flags |= B_ASYNC; rabp->b_flags &= ~B_INVAL; if ((flags & GB_CKHASH) != 0) { rabp->b_flags |= B_CKHASH; rabp->b_ckhashcalc = ckhashfunc; } rabp->b_ioflags &= ~BIO_ERROR; rabp->b_iocmd = BIO_READ; if (rabp->b_rcred == NOCRED && cred != NOCRED) rabp->b_rcred = crhold(cred); vfs_busy_pages(rabp, 0); BUF_KERNPROC(rabp); rabp->b_iooffset = dbtob(rabp->b_blkno); bstrategy(rabp); } } /* * Entry point for bread() and breadn() via #defines in sys/buf.h. * * Get a buffer with the specified data. Look in the cache first. We * must clear BIO_ERROR and B_INVAL prior to initiating I/O. If B_CACHE * is set, the buffer is valid and we do not have to do anything, see * getblk(). Also starts asynchronous I/O on read-ahead blocks. * * Always return a NULL buffer pointer (in bpp) when returning an error. */ int breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, int *rabsize, int cnt, struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *), struct buf **bpp) { struct buf *bp; struct thread *td; int error, readwait, rv; CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size); td = curthread; /* * Can only return NULL if GB_LOCK_NOWAIT or GB_SPARSE flags * are specified. 
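 *
 * (Most consumers reach this function through the bread()/breadn()
 * macros in sys/buf.h; from memory, and subject to checking, bread()
 * expands to roughly:
 *
 *	breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, NULL, bpp);
 *
 * i.e. no read-ahead list, no flags and no check hash.)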
*/ error = getblkx(vp, blkno, size, 0, 0, flags, &bp); if (error != 0) { *bpp = NULL; return (error); } flags &= ~GB_NOSPARSE; *bpp = bp; /* * If not found in cache, do some I/O */ readwait = 0; if ((bp->b_flags & B_CACHE) == 0) { - if (!TD_IS_IDLETHREAD(td)) { #ifdef RACCT - if (racct_enable) { - PROC_LOCK(td->td_proc); - racct_add_buf(td->td_proc, bp, 0); - PROC_UNLOCK(td->td_proc); - } -#endif /* RACCT */ - td->td_ru.ru_inblock++; + if (racct_enable) { + PROC_LOCK(td->td_proc); + racct_add_buf(td->td_proc, bp, 0); + PROC_UNLOCK(td->td_proc); } +#endif /* RACCT */ + td->td_ru.ru_inblock++; bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; if ((flags & GB_CKHASH) != 0) { bp->b_flags |= B_CKHASH; bp->b_ckhashcalc = ckhashfunc; } bp->b_ioflags &= ~BIO_ERROR; if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); ++readwait; } /* * Attempt to initiate asynchronous I/O on read-ahead blocks. */ breada(vp, rablkno, rabsize, cnt, cred, flags, ckhashfunc); rv = 0; if (readwait) { rv = bufwait(bp); if (rv != 0) { brelse(bp); *bpp = NULL; } } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ int bufwrite(struct buf *bp) { int oldflags; struct vnode *vp; long space; int vp_md; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if ((bp->b_bufobj->bo_flag & BO_DEAD) != 0) { bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_CACHE; brelse(bp); return (ENXIO); } if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (bp->b_flags & B_BARRIER) atomic_add_long(&barrierwrites, 1); oldflags = bp->b_flags; BUF_ASSERT_HELD(bp); KASSERT(!(bp->b_vflags & BV_BKGRDINPROG), ("FFS background buffer should not get here %p", bp)); vp = bp->b_vp; if (vp) vp_md = vp->v_vflag & VV_MD; else vp_md = 0; /* * Mark the buffer clean. Increment the bufobj write count * before bundirty() call, to prevent other thread from seeing * empty dirty list and zero counter for writes in progress, * falsely indicating that the bufobj is clean. */ bufobj_wref(bp->b_bufobj); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_flags |= B_CACHE; bp->b_iocmd = BIO_WRITE; vfs_busy_pages(bp, 1); /* * Normal bwrites pipeline writes */ bp->b_runningbufspace = bp->b_bufsize; space = atomic_fetchadd_long(&runningbufspace, bp->b_runningbufspace); - if (!TD_IS_IDLETHREAD(curthread)) { #ifdef RACCT - if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, bp, 1); - PROC_UNLOCK(curproc); - } -#endif /* RACCT */ - curthread->td_ru.ru_oublock++; + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, bp, 1); + PROC_UNLOCK(curproc); } +#endif /* RACCT */ + curthread->td_ru.ru_oublock++; if (oldflags & B_ASYNC) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); buf_track(bp, __func__); bstrategy(bp); if ((oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); brelse(bp); return (rtval); } else if (space > hirunningspace) { /* * don't allow the async write to saturate the I/O * system. We will not deadlock here because * we are blocking waiting for I/O that is already in-progress * to complete. 
We do not block here if it is the update * or syncer daemon trying to clean up as that can lead * to deadlock. */ if ((curthread->td_pflags & TDP_NORUNNINGBUF) == 0 && !vp_md) waitrunningbufspace(); } return (0); } void bufbdflush(struct bufobj *bo, struct buf *bp) { struct buf *nbp; if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) { (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread); altbufferflushes++; } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) { BO_LOCK(bo); /* * Try to find a buffer to flush. */ TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { if ((nbp->b_vflags & BV_BKGRDINPROG) || BUF_LOCK(nbp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; if (bp == nbp) panic("bdwrite: found ourselves"); BO_UNLOCK(bo); /* Don't call buf_countdeps() with the bo lock held. */ if (buf_countdeps(nbp, 0)) { BO_LOCK(bo); BUF_UNLOCK(nbp); continue; } if (nbp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(nbp); } else { bremfree(nbp); bawrite(nbp); } dirtybufferflushes++; break; } if (nbp == NULL) BO_UNLOCK(bo); } } /* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. * * Note that since the buffer must be completely valid, we can safely * set B_CACHE. In fact, we have to set B_CACHE here rather than in * biodone() in order to prevent getblk from writing the buffer * out synchronously. */ void bdwrite(struct buf *bp) { struct thread *td = curthread; struct vnode *vp; struct bufobj *bo; CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT((bp->b_flags & B_BARRIER) == 0, ("Barrier request in delayed write %p", bp)); BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return; } /* * If we have too many dirty buffers, don't create any more. * If we are wildly over our limit, then force a complete * cleanup. Otherwise, just keep the situation from getting * out of control. Note that we have to avoid a recursive * disaster and not try to clean up after our own cleanup! */ vp = bp->b_vp; bo = bp->b_bufobj; if ((td->td_pflags & (TDP_COWINPROGRESS|TDP_INBDFLUSH)) == 0) { td->td_pflags |= TDP_INBDFLUSH; BO_BDFLUSH(bo, bp); td->td_pflags &= ~TDP_INBDFLUSH; } else recursiveflushes++; bdirty(bp); /* * Set B_CACHE, indicating that the buffer is fully valid. This is * true even of NFS now. */ bp->b_flags |= B_CACHE; /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other data structure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if (vp->v_type != VCHR && bp->b_lblkno == bp->b_blkno) { VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } buf_track(bp, __func__); /* * Set the *dirty* buffer range based upon the VM system dirty * pages. * * Mark the buffer pages as clean. We need to do this here to * satisfy the vnode_pager and the pageout daemon, so that it * thinks that the pages have been "cleaned". Note that since * the pages are in a delayed write buffer -- the VFS layer * "will" see that the pages get written out on the next sync, * or perhaps the cluster will be completed. */ vfs_clean_pages_dirty_buf(bp); bqrelse(bp); /* * note: we cannot initiate I/O from a bdwrite even if we wanted to, * due to the softdep code.
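 *
 * (For scale: bufinit() sets dirtybufthresh to 90% of hidirtybuffers,
 * so the BO_BDFLUSH hook used above starts pushing out a vnode's
 * delayed writes shortly before the global dirty-buffer limit would
 * make bwillwrite() block.)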
*/ } /* * bdirty: * * Turn buffer into delayed write request. We must clear BIO_READ and * B_RELBUF, and we must set B_DELWRI. We reassign the buffer to * itself to properly update it in the dirty/clean lists. We mark it * B_DONE to ensure that any asynchronization of the buffer properly * clears B_DONE ( else a panic will occur later ). * * bdirty() is kinda like bdwrite() - we have to clear B_INVAL which * might have been set pre-getblk(). Unlike bwrite/bdwrite, bdirty() * should only be called if the buffer is known-good. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * The buffer must be on QUEUE_NONE. */ void bdirty(struct buf *bp) { CTR3(KTR_BUF, "bdirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); BUF_ASSERT_HELD(bp); bp->b_flags &= ~(B_RELBUF); bp->b_iocmd = BIO_WRITE; if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= /* XXX B_DONE | */ B_DELWRI; reassignbuf(bp); bdirtyadd(bp); } } /* * bundirty: * * Clear B_DELWRI for buffer. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * The buffer must be on QUEUE_NONE. */ void bundirty(struct buf *bp) { CTR3(KTR_BUF, "bundirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); BUF_ASSERT_HELD(bp); if (bp->b_flags & B_DELWRI) { bp->b_flags &= ~B_DELWRI; reassignbuf(bp); bdirtysub(bp); } /* * Since it is now being written, we can clear its deferred write flag. */ bp->b_flags &= ~B_DEFERRED; } /* * bawrite: * * Asynchronous write. Start output on a buffer, but do not wait for * it to complete. The buffer is released when the output completes. * * bwrite() ( or the VOP routine anyway ) is responsible for handling * B_INVAL buffers. Not us. */ void bawrite(struct buf *bp) { bp->b_flags |= B_ASYNC; (void) bwrite(bp); } /* * babarrierwrite: * * Asynchronous barrier write. Start output on a buffer, but do not * wait for it to complete. Place a write barrier after this write so * that this buffer and all buffers written before it are committed to * the disk before any buffers written after this write are committed * to the disk. The buffer is released when the output completes. */ void babarrierwrite(struct buf *bp) { bp->b_flags |= B_ASYNC | B_BARRIER; (void) bwrite(bp); } /* * bbarrierwrite: * * Synchronous barrier write. Start output on a buffer and wait for * it to complete. Place a write barrier after this write so that * this buffer and all buffers written before it are committed to * the disk before any buffers written after this write are committed * to the disk. The buffer is released when the output completes. */ int bbarrierwrite(struct buf *bp) { bp->b_flags |= B_BARRIER; return (bwrite(bp)); } /* * bwillwrite: * * Called prior to the locking of any vnodes when we are expecting to * write. We do not want to starve the buffer cache with too many * dirty buffers so we block here. By blocking prior to the locking * of any vnodes we attempt to avoid the situation where a locked vnode * prevents the various system daemons from flushing related buffers. 
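 *
 * A typical caller sequence, sketched from the write path (hedged;
 * details vary by release):
 *
 *	bwillwrite();			-- throttle before any locks
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	error = VOP_WRITE(vp, uio, ioflag, cred);
 *	VOP_UNLOCK(vp, 0);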
*/ void bwillwrite(void) { if (buf_dirty_count_severe()) { mtx_lock(&bdirtylock); while (buf_dirty_count_severe()) { bdirtywait = 1; msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4), "flswai", 0); } mtx_unlock(&bdirtylock); } } /* * Return true if we have too many dirty buffers. */ int buf_dirty_count_severe(void) { return (!BIT_EMPTY(BUF_DOMAINS, &bdhidirty)); } /* * brelse: * * Release a busy buffer and, if requested, free its resources. The * buffer will be stashed in the appropriate bufqueue[] allowing it * to be accessed later as a cache entity or reused for other purposes. */ void brelse(struct buf *bp) { struct mount *v_mnt; int qindex; /* * Many functions erroneously call brelse with a NULL bp under rare * error conditions. Simply return when called with a NULL bp. */ if (bp == NULL) return; CTR3(KTR_BUF, "brelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0, ("brelse: non-VMIO buffer marked NOREUSE")); if (BUF_LOCKRECURSED(bp)) { /* * Do not process, in particular, do not handle the * B_INVAL/B_RELBUF and do not release to free list. */ BUF_UNLOCK(bp); return; } if (bp->b_flags & B_MANAGED) { bqrelse(bp); return; } if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { BO_LOCK(bp->b_bufobj); bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); bdirty(bp); } if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && (bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) && !(bp->b_flags & B_INVAL)) { /* * Failed write, redirty. All errors except ENXIO (which * means the device is gone) are treated as being * transient. * * XXX Treating EIO as transient is not correct; the * contract with the local storage device drivers is that * they will only return EIO once the I/O is no longer * retriable. Network I/O also respects this through the * guarantees of TCP and/or the internal retries of NFS. * ENOMEM might be transient, but we also have no way of * knowing when its ok to retry/reschedule. In general, * this entire case should be made obsolete through better * error handling/recovery and resource scheduling. * * Do this also for buffers that failed with ENXIO, but have * non-empty dependencies - the soft updates code might need * to access the buffer to untangle them. * * Must clear BIO_ERROR to prevent pages from being scrapped. */ bp->b_ioflags &= ~BIO_ERROR; bdirty(bp); } else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) || (bp->b_ioflags & BIO_ERROR) || (bp->b_bufsize <= 0)) { /* * Either a failed read I/O, or we were asked to free or not * cache the buffer, or we failed to write to a device that's * no longer present. */ bp->b_flags |= B_INVAL; if (!LIST_EMPTY(&bp->b_dep)) buf_deallocate(bp); if (bp->b_flags & B_DELWRI) bdirtysub(bp); bp->b_flags &= ~(B_DELWRI | B_CACHE); if ((bp->b_flags & B_VMIO) == 0) { allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * We must clear B_RELBUF if B_DELWRI is set. If vfs_vmio_truncate() * is called with B_DELWRI set, the underlying pages may wind up * getting freed causing a previous write (bdwrite()) to get 'lost' * because pages associated with a B_DELWRI bp are marked clean. * * We still allow the B_INVAL case to call vfs_vmio_truncate(), even * if B_DELWRI is set. */ if (bp->b_flags & B_DELWRI) bp->b_flags &= ~B_RELBUF; /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, not even NFS buffers now. 
Two flags affect this. If * B_INVAL, the struct buf is invalidated but the VM object is kept * around ( i.e. so it is trivial to reconstitute the buffer later ). * * If BIO_ERROR or B_NOCACHE is set, pages in the VM object will be * invalidated. BIO_ERROR cannot be set for a failed write unless the * buffer is also B_INVAL because it hits the re-dirtying code above. * * Normally we can do this whether a buffer is B_DELWRI or not. If * the buffer is an NFS buffer, it is tracking piecemeal writes or * the commit state and we cannot afford to lose the buffer. If the * buffer has a background write in progress, we need to keep it * around to prevent it from being reconstituted and starting a second * background write. */ v_mnt = bp->b_vp != NULL ? bp->b_vp->v_mount : NULL; if ((bp->b_flags & B_VMIO) && (bp->b_flags & B_NOCACHE || (bp->b_ioflags & BIO_ERROR && bp->b_iocmd == BIO_READ)) && (v_mnt == NULL || (v_mnt->mnt_vfc->vfc_flags & VFCF_NETWORK) == 0 || vn_isdisk(bp->b_vp, NULL) || (bp->b_flags & B_DELWRI) == 0)) { vfs_vmio_invalidate(bp); allocbuf(bp, 0); } if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0 || (bp->b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE) { allocbuf(bp, 0); bp->b_flags &= ~B_NOREUSE; if (bp->b_vp != NULL) brelvp(bp); } /* * If the buffer has junk contents signal it and eventually * clean up B_DELWRI and disassociate the vnode so that gbincore() * doesn't find it. */ if (bp->b_bufsize == 0 || (bp->b_ioflags & BIO_ERROR) != 0 || (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF)) != 0) bp->b_flags |= B_INVAL; if (bp->b_flags & B_INVAL) { if (bp->b_flags & B_DELWRI) bundirty(bp); if (bp->b_vp) brelvp(bp); } buf_track(bp, __func__); /* buffers with no memory */ if (bp->b_bufsize == 0) { buf_free(bp); return; } /* buffers with junk contents */ if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || (bp->b_ioflags & BIO_ERROR)) { bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 2"); qindex = QUEUE_CLEAN; bp->b_flags |= B_AGE; /* remaining buffers */ } else if (bp->b_flags & B_DELWRI) qindex = QUEUE_DIRTY; else qindex = QUEUE_CLEAN; if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("brelse: not dirty"); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT); /* binsfree unlocks bp. */ binsfree(bp, qindex); } /* * Release a buffer back to the appropriate queue but do not try to free * it. The buffer is expected to be used again soon. * * bqrelse() is used by bdwrite() to requeue a delayed write, and used by * biodone() to requeue an async I/O on completion. It is also used when * known good buffers need to be requeued but we think we may need the data * again soon. * * XXX we should be able to leave the B_RELBUF hint set on completion.
*/ void bqrelse(struct buf *bp) { int qindex; CTR3(KTR_BUF, "bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); qindex = QUEUE_NONE; if (BUF_LOCKRECURSED(bp)) { /* do not release to free list */ BUF_UNLOCK(bp); return; } bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); if (bp->b_flags & B_MANAGED) { if (bp->b_flags & B_REMFREE) bremfreef(bp); goto out; } /* buffers with stale but valid contents */ if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { BO_LOCK(bp->b_bufobj); bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); qindex = QUEUE_DIRTY; } else { if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("bqrelse: not dirty"); if ((bp->b_flags & B_NOREUSE) != 0) { brelse(bp); return; } qindex = QUEUE_CLEAN; } buf_track(bp, __func__); /* binsfree unlocks bp. */ binsfree(bp, qindex); return; out: buf_track(bp, __func__); /* unlock */ BUF_UNLOCK(bp); } /* * Complete I/O to a VMIO backed page. Validate the pages as appropriate, * restore bogus pages. */ static void vfs_vmio_iodone(struct buf *bp) { vm_ooffset_t foff; vm_page_t m; vm_object_t obj; struct vnode *vp __unused; int i, iosize, resid; bool bogus; obj = bp->b_bufobj->bo_object; KASSERT(obj->paging_in_progress >= bp->b_npages, ("vfs_vmio_iodone: paging in progress(%d) < b_npages(%d)", obj->paging_in_progress, bp->b_npages)); vp = bp->b_vp; KASSERT(vp->v_holdcnt > 0, ("vfs_vmio_iodone: vnode %p has zero hold count", vp)); KASSERT(vp->v_object != NULL, ("vfs_vmio_iodone: vnode %p has no vm_object", vp)); foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_vmio_iodone: bp %p has no buffer offset", bp)); bogus = false; iosize = bp->b_bcount - bp->b_resid; VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff; if (resid > iosize) resid = iosize; /* * cleanup bogus pages, restoring the originals */ m = bp->b_pages[i]; if (m == bogus_page) { bogus = true; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (m == NULL) panic("biodone: page disappeared!"); bp->b_pages[i] = m; } else if ((bp->b_iocmd == BIO_READ) && resid > 0) { /* * In the write case, the valid and clean bits are * already changed correctly ( see bdwrite() ), so we * only need to do this here in the read case. */ KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK, resid)) == 0, ("vfs_vmio_iodone: page %p " "has unexpected dirty bits", m)); vfs_page_set_valid(bp, foff, m); } KASSERT(OFF_TO_IDX(foff) == m->pindex, ("vfs_vmio_iodone: foff(%jd)/pindex(%ju) mismatch", (intmax_t)foff, (uintmax_t)m->pindex)); vm_page_sunbusy(m); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; iosize -= resid; } vm_object_pip_wakeupn(obj, bp->b_npages); VM_OBJECT_WUNLOCK(obj); if (bogus && buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * Unwire a page held by a buf and either free it or update the page queues to * reflect its recent use. */ static void vfs_vmio_unwire(struct buf *bp, vm_page_t m) { bool freed; vm_page_lock(m); if (vm_page_unwire_noq(m)) { if ((bp->b_flags & B_DIRECT) != 0) freed = vm_page_try_to_free(m); else freed = false; if (!freed) { /* * Use a racy check of the valid bits to determine * whether we can accelerate reclamation of the page. 
* The valid bits will be stable unless the page is * being mapped or is referenced by multiple buffers, * and in those cases we expect races to be rare. At * worst we will either accelerate reclamation of a * valid page and violate LRU, or unnecessarily defer * reclamation of an invalid page. * * The B_NOREUSE flag marks data that is not expected to * be reused, so accelerate reclamation in that case * too. Otherwise, maintain LRU. */ if (m->valid == 0 || (bp->b_flags & B_NOREUSE) != 0) vm_page_deactivate_noreuse(m); else if (vm_page_active(m)) vm_page_reference(m); else vm_page_deactivate(m); } } vm_page_unlock(m); } /* * Perform page invalidation when a buffer is released. The fully invalid * pages will be reclaimed later in vfs_vmio_truncate(). */ static void vfs_vmio_invalidate(struct buf *bp) { vm_object_t obj; vm_page_t m; int i, resid, poffset, presid; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); } else BUF_CHECK_UNMAPPED(bp); /* * Get the base offset and length of the buffer. Note that * in the VMIO case if the buffer block size is not * page-aligned then b_data pointer may not be page-aligned. * But our b_pages[] array *IS* page aligned. * * block sizes less than DEV_BSIZE (usually 512) are not * supported due to the page granularity bits (m->valid, * m->dirty, etc...). * * See man buf(9) for more information */ obj = bp->b_bufobj->bo_object; resid = bp->b_bufsize; poffset = bp->b_offset & PAGE_MASK; VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) panic("vfs_vmio_invalidate: Unexpected bogus page."); bp->b_pages[i] = NULL; presid = resid > (PAGE_SIZE - poffset) ? (PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, "mbncsh", true); VM_OBJECT_WLOCK(obj); } if (pmap_page_wired_mappings(m) == 0) vm_page_set_invalid(m, poffset, presid); vfs_vmio_unwire(bp, m); resid -= presid; poffset = 0; } VM_OBJECT_WUNLOCK(obj); bp->b_npages = 0; } /* * Page-granular truncation of an existing VMIO buffer. */ static void vfs_vmio_truncate(struct buf *bp, int desiredpages) { vm_object_t obj; vm_page_t m; int i; if (bp->b_npages == desiredpages) return; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qremove((vm_offset_t)trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), bp->b_npages - desiredpages); } else BUF_CHECK_UNMAPPED(bp); /* * The object lock is needed only if we will attempt to free pages. */ obj = (bp->b_flags & B_DIRECT) != 0 ? bp->b_bufobj->bo_object : NULL; if (obj != NULL) VM_OBJECT_WLOCK(obj); for (i = desiredpages; i < bp->b_npages; i++) { m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); bp->b_pages[i] = NULL; vfs_vmio_unwire(bp, m); } if (obj != NULL) VM_OBJECT_WUNLOCK(obj); bp->b_npages = desiredpages; } /* * Byte granular extension of VMIO buffers. */ static void vfs_vmio_extend(struct buf *bp, int desiredpages, int size) { /* * We are growing the buffer, possibly in a * byte-granular fashion. */ vm_object_t obj; vm_offset_t toff; vm_offset_t tinc; vm_page_t m; /* * Step 1, bring in the VM pages from the object, allocating * them if necessary. We must clear B_CACHE if these pages * are not valid for the range covered by the buffer.
*/ obj = bp->b_bufobj->bo_object; VM_OBJECT_WLOCK(obj); if (bp->b_npages < desiredpages) { /* * We must allocate system pages since blocking * here could interfere with paging I/O, no * matter which process we are. * * Only exclusive busy can be tested here. * Blocking on shared busy might lead to * deadlocks once allocbuf() is called after * pages are vfs_busy_pages(). */ (void)vm_page_grab_pages(obj, OFF_TO_IDX(bp->b_offset) + bp->b_npages, VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED, &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages); bp->b_npages = desiredpages; } /* * Step 2. We've loaded the pages into the buffer, * we have to figure out if we can still have B_CACHE * set. Note that B_CACHE is set according to the * byte-granular range ( bcount and size ), not the * aligned range ( newbsize ). * * The VM test is against m->valid, which is DEV_BSIZE * aligned. Needless to say, the validity of the data * needs to also be DEV_BSIZE aligned. Note that this * fails with NFS if the server or some other client * extends the file's EOF. If our buffer is resized, * B_CACHE may remain set! XXX */ toff = bp->b_bcount; tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); while ((bp->b_flags & B_CACHE) && toff < size) { vm_pindex_t pi; if (tinc > (size - toff)) tinc = size - toff; pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT; m = bp->b_pages[pi]; vfs_buf_test_cache(bp, bp->b_offset, toff, tinc, m); toff += tinc; tinc = PAGE_SIZE; } VM_OBJECT_WUNLOCK(obj); /* * Step 3, fixup the KVA pmap. */ if (buf_mapped(bp)) bpmap_qenter(bp); else BUF_CHECK_UNMAPPED(bp); } /* * Check to see if a block at a particular lbn is available for a clustered * write. */ static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno) { struct buf *bpa; int match; match = 0; /* If the buf isn't in core skip it */ if ((bpa = gbincore(&vp->v_bufobj, lblkno)) == NULL) return (0); /* If the buf is busy we don't want to wait for it */ if (BUF_LOCK(bpa, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) return (0); /* Only cluster with valid clusterable delayed write buffers */ if ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) != (B_DELWRI | B_CLUSTEROK)) goto done; if (bpa->b_bufsize != size) goto done; /* * Check to see if it is in the expected place on disk and that the * block has been mapped. */ if ((bpa->b_blkno != bpa->b_lblkno) && (bpa->b_blkno == blkno)) match = 1; done: BUF_UNLOCK(bpa); return (match); } /* * vfs_bio_awrite: * * Implement clustered async writes for clearing out B_DELWRI buffers. * This is much better than the old way of writing only one buffer at * a time. Note that we may not be presented with the buffers in the * correct order, so we search for the cluster in both directions. */ int vfs_bio_awrite(struct buf *bp) { struct bufobj *bo; int i; int j; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int ncl; int nwritten; int size; int maxcl; int gbflags; bo = &vp->v_bufobj; gbflags = (bp->b_data == unmapped_buf) ? GB_UNMAPPED : 0; /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster * rather than at the beginning.
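 *
 * Sizing illustration (values assumed): with an f_iosize of 16 KiB and
 * the common 128 KiB MAXPHYS, maxcl below is 8, so at most eight
 * contiguous clusterable delayed-write blocks are handed to
 * cluster_wbuild() as one 128 KiB write.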
*/ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; BO_RLOCK(bo); for (i = 1; i < maxcl; i++) if (vfs_bio_clcheck(vp, size, lblkno + i, bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0) break; for (j = 1; i + j <= maxcl && j <= lblkno; j++) if (vfs_bio_clcheck(vp, size, lblkno - j, bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0) break; BO_RUNLOCK(bo); --j; ncl = i + j; /* * this is a possible cluster write */ if (ncl != 1) { BUF_UNLOCK(bp); nwritten = cluster_wbuild(vp, size, lblkno - j, ncl, gbflags); return (nwritten); } } bremfree(bp); bp->b_flags |= B_ASYNC; /* * default (old) behavior, writing out only one block * * XXX returns b_bufsize instead of b_bcount for nwritten? */ nwritten = bp->b_bufsize; (void) bwrite(bp); return (nwritten); } /* * getnewbuf_kva: * * Allocate KVA for an empty buf header according to gbflags. */ static int getnewbuf_kva(struct buf *bp, int gbflags, int maxsize) { if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_UNMAPPED) { /* * In order to keep fragmentation sane we only allocate kva * in BKVASIZE chunks. XXX with vmem we can do page size. */ maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; if (maxsize != bp->b_kvasize && bufkva_alloc(bp, maxsize, gbflags)) return (ENOSPC); } return (0); } /* * getnewbuf: * * Find and initialize a new buffer header, freeing up existing buffers * in the bufqueues as necessary. The new buffer is returned locked. * * We block if: * We have insufficient buffer headers * We have insufficient buffer space * buffer_arena is too fragmented ( space reservation fails ) * If we have to flush dirty buffers ( but we try to avoid this ) * * The caller is responsible for releasing the reserved bufspace after * allocbuf() is called. */ static struct buf * getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int maxsize, int gbflags) { struct bufdomain *bd; struct buf *bp; bool metadata, reserved; bp = NULL; KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); if (!unmapped_buf_allowed) gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || vp->v_type == VCHR) metadata = true; else metadata = false; if (vp == NULL) bd = &bdomain[0]; else bd = &bdomain[vp->v_bufobj.bo_domain]; counter_u64_add(getnewbufcalls, 1); reserved = false; do { if (reserved == false && bufspace_reserve(bd, maxsize, metadata) != 0) { counter_u64_add(getnewbufrestarts, 1); continue; } reserved = true; if ((bp = buf_alloc(bd)) == NULL) { counter_u64_add(getnewbufrestarts, 1); continue; } if (getnewbuf_kva(bp, gbflags, maxsize) == 0) return (bp); break; } while (buf_recycle(bd, false) == 0); if (reserved) bufspace_release(bd, maxsize); if (bp != NULL) { bp->b_flags |= B_INVAL; brelse(bp); } bufspace_wait(bd, vp, gbflags, slpflag, slptimeo); return (NULL); } /* * buf_daemon: * * buffer flushing daemon. Buffers are normally flushed by the * update daemon but if it cannot keep up this process starts to * take the load in an attempt to prevent getnewbuf() from blocking. 
*/ static struct kproc_desc buf_kp = { "bufdaemon", buf_daemon, &bufdaemonproc }; SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
static int buf_flush(struct vnode *vp, struct bufdomain *bd, int target) { int flushed; flushed = flushbufqueues(vp, bd, target, 0); if (flushed == 0) { /* * Could not find any buffers without rollback * dependencies, so just write the first one * in the hopes of eventually making progress. */ if (vp != NULL && target > 2) target /= 2; flushbufqueues(vp, bd, target, 1); } return (flushed); }
static void buf_daemon() { struct bufdomain *bd; int speedupreq; int lodirty; int i;
/* * This process needs to be suspended prior to shutdown sync. */ EVENTHANDLER_REGISTER(shutdown_pre_sync, kthread_shutdown, curthread, SHUTDOWN_PRI_LAST + 100);
/* * Start the buf clean daemons as child threads. */ for (i = 0; i < buf_domains; i++) { int error; error = kthread_add((void (*)(void *))bufspace_daemon, &bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i); if (error) panic("error %d spawning bufspace daemon", error); }
/* * This process is allowed to take the buffer cache to the limit. */ curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED; mtx_lock(&bdlock); for (;;) { bd_request = 0; mtx_unlock(&bdlock); kthread_suspend_check();
/* * Save speedupreq for this pass and reset to capture new * requests. */ speedupreq = bd_speedupreq; bd_speedupreq = 0;
/* * Flush each domain sequentially according to its level and * the speedup request. */ for (i = 0; i < buf_domains; i++) { bd = &bdomain[i]; if (speedupreq) lodirty = bd->bd_numdirtybuffers / 2; else lodirty = bd->bd_lodirtybuffers; while (bd->bd_numdirtybuffers > lodirty) { if (buf_flush(NULL, bd, bd->bd_numdirtybuffers - lodirty) == 0) break; kern_yield(PRI_USER); } }
/* * Only clear bd_request if we have reached our low water * mark. The buf_daemon normally waits 1 second and * then incrementally flushes any dirty buffers that have * built up, within reason. * * If we were unable to hit our low water mark and couldn't * find any flushable buffers, we sleep for a short period * to avoid endless loops on unlockable buffers. */ mtx_lock(&bdlock); if (!BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) { /* * We reached our low water mark, reset the * request and sleep until we are needed again. * The sleep is just so the suspend code works. */ bd_request = 0; /* * Do an extra wakeup in case the dirty threshold * changed via sysctl and the explicit transition * out of shortfall was missed. */ bdirtywakeup(); if (runningbufspace <= lorunningspace) runningwakeup(); msleep(&bd_request, &bdlock, PVM, "psleep", hz); } else { /* * We couldn't find any flushable dirty buffers but * still have too many dirty buffers, so we * have to sleep and try again (rare). */ msleep(&bd_request, &bdlock, PVM, "qsleep", hz / 10); } } }
/* * flushbufqueues: * * Try to flush a buffer in the dirty queue. We must be careful to * free up B_INVAL buffers instead of writing them, which NFS is * particularly sensitive to.
*/ static int flushwithdeps = 0; SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, 0, "Number of buffers flushed with dependencies that require rollbacks");
static int flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target, int flushdeps) { struct bufqueue *bq; struct buf *sentinel; struct vnode *vp; struct mount *mp; struct buf *bp; int hasdeps; int flushed; int error; bool unlock; flushed = 0; bq = &bd->bd_dirtyq; bp = NULL; sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); sentinel->b_qindex = QUEUE_SENTINEL; BQ_LOCK(bq); TAILQ_INSERT_HEAD(&bq->bq_queue, sentinel, b_freelist); BQ_UNLOCK(bq); while (flushed != target) { maybe_yield(); BQ_LOCK(bq); bp = TAILQ_NEXT(sentinel, b_freelist); if (bp != NULL) { TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist); TAILQ_INSERT_AFTER(&bq->bq_queue, bp, sentinel, b_freelist); } else { BQ_UNLOCK(bq); break; }
/* * Skip sentinels inserted by other invocations of * flushbufqueues(), taking care not to reorder them. * * Only flush the buffers that belong to the * vnode locked by the curthread. */ if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && bp->b_vp != lvp)) { BQ_UNLOCK(bq); continue; } error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); BQ_UNLOCK(bq); if (error != 0) continue;
/* * BKGRDINPROG can only be set with the buf and bufobj * locks both held. We tolerate a race to clear it here. */ if ((bp->b_vflags & BV_BKGRDINPROG) != 0 || (bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); continue; } if (bp->b_flags & B_INVAL) { bremfreef(bp); brelse(bp); flushed++; continue; } if (!LIST_EMPTY(&bp->b_dep) && buf_countdeps(bp, 0)) { if (flushdeps == 0) { BUF_UNLOCK(bp); continue; } hasdeps = 1; } else hasdeps = 0;
/* * We must hold the lock on a vnode before writing * one of its buffers. Otherwise we may confuse the * system, or, in the case of a snapshot vnode, * deadlock it. * * The lock order here is the reverse of the normal * order of vnode lock followed by buf lock. This is * ok because the NOWAIT will prevent deadlock. */ vp = bp->b_vp; if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { BUF_UNLOCK(bp); continue; } if (lvp == NULL) { unlock = true; error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); } else { ASSERT_VOP_LOCKED(vp, "getbuf"); unlock = false; error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : vn_lock(vp, LK_TRYUPGRADE); } if (error == 0) { CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (curproc == bufdaemonproc) { vfs_bio_awrite(bp); } else { bremfree(bp); bwrite(bp); counter_u64_add(notbufdflushes, 1); } vn_finished_write(mp); if (unlock) VOP_UNLOCK(vp, 0); flushwithdeps += hasdeps; flushed++;
/* * Sleeping on runningbufspace while holding * vnode lock leads to deadlock. */ if (curproc == bufdaemonproc && runningbufspace > hirunningspace) waitrunningbufspace(); continue; } vn_finished_write(mp); BUF_UNLOCK(bp); } BQ_LOCK(bq); TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist); BQ_UNLOCK(bq); free(sentinel, M_TEMP); return (flushed); }
/* * Check to see if a block is currently memory resident. */ struct buf * incore(struct bufobj *bo, daddr_t blkno) { struct buf *bp; BO_RLOCK(bo); bp = gbincore(bo, blkno); BO_RUNLOCK(bo); return (bp); }
/* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data.
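*
* A minimal sketch of the intended use (hypothetical caller, not part
* of this change):
*
*	if (inmem(vp, blkno))
*		return;		/* no I/O needed, the VM already has the data */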
*/ static int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc, size; vm_page_t m; vm_ooffset_t off; ASSERT_VOP_LOCKED(vp, "inmem"); if (incore(&vp->v_bufobj, blkno)) return 1; if (vp->v_mount == NULL) return 0; obj = vp->v_object; if (obj == NULL) return (0); size = PAGE_SIZE; if (size > vp->v_mount->mnt_stat.f_iosize) size = vp->v_mount->mnt_stat.f_iosize; off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize; VM_OBJECT_RLOCK(obj); for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) goto notinmem; tinc = size; if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK)) tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK); if (vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0) goto notinmem; } VM_OBJECT_RUNLOCK(obj); return 1; notinmem: VM_OBJECT_RUNLOCK(obj); return (0); }
/* * Set the dirty range for a buffer based on the status of the dirty * bits in the pages comprising the buffer. The range is limited * to the size of the buffer. * * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intervention. * * Note that while we only really need to clean through to b_bcount, we * just go ahead and clean through to b_bufsize. */ static void vfs_clean_pages_dirty_buf(struct buf *bp) { vm_ooffset_t foff, noff, eoff; vm_page_t m; int i; if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0) return; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_clean_pages_dirty_buf: no buffer offset")); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); vfs_drain_busy_pages(bp); vfs_setdirty_locked_object(bp); for (i = 0; i < bp->b_npages; i++) { noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; eoff = noff; if (eoff > bp->b_offset + bp->b_bufsize) eoff = bp->b_offset + bp->b_bufsize; m = bp->b_pages[i]; vfs_page_set_validclean(bp, foff, m); /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); }
static void vfs_setdirty_locked_object(struct buf *bp) { vm_object_t object; int i; object = bp->b_bufobj->bo_object; VM_OBJECT_ASSERT_WLOCKED(object);
/* * We qualify the scan for modified pages on whether the * object has been flushed yet. */ if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) { vm_offset_t boffset; vm_offset_t eoffset;
/* * Test the pages to see if they have been modified directly * by users through the VM system. */ for (i = 0; i < bp->b_npages; i++) vm_page_test_dirty(bp->b_pages[i]);
/* * Calculate the encompassing dirty range, boffset and eoffset, * (eoffset - boffset) bytes. */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) break; } boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
/* * Fit it to the buffer. */ if (eoffset > bp->b_bcount) eoffset = bp->b_bcount;
/* * If we have a good dirty range, merge with the existing * dirty range. */ if (boffset < eoffset) { if (bp->b_dirtyoff > boffset) bp->b_dirtyoff = boffset; if (bp->b_dirtyend < eoffset) bp->b_dirtyend = eoffset; } } }
/* * Allocate the KVA mapping for an existing buffer. * If an unmapped buffer is provided but a mapped buffer is requested, also * take care to properly set up mappings between pages and KVA.
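*
* For example (hypothetical sequence): a buffer created earlier with
* GB_UNMAPPED | GB_KVAALLOC already carries reserved KVA, so a later
* getblk() without GB_UNMAPPED takes only the "has_addr" path below and
* maps the existing pages into that reservation.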
*/ static void bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) { int bsize, maxsize, need_mapping, need_kva; off_t offset; need_mapping = bp->b_data == unmapped_buf && (gbflags & GB_UNMAPPED) == 0; need_kva = bp->b_kvabase == unmapped_buf && bp->b_data == unmapped_buf && (gbflags & GB_KVAALLOC) != 0; if (!need_mapping && !need_kva) return; BUF_CHECK_UNMAPPED(bp); if (need_mapping && bp->b_kvabase != unmapped_buf) { /* * Buffer is not mapped, but the KVA was already * reserved at the time of the instantiation. Use the * allocated space. */ goto has_addr; }
/* * Calculate the amount of the address space we would reserve * if the buffer was mapped. */ bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); offset = blkno * bsize; maxsize = size + (offset & PAGE_MASK); maxsize = imax(maxsize, bsize); while (bufkva_alloc(bp, maxsize, gbflags) != 0) { if ((gbflags & GB_NOWAIT_BD) != 0) { /* * XXXKIB: defragmentation cannot * succeed, not sure what else to do. */ panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); } counter_u64_add(mappingrestarts, 1); bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0); } has_addr: if (need_mapping) { /* b_offset is handled by bpmap_qenter. */ bp->b_data = bp->b_kvabase; BUF_CHECK_MAPPED(bp); bpmap_qenter(bp); } }
struct buf * getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags) { struct buf *bp; int error; error = getblkx(vp, blkno, size, slpflag, slptimeo, flags, &bp); if (error != 0) return (NULL); return (bp); }
/* * getblkx: * * Get a block given a specified block and offset into a file/device. * The buffer's B_DONE bit will be cleared on return, making it almost * ready for an I/O initiation. B_INVAL may or may not be set on * return. The caller should clear B_INVAL prior to initiating a * READ. * * For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for * an existing buffer. * * For a VMIO buffer, B_CACHE is modified according to the backing VM. * If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set * and then cleared based on the backing VM. If the previous buffer is * non-0-sized but invalid, B_CACHE will be cleared. * * If getblk() must create a new buffer, the new buffer is returned with * both B_INVAL and B_CACHE clear unless it is a VMIO buffer, in which * case it is returned with B_INVAL clear and B_CACHE set based on the * backing VM. * * getblk() also forces a bwrite() for any B_DELWRI buffer whose * B_CACHE bit is clear. * * What this means, basically, is that the caller should use B_CACHE to * determine whether the buffer is fully valid or not and should clear * B_INVAL prior to issuing a read. If the caller intends to validate * the buffer by loading its data area with something, the caller needs * to clear B_INVAL. If the caller does this without issuing an I/O, * the caller should set B_CACHE ( as an optimization ), else the caller * should issue the I/O and biodone() will set B_CACHE if the I/O was * a write attempt or if it was a successful read. If the caller * intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR * prior to issuing the READ. biodone() will *not* clear B_INVAL.
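*
* A minimal sketch of the read protocol described above (hypothetical
* caller, not part of this change; bread() implements essentially this
* pattern):
*
*	bp = getblk(vp, blkno, size, 0, 0, 0);
*	if ((bp->b_flags & B_CACHE) == 0) {
*		bp->b_flags &= ~B_INVAL;
*		bp->b_iocmd = BIO_READ;
*		vfs_busy_pages(bp, 0);
*		... issue the I/O and bufwait(bp) ...
*	}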
*/ int getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags, struct buf **bpp) { struct buf *bp; struct bufobj *bo; daddr_t d_blkno; int bsize, error, maxsize, vmio; off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); if (size > maxbcachebuf) panic("getblk: size(%d) > maxbcachebuf(%d)\n", size, maxbcachebuf); if (!unmapped_buf_allowed) flags &= ~(GB_UNMAPPED | GB_KVAALLOC); bo = &vp->v_bufobj; d_blkno = blkno; loop: BO_RLOCK(bo); bp = gbincore(bo, blkno); if (bp != NULL) { int lockflags; /* * Buffer is in-core. If the buffer is not busy nor managed, * it must be on a queue. */ lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK; if ((flags & GB_LOCK_NOWAIT) != 0) lockflags |= LK_NOWAIT; error = BUF_TIMELOCK(bp, lockflags, BO_LOCKPTR(bo), "getblk", slpflag, slptimeo); /* * If we slept and got the lock we have to restart in case * the buffer changed identities. */ if (error == ENOLCK) goto loop; /* We timed out or were interrupted. */ else if (error != 0) return (error); /* If recursed, assume caller knows the rules. */ else if (BUF_LOCKRECURSED(bp)) goto end; /* * The buffer is locked. B_CACHE is cleared if the buffer is * invalid. Otherwise, for a non-VMIO buffer, B_CACHE is set * and for a VMIO buffer B_CACHE is adjusted according to the * backing VM cache. */ if (bp->b_flags & B_INVAL) bp->b_flags &= ~B_CACHE; else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0) bp->b_flags |= B_CACHE; if (bp->b_flags & B_MANAGED) MPASS(bp->b_qindex == QUEUE_NONE); else bremfree(bp); /* * check for size inconsistencies for non-VMIO case. */ if (bp->b_bcount != size) { if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { bp->b_flags |= B_NOCACHE; bwrite(bp); } else { if (LIST_EMPTY(&bp->b_dep)) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bp->b_flags |= B_NOCACHE; bwrite(bp); } } goto loop; } } /* * Handle the case of unmapped buffer which should * become mapped, or the buffer for which KVA * reservation is requested. */ bp_unmapped_get_kva(bp, blkno, size, flags); /* * If the size is inconsistent in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. */ allocbuf(bp, size); KASSERT(bp->b_offset != NOOFFSET, ("getblk: no buffer offset")); /* * A buffer with B_DELWRI set and B_CACHE clear must * be committed before we can return the buffer in * order to prevent the caller from issuing a read * ( due to B_CACHE not being set ) and overwriting * it. * * Most callers, including NFS and FFS, need this to * operate properly either because they assume they * can issue a read if B_CACHE is not set, or because * ( for example ) an uncached B_DELWRI might loop due * to softupdates re-dirtying the buffer. In the latter * case, B_CACHE is set after the first write completes, * preventing further loops. * NOTE! b*write() sets B_CACHE. If we cleared B_CACHE * above while extending the buffer, we cannot allow the * buffer to remain with B_CACHE set after the write * completes or it will represent a corrupt state. To * deal with this we set B_NOCACHE to scrap the buffer * after the write. 
* * We might be able to do something fancy, like setting * B_CACHE in bwrite() except if B_DELWRI is already set, * so the below call doesn't set B_CACHE, but that gets real * confusing. This is much easier. */ if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) { bp->b_flags |= B_NOCACHE; bwrite(bp); goto loop; } bp->b_flags &= ~B_DONE; } else { /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned * buffer is also considered valid (not marked B_INVAL). */ BO_RUNLOCK(bo);
/* * If the user does not want us to create the buffer, bail out * here. */ if (flags & GB_NOCREAT) return (EEXIST);
- if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
- TD_IS_IDLETHREAD(curthread))
- return (EBUSY);
bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); offset = blkno * bsize; vmio = vp->v_object != NULL; if (vmio) { maxsize = size + (offset & PAGE_MASK); } else { maxsize = size; /* Do not allow non-VMIO unmapped buffers. */ flags &= ~(GB_UNMAPPED | GB_KVAALLOC); } maxsize = imax(maxsize, bsize); if ((flags & GB_NOSPARSE) != 0 && vmio && !vn_isdisk(vp, NULL)) { error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0); KASSERT(error != EOPNOTSUPP, ("GB_NOSPARSE from fs not supporting bmap, vp %p", vp)); if (error != 0) return (error); if (d_blkno == -1) return (EJUSTRETURN); } bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags); if (bp == NULL) { if (slpflag || slptimeo) return (ETIMEDOUT);
/* * XXX This is here until the sleep path is diagnosed * enough to work under very low memory conditions. * * There's an issue on low memory, 4BSD+non-preempt * systems (e.g., MIPS routers with 32MB RAM) where buffer * exhaustion occurs without sleeping for buffer * reclamation. This just sticks in a loop and * constantly attempts to allocate a buffer, which * hits exhaustion and tries to wake up bufdaemon. * This never happens because we never yield. * * The real solution is to identify and fix these cases * so we aren't effectively busy-waiting in a loop * until the reclamation path has cycles to run. */ kern_yield(PRI_USER); goto loop; }
/* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * This can be a problem whether the vnode is locked or not. * If the buffer is created out from under us, we have to * throw away the one we just created. * * Note: this must occur before we associate the buffer * with the vp especially considering limitations in * the splay tree implementation when dealing with duplicate * lblkno's. */ BO_LOCK(bo); if (gbincore(bo, blkno)) { BO_UNLOCK(bo); bp->b_flags |= B_INVAL; bufspace_release(bufdomain(bp), maxsize); brelse(bp); goto loop; }
/* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_lblkno = blkno; bp->b_blkno = d_blkno; bp->b_offset = offset; bgetvp(vp, bp); BO_UNLOCK(bo);
/* * Set the B_VMIO bit and allocbuf() the buffer bigger. Since the * buffer size starts out as 0, B_CACHE will be set by * allocbuf() for the VMIO case prior to it testing the * backing store for validity. */ if (vmio) { bp->b_flags |= B_VMIO; KASSERT(vp->v_object == bp->b_bufobj->bo_object, ("ARGH! different b_bufobj->bo_object %p %p %p\n", bp, vp->v_object, bp->b_bufobj->bo_object)); } else { bp->b_flags &= ~B_VMIO; KASSERT(bp->b_bufobj->bo_object == NULL, ("ARGH! has b_bufobj->bo_object %p %p\n", bp, bp->b_bufobj->bo_object)); BUF_CHECK_MAPPED(bp); } allocbuf(bp, size); bufspace_release(bufdomain(bp), maxsize); bp->b_flags &= ~B_DONE; } CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); BUF_ASSERT_HELD(bp); end: buf_track(bp, __func__); KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); *bpp = bp; return (0); }
/* * Get an empty, disassociated buffer of given size. The buffer is initially * set to B_INVAL. */ struct buf * geteblk(int size, int flags) { struct buf *bp; int maxsize; maxsize = (size + BKVAMASK) & ~BKVAMASK; while ((bp = getnewbuf(NULL, 0, 0, maxsize, flags)) == NULL) { if ((flags & GB_NOWAIT_BD) && (curthread->td_pflags & TDP_BUFNEED) != 0) return (NULL); } allocbuf(bp, size); bufspace_release(bufdomain(bp), maxsize); bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ BUF_ASSERT_HELD(bp); return (bp); }
/* * Truncate the backing store for a non-VMIO buffer. */ static void vfs_nonvmio_truncate(struct buf *bp, int newbsize) { if (bp->b_flags & B_MALLOC) { /* * malloced buffers are not shrunk */ if (newbsize == 0) { bufmallocadjust(bp, 0); free(bp->b_data, M_BIOBUF); bp->b_data = bp->b_kvabase; bp->b_flags &= ~B_MALLOC; } return; } vm_hold_free_pages(bp, newbsize); bufspace_adjust(bp, newbsize); }
/* * Extend the backing for a non-VMIO buffer. */ static void vfs_nonvmio_extend(struct buf *bp, int newbsize) { caddr_t origbuf; int origbufsize;
/* * We only use malloced memory on the first allocation, * and revert to page-allocated memory when the buffer * grows. * * There is a potential SMP race here that could lead * to bufmallocspace slightly passing the max. It * is probably extremely rare and not worth worrying * over. */ if (bp->b_bufsize == 0 && newbsize <= PAGE_SIZE/2 && bufmallocspace < maxbufmallocspace) { bp->b_data = malloc(newbsize, M_BIOBUF, M_WAITOK); bp->b_flags |= B_MALLOC; bufmallocadjust(bp, newbsize); return; }
/* * If the buffer is growing on its other-than-first * allocation then we revert to the page-allocation * scheme. */ origbuf = NULL; origbufsize = 0; if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; bufmallocadjust(bp, 0); bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); if (origbuf != NULL) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } bufspace_adjust(bp, newbsize); }
/* * This code constructs the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). This code is able to * resize a buffer up or down. * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistent data situations. Tread lightly!!! * There are B_CACHE and B_DELWRI interactions that must be dealt with by * the caller. Calling this code willy-nilly can result in the loss of data. * * allocbuf() only adjusts B_CACHE for VMIO buffers. getblk() deals with * B_CACHE for the non-VMIO case.
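*
* For example (hypothetical values): allocbuf(bp, 8192) on a 4kB VMIO
* buffer grabs and wires one more backing page and re-tests B_CACHE
* against the pages' validity; a later allocbuf(bp, 4096) releases the
* extra page again through vfs_vmio_truncate().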
*/ int allocbuf(struct buf *bp, int size) { int newbsize; BUF_ASSERT_HELD(bp); if (bp->b_bcount == size) return (1); if (bp->b_kvasize != 0 && bp->b_kvasize < size) panic("allocbuf: buffer too small"); newbsize = roundup2(size, DEV_BSIZE); if ((bp->b_flags & B_VMIO) == 0) { if ((bp->b_flags & B_MALLOC) == 0) newbsize = round_page(newbsize); /* * Just get anonymous memory from the kernel. Don't * mess with B_CACHE. */ if (newbsize < bp->b_bufsize) vfs_nonvmio_truncate(bp, newbsize); else if (newbsize > bp->b_bufsize) vfs_nonvmio_extend(bp, newbsize); } else { int desiredpages; desiredpages = (size == 0) ? 0 : num_pages((bp->b_offset & PAGE_MASK) + newbsize); if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); /* * Set B_CACHE initially if buffer is 0 length or will become * 0-length. */ if (size == 0 || bp->b_bufsize == 0) bp->b_flags |= B_CACHE; if (newbsize < bp->b_bufsize) vfs_vmio_truncate(bp, desiredpages); /* XXX This looks as if it should be newbsize > b_bufsize */ else if (size > bp->b_bcount) vfs_vmio_extend(bp, desiredpages, size); bufspace_adjust(bp, newbsize); } bp->b_bcount = size; /* requested buffer size. */ return (1); } extern int inflight_transient_maps; static struct bio_queue nondump_bios; void biodone(struct bio *bp) { struct mtx *mtxp; void (*done)(struct bio *); vm_offset_t start, end; biotrack(bp, __func__); /* * Avoid completing I/O when dumping after a panic since that may * result in a deadlock in the filesystem or pager code. Note that * this doesn't affect dumps that were started manually since we aim * to keep the system usable after it has been resumed. */ if (__predict_false(dumping && SCHEDULER_STOPPED())) { TAILQ_INSERT_HEAD(&nondump_bios, bp, bio_queue); return; } if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) { bp->bio_flags &= ~BIO_TRANSIENT_MAPPING; bp->bio_flags |= BIO_UNMAPPED; start = trunc_page((vm_offset_t)bp->bio_data); end = round_page((vm_offset_t)bp->bio_data + bp->bio_length); bp->bio_data = unmapped_buf; pmap_qremove(start, atop(end - start)); vmem_free(transient_arena, start, end - start); atomic_add_int(&inflight_transient_maps, -1); } done = bp->bio_done; if (done == NULL) { mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->bio_flags |= BIO_DONE; wakeup(bp); mtx_unlock(mtxp); } else done(bp); } /* * Wait for a BIO to finish. */ int biowait(struct bio *bp, const char *wchan) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); while ((bp->bio_flags & BIO_DONE) == 0) msleep(bp, mtxp, PRIBIO, wchan, 0); mtx_unlock(mtxp); if (bp->bio_error != 0) return (bp->bio_error); if (!(bp->bio_flags & BIO_ERROR)) return (0); return (EIO); } void biofinish(struct bio *bp, struct devstat *stat, int error) { if (error) { bp->bio_error = error; bp->bio_flags |= BIO_ERROR; } if (stat != NULL) devstat_end_transaction_bio(stat, bp); biodone(bp); } #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) void biotrack_buf(struct bio *bp, const char *location) { buf_track(bp->bio_track_bp, location); } #endif /* * bufwait: * * Wait for buffer I/O completion, returning error status. The buffer * is left locked and B_DONE on return. B_EINTR is converted into an EINTR * error and cleared. */ int bufwait(struct buf *bp) { if (bp->b_iocmd == BIO_READ) bwait(bp, PRIBIO, "biord"); else bwait(bp, PRIBIO, "biowr"); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_ioflags & BIO_ERROR) { return (bp->b_error ? 
bp->b_error : EIO); } else { return (0); } } /* * bufdone: * * Finish I/O on a buffer, optionally calling a completion function. * This is usually called from an interrupt so process blocking is * not allowed. * * biodone is also responsible for setting B_CACHE in a B_VMIO bp. * In a non-VMIO bp, B_CACHE will be set on the next getblk() * assuming B_INVAL is clear. * * For the VMIO case, we set B_CACHE if the op was a read and no * read error occurred, or if the op was a write. B_CACHE is never * set if the buffer is invalid or otherwise uncacheable. * * bufdone does not mess with B_INVAL, allowing the I/O routine or the * initiator to leave B_INVAL set to brelse the buffer out of existence * in the biodone routine. */ void bufdone(struct buf *bp) { struct bufobj *dropobj; void (*biodone)(struct buf *); buf_track(bp, __func__); CTR3(KTR_BUF, "bufdone(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); dropobj = NULL; KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp)); BUF_ASSERT_HELD(bp); runningbufwakeup(bp); if (bp->b_iocmd == BIO_WRITE) dropobj = bp->b_bufobj; /* call optional completion function if requested */ if (bp->b_iodone != NULL) { biodone = bp->b_iodone; bp->b_iodone = NULL; (*biodone) (bp); if (dropobj) bufobj_wdrop(dropobj); return; } if (bp->b_flags & B_VMIO) { /* * Set B_CACHE if the op was a normal read and no error * occurred. B_CACHE is set for writes in the b*write() * routines. */ if (bp->b_iocmd == BIO_READ && !(bp->b_flags & (B_INVAL|B_NOCACHE)) && !(bp->b_ioflags & BIO_ERROR)) bp->b_flags |= B_CACHE; vfs_vmio_iodone(bp); } if (!LIST_EMPTY(&bp->b_dep)) buf_complete(bp); if ((bp->b_flags & B_CKHASH) != 0) { KASSERT(bp->b_iocmd == BIO_READ, ("bufdone: b_iocmd %d not BIO_READ", bp->b_iocmd)); KASSERT(buf_mapped(bp), ("bufdone: bp %p not mapped", bp)); (*bp->b_ckhashcalc)(bp); } /* * For asynchronous completions, release the buffer now. The brelse * will do a wakeup there if necessary - so no need to do a wakeup * here in the async case. The sync case always needs to do a wakeup. */ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || (bp->b_ioflags & BIO_ERROR)) brelse(bp); else bqrelse(bp); } else bdone(bp); if (dropobj) bufobj_wdrop(dropobj); } /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistent. */ void vfs_unbusy_pages(struct buf *bp) { int i; vm_object_t obj; vm_page_t m; runningbufwakeup(bp); if (!(bp->b_flags & B_VMIO)) return; obj = bp->b_bufobj->bo_object; VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i); if (!m) panic("vfs_unbusy_pages: page missing\n"); bp->b_pages[i] = m; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } else BUF_CHECK_UNMAPPED(bp); } vm_page_sunbusy(m); } vm_object_pip_wakeupn(obj, bp->b_npages); VM_OBJECT_WUNLOCK(obj); } /* * vfs_page_set_valid: * * Set the valid bits in a page based on the supplied offset. The * range is restricted to the buffer's size. * * This routine is typically called after a read completes. */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m) { vm_ooffset_t eoff; /* * Compute the end offset, eoff, such that [off, eoff) does not span a * page boundary and eoff is not greater than the end of the buffer. 
* The end of the buffer, in this case, is our file EOF, not the * allocation size of the buffer. */ eoff = (off + PAGE_SIZE) & ~(vm_ooffset_t)PAGE_MASK; if (eoff > bp->b_offset + bp->b_bcount) eoff = bp->b_offset + bp->b_bcount; /* * Set valid range. This is typically the entire buffer and thus the * entire page. */ if (eoff > off) vm_page_set_valid_range(m, off & PAGE_MASK, eoff - off); } /* * vfs_page_set_validclean: * * Set the valid bits and clear the dirty bits in a page based on the * supplied offset. The range is restricted to the buffer's size. */ static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m) { vm_ooffset_t soff, eoff; /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundary or cross the end of the buffer. The end of the * buffer, in this case, is our file EOF, not the allocation size * of the buffer. */ soff = off; eoff = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK; if (eoff > bp->b_offset + bp->b_bcount) eoff = bp->b_offset + bp->b_bcount; /* * Set valid range. This is typically the entire buffer and thus the * entire page. */ if (eoff > soff) { vm_page_set_validclean( m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff) ); } } /* * Ensure that all buffer pages are not exclusive busied. If any page is * exclusive busy, drain it. */ void vfs_drain_busy_pages(struct buf *bp) { vm_page_t m; int i, last_busied; VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object); last_busied = 0; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (vm_page_xbusied(m)) { for (; last_busied < i; last_busied++) vm_page_sbusy(bp->b_pages[last_busied]); while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); vm_page_busy_sleep(m, "vbpage", true); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); } } } for (i = 0; i < last_busied; i++) vm_page_sunbusy(bp->b_pages[i]); } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being exclusive busy. Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistent. * * Since I/O has not been initiated yet, certain buffer flags * such as BIO_ERROR or B_INVAL may be in an inconsistent state * and should be ignored. */ void vfs_busy_pages(struct buf *bp, int clear_modify) { vm_object_t obj; vm_ooffset_t foff; vm_page_t m; int i; bool bogus; if (!(bp->b_flags & B_VMIO)) return; obj = bp->b_bufobj->bo_object; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_busy_pages: no buffer offset")); VM_OBJECT_WLOCK(obj); vfs_drain_busy_pages(bp); if (bp->b_bufsize != 0) vfs_setdirty_locked_object(bp); bogus = false; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if ((bp->b_flags & B_CLUSTER) == 0) { vm_object_pip_add(obj, 1); vm_page_sbusy(m); } /* * When readying a buffer for a read ( i.e * clear_modify == 0 ), it is important to do * bogus_page replacement for valid pages in * partially instantiated buffers. Partially * instantiated buffers can, in turn, occur when * reconstituting a buffer from its VM backing store * base. We only have to do this if B_CACHE is * clear ( which causes the I/O to occur in the * first place ). The replacement prevents the read * I/O from overwriting potentially dirty VM-backed * pages. XXX bogus page replacement is, uh, bogus. * It may not work properly with small-block devices. * We need to find a better way. 
*/ if (clear_modify) { pmap_remove_write(m); vfs_page_set_validclean(bp, foff, m); } else if (m->valid == VM_PAGE_BITS_ALL && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; bogus = true; } foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } VM_OBJECT_WUNLOCK(obj); if (bogus && buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * vfs_bio_set_valid: * * Set the range within the buffer to valid. The range is * relative to the beginning of the buffer, b_offset. Note that * b_offset itself may be offset from the beginning of the first * page. */ void vfs_bio_set_valid(struct buf *bp, int base, int size) { int i, n; vm_page_t m; if (!(bp->b_flags & B_VMIO)) return; /* * Fixup base to be relative to beginning of first page. * Set initial n to be the maximum number of bytes in the * first page that can be validated. */ base += (bp->b_offset & PAGE_MASK); n = PAGE_SIZE - (base & PAGE_MASK); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) n = size; vm_page_set_valid_range(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); } /* * vfs_bio_clrbuf: * * If the specified buffer is a non-VMIO buffer, clear the entire * buffer. If the specified buffer is a VMIO buffer, clear and * validate only the previously invalid portions of the buffer. * This routine essentially fakes an I/O, so we need to clear * BIO_ERROR and B_INVAL. * * Note that while we only theoretically need to clear through b_bcount, * we go ahead and clear through b_bufsize. */ void vfs_bio_clrbuf(struct buf *bp) { int i, j, mask, sa, ea, slide; if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) { clrbuf(bp); return; } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && (bp->b_offset & PAGE_MASK) == 0) { if (bp->b_pages[0] == bogus_page) goto unlock; mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1; VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[0]->object); if ((bp->b_pages[0]->valid & mask) == mask) goto unlock; if ((bp->b_pages[0]->valid & mask) == 0) { pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize); bp->b_pages[0]->valid |= mask; goto unlock; } } sa = bp->b_offset & PAGE_MASK; slide = 0; for (i = 0; i < bp->b_npages; i++, sa = 0) { slide = imin(slide + PAGE_SIZE, bp->b_offset + bp->b_bufsize); ea = slide & PAGE_MASK; if (ea == 0) ea = PAGE_SIZE; if (bp->b_pages[i] == bogus_page) continue; j = sa / DEV_BSIZE; mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j; VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[i]->object); if ((bp->b_pages[i]->valid & mask) == mask) continue; if ((bp->b_pages[i]->valid & mask) == 0) pmap_zero_page_area(bp->b_pages[i], sa, ea - sa); else { for (; sa < ea; sa += DEV_BSIZE, j++) { if ((bp->b_pages[i]->valid & (1 << j)) == 0) { pmap_zero_page_area(bp->b_pages[i], sa, DEV_BSIZE); } } } bp->b_pages[i]->valid |= mask; } unlock: VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); bp->b_resid = 0; } void vfs_bio_bzero_buf(struct buf *bp, int base, int size) { vm_page_t m; int i, n; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); bzero(bp->b_data + base, size); } else { BUF_CHECK_UNMAPPED(bp); n = PAGE_SIZE - (base & PAGE_MASK); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) n = size; pmap_zero_page_area(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } } } 
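/*
 * Illustrative arithmetic for the page-walking loops above (hypothetical
 * values, not part of this change): with 4kB pages and b_offset 0x1200,
 * base 0 maps to in-page offset 0x200, so the first iteration can touch
 * at most PAGE_SIZE - 0x200 bytes; every following iteration starts
 * page-aligned with n reset to PAGE_SIZE.
 */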
/* * Update buffer flags based on I/O request parameters, optionally releasing the * buffer. If it's VMIO or direct I/O, the buffer pages are released to the VM, * where they may be placed on a page queue (VMIO) or freed immediately (direct * I/O). Otherwise the buffer is released to the cache. */ static void b_io_dismiss(struct buf *bp, int ioflag, bool release) { KASSERT((ioflag & IO_NOREUSE) == 0 || (ioflag & IO_VMIO) != 0, ("buf %p non-VMIO noreuse", bp)); if ((ioflag & IO_DIRECT) != 0) bp->b_flags |= B_DIRECT; if ((ioflag & IO_EXT) != 0) bp->b_xflags |= BX_ALTDATA; if ((ioflag & (IO_VMIO | IO_DIRECT)) != 0 && LIST_EMPTY(&bp->b_dep)) { bp->b_flags |= B_RELBUF; if ((ioflag & IO_NOREUSE) != 0) bp->b_flags |= B_NOREUSE; if (release) brelse(bp); } else if (release) bqrelse(bp); }
void vfs_bio_brelse(struct buf *bp, int ioflag) { b_io_dismiss(bp, ioflag, true); }
void vfs_bio_set_flags(struct buf *bp, int ioflag) { b_io_dismiss(bp, ioflag, false); }
/* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffer's address space. The pages are anonymous and are * not associated with a file object. */ static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; BUF_CHECK_MAPPED(bp); to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { /* * Note: we must allocate system pages since blocking here * could interfere with paging I/O, no matter which * process we are. */ p = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT) | VM_ALLOC_WAITOK); pmap_qenter(pg, &p, 1); bp->b_pages[index] = p; } bp->b_npages = index; }
/* Return pages associated with this buf to the VM system */ static void vm_hold_free_pages(struct buf *bp, int newbsize) { vm_offset_t from; vm_page_t p; int index, newnpages; BUF_CHECK_MAPPED(bp); from = round_page((vm_offset_t)bp->b_data + newbsize); newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; if (bp->b_npages > newnpages) pmap_qremove(from, bp->b_npages - newnpages); for (index = newnpages; index < bp->b_npages; index++) { p = bp->b_pages[index]; bp->b_pages[index] = NULL; p->wire_count--; vm_page_free(p); } vm_wire_sub(bp->b_npages - newnpages); bp->b_npages = newnpages; }
/* * Map an I/O request into kernel virtual address space. * * All requests are (re)mapped into kernel VA space. * Notice that we use b_bufsize for the size of the buffer * to be mapped. b_bcount might be modified by the driver. * * Note that even if the caller determines that the address space should * be valid, a race or a smaller file mapped into a larger space may * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST * check the return value. * * This function only works with pager buffers.
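*
* A minimal sketch of the required calling pattern (hypothetical pager
* caller, not part of this change):
*
*	if (vmapbuf(bp, 1) < 0) {
*		... fail the request; b_data is not valid ...
*	}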
*/ int vmapbuf(struct buf *bp, int mapbuf) { vm_prot_t prot; int pidx; if (bp->b_bufsize < 0) return (-1); prot = VM_PROT_READ; if (bp->b_iocmd == BIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages, btoc(MAXPHYS))) < 0) return (-1); bp->b_npages = pidx; bp->b_offset = ((vm_offset_t)bp->b_data) & PAGE_MASK; if (mapbuf || !unmapped_buf_allowed) { pmap_qenter((vm_offset_t)bp->b_kvabase, bp->b_pages, pidx); bp->b_data = bp->b_kvabase + bp->b_offset; } else bp->b_data = unmapped_buf; return(0); } /* * Free the io map PTEs associated with this IO operation. * We also invalidate the TLB entries and restore the original b_addr. * * This function only works with pager buffers. */ void vunmapbuf(struct buf *bp) { int npages; npages = bp->b_npages; if (buf_mapped(bp)) pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); vm_page_unhold_pages(bp->b_pages, npages); bp->b_data = unmapped_buf; } void bdone(struct buf *bp) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->b_flags |= B_DONE; wakeup(bp); mtx_unlock(mtxp); } void bwait(struct buf *bp, u_char pri, const char *wchan) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); while ((bp->b_flags & B_DONE) == 0) msleep(bp, mtxp, pri, wchan, 0); mtx_unlock(mtxp); } int bufsync(struct bufobj *bo, int waitfor) { return (VOP_FSYNC(bo2vnode(bo), waitfor, curthread)); } void bufstrategy(struct bufobj *bo, struct buf *bp) { int i __unused; struct vnode *vp; vp = bp->b_vp; KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy")); KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, ("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp)); i = VOP_STRATEGY(vp, bp); KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp)); } /* * Initialize a struct bufobj before use. Memory is assumed zero filled. */ void bufobj_init(struct bufobj *bo, void *private) { static volatile int bufobj_cleanq; bo->bo_domain = atomic_fetchadd_int(&bufobj_cleanq, 1) % buf_domains; rw_init(BO_LOCKPTR(bo), "bufobj interlock"); bo->bo_private = private; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); } void bufobj_wrefl(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wref")); ASSERT_BO_WLOCKED(bo); bo->bo_numoutput++; } void bufobj_wref(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wref")); BO_LOCK(bo); bo->bo_numoutput++; BO_UNLOCK(bo); } void bufobj_wdrop(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wdrop")); BO_LOCK(bo); KASSERT(bo->bo_numoutput > 0, ("bufobj_wdrop non-positive count")); if ((--bo->bo_numoutput == 0) && (bo->bo_flag & BO_WWAIT)) { bo->bo_flag &= ~BO_WWAIT; wakeup(&bo->bo_numoutput); } BO_UNLOCK(bo); } int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo) { int error; KASSERT(bo != NULL, ("NULL bo in bufobj_wwait")); ASSERT_BO_WLOCKED(bo); error = 0; while (bo->bo_numoutput) { bo->bo_flag |= BO_WWAIT; error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo), slpflag | (PRIBIO + 1), "bo_wwait", timeo); if (error) break; } return (error); } /* * Set bio_data or bio_ma for struct bio from the struct buf. 
*/ void bdata2bio(struct buf *bp, struct bio *bip) { if (!buf_mapped(bp)) { KASSERT(unmapped_buf_allowed, ("unmapped")); bip->bio_ma = bp->b_pages; bip->bio_ma_n = bp->b_npages; bip->bio_data = unmapped_buf; bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; bip->bio_flags |= BIO_UNMAPPED; KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) / PAGE_SIZE == bp->b_npages, ("Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset, (long long)bip->bio_length, bip->bio_ma_n)); } else { bip->bio_data = bp->b_data; bip->bio_ma = NULL; } } /* * The MIPS pmap code currently doesn't handle aliased pages. * The VIPT caches may not handle page aliasing themselves, leading * to data corruption. * * As such, this code makes a system extremely unhappy if said * system doesn't support unaliasing the above situation in hardware. * Some "recent" systems (eg some mips24k/mips74k cores) don't enable * this feature at build time, so it has to be handled in software. * * Once the MIPS pmap/cache code grows to support this function on * earlier chips, it should be flipped back off. */ #ifdef __mips__ static int buf_pager_relbuf = 1; #else static int buf_pager_relbuf = 0; #endif SYSCTL_INT(_vfs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN, &buf_pager_relbuf, 0, "Make buffer pager release buffers after reading"); /* * The buffer pager. It uses buffer reads to validate pages. * * In contrast to the generic local pager from vm/vnode_pager.c, this * pager correctly and easily handles volumes where the underlying * device block size is greater than the machine page size. The * buffer cache transparently extends the requested page run to be * aligned at the block boundary, and does the necessary bogus page * replacements in the addends to avoid obliterating already valid * pages. * * The only non-trivial issue is that the exclusive busy state for * pages, which is assumed by the vm_pager_getpages() interface, is * incompatible with the VMIO buffer cache's desire to share-busy the * pages. This function performs a trivial downgrade of the pages' * state before reading buffers, and a less trivial upgrade from the * shared-busy to excl-busy state after the read. */ int vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno, vbg_get_blksize_t get_blksize) { vm_page_t m; vm_object_t object; struct buf *bp; struct mount *mp; daddr_t lbn, lbnp; vm_ooffset_t la, lb, poff, poffe; long bsize; int bo_bs, br_flags, error, i, pgsin, pgsin_a, pgsin_b; bool redo, lpart; object = vp->v_object; mp = vp->v_mount; error = 0; la = IDX_TO_OFF(ma[count - 1]->pindex); if (la >= object->un_pager.vnp.vnp_size) return (VM_PAGER_BAD); /* * Change the meaning of la from where the last requested page starts * to where it ends, because that's the end of the requested region * and the start of the potential read-ahead region. */ la += PAGE_SIZE; lpart = la > object->un_pager.vnp.vnp_size; bo_bs = get_blksize(vp, get_lblkno(vp, IDX_TO_OFF(ma[0]->pindex))); /* * Calculate read-ahead, behind and total pages. 
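*
* As a worked example (hypothetical numbers): with 4kB pages and a 16kB
* block size, a run whose first page starts at file offset 20kB has lb
* rounded down to 16kB, so pgsin_b adds one page behind; la is likewise
* rounded up to the next block boundary to size the read-ahead, clipped
* at the end of the object.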
*/ pgsin = count; lb = IDX_TO_OFF(ma[0]->pindex); pgsin_b = OFF_TO_IDX(lb - rounddown2(lb, bo_bs)); pgsin += pgsin_b; if (rbehind != NULL) *rbehind = pgsin_b; pgsin_a = OFF_TO_IDX(roundup2(la, bo_bs) - la); if (la + IDX_TO_OFF(pgsin_a) >= object->un_pager.vnp.vnp_size) pgsin_a = OFF_TO_IDX(roundup2(object->un_pager.vnp.vnp_size, PAGE_SIZE) - la); pgsin += pgsin_a; if (rahead != NULL) *rahead = pgsin_a; VM_CNT_INC(v_vnodein); VM_CNT_ADD(v_vnodepgsin, pgsin); br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) ? GB_UNMAPPED : 0; VM_OBJECT_WLOCK(object); again: for (i = 0; i < count; i++) vm_page_busy_downgrade(ma[i]); VM_OBJECT_WUNLOCK(object); lbnp = -1; for (i = 0; i < count; i++) { m = ma[i];
/* * Pages are shared busy and the object lock is not * owned, which together allow for the pages' * invalidation. The racy test for validity avoids * useless creation of the buffer for the most typical * case when invalidation is not used in redo or for * parallel read. The shared->excl upgrade loop at * the end of the function catches the race in a * reliable way (protected by the object lock). */ if (m->valid == VM_PAGE_BITS_ALL) continue; poff = IDX_TO_OFF(m->pindex); poffe = MIN(poff + PAGE_SIZE, object->un_pager.vnp.vnp_size); for (; poff < poffe; poff += bsize) { lbn = get_lblkno(vp, poff); if (lbn == lbnp) goto next_page; lbnp = lbn; bsize = get_blksize(vp, lbn); error = bread_gb(vp, lbn, bsize, curthread->td_ucred, br_flags, &bp); if (error != 0) goto end_pages; if (LIST_EMPTY(&bp->b_dep)) { /* * Invalidation clears m->valid, but * may leave the B_CACHE flag if the * buffer existed at the invalidation * time. In this case, recycle the * buffer to do a real read on the next * bread() after redo. * * Otherwise B_RELBUF is not strictly * necessary; enable it to reduce buf * cache pressure. */ if (buf_pager_relbuf || m->valid != VM_PAGE_BITS_ALL) bp->b_flags |= B_RELBUF; bp->b_flags &= ~B_NOCACHE; brelse(bp); } else { bqrelse(bp); } } KASSERT(1 /* racy, enable for debugging */ || m->valid == VM_PAGE_BITS_ALL || i == count - 1, ("buf %d %p invalid", i, m)); if (i == count - 1 && lpart) { VM_OBJECT_WLOCK(object); if (m->valid != 0 && m->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(m, TRUE); VM_OBJECT_WUNLOCK(object); } next_page:; } end_pages: VM_OBJECT_WLOCK(object); redo = false; for (i = 0; i < count; i++) { vm_page_sunbusy(ma[i]); ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
/* * Since the pages were only sbusy while neither the * buffer nor the object lock was held by us, or * reallocated while vm_page_grab() slept for busy * relinquish, they could have been invalidated. * Recheck the valid bits and re-read as needed. * * Note that the last page is made fully valid in the * read loop, and partial validity for the page at * index count - 1 could mean that the page was * invalidated or removed, so we must restart for * safety as well. */ if (ma[i]->valid != VM_PAGE_BITS_ALL) redo = true; } if (redo && error == 0) goto again; VM_OBJECT_WUNLOCK(object); return (error != 0 ?
VM_PAGER_ERROR : VM_PAGER_OK); }
#include "opt_ddb.h" #ifdef DDB #include <ddb/ddb.h>
/* DDB command to show buffer data */ DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; #ifdef FULL_BUF_TRACKING uint32_t i, j; #endif if (!have_addr) { db_printf("usage: show buffer <addr>\n"); return; } db_printf("buf at %p\n", bp); db_printf("b_flags = 0x%b, b_xflags=0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS, (u_int)bp->b_xflags, PRINT_BUF_XFLAGS); db_printf("b_vflags=0x%b b_ioflags=0x%b\n", (u_int)bp->b_vflags, PRINT_BUF_VFLAGS, (u_int)bp->b_ioflags, PRINT_BIO_FLAGS); db_printf( "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n" "b_bufobj = (%p), b_data = %p\n, b_blkno = %jd, b_lblkno = %jd, " "b_vp = %p, b_dep = %p\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno, bp->b_vp, bp->b_dep.lh_first); db_printf("b_kvabase = %p, b_kvasize = %d\n", bp->b_kvabase, bp->b_kvasize); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; if (m != NULL) db_printf("(%p, 0x%lx, 0x%lx)", m->object, (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m)); else db_printf("( ??? )"); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } BUF_LOCKPRINTINFO(bp); #if defined(FULL_BUF_TRACKING) db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt); i = bp->b_io_tcnt % BUF_TRACKING_SIZE; for (j = 1; j <= BUF_TRACKING_SIZE; j++) { if (bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)] == NULL) continue; db_printf(" %2u: %s\n", j, bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)]); } #elif defined(BUF_TRACKING) db_printf("b_io_tracking: %s\n", bp->b_io_tracking); #endif db_printf(" "); }
DB_SHOW_COMMAND(bufqueues, bufqueues) { struct bufdomain *bd; struct buf *bp; long total; int i, j, cnt; db_printf("bqempty: %d\n", bqempty.bq_len); for (i = 0; i < buf_domains; i++) { bd = &bdomain[i]; db_printf("Buf domain %d\n", i); db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers); db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers); db_printf("\thifreebufs\t%d\n", bd->bd_hifreebuffers); db_printf("\n"); db_printf("\tbufspace\t%ld\n", bd->bd_bufspace); db_printf("\tmaxbufspace\t%ld\n", bd->bd_maxbufspace); db_printf("\thibufspace\t%ld\n", bd->bd_hibufspace); db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace); db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh); db_printf("\n"); db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers); db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers); db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers); db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh); db_printf("\n"); total = 0; TAILQ_FOREACH(bp, &bd->bd_cleanq->bq_queue, b_freelist) total += bp->b_bufsize; db_printf("\tcleanq count\t%d (%ld)\n", bd->bd_cleanq->bq_len, total); total = 0; TAILQ_FOREACH(bp, &bd->bd_dirtyq.bq_queue, b_freelist) total += bp->b_bufsize; db_printf("\tdirtyq count\t%d (%ld)\n", bd->bd_dirtyq.bq_len, total); db_printf("\twakeup\t\t%d\n", bd->bd_wanted); db_printf("\tlim\t\t%d\n", bd->bd_lim); db_printf("\tCPU "); for (j = 0; j <= mp_maxid; j++) db_printf("%d, ", bd->bd_subq[j].bq_len); db_printf("\n"); cnt = 0; total = 0; for (j = 0; j < nbuf; j++) if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) { cnt++; total += buf[j].b_bufsize; } db_printf("\tLocked buffers: %d space %ld\n", cnt, total); cnt = 0; total = 0; for (j = 0; j < nbuf; j++) if (buf[j].b_domain == i) {
cnt++; total += buf[j].b_bufsize; } db_printf("\tTotal buffers: %d space %ld\n", cnt, total); } } DB_SHOW_COMMAND(lockedbufs, lockedbufs) { struct buf *bp; int i; for (i = 0; i < nbuf; i++) { bp = &buf[i]; if (BUF_ISLOCKED(bp)) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); if (db_pager_quit) break; } } } DB_SHOW_COMMAND(vnodebufs, db_show_vnodebufs) { struct vnode *vp; struct buf *bp; if (!have_addr) { db_printf("usage: show vnodebufs \n"); return; } vp = (struct vnode *)addr; db_printf("Clean buffers:\n"); TAILQ_FOREACH(bp, &vp->v_bufobj.bo_clean.bv_hd, b_bobufs) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); } db_printf("Dirty buffers:\n"); TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); } } DB_COMMAND(countfreebufs, db_coundfreebufs) { struct buf *bp; int i, used = 0, nfree = 0; if (have_addr) { db_printf("usage: countfreebufs\n"); return; } for (i = 0; i < nbuf; i++) { bp = &buf[i]; if (bp->b_qindex == QUEUE_EMPTY) nfree++; else used++; } db_printf("Counted %d free, %d used (%d tot)\n", nfree, used, nfree + used); db_printf("numfreebuffers is %d\n", numfreebuffers); } #endif /* DDB */ Index: projects/runtime-coverage-v2/sys/netinet/in_mcast.c =================================================================== --- projects/runtime-coverage-v2/sys/netinet/in_mcast.c (revision 346924) +++ projects/runtime-coverage-v2/sys/netinet/in_mcast.c (revision 346925) @@ -1,3151 +1,3162 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Bruce Simpson. * Copyright (c) 2005 Robert N. M. Watson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPv4 multicast socket, group, and socket option processing module. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IGMPV3 #define KTR_IGMPV3 KTR_INET #endif #ifndef __SOCKUNION_DECLARED union sockunion { struct sockaddr_storage ss; struct sockaddr sa; struct sockaddr_dl sdl; struct sockaddr_in sin; }; typedef union sockunion sockunion_t; #define __SOCKUNION_DECLARED #endif /* __SOCKUNION_DECLARED */ static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", "IPv4 multicast PCB-layer source filter"); static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", "IPv4 multicast IGMP-layer source filter"); /* * Locking: * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however * it can be taken by code in net/if.c also. * - ip_moptions and in_mfilter are covered by the INP_WLOCK. * * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly * any need for in_multi itself to be virtualized -- it is bound to an ifp * anyway no matter what happens. */ struct mtx in_multi_list_mtx; MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); struct mtx in_multi_free_mtx; MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); struct sx in_multi_sx; SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); int ifma_restart; /* * Functions with non-static linkage defined in this file should be * declared in in_var.h: * imo_multi_filter() * in_addmulti() * in_delmulti() * in_joingroup() * in_joingroup_locked() * in_leavegroup() * in_leavegroup_locked() * and ip_var.h: * inp_freemoptions() * inp_getmoptions() * inp_setmoptions() * * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() * and in_delmulti(). 
*/ static void imf_commit(struct in_mfilter *); static int imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, struct in_msource **); static struct in_msource * imf_graft(struct in_mfilter *, const uint8_t, const struct sockaddr_in *); static void imf_leave(struct in_mfilter *); static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); static void imf_purge(struct in_mfilter *); static void imf_rollback(struct in_mfilter *); static void imf_reap(struct in_mfilter *); static int imo_grow(struct ip_moptions *); static size_t imo_match_group(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *); static struct in_msource * imo_match_source(const struct ip_moptions *, const size_t, const struct sockaddr *); static void ims_merge(struct ip_msource *ims, const struct in_msource *lims, const int rollback); static int in_getmulti(struct ifnet *, const struct in_addr *, struct in_multi **); static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, const int noalloc, struct ip_msource **pims); #ifdef KTR static int inm_is_ifp_detached(const struct in_multi *); #endif static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); static void inm_purge(struct in_multi *); static void inm_reap(struct in_multi *); static void inm_release(struct in_multi *); static struct ip_moptions * inp_findmoptions(struct inpcb *); static int inp_get_source_filters(struct inpcb *, struct sockopt *); static int inp_join_group(struct inpcb *, struct sockopt *); static int inp_leave_group(struct inpcb *, struct sockopt *); static struct ifnet * inp_lookup_mcast_ifp(const struct inpcb *, const struct sockaddr_in *, const struct in_addr); static int inp_block_unblock_source(struct inpcb *, struct sockopt *); static int inp_set_multicast_if(struct inpcb *, struct sockopt *); static int inp_set_source_filters(struct inpcb *, struct sockopt *); static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast"); static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, "Max source filters per group"); static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, "Max source filters per socket"); int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, &in_mcast_loop, 0, "Loopback multicast datagrams by default"); static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, "Per-interface stack-wide source filters"); #ifdef KTR /* * Inline function which wraps assertions for a valid ifp. * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp * is detached. */ static int __inline inm_is_ifp_detached(const struct in_multi *inm) { struct ifnet *ifp; KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->inm_ifma->ifma_ifp; if (ifp != NULL) { /* * Sanity check that netinet's notion of ifp is the * same as net's. 
*/ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); } return (ifp == NULL); } #endif static struct grouptask free_gtask; static struct in_multi_head inm_free_list; static void inm_release_task(void *arg __unused); static void inm_init(void) { SLIST_INIT(&inm_free_list); taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); } #ifdef EARLY_AP_STARTUP SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, inm_init, NULL); #else SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST, inm_init, NULL); #endif void inm_release_list_deferred(struct in_multi_head *inmh) { if (SLIST_EMPTY(inmh)) return; mtx_lock(&in_multi_free_mtx); SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); mtx_unlock(&in_multi_free_mtx); GROUPTASK_ENQUEUE(&free_gtask); } void inm_disconnect(struct in_multi *inm) { struct ifnet *ifp; struct ifmultiaddr *ifma, *ll_ifma; ifp = inm->inm_ifp; IF_ADDR_WLOCK_ASSERT(ifp); ifma = inm->inm_ifma; if_ref(ifp); if (ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); if ((ll_ifma = ifma->ifma_llifma) != NULL) { MPASS(ifma != ll_ifma); ifma->ifma_llifma = NULL; MPASS(ll_ifma->ifma_llifma == NULL); MPASS(ll_ifma->ifma_ifp == ifp); if (--ll_ifma->ifma_refcount == 0) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); if_freemulti(ll_ifma); ifma_restart = true; } } } void inm_release_deferred(struct in_multi *inm) { struct in_multi_head tmp; IN_MULTI_LIST_LOCK_ASSERT(); MPASS(inm->inm_refcount > 0); if (--inm->inm_refcount == 0) { SLIST_INIT(&tmp); inm_disconnect(inm); inm->inm_ifma->ifma_protospec = NULL; SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); inm_release_list_deferred(&tmp); } } static void inm_release_task(void *arg __unused) { struct in_multi_head inm_free_tmp; struct in_multi *inm, *tinm; SLIST_INIT(&inm_free_tmp); mtx_lock(&in_multi_free_mtx); SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); mtx_unlock(&in_multi_free_mtx); IN_MULTI_LOCK(); SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); MPASS(inm); inm_release(inm); } IN_MULTI_UNLOCK(); } /* * Initialize an in_mfilter structure to a known state at t0, t1 * with an empty source filter list. */ static __inline void imf_init(struct in_mfilter *imf, const int st0, const int st1) { memset(imf, 0, sizeof(struct in_mfilter)); RB_INIT(&imf->imf_sources); imf->imf_st[0] = st0; imf->imf_st[1] = st1; } /* * Function for looking up an in_multi record for an IPv4 multicast address * on a given interface. ifp must be valid. If no record found, return NULL. * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. */ struct in_multi * inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) { struct ifmultiaddr *ifma; struct in_multi *inm; IN_MULTI_LIST_LOCK_ASSERT(); IF_ADDR_LOCK_ASSERT(ifp); inm = NULL; CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_addr.s_addr == ina.s_addr) break; inm = NULL; } return (inm); } /* * Wrapper for inm_lookup_locked(). * The IF_ADDR_LOCK will be taken on ifp and released on return. 
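 * (In this revision the lookup itself runs inside the network epoch,
 * via NET_EPOCH_ENTER()/NET_EPOCH_EXIT() below, rather than taking
 * IF_ADDR_LOCK directly.)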
*/ struct in_multi * inm_lookup(struct ifnet *ifp, const struct in_addr ina) { struct epoch_tracker et; struct in_multi *inm; IN_MULTI_LIST_LOCK_ASSERT(); NET_EPOCH_ENTER(et); inm = inm_lookup_locked(ifp, ina); NET_EPOCH_EXIT(et); return (inm); } /* * Resize the ip_moptions vector to the next power-of-two minus 1. * May be called with locks held; do not sleep. */ static int imo_grow(struct ip_moptions *imo) { struct in_multi **nmships; struct in_multi **omships; struct in_mfilter *nmfilters; struct in_mfilter *omfilters; size_t idx; size_t newmax; size_t oldmax; nmships = NULL; nmfilters = NULL; omships = imo->imo_membership; omfilters = imo->imo_mfilters; oldmax = imo->imo_max_memberships; newmax = ((oldmax + 1) * 2) - 1; if (newmax <= IP_MAX_MEMBERSHIPS) { nmships = (struct in_multi **)realloc(omships, sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); nmfilters = (struct in_mfilter *)realloc(omfilters, sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); if (nmships != NULL && nmfilters != NULL) { /* Initialize newly allocated source filter heads. */ for (idx = oldmax; idx < newmax; idx++) { imf_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); } imo->imo_max_memberships = newmax; imo->imo_membership = nmships; imo->imo_mfilters = nmfilters; } } if (nmships == NULL || nmfilters == NULL) { if (nmships != NULL) free(nmships, M_IPMOPTS); if (nmfilters != NULL) free(nmfilters, M_INMFILTER); return (ETOOMANYREFS); } return (0); } /* * Find an IPv4 multicast group entry for this ip_moptions instance * which matches the specified group, and optionally an interface. * Return its index into the array, or -1 if not found. */ static size_t imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group) { const struct sockaddr_in *gsin; struct in_multi **pinm; int idx; int nmships; gsin = (const struct sockaddr_in *)group; /* The imo_membership array may be lazy allocated. */ if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) return (-1); nmships = imo->imo_num_memberships; pinm = &imo->imo_membership[0]; for (idx = 0; idx < nmships; idx++, pinm++) { if (*pinm == NULL) continue; if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { break; } } if (idx >= nmships) idx = -1; return (idx); } /* * Find an IPv4 multicast source entry for this imo which matches * the given group index for this socket, and source address. * * NOTE: This does not check if the entry is in-mode, merely if * it exists, which may not be the desired behaviour. */ static struct in_msource * imo_match_source(const struct ip_moptions *imo, const size_t gidx, const struct sockaddr *src) { struct ip_msource find; struct in_mfilter *imf; struct ip_msource *ims; const sockunion_t *psa; KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, ("%s: invalid index %d\n", __func__, (int)gidx)); /* The imo_mfilters array may be lazy allocated. */ if (imo->imo_mfilters == NULL) return (NULL); imf = &imo->imo_mfilters[gidx]; /* Source trees are keyed in host byte order. */ psa = (const sockunion_t *)src; find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); return ((struct in_msource *)ims); } /* * Perform filtering for multicast datagrams on a socket by group and source. 
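 *
 * (Illustrative: an INCLUDE-mode membership with no matching source
 * entry causes the datagram to be reported as MCAST_NOTSMEMBER, while
 * an EXCLUDE-mode membership passes unknown sources by default.)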
* * Returns 0 if a datagram should be allowed through, or various error codes * if the socket was not a member of the group, or the source was muted, etc. */ int imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group, const struct sockaddr *src) { size_t gidx; struct in_msource *ims; int mode; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); gidx = imo_match_group(imo, ifp, group); if (gidx == -1) return (MCAST_NOTGMEMBER); /* * Check if the source was included in an (S,G) join. * Allow reception on exclusive memberships by default, * reject reception on inclusive memberships by default. * Exclude source only if an in-mode exclude filter exists. * Include source only if an in-mode include filter exists. * NOTE: We are comparing group state here at IGMP t1 (now) * with socket-layer t0 (since last downcall). */ mode = imo->imo_mfilters[gidx].imf_st[1]; ims = imo_match_source(imo, gidx, src); if ((ims == NULL && mode == MCAST_INCLUDE) || (ims != NULL && ims->imsl_st[0] != mode)) return (MCAST_NOTSMEMBER); return (MCAST_PASS); } /* * Find and return a reference to an in_multi record for (ifp, group), * and bump its reference count. * If one does not exist, try to allocate it, and update link-layer multicast * filters on ifp to listen for group. * Assumes the IN_MULTI lock is held across the call. * Return 0 if successful, otherwise return an appropriate error code. */ static int in_getmulti(struct ifnet *ifp, const struct in_addr *group, struct in_multi **pinm) { struct sockaddr_in gsin; struct ifmultiaddr *ifma; struct in_ifinfo *ii; struct in_multi *inm; int error; IN_MULTI_LOCK_ASSERT(); ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; IN_MULTI_LIST_LOCK(); inm = inm_lookup(ifp, *group); if (inm != NULL) { /* * If we already joined this group, just bump the * refcount and return it. */ KASSERT(inm->inm_refcount >= 1, ("%s: bad refcount %d", __func__, inm->inm_refcount)); inm_acquire_locked(inm); *pinm = inm; } IN_MULTI_LIST_UNLOCK(); if (inm != NULL) return (0); memset(&gsin, 0, sizeof(gsin)); gsin.sin_family = AF_INET; gsin.sin_len = sizeof(struct sockaddr_in); gsin.sin_addr = *group; /* * Check if a link-layer group is already associated * with this network-layer group on the given ifnet. */ error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); if (error != 0) return (error); /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ IN_MULTI_LIST_LOCK(); IF_ADDR_WLOCK(ifp); /* * If something other than netinet is occupying the link-layer * group, print a meaningful error message and back out of * the allocation. * Otherwise, bump the refcount on the existing network-layer * group association and return it. */ if (ifma->ifma_protospec != NULL) { inm = (struct in_multi *)ifma->ifma_protospec; #ifdef INVARIANTS KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", __func__)); KASSERT(ifma->ifma_addr->sa_family == AF_INET, ("%s: ifma not AF_INET", __func__)); KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || !in_hosteq(inm->inm_addr, *group)) { char addrbuf[INET_ADDRSTRLEN]; panic("%s: ifma %p is inconsistent with %p (%s)", __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); } #endif inm_acquire_locked(inm); *pinm = inm; goto out_locked; } IF_ADDR_WLOCK_ASSERT(ifp); /* * A new in_multi record is needed; allocate and initialize it. * We DO NOT perform an IGMP join as the in_ layer may need to * push an initial source list down to IGMP to support SSM. 
* * The initial source filter state is INCLUDE, {} as per the RFC. */ inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); if (inm == NULL) { IF_ADDR_WUNLOCK(ifp); IN_MULTI_LIST_UNLOCK(); if_delmulti_ifma(ifma); return (ENOMEM); } inm->inm_addr = *group; inm->inm_ifp = ifp; inm->inm_igi = ii->ii_igmp; inm->inm_ifma = ifma; inm->inm_refcount = 1; inm->inm_state = IGMP_NOT_MEMBER; mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; RB_INIT(&inm->inm_srcs); ifma->ifma_protospec = inm; *pinm = inm; out_locked: IF_ADDR_WUNLOCK(ifp); IN_MULTI_LIST_UNLOCK(); return (0); } /* * Drop a reference to an in_multi record. * * If the refcount drops to 0, free the in_multi record and * delete the underlying link-layer membership. */ static void inm_release(struct in_multi *inm) { struct ifmultiaddr *ifma; struct ifnet *ifp; CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); MPASS(inm->inm_refcount == 0); CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); ifma = inm->inm_ifma; ifp = inm->inm_ifp; /* XXX this access is not covered by IF_ADDR_LOCK */ CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); if (ifp != NULL) { CURVNET_SET(ifp->if_vnet); inm_purge(inm); free(inm, M_IPMADDR); if_delmulti_ifma_flags(ifma, 1); CURVNET_RESTORE(); if_rele(ifp); } else { inm_purge(inm); free(inm, M_IPMADDR); if_delmulti_ifma_flags(ifma, 1); } } /* * Clear recorded source entries for a group. * Used by the IGMP code. Caller must hold the IN_MULTI lock. * FIXME: Should reap. */ void inm_clear_recorded(struct in_multi *inm) { struct ip_msource *ims; IN_MULTI_LIST_LOCK_ASSERT(); RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { if (ims->ims_stp) { ims->ims_stp = 0; --inm->inm_st[1].iss_rec; } } KASSERT(inm->inm_st[1].iss_rec == 0, ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); } /* * Record a source as pending for a Source-Group IGMPv3 query. * This lives here as it modifies the shared tree. * * inm is the group descriptor. * naddr is the address of the source to record in network-byte order. * * If the net.inet.igmp.sgalloc sysctl is non-zero, we will * lazy-allocate a source node in response to an SG query. * Otherwise, no allocation is performed. This saves some memory * with the trade-off that the source will not be reported to the * router if joined in the window between the query response and * the group actually being joined on the local host. * * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. * This turns off the allocation of a recorded source entry if * the group has not been joined. * * Return 0 if the source didn't exist or was already marked as recorded. * Return 1 if the source was marked as recorded by this function. * Return <0 if any error occurred (negated errno code). */ int inm_record_source(struct in_multi *inm, const in_addr_t naddr) { struct ip_msource find; struct ip_msource *ims, *nims; IN_MULTI_LIST_LOCK_ASSERT(); find.ims_haddr = ntohl(naddr); ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); if (ims && ims->ims_stp) return (0); if (ims == NULL) { if (inm->inm_nsrc == in_mcast_maxgrpsrc) return (-ENOSPC); nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, M_NOWAIT | M_ZERO); if (nims == NULL) return (-ENOMEM); nims->ims_haddr = find.ims_haddr; RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); ++inm->inm_nsrc; ims = nims; } /* * Mark the source as recorded and update the recorded * source count. 
*/ ++ims->ims_stp; ++inm->inm_st[1].iss_rec; return (1); } /* * Return a pointer to an in_msource owned by an in_mfilter, * given its source address. * Lazy-allocate if needed. If this is a new entry its filter state is * undefined at t0. * * imf is the filter set being modified. * haddr is the source address in *host* byte-order. * * SMPng: May be called with locks held; malloc must not block. */ static int imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, struct in_msource **plims) { struct ip_msource find; struct ip_msource *ims, *nims; struct in_msource *lims; int error; error = 0; ims = NULL; lims = NULL; /* key is host byte order */ find.ims_haddr = ntohl(psin->sin_addr.s_addr); ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); lims = (struct in_msource *)ims; if (lims == NULL) { if (imf->imf_nsrc == in_mcast_maxsocksrc) return (ENOSPC); nims = malloc(sizeof(struct in_msource), M_INMFILTER, M_NOWAIT | M_ZERO); if (nims == NULL) return (ENOMEM); lims = (struct in_msource *)nims; lims->ims_haddr = find.ims_haddr; lims->imsl_st[0] = MCAST_UNDEFINED; RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); ++imf->imf_nsrc; } *plims = lims; return (error); } /* * Graft a source entry into an existing socket-layer filter set, * maintaining any required invariants and checking allocations. * * The source is marked as being in the new filter mode at t1. * * Return the pointer to the new node, otherwise return NULL. */ static struct in_msource * imf_graft(struct in_mfilter *imf, const uint8_t st1, const struct sockaddr_in *psin) { struct ip_msource *nims; struct in_msource *lims; nims = malloc(sizeof(struct in_msource), M_INMFILTER, M_NOWAIT | M_ZERO); if (nims == NULL) return (NULL); lims = (struct in_msource *)nims; lims->ims_haddr = ntohl(psin->sin_addr.s_addr); lims->imsl_st[0] = MCAST_UNDEFINED; lims->imsl_st[1] = st1; RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); ++imf->imf_nsrc; return (lims); } /* * Prune a source entry from an existing socket-layer filter set, * maintaining any required invariants and checking allocations. * * The source is marked as being left at t1, it is not freed. * * Return 0 if no error occurred, otherwise return an errno value. */ static int imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) { struct ip_msource find; struct ip_msource *ims; struct in_msource *lims; /* key is host byte order */ find.ims_haddr = ntohl(psin->sin_addr.s_addr); ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); if (ims == NULL) return (ENOENT); lims = (struct in_msource *)ims; lims->imsl_st[1] = MCAST_UNDEFINED; return (0); } /* * Revert socket-layer filter set deltas at t1 to t0 state. */ static void imf_rollback(struct in_mfilter *imf) { struct ip_msource *ims, *tims; struct in_msource *lims; RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { lims = (struct in_msource *)ims; if (lims->imsl_st[0] == lims->imsl_st[1]) { /* no change at t1 */ continue; } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { /* revert change to existing source at t1 */ lims->imsl_st[1] = lims->imsl_st[0]; } else { /* revert source added t1 */ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); free(ims, M_INMFILTER); imf->imf_nsrc--; } } imf->imf_st[1] = imf->imf_st[0]; } /* * Mark socket-layer filter set as INCLUDE {} at t1. 
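 *
 * (Lifecycle sketch: imf_leave() marks each source MCAST_UNDEFINED at
 * t1 and the set INCLUDE {}; once imf_commit() copies t1 to t0,
 * imf_reap() may free entries that are undefined at both times.)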
*/ static void imf_leave(struct in_mfilter *imf) { struct ip_msource *ims; struct in_msource *lims; RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { lims = (struct in_msource *)ims; lims->imsl_st[1] = MCAST_UNDEFINED; } imf->imf_st[1] = MCAST_INCLUDE; } /* * Mark socket-layer filter set deltas as committed. */ static void imf_commit(struct in_mfilter *imf) { struct ip_msource *ims; struct in_msource *lims; RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { lims = (struct in_msource *)ims; lims->imsl_st[0] = lims->imsl_st[1]; } imf->imf_st[0] = imf->imf_st[1]; } /* * Reap unreferenced sources from socket-layer filter set. */ static void imf_reap(struct in_mfilter *imf) { struct ip_msource *ims, *tims; struct in_msource *lims; RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { lims = (struct in_msource *)ims; if ((lims->imsl_st[0] == MCAST_UNDEFINED) && (lims->imsl_st[1] == MCAST_UNDEFINED)) { CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); free(ims, M_INMFILTER); imf->imf_nsrc--; } } } /* * Purge socket-layer filter set. */ static void imf_purge(struct in_mfilter *imf) { struct ip_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); free(ims, M_INMFILTER); imf->imf_nsrc--; } imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; KASSERT(RB_EMPTY(&imf->imf_sources), ("%s: imf_sources not empty", __func__)); } /* * Look up a source filter entry for a multicast group. * * inm is the group descriptor to work with. * haddr is the host-byte-order IPv4 address to look up. * noalloc may be non-zero to suppress allocation of sources. * *pims will be set to the address of the retrieved or allocated source. * * SMPng: NOTE: may be called with locks held. * Return 0 if successful, otherwise return a non-zero error code. */ static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, const int noalloc, struct ip_msource **pims) { struct ip_msource find; struct ip_msource *ims, *nims; find.ims_haddr = haddr; ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); if (ims == NULL && !noalloc) { if (inm->inm_nsrc == in_mcast_maxgrpsrc) return (ENOSPC); nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, M_NOWAIT | M_ZERO); if (nims == NULL) return (ENOMEM); nims->ims_haddr = haddr; RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); ++inm->inm_nsrc; ims = nims; #ifdef KTR CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, haddr, ims); #endif } *pims = ims; return (0); } /* * Merge socket-layer source into IGMP-layer source. * If rollback is non-zero, perform the inverse of the merge. */ static void ims_merge(struct ip_msource *ims, const struct in_msource *lims, const int rollback) { int n = rollback ? 
-1 : 1; if (lims->imsl_st[0] == MCAST_EXCLUDE) { CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", __func__, n, ims->ims_haddr); ims->ims_st[1].ex -= n; } else if (lims->imsl_st[0] == MCAST_INCLUDE) { CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", __func__, n, ims->ims_haddr); ims->ims_st[1].in -= n; } if (lims->imsl_st[1] == MCAST_EXCLUDE) { CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", __func__, n, ims->ims_haddr); ims->ims_st[1].ex += n; } else if (lims->imsl_st[1] == MCAST_INCLUDE) { CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", __func__, n, ims->ims_haddr); ims->ims_st[1].in += n; } } /* * Atomically update the global in_multi state, when a membership's * filter list is being updated in any way. * * imf is the per-inpcb-membership group filter pointer. * A fake imf may be passed for in-kernel consumers. * * XXX This is a candidate for a set-symmetric-difference style loop * which would eliminate the repeated lookup from root of ims nodes, * as they share the same key space. * * If any error occurred this function will back out of refcounts * and return a non-zero value. */ static int inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) { struct ip_msource *ims, *nims; struct in_msource *lims; int schanged, error; int nsrc0, nsrc1; schanged = 0; error = 0; nsrc1 = nsrc0 = 0; IN_MULTI_LIST_LOCK_ASSERT(); /* * Update the source filters first, as this may fail. * Maintain count of in-mode filters at t0, t1. These are * used to work out if we transition into ASM mode or not. * Maintain a count of source filters whose state was * actually modified by this operation. */ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { lims = (struct in_msource *)ims; if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; if (lims->imsl_st[0] == lims->imsl_st[1]) continue; error = inm_get_source(inm, lims->ims_haddr, 0, &nims); ++schanged; if (error) break; ims_merge(nims, lims, 0); } if (error) { struct ip_msource *bims; RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { lims = (struct in_msource *)ims; if (lims->imsl_st[0] == lims->imsl_st[1]) continue; (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); if (bims == NULL) continue; ims_merge(bims, lims, 1); } goto out_reap; } CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", __func__, nsrc0, nsrc1); /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ if (imf->imf_st[0] == imf->imf_st[1] && imf->imf_st[1] == MCAST_INCLUDE) { if (nsrc1 == 0) { CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); --inm->inm_st[1].iss_in; } } /* Handle filter mode transition on socket. */ if (imf->imf_st[0] != imf->imf_st[1]) { CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", __func__, imf->imf_st[0], imf->imf_st[1]); if (imf->imf_st[0] == MCAST_EXCLUDE) { CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); --inm->inm_st[1].iss_ex; } else if (imf->imf_st[0] == MCAST_INCLUDE) { CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); --inm->inm_st[1].iss_in; } if (imf->imf_st[1] == MCAST_EXCLUDE) { CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); inm->inm_st[1].iss_ex++; } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); inm->inm_st[1].iss_in++; } } /* * Track inm filter state in terms of listener counts. * If there are any exclusive listeners, stack-wide * membership is exclusive. * Otherwise, if only inclusive listeners, stack-wide is inclusive. 
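 * (For example, illustrative: one EXCLUDE{} listener and one
 * INCLUDE{S} listener on the same group leave iss_ex == 1, so the
 * stack-wide filter mode remains MCAST_EXCLUDE until the exclusive
 * listener departs.)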
* If no listeners remain, state is undefined at t1, * and the IGMP lifecycle for this group should finish. */ if (inm->inm_st[1].iss_ex > 0) { CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; } else if (inm->inm_st[1].iss_in > 0) { CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); inm->inm_st[1].iss_fmode = MCAST_INCLUDE; } else { CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; } /* Decrement ASM listener count on transition out of ASM mode. */ if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { if ((imf->imf_st[1] != MCAST_EXCLUDE) || (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); --inm->inm_st[1].iss_asm; } } /* Increment ASM listener count on transition to ASM mode. */ if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); inm->inm_st[1].iss_asm++; } CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); inm_print(inm); out_reap: if (schanged > 0) { CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); inm_reap(inm); } return (error); } /* * Mark an in_multi's filter set deltas as committed. * Called by IGMP after a state change has been enqueued. */ void inm_commit(struct in_multi *inm) { struct ip_msource *ims; CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); inm_print(inm); RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { ims->ims_st[0] = ims->ims_st[1]; } inm->inm_st[0] = inm->inm_st[1]; } /* * Reap unreferenced nodes from an in_multi's filter set. */ static void inm_reap(struct in_multi *inm) { struct ip_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || ims->ims_stp != 0) continue; CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); free(ims, M_IPMSOURCE); inm->inm_nsrc--; } } /* * Purge all source nodes from an in_multi's filter set. */ static void inm_purge(struct in_multi *inm) { struct ip_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); free(ims, M_IPMSOURCE); inm->inm_nsrc--; } } /* * Join a multicast group; unlocked entry point. * * SMPng: XXX: in_joingroup() is called from in_control() when Giant * is not held. Fortunately, ifp is unlikely to have been detached * at this point, so we assume it's OK to recurse. */ int in_joingroup(struct ifnet *ifp, const struct in_addr *gina, /*const*/ struct in_mfilter *imf, struct in_multi **pinm) { int error; IN_MULTI_LOCK(); error = in_joingroup_locked(ifp, gina, imf, pinm); IN_MULTI_UNLOCK(); return (error); } /* * Join a multicast group; real entry point. * * Only preserves atomicity at inm level. * NOTE: imf argument cannot be const due to sys/tree.h limitations. * * If the IGMP downcall fails, the group is not joined, and an error * code is returned. 
*/ int in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, /*const*/ struct in_mfilter *imf, struct in_multi **pinm) { struct in_mfilter timf; struct in_multi *inm; int error; IN_MULTI_LOCK_ASSERT(); IN_MULTI_LIST_UNLOCK_ASSERT(); CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, ntohl(gina->s_addr), ifp, ifp->if_xname); error = 0; inm = NULL; /* * If no imf was specified (i.e. kernel consumer), * fake one up and assume it is an ASM join. */ if (imf == NULL) { imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); imf = &timf; } error = in_getmulti(ifp, gina, &inm); if (error) { CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); return (error); } IN_MULTI_LIST_LOCK(); CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); error = inm_merge(inm, imf); if (error) { CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); goto out_inm_release; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); if (error) { CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); goto out_inm_release; } out_inm_release: if (error) { CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); inm_release_deferred(inm); } else { *pinm = inm; } IN_MULTI_LIST_UNLOCK(); return (error); } /* * Leave a multicast group; unlocked entry point. */ int in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) { int error; IN_MULTI_LOCK(); error = in_leavegroup_locked(inm, imf); IN_MULTI_UNLOCK(); return (error); } /* * Leave a multicast group; real entry point. * All source filters will be expunged. * * Only preserves atomicity at inm level. * * Holding the write lock for the INP which contains imf * is highly advisable. We can't assert for it as imf does not * contain a back-pointer to the owning inp. * * Note: This is not the same as inm_release(*) as this function also * makes a state change downcall into IGMP. */ int in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) { struct in_mfilter timf; int error; error = 0; IN_MULTI_LOCK_ASSERT(); IN_MULTI_LIST_UNLOCK_ASSERT(); CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, inm, ntohl(inm->inm_addr.s_addr), (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), imf); /* * If no imf was specified (i.e. kernel consumer), * fake one up and assume it is an ASM join. */ if (imf == NULL) { imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); imf = &timf; } /* * Begin state merge transaction at IGMP layer. * * As this particular invocation should not cause any memory * to be allocated, and there is no opportunity to roll back * the transaction, it MUST NOT fail. */ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); CURVNET_SET(inm->inm_ifp->if_vnet); error = igmp_change_state(inm); IF_ADDR_WLOCK(inm->inm_ifp); inm_release_deferred(inm); IF_ADDR_WUNLOCK(inm->inm_ifp); IN_MULTI_LIST_UNLOCK(); CURVNET_RESTORE(); if (error) CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); return (error); } /*#ifndef BURN_BRIDGES*/ /* * Join an IPv4 multicast group in (*,G) exclusive mode. * The group must be a 224.0.0.0/24 link-scope group. * This KPI is for legacy kernel consumers only. 
*/ struct in_multi * in_addmulti(struct in_addr *ap, struct ifnet *ifp) { struct in_multi *pinm; int error; #ifdef INVARIANTS char addrbuf[INET_ADDRSTRLEN]; #endif KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa_r(*ap, addrbuf))); error = in_joingroup(ifp, ap, NULL, &pinm); if (error != 0) pinm = NULL; return (pinm); } /* * Block or unblock an ASM multicast source on an inpcb. * This implements the delta-based API described in RFC 3678. * * The delta-based API applies only to exclusive-mode memberships. * An IGMP downcall will be performed. * * SMPng: NOTE: Must take Giant as a join may create a new ifma. * * Return 0 if successful, otherwise return an appropriate error code. */ static int inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; struct rm_priotracker in_ifa_tracker; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in_mfilter *imf; struct ip_moptions *imo; struct in_msource *ims; struct in_multi *inm; size_t idx; uint16_t fmode; int error, doblock; ifp = NULL; error = 0; doblock = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; ssa = (sockunion_t *)&gsr.gsr_source; switch (sopt->sopt_name) { case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: { struct ip_mreq_source mreqs; error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), sizeof(struct ip_mreq_source)); if (error) return (error); gsa->sin.sin_family = AF_INET; gsa->sin.sin_len = sizeof(struct sockaddr_in); gsa->sin.sin_addr = mreqs.imr_multiaddr; ssa->sin.sin_family = AF_INET; ssa->sin.sin_len = sizeof(struct sockaddr_in); ssa->sin.sin_addr = mreqs.imr_sourceaddr; if (!in_nullhost(mreqs.imr_interface)) { IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(mreqs.imr_interface, ifp); IN_IFADDR_RUNLOCK(&in_ifa_tracker); } if (sopt->sopt_name == IP_BLOCK_SOURCE) doblock = 1; CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; } case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); if (error) return (error); if (gsa->sin.sin_family != AF_INET || gsa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); if (ssa->sin.sin_family != AF_INET || ssa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(gsr.gsr_interface); if (sopt->sopt_name == MCAST_BLOCK_SOURCE) doblock = 1; break; default: CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); /* * Check if we are actually a member of this group. */ imo = inp_findmoptions(inp); idx = imo_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->imo_mfilters == NULL) { error = EADDRNOTAVAIL; goto out_inp_locked; } KASSERT(imo->imo_mfilters != NULL, ("%s: imo_mfilters not allocated", __func__)); imf = &imo->imo_mfilters[idx]; inm = imo->imo_membership[idx]; /* * Attempting to use the delta-based API on an * non exclusive-mode membership is an error. */ fmode = imf->imf_st[0]; if (fmode != MCAST_EXCLUDE) { error = EINVAL; goto out_inp_locked; } /* * Deal with error cases up-front: * Asked to block, but already blocked; or * Asked to unblock, but nothing to unblock. * If adding a new block entry, allocate it. 
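 * For reference, the userland request that lands in this check might
 * be (sketch; addresses illustrative):
 *
 *	struct ip_mreq_source mr = { 0 };
 *
 *	mr.imr_multiaddr.s_addr = inet_addr("239.1.1.1");
 *	mr.imr_sourceaddr.s_addr = inet_addr("10.0.0.5");
 *	setsockopt(s, IPPROTO_IP, IP_BLOCK_SOURCE, &mr, sizeof(mr));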
*/ ims = imo_match_source(imo, idx, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); error = EADDRNOTAVAIL; goto out_inp_locked; } INP_WLOCK_ASSERT(inp); /* * Begin state merge transaction at socket layer. */ if (doblock) { CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); ims = imf_graft(imf, fmode, &ssa->sin); if (ims == NULL) error = ENOMEM; } else { CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); error = imf_prune(imf, &ssa->sin); } if (error) { CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); goto out_imf_rollback; } /* * Begin state merge transaction at IGMP layer. */ IN_MULTI_LOCK(); CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); if (error) { CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); goto out_in_multi_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); IN_MULTI_LIST_UNLOCK(); if (error) CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); out_in_multi_locked: IN_MULTI_UNLOCK(); out_imf_rollback: if (error) imf_rollback(imf); else imf_commit(imf); imf_reap(imf); out_inp_locked: INP_WUNLOCK(inp); return (error); } /* * Given an inpcb, return its multicast options structure pointer. Accepts * an unlocked inpcb pointer, but will return it locked. May sleep. * * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. * SMPng: NOTE: Returns with the INP write lock held. */ static struct ip_moptions * inp_findmoptions(struct inpcb *inp) { struct ip_moptions *imo; struct in_multi **immp; struct in_mfilter *imfp; size_t idx; INP_WLOCK(inp); if (inp->inp_moptions != NULL) return (inp->inp_moptions); INP_WUNLOCK(inp); imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, M_WAITOK | M_ZERO); imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, M_INMFILTER, M_WAITOK); imo->imo_multicast_ifp = NULL; imo->imo_multicast_addr.s_addr = INADDR_ANY; imo->imo_multicast_vif = -1; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = in_mcast_loop; imo->imo_num_memberships = 0; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_membership = immp; /* Initialize per-group source filters. */ for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); imo->imo_mfilters = imfp; INP_WLOCK(inp); if (inp->inp_moptions != NULL) { free(imfp, M_INMFILTER); free(immp, M_IPMOPTS); free(imo, M_IPMOPTS); return (inp->inp_moptions); } inp->inp_moptions = imo; return (imo); } static void inp_gcmoptions(struct ip_moptions *imo) { struct in_mfilter *imf; struct in_multi *inm; struct ifnet *ifp; size_t idx, nmships; nmships = imo->imo_num_memberships; for (idx = 0; idx < nmships; ++idx) { imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; if (imf) imf_leave(imf); inm = imo->imo_membership[idx]; ifp = inm->inm_ifp; if (ifp != NULL) { CURVNET_SET(ifp->if_vnet); (void)in_leavegroup(inm, imf); CURVNET_RESTORE(); } else { (void)in_leavegroup(inm, imf); } if (imf) imf_purge(imf); } if (imo->imo_mfilters) free(imo->imo_mfilters, M_INMFILTER); free(imo->imo_membership, M_IPMOPTS); free(imo, M_IPMOPTS); } /* * Discard the IP multicast options (and source filters). 
To minimize * the amount of work done while holding locks such as the INP's * pcbinfo lock (which is used in the receive path), the free * operation is deferred to the epoch callback task. */ void inp_freemoptions(struct ip_moptions *imo) { if (imo == NULL) return; inp_gcmoptions(imo); } /* * Atomically get source filters on a socket for an IPv4 multicast group. * Called with INP lock held; returns with lock released. */ static int inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) { struct __msfilterreq msfr; sockunion_t *gsa; struct ifnet *ifp; struct ip_moptions *imo; struct in_mfilter *imf; struct ip_msource *ims; struct in_msource *lims; struct sockaddr_in *psin; struct sockaddr_storage *ptss; struct sockaddr_storage *tss; int error; size_t idx, nsrcs, ncsrcs; INP_WLOCK_ASSERT(inp); imo = inp->inp_moptions; KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); INP_WUNLOCK(inp); error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), sizeof(struct __msfilterreq)); if (error) return (error); if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) return (EINVAL); ifp = ifnet_byindex(msfr.msfr_ifindex); if (ifp == NULL) return (EINVAL); INP_WLOCK(inp); /* * Lookup group on the socket. */ gsa = (sockunion_t *)&msfr.msfr_group; idx = imo_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->imo_mfilters == NULL) { INP_WUNLOCK(inp); return (EADDRNOTAVAIL); } imf = &imo->imo_mfilters[idx]; /* * Ignore memberships which are in limbo. */ if (imf->imf_st[1] == MCAST_UNDEFINED) { INP_WUNLOCK(inp); return (EAGAIN); } msfr.msfr_fmode = imf->imf_st[1]; /* * If the user specified a buffer, copy out the source filter * entries to userland gracefully. * We only copy out the number of entries which userland * has asked for, but we always tell userland how big the * buffer really needs to be. */ if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) msfr.msfr_nsrcs = in_mcast_maxsocksrc; tss = NULL; if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, M_TEMP, M_NOWAIT | M_ZERO); if (tss == NULL) { INP_WUNLOCK(inp); return (ENOBUFS); } } /* * Count number of sources in-mode at t0. * If buffer space exists and remains, copy out source entries. */ nsrcs = msfr.msfr_nsrcs; ncsrcs = 0; ptss = tss; RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { lims = (struct in_msource *)ims; if (lims->imsl_st[0] == MCAST_UNDEFINED || lims->imsl_st[0] != imf->imf_st[0]) continue; ++ncsrcs; if (tss != NULL && nsrcs > 0) { psin = (struct sockaddr_in *)ptss; psin->sin_family = AF_INET; psin->sin_len = sizeof(struct sockaddr_in); psin->sin_addr.s_addr = htonl(lims->ims_haddr); psin->sin_port = 0; ++ptss; --nsrcs; } } INP_WUNLOCK(inp); if (tss != NULL) { error = copyout(tss, msfr.msfr_srcs, sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); free(tss, M_TEMP); if (error) return (error); } msfr.msfr_nsrcs = ncsrcs; error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); return (error); } /* * Return the IP multicast options in response to user getsockopt(). */ int inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) { struct rm_priotracker in_ifa_tracker; struct ip_mreqn mreqn; struct ip_moptions *imo; struct ifnet *ifp; struct in_ifaddr *ia; int error, optval; u_char coptval; INP_WLOCK(inp); imo = inp->inp_moptions; /* * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. 
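 * (Illustrative: a SOCK_DGRAM caller that passes this check might then
 * issue getsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &val, &len); the
 * cases below answer with a u_char or an int depending on the buffer
 * size supplied.)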
*/ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || (inp->inp_socket->so_proto->pr_type != SOCK_RAW && inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } error = 0; switch (sopt->sopt_name) { case IP_MULTICAST_VIF: if (imo != NULL) optval = imo->imo_multicast_vif; else optval = -1; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(int)); break; case IP_MULTICAST_IF: memset(&mreqn, 0, sizeof(struct ip_mreqn)); if (imo != NULL) { ifp = imo->imo_multicast_ifp; if (!in_nullhost(imo->imo_multicast_addr)) { mreqn.imr_address = imo->imo_multicast_addr; } else if (ifp != NULL) { struct epoch_tracker et; mreqn.imr_ifindex = ifp->if_index; NET_EPOCH_ENTER(et); IFP_TO_IA(ifp, ia, &in_ifa_tracker); if (ia != NULL) mreqn.imr_address = IA_SIN(ia)->sin_addr; NET_EPOCH_EXIT(et); } } INP_WUNLOCK(inp); if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { error = sooptcopyout(sopt, &mreqn, sizeof(struct ip_mreqn)); } else { error = sooptcopyout(sopt, &mreqn.imr_address, sizeof(struct in_addr)); } break; case IP_MULTICAST_TTL: if (imo == NULL) optval = coptval = IP_DEFAULT_MULTICAST_TTL; else optval = coptval = imo->imo_multicast_ttl; INP_WUNLOCK(inp); if (sopt->sopt_valsize == sizeof(u_char)) error = sooptcopyout(sopt, &coptval, sizeof(u_char)); else error = sooptcopyout(sopt, &optval, sizeof(int)); break; case IP_MULTICAST_LOOP: if (imo == NULL) optval = coptval = IP_DEFAULT_MULTICAST_LOOP; else optval = coptval = imo->imo_multicast_loop; INP_WUNLOCK(inp); if (sopt->sopt_valsize == sizeof(u_char)) error = sooptcopyout(sopt, &coptval, sizeof(u_char)); else error = sooptcopyout(sopt, &optval, sizeof(int)); break; case IP_MSFILTER: if (imo == NULL) { error = EADDRNOTAVAIL; INP_WUNLOCK(inp); } else { error = inp_get_source_filters(inp, sopt); } break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } INP_UNLOCK_ASSERT(inp); return (error); } /* * Look up the ifnet to use for a multicast group membership, * given the IPv4 address of an interface, and the IPv4 group address. * * This routine exists to support legacy multicast applications * which do not understand that multicast memberships are scoped to * specific physical links in the networking stack, or which need * to join link-scope groups before IPv4 addresses are configured. * * If inp is non-NULL, use this socket's current FIB number for any * required FIB lookup. * If ina is INADDR_ANY, look up the group address in the unicast FIB, * and use its ifp; usually, this points to the default next-hop. * * If the FIB lookup fails, attempt to use the first non-loopback * interface with multicast capability in the system as a * last resort. The legacy IPv4 ASM API requires that we do * this in order to allow groups to be joined when the routing * table has not yet been populated during boot. * * Returns NULL if no ifp could be found. * * FUTURE: Implement IPv4 source-address selection. */ static struct ifnet * inp_lookup_mcast_ifp(const struct inpcb *inp, const struct sockaddr_in *gsin, const struct in_addr ina) { struct rm_priotracker in_ifa_tracker; struct ifnet *ifp; struct nhop4_basic nh4; uint32_t fibnum; KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), ("%s: not multicast", __func__)); ifp = NULL; if (!in_nullhost(ina)) { IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(ina, ifp); IN_IFADDR_RUNLOCK(&in_ifa_tracker); } else { fibnum = inp ? 
inp->inp_inc.inc_fibnum : 0; if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) ifp = nh4.nh_ifp; else { struct in_ifaddr *ia; struct ifnet *mifp; mifp = NULL; IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { mifp = ia->ia_ifp; if (!(mifp->if_flags & IFF_LOOPBACK) && (mifp->if_flags & IFF_MULTICAST)) { ifp = mifp; break; } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); } } return (ifp); } /* * Join an IPv4 multicast group, possibly with a source. */ static int inp_join_group(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in_mfilter *imf; struct ip_moptions *imo; struct in_multi *inm; struct in_msource *lims; size_t idx; int error, is_new; ifp = NULL; imf = NULL; lims = NULL; error = 0; is_new = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; gsa->ss.ss_family = AF_UNSPEC; ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; switch (sopt->sopt_name) { case IP_ADD_MEMBERSHIP: { struct ip_mreqn mreqn; if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); else error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreq), sizeof(struct ip_mreq)); if (error) return (error); gsa->sin.sin_family = AF_INET; gsa->sin.sin_len = sizeof(struct sockaddr_in); gsa->sin.sin_addr = mreqn.imr_multiaddr; if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && mreqn.imr_ifindex != 0) ifp = ifnet_byindex(mreqn.imr_ifindex); else ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, mreqn.imr_address); break; } case IP_ADD_SOURCE_MEMBERSHIP: { struct ip_mreq_source mreqs; error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), sizeof(struct ip_mreq_source)); if (error) return (error); gsa->sin.sin_family = ssa->sin.sin_family = AF_INET; gsa->sin.sin_len = ssa->sin.sin_len = sizeof(struct sockaddr_in); gsa->sin.sin_addr = mreqs.imr_multiaddr; if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); ssa->sin.sin_addr = mreqs.imr_sourceaddr; ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, mreqs.imr_interface); CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; } case MCAST_JOIN_GROUP: case MCAST_JOIN_SOURCE_GROUP: if (sopt->sopt_name == MCAST_JOIN_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_req), sizeof(struct group_req)); } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); } if (error) return (error); if (gsa->sin.sin_family != AF_INET || gsa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); /* * Overwrite the port field if present, as the sockaddr * being copied in may be matched with a binary comparison. 
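 *
 * A userland join that reaches this path might look like (sketch;
 * interface name and group illustrative):
 *
 *	struct group_req gr;
 *	struct sockaddr_in *psin = (struct sockaddr_in *)&gr.gr_group;
 *
 *	memset(&gr, 0, sizeof(gr));
 *	gr.gr_interface = if_nametoindex("em0");
 *	psin->sin_family = AF_INET;
 *	psin->sin_len = sizeof(*psin);
 *	psin->sin_addr.s_addr = inet_addr("239.1.1.1");
 *	setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &gr, sizeof(gr));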
*/ gsa->sin.sin_port = 0; if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { if (ssa->sin.sin_family != AF_INET || ssa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); ssa->sin.sin_port = 0; } if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(gsr.gsr_interface); break; default: CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) return (EADDRNOTAVAIL); imo = inp_findmoptions(inp); idx = imo_match_group(imo, ifp, &gsa->sa); if (idx == -1) { is_new = 1; } else { inm = imo->imo_membership[idx]; imf = &imo->imo_mfilters[idx]; if (ssa->ss.ss_family != AF_UNSPEC) { /* * MCAST_JOIN_SOURCE_GROUP on an exclusive membership * is an error. On an existing inclusive membership, * it just adds the source to the filter list. */ if (imf->imf_st[1] != MCAST_INCLUDE) { error = EINVAL; goto out_inp_locked; } /* * Throw out duplicates. * * XXX FIXME: This makes a naive assumption that * even if entries exist for *ssa in this imf, * they will be rejected as dupes, even if they * are not valid in the current mode (in-mode). * * in_msource is transactioned just as for anything * else in SSM -- but note naive use of inm_graft() * below for allocating new filter entries. * * This is only an issue if someone mixes the * full-state SSM API with the delta-based API, * which is discouraged in the relevant RFCs. */ lims = imo_match_source(imo, idx, &ssa->sa); if (lims != NULL /*&& lims->imsl_st[1] == MCAST_INCLUDE*/) { error = EADDRNOTAVAIL; goto out_inp_locked; } } else { /* * MCAST_JOIN_GROUP on an existing exclusive * membership is an error; return EADDRINUSE * to preserve 4.4BSD API idempotence, and * avoid tedious detour to code below. * NOTE: This is bending RFC 3678 a bit. * * On an existing inclusive membership, this is also * an error; if you want to change filter mode, * you must use the userland API setsourcefilter(). * XXX We don't reject this for imf in UNDEFINED * state at t1, because allocation of a filter * is atomic with allocation of a membership. */ error = EINVAL; if (imf->imf_st[1] == MCAST_EXCLUDE) error = EADDRINUSE; goto out_inp_locked; } } /* * Begin state merge transaction at socket layer. */ INP_WLOCK_ASSERT(inp); if (is_new) { if (imo->imo_num_memberships == imo->imo_max_memberships) { error = imo_grow(imo); if (error) goto out_inp_locked; } /* * Allocate the new slot upfront so we can deal with * grafting the new source filter in same code path * as for join-source on existing membership. */ idx = imo->imo_num_memberships; imo->imo_membership[idx] = NULL; imo->imo_num_memberships++; KASSERT(imo->imo_mfilters != NULL, ("%s: imf_mfilters vector was not allocated", __func__)); imf = &imo->imo_mfilters[idx]; KASSERT(RB_EMPTY(&imf->imf_sources), ("%s: imf_sources not empty", __func__)); } /* * Graft new source into filter list for this inpcb's * membership of the group. The in_multi may not have * been allocated yet if this is a new membership, however, * the in_mfilter slot will be allocated and must be initialized. * * Note: Grafting of exclusive mode filters doesn't happen * in this path. * XXX: Should check for non-NULL lims (node exists but may * not be in-mode) for interop with full-state API. 
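 *
 * An SSM join arriving here with ssa set might be issued as (sketch;
 * addresses illustrative, 232/8 being the SSM range):
 *
 *	struct ip_mreq_source mr = { 0 };
 *
 *	mr.imr_multiaddr.s_addr = inet_addr("232.1.1.1");
 *	mr.imr_sourceaddr.s_addr = inet_addr("10.0.0.5");
 *	setsockopt(s, IPPROTO_IP, IP_ADD_SOURCE_MEMBERSHIP, &mr,
 *	    sizeof(mr));
 *
 * starting (or extending) the membership in INCLUDE mode.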
*/ if (ssa->ss.ss_family != AF_UNSPEC) { /* Membership starts in IN mode */ if (is_new) { CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); } else { CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); } lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); if (lims == NULL) { CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); error = ENOMEM; goto out_imo_free; } } else { /* No address specified; Membership starts in EX mode */ if (is_new) { CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); } } /* * Begin state merge transaction at IGMP layer. */ in_pcbref(inp); INP_WUNLOCK(inp); IN_MULTI_LOCK(); if (is_new) { error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, &inm); if (error) { CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", __func__); IN_MULTI_LIST_UNLOCK(); goto out_imo_free; } inm_acquire(inm); imo->imo_membership[idx] = inm; } else { CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); if (error) { CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); goto out_in_multi_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); IN_MULTI_LIST_UNLOCK(); if (error) { CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); goto out_in_multi_locked; } } out_in_multi_locked: IN_MULTI_UNLOCK(); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (ENXIO); if (error) { imf_rollback(imf); if (is_new) imf_purge(imf); else imf_reap(imf); } else { imf_commit(imf); } out_imo_free: if (error && is_new) { inm = imo->imo_membership[idx]; if (inm != NULL) { IN_MULTI_LIST_LOCK(); inm_release_deferred(inm); IN_MULTI_LIST_UNLOCK(); } imo->imo_membership[idx] = NULL; --imo->imo_num_memberships; } out_inp_locked: INP_WUNLOCK(inp); return (error); } /* * Leave an IPv4 multicast group on an inpcb, possibly with a source. */ static int inp_leave_group(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; struct ip_mreq_source mreqs; struct rm_priotracker in_ifa_tracker; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in_mfilter *imf; struct ip_moptions *imo; struct in_msource *ims; struct in_multi *inm; size_t idx; int error, is_final; ifp = NULL; error = 0; is_final = 1; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; gsa->ss.ss_family = AF_UNSPEC; ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; switch (sopt->sopt_name) { case IP_DROP_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq), sizeof(struct ip_mreq)); /* * Swap interface and sourceaddr arguments, * as ip_mreq and ip_mreq_source are laid * out differently. */ mreqs.imr_interface = mreqs.imr_sourceaddr; mreqs.imr_sourceaddr.s_addr = INADDR_ANY; } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), sizeof(struct ip_mreq_source)); } if (error) return (error); gsa->sin.sin_family = AF_INET; gsa->sin.sin_len = sizeof(struct sockaddr_in); gsa->sin.sin_addr = mreqs.imr_multiaddr; if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { ssa->sin.sin_family = AF_INET; ssa->sin.sin_len = sizeof(struct sockaddr_in); ssa->sin.sin_addr = mreqs.imr_sourceaddr; } /* * Attempt to look up hinted ifp from interface address. 
* Fallthrough with null ifp iff lookup fails, to * preserve 4.4BSD mcast API idempotence. * XXX NOTE WELL: The RFC 3678 API is preferred because * using an IPv4 address as a key is racy. */ if (!in_nullhost(mreqs.imr_interface)) { IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(mreqs.imr_interface, ifp); IN_IFADDR_RUNLOCK(&in_ifa_tracker); } CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: if (sopt->sopt_name == MCAST_LEAVE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_req), sizeof(struct group_req)); } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); } if (error) return (error); if (gsa->sin.sin_family != AF_INET || gsa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { if (ssa->sin.sin_family != AF_INET || ssa->sin.sin_len != sizeof(struct sockaddr_in)) return (EINVAL); } if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(gsr.gsr_interface); if (ifp == NULL) return (EADDRNOTAVAIL); break; default: CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); /* * Find the membership in the membership array. */ imo = inp_findmoptions(inp); idx = imo_match_group(imo, ifp, &gsa->sa); if (idx == -1) { error = EADDRNOTAVAIL; goto out_inp_locked; } inm = imo->imo_membership[idx]; imf = &imo->imo_mfilters[idx]; if (ssa->ss.ss_family != AF_UNSPEC) is_final = 0; /* * Begin state merge transaction at socket layer. */ INP_WLOCK_ASSERT(inp); /* * If we were instructed only to leave a given source, do so. * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. */ if (is_final) { imf_leave(imf); } else { if (imf->imf_st[0] == MCAST_EXCLUDE) { error = EADDRNOTAVAIL; goto out_inp_locked; } ims = imo_match_source(imo, idx, &ssa->sa); if (ims == NULL) { CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); error = EADDRNOTAVAIL; goto out_inp_locked; } CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); error = imf_prune(imf, &ssa->sin); if (error) { CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); goto out_inp_locked; } } /* * Begin state merge transaction at IGMP layer. */ in_pcbref(inp); INP_WUNLOCK(inp); IN_MULTI_LOCK(); if (is_final) { /* * Give up the multicast address record to which * the membership points. */ (void)in_leavegroup_locked(inm, imf); } else { CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); if (error) { CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); goto out_in_multi_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); IN_MULTI_LIST_UNLOCK(); if (error) { CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); } } out_in_multi_locked: IN_MULTI_UNLOCK(); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (ENXIO); if (error) imf_rollback(imf); else imf_commit(imf); imf_reap(imf); if (is_final) { /* Remove the gap in the membership and filter array. 
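 * (Compaction sketch, illustrative: with memberships [A, B, C] and a final leave of B, the loop below shifts C down one slot and re-initializes the trailing filter, keeping both arrays dense.)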
*/ KASSERT(RB_EMPTY(&imf->imf_sources), ("%s: imf_sources not empty", __func__)); for (++idx; idx < imo->imo_num_memberships; ++idx) { imo->imo_membership[idx - 1] = imo->imo_membership[idx]; imo->imo_mfilters[idx - 1] = imo->imo_mfilters[idx]; } imf_init(&imo->imo_mfilters[idx - 1], MCAST_UNDEFINED, MCAST_EXCLUDE); imo->imo_num_memberships--; } out_inp_locked: INP_WUNLOCK(inp); return (error); } /* * Select the interface for transmitting IPv4 multicast datagrams. * * Either an instance of struct in_addr or an instance of struct ip_mreqn * may be passed to this socket option. An address of INADDR_ANY or an * interface index of 0 is used to remove a previous selection. * When no interface is selected, one is chosen for every send. */ static int inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) { struct rm_priotracker in_ifa_tracker; struct in_addr addr; struct ip_mreqn mreqn; struct ifnet *ifp; struct ip_moptions *imo; int error; if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { /* * An interface index was specified using the * Linux-derived ip_mreqn structure. */ error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); if (error) return (error); if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) return (EINVAL); if (mreqn.imr_ifindex == 0) { ifp = NULL; } else { ifp = ifnet_byindex(mreqn.imr_ifindex); if (ifp == NULL) return (EADDRNOTAVAIL); } } else { /* * An interface was specified by IPv4 address. * This is the traditional BSD usage. */ error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), sizeof(struct in_addr)); if (error) return (error); if (in_nullhost(addr)) { ifp = NULL; } else { IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(addr, ifp); IN_IFADDR_RUNLOCK(&in_ifa_tracker); if (ifp == NULL) return (EADDRNOTAVAIL); } CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, ntohl(addr.s_addr)); } /* Reject interfaces which do not support multicast. */ if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); imo = inp_findmoptions(inp); imo->imo_multicast_ifp = ifp; imo->imo_multicast_addr.s_addr = INADDR_ANY; INP_WUNLOCK(inp); return (0); } /* * Atomically set source filters on a socket for an IPv4 multicast group. * * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. */ static int inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) { struct __msfilterreq msfr; sockunion_t *gsa; struct ifnet *ifp; struct in_mfilter *imf; struct ip_moptions *imo; struct in_multi *inm; size_t idx; int error; error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), sizeof(struct __msfilterreq)); if (error) return (error); if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) return (ENOBUFS); if ((msfr.msfr_fmode != MCAST_EXCLUDE && msfr.msfr_fmode != MCAST_INCLUDE)) return (EINVAL); if (msfr.msfr_group.ss_family != AF_INET || msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) return (EINVAL); gsa = (sockunion_t *)&msfr.msfr_group; if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); gsa->sin.sin_port = 0; /* ignore port */ if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) return (EADDRNOTAVAIL); ifp = ifnet_byindex(msfr.msfr_ifindex); if (ifp == NULL) return (EADDRNOTAVAIL); /* * Take the INP write lock. * Check if this socket is a member of this group. 
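 * (Illustrative call path, not from this file: userland normally arrives here via setsourcefilter(3), which is expected to pack the group, filter mode and source list into the struct __msfilterreq copied in above for IP_MSFILTER.)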
*/ imo = inp_findmoptions(inp); idx = imo_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->imo_mfilters == NULL) { error = EADDRNOTAVAIL; goto out_inp_locked; } inm = imo->imo_membership[idx]; imf = &imo->imo_mfilters[idx]; /* * Begin state merge transaction at socket layer. */ INP_WLOCK_ASSERT(inp); imf->imf_st[1] = msfr.msfr_fmode; /* * Apply any new source filters, if present. * Make a copy of the user-space source vector so * that we may copy it in with a single copyin. This * allows us to deal with page faults up-front. */ if (msfr.msfr_nsrcs > 0) { struct in_msource *lims; struct sockaddr_in *psin; struct sockaddr_storage *kss, *pkss; int i; INP_WUNLOCK(inp); CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", __func__, (unsigned long)msfr.msfr_nsrcs); kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, M_TEMP, M_WAITOK); error = copyin(msfr.msfr_srcs, kss, sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); if (error) { free(kss, M_TEMP); return (error); } INP_WLOCK(inp); /* * Mark all source filters as UNDEFINED at t1. * Restore new group filter mode, as imf_leave() * will set it to INCLUDE. */ imf_leave(imf); imf->imf_st[1] = msfr.msfr_fmode; /* * Update socket layer filters at t1, lazy-allocating * new entries. This saves a bunch of memory at the * cost of one RB_FIND() per source entry; duplicate * entries in the msfr_nsrcs vector are ignored. * If we encounter an error, roll back the transaction. * * XXX This too could be replaced with a set-symmetric * difference-like loop to avoid walking from root * every time, as the key space is common. */ for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { psin = (struct sockaddr_in *)pkss; if (psin->sin_family != AF_INET) { error = EAFNOSUPPORT; break; } if (psin->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; break; } error = imf_get_source(imf, psin, &lims); if (error) break; lims->imsl_st[1] = imf->imf_st[1]; } free(kss, M_TEMP); } if (error) goto out_imf_rollback; INP_WLOCK_ASSERT(inp); IN_MULTI_LOCK(); /* * Begin state merge transaction at IGMP layer. */ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); if (error) { CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); goto out_in_multi_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); IN_MULTI_LIST_UNLOCK(); if (error) CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); out_in_multi_locked: IN_MULTI_UNLOCK(); out_imf_rollback: if (error) imf_rollback(imf); else imf_commit(imf); imf_reap(imf); out_inp_locked: INP_WUNLOCK(inp); return (error); } /* * Set the IP multicast options in response to user setsockopt(). * * Many of the socket options handled in this function duplicate the * functionality of socket options in the regular unicast API. However, * it is not possible to merge the duplicate code, because the idempotence * of the IPv4 multicast part of the BSD Sockets API must be preserved; * the effects of these options must be treated as separate and distinct. * * SMPng: XXX: Unlocked read of inp_socket believed OK. * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING * is refactored to no longer use vifs. */ int inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) { struct ip_moptions *imo; int error; error = 0; /* * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM, * or is a divert socket, reject it.
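 * (Illustrative: a UDP socket calling setsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) is dispatched through the switch below, while the same call on a divert socket fails with EOPNOTSUPP.)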
*/ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || (inp->inp_socket->so_proto->pr_type != SOCK_RAW && inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) return (EOPNOTSUPP); switch (sopt->sopt_name) { case IP_MULTICAST_VIF: { int vifi; /* * Select a multicast VIF for transmission. * Only useful if multicast forwarding is active. */ if (legal_vif_num == NULL) { error = EOPNOTSUPP; break; } error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); if (error) break; if (!legal_vif_num(vifi) && (vifi != -1)) { error = EINVAL; break; } imo = inp_findmoptions(inp); imo->imo_multicast_vif = vifi; INP_WUNLOCK(inp); break; } case IP_MULTICAST_IF: error = inp_set_multicast_if(inp, sopt); break; case IP_MULTICAST_TTL: { u_char ttl; /* * Set the IP time-to-live for outgoing multicast packets. * The original multicast API required a char argument, * which is inconsistent with the rest of the socket API. * We allow either a char or an int. */ if (sopt->sopt_valsize == sizeof(u_char)) { error = sooptcopyin(sopt, &ttl, sizeof(u_char), sizeof(u_char)); if (error) break; } else { u_int ittl; error = sooptcopyin(sopt, &ittl, sizeof(u_int), sizeof(u_int)); if (error) break; if (ittl > 255) { error = EINVAL; break; } ttl = (u_char)ittl; } imo = inp_findmoptions(inp); imo->imo_multicast_ttl = ttl; INP_WUNLOCK(inp); break; } case IP_MULTICAST_LOOP: { u_char loop; /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. The original multicast API required a * char argument, which is inconsistent with the rest * of the socket API. We allow either a char or an int. */ if (sopt->sopt_valsize == sizeof(u_char)) { error = sooptcopyin(sopt, &loop, sizeof(u_char), sizeof(u_char)); if (error) break; } else { u_int iloop; error = sooptcopyin(sopt, &iloop, sizeof(u_int), sizeof(u_int)); if (error) break; loop = (u_char)iloop; } imo = inp_findmoptions(inp); imo->imo_multicast_loop = !!loop; INP_WUNLOCK(inp); break; } case IP_ADD_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: case MCAST_JOIN_GROUP: case MCAST_JOIN_SOURCE_GROUP: error = inp_join_group(inp, sopt); break; case IP_DROP_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = inp_leave_group(inp, sopt); break; case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = inp_block_unblock_source(inp, sopt); break; case IP_MSFILTER: error = inp_set_source_filters(inp, sopt); break; default: error = EOPNOTSUPP; break; } INP_UNLOCK_ASSERT(inp); return (error); } /* * Expose IGMP's multicast filter mode and source list(s) to userland, * keyed by (ifindex, group). * The filter mode is written out as a uint32_t, followed by * 0..n of struct in_addr. * For use by ifmcstat(8). * SMPng: NOTE: unlocked read of ifindex space. 
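 * (Hypothetical reader sketch: a consumer supplies a MIB name of { ifindex, group } with namelen == 2 and parses the reply as one uint32_t filter mode followed by 0..n struct in_addr entries, which is how ifmcstat(8) would be expected to consume it.)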
*/ static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) { struct in_addr src, group; struct epoch_tracker et; struct ifnet *ifp; struct ifmultiaddr *ifma; struct in_multi *inm; struct ip_msource *ims; int *name; int retval; u_int namelen; uint32_t fmode, ifindex; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 2) return (EINVAL); ifindex = name[0]; if (ifindex <= 0 || ifindex > V_if_index) { CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", __func__, ifindex); return (ENOENT); } group.s_addr = name[1]; if (!IN_MULTICAST(ntohl(group.s_addr))) { CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", __func__, ntohl(group.s_addr)); return (EINVAL); } ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", __func__, ifindex); return (ENOENT); } retval = sysctl_wire_old_buffer(req, sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); if (retval) return (retval); IN_MULTI_LIST_LOCK(); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; if (!in_hosteq(inm->inm_addr, group)) continue; fmode = inm->inm_st[1].iss_fmode; retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); if (retval != 0) break; RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, ims->ims_haddr); /* * Only copy-out sources which are in-mode. */ if (fmode != ims_get_mode(inm, ims, 1)) { CTR1(KTR_IGMPV3, "%s: skip non-in-mode", __func__); continue; } src.s_addr = htonl(ims->ims_haddr); retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); if (retval != 0) break; } } NET_EPOCH_EXIT(et); IN_MULTI_LIST_UNLOCK(); return (retval); } #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) -static const char *inm_modestrs[] = { "un", "in", "ex" }; +static const char *inm_modestrs[] = { + [MCAST_UNDEFINED] = "un", + [MCAST_INCLUDE] = "in", + [MCAST_EXCLUDE] = "ex", +}; +_Static_assert(MCAST_UNDEFINED == 0 && + MCAST_EXCLUDE + 1 == nitems(inm_modestrs), + "inm_modestrs: no longer matches #defines"); static const char * inm_mode_str(const int mode) { if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) return (inm_modestrs[mode]); return ("??"); } static const char *inm_statestrs[] = { - "not-member", - "silent", - "idle", - "lazy", - "sleeping", - "awakening", - "query-pending", - "sg-query-pending", - "leaving" + [IGMP_NOT_MEMBER] = "not-member", + [IGMP_SILENT_MEMBER] = "silent", + [IGMP_REPORTING_MEMBER] = "reporting", + [IGMP_IDLE_MEMBER] = "idle", + [IGMP_LAZY_MEMBER] = "lazy", + [IGMP_SLEEPING_MEMBER] = "sleeping", + [IGMP_AWAKENING_MEMBER] = "awakening", + [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending", + [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending", + [IGMP_LEAVING_MEMBER] = "leaving", }; +_Static_assert(IGMP_NOT_MEMBER == 0 && + IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs), + "inm_statestrs: no longer matches #defines"); static const char * inm_state_str(const int state) { if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) return (inm_statestrs[state]); return ("??"); } /* * Dump an in_multi structure to the console.
*/ void inm_print(const struct in_multi *inm) { int t; char addrbuf[INET_ADDRSTRLEN]; if ((ktr_mask & KTR_IGMPV3) == 0) return; printf("%s: --- begin inm %p ---\n", __func__, inm); printf("addr %s ifp %p(%s) ifma %p\n", inet_ntoa_r(inm->inm_addr, addrbuf), inm->inm_ifp, inm->inm_ifp->if_xname, inm->inm_ifma); printf("timer %u state %s refcount %u scq.len %u\n", inm->inm_timer, inm_state_str(inm->inm_state), inm->inm_refcount, inm->inm_scq.mq_len); printf("igi %p nsrc %lu sctimer %u scrv %u\n", inm->inm_igi, inm->inm_nsrc, inm->inm_sctimer, inm->inm_scrv); for (t = 0; t < 2; t++) { printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, inm_mode_str(inm->inm_st[t].iss_fmode), inm->inm_st[t].iss_asm, inm->inm_st[t].iss_ex, inm->inm_st[t].iss_in, inm->inm_st[t].iss_rec); } printf("%s: --- end inm %p ---\n", __func__, inm); } #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ void inm_print(const struct in_multi *inm) { } #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); Index: projects/runtime-coverage-v2/sys/netpfil/ipfw/ip_fw2.c =================================================================== --- projects/runtime-coverage-v2/sys/netpfil/ipfw/ip_fw2.c (revision 346924) +++ projects/runtime-coverage-v2/sys/netpfil/ipfw/ip_fw2.c (revision 346925) @@ -1,3489 +1,3491 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /* * The FreeBSD IP packet firewall, main file */ #include "opt_ipfw.h" #include "opt_ipdivert.h" #include "opt_inet.h" #ifndef INET #error "IPFIREWALL requires INET" #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for ETHERTYPE_IP */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #endif #include /* for struct grehdr */ #include #include /* XXX for in_cksum */ #ifdef MAC #include #endif /* * static variables followed by global ones. * All ipfw global variables are here. */ VNET_DEFINE_STATIC(int, fw_deny_unknown_exthdrs); #define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) VNET_DEFINE_STATIC(int, fw_permit_single_frag6) = 1; #define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6) #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT static int default_to_accept = 1; #else static int default_to_accept; #endif VNET_DEFINE(int, autoinc_step); VNET_DEFINE(int, fw_one_pass) = 1; VNET_DEFINE(unsigned int, fw_tables_max); VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */ /* Use 128 tables by default */ static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; #ifndef LINEAR_SKIPTO static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards); #define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back) #else static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards); #define JUMP(ch, f, num, targ, back) jump_linear(ch, f, num, targ, back) #endif /* * Each rule belongs to one of 32 different sets (0..31). * The variable set_disable contains one bit per set. * If the bit is set, all rules in the corresponding set * are disabled. Set RESVD_SET(31) is reserved for the default rule * and rules that are not deleted by the flush command, * and CANNOT be disabled. * Rules in set RESVD_SET can only be deleted individually. */ VNET_DEFINE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) VNET_DEFINE(int, fw_verbose); /* counter for ipfw_log(NULL...) 
*/ VNET_DEFINE(u_int64_t, norule_counter); VNET_DEFINE(int, verbose_limit); /* layer3_chain contains the list of rules for layer 3 */ VNET_DEFINE(struct ip_fw_chain, layer3_chain); /* ipfw_vnet_ready controls when we are open for business */ VNET_DEFINE(int, ipfw_vnet_ready) = 0; VNET_DEFINE(int, ipfw_nat_ready) = 0; ipfw_nat_t *ipfw_nat_ptr = NULL; struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_del_ptr; ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #ifdef SYSCTL_NODE uint32_t dummy_def = IPFW_DEFAULT_RULE; static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS); SYSBEGIN(f3) SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, "Only do a single pass through ipfw when using dummynet(4)"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, "Log matches to ipfw rules"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, "Set upper limit of matches of ipfw rules logged"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, &dummy_def, 0, "The default/max possible rule number."); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", "Maximum number of concurrently used tables"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_tables_sets, "IU", "Use per-set namespace for tables"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, "Number of static rules"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6); SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, "Deny packets with unknown IPv6 Extension Headers"); SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0, "Permit single packet IPv6 fragments"); #endif /* INET6 */ SYSEND #endif /* SYSCTL_NODE */ /* * Some macros used in the various matching options. * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T * Other macros just cast void * into the appropriate type */ #define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) #define TCP(p) ((struct tcphdr *)(p)) #define SCTP(p) ((struct sctphdr *)(p)) #define UDP(p) ((struct udphdr *)(p)) #define ICMP(p) ((struct icmphdr *)(p)) #define ICMP6(p) ((struct icmp6_hdr *)(p)) static __inline int icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) { int type = icmp->icmp_type; return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); } #define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) static int is_icmp_query(struct icmphdr *icmp) { int type = icmp->icmp_type; return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); } #undef TT /* * The following checks use two arrays of 8 or 16 bits to store the * bits that we want set or clear, respectively. They are in the * low and high half of cmd->arg1 or cmd->d[0]. * * We scan options and store the bits we find set.
We succeed if * * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear * * The code is sometimes optimized not to store additional variables. */ static int flags_match(ipfw_insn *cmd, u_int8_t bits) { u_char want_clear; bits = ~bits; if ( ((cmd->arg1 & 0xff) & bits) != 0) return 0; /* some bits we want set were clear */ want_clear = (cmd->arg1 >> 8) & 0xff; if ( (want_clear & bits) != want_clear) return 0; /* some bits we want clear were set */ return 1; } static int ipopts_match(struct ip *ip, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(ip + 1); int x = (ip->ip_hl << 2) - sizeof (struct ip); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { optlen = cp[IPOPT_OLEN]; if (optlen <= 0 || optlen > x) return 0; /* invalid or truncated */ } switch (opt) { default: break; case IPOPT_LSRR: bits |= IP_FW_IPOPT_LSRR; break; case IPOPT_SSRR: bits |= IP_FW_IPOPT_SSRR; break; case IPOPT_RR: bits |= IP_FW_IPOPT_RR; break; case IPOPT_TS: bits |= IP_FW_IPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(tcp + 1); int x = (tcp->th_off << 2) - sizeof(struct tcphdr); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { optlen = cp[1]; if (optlen <= 0) break; } switch (opt) { default: break; case TCPOPT_MAXSEG: bits |= IP_FW_TCPOPT_MSS; break; case TCPOPT_WINDOW: bits |= IP_FW_TCPOPT_WINDOW; break; case TCPOPT_SACK_PERMITTED: case TCPOPT_SACK: bits |= IP_FW_TCPOPT_SACK; break; case TCPOPT_TIMESTAMP: bits |= IP_FW_TCPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) { if (ifp == NULL) /* no iface with this packet, match fails */ return (0); /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ return ipfw_lookup_table(chain, cmd->p.kidx, 0, &ifp->if_index, tablearg); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) return(1); } else { if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) return(1); } } else { #if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ struct ifaddr *ia; if_addr_rlock(ifp); CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr->sa_family != AF_INET) continue; if (cmd->p.ip.s_addr == ((struct sockaddr_in *) (ia->ifa_addr))->sin_addr.s_addr) { if_addr_runlock(ifp); return(1); /* match */ } } if_addr_runlock(ifp); #endif /* __FreeBSD__ */ } return(0); /* no match, fail ... */ } /* * The verify_path function checks if a route to the src exists and * if it is reachable via ifp (when provided). * * The 'verrevpath' option checks that the interface that an IP packet * arrives on is the same interface that traffic destined for the * packet's source address would be routed out of. * The 'versrcreach' option just checks that the source address is * reachable via any route (except default) in the routing table. * These two are a measure to block forged packets. This is also * commonly known as "anti-spoofing" or Unicast Reverse Path * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs * is purposely reminiscent of the Cisco IOS command, * * ip verify unicast reverse-path * ip verify unicast source reachable-via any * * which implements the same functionality. But note that the syntax * is misleading, and the check may be performed on all IP packets * whether unicast, multicast, or broadcast. */ static int verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) { #if defined(USERSPACE) || !defined(__FreeBSD__) return 0; #else struct nhop4_basic nh4; if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0) return (0); /* * If ifp is provided, check for equality with rtentry. * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, * in order to pass packets injected back by if_simloop(): * routing entry (via lo0) for our own address * may exist, so we need to handle routing asymmetry. */ if (ifp != NULL && ifp != nh4.nh_ifp) return (0); /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0) return (0); /* or if this is a blackhole/reject route */ if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) return (0); /* found valid route */ return 1; #endif /* __FreeBSD__ */ } /* * Generate an SCTP packet containing an ABORT chunk. The verification tag * is given by vtag. The T-bit is set in the ABORT chunk if and only if * reflected is not 0. */ static struct mbuf * ipfw_send_abort(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t vtag, int reflected) { struct mbuf *m; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct sctphdr *sctp; struct sctp_chunkhdr *chunk; u_int16_t hlen, plen, tlen; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) mac_netinet_firewall_reply(replyto, m); else mac_netinet_firewall_send(m); #else (void)replyto; /* don't warn about unused arg */ #endif switch (id->addr_type) { case 4: hlen = sizeof(struct ip); break; #ifdef INET6 case 6: hlen = sizeof(struct ip6_hdr); break; #endif default: /* XXX: log me?!? */ FREE_PKT(m); return (NULL); } plen = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); tlen = hlen + plen; m->m_data += max_linkhdr; m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = tlen; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, tlen); switch (id->addr_type) { case 4: ip = mtod(m, struct ip *); ip->ip_v = 4; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = htons(tlen); ip->ip_id = htons(0); ip->ip_off = htons(0); ip->ip_ttl = V_ip_defttl; ip->ip_p = IPPROTO_SCTP; ip->ip_sum = 0; ip->ip_src.s_addr = htonl(id->dst_ip); ip->ip_dst.s_addr = htonl(id->src_ip); sctp = (struct sctphdr *)(ip + 1); break; #ifdef INET6 case 6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = htons(plen); ip6->ip6_nxt = IPPROTO_SCTP; ip6->ip6_hlim = IPV6_DEFHLIM; ip6->ip6_src = id->dst_ip6; ip6->ip6_dst = id->src_ip6; sctp = (struct sctphdr *)(ip6 + 1); break; #endif } sctp->src_port = htons(id->dst_port); sctp->dest_port = htons(id->src_port); sctp->v_tag = htonl(vtag); sctp->checksum = htonl(0); chunk = (struct sctp_chunkhdr *)(sctp + 1); chunk->chunk_type = SCTP_ABORT_ASSOCIATION; chunk->chunk_flags = 0; if (reflected != 0) { chunk->chunk_flags |= SCTP_HAD_NO_TCB; } chunk->chunk_length = htons(sizeof(struct sctp_chunkhdr)); sctp->checksum = sctp_calculate_cksum(m, hlen); return (m); } /* * Generate a TCP packet, containing either a RST or a keepalive.
* When flags & TH_RST, we are sending a RST packet, because a * "reset" action matched the packet. * Otherwise we are sending a keepalive, and flags & TH_ACK is set. * The 'replyto' mbuf is the mbuf being replied to, if any, and is required * so that MAC can label the reply appropriately. */ struct mbuf * ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) { struct mbuf *m = NULL; /* stupid compiler */ struct ip *h = NULL; /* stupid compiler */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif struct tcphdr *th = NULL; int len, dir; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) mac_netinet_firewall_reply(replyto, m); else mac_netinet_firewall_send(m); #else (void)replyto; /* don't warn about unused arg */ #endif switch (id->addr_type) { case 4: len = sizeof(struct ip) + sizeof(struct tcphdr); break; #ifdef INET6 case 6: len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); break; #endif default: /* XXX: log me?!? */ FREE_PKT(m); return (NULL); } dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); m->m_data += max_linkhdr; m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (id->addr_type) { case 4: h = mtod(m, struct ip *); /* prepare for checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(sizeof(struct tcphdr)); if (dir) { h->ip_src.s_addr = htonl(id->src_ip); h->ip_dst.s_addr = htonl(id->dst_ip); } else { h->ip_src.s_addr = htonl(id->dst_ip); h->ip_dst.s_addr = htonl(id->src_ip); } th = (struct tcphdr *)(h + 1); break; #ifdef INET6 case 6: h6 = mtod(m, struct ip6_hdr *); /* prepare for checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(sizeof(struct tcphdr)); if (dir) { h6->ip6_src = id->src_ip6; h6->ip6_dst = id->dst_ip6; } else { h6->ip6_src = id->dst_ip6; h6->ip6_dst = id->src_ip6; } th = (struct tcphdr *)(h6 + 1); break; #endif } if (dir) { th->th_sport = htons(id->src_port); th->th_dport = htons(id->dst_port); } else { th->th_sport = htons(id->dst_port); th->th_dport = htons(id->src_port); } th->th_off = sizeof(struct tcphdr) >> 2; if (flags & TH_RST) { if (flags & TH_ACK) { th->th_seq = htonl(ack); th->th_flags = TH_RST; } else { if (flags & TH_SYN) seq++; th->th_ack = htonl(seq); th->th_flags = TH_RST | TH_ACK; } } else { /* * Keepalive - use caller provided sequence numbers */ th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_flags = TH_ACK; } switch (id->addr_type) { case 4: th->th_sum = in_cksum(m, len); /* finish the ip header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = htons(0); h->ip_len = htons(len); h->ip_ttl = V_ip_defttl; h->ip_sum = 0; break; #ifdef INET6 case 6: th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), sizeof(struct tcphdr)); /* finish the ip6 header */ h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; break; #endif } return (m); } #ifdef INET6 /* * ipv6 specific rules here...
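 * (Illustrative example for the bitmap matcher below: a hypothetical ipfw rule 'icmp6types 135,136' sets bits 135 and 136, so icmp6type_match() tests cmd->d[4] against (1<<7)|(1<<8), following d[type/32] & 1<<(type%32).)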
*/ static __inline int icmp6type_match (int type, ipfw_insn_u32 *cmd) { return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); } static int flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) { int i; for (i=0; i <= cmd->o.arg1; ++i ) if (curr_flow == cmd->d[i] ) return 1; return 0; } /* support for IP6_*_ME opcodes */ static const struct in6_addr lla_mask = {{{ 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}}; static int ipfw_localip6(struct in6_addr *in6) { struct rm_priotracker in6_ifa_tracker; struct in6_ifaddr *ia; if (IN6_IS_ADDR_MULTICAST(in6)) return (0); if (!IN6_IS_ADDR_LINKLOCAL(in6)) return (in6_localip(in6)); IN6_IFADDR_RLOCK(&in6_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) continue; if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, in6, &lla_mask)) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (1); } } IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } static int verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct nhop6_basic nh6; if (IN6_IS_SCOPE_LINKLOCAL(src)) return (1); if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0) return (0); /* If ifp is provided, check for equality with route table. */ if (ifp != NULL && ifp != nh6.nh_ifp) return (0); /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0) return (0); /* or if this is a blackhole/reject route */ if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) return (0); /* found valid route */ return 1; } static int is_icmp6_query(int icmp6_type) { if ((icmp6_type <= ICMP6_MAXTYPE) && (icmp6_type == ICMP6_ECHO_REQUEST || icmp6_type == ICMP6_MEMBERSHIP_QUERY || icmp6_type == ICMP6_WRUREQUEST || icmp6_type == ICMP6_FQDN_QUERY || icmp6_type == ICMP6_NI_QUERY)) return (1); return (0); } static int map_icmp_unreach(int code) { /* RFC 7915 p4.2 */ switch (code) { case ICMP_UNREACH_NET: case ICMP_UNREACH_HOST: case ICMP_UNREACH_SRCFAIL: case ICMP_UNREACH_NET_UNKNOWN: case ICMP_UNREACH_HOST_UNKNOWN: case ICMP_UNREACH_TOSNET: case ICMP_UNREACH_TOSHOST: return (ICMP6_DST_UNREACH_NOROUTE); case ICMP_UNREACH_PORT: return (ICMP6_DST_UNREACH_NOPORT); default: /* * Map the rest of codes into admit prohibited. * XXX: unreach proto should be mapped into ICMPv6 * parameter problem, but we use only unreach type. 
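 * (Illustrative: a strict RFC 7915 4.2 translator would turn ICMP_UNREACH_PROTOCOL into an ICMPv6 Parameter Problem pointing at the Next Header field; the default case below folds it into ICMP6_DST_UNREACH_ADMIN instead.)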
*/ return (ICMP6_DST_UNREACH_ADMIN); } } static void send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) { struct mbuf *m; m = args->m; if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *tcp; tcp = (struct tcphdr *)((char *)ip6 + hlen); if ((tcp->th_flags & TH_RST) == 0) { struct mbuf *m0; m0 = ipfw_send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m0 != NULL) ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); } FREE_PKT(m); } else if (code == ICMP6_UNREACH_ABORT && args->f_id.proto == IPPROTO_SCTP) { struct mbuf *m0; struct sctphdr *sctp; u_int32_t v_tag; int reflected; sctp = (struct sctphdr *)((char *)ip6 + hlen); reflected = 1; v_tag = ntohl(sctp->v_tag); /* Investigate the first chunk header if available */ if (m->m_len >= hlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)) { struct sctp_chunkhdr *chunk; chunk = (struct sctp_chunkhdr *)(sctp + 1); switch (chunk->chunk_type) { case SCTP_INITIATION: /* * Packets containing an INIT chunk MUST have * a zero v-tag. */ if (v_tag != 0) { v_tag = 0; break; } /* INIT chunk MUST NOT be bundled */ if (m->m_pkthdr.len > hlen + sizeof(struct sctphdr) + ntohs(chunk->chunk_length) + 3) { break; } /* Use the initiate tag if available */ if ((m->m_len >= hlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd))) { struct sctp_init *init; init = (struct sctp_init *)(chunk + 1); v_tag = ntohl(init->initiate_tag); reflected = 0; } break; case SCTP_ABORT_ASSOCIATION: /* * If the packet contains an ABORT chunk, don't * reply. * XXX: We should search through all chunks, * but don't, to avoid attacks. */ v_tag = 0; break; } } if (v_tag == 0) { m0 = NULL; } else { m0 = ipfw_send_abort(args->m, &(args->f_id), v_tag, reflected); } if (m0 != NULL) ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); FREE_PKT(m); } else if (code != ICMP6_UNREACH_RST && code != ICMP6_UNREACH_ABORT) { /* Send an ICMPv6 unreach. */ #if 0 /* * Unlike above, the mbufs need to line up with the ip6 hdr, * as the contents are read. We need to m_adj() the * needed amount. * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif icmp6_error(m, ICMP6_DST_UNREACH, code, 0); } else FREE_PKT(m); args->m = NULL; } #endif /* INET6 */ /* * Sends a reject message, consuming the mbuf passed as an argument. */ static void send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) { #if 0 /* XXX When ip is not guaranteed to be at mtod() we will * need to account for this. * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif if (code != ICMP_REJECT_RST && code != ICMP_REJECT_ABORT) { /* Send an ICMP unreach */ icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); } else if (code == ICMP_REJECT_RST && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *const tcp = L3HDR(struct tcphdr, mtod(args->m, struct ip *)); if ( (tcp->th_flags & TH_RST) == 0) { struct mbuf *m; m = ipfw_send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m != NULL) ip_output(m, NULL, NULL, 0, NULL, NULL); } FREE_PKT(args->m); } else if (code == ICMP_REJECT_ABORT && args->f_id.proto == IPPROTO_SCTP) { struct mbuf *m; struct sctphdr *sctp; struct sctp_chunkhdr *chunk; struct sctp_init *init; u_int32_t v_tag; int reflected; sctp = L3HDR(struct sctphdr, mtod(args->m, struct ip *)); reflected = 1; v_tag = ntohl(sctp->v_tag); if (iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)) { /* Look at the first chunk header if available */ chunk = (struct sctp_chunkhdr *)(sctp + 1); switch (chunk->chunk_type) { case SCTP_INITIATION: /* * Packets containing an INIT chunk MUST have * a zero v-tag. */ if (v_tag != 0) { v_tag = 0; break; } /* INIT chunk MUST NOT be bundled */ if (iplen > (ip->ip_hl << 2) + sizeof(struct sctphdr) + ntohs(chunk->chunk_length) + 3) { break; } /* Use the initiate tag if available */ if ((iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd))) { init = (struct sctp_init *)(chunk + 1); v_tag = ntohl(init->initiate_tag); reflected = 0; } break; case SCTP_ABORT_ASSOCIATION: /* * If the packet contains an ABORT chunk, don't * reply. * XXX: We should search through all chunks, * but don't, to avoid attacks. */ v_tag = 0; break; } } if (v_tag == 0) { m = NULL; } else { m = ipfw_send_abort(args->m, &(args->f_id), v_tag, reflected); } if (m != NULL) ip_output(m, NULL, NULL, 0, NULL, NULL); FREE_PKT(args->m); } else FREE_PKT(args->m); args->m = NULL; } /* * Support for uid/gid/jail lookup. These tests are expensive * (because we may need to look into the list of active sockets) * so we cache the results. ugid_lookupp is 0 if we have not * yet done a lookup, 1 if we succeeded, and -1 if we tried * and failed. The function always returns the match value. * We could actually spare the variable and use *uc, setting * it to '(void *)check_uidgid' if we have no info, NULL if * we tried and failed, or any other value if successful. */ static int check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, struct ucred **uc) { #if defined(USERSPACE) return 0; // not supported in userspace #else #ifndef __FreeBSD__ /* XXX */ return cred_check(insn, proto, oif, dst_ip, dst_port, src_ip, src_port, (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); #else /* FreeBSD */ struct in_addr src_ip, dst_ip; struct inpcbinfo *pi; struct ipfw_flow_id *id; struct inpcb *pcb, *inp; int lookupflags; int match; id = &args->f_id; inp = args->inp; /* * Check to see if the UDP or TCP stack supplied us with * the PCB. If so, rather than holding a lock and looking * up the PCB, we can use the one that was supplied. */ if (inp && *ugid_lookupp == 0) { INP_LOCK_ASSERT(inp); if (inp->inp_socket != NULL) { *uc = crhold(inp->inp_cred); *ugid_lookupp = 1; } else *ugid_lookupp = -1; } /* * If we have already been here and the packet has no * PCB entry associated with it, then we can safely * assume that this is a no match.
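 * (Illustrative: a rule such as 'allow tcp from any to me 22 uid root' drives this lookup; the first uid/gid/jail opcode evaluated for a packet pays for the PCB lookup, and later ones reuse the cached *uc.)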
*/ if (*ugid_lookupp == -1) return (0); if (id->proto == IPPROTO_TCP) { lookupflags = 0; pi = &V_tcbinfo; } else if (id->proto == IPPROTO_UDP) { lookupflags = INPLOOKUP_WILDCARD; pi = &V_udbinfo; } else if (id->proto == IPPROTO_UDPLITE) { lookupflags = INPLOOKUP_WILDCARD; pi = &V_ulitecbinfo; } else return 0; lookupflags |= INPLOOKUP_RLOCKPCB; match = 0; if (*ugid_lookupp == 0) { if (id->addr_type == 6) { #ifdef INET6 if (args->flags & IPFW_ARGS_IN) pcb = in6_pcblookup_mbuf(pi, &id->src_ip6, htons(id->src_port), &id->dst_ip6, htons(id->dst_port), lookupflags, NULL, args->m); else pcb = in6_pcblookup_mbuf(pi, &id->dst_ip6, htons(id->dst_port), &id->src_ip6, htons(id->src_port), lookupflags, args->ifp, args->m); #else *ugid_lookupp = -1; return (0); #endif } else { src_ip.s_addr = htonl(id->src_ip); dst_ip.s_addr = htonl(id->dst_ip); if (args->flags & IPFW_ARGS_IN) pcb = in_pcblookup_mbuf(pi, src_ip, htons(id->src_port), dst_ip, htons(id->dst_port), lookupflags, NULL, args->m); else pcb = in_pcblookup_mbuf(pi, dst_ip, htons(id->dst_port), src_ip, htons(id->src_port), lookupflags, args->ifp, args->m); } if (pcb != NULL) { INP_RLOCK_ASSERT(pcb); *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; INP_RUNLOCK(pcb); } if (*ugid_lookupp == 0) { /* * We tried and failed, set the variable to -1 * so we will not try again on this packet. */ *ugid_lookupp = -1; return (0); } } if (insn->o.opcode == O_UID) match = ((*uc)->cr_uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) match = groupmember((gid_t)insn->d[0], *uc); else if (insn->o.opcode == O_JAIL) match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); return (match); #endif /* __FreeBSD__ */ #endif /* not supported in userspace */ } /* * Helper function to set args with info on the rule after the matching * one. slot is precise, whereas we guess rule_id as they are * assigned sequentially. */ static inline void set_match(struct ip_fw_args *args, int slot, struct ip_fw_chain *chain) { args->rule.chain_id = chain->id; args->rule.slot = slot + 1; /* we use 0 as a marker */ args->rule.rule_id = 1 + chain->map[slot]->id; args->rule.rulenum = chain->map[slot]->rulenum; args->flags |= IPFW_ARGS_REF; } #ifndef LINEAR_SKIPTO /* * Helper function to enable cached rule lookups using * cached_id and cached_pos fields in ipfw rule. */ static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards) { int f_pos; /* If possible use cached f_pos (in f->cached_pos), * whose version is written in f->cached_id * (horrible hacks to avoid changing the ABI). */ if (num != IP_FW_TARG && f->cached_id == chain->id) f_pos = f->cached_pos; else { int i = IP_FW_ARG_TABLEARG(chain, num, skipto); /* make sure we do not jump backward */ if (jump_backwards == 0 && i <= f->rulenum) i = f->rulenum + 1; if (chain->idxmap != NULL) f_pos = chain->idxmap[i]; else f_pos = ipfw_find_rule(chain, i, 0); /* update the cache */ if (num != IP_FW_TARG) { f->cached_id = chain->id; f->cached_pos = f_pos; } } return (f_pos); } #else /* * Helper function to enable real fast rule lookups. */ static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards) { int f_pos; num = IP_FW_ARG_TABLEARG(chain, num, skipto); /* make sure we do not jump backward */ if (jump_backwards == 0 && num <= f->rulenum) num = f->rulenum + 1; f_pos = chain->idxmap[num]; return (f_pos); } #endif #define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) /* * The main check routine for the firewall. 
* * All arguments are in args so we can modify them and return them * back to the caller. * * Parameters: * * args->m (in/out) The packet; we set to NULL when/if we nuke it. * Starts with the IP header. * args->L3offset Number of bytes bypassed if we came from L2. * e.g. often sizeof(eh) ** NOTYET ** * args->ifp Incoming or outgoing interface. * args->divert_rule (in/out) * Skip up to the first rule past this rule number; * upon return, non-zero port number for divert or tee. * * args->rule Pointer to the last matching rule (in/out) * args->next_hop Socket we are forwarding to (out). * args->next_hop6 IPv6 next hop we are forwarding to (out). * args->f_id Addresses grabbed from the packet (out) * args->rule.info a cookie depending on rule action * * Return value: * * IP_FW_PASS the packet must be accepted * IP_FW_DENY the packet must be dropped * IP_FW_DIVERT divert packet, port in m_tag * IP_FW_TEE tee packet, port in m_tag * IP_FW_DUMMYNET to dummynet, pipe in args->cookie * IP_FW_NETGRAPH into netgraph, cookie args->cookie * args->rule contains the matching rule, * args->rule.info has additional information. * */ int ipfw_chk(struct ip_fw_args *args) { /* * Local variables holding state while processing a packet: * * IMPORTANT NOTE: to speed up the processing of rules, there * are some assumptions about the values of the variables, which * are documented here. Should you change them, please check * the implementation of the various instructions to make sure * that they still work. * * m | args->m Pointer to the mbuf, as received from the caller. * It may change if ipfw_chk() does an m_pullup, or if it * consumes the packet because it calls send_reject(). * XXX This has to change, so that ipfw_chk() never modifies * or consumes the buffer. * OR * args->mem Pointer to contiguous memory chunk. * ip Is the beginning of the ip(4 or 6) header. * eh Ethernet header in case input is Layer2. */ struct mbuf *m; struct ip *ip; struct ether_header *eh; /* * For rules which contain uid/gid or jail constraints, cache * a copy of the user's credentials after the pcb lookup has been * executed. This will speed up the processing of rules with * these types of constraints, as well as decrease contention * on pcb related locks. */ #ifndef __FreeBSD__ struct bsd_ucred ucred_cache; #else struct ucred *ucred_cache = NULL; #endif int ucred_lookup = 0; int f_pos = 0; /* index of current rule in the array */ int retval = 0; struct ifnet *oif, *iif; /* * hlen The length of the IP header. */ u_int hlen = 0; /* hlen >0 means we have an IP pkt */ /* * offset The offset of a fragment. offset != 0 means that * we have a fragment at this offset of an IPv4 packet. * offset == 0 means that (if this is an IPv4 packet) * this is the first or only fragment. * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header * or there is a single packet fragment (fragment header added * without being needed). We will treat a single packet fragment as if * there was no fragment header (or log/block depending on the * V_fw_permit_single_frag6 sysctl setting). */ u_short offset = 0; u_short ip6f_mf = 0; /* * Local copies of addresses. They are only valid if we have * an IP packet. * * proto The protocol. Set to 0 for non-ip packets, * or to the protocol read from the packet otherwise. * proto != 0 means that we have an IPv4 packet. * * src_port, dst_port port numbers, in HOST format. Only * valid for TCP and UDP packets. * * src_ip, dst_ip ip addresses, in NETWORK format. * Only valid for IPv4 packets.
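 * (Worked example with hypothetical values: for a TCP SYN to 198.51.100.7:80, proto = IPPROTO_TCP, dst_port = 80 in host order, and dst_ip holds the address in network order, per the conventions above.)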
*/ uint8_t proto; uint16_t src_port, dst_port; /* NOTE: host format */ struct in_addr src_ip, dst_ip; /* NOTE: network format */ int iplen = 0; int pktlen; struct ipfw_dyn_info dyn_info; struct ip_fw *q = NULL; struct ip_fw_chain *chain = &V_layer3_chain; /* * We store in ulp a pointer to the upper layer protocol header. * In the ipv4 case this is easy to determine from the header, * but for ipv6 we might have some additional headers in the middle. * ulp is NULL if not found. */ void *ulp = NULL; /* upper layer protocol pointer. */ /* XXX ipv6 variables */ int is_ipv6 = 0; uint8_t icmp6_type = 0; uint16_t ext_hd = 0; /* bits vector for extension header filtering */ /* end of ipv6 variables */ int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ IPFW_RLOCK_TRACKER; bool mem; if ((mem = (args->flags & IPFW_ARGS_LENMASK))) { if (args->flags & IPFW_ARGS_ETHER) { eh = (struct ether_header *)args->mem; if (eh->ether_type == htons(ETHERTYPE_VLAN)) ip = (struct ip *) ((struct ether_vlan_header *)eh + 1); else ip = (struct ip *)(eh + 1); } else { eh = NULL; ip = (struct ip *)args->mem; } pktlen = IPFW_ARGS_LENGTH(args->flags); args->f_id.fib = args->ifp->if_fib; /* best guess */ } else { m = args->m; if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ if (args->flags & IPFW_ARGS_ETHER) { /* We need some amount of data to be contiguous. */ if (m->m_len < min(m->m_pkthdr.len, max_protohdr) && (args->m = m = m_pullup(m, min(m->m_pkthdr.len, max_protohdr))) == NULL) goto pullup_failed; eh = mtod(m, struct ether_header *); ip = (struct ip *)(eh + 1); } else { eh = NULL; ip = mtod(m, struct ip *); } pktlen = m->m_pkthdr.len; args->f_id.fib = M_GETFIB(m); /* mbuf not altered */ } dst_ip.s_addr = 0; /* make sure it is initialized */ src_ip.s_addr = 0; /* make sure it is initialized */ src_port = dst_port = 0; DYN_INFO_INIT(&dyn_info); /* * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, * then it sets p to point at the offset "len" in the mbuf. WARNING: the * pointer might become stale after other pullups (but we never use it * this way). */ #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) #define EHLEN (eh != NULL ? ((char *)ip - (char *)eh) : 0) #define PULLUP_LEN(_len, p, T) \ do { \ int x = (_len) + T + EHLEN; \ if (mem) { \ MPASS(pktlen >= x); \ p = (char *)args->mem + (_len) + EHLEN; \ } else { \ if (__predict_false((m)->m_len < x)) { \ args->m = m = m_pullup(m, x); \ if (m == NULL) \ goto pullup_failed; \ } \ p = mtod(m, char *) + (_len) + EHLEN; \ } \ } while (0) /* * In case pointers got stale after pullups, update them. */ #define UPDATE_POINTERS() \ do { \ if (!mem) { \ if (eh != NULL) { \ eh = mtod(m, struct ether_header *); \ ip = (struct ip *)(eh + 1); \ } else \ ip = mtod(m, struct ip *); \ args->m = m; \ } \ } while (0) /* Identify IP packets and fill up variables. 
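 * (The parser below leans on the macros above, e.g. PULLUP_TO(hlen, ulp, struct tcphdr) guarantees sizeof(struct tcphdr) contiguous bytes at offset hlen and points ulp there, after which UPDATE_POINTERS() refreshes ip/eh in case m_pullup() moved the data.)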
*/ if (pktlen >= sizeof(struct ip6_hdr) && (eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) && ip->ip_v == 6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; is_ipv6 = 1; args->flags |= IPFW_ARGS_IP6; hlen = sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; /* Search extension headers to find upper layer protocols */ while (ulp == NULL && offset == 0) { switch (proto) { case IPPROTO_ICMPV6: PULLUP_TO(hlen, ulp, struct icmp6_hdr); icmp6_type = ICMP6(ulp)->icmp6_type; break; case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; /* save flags for dynamic rules */ args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: if (pktlen >= hlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd)) PULLUP_LEN(hlen, ulp, sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd)); else if (pktlen >= hlen + sizeof(struct sctphdr)) PULLUP_LEN(hlen, ulp, pktlen - hlen); else PULLUP_LEN(hlen, ulp, sizeof(struct sctphdr)); src_port = SCTP(ulp)->src_port; dst_port = SCTP(ulp)->dest_port; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_HOPOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_HOPOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_ROUTING: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_rthdr); switch (((struct ip6_rthdr *)ulp)->ip6r_type) { case 0: ext_hd |= EXT_RTHDR0; break; case 2: ext_hd |= EXT_RTHDR2; break; default: if (V_fw_verbose) printf("IPFW2: IPV6 - Unknown " "Routing Header type(%d)\n", ((struct ip6_rthdr *) ulp)->ip6r_type); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } ext_hd |= EXT_ROUTING; hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; ulp = NULL; break; case IPPROTO_FRAGMENT: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_frag); ext_hd |= EXT_FRAGMENT; hlen += sizeof (struct ip6_frag); proto = ((struct ip6_frag *)ulp)->ip6f_nxt; offset = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_OFF_MASK; ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_MORE_FRAG; if (V_fw_permit_single_frag6 == 0 && offset == 0 && ip6f_mf == 0) { if (V_fw_verbose) printf("IPFW2: IPV6 - Invalid " "Fragment Header\n"); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } args->f_id.extra = ntohl(((struct ip6_frag *)ulp)->ip6f_ident); ulp = NULL; break; case IPPROTO_DSTOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_DSTOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_AH: /* RFC 2402 */ PULLUP_TO(hlen, ulp, struct ip6_ext); ext_hd |= EXT_AH; hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; proto = ((struct ip6_ext *)ulp)->ip6e_nxt; ulp = NULL; break; case IPPROTO_ESP: /* RFC 2406 */ PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ /* Anything past Seq# is variable length and * data past this ext. header is encrypted. */ ext_hd |= EXT_ESP; break; case IPPROTO_NONE: /* RFC 2460 */ /* * Packet ends here, and IPv6 header has * already been pulled up. If ip6e_len!=0 * then octets must be ignored. */ ulp = ip; /* non-NULL to get out of loop. */ break; case IPPROTO_OSPFIGP: /* XXX OSPF header check? 
*/ PULLUP_TO(hlen, ulp, struct ip6_ext); break; case IPPROTO_PIM: /* XXX PIM header check? */ PULLUP_TO(hlen, ulp, struct pim); break; case IPPROTO_GRE: /* RFC 1701 */ /* XXX GRE header check? */ PULLUP_TO(hlen, ulp, struct grehdr); break; case IPPROTO_CARP: PULLUP_TO(hlen, ulp, offsetof( struct carp_header, carp_counter)); if (CARP_ADVERTISEMENT != ((struct carp_header *)ulp)->carp_type) return (IP_FW_DENY); break; case IPPROTO_IPV6: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip6_hdr); break; case IPPROTO_IPV4: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip); break; default: if (V_fw_verbose) printf("IPFW2: IPV6 - Unknown " "Extension Header(%d), ext_hd=%x\n", proto, ext_hd); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); PULLUP_TO(hlen, ulp, struct ip6_ext); break; } /*switch */ } UPDATE_POINTERS(); ip6 = (struct ip6_hdr *)ip; args->f_id.addr_type = 6; args->f_id.src_ip6 = ip6->ip6_src; args->f_id.dst_ip6 = ip6->ip6_dst; args->f_id.flow_id6 = ntohl(ip6->ip6_flow); iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6); } else if (pktlen >= sizeof(struct ip) && (eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) && ip->ip_v == 4) { is_ipv4 = 1; args->flags |= IPFW_ARGS_IP4; hlen = ip->ip_hl << 2; /* * Collect parameters into local variables for faster * matching. */ proto = ip->ip_p; src_ip = ip->ip_src; dst_ip = ip->ip_dst; offset = ntohs(ip->ip_off) & IP_OFFMASK; iplen = ntohs(ip->ip_len); if (offset == 0) { switch (proto) { case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; /* save flags for dynamic rules */ args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: if (pktlen >= hlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd)) PULLUP_LEN(hlen, ulp, sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd)); else if (pktlen >= hlen + sizeof(struct sctphdr)) PULLUP_LEN(hlen, ulp, pktlen - hlen); else PULLUP_LEN(hlen, ulp, sizeof(struct sctphdr)); src_port = SCTP(ulp)->src_port; dst_port = SCTP(ulp)->dest_port; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_ICMP: PULLUP_TO(hlen, ulp, struct icmphdr); //args->f_id.flags = ICMP(ulp)->icmp_type; break; default: break; } } UPDATE_POINTERS(); args->f_id.addr_type = 4; args->f_id.src_ip = ntohl(src_ip.s_addr); args->f_id.dst_ip = ntohl(dst_ip.s_addr); } else { proto = 0; dst_ip.s_addr = src_ip.s_addr = 0; args->f_id.addr_type = 1; /* XXX */ } #undef PULLUP_TO pktlen = iplen < pktlen ? iplen: pktlen; /* Properly initialize the rest of f_id */ args->f_id.proto = proto; args->f_id.src_port = src_port = ntohs(src_port); args->f_id.dst_port = dst_port = ntohs(dst_port); IPFW_PF_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ IPFW_PF_RUNLOCK(chain); return (IP_FW_PASS); /* accept */ } if (args->flags & IPFW_ARGS_REF) { /* * Packet has already been tagged as a result of a previous * match on rule args->rule aka args->rule_id (PIPE, QUEUE, * REASS, NETGRAPH, DIVERT/TEE...) * Validate the slot and continue from the next one * if still present, otherwise do a lookup. */ f_pos = (args->rule.chain_id == chain->id) ? args->rule.slot : ipfw_find_rule(chain, args->rule.rulenum, args->rule.rule_id); } else { f_pos = 0; } if (args->flags & IPFW_ARGS_IN) { iif = args->ifp; oif = NULL; } else { MPASS(args->flags & IPFW_ARGS_OUT); iif = mem ? 
NULL : m->m_pkthdr.rcvif; oif = args->ifp; } /* * Now scan the rules, and parse microinstructions for each rule. * We have two nested loops and an inner switch. Sometimes we * need to break out of one or both loops, or re-enter one of * the loops with updated variables. Loop variables are: * * f_pos (outer loop) points to the current rule. * On output it points to the matching rule. * done (outer loop) is used as a flag to break the loop. * l (inner loop) residual length of current rule. * cmd points to the current microinstruction. * * We break the inner loop by setting l=0 and possibly * cmdlen=0 if we don't want to advance cmd. * We break the outer loop by setting done=1 * We can restart the inner loop by setting l>0 and f_pos, f, cmd * as needed. */ for (; f_pos < chain->n_rules; f_pos++) { ipfw_insn *cmd; uint32_t tablearg = 0; int l, cmdlen, skip_or; /* skip rest of OR block */ struct ip_fw *f; f = chain->map[f_pos]; if (V_set_disable & (1 << f->set) ) continue; skip_or = 0; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { int match; /* * check_body is a jump target used when we find a * CHECK_STATE, and need to jump to the body of * the target rule. */ /* check_body: */ cmdlen = F_LEN(cmd); /* * An OR block (insn_1 || .. || insn_n) has the * F_OR bit set in all but the last instruction. * The first match will set "skip_or", and cause * the following instructions to be skipped until * past the one with the F_OR bit clear. */ if (skip_or) { /* skip this instruction */ if ((cmd->len & F_OR) == 0) skip_or = 0; /* next one is good */ continue; } match = 0; /* set to 1 if we succeed */ switch (cmd->opcode) { /* * The first set of opcodes compares the packet's * fields with some pattern, setting 'match' if a * match is found. At the end of the loop there is * logic to deal with F_NOT and F_OR flags associated * with the opcode. */ case O_NOP: match = 1; break; case O_FORWARD_MAC: printf("ipfw: opcode %d unimplemented\n", cmd->opcode); break; case O_GID: case O_UID: case O_JAIL: /* * We only check offset == 0 && proto != 0, * as this ensures that we have a * packet with the ports info. 
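 * (Only the first fragment, offset == 0, carries the TCP/UDP
 * header; matching uid/gid against a later fragment would read
 * payload bytes instead of ports.)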
*/ if (offset != 0) break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP || proto == IPPROTO_UDPLITE) match = check_uidgid( (ipfw_insn_u32 *)cmd, args, &ucred_lookup, #ifdef __FreeBSD__ &ucred_cache); #else (void *)&ucred_cache); #endif break; case O_RECV: match = iface_match(iif, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_XMIT: match = iface_match(oif, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_VIA: match = iface_match(args->ifp, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_MACADDR2: if (args->flags & IPFW_ARGS_ETHER) { u_int32_t *want = (u_int32_t *) ((ipfw_insn_mac *)cmd)->addr; u_int32_t *mask = (u_int32_t *) ((ipfw_insn_mac *)cmd)->mask; u_int32_t *hdr = (u_int32_t *)eh; match = ( want[0] == (hdr[0] & mask[0]) && want[1] == (hdr[1] & mask[1]) && want[2] == (hdr[2] & mask[2]) ); } break; case O_MAC_TYPE: if (args->flags & IPFW_ARGS_ETHER) { u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (ntohs(eh->ether_type) >= p[0] && ntohs(eh->ether_type) <= p[1]); } break; case O_FRAG: match = (offset != 0); break; case O_IN: /* "out" is "not in" */ match = (oif == NULL); break; case O_LAYER2: match = (args->flags & IPFW_ARGS_ETHER); break; case O_DIVERTED: if ((args->flags & IPFW_ARGS_REF) == 0) break; /* * For diverted packets, args->rule.info * contains the divert port (in host format) * reason and direction. */ match = ((args->rule.info & IPFW_IS_MASK) == IPFW_IS_DIVERT) && ( ((args->rule.info & IPFW_INFO_IN) ? 1: 2) & cmd->arg1); break; case O_PROTO: /* * We do not allow an arg of 0 so the * check of "proto" only suffices. */ match = (proto == cmd->arg1); break; case O_IP_SRC: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == src_ip.s_addr); break; case O_IP_DST_LOOKUP: { void *pkey; uint32_t vidx, key; uint16_t keylen; if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { /* Determine lookup key type */ vidx = ((ipfw_insn_u32 *)cmd)->d[1]; if (vidx != 4 /* uid */ && vidx != 5 /* jail */ && is_ipv6 == 0 && is_ipv4 == 0) break; /* Determine key length */ if (vidx == 0 /* dst-ip */ || vidx == 1 /* src-ip */) keylen = is_ipv6 ? sizeof(struct in6_addr): sizeof(in_addr_t); else { keylen = sizeof(key); pkey = &key; } if (vidx == 0 /* dst-ip */) pkey = is_ipv4 ? (void *)&dst_ip: (void *)&args->f_id.dst_ip6; else if (vidx == 1 /* src-ip */) pkey = is_ipv4 ? 
(void *)&src_ip: (void *)&args->f_id.src_ip6; else if (vidx == 6 /* dscp */) { if (is_ipv4) key = ip->ip_tos >> 2; else { key = args->f_id.flow_id6; key = (key & 0x0f) << 2 | (key & 0xf000) >> 14; } key &= 0x3f; } else if (vidx == 2 /* dst-port */ || vidx == 3 /* src-port */) { /* Skip fragments */ if (offset != 0) break; /* Skip proto without ports */ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && proto != IPPROTO_UDPLITE && proto != IPPROTO_SCTP) break; if (vidx == 2 /* dst-port */) key = dst_port; else key = src_port; } #ifndef USERSPACE else if (vidx == 4 /* uid */ || vidx == 5 /* jail */) { check_uidgid( (ipfw_insn_u32 *)cmd, args, &ucred_lookup, #ifdef __FreeBSD__ &ucred_cache); if (vidx == 4 /* uid */) key = ucred_cache->cr_uid; else if (vidx == 5 /* jail */) key = ucred_cache->cr_prison->pr_id; #else /* !__FreeBSD__ */ (void *)&ucred_cache); if (vidx == 4 /* uid */) key = ucred_cache.uid; else if (vidx == 5 /* jail */) key = ucred_cache.xid; #endif /* !__FreeBSD__ */ } #endif /* !USERSPACE */ else break; match = ipfw_lookup_table(chain, cmd->arg1, keylen, pkey, &vidx); if (!match) break; tablearg = vidx; break; } /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ /* FALLTHROUGH */ } case O_IP_SRC_LOOKUP: { void *pkey; uint32_t vidx; uint16_t keylen; if (is_ipv4) { keylen = sizeof(in_addr_t); if (cmd->opcode == O_IP_DST_LOOKUP) pkey = &dst_ip; else pkey = &src_ip; } else if (is_ipv6) { keylen = sizeof(struct in6_addr); if (cmd->opcode == O_IP_DST_LOOKUP) pkey = &args->f_id.dst_ip6; else pkey = &args->f_id.src_ip6; } else break; match = ipfw_lookup_table(chain, cmd->arg1, keylen, pkey, &vidx); if (!match) break; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { match = ((ipfw_insn_u32 *)cmd)->d[0] == TARG_VAL(chain, vidx, tag); if (!match) break; } tablearg = vidx; break; } case O_IP_FLOW_LOOKUP: { uint32_t v = 0; match = ipfw_lookup_table(chain, cmd->arg1, 0, &args->f_id, &v); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == TARG_VAL(chain, v, tag); if (match) tablearg = v; } break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { uint32_t a = (cmd->opcode == O_IP_DST_MASK) ? dst_ip.s_addr : src_ip.s_addr; uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; int i = cmdlen-1; for (; !match && i>0; i-= 2, p+= 2) match = (p[0] == (a & p[1])); } break; case O_IP_SRC_ME: if (is_ipv4) { match = in_localip(src_ip); break; } #ifdef INET6 /* FALLTHROUGH */ case O_IP6_SRC_ME: match = is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); #endif break; case O_IP_DST_SET: case O_IP_SRC_SET: if (is_ipv4) { u_int32_t *d = (u_int32_t *)(cmd+1); u_int32_t addr = cmd->opcode == O_IP_DST_SET ? args->f_id.dst_ip : args->f_id.src_ip; if (addr < d[0]) break; addr -= d[0]; /* subtract base */ match = (addr < cmd->arg1) && ( d[ 1 + (addr>>5)] & (1<<(addr & 0x1f)) ); } break; case O_IP_DST: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == dst_ip.s_addr); break; case O_IP_DST_ME: if (is_ipv4) { match = in_localip(dst_ip); break; } #ifdef INET6 /* FALLTHROUGH */ case O_IP6_DST_ME: match = is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); #endif break; case O_IP_SRCPORT: case O_IP_DSTPORT: /* * offset == 0 && proto != 0 is enough * to guarantee that we have a * packet with port info. */ if ((proto == IPPROTO_UDP || proto == IPPROTO_UDPLITE || proto == IPPROTO_TCP || proto == IPPROTO_SCTP) && offset == 0) { u_int16_t x = (cmd->opcode == O_IP_SRCPORT) ? 
src_port : dst_port ; u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (x>=p[0] && x<=p[1]); } break; case O_ICMPTYPE: match = (offset == 0 && proto==IPPROTO_ICMP && icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); break; #ifdef INET6 case O_ICMP6TYPE: match = is_ipv6 && offset == 0 && proto==IPPROTO_ICMPV6 && icmp6type_match( ICMP6(ulp)->icmp6_type, (ipfw_insn_u32 *)cmd); break; #endif /* INET6 */ case O_IPOPT: match = (is_ipv4 && ipopts_match(ip, cmd) ); break; case O_IPVER: match = (is_ipv4 && cmd->arg1 == ip->ip_v); break; case O_IPID: - case O_IPLEN: case O_IPTTL: - if (is_ipv4) { /* only for IP packets */ + if (!is_ipv4) + break; + case O_IPLEN: + { /* only for IP packets */ uint16_t x; uint16_t *p; int i; if (cmd->opcode == O_IPLEN) x = iplen; else if (cmd->opcode == O_IPTTL) x = ip->ip_ttl; else /* must be IPID */ x = ntohs(ip->ip_id); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_IPPRECEDENCE: match = (is_ipv4 && (cmd->arg1 == (ip->ip_tos & 0xe0)) ); break; case O_IPTOS: match = (is_ipv4 && flags_match(cmd, ip->ip_tos)); break; case O_DSCP: { uint32_t *p; uint16_t x; p = ((ipfw_insn_u32 *)cmd)->d; if (is_ipv4) x = ip->ip_tos >> 2; else if (is_ipv6) { uint8_t *v; v = &((struct ip6_hdr *)ip)->ip6_vfc; x = (*v & 0x0F) << 2; v++; x |= *v >> 6; } else break; /* DSCP bitmask is stored as low_u32 high_u32 */ if (x >= 32) match = *(p + 1) & (1 << (x - 32)); else match = *p & (1 << x); } break; case O_TCPDATALEN: if (proto == IPPROTO_TCP && offset == 0) { struct tcphdr *tcp; uint16_t x; uint16_t *p; int i; #ifdef INET6 if (is_ipv6) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip; if (ip6->ip6_plen == 0) { /* * Jumbo payload is not * supported by this * opcode. */ break; } x = iplen - hlen; } else #endif /* INET6 */ x = iplen - (ip->ip_hl << 2); tcp = TCP(ulp); x -= tcp->th_off << 2; if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_TCPFLAGS: match = (proto == IPPROTO_TCP && offset == 0 && flags_match(cmd, TCP(ulp)->th_flags)); break; case O_TCPOPTS: if (proto == IPPROTO_TCP && offset == 0 && ulp){ PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); match = tcpopts_match(TCP(ulp), cmd); } break; case O_TCPSEQ: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_seq); break; case O_TCPACK: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_ack); break; case O_TCPWIN: if (proto == IPPROTO_TCP && offset == 0) { uint16_t x; uint16_t *p; int i; x = ntohs(TCP(ulp)->th_win); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* Otherwise we have ranges. */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i > 0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_ESTAB: /* reject packets which have SYN only */ /* XXX should i also check for TH_ACK ? */ match = (proto == IPPROTO_TCP && offset == 0 && (TCP(ulp)->th_flags & (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); break; case O_ALTQ: { struct pf_mtag *at; struct m_tag *mtag; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; /* * ALTQ uses mbuf tags from another * packet filtering system - pf(4). 
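 * (The rule is considered a match up front; if the packet
 * already carries a pf qid from earlier classification we
 * leave it untouched and simply fall through.)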
* We allocate a tag in its format * and fill it in, pretending to be pf(4). */ match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); if (mtag == NULL) { /* * Let the packet fall back to the * default ALTQ. */ break; } m_tag_prepend(m, mtag); at = (struct pf_mtag *)(mtag + 1); at->qid = altq->qid; at->hdr = ip; break; } case O_LOG: ipfw_log(chain, f, hlen, args, offset | ip6f_mf, tablearg, ip); match = 1; break; case O_PROB: match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); break; case O_VERREVPATH: /* Outgoing packets automatically pass/match */ match = (args->flags & IPFW_ARGS_OUT || ( #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), iif, args->f_id.fib) : #endif verify_path(src_ip, iif, args->f_id.fib))); break; case O_VERSRCREACH: /* Outgoing packets automatically pass/match */ match = (hlen > 0 && ((oif != NULL) || ( #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), NULL, args->f_id.fib) : #endif verify_path(src_ip, NULL, args->f_id.fib)))); break; case O_ANTISPOOF: /* Outgoing packets automatically pass/match */ if (oif == NULL && hlen > 0 && ( (is_ipv4 && in_localaddr(src_ip)) #ifdef INET6 || (is_ipv6 && in6_localaddr(&(args->f_id.src_ip6))) #endif )) match = #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), iif, args->f_id.fib) : #endif verify_path(src_ip, iif, args->f_id.fib); else match = 1; break; case O_IPSEC: match = (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); /* otherwise no match */ break; #ifdef INET6 case O_IP6_SRC: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_DST: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if (is_ipv6) { int i = cmdlen - 1; struct in6_addr p; struct in6_addr *d = &((ipfw_insn_ip6 *)cmd)->addr6; for (; !match && i > 0; d += 2, i -= F_INSN_SIZE(struct in6_addr) * 2) { p = (cmd->opcode == O_IP6_SRC_MASK) ? args->f_id.src_ip6: args->f_id.dst_ip6; APPLY_MASK(&p, &d[1]); match = IN6_ARE_ADDR_EQUAL(&d[0], &p); } } break; case O_FLOW6ID: match = is_ipv6 && flow6id_match(args->f_id.flow_id6, (ipfw_insn_u32 *) cmd); break; case O_EXT_HDR: match = is_ipv6 && (ext_hd & ((ipfw_insn *) cmd)->arg1); break; case O_IP6: match = is_ipv6; break; #endif case O_IP4: match = is_ipv4; break; case O_TAG: { struct m_tag *mtag; uint32_t tag = TARG(cmd->arg1, tag); /* Packet is already tagged with this tag? */ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); /* We have `untag' action when F_NOT flag is * present. And we must remove this mtag from * mbuf and reset `match' to zero (`match' will * be inversed later). * Otherwise we should allocate new mtag and * push it into mbuf. */ if (cmd->len & F_NOT) { /* `untag' action */ if (mtag != NULL) m_tag_delete(m, mtag); match = 0; } else { if (mtag == NULL) { mtag = m_tag_alloc( MTAG_IPFW, tag, 0, M_NOWAIT); if (mtag != NULL) m_tag_prepend(m, mtag); } match = 1; } break; } case O_FIB: /* try match the specified fib */ if (args->f_id.fib == cmd->arg1) match = 1; break; case O_SOCKARG: { #ifndef USERSPACE /* not supported in userspace */ struct inpcb *inp = args->inp; struct inpcbinfo *pi; if (is_ipv6) /* XXX can we remove this ? 
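 * The answer hinges on the lookup below: in_pcblookup()
 * takes struct in_addr arguments, i.e. it is IPv4-only,
 * so IPv6 traffic has to bail out here for now.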
*/ break; if (proto == IPPROTO_TCP) pi = &V_tcbinfo; else if (proto == IPPROTO_UDP) pi = &V_udbinfo; else if (proto == IPPROTO_UDPLITE) pi = &V_ulitecbinfo; else break; /* * XXXRW: so_user_cookie should almost * certainly be inp_user_cookie? */ /* For incoming packet, lookup up the inpcb using the src/dest ip/port tuple */ if (inp == NULL) { inp = in_pcblookup(pi, src_ip, htons(src_port), dst_ip, htons(dst_port), INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { tablearg = inp->inp_socket->so_user_cookie; if (tablearg) match = 1; INP_RUNLOCK(inp); } } else { if (inp->inp_socket) { tablearg = inp->inp_socket->so_user_cookie; if (tablearg) match = 1; } } #endif /* !USERSPACE */ break; } case O_TAGGED: { struct m_tag *mtag; uint32_t tag = TARG(cmd->arg1, tag); if (cmdlen == 1) { match = m_tag_locate(m, MTAG_IPFW, tag, NULL) != NULL; break; } /* we have ranges */ for (mtag = m_tag_first(m); mtag != NULL && !match; mtag = m_tag_next(m, mtag)) { uint16_t *p; int i; if (mtag->m_tag_cookie != MTAG_IPFW) continue; p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for(; !match && i > 0; i--, p += 2) match = mtag->m_tag_id >= p[0] && mtag->m_tag_id <= p[1]; } break; } /* * The second set of opcodes represents 'actions', * i.e. the terminal part of a rule once the packet * matches all previous patterns. * Typically there is only one action for each rule, * and the opcode is stored at the end of the rule * (but there are exceptions -- see below). * * In general, here we set retval and terminate the * outer loop (would be a 'break 3' in some language, * but we need to set l=0, done=1) * * Exceptions: * O_COUNT and O_SKIPTO actions: * instead of terminating, we jump to the next rule * (setting l=0), or to the SKIPTO target (setting * f/f_len, cmd and l as needed), respectively. * * O_TAG, O_LOG and O_ALTQ action parameters: * perform some action and set match = 1; * * O_LIMIT and O_KEEP_STATE: these opcodes are * not real 'actions', and are stored right * before the 'action' part of the rule (one * exception is O_SKIP_ACTION which could be * between these opcodes and 'action' one). * These opcodes try to install an entry in the * state tables; if successful, we continue with * the next opcode (match=1; break;), otherwise * the packet must be dropped (set retval, * break loops with l=0, done=1) * * O_PROBE_STATE and O_CHECK_STATE: these opcodes * cause a lookup of the state table, and a jump * to the 'action' part of the parent rule * if an entry is found, or * (CHECK_STATE only) a jump to the next rule if * the entry is not found. * The result of the lookup is cached so that * further instances of these opcodes become NOPs. * The jump to the next rule is done by setting * l=0, cmdlen=0. * * O_SKIP_ACTION: this opcode is not a real 'action' * either, and is stored right before the 'action' * part of the rule, right after the O_KEEP_STATE * opcode. It causes match failure so the real * 'action' could be executed only if the rule * is checked via dynamic rule from the state * table, as in such case execution starts * from the true 'action' opcode directly. * */ case O_LIMIT: case O_KEEP_STATE: if (ipfw_dyn_install_state(chain, f, (ipfw_insn_limit *)cmd, args, ulp, pktlen, &dyn_info, tablearg)) { /* error or limit violation */ retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ } match = 1; break; case O_PROBE_STATE: case O_CHECK_STATE: /* * dynamic rules are checked at the first * keep-state or check-state occurrence, * with the result being stored in dyn_info. 
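 * For example, in the ruleset
 *	check-state
 *	allow tcp from any to me setup keep-state
 * an established flow matches the dynamic entry at check-state
 * and jumps straight to the "allow" action of the second rule.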
* The compiler introduces a PROBE_STATE * instruction for us when we have a * KEEP_STATE (because PROBE_STATE needs * to be run first). */ if (DYN_LOOKUP_NEEDED(&dyn_info, cmd) && (q = ipfw_dyn_lookup_state(args, ulp, pktlen, cmd, &dyn_info)) != NULL) { /* * Found dynamic entry, jump to the * 'action' part of the parent rule * by setting f, cmd, l and clearing * cmdlen. */ f = q; f_pos = dyn_info.f_pos; cmd = ACTION_PTR(f); l = f->cmd_len - f->act_ofs; cmdlen = 0; match = 1; break; } /* * Dynamic entry not found. If CHECK_STATE, * skip to next rule, if PROBE_STATE just * ignore and continue with next opcode. */ if (cmd->opcode == O_CHECK_STATE) l = 0; /* exit inner loop */ match = 1; break; case O_SKIP_ACTION: match = 0; /* skip to the next rule */ l = 0; /* exit inner loop */ break; case O_ACCEPT: retval = 0; /* accept */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_PIPE: case O_QUEUE: set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, pipe); if (cmd->opcode == O_PIPE) args->rule.info |= IPFW_IS_PIPE; if (V_fw_one_pass) args->rule.info |= IPFW_ONEPASS; retval = IP_FW_DUMMYNET; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_DIVERT: case O_TEE: if (args->flags & IPFW_ARGS_ETHER) break; /* not on layer 2 */ /* otherwise this is terminal */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ retval = (cmd->opcode == O_DIVERT) ? IP_FW_DIVERT : IP_FW_TEE; set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, divert); break; case O_COUNT: IPFW_INC_RULE_COUNTER(f, pktlen); l = 0; /* exit inner loop */ break; case O_SKIPTO: IPFW_INC_RULE_COUNTER(f, pktlen); f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); /* * Skip disabled rules, and re-enter * the inner loop with the correct * f_pos, f, l and cmd. * Also clear cmdlen and skip_or */ for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++) ; /* Re-enter the inner loop at the skipto rule. */ f = chain->map[f_pos]; l = f->cmd_len; cmd = f->cmd; match = 1; cmdlen = 0; skip_or = 0; continue; break; /* not reached */ case O_CALLRETURN: { /* * Implementation of `subroutine' call/return, * in the stack carried in an mbuf tag. This * is different from `skipto' in that any call * address is possible (`skipto' must prevent * backward jumps to avoid endless loops). * We have `return' action when F_NOT flag is * present. The `m_tag_id' field is used as * stack pointer. */ struct m_tag *mtag; uint16_t jmpto, *stack; #define IS_CALL ((cmd->len & F_NOT) == 0) #define IS_RETURN ((cmd->len & F_NOT) != 0) /* * Hand-rolled version of m_tag_locate() with * wildcard `type'. * If not already tagged, allocate new tag. */ mtag = m_tag_first(m); while (mtag != NULL) { if (mtag->m_tag_cookie == MTAG_IPFW_CALL) break; mtag = m_tag_next(m, mtag); } if (mtag == NULL && IS_CALL) { mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, IPFW_CALLSTACK_SIZE * sizeof(uint16_t), M_NOWAIT); if (mtag != NULL) m_tag_prepend(m, mtag); } /* * On error both `call' and `return' just * continue with next rule. */ if (IS_RETURN && (mtag == NULL || mtag->m_tag_id == 0)) { l = 0; /* exit inner loop */ break; } if (IS_CALL && (mtag == NULL || mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { printf("ipfw: call stack error, " "go to next rule\n"); l = 0; /* exit inner loop */ break; } IPFW_INC_RULE_COUNTER(f, pktlen); stack = (uint16_t *)(mtag + 1); /* * The `call' action may use cached f_pos * (in f->next_rule), whose version is written * in f->next_rule. 
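 * For example, a "call 1000" executed from rule 500 pushes
 * 500 on the stack and jumps to rule 1000; the matching
 * "return" later pops it (jmpto = stack[--sp] + 1 below) and
 * resumes at the first rule numbered 501 or higher.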
* The `return' action, however, doesn't have * fixed jump address in cmd->arg1 and can't use * cache. */ if (IS_CALL) { stack[mtag->m_tag_id] = f->rulenum; mtag->m_tag_id++; f_pos = JUMP(chain, f, cmd->arg1, tablearg, 1); } else { /* `return' action */ mtag->m_tag_id--; jmpto = stack[mtag->m_tag_id] + 1; f_pos = ipfw_find_rule(chain, jmpto, 0); } /* * Skip disabled rules, and re-enter * the inner loop with the correct * f_pos, f, l and cmd. * Also clear cmdlen and skip_or */ for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++) ; /* Re-enter the inner loop at the dest rule. */ f = chain->map[f_pos]; l = f->cmd_len; cmd = f->cmd; cmdlen = 0; skip_or = 0; continue; break; /* NOTREACHED */ } #undef IS_CALL #undef IS_RETURN case O_REJECT: /* * Drop the packet and send a reject notice * if the packet is not ICMP (or is an ICMP * query), and it is not multicast/broadcast. */ if (hlen > 0 && is_ipv4 && offset == 0 && (proto != IPPROTO_ICMP || is_icmp_query(ICMP(ulp))) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN_MULTICAST(ntohl(dst_ip.s_addr))) { send_reject(args, cmd->arg1, iplen, ip); m = args->m; } /* FALLTHROUGH */ #ifdef INET6 case O_UNREACH6: if (hlen > 0 && is_ipv6 && ((offset & IP6F_OFF_MASK) == 0) && (proto != IPPROTO_ICMPV6 || (is_icmp6_query(icmp6_type) == 1)) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN6_IS_ADDR_MULTICAST( &args->f_id.dst_ip6)) { send_reject6(args, cmd->opcode == O_REJECT ? map_icmp_unreach(cmd->arg1): cmd->arg1, hlen, (struct ip6_hdr *)ip); m = args->m; } /* FALLTHROUGH */ #endif case O_DENY: retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_FORWARD_IP: if (args->flags & IPFW_ARGS_ETHER) break; /* not valid on layer2 pkts */ if (q != f || dyn_info.direction == MATCH_FORWARD) { struct sockaddr_in *sa; sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { #ifdef INET6 /* * We use O_FORWARD_IP opcode for * fwd rule with tablearg, but tables * now support IPv6 addresses. And * when we are inspecting IPv6 packet, * we can use nh6 field from * table_value as next_hop6 address. */ if (is_ipv6) { struct ip_fw_nh6 *nh6; args->flags |= IPFW_ARGS_NH6; nh6 = &args->hopstore6; nh6->sin6_addr = TARG_VAL( chain, tablearg, nh6); nh6->sin6_port = sa->sin_port; nh6->sin6_scope_id = TARG_VAL( chain, tablearg, zoneid); } else #endif { args->flags |= IPFW_ARGS_NH4; args->hopstore.sin_port = sa->sin_port; sa = &args->hopstore; sa->sin_family = AF_INET; sa->sin_len = sizeof(*sa); sa->sin_addr.s_addr = htonl( TARG_VAL(chain, tablearg, nh4)); } } else { args->flags |= IPFW_ARGS_NH4PTR; args->next_hop = sa; } } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #ifdef INET6 case O_FORWARD_IP6: if (args->flags & IPFW_ARGS_ETHER) break; /* not valid on layer2 pkts */ if (q != f || dyn_info.direction == MATCH_FORWARD) { struct sockaddr_in6 *sin6; sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); args->flags |= IPFW_ARGS_NH6PTR; args->next_hop6 = sin6; } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #endif case O_NETGRAPH: case O_NGTEE: set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, netgraph); if (V_fw_one_pass) args->rule.info |= IPFW_ONEPASS; retval = (cmd->opcode == O_NETGRAPH) ? 
IP_FW_NETGRAPH : IP_FW_NGTEE; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_SETFIB: { uint32_t fib; IPFW_INC_RULE_COUNTER(f, pktlen); fib = TARG(cmd->arg1, fib) & 0x7FFF; if (fib >= rt_numfibs) fib = 0; M_SETFIB(m, fib); args->f_id.fib = fib; /* XXX */ l = 0; /* exit inner loop */ break; } case O_SETDSCP: { uint16_t code; code = TARG(cmd->arg1, dscp) & 0x3F; l = 0; /* exit inner loop */ if (is_ipv4) { uint16_t old; old = *(uint16_t *)ip; ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03); ip->ip_sum = cksum_adjust(ip->ip_sum, old, *(uint16_t *)ip); } else if (is_ipv6) { uint8_t *v; v = &((struct ip6_hdr *)ip)->ip6_vfc; *v = (*v & 0xF0) | (code >> 2); v++; *v = (*v & 0x3F) | ((code & 0x03) << 6); } else break; IPFW_INC_RULE_COUNTER(f, pktlen); break; } case O_NAT: l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ /* * Ensure that we do not invoke NAT handler for * non IPv4 packets. Libalias expects only IPv4. */ if (!is_ipv4 || !IPFW_NAT_LOADED) { retval = IP_FW_DENY; break; } struct cfg_nat *t; int nat_id; args->rule.info = 0; set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { retval = ipfw_nat_ptr(args, NULL, m); break; } t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = TARG(cmd->arg1, nat); t = (*lookup_nat_ptr)(&chain->nat, nat_id); if (t == NULL) { retval = IP_FW_DENY; break; } if (cmd->arg1 != IP_FW_TARG) ((ipfw_insn_nat *)cmd)->nat = t; } retval = ipfw_nat_ptr(args, t, m); break; case O_REASS: { int ip_off; l = 0; /* in any case exit inner loop */ if (is_ipv6) /* IPv6 is not supported yet */ break; IPFW_INC_RULE_COUNTER(f, pktlen); ip_off = ntohs(ip->ip_off); /* if not fragmented, go to next rule */ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) break; args->m = m = ip_reass(m); /* * do IP header checksum fixup. */ if (m == NULL) { /* fragment got swallowed */ retval = IP_FW_DENY; } else { /* good, packet complete */ int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, hlen); retval = IP_FW_REASS; args->rule.info = 0; set_match(args, f_pos, chain); } done = 1; /* exit outer loop */ break; } case O_EXTERNAL_ACTION: l = 0; /* in any case exit inner loop */ retval = ipfw_run_eaction(chain, args, cmd, &done); /* * If both @retval and @done are zero, * consider this as rule matching and * update counters. */ if (retval == 0 && done == 0) { IPFW_INC_RULE_COUNTER(f, pktlen); /* * Reset the result of the last * dynamic state lookup. * External action can change * @args content, and it may be * used for new state lookup later. */ DYN_INFO_INIT(&dyn_info); } break; default: panic("-- unknown opcode %d\n", cmd->opcode); } /* end of switch() on opcodes */ /* * if we get here with l=0, then match is irrelevant. 
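 *
 * An illustration of the F_NOT/F_OR epilogue below: in an OR
 * block (insn_a || insn_b), insn_a carries F_OR; when insn_a
 * matches, skip_or is set and insn_b is skipped; when neither
 * matches, the final non-F_OR instruction fails and we break
 * out to try the next rule.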
*/ if (cmd->len & F_NOT) match = !match; if (match) { if (cmd->len & F_OR) skip_or = 1; } else { if (!(cmd->len & F_OR)) /* not an OR block, */ break; /* try next rule */ } } /* end of inner loop, scan opcodes */ #undef PULLUP_LEN if (done) break; /* next_rule:; */ /* try next rule */ } /* end of outer for, scan rules */ if (done) { struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ IPFW_INC_RULE_COUNTER(rule, pktlen); } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); } IPFW_PF_RUNLOCK(chain); #ifdef __FreeBSD__ if (ucred_cache != NULL) crfree(ucred_cache); #endif return (retval); pullup_failed: if (V_fw_verbose) printf("ipfw: pullup failed\n"); return (IP_FW_DENY); } /* * Set maximum number of tables that can be used in given VNET ipfw instance. */ #ifdef SYSCTL_NODE static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) { int error; unsigned int ntables; ntables = V_fw_tables_max; error = sysctl_handle_int(oidp, &ntables, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); return (ipfw_resize_tables(&V_layer3_chain, ntables)); } /* * Switches table namespace between global and per-set. */ static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) { int error; unsigned int sets; sets = V_fw_tables_sets; error = sysctl_handle_int(oidp, &sets, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); } #endif /* * Module and VNET glue */ /* * Stuff that must be initialised only on boot or module load */ static int ipfw_init(void) { int error = 0; /* * Only print out this stuff the first time around, * when called from the sysinit code. */ printf("ipfw2 " #ifdef INET6 "(+ipv6) " #endif "initialized, divert %s, nat %s, " "default to %s, logging ", #ifdef IPDIVERT "enabled", #else "loadable", #endif #ifdef IPFIREWALL_NAT "enabled", #else "loadable", #endif default_to_accept ? "accept" : "deny"); /* * Note: V_xxx variables can be accessed here but the vnet specific * initializer may not have been called yet for the VIMAGE case. * Tuneables will have been processed. We will print out values for * the default vnet. * XXX This should all be rationalized AFTER 8.0 */ if (V_fw_verbose == 0) printf("disabled\n"); else if (V_verbose_limit == 0) printf("unlimited\n"); else printf("limited to %d packets/entry by default\n", V_verbose_limit); /* Check user-supplied table count for validness */ if (default_fw_tables > IPFW_TABLES_MAX) default_fw_tables = IPFW_TABLES_MAX; ipfw_init_sopt_handler(); ipfw_init_obj_rewriter(); ipfw_iface_init(); return (error); } /* * Called for the removal of the last instance only on module unload. */ static void ipfw_destroy(void) { ipfw_iface_destroy(); ipfw_destroy_sopt_handler(); ipfw_destroy_obj_rewriter(); printf("IP firewall unloaded\n"); } /* * Stuff that must be initialized for every instance * (including the first of course). */ static int vnet_ipfw_init(const void *unused) { int error, first; struct ip_fw *rule = NULL; struct ip_fw_chain *chain; chain = &V_layer3_chain; first = IS_DEFAULT_VNET(curvnet) ? 
1 : 0; /* First set up some values that are compile time options */ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ V_fw_deny_unknown_exthdrs = 1; #ifdef IPFIREWALL_VERBOSE V_fw_verbose = 1; #endif #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif #ifdef IPFIREWALL_NAT LIST_INIT(&chain->nat); #endif /* Init shared services hash table */ ipfw_init_srv(chain); ipfw_init_counters(); /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; error = ipfw_init_tables(chain, first); if (error) { printf("ipfw2: setting up tables failed\n"); free(chain->map, M_IPFW); free(rule, M_IPFW); return (ENOSPC); } IPFW_LOCK_INIT(chain); /* fill and insert the default rule */ rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); rule->cmd_len = 1; rule->cmd[0].len = 1; rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; chain->default_rule = rule; ipfw_add_protected_rule(chain, rule, 0); ipfw_dyn_init(chain); ipfw_eaction_init(chain, first); #ifdef LINEAR_SKIPTO ipfw_init_skipto_cache(chain); #endif ipfw_bpf_init(first); /* First set up some values that are compile time options */ V_ipfw_vnet_ready = 1; /* Open for business */ /* * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. * Even if the latter two fail we still keep the module alive * because the sockopt and layer2 paths are still useful. * ipfw[6]_hook return 0 on success, ENOENT on failure, * so we can ignore the exact return value and just set a flag. * * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so * changes in the underlying (per-vnet) variables trigger * immediate hook()/unhook() calls. * In layer2 we have the same behaviour, except that V_ether_ipfw * is checked on each packet because there are no pfil hooks. */ V_ip_fw_ctl_ptr = ipfw_ctl3; error = ipfw_attach_hooks(); return (error); } /* * Called for the removal of each instance. */ static int vnet_ipfw_uninit(const void *unused) { struct ip_fw *reap; struct ip_fw_chain *chain = &V_layer3_chain; int i, last; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ /* * disconnect from ipv4, ipv6, layer2 and sockopt. * Then grab, release and grab again the WLOCK so we make * sure the update is propagated and nobody will be in. */ ipfw_detach_hooks(); V_ip_fw_ctl_ptr = NULL; last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; IPFW_UH_WLOCK(chain); IPFW_UH_WUNLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ IPFW_UH_WLOCK(chain); reap = NULL; IPFW_WLOCK(chain); for (i = 0; i < chain->n_rules; i++) ipfw_reap_add(chain, &reap, chain->map[i]); free(chain->map, M_IPFW); #ifdef LINEAR_SKIPTO ipfw_destroy_skipto_cache(chain); #endif IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); ipfw_destroy_tables(chain, last); ipfw_eaction_uninit(chain, last); if (reap != NULL) ipfw_reap_rules(reap); vnet_ipfw_iface_destroy(chain); ipfw_destroy_srv(chain); IPFW_LOCK_DESTROY(chain); ipfw_dyn_uninit(1); /* free the remaining parts */ ipfw_destroy_counters(); ipfw_bpf_uninit(last); return (0); } /* * Module event handler. * In general we have the choice of handling most of these events by the * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to * use the SYSINIT handlers as they are more capable of expressing the * flow of control during module and vnet operations, so this is just * a skeleton. Note there is no SYSINIT equivalent of the module * SHUTDOWN handler, but we don't have anything to do in that case anyhow. 
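 * A detail worth noting (not obvious from the skeleton): a
 * non-zero return from the MOD_QUIESCE case would veto a normal
 * kldunload(8), so returning 0 everywhere keeps the module
 * freely unloadable.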
 */
static int
ipfw_modevent(module_t mod, int type, void *unused)
{
	int err = 0;

	switch (type) {
	case MOD_LOAD:
		/* Called once at module load or
		 * system boot if compiled in. */
		break;
	case MOD_QUIESCE:
		/* Called before unload. May veto unloading. */
		break;
	case MOD_UNLOAD:
		/* Called during unload. */
		break;
	case MOD_SHUTDOWN:
		/* Called during system shutdown. */
		break;
	default:
		err = EOPNOTSUPP;
		break;
	}
	return err;
}

static moduledata_t ipfwmod = {
	"ipfw",
	ipfw_modevent,
	0
};

/* Define startup order. */
#define	IPFW_SI_SUB_FIREWALL	SI_SUB_PROTO_FIREWALL
#define	IPFW_MODEVENT_ORDER	(SI_ORDER_ANY - 255) /* On boot slot in here. */
#define	IPFW_MODULE_ORDER	(IPFW_MODEVENT_ORDER + 1) /* A little later. */
#define	IPFW_VNET_ORDER		(IPFW_MODEVENT_ORDER + 2) /* Later still. */

DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
FEATURE(ipfw_ctl3, "ipfw new sockopt calls");
MODULE_VERSION(ipfw, 3);
/* should declare some dependencies here */

/*
 * Starting up. Done in order after ipfwmod() has been called.
 * VNET_SYSINIT is also called for each existing vnet and each new vnet.
 */
SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, ipfw_init, NULL);
VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
    vnet_ipfw_init, NULL);

/*
 * Closing up shop. These are done in REVERSE ORDER, but still
 * after ipfwmod() has been called. Not called on reboot.
 * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
 * or when the module is unloaded.
 */
SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
    ipfw_destroy, NULL);
VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
    vnet_ipfw_uninit, NULL);

/* end of file */
Index: projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64
===================================================================
--- projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64	(revision 346924)
+++ projects/runtime-coverage-v2/sys/powerpc/conf/GENERIC64	(revision 346925)
@@ -1,255 +1,256 @@
#
# GENERIC -- Generic kernel configuration file for FreeBSD/powerpc
#
# For more information on this file, please read the handbook section on
# Kernel Configuration Files:
#
#    https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
# The handbook is also available locally in /usr/share/doc/handbook
# if you've installed the doc distribution, otherwise always see the
# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
# latest information.
#
# An exhaustive list of options and more detailed explanations of the
# device lines is also present in the ../../conf/NOTES and NOTES files.
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
# $FreeBSD$

cpu		AIM
ident		GENERIC

machine 	powerpc powerpc64

makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
makeoptions	WITH_CTF=1

# Platform support
options 	POWERMAC		#NewWorld Apple PowerMacs
options 	PS3			#Sony Playstation 3
options 	MAMBO			#IBM Mambo Full System Simulator
options 	PSERIES			#PAPR-compliant systems (e.g. IBM p)
options 	POWERNV			#Non-virtualized OpenPOWER systems
options 	FDT			#Flattened Device Tree

options 	SCHED_ULE		#ULE scheduler
options 	NUMA			#Non-Uniform Memory Architecture support
options 	PREEMPTION		#Enable kernel thread preemption
options 	VIMAGE			# Subsystem virtualization, e.g.
VNET options INET #InterNETworking options INET6 #IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options TCP_BLACKBOX # Enhanced TCP event logging options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP #Stream Control Transmission Protocol options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists options UFS_DIRHASH #Improve performance on big directories options UFS_GJOURNAL #Enable gjournal-based UFS journaling options QUOTA #Enable disk quotas for UFS options MD_ROOT #MD is a potential root device options NFSCL #Network Filesystem Client options NFSD #Network Filesystem Server options NFSLOCKD #Network Lock Manager options NFS_ROOT #NFS usable as root device options MSDOSFS #MSDOS Filesystem options CD9660 #ISO 9660 Filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options GEOM_PART_APM #Apple Partition Maps. options GEOM_PART_GPT #GUID Partition Tables. options GEOM_LABEL #Provides labelization options COMPAT_FREEBSD32 #Compatible with FreeBSD/powerpc binaries options COMPAT_FREEBSD5 #Compatible with FreeBSD5 options COMPAT_FREEBSD6 #Compatible with FreeBSD6 options COMPAT_FREEBSD7 #Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options COMPAT_FREEBSD11 # Compatible with FreeBSD11 options SCSI_DELAY=5000 #Delay (in ms) before probing SCSI options KTRACE #ktrace(1) syscall trace support options STACK #stack(9) support options SYSVSHM #SYSV-style shared memory options SYSVMSG #SYSV-style message queues options SYSVSEM #SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB #Support DDB #options DEADLKRES #Enable the deadlock resolver options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS options WITNESS #Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default # Kernel dump features. 
options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Standard busses device pci options PCI_HP # PCI-Express native HotPlug device agp # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # NVM Express (NVMe) support device nvme # base NVMe driver options NVME_USE_NVD=0 # prefer the cam(4) based nda(4) driver device nvd # expose NVMe namespaces as disks, depends on nvme # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_ALLOW_MEMIO # Attempt to use memory mapped I/O device isp # Qlogic family device ispfw # Firmware module for Qlogic host adapters device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Service (SES and SAF-TE) # vt is the default console driver, resembling an SCO console device vt # Core console driver device kbdmux # Serial (COM) ports device scc device uart device uart_z8530 device iflib # Ethernet hardware device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet Family device ixv # Intel PRO/10GbE PCIE VF Ethernet Family device glc # Sony Playstation 3 Ethernet device llan # IBM pSeries Virtual Ethernet device cxgbe # Chelsio 10/25G NIC # PCI Ethernet NICs that use the common MII bus controller code. device miibus # MII bus support device bge # Broadcom BCM570xx Gigabit Ethernet device gem # Sun GEM/Sun ERI/Apple GMAC device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 # Pseudo devices. device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
device bpf #Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface device xhci # XHCI PCI->USB interface device usb # USB Bus (required) device uhid # "Human Interface Devices" device ukbd # Keyboard options KBD_INSTALL_CDEV # install a CDEV entry in /dev device umass # Disks/Mass storage - Requires scbus and da0 device ums # Mouse # USB Ethernet device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet device cdce # Generic USB over Ethernet device cue # CATC USB Ethernet device kue # Kawasaki LSI USB Ethernet # Wireless NIC cards options IEEE80211_SUPPORT_MESH # FireWire support device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) # Misc device iicbus # I2C bus code device iic device kiic # Keywest I2C device ad7417 # PowerMac7,2 temperature sensor device ds1631 # PowerMac11,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor device powermac_nvram # Open Firmware configuration NVRAM device smu # Apple System Management Unit device atibl # ATI-based backlight driver for PowerBooks/iBooks device nvbl # nVidia-based backlight driver for PowerBooks/iBooks +device opalflash # PowerNV embedded flash memory # ADB support device adb device pmu # Sound support device sound # Generic sound driver (required) device snd_ai2s # Apple I2S audio device snd_uaudio # USB Audio # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support device uinput # install /dev/uinput cdev Index: projects/runtime-coverage-v2/sys/powerpc/powernv/opal_dev.c =================================================================== --- projects/runtime-coverage-v2/sys/powerpc/powernv/opal_dev.c (revision 346924) +++ projects/runtime-coverage-v2/sys/powerpc/powernv/opal_dev.c (revision 346925) @@ -1,423 +1,424 @@ /*- * Copyright (c) 2015 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "clock_if.h" #include "opal.h" static int opaldev_probe(device_t); static int opaldev_attach(device_t); /* clock interface */ static int opal_gettime(device_t dev, struct timespec *ts); static int opal_settime(device_t dev, struct timespec *ts); /* ofw bus interface */ static const struct ofw_bus_devinfo *opaldev_get_devinfo(device_t dev, device_t child); static void opal_shutdown(void *arg, int howto); static void opal_handle_shutdown_message(void *unused, struct opal_msg *msg); static void opal_intr(void *); static device_method_t opaldev_methods[] = { /* Device interface */ DEVMETHOD(device_probe, opaldev_probe), DEVMETHOD(device_attach, opaldev_attach), /* clock interface */ DEVMETHOD(clock_gettime, opal_gettime), DEVMETHOD(clock_settime, opal_settime), /* Bus interface */ DEVMETHOD(bus_child_pnpinfo_str, ofw_bus_gen_child_pnpinfo_str), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, opaldev_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; static driver_t opaldev_driver = { "opal", opaldev_methods, 0 }; static devclass_t opaldev_devclass; -DRIVER_MODULE(opaldev, ofwbus, opaldev_driver, opaldev_devclass, 0, 0); +EARLY_DRIVER_MODULE(opaldev, ofwbus, opaldev_driver, opaldev_devclass, 0, 0, + BUS_PASS_BUS); static void opal_heartbeat(void); static void opal_handle_messages(void); static struct proc *opal_hb_proc; static struct kproc_desc opal_heartbeat_kp = { "opal_heartbeat", opal_heartbeat, &opal_hb_proc }; SYSINIT(opal_heartbeat_setup, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &opal_heartbeat_kp); static int opal_heartbeat_ms; EVENTHANDLER_LIST_DEFINE(OPAL_ASYNC_COMP); EVENTHANDLER_LIST_DEFINE(OPAL_EPOW); EVENTHANDLER_LIST_DEFINE(OPAL_SHUTDOWN); EVENTHANDLER_LIST_DEFINE(OPAL_HMI_EVT); EVENTHANDLER_LIST_DEFINE(OPAL_DPO); EVENTHANDLER_LIST_DEFINE(OPAL_OCC); #define OPAL_SOFT_OFF 0 #define OPAL_SOFT_REBOOT 1 static void opal_heartbeat(void) { uint64_t events; if (opal_heartbeat_ms == 0) kproc_exit(0); while (1) { events = 0; /* Turn the OPAL state crank */ opal_call(OPAL_POLL_EVENTS, vtophys(&events)); if (events & OPAL_EVENT_MSG_PENDING) opal_handle_messages(); tsleep(opal_hb_proc, 0, "opal", MSEC_2_TICKS(opal_heartbeat_ms)); } } static int opaldev_probe(device_t dev) { phandle_t iparent; pcell_t *irqs; int i, n_irqs; if (!ofw_bus_is_compatible(dev, "ibm,opal-v3")) return (ENXIO); if (opal_check() != 0) return (ENXIO); device_set_desc(dev, "OPAL Abstraction Firmware"); /* Manually add IRQs before attaching */ if (OF_hasprop(ofw_bus_get_node(dev), "opal-interrupts")) { iparent = OF_finddevice("/interrupt-controller@0"); iparent = OF_xref_from_node(iparent); n_irqs = OF_getproplen(ofw_bus_get_node(dev), "opal-interrupts") / sizeof(*irqs); irqs = malloc(n_irqs * sizeof(*irqs), M_DEVBUF, M_WAITOK); OF_getencprop(ofw_bus_get_node(dev), "opal-interrupts", irqs, n_irqs * sizeof(*irqs)); for (i = 0; i < n_irqs; i++) bus_set_resource(dev, SYS_RES_IRQ, i, ofw_bus_map_intr(dev, iparent, 1, &irqs[i]), 1); free(irqs, M_DEVBUF); } return (BUS_PROBE_SPECIFIC); } static int opaldev_attach(device_t dev) { phandle_t child; device_t cdev; uint64_t junk; int i, 
rv; uint32_t async_count; struct ofw_bus_devinfo *dinfo; struct resource *irq; /* Test for RTC support and register clock if it works */ rv = opal_call(OPAL_RTC_READ, vtophys(&junk), vtophys(&junk)); do { rv = opal_call(OPAL_RTC_READ, vtophys(&junk), vtophys(&junk)); if (rv == OPAL_BUSY_EVENT) rv = opal_call(OPAL_POLL_EVENTS, 0); } while (rv == OPAL_BUSY_EVENT); if (rv == OPAL_SUCCESS) clock_register(dev, 2000); EVENTHANDLER_REGISTER(OPAL_SHUTDOWN, opal_handle_shutdown_message, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(shutdown_final, opal_shutdown, NULL, SHUTDOWN_PRI_LAST); OF_getencprop(ofw_bus_get_node(dev), "ibm,heartbeat-ms", &opal_heartbeat_ms, sizeof(opal_heartbeat_ms)); /* Bind to interrupts */ for (i = 0; (irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE)) != NULL; i++) bus_setup_intr(dev, irq, INTR_TYPE_TTY | INTR_MPSAFE | INTR_ENTROPY, NULL, opal_intr, (void *)rman_get_start(irq), NULL); OF_getencprop(ofw_bus_get_node(dev), "opal-msg-async-num", &async_count, sizeof(async_count)); opal_init_async_tokens(async_count); for (child = OF_child(ofw_bus_get_node(dev)); child != 0; child = OF_peer(child)) { dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); if (ofw_bus_gen_setup_devinfo(dinfo, child) != 0) { free(dinfo, M_DEVBUF); continue; } cdev = device_add_child(dev, NULL, -1); if (cdev == NULL) { device_printf(dev, "<%s>: device_add_child failed\n", dinfo->obd_name); ofw_bus_gen_destroy_devinfo(dinfo); free(dinfo, M_DEVBUF); continue; } device_set_ivars(cdev, dinfo); } return (bus_generic_attach(dev)); } static int bcd2bin32(int bcd) { int out = 0; out += bcd2bin(bcd & 0xff); out += 100*bcd2bin((bcd & 0x0000ff00) >> 8); out += 10000*bcd2bin((bcd & 0x00ff0000) >> 16); out += 1000000*bcd2bin((bcd & 0xffff0000) >> 24); return (out); } static int bin2bcd32(int bin) { int out = 0; int tmp; tmp = bin % 100; out += bin2bcd(tmp) * 1; bin = bin / 100; tmp = bin % 100; out += bin2bcd(tmp) * 100; bin = bin / 100; tmp = bin % 100; out += bin2bcd(tmp) * 10000; return (out); } static int opal_gettime(device_t dev, struct timespec *ts) { int rv; struct clocktime ct; uint32_t ymd; uint64_t hmsm; rv = opal_call(OPAL_RTC_READ, vtophys(&ymd), vtophys(&hmsm)); while (rv == OPAL_BUSY_EVENT) { opal_call(OPAL_POLL_EVENTS, 0); pause("opalrtc", 1); rv = opal_call(OPAL_RTC_READ, vtophys(&ymd), vtophys(&hmsm)); } if (rv != OPAL_SUCCESS) return (ENXIO); hmsm = be64toh(hmsm); ymd = be32toh(ymd); ct.nsec = bcd2bin32((hmsm & 0x000000ffffff0000) >> 16) * 1000; ct.sec = bcd2bin((hmsm & 0x0000ff0000000000) >> 40); ct.min = bcd2bin((hmsm & 0x00ff000000000000) >> 48); ct.hour = bcd2bin((hmsm & 0xff00000000000000) >> 56); ct.day = bcd2bin((ymd & 0x000000ff) >> 0); ct.mon = bcd2bin((ymd & 0x0000ff00) >> 8); ct.year = bcd2bin32((ymd & 0xffff0000) >> 16); return (clock_ct_to_ts(&ct, ts)); } static int opal_settime(device_t dev, struct timespec *ts) { int rv; struct clocktime ct; uint32_t ymd = 0; uint64_t hmsm = 0; clock_ts_to_ct(ts, &ct); ymd |= (uint32_t)bin2bcd(ct.day); ymd |= ((uint32_t)bin2bcd(ct.mon) << 8); ymd |= ((uint32_t)bin2bcd32(ct.year) << 16); hmsm |= ((uint64_t)bin2bcd32(ct.nsec/1000) << 16); hmsm |= ((uint64_t)bin2bcd(ct.sec) << 40); hmsm |= ((uint64_t)bin2bcd(ct.min) << 48); hmsm |= ((uint64_t)bin2bcd(ct.hour) << 56); hmsm = htobe64(hmsm); ymd = htobe32(ymd); do { rv = opal_call(OPAL_RTC_WRITE, vtophys(&ymd), vtophys(&hmsm)); if (rv == OPAL_BUSY_EVENT) { rv = opal_call(OPAL_POLL_EVENTS, 0); pause("opalrtc", 1); } } while (rv == OPAL_BUSY_EVENT); if (rv != OPAL_SUCCESS) 
return (ENXIO); return (0); } static const struct ofw_bus_devinfo * opaldev_get_devinfo(device_t dev, device_t child) { return (device_get_ivars(child)); } static void opal_shutdown(void *arg, int howto) { if (howto & RB_HALT) opal_call(OPAL_CEC_POWER_DOWN, 0 /* Normal power off */); else opal_call(OPAL_CEC_REBOOT); opal_call(OPAL_RETURN_CPU); } static void opal_handle_shutdown_message(void *unused, struct opal_msg *msg) { int howto; switch (be64toh(msg->params[0])) { case OPAL_SOFT_OFF: howto = RB_POWEROFF; break; case OPAL_SOFT_REBOOT: howto = RB_REROOT; break; } shutdown_nice(howto); } static void opal_handle_messages(void) { static struct opal_msg msg; uint64_t rv; uint32_t type; rv = opal_call(OPAL_GET_MSG, vtophys(&msg), sizeof(msg)); if (rv != OPAL_SUCCESS) return; type = be32toh(msg.msg_type); switch (type) { case OPAL_MSG_ASYNC_COMP: EVENTHANDLER_DIRECT_INVOKE(OPAL_ASYNC_COMP, &msg); break; case OPAL_MSG_EPOW: EVENTHANDLER_DIRECT_INVOKE(OPAL_EPOW, &msg); break; case OPAL_MSG_SHUTDOWN: EVENTHANDLER_DIRECT_INVOKE(OPAL_SHUTDOWN, &msg); break; case OPAL_MSG_HMI_EVT: EVENTHANDLER_DIRECT_INVOKE(OPAL_HMI_EVT, &msg); break; case OPAL_MSG_DPO: EVENTHANDLER_DIRECT_INVOKE(OPAL_DPO, &msg); break; case OPAL_MSG_OCC: EVENTHANDLER_DIRECT_INVOKE(OPAL_OCC, &msg); break; default: printf("Unknown OPAL message type %d\n", type); } } static void opal_intr(void *xintr) { uint64_t events = 0; opal_call(OPAL_HANDLE_INTERRUPT, (uint32_t)(uint64_t)xintr, vtophys(&events)); /* Wake up the heartbeat, if it's been setup. */ if (events != 0 && opal_hb_proc != NULL) wakeup(opal_hb_proc); } Index: projects/runtime-coverage-v2/sys/x86/x86/busdma_bounce.c =================================================================== --- projects/runtime-coverage-v2/sys/x86/x86/busdma_bounce.c (revision 346924) +++ projects/runtime-coverage-v2/sys/x86/x86/busdma_bounce.c (revision 346925) @@ -1,1321 +1,1319 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define MAX_BPAGES (Maxmem > atop(0x100000000ULL) ? 8192 : 512) #else #define MAX_BPAGES 8192 #endif enum { BUS_DMA_COULD_BOUNCE = 0x01, BUS_DMA_MIN_ALLOC_COMP = 0x02, BUS_DMA_KMEM_ALLOC = 0x04, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_offset_t dataoffs; /* page offset of client data */ vm_page_t datapage[2]; /* physical page(s) of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; int domain; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); static int bounce_bus_dma_zone_setup(bus_dma_tag_t dmat) { struct bounce_zone *bz; int error; /* Must bounce */ if ((error = alloc_bounce_zone(dmat)) != 0) return (error); bz = dmat->bounce_zone; if (ptoa(bz->total_bpages) < dmat->common.maxsize) { int pages; pages = atop(dmat->common.maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); } /* Performed initial allocation */ dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; return (0); } /* * Allocate a device specific dma_tag. 
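 *
 * As a rough usage sketch (illustrative values only; sc->dma_tag is a
 * hypothetical softc member, not part of this file), a driver limited
 * to 32-bit DMA addresses would pass alignment 1, boundary 0, lowaddr
 * BUS_SPACE_MAXADDR_32BIT and highaddr BUS_SPACE_MAXADDR:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    MAXPHYS, 1, MAXPHYS, 0, NULL, NULL, &sc->dma_tag);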
*/ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if (parent != NULL && (newtag->common.filter != NULL || (parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if ((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && (flags & BUS_DMA_ALLOCNOW) != 0) error = bounce_bus_dma_zone_setup(newtag); else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } /* * Update the domain for the tag. We may need to reallocate the zone and * bounce pages. */ static int bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat) { KASSERT(dmat->map_count == 0, ("bounce_bus_dma_tag_set_domain: Domain set after use.\n")); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0 || dmat->bounce_zone == NULL) return (0); dmat->bounce_flags &= ~BUS_DMA_MIN_ALLOC_COMP; return (bounce_bus_dma_zone_setup(dmat)); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free_domain(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); - error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc_domainset( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. 
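 * For example (illustrative): a tag created with lowaddr below
 * ptoa(Maxmem) has BUS_DMA_COULD_BOUNCE set at tag-creation time, so
 * this path builds a real map and may grow the bounce pool; an
 * unconstrained tag simply gets the cheap NULL map below.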
*/ if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; *mapp = (bus_dmamap_t)malloc_domainset(sizeof(**mapp), M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } else { *mapp = NULL; } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (map != NULL && map != &nobounce_dmamap) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free_domain(map, M_DEVBUF); } dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints listed in the dma tag. * A dmamap for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; /* If we succeed, no mapping/bouncing will be required */ *mapp = NULL; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc_domainset( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill.
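 *
 * As a worked example (illustrative values): maxsize = 4096 with
 * alignment <= 4096, lowaddr >= ptoa(Maxmem) and default memory
 * attributes is served by malloc_domainset(); a 64KB buffer with
 * nsegments >= 16, page alignment and no boundary constraint goes to
 * kmem_alloc_attr_domainset(); a 64KB single-segment buffer falls
 * through to kmem_alloc_contig_domainset().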
* * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. */ if (dmat->common.maxsize <= PAGE_SIZE && dmat->common.alignment <= dmat->common.maxsize && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc_domainset(dmat->common.maxsize, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr_domainset( DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig_domainset( DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and its associated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * dmamem does not need to be bounced, so the map should be * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used.
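 * E.g. memory that came from the malloc_domainset() case above is
 * returned with free_domain(), while kmem-backed memory must be
 * released with kmem_free() using the tag's maxsize.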
*/ if (map != NULL) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free_domain(vaddr, M_DEVBUF); else kmem_free((vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { vm_paddr_t curaddr; bus_size_t sgsize; if (map != &nobounce_dmamap && map->pagesneeded == 0) { /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; vm_paddr_t paddr; bus_size_t sg_len; if (map != &nobounce_dmamap && map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, int ma_offs, bus_size_t buflen, int flags) { bus_size_t sg_len, max_sgsize; int page_index; vm_paddr_t paddr; if (map != &nobounce_dmamap && map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ page_index = 0; while (buflen > 0) { paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; sg_len = PAGE_SIZE - ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sg_len = MIN(sg_len, max_sgsize); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); sg_len = MIN(sg_len, max_sgsize); KASSERT((sg_len & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); map->pagesneeded++; } if (((ma_offs + sg_len) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sg_len) & PAGE_MASK; KASSERT(buflen >= sg_len, ("Segment length overruns original buffer")); buflen -= sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return 
(ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; KASSERT(curaddr <= BUS_SPACE_MAXADDR, ("ds_addr %#jx > BUS_SPACE_MAXADDR %#jx; dmat %p fl %#x low %#jx " "hi %#jx", (uintmax_t)curaddr, (uintmax_t)BUS_SPACE_MAXADDR, dmat, dmat->bounce_flags, (uintmax_t)dmat->common.lowaddr, (uintmax_t)dmat->common.highaddr)); /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; vm_paddr_t curaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, 0, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize, max_sgsize; vm_paddr_t curaddr; vm_offset_t kvaddr, vaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment.
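 * The kernel pmap is handled with the cheaper pmap_kextract(); for a
 * user pmap we use pmap_extract() and leave kvaddr at 0, which tells
 * the sync code to map the backing page with pmap_quick_enter_page()
 * later instead of using a kernel virtual address.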
*/ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static int bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { vm_paddr_t paddr, next_paddr; int error, page_index; bus_size_t sgsize, max_sgsize; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * If we have to keep the offset of each page this function * is not suitable, switch back to bus_dmamap_load_ma_triv * which is going to do the right thing in this case. */ error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs, flags, segs, segp); return (error); } if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } page_index = 0; while (buflen > 0) { /* * Compute the segment size, and adjust counts. */ paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - ma_offs; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, paddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); KASSERT((sgsize & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); /* * Check if two pages of the user provided buffer * are used. */ if ((ma_offs + sgsize) > PAGE_SIZE) next_paddr = VM_PAGE_TO_PHYS(ma[page_index + 1]); else next_paddr = 0; paddr = add_bounce_page(dmat, map, 0, paddr, next_paddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs, segp); if (sgsize == 0) break; KASSERT(buflen >= sgsize, ("Segment length overruns original buffer")); buflen -= sgsize; if (((ma_offs + sgsize) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sgsize) & PAGE_MASK; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (map == NULL) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. 
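 * Any bounce pages still queued on the map are returned to their
 * zone's free list; free_bounce_page() may then restart a deferred
 * load that was waiting for pages (see busdma_swi()).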
*/ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (map == NULL) return; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; bus_size_t datacount1, datacount2; if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL) return; /* * Handle data bouncing. We might also want to add support for * invalidating the caches on broken hardware. */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)datavaddr, (void *)bpage->vaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_w; } /* * We are dealing with an unmapped buffer that expands * over two pages. */ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)datavaddr, (void *)(bpage->vaddr + datacount1), datacount2); pmap_quick_remove_page(datavaddr); next_w: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)bpage->vaddr, (void *)datavaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_r; } /* * We are dealing with an unmapped buffer that expands * over two pages. 
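 * The copy back is therefore split: datacount1 bytes into the first
 * page and the remainder into datapage[1].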
*/ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)(bpage->vaddr + datacount1), (void *)datavaddr, datacount2); pmap_quick_remove_page(datavaddr); next_r: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if (dmat->common.alignment <= bz->alignment && dmat->common.lowaddr >= bz->lowaddr && dmat->common.domain == bz->domain) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; bz->domain = dmat->common.domain; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "domain", CTLFLAG_RD, &bz->domain, 0, "memory domain"); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = malloc_domainset(sizeof(*bpage), M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc_domainset(PAGE_SIZE, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free_domain(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL && map != &nobounce_dmamap, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); 
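	/* At this point reserve_bounce_pages() has already moved this page from the zone's free count to its reserved count on behalf of the map, so an empty free list here can only mean the accounting above was violated. */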
STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr1 & PAGE_MASK; bpage->busaddr |= addr1 & PAGE_MASK; KASSERT(addr2 == 0, ("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET")); } bpage->datavaddr = vaddr; bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1); KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned")); bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2); bpage->dataoffs = addr1 & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .tag_set_domain = bounce_bus_dma_tag_set_domain, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bounce_bus_dmamap_load_ma, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync, }; Index: projects/runtime-coverage-v2/tools/regression/fsx/fsx.c =================================================================== --- projects/runtime-coverage-v2/tools/regression/fsx/fsx.c (revision 346924) +++ projects/runtime-coverage-v2/tools/regression/fsx/fsx.c (revision 346925) @@ -1,1228 +1,1239 @@ /* * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 2.0 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
* * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ * * File: fsx.c * Author: Avadis Tevanian, Jr. * * File system exerciser. * * Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com * * Various features from Joe Sokol, Pat Dirks, and Clark Warner. * * Small changes to work under Linux -- davej@suse.de * * Sundry porting patches from Guy Harris 12/2001 * * Checks for mmap last-page zero fill. * * Updated license to APSL 2.0, 2004/7/27 - Jordan Hubbard * * $FreeBSD$ * */ #include #include #ifdef _UWIN # include # include # include # include #endif +#include #include #include #ifndef MAP_FILE # define MAP_FILE 0 #endif #include #include #include #include #include #include #include #include #define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */ /* * A log entry is an operation and a bunch of arguments. */ struct log_entry { int operation; int args[3]; }; #define LOGSIZE 1000 struct log_entry oplog[LOGSIZE]; /* the log */ int logptr = 0; /* current position in log */ int logcount = 0; /* total ops */ /* * Define operations */ #define OP_READ 1 #define OP_WRITE 2 #define OP_TRUNCATE 3 #define OP_CLOSEOPEN 4 #define OP_MAPREAD 5 #define OP_MAPWRITE 6 #define OP_SKIPPED 7 #define OP_INVALIDATE 8 int page_size; int page_mask; char *original_buf; /* a pointer to the original data */ char *good_buf; /* a pointer to the correct data */ char *temp_buf; /* a pointer to the current data */ char *fname; /* name of our test file */ int fd; /* fd for our test file */ off_t file_size = 0; off_t biggest = 0; char state[256]; unsigned long testcalls = 0; /* calls to function "test" */ unsigned long simulatedopcount = 0; /* -b flag */ int closeprob = 0; /* -c flag */ int invlprob = 0; /* -i flag */ int debug = 0; /* -d flag */ unsigned long debugstart = 0; /* -D flag */ unsigned long maxfilelen = 256 * 1024; /* -l flag */ int sizechecks = 1; /* -n flag disables them */ int maxoplen = 64 * 1024; /* -o flag */ int quiet = 0; /* -q flag */ unsigned long progressinterval = 0; /* -p flag */ int readbdy = 1; /* -r flag */ int style = 0; /* -s flag */ int truncbdy = 1; /* -t flag */ int writebdy = 1; /* -w flag */ long monitorstart = -1; /* -m flag */ long monitorend = -1; /* -m flag */ int lite = 0; /* -L flag */ long numops = -1; /* -N flag */ int randomoplen = 1; /* -O flag disables it */ int seed = 1; /* -S flag */ int mapped_writes = 1; /* -W flag disables */ int mapped_reads = 1; /* -R flag disables it */ int mapped_msync = 1; /* -U flag disables */ int fsxgoodfd = 0; FILE * fsxlogf = NULL; int badoff = -1; int closeopen = 0; int invl = 0; void vwarnc(code, fmt, ap) int code; const char *fmt; va_list ap; { fprintf(stderr, "fsx: "); if (fmt != NULL) { vfprintf(stderr, fmt, ap); fprintf(stderr, ": "); } fprintf(stderr, "%s\n", strerror(code)); } void warn(const char * fmt, ...) { va_list ap; va_start(ap, fmt); vwarnc(errno, fmt, ap); va_end(ap); } void prt(char *fmt, ...) 
{ va_list args; va_start(args, fmt); vfprintf(stdout, fmt, args); va_end(args); if (fsxlogf) { va_start(args, fmt); vfprintf(fsxlogf, fmt, args); va_end(args); } } void prterr(char *prefix) { prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno)); } void do_log4(int operation, int arg0, int arg1, int arg2) { struct log_entry *le; le = &oplog[logptr]; le->operation = operation; le->args[0] = arg0; le->args[1] = arg1; le->args[2] = arg2; logptr++; logcount++; if (logptr >= LOGSIZE) logptr = 0; } void log4(int operation, int arg0, int arg1, int arg2) { do_log4(operation, arg0, arg1, arg2); if (closeopen) do_log4(OP_CLOSEOPEN, 0, 0, 0); if (invl) do_log4(OP_INVALIDATE, 0, 0, 0); } void logdump(void) { struct log_entry *lp; int i, count, down, opnum; prt("LOG DUMP (%d total operations):\n", logcount); if (logcount < LOGSIZE) { i = 0; count = logcount; } else { i = logptr; count = LOGSIZE; } opnum = i + 1 + (logcount/LOGSIZE)*LOGSIZE; for ( ; count > 0; count--) { lp = &oplog[i]; if (lp->operation == OP_CLOSEOPEN || lp->operation == OP_INVALIDATE) { switch (lp->operation) { case OP_CLOSEOPEN: prt("\t\tCLOSE/OPEN\n"); break; case OP_INVALIDATE: prt("\t\tMS_INVALIDATE\n"); break; } i++; if (i == LOGSIZE) i = 0; continue; } prt("%d(%d mod 256): ", opnum, opnum%256); switch (lp->operation) { case OP_MAPREAD: prt("MAPREAD\t0x%x thru 0x%x\t(0x%x bytes)", lp->args[0], lp->args[0] + lp->args[1] - 1, lp->args[1]); if (badoff >= lp->args[0] && badoff < lp->args[0] + lp->args[1]) prt("\t***RRRR***"); break; case OP_MAPWRITE: prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)", lp->args[0], lp->args[0] + lp->args[1] - 1, lp->args[1]); if (badoff >= lp->args[0] && badoff < lp->args[0] + lp->args[1]) prt("\t******WWWW"); break; case OP_READ: prt("READ\t0x%x thru 0x%x\t(0x%x bytes)", lp->args[0], lp->args[0] + lp->args[1] - 1, lp->args[1]); if (badoff >= lp->args[0] && badoff < lp->args[0] + lp->args[1]) prt("\t***RRRR***"); break; case OP_WRITE: - prt("WRITE\t0x%x thru 0x%x\t(0x%x bytes)", - lp->args[0], lp->args[0] + lp->args[1] - 1, - lp->args[1]); - if (lp->args[0] > lp->args[2]) - prt(" HOLE"); - else if (lp->args[0] + lp->args[1] > lp->args[2]) - prt(" EXTEND"); - if ((badoff >= lp->args[0] || badoff >=lp->args[2]) && - badoff < lp->args[0] + lp->args[1]) - prt("\t***WWWW"); + { + int offset = lp->args[0]; + int len = lp->args[1]; + int oldlen = lp->args[2]; + + prt("WRITE\t0x%x thru 0x%x\t(0x%x bytes)", + offset, offset + len - 1, + len); + if (offset > oldlen) + prt(" HOLE"); + else if (offset + len > oldlen) + prt(" EXTEND"); + if ((badoff >= offset || badoff >=oldlen) && + badoff < offset + len) + prt("\t***WWWW"); + } break; case OP_TRUNCATE: down = lp->args[0] < lp->args[1]; prt("TRUNCATE %s\tfrom 0x%x to 0x%x", down ? "DOWN" : "UP", lp->args[1], lp->args[0]); if (badoff >= lp->args[!down] && badoff < lp->args[!!down]) prt("\t******WWWW"); break; case OP_SKIPPED: prt("SKIPPED (no operation)"); break; default: prt("BOGUS LOG ENTRY (operation code = %d)!", lp->operation); } prt("\n"); opnum++; i++; if (i == LOGSIZE) i = 0; } } void save_buffer(char *buffer, off_t bufferlength, int fd) { off_t ret; ssize_t byteswritten; if (fd <= 0 || bufferlength == 0) return; if (bufferlength > SSIZE_MAX) { prt("fsx flaw: overflow in save_buffer\n"); exit(67); } if (lite) { off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END); if (size_by_seek == (off_t)-1) prterr("save_buffer: lseek eof"); else if (bufferlength > size_by_seek) { warn("save_buffer: .fsxgood file too short... 
will save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek, (unsigned long long)bufferlength); bufferlength = size_by_seek; } } ret = lseek(fd, (off_t)0, SEEK_SET); if (ret == (off_t)-1) prterr("save_buffer: lseek 0"); byteswritten = write(fd, buffer, (size_t)bufferlength); if (byteswritten != bufferlength) { if (byteswritten == -1) prterr("save_buffer write"); else warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n", (unsigned)byteswritten, (unsigned long long)bufferlength); } } void report_failure(int status) { logdump(); if (fsxgoodfd) { if (good_buf) { save_buffer(good_buf, file_size, fsxgoodfd); prt("Correct content saved for comparison\n"); prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n", fname, fname); } close(fsxgoodfd); } exit(status); } #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \ *(((unsigned char *)(cp)) + 1))) void check_buffers(unsigned offset, unsigned size) { unsigned char c, t; unsigned i = 0; unsigned n = 0; unsigned op = 0; unsigned bad = 0; if (memcmp(good_buf + offset, temp_buf, size) != 0) { prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n", offset, size); prt("OFFSET\tGOOD\tBAD\tRANGE\n"); while (size > 0) { c = good_buf[offset]; t = temp_buf[i]; if (c != t) { if (n == 0) { bad = short_at(&temp_buf[i]); prt("0x%5x\t0x%04x\t0x%04x", offset, short_at(&good_buf[offset]), bad); op = temp_buf[offset & 1 ? i+1 : i]; } n++; badoff = offset; } offset++; i++; size--; } if (n) { prt("\t0x%5x\n", n); if (bad) prt("operation# (mod 256) for the bad data may be %u\n", ((unsigned)op & 0xff)); else prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n"); } else prt("????????????????\n"); report_failure(110); } } void check_size(void) { struct stat statbuf; off_t size_by_seek; if (fstat(fd, &statbuf)) { prterr("check_size: fstat"); statbuf.st_size = -1; } size_by_seek = lseek(fd, (off_t)0, SEEK_END); if (file_size != statbuf.st_size || file_size != size_by_seek) { prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n", (unsigned long long)file_size, (unsigned long long)statbuf.st_size, (unsigned long long)size_by_seek); report_failure(120); } } void check_trunc_hack(void) { struct stat statbuf; ftruncate(fd, (off_t)0); ftruncate(fd, (off_t)100000); fstat(fd, &statbuf); if (statbuf.st_size != (off_t)100000) { prt("no extend on truncate! 
not posix!\n"); exit(130); } ftruncate(fd, (off_t)0); } void doread(unsigned offset, unsigned size) { off_t ret; unsigned iret; offset -= offset % readbdy; if (size == 0) { if (!quiet && testcalls > simulatedopcount) prt("skipping zero size read\n"); log4(OP_SKIPPED, OP_READ, offset, size); return; } if (size + offset > file_size) { if (!quiet && testcalls > simulatedopcount) prt("skipping seek/read past end of file\n"); log4(OP_SKIPPED, OP_READ, offset, size); return; } log4(OP_READ, offset, size, 0); if (testcalls <= simulatedopcount) return; if (!quiet && ((progressinterval && testcalls % progressinterval == 0) || (debug && (monitorstart == -1 || (offset + size > monitorstart && (monitorend == -1 || offset <= monitorend)))))) prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); ret = lseek(fd, (off_t)offset, SEEK_SET); if (ret == (off_t)-1) { prterr("doread: lseek"); report_failure(140); } iret = read(fd, temp_buf, size); if (iret != size) { if (iret == -1) prterr("doread: read"); else prt("short read: 0x%x bytes instead of 0x%x\n", iret, size); report_failure(141); } check_buffers(offset, size); } void check_eofpage(char *s, unsigned offset, char *p, int size) { uintptr_t last_page, should_be_zero; if (offset + size <= (file_size & ~page_mask)) return; /* * we landed in the last page of the file * test to make sure the VM system provided 0's * beyond the true end of the file mapping * (as required by mmap def in 1996 posix 1003.1) */ last_page = ((uintptr_t)p + (offset & page_mask) + size) & ~page_mask; for (should_be_zero = last_page + (file_size & page_mask); should_be_zero < last_page + page_size; should_be_zero++) if (*(char *)should_be_zero) { prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n", s, file_size - 1, should_be_zero & page_mask, short_at(should_be_zero)); report_failure(205); } } void domapread(unsigned offset, unsigned size) { unsigned pg_offset; unsigned map_size; char *p; offset -= offset % readbdy; if (size == 0) { if (!quiet && testcalls > simulatedopcount) prt("skipping zero size read\n"); log4(OP_SKIPPED, OP_MAPREAD, offset, size); return; } if (size + offset > file_size) { if (!quiet && testcalls > simulatedopcount) prt("skipping seek/read past end of file\n"); log4(OP_SKIPPED, OP_MAPREAD, offset, size); return; } log4(OP_MAPREAD, offset, size, 0); if (testcalls <= simulatedopcount) return; if (!quiet && ((progressinterval && testcalls % progressinterval == 0) || (debug && (monitorstart == -1 || (offset + size > monitorstart && (monitorend == -1 || offset <= monitorend)))))) prt("%lu mapread\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); pg_offset = offset & page_mask; map_size = pg_offset + size; if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd, (off_t)(offset - pg_offset))) == (char *)-1) { prterr("domapread: mmap"); report_failure(190); } memcpy(temp_buf, p + pg_offset, size); check_eofpage("Read", offset, p, size); if (munmap(p, map_size) != 0) { prterr("domapread: munmap"); report_failure(191); } check_buffers(offset, size); } void gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size) { while (size--) { good_buf[offset] = testcalls % 256; if (offset % 2) good_buf[offset] += original_buf[offset]; offset++; } } void dowrite(unsigned offset, unsigned size) { off_t ret; unsigned iret; offset -= offset % writebdy; if (size == 0) { if (!quiet && testcalls > simulatedopcount) prt("skipping zero size write\n"); 
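		/* Still log the skipped write so it appears in a later logdump(). */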
log4(OP_SKIPPED, OP_WRITE, offset, size); return; } log4(OP_WRITE, offset, size, file_size); gendata(original_buf, good_buf, offset, size); if (file_size < offset + size) { if (file_size < offset) memset(good_buf + file_size, '\0', offset - file_size); file_size = offset + size; if (lite) { warn("Lite file size bug in fsx!"); report_failure(149); } } if (testcalls <= simulatedopcount) return; if (!quiet && ((progressinterval && testcalls % progressinterval == 0) || (debug && (monitorstart == -1 || (offset + size > monitorstart && (monitorend == -1 || offset <= monitorend)))))) prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); ret = lseek(fd, (off_t)offset, SEEK_SET); if (ret == (off_t)-1) { prterr("dowrite: lseek"); report_failure(150); } iret = write(fd, good_buf + offset, size); if (iret != size) { if (iret == -1) prterr("dowrite: write"); else prt("short write: 0x%x bytes instead of 0x%x\n", iret, size); report_failure(151); } } void domapwrite(unsigned offset, unsigned size) { unsigned pg_offset; unsigned map_size; off_t cur_filesize; char *p; offset -= offset % writebdy; if (size == 0) { if (!quiet && testcalls > simulatedopcount) prt("skipping zero size write\n"); log4(OP_SKIPPED, OP_MAPWRITE, offset, size); return; } cur_filesize = file_size; log4(OP_MAPWRITE, offset, size, 0); gendata(original_buf, good_buf, offset, size); if (file_size < offset + size) { if (file_size < offset) memset(good_buf + file_size, '\0', offset - file_size); file_size = offset + size; if (lite) { warn("Lite file size bug in fsx!"); report_failure(200); } } if (testcalls <= simulatedopcount) return; if (!quiet && ((progressinterval && testcalls % progressinterval == 0) || (debug && (monitorstart == -1 || (offset + size > monitorstart && (monitorend == -1 || offset <= monitorend)))))) prt("%lu mapwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); if (file_size > cur_filesize) { if (ftruncate(fd, file_size) == -1) { prterr("domapwrite: ftruncate"); exit(201); } } pg_offset = offset & page_mask; map_size = pg_offset + size; if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, (off_t)(offset - pg_offset))) == MAP_FAILED) { prterr("domapwrite: mmap"); report_failure(202); } memcpy(p + pg_offset, good_buf + offset, size); if (mapped_msync && msync(p, map_size, MS_SYNC) != 0) { prterr("domapwrite: msync"); report_failure(203); } check_eofpage("Write", offset, p, size); if (munmap(p, map_size) != 0) { prterr("domapwrite: munmap"); report_failure(204); } } void dotruncate(unsigned size) { int oldsize = file_size; size -= size % truncbdy; if (size > biggest) { biggest = size; if (!quiet && testcalls > simulatedopcount) prt("truncating to largest ever: 0x%x\n", size); } log4(OP_TRUNCATE, size, (unsigned)file_size, 0); if (size > file_size) memset(good_buf + file_size, '\0', size - file_size); file_size = size; if (testcalls <= simulatedopcount) return; if ((progressinterval && testcalls % progressinterval == 0) || (debug && (monitorstart == -1 || monitorend == -1 || size <= monitorend))) prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size); if (ftruncate(fd, (off_t)size) == -1) { prt("ftruncate1: %x\n", size); prterr("dotruncate: ftruncate"); report_failure(160); } } void writefileimage() { ssize_t iret; if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) { prterr("writefileimage: lseek"); report_failure(171); } iret = write(fd, good_buf, file_size); if ((off_t)iret != file_size) { if (iret == -1) 
prterr("writefileimage: write"); else prt("short write: 0x%x bytes instead of 0x%llx\n", iret, (unsigned long long)file_size); report_failure(172); } if (lite ? 0 : ftruncate(fd, file_size) == -1) { prt("ftruncate2: %llx\n", (unsigned long long)file_size); prterr("writefileimage: ftruncate"); report_failure(173); } } void docloseopen(void) { if (testcalls <= simulatedopcount) return; if (debug) prt("%lu close/open\n", testcalls); if (close(fd)) { prterr("docloseopen: close"); report_failure(180); } fd = open(fname, O_RDWR, 0); if (fd < 0) { prterr("docloseopen: open"); report_failure(181); } } void doinvl(void) { char *p; if (file_size == 0) return; if (testcalls <= simulatedopcount) return; if (debug) prt("%lu msync(MS_INVALIDATE)\n", testcalls); if ((p = (char *)mmap(0, file_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0)) == MAP_FAILED) { prterr("doinvl: mmap"); report_failure(205); } if (msync(p, 0, MS_SYNC | MS_INVALIDATE) != 0) { prterr("doinvl: msync"); report_failure(206); } if (munmap(p, file_size) != 0) { prterr("doinvl: munmap"); report_failure(207); } } void test(void) { unsigned long offset; unsigned long size = maxoplen; unsigned long rv = random(); unsigned long op = rv % (3 + !lite + mapped_writes); /* turn off the map read if necessary */ if (op == 2 && !mapped_reads) op = 0; if (simulatedopcount > 0 && testcalls == simulatedopcount) writefileimage(); testcalls++; if (closeprob) closeopen = (rv >> 3) < (1 << 28) / closeprob; if (invlprob) invl = (rv >> 3) < (1 << 28) / invlprob; if (debugstart > 0 && testcalls >= debugstart) debug = 1; if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0) prt("%lu...\n", testcalls); /* * READ: op = 0 * WRITE: op = 1 * MAPREAD: op = 2 * TRUNCATE: op = 3 * MAPWRITE: op = 3 or 4 */ if (lite ? 0 : op == 3 && style == 0) /* vanilla truncate? */ dotruncate(random() % maxfilelen); else { if (randomoplen) size = random() % (maxoplen+1); if (lite ? 0 : op == 3) dotruncate(size); else { offset = random(); if (op == 1 || op == (lite ? 
3 : 4)) { offset %= maxfilelen; if (offset + size > maxfilelen) size = maxfilelen - offset; if (op != 1) domapwrite(offset, size); else dowrite(offset, size); } else { if (file_size) offset %= file_size; else offset = 0; if (offset + size > file_size) size = file_size - offset; if (op != 0) domapread(offset, size); else doread(offset, size); } } } if (sizechecks && testcalls > simulatedopcount) check_size(); if (invl) doinvl(); if (closeopen) docloseopen(); } void cleanup(sig) int sig; { if (sig) prt("signal %d\n", sig); prt("testcalls = %lu\n", testcalls); exit(sig); } void usage(void) { fprintf(stdout, "usage: %s", "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\ -b opnum: beginning operation number (default 1)\n\ -c P: 1 in P chance of file close+open at each op (default infinity)\n\ -d: debug output for all operations\n\ -i P: 1 in P chance of calling msync(MS_INVALIDATE) (default infinity)\n\ -l flen: the upper bound on file size (default 262144)\n\ -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\ -n: no verifications of file size\n\ -o oplen: the upper bound on operation size (default 65536)\n\ -p progressinterval: debug output at specified operation interval\n\ -q: quieter operation\n\ -r readbdy: 4096 would make reads page aligned (default 1)\n\ -s style: 1 gives smaller truncates (default 0)\n\ -t truncbdy: 4096 would make truncates page aligned (default 1)\n\ -w writebdy: 4096 would make writes page aligned (default 1)\n\ -D startingop: debug output starting at specified operation\n\ -L: fsxLite - no file creations & no file size changes\n\ -N numops: total # operations to do (default infinity)\n\ -O: use oplen (see -o flag) for every op (default random)\n\ -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\ -S seed: for random # generator (default 1) 0 gets timestamp\n\ -W: mapped write operations DISabled\n\ -R: mapped read operations DISabled)\n\ -U: msync after mapped write operations DISabled\n\ fname: this filename is REQUIRED (no default)\n"); exit(90); } int getnum(char *s, char **e) { int ret = -1; *e = (char *) 0; ret = strtol(s, e, 0); if (*e) switch (**e) { case 'b': case 'B': ret *= 512; *e = *e + 1; break; case 'k': case 'K': ret *= 1024; *e = *e + 1; break; case 'm': case 'M': ret *= 1024*1024; *e = *e + 1; break; case 'w': case 'W': ret *= 4; *e = *e + 1; break; } return (ret); } int main(int argc, char **argv) { int i, ch; char *endp; char goodfile[1024]; char logfile[1024]; + struct timespec now; goodfile[0] = 0; logfile[0] = 0; page_size = getpagesize(); page_mask = page_size - 1; setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */ while ((ch = getopt(argc, argv, "b:c:di:l:m:no:p:qr:s:t:w:D:LN:OP:RS:UW")) != -1) switch (ch) { case 'b': simulatedopcount = getnum(optarg, &endp); if (!quiet) fprintf(stdout, "Will begin at operation %ld\n", simulatedopcount); if (simulatedopcount == 0) usage(); simulatedopcount -= 1; break; case 'c': closeprob = getnum(optarg, &endp); if (!quiet) fprintf(stdout, "Chance of close/open is 1 in %d\n", closeprob); if (closeprob <= 0) usage(); break; case 'd': debug = 1; break; case 'i': invlprob = getnum(optarg, &endp); if (!quiet) fprintf(stdout, "Chance of MS_INVALIDATE is 1 in %d\n", invlprob); if (invlprob <= 0) usage(); break; case 'l': maxfilelen = getnum(optarg, &endp); if (maxfilelen <= 0) usage(); 
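			/* getnum() accepts b/k/m/w size suffixes, so e.g. "-l 1m" (illustrative) caps the file at 1024*1024 bytes. */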
int
main(int argc, char **argv)
{
	int i, ch;
	char *endp;
	char goodfile[1024];
	char logfile[1024];
+	struct timespec now;

	goodfile[0] = 0;
	logfile[0] = 0;

	page_size = getpagesize();
	page_mask = page_size - 1;

	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */

	while ((ch = getopt(argc, argv, "b:c:di:l:m:no:p:qr:s:t:w:D:LN:OP:RS:UW"))
	    != -1)
		switch (ch) {
		case 'b':
			simulatedopcount = getnum(optarg, &endp);
			if (!quiet)
				fprintf(stdout, "Will begin at operation %ld\n",
				    simulatedopcount);
			if (simulatedopcount == 0)
				usage();
			simulatedopcount -= 1;
			break;
		case 'c':
			closeprob = getnum(optarg, &endp);
			if (!quiet)
				fprintf(stdout, "Chance of close/open is 1 in %d\n",
				    closeprob);
			if (closeprob <= 0)
				usage();
			break;
		case 'd':
			debug = 1;
			break;
		case 'i':
			invlprob = getnum(optarg, &endp);
			if (!quiet)
				fprintf(stdout, "Chance of MS_INVALIDATE is 1 in %d\n",
				    invlprob);
			if (invlprob <= 0)
				usage();
			break;
		case 'l':
			maxfilelen = getnum(optarg, &endp);
			if (maxfilelen <= 0)
				usage();
			break;
		case 'm':
			monitorstart = getnum(optarg, &endp);
			if (monitorstart < 0)
				usage();
			if (!endp || *endp++ != ':')
				usage();
			monitorend = getnum(endp, &endp);
			if (monitorend < 0)
				usage();
			if (monitorend == 0)
				monitorend = -1; /* aka infinity */
			debug = 1;
		case 'n':
			sizechecks = 0;
			break;
		case 'o':
			maxoplen = getnum(optarg, &endp);
			if (maxoplen <= 0)
				usage();
			break;
		case 'p':
			progressinterval = getnum(optarg, &endp);
			if (progressinterval < 0)
				usage();
			break;
		case 'q':
			quiet = 1;
			break;
		case 'r':
			readbdy = getnum(optarg, &endp);
			if (readbdy <= 0)
				usage();
			break;
		case 's':
			style = getnum(optarg, &endp);
			if (style < 0 || style > 1)
				usage();
			break;
		case 't':
			truncbdy = getnum(optarg, &endp);
			if (truncbdy <= 0)
				usage();
			break;
		case 'w':
			writebdy = getnum(optarg, &endp);
			if (writebdy <= 0)
				usage();
			break;
		case 'D':
			debugstart = getnum(optarg, &endp);
			if (debugstart < 1)
				usage();
			break;
		case 'L':
			lite = 1;
			break;
		case 'N':
			numops = getnum(optarg, &endp);
			if (numops < 0)
				usage();
			break;
		case 'O':
			randomoplen = 0;
			break;
		case 'P':
			strncpy(goodfile, optarg, sizeof(goodfile));
			strcat(goodfile, "/");
			strncpy(logfile, optarg, sizeof(logfile));
			strcat(logfile, "/");
			break;
		case 'R':
			mapped_reads = 0;
			break;
		case 'S':
			seed = getnum(optarg, &endp);
-			if (seed == 0)
-				seed = time(0) % 10000;
+			if (seed == 0) {
+				if (clock_gettime(CLOCK_REALTIME, &now) != 0)
+					err(1, "clock_gettime");
+				seed = now.tv_nsec % 10000;
+			}
			if (!quiet)
				fprintf(stdout, "Seed set to %d\n", seed);
			if (seed < 0)
				usage();
			break;
		case 'W':
			mapped_writes = 0;
			if (!quiet)
				fprintf(stdout, "mapped writes DISABLED\n");
			break;
		case 'U':
			mapped_msync = 0;
			if (!quiet)
				fprintf(stdout, "mapped msync DISABLED\n");
			break;
		default:
			usage();
			/* NOTREACHED */
		}
	argc -= optind;
	argv += optind;
	if (argc != 1)
		usage();
	fname = argv[0];

	signal(SIGHUP, cleanup);
	signal(SIGINT, cleanup);
	signal(SIGPIPE, cleanup);
	signal(SIGALRM, cleanup);
	signal(SIGTERM, cleanup);
	signal(SIGXCPU, cleanup);
	signal(SIGXFSZ, cleanup);
	signal(SIGVTALRM, cleanup);
	signal(SIGUSR1, cleanup);
	signal(SIGUSR2, cleanup);

	initstate(seed, state, 256);
	setstate(state);
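/*
 * Editor's sketch, not part of the patch: the -S 0 hunk above swaps
 * time(0) for the nanosecond field of CLOCK_REALTIME, so two runs
 * started within the same second no longer pick the same seed. A
 * stand-alone equivalent of the new behavior (pick_seed() is a
 * hypothetical name):
 */
#if 0
#include <err.h>
#include <time.h>

static int
pick_seed(void)
{
	struct timespec now;

	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
		err(1, "clock_gettime");
	return ((int)(now.tv_nsec % 10000));	/* same 0..9999 range as before */
}
#endif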
	fd = open(fname, O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC), 0666);
	if (fd < 0) {
		prterr(fname);
		exit(91);
	}
	strncat(goodfile, fname, 256);
	strcat(goodfile, ".fsxgood");
	fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
	if (fsxgoodfd < 0) {
		prterr(goodfile);
		exit(92);
	}
	strncat(logfile, fname, 256);
	strcat(logfile, ".fsxlog");
	fsxlogf = fopen(logfile, "w");
	if (fsxlogf == NULL) {
		prterr(logfile);
		exit(93);
	}
	if (lite) {
		off_t ret;

		file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
		if (file_size == (off_t)-1) {
			prterr(fname);
			warn("main: lseek eof");
			exit(94);
		}
		ret = lseek(fd, (off_t)0, SEEK_SET);
		if (ret == (off_t)-1) {
			prterr(fname);
			warn("main: lseek 0");
			exit(95);
		}
	}
	original_buf = (char *) malloc(maxfilelen);
	for (i = 0; i < maxfilelen; i++)
		original_buf[i] = random() % 256;
	good_buf = (char *) malloc(maxfilelen);
	memset(good_buf, '\0', maxfilelen);
	temp_buf = (char *) malloc(maxoplen);
	memset(temp_buf, '\0', maxoplen);
	if (lite) {	/* zero entire existing file */
		ssize_t written;

		written = write(fd, good_buf, (size_t)maxfilelen);
		if (written != maxfilelen) {
			if (written == -1) {
				prterr(fname);
				warn("main: error on write");
			} else
-				warn("main: short write, 0x%x bytes instead of 0x%x\n",
+				warn("main: short write, 0x%x bytes instead of 0x%lx\n",
				    (unsigned)written, maxfilelen);
			exit(98);
		}
	} else
		check_trunc_hack();

	while (numops == -1 || numops--)
		test();

	if (close(fd)) {
		prterr("close");
		report_failure(99);
	}
	prt("All operations completed A-OK!\n");

	exit(0);
	return 0;
}
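/*
 * Editor's usage note, not part of the patch, derived from usage()
 * above: a typical invocation runs a bounded number of operations
 * against a scratch file with a timestamp-derived seed and page-aligned
 * writes. The path below is hypothetical; fname is the only required
 * argument.
 *
 *	fsx -N 10000 -S 0 -w 4096 /mnt/test/fsx.scratch
 */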
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define	DUMPSIZE	10000

static void dump_lockstate(char *);
static void dump_openstate(void);
static void usage(void);
static char *open_flags(uint32_t);
static char *deleg_flags(uint32_t);
static char *lock_flags(uint32_t);
static char *client_flags(uint32_t);

static struct nfsd_dumpclients dp[DUMPSIZE];
static struct nfsd_dumplocks lp[DUMPSIZE];
static char flag_string[20];

int
main(int argc, char **argv)
{
	int ch, openstate;
	char *lockfile;

	if (modfind("nfsd") < 0)
		errx(1, "nfsd not loaded - self terminating");
	openstate = 0;
	lockfile = NULL;
	while ((ch = getopt(argc, argv, "ol:")) != -1)
		switch (ch) {
		case 'o':
			openstate = 1;
			break;
		case 'l':
			lockfile = optarg;
			break;
		default:
			usage();
		}
	argc -= optind;
	argv += optind;

	if (openstate == 0 && lockfile == NULL)
		openstate = 1;
	else if (openstate != 0 && lockfile != NULL)
		errx(1, "-o and -l cannot both be specified");

	/*
	 * For -o, dump all open/lock state.
	 * For -l, dump lock state for that file.
	 */
	if (openstate != 0)
		dump_openstate();
	else
		dump_lockstate(lockfile);
	exit(0);
}

static void
usage(void)
{

	errx(1, "usage: nfsdumpstate [-o] [-l]");
}

/*
 * Dump all open/lock state.
 */
static void
dump_openstate(void)
{
	struct nfsd_dumplist dumplist;
	int cnt, i;
+#ifdef INET6
	char nbuf[INET6_ADDRSTRLEN];
+#endif

	dumplist.ndl_size = DUMPSIZE;
	dumplist.ndl_list = (void *)dp;
	if (nfssvc(NFSSVC_DUMPCLIENTS, &dumplist) < 0)
		errx(1, "Can't perform dump clients syscall");

	printf("%-13s %9.9s %9.9s %9.9s %9.9s %9.9s %9.9s %-45s %s\n",
	    "Flags", "OpenOwner", "Open", "LockOwner", "Lock", "Deleg",
	    "OldDeleg", "Clientaddr", "ClientID");
	/*
	 * Loop through results, printing them out.
	 */
	cnt = 0;
	while (dp[cnt].ndcl_clid.nclid_idlen > 0 && cnt < DUMPSIZE) {
		printf("%-13s ", client_flags(dp[cnt].ndcl_flags));
		printf("%9d %9d %9d %9d %9d %9d ",
		    dp[cnt].ndcl_nopenowners, dp[cnt].ndcl_nopens,
		    dp[cnt].ndcl_nlockowners, dp[cnt].ndcl_nlocks,
		    dp[cnt].ndcl_ndelegs, dp[cnt].ndcl_nolddelegs);
		switch (dp[cnt].ndcl_addrfam) {
#ifdef INET
		case AF_INET:
			printf("%-45s ",
			    inet_ntoa(dp[cnt].ndcl_cbaddr.sin_addr));
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (inet_ntop(AF_INET6, &dp[cnt].ndcl_cbaddr.sin6_addr,
			    nbuf, sizeof(nbuf)) != NULL)
				printf("%-45s ", nbuf);
			else
				printf("%-45s ", " ");
			break;
#endif
		}
		for (i = 0; i < dp[cnt].ndcl_clid.nclid_idlen; i++)
			printf("%02x", dp[cnt].ndcl_clid.nclid_id[i]);
		printf("\n");
		cnt++;
	}
}
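/*
 * Editor's sketch, not part of the patch: the new "+#ifdef INET6" guards
 * above mirror the existing guard around the AF_INET6 case, so a kernel
 * or world built without INET6 no longer declares an unused nbuf[]
 * (which trips -Wunused-variable under -Werror). A stand-alone version
 * of the pattern (print_addr() is a hypothetical name):
 */
#if 0
#include <arpa/inet.h>
#include <stdio.h>

static void
print_addr(int af, const void *addr)
{
#ifdef INET6
	char nbuf[INET6_ADDRSTRLEN];	/* only used by the AF_INET6 arm */
#endif

	switch (af) {
#ifdef INET6
	case AF_INET6:
		if (inet_ntop(AF_INET6, addr, nbuf, sizeof(nbuf)) != NULL)
			printf("%s\n", nbuf);
		break;
#endif
	default:
		printf("(unsupported address family)\n");
		break;
	}
}
#endif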
/*
 * Dump the lock state for a file.
 */
static void
dump_lockstate(char *fname)
{
	struct nfsd_dumplocklist dumplocklist;
	int cnt, i;
+#ifdef INET6
	char nbuf[INET6_ADDRSTRLEN];
+#endif

	dumplocklist.ndllck_size = DUMPSIZE;
	dumplocklist.ndllck_list = (void *)lp;
	dumplocklist.ndllck_fname = fname;
	if (nfssvc(NFSSVC_DUMPLOCKS, &dumplocklist) < 0)
		errx(1, "Can't dump locks for %s\n", fname);

	printf("%-11s %-36s %-45s %s\n",
	    "Open/Lock", " Stateid or Lock Range", "Clientaddr",
	    "Owner and ClientID");
	/*
	 * Loop through results, printing them out.
	 */
	cnt = 0;
	while (lp[cnt].ndlck_clid.nclid_idlen > 0 && cnt < DUMPSIZE) {
		if (lp[cnt].ndlck_flags & NFSLCK_OPEN)
			printf("%-11s %9d %08x %08x %08x ",
			    open_flags(lp[cnt].ndlck_flags),
			    lp[cnt].ndlck_stateid.seqid,
			    lp[cnt].ndlck_stateid.other[0],
			    lp[cnt].ndlck_stateid.other[1],
			    lp[cnt].ndlck_stateid.other[2]);
		else if (lp[cnt].ndlck_flags & (NFSLCK_DELEGREAD |
		    NFSLCK_DELEGWRITE))
			printf("%-11s %9d %08x %08x %08x ",
			    deleg_flags(lp[cnt].ndlck_flags),
			    lp[cnt].ndlck_stateid.seqid,
			    lp[cnt].ndlck_stateid.other[0],
			    lp[cnt].ndlck_stateid.other[1],
			    lp[cnt].ndlck_stateid.other[2]);
		else
			printf("%-11s %17jd %17jd ",
			    lock_flags(lp[cnt].ndlck_flags),
			    lp[cnt].ndlck_first, lp[cnt].ndlck_end);
		switch (lp[cnt].ndlck_addrfam) {
#ifdef INET
		case AF_INET:
			printf("%-45s ",
			    inet_ntoa(lp[cnt].ndlck_cbaddr.sin_addr));
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (inet_ntop(AF_INET6, &lp[cnt].ndlck_cbaddr.sin6_addr,
			    nbuf, sizeof(nbuf)) != NULL)
				printf("%-45s ", nbuf);
			else
				printf("%-45s ", " ");
			break;
#endif
		default:
			printf("%-45s ", " ");
			break;
		}
		for (i = 0; i < lp[cnt].ndlck_owner.nclid_idlen; i++)
			printf("%02x", lp[cnt].ndlck_owner.nclid_id[i]);
		printf(" ");
		for (i = 0; i < lp[cnt].ndlck_clid.nclid_idlen; i++)
			printf("%02x", lp[cnt].ndlck_clid.nclid_id[i]);
		printf("\n");
		cnt++;
	}
}

/*
 * Parse the Open/Lock flag bits and create a string to be printed.
 */
static char *
open_flags(uint32_t flags)
{
	int i, j;

	strlcpy(flag_string, "Open ", sizeof (flag_string));
	i = 5;
	if (flags & NFSLCK_READACCESS)
		flag_string[i++] = 'R';
	if (flags & NFSLCK_WRITEACCESS)
		flag_string[i++] = 'W';
	flag_string[i++] = ' ';
	flag_string[i++] = 'D';
	flag_string[i] = 'N';
	j = i;
	if (flags & NFSLCK_READDENY)
		flag_string[i++] = 'R';
	if (flags & NFSLCK_WRITEDENY)
		flag_string[i++] = 'W';
	if (i == j)
		i++;
	flag_string[i] = '\0';
	return (flag_string);
}

static char *
deleg_flags(uint32_t flags)
{

	if (flags & NFSLCK_DELEGREAD)
		strlcpy(flag_string, "Deleg R", sizeof (flag_string));
	else
		strlcpy(flag_string, "Deleg W", sizeof (flag_string));
	return (flag_string);
}

static char *
lock_flags(uint32_t flags)
{

	if (flags & NFSLCK_READ)
		strlcpy(flag_string, "Lock R", sizeof (flag_string));
	else
		strlcpy(flag_string, "Lock W", sizeof (flag_string));
	return (flag_string);
}

static char *
client_flags(uint32_t flags)
{

	flag_string[0] = '\0';
	if (flags & LCL_NEEDSCONFIRM)
		strlcat(flag_string, "NC ", sizeof (flag_string));
	if (flags & LCL_CALLBACKSON)
		strlcat(flag_string, "CB ", sizeof (flag_string));
	if (flags & LCL_GSS)
		strlcat(flag_string, "GSS ", sizeof (flag_string));
	if (flags & LCL_ADMINREVOKED)
		strlcat(flag_string, "REV", sizeof (flag_string));
	return (flag_string);
}

Index: projects/runtime-coverage-v2
===================================================================
--- projects/runtime-coverage-v2	(revision 346924)
+++ projects/runtime-coverage-v2	(revision 346925)

Property changes on: projects/runtime-coverage-v2
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r346801-346924